2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
52 #include "gromacs/compat/make_unique.h"
53 #include "gromacs/domdec/collect.h"
54 #include "gromacs/domdec/dlbtiming.h"
55 #include "gromacs/domdec/domdec_network.h"
56 #include "gromacs/domdec/ga2la.h"
57 #include "gromacs/domdec/localatomsetmanager.h"
58 #include "gromacs/ewald/pme.h"
59 #include "gromacs/fileio/gmxfio.h"
60 #include "gromacs/fileio/pdbio.h"
61 #include "gromacs/gmxlib/chargegroup.h"
62 #include "gromacs/gmxlib/network.h"
63 #include "gromacs/gmxlib/nrnb.h"
64 #include "gromacs/gpu_utils/gpu_utils.h"
65 #include "gromacs/hardware/hw_info.h"
66 #include "gromacs/imd/imd.h"
67 #include "gromacs/listed-forces/manage-threading.h"
68 #include "gromacs/math/functions.h"
69 #include "gromacs/math/vec.h"
70 #include "gromacs/math/vectypes.h"
71 #include "gromacs/mdlib/constr.h"
72 #include "gromacs/mdlib/constraintrange.h"
73 #include "gromacs/mdlib/forcerec.h"
74 #include "gromacs/mdlib/gmx_omp_nthreads.h"
75 #include "gromacs/mdlib/lincs.h"
76 #include "gromacs/mdlib/mdatoms.h"
77 #include "gromacs/mdlib/mdrun.h"
78 #include "gromacs/mdlib/mdsetup.h"
79 #include "gromacs/mdlib/nb_verlet.h"
80 #include "gromacs/mdlib/nbnxn_grid.h"
81 #include "gromacs/mdlib/nsgrid.h"
82 #include "gromacs/mdlib/vsite.h"
83 #include "gromacs/mdtypes/commrec.h"
84 #include "gromacs/mdtypes/df_history.h"
85 #include "gromacs/mdtypes/forcerec.h"
86 #include "gromacs/mdtypes/inputrec.h"
87 #include "gromacs/mdtypes/md_enums.h"
88 #include "gromacs/mdtypes/mdatom.h"
89 #include "gromacs/mdtypes/nblist.h"
90 #include "gromacs/mdtypes/state.h"
91 #include "gromacs/pbcutil/ishift.h"
92 #include "gromacs/pbcutil/pbc.h"
93 #include "gromacs/pulling/pull.h"
94 #include "gromacs/pulling/pull_rotation.h"
95 #include "gromacs/timing/wallcycle.h"
96 #include "gromacs/topology/block.h"
97 #include "gromacs/topology/idef.h"
98 #include "gromacs/topology/ifunc.h"
99 #include "gromacs/topology/mtop_lookup.h"
100 #include "gromacs/topology/mtop_util.h"
101 #include "gromacs/topology/topology.h"
102 #include "gromacs/utility/basedefinitions.h"
103 #include "gromacs/utility/basenetwork.h"
104 #include "gromacs/utility/cstringutil.h"
105 #include "gromacs/utility/exceptions.h"
106 #include "gromacs/utility/fatalerror.h"
107 #include "gromacs/utility/gmxmpi.h"
108 #include "gromacs/utility/qsort_threadsafe.h"
109 #include "gromacs/utility/real.h"
110 #include "gromacs/utility/smalloc.h"
111 #include "gromacs/utility/strconvert.h"
112 #include "gromacs/utility/stringutil.h"
114 #include "atomdistribution.h"
115 #include "cellsizes.h"
116 #include "distribute.h"
117 #include "domdec_constraints.h"
118 #include "domdec_internal.h"
119 #include "domdec_vsite.h"
120 #include "redistribute.h"
/* NOTE(review): this listing appears to have lines elided (the embedded
 * original line numbers jump); the declarations below are kept verbatim.
 */
/* Maximum number of per-dimension load entries communicated for load balancing */
123 #define DD_NLOAD_MAX 9
/* Printable names for the DLB states; two "on" entries — presumably the last
 * covers an on-and-locked state. TODO confirm against the edlbs enum. */
125 static const char *edlbs_names[edlbsNR] = { "off", "auto", "locked", "on", "on" };
127 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
130 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
/* Low 16 bits hold the charge-group atom count; bits 16+ are per-dimension
 * forward/backward move flags. */
131 #define DD_FLAG_NRCG 65535
132 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
133 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
135 /* The DD zone order */
136 static const ivec dd_zo[DD_MAXZONE] =
137 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
139 /* The non-bonded zone-pair setup for domain decomposition
140 * The first number is the i-zone, the second number the first j-zone seen by
141 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
142 * As is, this is for 3D decomposition, where there are 4 i-zones.
143 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
144 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 */
/* NOTE(review): the remaining rows of this table are elided in this listing */
147 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
152 /* Turn on DLB when the load imbalance causes this amount of total loss.
153 * There is a bit of overhead with DLB and it's difficult to achieve
154 * a load imbalance of less than 2% with DLB.
 */
156 #define DD_PERF_LOSS_DLB_ON 0.02
158 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
159 #define DD_PERF_LOSS_WARN 0.05
162 /* We check if to turn on DLB at the first and every 100 DD partitionings.
163 * With large imbalance DLB will turn on at the first step, so we can
164 * make the interval so large that the MPI overhead of the check is negligible.
 */
166 static const int c_checkTurnDlbOnInterval = 100;
167 /* We need to check if DLB results in worse performance and then turn it off.
168 * We check this more often then for turning DLB on, because the DLB can scale
169 * the domains very rapidly, so if unlucky the load imbalance can go up quickly
170 * and furthermore, we are already synchronizing often with DLB, so
171 * the overhead of the MPI Bcast is not that high.
 */
173 static const int c_checkTurnDlbOffInterval = 20;
175 /* Forward declaration */
176 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue);
/* Rank/cell index with x running fastest: index = (z*ny + y)*nx + x */
180 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
182 static void index2xyz(ivec nc,int ind,ivec xyz)
184 xyz[XX] = ind % nc[XX];
185 xyz[YY] = (ind / nc[XX]) % nc[YY];
186 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
190 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
192 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
193 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
194 xyz[ZZ] = ind % nc[ZZ];
/* Maps a DD cell coordinate to the rank id of the DD node there.
 * NOTE(review): variable declarations, the non-Cartesian fallback branch and
 * the return statement are elided from this listing (line numbers jump).
 */
197 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
202 ddindex = dd_index(dd->nc, c);
203 if (dd->comm->bCartesianPP_PME)
/* Combined PP+PME Cartesian setup: use the precomputed lookup table */
205 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
207 else if (dd->comm->bCartesianPP)
/* Pure-PP Cartesian communicator: ask MPI for the rank at coordinate c */
210 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
221 static gmx_bool dynamic_dd_box(const gmx_ddbox_t *ddbox, const t_inputrec *ir)
223 return (ddbox->nboundeddim < DIM || inputrecDynamicBox(ir));
/* Returns the 1-based global atom number for local atom index i.
 * NOTE(review): the branch handling dd == nullptr (and the declaration of
 * atnr and the return) are elided from this listing.
 */
226 int ddglatnr(const gmx_domdec_t *dd, int i)
236 if (i >= dd->comm->atomRanges.numAtomsTotal())
238 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal())
240 atnr = dd->globalAtomIndices[i] + 1;
246 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
248 return &dd->comm->cgs_gl;
251 void dd_store_state(gmx_domdec_t *dd, t_state *state)
255 if (state->ddp_count != dd->ddp_count)
257 gmx_incons("The MD state does not match the domain decomposition state");
260 state->cg_gl.resize(dd->ncg_home);
261 for (i = 0; i < dd->ncg_home; i++)
263 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
266 state->ddp_count_cg_gl = dd->ddp_count;
269 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
271 return &dd->comm->zones;
/* Returns the neighbor-search j-group range and shift vectors for charge
 * group icg. NOTE(review): the izone initialization/search and several
 * braces are elided from this listing (line numbers jump).
 */
274 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
275 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
277 gmx_domdec_zones_t *zones;
280 zones = &dd->comm->zones;
/* Find the i-zone that contains charge group icg */
283 while (icg >= zones->izone[izone].cg1)
292 else if (izone < zones->nizone)
294 *jcg0 = zones->izone[izone].jcg0;
298 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
299 icg, izone, zones->nizone);
302 *jcg1 = zones->izone[izone].jcg1;
304 for (d = 0; d < dd->ndim; d++)
307 shift0[dim] = zones->izone[izone].shift0[dim];
308 shift1[dim] = zones->izone[izone].shift1[dim];
309 if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
311 /* A conservative approach, this can be optimized */
318 int dd_numHomeAtoms(const gmx_domdec_t &dd)
320 return dd.comm->atomRanges.numHomeAtoms();
323 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
325 /* We currently set mdatoms entries for all atoms:
326 * local + non-local + communicated for vsite + constraints
329 return dd->comm->atomRanges.numAtomsTotal();
332 int dd_natoms_vsite(const gmx_domdec_t *dd)
334 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
337 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
339 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
340 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
/* Communicates halo coordinates: for each DD dimension and each pulse,
 * packs home-zone coordinates (applying a PBC shift, or the screw-PBC
 * rotation for the x dimension) and exchanges them with the neighbor rank.
 * NOTE(review): many lines (declarations, braces, else branches) are elided
 * from this listing; the visible statements are kept verbatim.
 */
343 void dd_move_x(gmx_domdec_t *dd,
345 gmx::ArrayRef<gmx::RVec> x,
346 gmx_wallcycle *wcycle)
348 wallcycle_start(wcycle, ewcMOVEX);
351 gmx_domdec_comm_t *comm;
352 gmx_domdec_comm_dim_t *cd;
353 rvec shift = {0, 0, 0};
354 gmx_bool bPBC, bScrew;
358 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
/* Start appending received coordinates right after the home atoms */
361 nat_tot = comm->atomRanges.numHomeAtoms();
362 for (int d = 0; d < dd->ndim; d++)
/* Only cells at the lower boundary need a PBC shift in this dimension */
364 bPBC = (dd->ci[dd->dim[d]] == 0);
365 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
368 copy_rvec(box[dd->dim[d]], shift);
371 for (const gmx_domdec_ind_t &ind : cd->ind)
373 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
374 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
/* Case 1: no PBC shift needed — copy coordinates as-is */
378 for (int g : ind.index)
380 for (int j : atomGrouping.block(g))
382 sendBuffer[n] = x[j];
/* Case 2: ordinary PBC — add the box-row shift to each coordinate */
389 for (int g : ind.index)
391 for (int j : atomGrouping.block(g))
393 /* We need to shift the coordinates */
394 for (int d = 0; d < DIM; d++)
396 sendBuffer[n][d] = x[j][d] + shift[d];
/* Case 3: screw PBC — shift x and mirror y/z across the box */
404 for (int g : ind.index)
406 for (int j : atomGrouping.block(g))
409 sendBuffer[n][XX] = x[j][XX] + shift[XX];
411 * This operation requires a special shift force
412 * treatment, which is performed in calc_vir.
414 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
415 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
421 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
423 gmx::ArrayRef<gmx::RVec> receiveBuffer;
424 if (cd->receiveInPlace)
/* Receive directly into the halo region of x */
426 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
430 receiveBuffer = receiveBufferAccess.buffer;
432 /* Send and receive the coordinates */
433 ddSendrecv(dd, d, dddirBackward,
434 sendBuffer, receiveBuffer);
436 if (!cd->receiveInPlace)
/* Scatter the staging buffer into per-zone destinations */
439 for (int zone = 0; zone < nzone; zone++)
441 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
443 x[i] = receiveBuffer[j++];
447 nat_tot += ind.nrecv[nzone+1];
452 wallcycle_stop(wcycle, ewcMOVEX);
/* The reverse of dd_move_x: communicates halo forces back to their home
 * ranks and accumulates them, optionally also accumulating shift forces
 * for the virial. Dimensions/pulses are traversed in reverse order.
 * NOTE(review): many lines (declarations, braces, else branches and the
 * shift-index computation) are elided from this listing.
 */
455 void dd_move_f(gmx_domdec_t *dd,
456 gmx::ArrayRef<gmx::RVec> f,
458 gmx_wallcycle *wcycle)
460 wallcycle_start(wcycle, ewcMOVEF);
463 gmx_domdec_comm_t *comm;
464 gmx_domdec_comm_dim_t *cd;
467 gmx_bool bShiftForcesNeedPbc, bScrew;
471 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
473 nzone = comm->zones.n/2;
474 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
475 for (int d = dd->ndim-1; d >= 0; d--)
477 /* Only forces in domains near the PBC boundaries need to
478 consider PBC in the treatment of fshift */
479 bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
480 bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
481 if (fshift == nullptr && !bScrew)
483 bShiftForcesNeedPbc = FALSE;
485 /* Determine which shift vector we need */
491 for (int p = cd->numPulses() - 1; p >= 0; p--)
493 const gmx_domdec_ind_t &ind = cd->ind[p];
494 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
495 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
/* Walk nat_tot back to the start of this pulse's halo atoms */
497 nat_tot -= ind.nrecv[nzone+1];
499 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
501 gmx::ArrayRef<gmx::RVec> sendBuffer;
502 if (cd->receiveInPlace)
/* Send directly out of the halo region of f */
504 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
508 sendBuffer = sendBufferAccess.buffer;
510 for (int zone = 0; zone < nzone; zone++)
512 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
514 sendBuffer[j++] = f[i];
518 /* Communicate the forces */
519 ddSendrecv(dd, d, dddirForward,
520 sendBuffer, receiveBuffer);
521 /* Add the received forces */
/* Case 1: no shift-force bookkeeping needed */
523 if (!bShiftForcesNeedPbc)
525 for (int g : ind.index)
527 for (int j : atomGrouping.block(g))
529 for (int d = 0; d < DIM; d++)
531 f[j][d] += receiveBuffer[n][d];
/* Case 2: also accumulate into fshift for the virial */
539 /* fshift should always be defined if this function is
540 * called when bShiftForcesNeedPbc is true */
541 assert(nullptr != fshift);
542 for (int g : ind.index)
544 for (int j : atomGrouping.block(g))
546 for (int d = 0; d < DIM; d++)
548 f[j][d] += receiveBuffer[n][d];
550 /* Add this force to the shift force */
551 for (int d = 0; d < DIM; d++)
553 fshift[is][d] += receiveBuffer[n][d];
/* Case 3: screw PBC — the y/z components were mirrored on the way out,
 * so subtract them here to rotate the force back */
561 for (int g : ind.index)
563 for (int j : atomGrouping.block(g))
565 /* Rotate the force */
566 f[j][XX] += receiveBuffer[n][XX];
567 f[j][YY] -= receiveBuffer[n][YY];
568 f[j][ZZ] -= receiveBuffer[n][ZZ];
571 /* Add this force to the shift force */
572 for (int d = 0; d < DIM; d++)
574 fshift[is][d] += receiveBuffer[n][d];
584 wallcycle_stop(wcycle, ewcMOVEF);
587 /* Convenience function for extracting a real buffer from an rvec buffer
589 * To reduce the number of temporary communication buffers and avoid
590 * cache polution, we reuse gmx::RVec buffers for storing reals.
591 * This functions return a real buffer reference with the same number
592 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
594 static gmx::ArrayRef<real>
595 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
597 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
/* Spreads a per-atom real array v from home atoms to the halo atoms,
 * using the same communication pattern as dd_move_x (forward over the
 * dimensions, one exchange per pulse).
 * NOTE(review): declarations, braces and some assignments are elided from
 * this listing (line numbers jump); visible statements are kept verbatim.
 */
601 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
604 gmx_domdec_comm_t *comm;
605 gmx_domdec_comm_dim_t *cd;
609 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
612 nat_tot = comm->atomRanges.numHomeAtoms();
613 for (int d = 0; d < dd->ndim; d++)
616 for (const gmx_domdec_ind_t &ind : cd->ind)
618 /* Note: We provision for RVec instead of real, so a factor of 3
619 * more than needed. The buffer actually already has this size
620 * and we pass a plain pointer below, so this does not matter.
 */
622 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
623 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
625 for (int g : ind.index)
627 for (int j : atomGrouping.block(g))
629 sendBuffer[n++] = v[j];
633 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
635 gmx::ArrayRef<real> receiveBuffer;
636 if (cd->receiveInPlace)
/* Receive directly into the halo region of v */
638 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
642 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
644 /* Send and receive the data */
645 ddSendrecv(dd, d, dddirBackward,
646 sendBuffer, receiveBuffer);
647 if (!cd->receiveInPlace)
650 for (int zone = 0; zone < nzone; zone++)
652 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
654 v[i] = receiveBuffer[j++];
658 nat_tot += ind.nrecv[nzone+1];
/* The reverse of dd_atom_spread_real: sums a per-atom real array v from
 * halo atoms back onto the home atoms (backward over the dimensions).
 * NOTE(review): declarations and braces are elided from this listing.
 */
664 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
667 gmx_domdec_comm_t *comm;
668 gmx_domdec_comm_dim_t *cd;
672 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
674 nzone = comm->zones.n/2;
675 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
676 for (int d = dd->ndim-1; d >= 0; d--)
679 for (int p = cd->numPulses() - 1; p >= 0; p--)
681 const gmx_domdec_ind_t &ind = cd->ind[p];
683 /* Note: We provision for RVec instead of real, so a factor of 3
684 * more than needed. The buffer actually already has this size
685 * and we typecast, so this works as intended.
 */
687 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
688 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
689 nat_tot -= ind.nrecv[nzone + 1];
691 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
693 gmx::ArrayRef<real> sendBuffer;
694 if (cd->receiveInPlace)
/* Send directly out of the halo region of v */
696 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
700 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
702 for (int zone = 0; zone < nzone; zone++)
704 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
706 sendBuffer[j++] = v[i];
710 /* Communicate the forces */
711 ddSendrecv(dd, d, dddirForward,
712 sendBuffer, receiveBuffer);
713 /* Add the received forces */
715 for (int g : ind.index)
717 for (int j : atomGrouping.block(g))
719 v[j] += receiveBuffer[n];
728 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
730 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
732 zone->min0, zone->max1,
733 zone->mch0, zone->mch0,
734 zone->p1_0, zone->p1_1);
737 /* Using the home grid size as input in cell_ns_x0 and cell_ns_x1
738 * takes the extremes over all home and remote zones in the halo
739 * and returns the results in cell_ns_x0 and cell_ns_x1.
740 * Note: only used with the group cut-off scheme.
 */
/* NOTE(review): a large number of lines (parameters, braces, else branches,
 * pos increments) are elided from this listing; visible statements are
 * kept verbatim below.
 */
742 static void dd_move_cellx(gmx_domdec_t *dd,
743 const gmx_ddbox_t *ddbox,
747 constexpr int c_ddZoneCommMaxNumZones = 5;
748 gmx_ddzone_t buf_s[c_ddZoneCommMaxNumZones];
749 gmx_ddzone_t buf_r[c_ddZoneCommMaxNumZones];
750 gmx_ddzone_t buf_e[c_ddZoneCommMaxNumZones];
751 gmx_domdec_comm_t *comm = dd->comm;
755 for (int d = 1; d < dd->ndim; d++)
757 int dim = dd->dim[d];
758 gmx_ddzone_t &zp = (d == 1) ? comm->zone_d1[0] : comm->zone_d2[0][0];
760 /* Copy the base sizes of the home zone */
761 zp.min0 = cell_ns_x0[dim];
762 zp.max1 = cell_ns_x1[dim];
763 zp.min1 = cell_ns_x1[dim];
764 zp.mch0 = cell_ns_x0[dim];
765 zp.mch1 = cell_ns_x1[dim];
766 zp.p1_0 = cell_ns_x0[dim];
767 zp.p1_1 = cell_ns_x1[dim];
770 gmx::ArrayRef<DDCellsizesWithDlb> cellsizes = comm->cellsizesWithDlb;
772 /* Loop backward over the dimensions and aggregate the extremes
 */
775 for (int d = dd->ndim - 2; d >= 0; d--)
777 const int dim = dd->dim[d];
778 const bool applyPbc = (dim < ddbox->npbcdim);
780 /* Use an rvec to store two reals */
781 extr_s[d][0] = cellsizes[d + 1].fracLower;
782 extr_s[d][1] = cellsizes[d + 1].fracUpper;
783 extr_s[d][2] = cellsizes[d + 1].fracUpper;
786 GMX_ASSERT(pos < c_ddZoneCommMaxNumZones, "The buffers should be sufficiently large");
787 /* Store the extremes in the backward sending buffer,
788 * so they get updated separately from the forward communication.
 */
790 for (int d1 = d; d1 < dd->ndim-1; d1++)
792 /* We invert the order to be able to use the same loop for buf_e */
793 buf_s[pos].min0 = extr_s[d1][1];
794 buf_s[pos].max1 = extr_s[d1][0];
795 buf_s[pos].min1 = extr_s[d1][2];
798 /* Store the cell corner of the dimension we communicate along */
799 buf_s[pos].p1_0 = comm->cell_x0[dim];
804 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
807 if (dd->ndim == 3 && d == 0)
809 buf_s[pos] = comm->zone_d2[0][1];
811 buf_s[pos] = comm->zone_d1[0];
815 /* We only need to communicate the extremes
816 * in the forward direction
 */
818 int numPulses = comm->cd[d].numPulses();
822 /* Take the minimum to avoid double communication */
823 numPulsesMin = std::min(numPulses, dd->nc[dim] - 1 - numPulses);
827 /* Without PBC we should really not communicate over
828 * the boundaries, but implementing that complicates
829 * the communication setup and therefore we simply
830 * do all communication, but ignore some data.
 */
832 numPulsesMin = numPulses;
834 for (int pulse = 0; pulse < numPulsesMin; pulse++)
836 /* Communicate the extremes forward */
837 bool receiveValidData = (applyPbc || dd->ci[dim] > 0);
839 int numElements = dd->ndim - d - 1;
840 ddSendrecv(dd, d, dddirForward,
841 extr_s + d, numElements,
842 extr_r + d, numElements);
844 if (receiveValidData)
846 for (int d1 = d; d1 < dd->ndim - 1; d1++)
848 extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
849 extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
850 extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]);
855 const int numElementsInBuffer = pos;
856 for (int pulse = 0; pulse < numPulses; pulse++)
858 /* Communicate all the zone information backward */
859 bool receiveValidData = (applyPbc || dd->ci[dim] < dd->nc[dim] - 1);
861 static_assert(sizeof(gmx_ddzone_t) == c_ddzoneNumReals*sizeof(real), "Here we expect gmx_ddzone_t to consist of c_ddzoneNumReals reals (only)");
863 int numReals = numElementsInBuffer*c_ddzoneNumReals;
864 ddSendrecv(dd, d, dddirBackward,
865 gmx::arrayRefFromArray(&buf_s[0].min0, numReals),
866 gmx::arrayRefFromArray(&buf_r[0].min0, numReals));
871 for (int d1 = d + 1; d1 < dd->ndim; d1++)
873 /* Determine the decrease of maximum required
874 * communication height along d1 due to the distance along d,
875 * this avoids a lot of useless atom communication.
 */
877 real dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
880 if (ddbox->tric_dir[dim])
882 /* c is the off-diagonal coupling between the cell planes
883 * along directions d and d1.
 */
885 c = ddbox->v[dim][dd->dim[d1]][dim];
891 real det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
894 dh[d1] = comm->cutoff - (c*dist_d + std::sqrt(det))/(1 + c*c);
898 /* A negative value signals out of range */
904 /* Accumulate the extremes over all pulses */
905 for (int i = 0; i < numElementsInBuffer; i++)
913 if (receiveValidData)
915 buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
916 buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
917 buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
921 if (dd->ndim == 3 && d == 0 && i == numElementsInBuffer - 1)
929 if (receiveValidData && dh[d1] >= 0)
931 buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
932 buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
935 /* Copy the received buffer to the send buffer,
936 * to pass the data through with the next pulse.
 */
940 if (((applyPbc || dd->ci[dim] + numPulses < dd->nc[dim]) && pulse == numPulses - 1) ||
941 (!applyPbc && dd->ci[dim] + 1 + pulse == dd->nc[dim] - 1))
943 /* Store the extremes */
946 for (int d1 = d; d1 < dd->ndim-1; d1++)
948 extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
949 extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
950 extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1);
954 if (d == 1 || (d == 0 && dd->ndim == 3))
956 for (int i = d; i < 2; i++)
958 comm->zone_d2[1-d][i] = buf_e[pos];
964 comm->zone_d1[1] = buf_e[pos];
/* Fold the aggregated remote-zone bounds into cell_ns_x0/cell_ns_x1 */
973 int dim = dd->dim[1];
974 for (int i = 0; i < 2; i++)
978 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
980 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0);
981 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1);
986 int dim = dd->dim[2];
987 for (int i = 0; i < 2; i++)
989 for (int j = 0; j < 2; j++)
993 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
995 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
996 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1000 for (int d = 1; d < dd->ndim; d++)
1002 cellsizes[d].fracLowerMax = extr_s[d-1][0];
1003 cellsizes[d].fracUpperMin = extr_s[d-1][1];
1006 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1007 d, cellsizes[d].fracLowerMax, cellsizes[d].fracUpperMin);
/* Writes the DD cell grid of all ranks as a PDB file: each cell becomes
 * 8 "CA" pseudo-atoms (its corners) connected by CONECT records, with the
 * cell volume stored in the B-factor column. Master-only output.
 * NOTE(review): several lines (DDMASTER guards, braces, the tric matrix
 * fallback and mvmul call) are elided from this listing.
 */
1012 static void write_dd_grid_pdb(const char *fn, int64_t step,
1013 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1015 rvec grid_s[2], *grid_r = nullptr, cx, r;
1016 char fname[STRLEN], buf[22];
1018 int a, i, d, z, y, x;
1022 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1023 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1027 snew(grid_r, 2*dd->nnodes);
/* Gather every rank's cell corners on the master */
1030 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : nullptr);
1034 for (d = 0; d < DIM; d++)
1036 for (i = 0; i < DIM; i++)
1044 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1046 tric[d][i] = box[i][d]/box[i][i];
1055 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1056 out = gmx_fio_fopen(fname, "w");
1057 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1059 for (i = 0; i < dd->nnodes; i++)
/* Cell volume relative to the average cell volume */
1061 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1062 for (d = 0; d < DIM; d++)
1064 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1066 for (z = 0; z < 2; z++)
1068 for (y = 0; y < 2; y++)
1070 for (x = 0; x < 2; x++)
1072 cx[XX] = grid_r[i*2+x][XX];
1073 cx[YY] = grid_r[i*2+y][YY];
1074 cx[ZZ] = grid_r[i*2+z][ZZ];
1076 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1077 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
/* Connect the 8 corners of each cell along the box edges */
1081 for (d = 0; d < DIM; d++)
1083 for (x = 0; x < 4; x++)
1087 case 0: y = 1 + i*8 + 2*x; break;
1088 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1089 case 2: y = 1 + i*8 + x; break;
1091 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
1095 gmx_fio_fclose(out);
/* Writes the local atoms of this rank as a PDB file, with the DD zone
 * index (or zone count +0/+1 for vsite/constraint atoms) in the B-factor
 * column, for debugging the decomposition.
 * NOTE(review): several declarations and braces are elided from this listing.
 */
1100 void write_dd_pdb(const char *fn, int64_t step, const char *title,
1101 const gmx_mtop_t *mtop, const t_commrec *cr,
1102 int natoms, const rvec x[], const matrix box)
1104 char fname[STRLEN], buf[22];
1107 const char *atomname, *resname;
1113 natoms = dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
1116 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
1118 out = gmx_fio_fopen(fname, "w");
1120 fprintf(out, "TITLE %s\n", title);
1121 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1123 for (int i = 0; i < natoms; i++)
1125 int ii = dd->globalAtomIndices[i];
1126 mtopGetAtomAndResidueName(mtop, ii, &molb, &atomname, &resnr, &resname, nullptr);
/* b encodes which zone (or extra range) atom i belongs to */
1129 if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Zones))
1132 while (i >= dd->atomGrouping().subRange(0, dd->comm->zones.cg_range[c + 1]).end())
1138 else if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites))
1140 b = dd->comm->zones.n;
1144 b = dd->comm->zones.n + 1;
1146 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
1147 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
1149 fprintf(out, "TER\n");
1151 gmx_fio_fclose(out);
/* Returns the cut-off distance usable for multi-body (bonded) interactions
 * between charge groups on different ranks.
 * NOTE(review): declarations, some braces and the return statement are
 * elided from this listing.
 */
1154 real dd_cutoff_multibody(const gmx_domdec_t *dd)
1156 gmx_domdec_comm_t *comm;
1163 if (comm->bInterCGBondeds)
1165 if (comm->cutoff_mbody > 0)
1167 r = comm->cutoff_mbody;
1171 /* cutoff_mbody=0 means we do not have DLB */
/* Without an explicit bonded cut-off, the smallest cell size bounds
 * what bonded interactions can be assigned */
1172 r = comm->cellsize_min[dd->dim[0]];
1173 for (di = 1; di < dd->ndim; di++)
1175 r = std::min(r, comm->cellsize_min[dd->dim[di]]);
1177 if (comm->bBondComm)
1179 r = std::max(r, comm->cutoff_mbody);
1183 r = std::min(r, comm->cutoff);
1191 real dd_cutoff_twobody(const gmx_domdec_t *dd)
1195 r_mb = dd_cutoff_multibody(dd);
1197 return std::max(dd->comm->cutoff, r_mb);
1201 static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
1206 nc = dd->nc[dd->comm->cartpmedim];
1207 ntot = dd->comm->ntot[dd->comm->cartpmedim];
1208 copy_ivec(coord, coord_pme);
1209 coord_pme[dd->comm->cartpmedim] =
1210 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
1213 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
1218 npme = dd->comm->npmenodes;
1220 /* Here we assign a PME node to communicate with this DD node
1221 * by assuming that the major index of both is x.
1222 * We add cr->npmenodes/2 to obtain an even distribution.
1224 return (ddindex*npme + npme/2)/npp;
/* Builds (with snew, caller frees) the list of PME rank ids for the
 * interleaved PP/PME rank layout: a PME rank is inserted after each run of
 * PP ranks that maps to the same PME index.
 * NOTE(review): the counter initialization/increment and several braces are
 * elided from this listing.
 */
1227 static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
1232 snew(pme_rank, dd->comm->npmenodes);
1234 for (i = 0; i < dd->nnodes; i++)
1236 p0 = ddindex2pmeindex(dd, i);
1237 p1 = ddindex2pmeindex(dd, i+1);
/* A PME rank sits after PP rank i when the PME index changes there */
1238 if (i+1 == dd->nnodes || p1 > p0)
1242 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
1244 pme_rank[n] = i + 1 + n;
/* Returns the PME slab/rank index for DD cell coordinate (x, y, z).
 * NOTE(review): this listing is fragmented; the bCartesian branch below
 * references fields (dd->ntot, dd->cartpmedim) and helpers inconsistent
 * with the rest of the file — presumably it sits inside a disabled
 * (#if 0 style) region in the original. Verify against upstream.
 */
1252 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
1260 if (dd->comm->bCartesian) {
1261 gmx_ddindex2xyz(dd->nc,ddindex,coords);
1262 dd_coords2pmecoords(dd,coords,coords_pme);
1263 copy_ivec(dd->ntot,nc);
1264 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1265 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1267 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
1269 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
/* Active path: map the DD rank index to its PME index */
1275 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
/* Returns the simulation rank id of the PP rank at DD cell (x, y, z),
 * for all three PP/PME rank-layout variants.
 * NOTE(review): the coords assignment, MPI guards and return are elided
 * from this listing.
 */
1280 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
1282 gmx_domdec_comm_t *comm;
1284 int ddindex, nodeid = -1;
1286 comm = cr->dd->comm;
1291 if (comm->bCartesianPP_PME)
/* Full Cartesian PP+PME communicator: MPI knows the mapping */
1294 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
1299 ddindex = dd_index(cr->dd->nc, coords);
1300 if (comm->bCartesianPP)
1302 nodeid = comm->ddindex2simnodeid[ddindex];
/* Interleaved layout: PP rank id is the DD index plus the number of
 * PME ranks that precede it */
1308 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
/* Returns the simulation rank id of the PME rank serving PP rank
 * sim_nodeid, or (presumably) -1 when sim_nodeid is itself a PME rank —
 * the initialization and return are elided from this listing.
 */
1320 static int dd_simnode2pmenode(const gmx_domdec_t *dd,
1321 const t_commrec gmx_unused *cr,
1326 const gmx_domdec_comm_t *comm = dd->comm;
1328 /* This assumes a uniform x domain decomposition grid cell size */
1329 if (comm->bCartesianPP_PME)
1332 ivec coord, coord_pme;
1333 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
1334 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1336 /* This is a PP node */
1337 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1338 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
1342 else if (comm->bCartesianPP)
1344 if (sim_nodeid < dd->nnodes)
1346 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1351 /* This assumes DD cells with identical x coordinates
1352 * are numbered sequentially.
 */
1354 if (dd->comm->pmenodes == nullptr)
1356 if (sim_nodeid < dd->nnodes)
1358 /* The DD index equals the nodeid */
1359 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid)
/* Explicit PME rank list: find the first PME rank after sim_nodeid */
1365 while (sim_nodeid > dd->comm->pmenodes[i])
1369 if (sim_nodeid < dd->comm->pmenodes[i])
1371 pmenode = dd->comm->pmenodes[i];
/* Returns the number of PME domains along x and y.
 * NOTE(review): the surrounding braces and the no-PME branch are elided
 * from this listing.
 */
1379 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
1384 dd->comm->npmenodes_x, dd->comm->npmenodes_y
/* Returns the list of simulation rank ids of all PP ranks that the PME rank
 * pmenodeid serves, by scanning the full DD grid.
 * NOTE(review): several declarations, the coord assignment and braces are
 * elided from this listing.
 */
1395 std::vector<int> get_pme_ddranks(const t_commrec *cr, int pmenodeid)
1399 ivec coord, coord_pme;
1403 std::vector<int> ddranks;
/* Each PME rank serves roughly nnodes/npmenodes PP ranks */
1404 ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
1406 for (x = 0; x < dd->nc[XX]; x++)
1408 for (y = 0; y < dd->nc[YY]; y++)
1410 for (z = 0; z < dd->nc[ZZ]; z++)
1412 if (dd->comm->bCartesianPP_PME)
1417 dd_cart_coord2pmecoord(dd, coord, coord_pme)
1418 if (dd->ci[XX] == coord_pme[XX] &&
1419 dd->ci[YY] == coord_pme[YY] &&
1420 dd->ci[ZZ] == coord_pme[ZZ])
1422 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
1427 /* The slab corresponds to the nodeid in the PME group */
1428 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
1430 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
/* Returns whether this PP rank should receive virial/energy contributions
 * from its PME rank: only the last PP rank served by a given PME rank does.
 * NOTE(review): MPI guards, some braces and the bReceive = FALSE assignments
 * are elided from this listing.
 */
1439 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
1441 gmx_bool bReceive = TRUE;
1443 if (cr->npmenodes < dd->nnodes)
1445 gmx_domdec_comm_t *comm = dd->comm;
1446 if (comm->bCartesianPP_PME)
1449 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1451 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
/* Check whether the next rank along the PME dimension shares our PME rank */
1452 coords[comm->cartpmedim]++;
1453 if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1456 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
1457 if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
1459 /* This is not the last PP node for pmenode */
1464 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
1469 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1470 if (cr->sim_nodeid+1 < cr->nnodes &&
1471 dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
1473 /* This is not the last PP node for pmenode */
/*! \brief Sets all zone charge-group ranges to end at ncg_home.
 *
 * After (re)partitioning only the home zone has content, so every zone
 * boundary beyond zone 0 is set to dd->ncg_home. */
1482 static void set_zones_ncg_home(gmx_domdec_t *dd)
1484 gmx_domdec_zones_t *zones;
1487 zones = &dd->comm->zones;
1489 zones->cg_range[0] = 0;
1490 for (i = 1; i < zones->n+1; i++)
1492 zones->cg_range[i] = dd->ncg_home;
1494 /* zone_ncg1[0] should always be equal to ncg_home */
1495 dd->comm->zone_ncg1[0] = dd->ncg_home;
/*! \brief Restores the local atom-group indices from a checkpointed state.
 *
 * Copies the global charge-group indices stored in \p state back into
 * dd->globalAtomGroupIndices and rebuilds the local group->atom partitioning
 * using the global group sizes from \p gcgs_index, then updates the home
 * atom range and zone ranges. */
1498 static void restoreAtomGroups(gmx_domdec_t *dd,
1499 const int *gcgs_index, const t_state *state)
1501 gmx::ArrayRef<const int> atomGroupsState = state->cg_gl;
1503 std::vector<int> &globalAtomGroupIndices = dd->globalAtomGroupIndices;
1504 gmx::RangePartitioning &atomGrouping = dd->atomGrouping_;
1506 globalAtomGroupIndices.resize(atomGroupsState.size());
1507 atomGrouping.clear();
1509 /* Copy back the global charge group indices from state
1510 * and rebuild the local charge group to atom index.
1512 for (gmx::index i = 0; i < atomGroupsState.size(); i++)
1514 const int atomGroupGlobal = atomGroupsState[i];
/* Group size = difference of consecutive entries in the global
 * charge-group index array. */
1515 const int groupSize = gcgs_index[atomGroupGlobal + 1] - gcgs_index[atomGroupGlobal];
1516 globalAtomGroupIndices[i] = atomGroupGlobal;
1517 atomGrouping.appendBlock(groupSize);
1520 dd->ncg_home = atomGroupsState.size();
1521 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, atomGrouping.fullRange().end());
1523 set_zones_ncg_home(dd);
/*! \brief Fills fr->cginfo for charge groups [cg0,cg1) and marks them local.
 *
 * \param index_gl  Local-to-global charge-group index mapping.
 * \param bLocalCG  Optional flag array over global charge groups; when
 *                  non-null, entries for the given range are set TRUE. */
1526 static void dd_set_cginfo(gmx::ArrayRef<const int> index_gl, int cg0, int cg1,
1527 t_forcerec *fr, char *bLocalCG)
1529 cginfo_mb_t *cginfo_mb;
1535 cginfo_mb = fr->cginfo_mb;
1536 cginfo = fr->cginfo;
1538 for (cg = cg0; cg < cg1; cg++)
1540 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
1544 if (bLocalCG != nullptr)
1546 for (cg = cg0; cg < cg1; cg++)
1548 bLocalCG[index_gl[cg]] = TRUE;
/*! \brief Builds the local->global and global->local atom index mappings.
 *
 * Walks all zones starting at charge group \p cg_start, appending global atom
 * indices to dd->globalAtomIndices and registering them in the ga2la lookup.
 * With charge groups (bCGs) each group expands to its constituent atoms via
 * \p gcgs_index; otherwise one group is one atom. NOTE(review): the zone1
 * variable used below is set on lines elided from this view. */
1553 static void make_dd_indices(gmx_domdec_t *dd,
1554 const int *gcgs_index, int cg_start)
1556 const int numZones = dd->comm->zones.n;
1557 const int *zone2cg = dd->comm->zones.cg_range;
1558 const int *zone_ncg1 = dd->comm->zone_ncg1;
1559 gmx::ArrayRef<const int> globalAtomGroupIndices = dd->globalAtomGroupIndices;
1560 const gmx_bool bCGs = dd->comm->bCGs;
1562 std::vector<int> &globalAtomIndices = dd->globalAtomIndices;
/* Sanity check: the home zone boundary must match ncg_home. */
1564 if (zone2cg[1] != dd->ncg_home)
1566 gmx_incons("dd->ncg_zone is not up to date");
1569 /* Make the local to global and global to local atom index */
1570 int a = dd->atomGrouping().subRange(cg_start, cg_start).begin();
1571 globalAtomIndices.resize(a);
1572 for (int zone = 0; zone < numZones; zone++)
1581 cg0 = zone2cg[zone];
1583 int cg1 = zone2cg[zone+1];
/* Groups beyond cg1_p1 arrived via more than one communication pulse. */
1584 int cg1_p1 = cg0 + zone_ncg1[zone];
1586 for (int cg = cg0; cg < cg1; cg++)
1591 /* Signal that this cg is from more than one pulse away */
1594 int cg_gl = globalAtomGroupIndices[cg];
1597 for (int a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
1599 globalAtomIndices.push_back(a_gl);
1600 ga2la_set(dd->ga2la, a_gl, a, zone1)
1606 globalAtomIndices.push_back(cg_gl);
1607 ga2la_set(dd->ga2la, cg_gl, a, zone1);
/*! \brief Debug check: verifies consistency of the bLocalCG flag array.
 *
 * Checks that every local charge group is flagged in bLocalCG and that the
 * number of flagged groups matches the local count; prints diagnostics and
 * presumably returns an error count (return statements elided in this view). */
1614 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
1618 if (bLocalCG == nullptr)
1622 for (size_t i = 0; i < dd->globalAtomGroupIndices.size(); i++)
1624 if (!bLocalCG[dd->globalAtomGroupIndices[i]])
1627 "DD rank %d, %s: atom group %zu, global atom group %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i + 1, dd->globalAtomGroupIndices[i] + 1, dd->ncg_home);
/* Count how many groups are globally marked local (ngl is accumulated
 * on lines elided from this view). */
1632 for (int i = 0; i < ncg_sys; i++)
1639 if (ngl != dd->globalAtomGroupIndices.size())
1641 fprintf(stderr, "DD rank %d, %s: In bLocalCG %zu atom groups are marked as local, whereas there are %zu\n", dd->rank, where, ngl, dd->globalAtomGroupIndices.size());
/*! \brief Debug check of local/global atom index consistency.
 *
 * With high debug levels, verifies no global atom occurs twice locally;
 * always verifies the ga2la mapping agrees with globalAtomIndices and that
 * the number of mapped atoms matches the zone atom count. Aborts with
 * gmx_fatal when inconsistencies were found. */
1648 static void check_index_consistency(gmx_domdec_t *dd,
1649 int natoms_sys, int ncg_sys,
1654 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1656 if (dd->comm->DD_debug > 1)
/* Expensive O(natoms_sys) duplicate scan, only at debug level > 1. */
1658 std::vector<int> have(natoms_sys);
1659 for (int a = 0; a < numAtomsInZones; a++)
1661 int globalAtomIndex = dd->globalAtomIndices[a];
1662 if (have[globalAtomIndex] > 0)
1664 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, globalAtomIndex + 1, have[globalAtomIndex], a+1);
/* Store local index + 1 so 0 can mean "not seen". */
1668 have[globalAtomIndex] = a + 1;
1673 std::vector<int> have(numAtomsInZones);
1676 for (int i = 0; i < natoms_sys; i++)
1680 if (ga2la_get(dd->ga2la, i, &a, &cell))
1682 if (a >= numAtomsInZones)
1684 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, numAtomsInZones);
1690 if (dd->globalAtomIndices[a] != i)
1692 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->globalAtomIndices[a]+1);
1699 if (ngl != numAtomsInZones)
1702 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
1703 dd->rank, where, ngl, numAtomsInZones);
1705 for (int a = 0; a < numAtomsInZones; a++)
1710 "DD rank %d, %s: local atom %d, global %d has no global index\n",
1711 dd->rank, where, a + 1, dd->globalAtomIndices[a] + 1);
1715 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
1719 gmx_fatal(FARGS, "DD rank %d, %s: %d atom(group) index inconsistencies",
1720 dd->rank, where, nerr);
1724 /* Clear all DD global state indices, starting from \p atomGroupStart and \p atomStart */
1725 static void clearDDStateIndices(gmx_domdec_t *dd,
/* Fast path: wipe the whole ga2la table at once; the alternative branch
 * below deletes only the entries currently in the zone range. */
1731 /* Clear the whole list without searching */
1732 ga2la_clear(dd->ga2la);
1736 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1737 for (int i = 0; i < numAtomsInZones; i++)
1739 ga2la_del(dd->ga2la, dd->globalAtomIndices[i]);
1743 char *bLocalCG = dd->comm->bLocalCG;
/* Un-mark the charge groups from atomGroupStart onward as local. */
1746 for (size_t atomGroup = atomGroupStart; atomGroup < dd->globalAtomGroupIndices.size(); atomGroup++)
1748 bLocalCG[dd->globalAtomGroupIndices[atomGroup]] = FALSE;
/* Also reset the derived local indices for virtual sites and constraints. */
1752 dd_clear_local_vsite_indices(dd);
1754 if (dd->constraints)
1756 dd_clear_local_constraint_indices(dd);
/*! \brief Checks that DLB cell boundaries have not shifted beyond the allowed limit.
 *
 * For each decomposed dimension, compares the dynamic cell fraction bounds
 * against the grid-jump limit (scaled by box size and triclinic skew).
 * On violation, either flags it or aborts with a fatal error; the exact
 * branch condition is on lines elided from this view. */
1760 static bool check_grid_jump(int64_t step,
1761 const gmx_domdec_t *dd,
1763 const gmx_ddbox_t *ddbox,
1766 gmx_domdec_comm_t *comm = dd->comm;
1767 bool invalid = false;
1769 for (int d = 1; d < dd->ndim; d++)
1771 const DDCellsizesWithDlb &cellsizes = comm->cellsizesWithDlb[d];
1772 const int dim = dd->dim[d];
1773 const real limit = grid_jump_limit(comm, cutoff, d);
1774 real bfac = ddbox->box_size[dim];
1775 if (ddbox->tric_dir[dim])
/* Triclinic dimension: correct the box length by the skew factor. */
1777 bfac *= ddbox->skew_fac[dim];
1779 if ((cellsizes.fracUpper - cellsizes.fracLowerMax)*bfac < limit ||
1780 (cellsizes.fracLower - cellsizes.fracUpperMin)*bfac > -limit)
1788 /* This error should never be triggered under normal
1789 * circumstances, but you never know ...
1791 gmx_fatal(FARGS, "step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
1792 gmx_step_str(step, buf),
1793 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/*! \brief Returns the force-computation load measure for this rank.
 *
 * Uses either flop counts (with optional artificial noise for testing,
 * when eFlop > 1) or cycle counts; subtracts the per-interval maximum to
 * suppress spikes from external system activity, and corrects for GPU wait
 * time shared between ranks that use the same GPU. */
1801 static float dd_force_load(gmx_domdec_comm_t *comm)
1808 if (comm->eFlop > 1)
/* Add +/-5% pseudo-random noise per extra eFlop level (testing aid). */
1810 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
1815 load = comm->cycl[ddCyclF];
1816 if (comm->cycl_n[ddCyclF] > 1)
1818 /* Subtract the maximum of the last n cycle counts
1819 * to get rid of possible high counts due to other sources,
1820 * for instance system activity, that would otherwise
1821 * affect the dynamic load balancing.
1823 load -= comm->cycl_max[ddCyclF];
1827 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
1829 float gpu_wait, gpu_wait_sum;
1831 gpu_wait = comm->cycl[ddCyclWaitGPU];
1832 if (comm->cycl_n[ddCyclF] > 1)
1834 /* We should remove the WaitGPU time of the same MD step
1835 * as the one with the maximum F time, since the F time
1836 * and the wait time are not independent.
1837 * Furthermore, the step for the max F time should be chosen
1838 * the same on all ranks that share the same GPU.
1839 * But to keep the code simple, we remove the average instead.
1840 * The main reason for artificially long times at some steps
1841 * is spurious CPU activity or MPI time, so we don't expect
1842 * that changes in the GPU wait time matter a lot here.
1844 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
1846 /* Sum the wait times over the ranks that share the same GPU */
1847 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
1848 comm->mpi_comm_gpu_shared);
1849 /* Replace the wait time by the average over the ranks */
1850 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
/*! \brief Allocates and fills cumulative cell-boundary fractions for static load balancing.
 *
 * Output array (*dim_f) has nc[dim]+1 entries in [0,1]; uses user-supplied
 * relative cell sizes (slb_frac) when present, otherwise uniform spacing. */
1858 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
1860 gmx_domdec_comm_t *comm;
1865 snew(*dim_f, dd->nc[dim]+1);
1867 for (i = 1; i < dd->nc[dim]; i++)
1869 if (comm->slb_frac[dim])
/* Accumulate the user-provided relative cell widths. */
1871 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
1875 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
/* The final boundary is always the full box. */
1878 (*dim_f)[dd->nc[dim]] = 1;
/*! \brief Initializes the PME slab setup (gmx_ddpme_t) for decomposition index \p dimind.
 *
 * Determines the PME slab count for this dimension and, per slab, the
 * min/max PP cell locations that communicate with it, then fills the
 * static load-balancing boundary fractions. */
1881 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
1883 int pmeindex, slab, nso, i;
/* Special case: first DD dimension is y while PME decomposes only x.
 * The handling on the following (elided) lines adjusts the dim choice. */
1886 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
1892 ddpme->dim = dimind;
1894 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1896 ddpme->nslab = (ddpme->dim == 0 ?
1897 dd->comm->npmenodes_x :
1898 dd->comm->npmenodes_y);
1900 if (ddpme->nslab <= 1)
1905 nso = dd->comm->npmenodes/ddpme->nslab;
1906 /* Determine for each PME slab the PP location range for dimension dim */
1907 snew(ddpme->pp_min, ddpme->nslab);
1908 snew(ddpme->pp_max, ddpme->nslab);
1909 for (slab = 0; slab < ddpme->nslab; slab++)
/* Initialize with inverted bounds so min/max below converge. */
1911 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1912 ddpme->pp_max[slab] = 0;
1914 for (i = 0; i < dd->nnodes; i++)
1916 ddindex2xyz(dd->nc, i, xyz);
1917 /* For y only use our y/z slab.
1918 * This assumes that the PME x grid size matches the DD grid size.
1920 if (dimind == 0 || xyz[XX] == dd->ci[XX])
1922 pmeindex = ddindex2pmeindex(dd, i);
1925 slab = pmeindex/nso;
1929 slab = pmeindex % ddpme->nslab;
1931 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1932 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1936 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
/*! \brief Returns the maximum PME grid shift along x (0 when PME slab dim 0 is not x). */
1939 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1941 if (dd->comm->ddpme[0].dim == XX)
1943 return dd->comm->ddpme[0].maxshift;
/*! \brief Returns the maximum PME grid shift along y.
 *
 * y may be either the first PME slab dimension (1D decomposition along y)
 * or the second one (2D x+y decomposition); returns 0 otherwise. */
1951 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1953 if (dd->comm->ddpme[0].dim == YY)
1955 return dd->comm->ddpme[0].maxshift;
1957 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
1959 return dd->comm->ddpme[1].maxshift;
/*! \brief Validates cell sizes and communicates neighbor-search cell boundaries.
 *
 * Aborts when a (skew-corrected) cell became smaller than the minimum
 * allowed size, then, with DLB or unbounded box dimensions, exchanges the
 * zone boundaries with neighbors and re-checks the grid-jump limits. */
1967 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
1969 rvec cell_ns_x0, rvec cell_ns_x1,
1972 gmx_domdec_comm_t *comm;
1977 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
1979 dim = dd->dim[dim_ind];
1981 /* Without PBC we don't have restrictions on the outer cells */
1982 if (!(dim >= ddbox->npbcdim &&
1983 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
1985 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
1986 comm->cellsize_min[dim])
1989 gmx_fatal(FARGS, "step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
1990 gmx_step_str(step, buf), dim2char(dim),
1991 comm->cell_x1[dim] - comm->cell_x0[dim],
1992 ddbox->skew_fac[dim],
1993 dd->comm->cellsize_min[dim],
1994 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1998 if ((isDlbOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
2000 /* Communicate the boundaries and update cell_ns_x0/1 */
2001 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
2002 if (isDlbOn(dd->comm) && dd->ndim > 1)
2004 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
/*! \brief Accumulates a cycle count for load balancing, tracking sum, count and maximum. */
2009 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
2011 /* Note that the cycles value can be incorrect, either 0 or some
2012 * extremely large value, when our thread migrated to another core
2013 * with an unsynchronized cycle counter. If this happens less often
2014 * that once per nstlist steps, this will not cause issues, since
2015 * we later subtract the maximum value from the sum over nstlist steps.
2016 * A zero count will slightly lower the total, but that's a small effect.
2017 * Note that the main purpose of the subtraction of the maximum value
2018 * is to avoid throwing off the load balancing when stalls occur due
2019 * e.g. system activity or network congestion.
2021 dd->comm->cycl[ddCycl] += cycles;
2022 dd->comm->cycl_n[ddCycl]++;
/* Keep the running maximum so the spike can be subtracted later. */
2023 if (cycles > dd->comm->cycl_max[ddCycl])
2025 dd->comm->cycl_max[ddCycl] = cycles;
/*! \brief Returns a cost-weighted flop count for the force calculation.
 *
 * Weights nonbonded kernel counts by 0.5 (0.25 for water W3/W4 kernels) to
 * approximate real timings, then adds free-energy/1-4 and bonded terms. */
2029 static double force_flop_count(t_nrnb *nrnb)
2036 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
2038 /* To get closer to the real timings, we half the count
2039 * for the normal loops and again half it for water loops.
2042 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2044 sum += nrnb->n[i]*0.25*cost_nrnb(i);
2048 sum += nrnb->n[i]*0.50*cost_nrnb(i);
2051 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
/* NOTE(review): this branch appears to exclude W3/W4 kernels here;
 * the accompanying else is elided from this view. */
2054 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2056 sum += nrnb->n[i]*cost_nrnb(i);
2059 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
2061 sum += nrnb->n[i]*cost_nrnb(i);
/*! \brief Starts flop counting for the force calculation (subtracts the current count). */
2067 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
2069 if (dd->comm->eFlop)
2071 dd->comm->flop -= force_flop_count(nrnb);
/*! \brief Stops flop counting for the force calculation (adds the current count). */
2074 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
2076 if (dd->comm->eFlop)
2078 dd->comm->flop += force_flop_count(nrnb);
/*! \brief Resets all per-interval DD cycle counters and the flop counter. */
2083 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
2087 for (i = 0; i < ddCyclNr; i++)
2089 dd->comm->cycl[i] = 0;
2090 dd->comm->cycl_n[i] = 0;
2091 dd->comm->cycl_max[i] = 0;
2094 dd->comm->flop_n = 0;
/*! \brief Gathers load measurements over the DD grid, dimension by dimension.
 *
 * Working from the last DD dimension inward, each rank packs its load data
 * (force load, optional DLB cell fractions, optional PME timings and the
 * reduced results of the previous dimension) into sbuf and gathers it at the
 * row master, which reduces sums/maxima and records DLB limitation flags.
 * Finally, accumulates the totals used for the end-of-run load report.
 * Wrapped in the ewcDDCOMMLOAD wallcycle counter. */
2097 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
2099 gmx_domdec_comm_t *comm;
2100 domdec_load_t *load;
2101 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
2106 fprintf(debug, "get_load_distribution start\n");
2109 wallcycle_start(wcycle, ewcDDCOMMLOAD);
2113 bSepPME = (dd->pme_nodeid >= 0);
2115 if (dd->ndim == 0 && bSepPME)
2117 /* Without decomposition, but with PME nodes, we need the load */
2118 comm->load[0].mdf = comm->cycl[ddCyclPPduringPME];
2119 comm->load[0].pme = comm->cycl[ddCyclPME];
2122 for (int d = dd->ndim - 1; d >= 0; d--)
2124 const DDCellsizesWithDlb &cellsizes = comm->cellsizesWithDlb[d];
2125 const int dim = dd->dim[d];
2126 /* Check if we participate in the communication in this dimension */
2127 if (d == dd->ndim-1 ||
2128 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
2130 load = &comm->load[d];
2131 if (isDlbOn(dd->comm))
2133 cell_frac = cellsizes.fracUpper - cellsizes.fracLower;
2136 if (d == dd->ndim-1)
/* Innermost dimension: seed the buffer with our own force load. */
2138 sbuf[pos++] = dd_force_load(comm);
2139 sbuf[pos++] = sbuf[0];
2140 if (isDlbOn(dd->comm))
2142 sbuf[pos++] = sbuf[0];
2143 sbuf[pos++] = cell_frac;
2146 sbuf[pos++] = cellsizes.fracLowerMax;
2147 sbuf[pos++] = cellsizes.fracUpperMin;
2152 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
2153 sbuf[pos++] = comm->cycl[ddCyclPME];
/* Outer dimensions: forward the already-reduced results of d+1. */
2158 sbuf[pos++] = comm->load[d+1].sum;
2159 sbuf[pos++] = comm->load[d+1].max;
2160 if (isDlbOn(dd->comm))
2162 sbuf[pos++] = comm->load[d+1].sum_m;
2163 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
2164 sbuf[pos++] = comm->load[d+1].flags;
2167 sbuf[pos++] = cellsizes.fracLowerMax;
2168 sbuf[pos++] = cellsizes.fracUpperMin;
2173 sbuf[pos++] = comm->load[d+1].mdf;
2174 sbuf[pos++] = comm->load[d+1].pme;
2178 /* Communicate a row in DD direction d.
2179 * The communicators are setup such that the root always has rank 0.
2182 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
2183 load->load, load->nload*sizeof(float), MPI_BYTE,
2184 0, comm->mpi_comm_load[d]);
2186 if (dd->ci[dim] == dd->master_ci[dim])
2188 /* We are the master along this row, process this row */
2189 RowMaster *rowMaster = nullptr;
2193 rowMaster = cellsizes.rowMaster.get();
2203 for (int i = 0; i < dd->nc[dim]; i++)
2205 load->sum += load->load[pos++];
2206 load->max = std::max(load->max, load->load[pos]);
2208 if (isDlbOn(dd->comm))
2210 if (rowMaster->dlbIsLimited)
2212 /* This direction could not be load balanced properly,
2213 * therefore we need to use the maximum iso the average load.
2215 load->sum_m = std::max(load->sum_m, load->load[pos]);
2219 load->sum_m += load->load[pos];
2222 load->cvol_min = std::min(load->cvol_min, load->load[pos]);
2226 load->flags = (int)(load->load[pos++] + 0.5);
2230 rowMaster->bounds[i].cellFracLowerMax = load->load[pos++];
2231 rowMaster->bounds[i].cellFracUpperMin = load->load[pos++];
2236 load->mdf = std::max(load->mdf, load->load[pos]);
2238 load->pme = std::max(load->pme, load->load[pos]);
2242 if (isDlbOn(comm) && rowMaster->dlbIsLimited)
/* Scale the max load to a row total and flag this dimension. */
2244 load->sum_m *= dd->nc[dim];
2245 load->flags |= (1<<d);
/* Accumulate run totals for the final load report (master only,
 * presumably — the enclosing condition is elided from this view). */
2253 comm->nload += dd_load_count(comm);
2254 comm->load_step += comm->cycl[ddCyclStep];
2255 comm->load_sum += comm->load[0].sum;
2256 comm->load_max += comm->load[0].max;
2259 for (int d = 0; d < dd->ndim; d++)
2261 if (comm->load[0].flags & (1<<d))
2263 comm->load_lim[d]++;
2269 comm->load_mdf += comm->load[0].mdf;
2270 comm->load_pme += comm->load[0].pme;
2274 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
2278 fprintf(debug, "get_load_distribution finished\n");
/*! \brief Returns the fraction of the step time spent in the (balanceable) force load. */
2282 static float dd_force_load_fraction(gmx_domdec_t *dd)
2284 /* Return the relative performance loss on the total run time
2285 * due to the force calculation load imbalance.
2287 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2289 return dd->comm->load_sum/(dd->comm->load_step*dd->nnodes);
/*! \brief Returns the relative run-time loss due to force-load imbalance
 * (max load minus average, as a fraction of the total step time). */
2297 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
2299 /* Return the relative performance loss on the total run time
2300 * due to the force calculation load imbalance.
2302 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2305 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
2306 (dd->comm->load_step*dd->nnodes);
/*! \brief Prints the end-of-run load-balance report to the log and stderr.
 *
 * Master rank only, and only when load was measured. Reports the DLB state,
 * the average force-load imbalance and its performance cost, how often DLB
 * was limited per dimension, and the PP/PME balance — with advisory notes
 * when losses exceed DD_PERF_LOSS_WARN. */
2314 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
2316 gmx_domdec_comm_t *comm = dd->comm;
2318 /* Only the master rank prints loads and only if we measured loads */
2319 if (!DDMASTER(dd) || comm->nload == 0)
2325 int numPpRanks = dd->nnodes;
2326 int numPmeRanks = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
2327 int numRanks = numPpRanks + numPmeRanks;
2328 float lossFraction = 0;
2330 /* Print the average load imbalance and performance loss */
2331 if (dd->nnodes > 1 && comm->load_sum > 0)
2333 float imbalance = comm->load_max*numPpRanks/comm->load_sum - 1;
2334 lossFraction = dd_force_imb_perf_loss(dd);
2336 std::string msg = "\n Dynamic load balancing report:\n";
2337 std::string dlbStateStr;
/* Describe why DLB ended up in its final state. */
2339 switch (dd->comm->dlbState)
2342 dlbStateStr = "DLB was off during the run per user request.";
2344 case edlbsOffForever:
2345 /* Currectly this can happen due to performance loss observed, cell size
2346 * limitations or incompatibility with other settings observed during
2347 * determineInitialDlbState(). */
2348 dlbStateStr = "DLB got disabled because it was unsuitable to use.";
2350 case edlbsOffCanTurnOn:
2351 dlbStateStr = "DLB was off during the run due to low measured imbalance.";
2353 case edlbsOffTemporarilyLocked:
2354 dlbStateStr = "DLB was locked at the end of the run due to unfinished PP-PME balancing.";
2356 case edlbsOnCanTurnOff:
2357 dlbStateStr = "DLB was turned on during the run due to measured imbalance.";
2360 dlbStateStr = "DLB was permanently on during the run per user request.";
2363 GMX_ASSERT(false, "Undocumented DLB state");
2366 msg += " " + dlbStateStr + "\n";
2367 msg += gmx::formatString(" Average load imbalance: %.1f%%.\n", imbalance*100);
2368 msg += gmx::formatString(" The balanceable part of the MD step is %d%%, load imbalance is computed from this.\n",
2369 static_cast<int>(dd_force_load_fraction(dd)*100 + 0.5));
2370 msg += gmx::formatString(" Part of the total run time spent waiting due to load imbalance: %.1f%%.\n",
2372 fprintf(fplog, "%s", msg.c_str());
2373 fprintf(stderr, "%s", msg.c_str());
2376 /* Print during what percentage of steps the load balancing was limited */
2377 bool dlbWasLimited = false;
2380 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
2381 for (int d = 0; d < dd->ndim; d++)
/* Rounded percentage: (200*count + 1)/(2*total) ~= round(100*count/total). */
2383 int limitPercentage = (200*comm->load_lim[d] + 1)/(2*comm->nload);
2384 sprintf(buf+strlen(buf), " %c %d %%",
2385 dim2char(dd->dim[d]), limitPercentage);
2386 if (limitPercentage >= 50)
2388 dlbWasLimited = true;
2391 sprintf(buf + strlen(buf), "\n");
2392 fprintf(fplog, "%s", buf);
2393 fprintf(stderr, "%s", buf);
2396 /* Print the performance loss due to separate PME - PP rank imbalance */
2397 float lossFractionPme = 0;
2398 if (numPmeRanks > 0 && comm->load_mdf > 0 && comm->load_step > 0)
2400 float pmeForceRatio = comm->load_pme/comm->load_mdf;
2401 lossFractionPme = (comm->load_pme - comm->load_mdf)/comm->load_step;
2402 if (lossFractionPme <= 0)
/* PME ranks are underloaded: the loss is weighed by the PME share. */
2404 lossFractionPme *= numPmeRanks/static_cast<float>(numRanks);
2408 lossFractionPme *= numPpRanks/static_cast<float>(numRanks);
2410 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pmeForceRatio);
2411 fprintf(fplog, "%s", buf);
2412 fprintf(stderr, "%s", buf);
2413 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", std::fabs(lossFractionPme)*100);
2414 fprintf(fplog, "%s", buf);
2415 fprintf(stderr, "%s", buf);
2417 fprintf(fplog, "\n");
2418 fprintf(stderr, "\n");
2420 if (lossFraction >= DD_PERF_LOSS_WARN)
2423 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
2424 " in the domain decomposition.\n", lossFraction*100);
2427 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
2429 else if (dlbWasLimited)
2431 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
2433 fprintf(fplog, "%s\n", buf);
2434 fprintf(stderr, "%s\n", buf);
2436 if (numPmeRanks > 0 && std::fabs(lossFractionPme) >= DD_PERF_LOSS_WARN)
2439 "NOTE: %.1f %% performance was lost because the PME ranks\n"
2440 " had %s work to do than the PP ranks.\n"
2441 " You might want to %s the number of PME ranks\n"
2442 " or %s the cut-off and the grid spacing.\n",
2443 std::fabs(lossFractionPme*100),
2444 (lossFractionPme < 0) ? "less" : "more",
2445 (lossFractionPme < 0) ? "decrease" : "increase",
2446 (lossFractionPme < 0) ? "decrease" : "increase");
2447 fprintf(fplog, "%s\n", buf);
2448 fprintf(stderr, "%s\n", buf);
/*! \brief Returns the minimum cell volume relative to the average (min/aver ratio). */
2452 static float dd_vol_min(gmx_domdec_t *dd)
2454 return dd->comm->load[0].cvol_min*dd->nnodes;
/*! \brief Returns the per-dimension DLB-limited flags from the reduced load data. */
2457 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
2459 return dd->comm->load[0].flags;
/*! \brief Returns the force-load imbalance (max/average - 1); 0 if no load measured. */
2462 static float dd_f_imbal(gmx_domdec_t *dd)
2464 if (dd->comm->load[0].sum > 0)
2466 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1.0f;
2470 /* Something is wrong in the cycle counting, report no load imbalance */
/*! \brief Returns the PME mesh / force load ratio (master rank only). */
2475 float dd_pme_f_ratio(gmx_domdec_t *dd)
2477 /* Should only be called on the DD master rank */
2478 assert(DDMASTER(dd));
2480 if (dd->comm->load[0].mdf > 0 && dd->comm->cycl_n[ddCyclPME] > 0)
2482 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
/*! \brief Writes a one-line per-interval load summary to the log file.
 *
 * Reports which dimensions limited DLB, the min/average cell volume (with
 * DLB) or force imbalance (without), and the PME/force ratio when measured. */
2490 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, int64_t step)
2495 flags = dd_load_flags(dd);
2499 "DD load balancing is limited by minimum cell size in dimension");
2500 for (d = 0; d < dd->ndim; d++)
2504 fprintf(fplog, " %c", dim2char(dd->dim[d]));
2507 fprintf(fplog, "\n");
2509 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
2510 if (isDlbOn(dd->comm))
/* '!' marks that DLB was limited in at least one dimension. */
2512 fprintf(fplog, " vol min/aver %5.3f%c",
2513 dd_vol_min(dd), flags ? '!' : ' ');
2517 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
2519 if (dd->comm->cycl_n[ddCyclPME])
2521 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
2523 fprintf(fplog, "\n\n");
/*! \brief Prints a compact load summary to stderr (for mdrun -v). */
2526 static void dd_print_load_verbose(gmx_domdec_t *dd)
2528 if (isDlbOn(dd->comm))
2530 fprintf(stderr, "vol %4.2f%c ",
2531 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
2535 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
2537 if (dd->comm->cycl_n[ddCyclPME])
2539 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
/*! \brief Creates the load-balancing communicator for one row of the DD grid.
 *
 * Splits mpi_comm_all so that all ranks sharing a row along dd->dim[dim_ind]
 * (at grid location \p loc) form one communicator with the row master at
 * rank 0; allocates the DLB row-master state or the fraction receive buffer,
 * and the load gather buffer on the row master. */
2544 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
2549 gmx_bool bPartOfGroup = FALSE;
2551 dim = dd->dim[dim_ind];
2552 copy_ivec(loc, loc_c);
2553 for (i = 0; i < dd->nc[dim]; i++)
2556 rank = dd_index(dd->nc, loc_c);
2557 if (rank == dd->rank)
2559 /* This process is part of the group */
2560 bPartOfGroup = TRUE;
/* Ranks not in the row pass MPI_UNDEFINED and get no communicator. */
2563 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
2567 dd->comm->mpi_comm_load[dim_ind] = c_row;
2568 if (!isDlbDisabled(dd->comm))
2570 DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
2572 if (dd->ci[dim] == dd->master_ci[dim])
2574 /* This is the root process of this row */
2575 cellsizes.rowMaster = gmx::compat::make_unique<RowMaster>();
2577 RowMaster &rowMaster = *cellsizes.rowMaster;
2578 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
2579 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
2580 rowMaster.isCellMin.resize(dd->nc[dim]);
2583 rowMaster.bounds.resize(dd->nc[dim]);
2585 rowMaster.buf_ncd.resize(dd->nc[dim]);
2589 /* This is not a root process, we only need to receive cell_f */
2590 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
2593 if (dd->ci[dim] == dd->master_ci[dim])
2595 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
/*! \brief Sets up the communicator of PP ranks sharing the same GPU on a node.
 *
 * Only PP ranks with an assigned GPU participate. Splits the PP communicator
 * first by physical node (hash), then by GPU id, giving mpi_comm_gpu_shared
 * and nrank_gpu_shared, which dd_force_load() uses to average GPU wait time.
 * The communicator is released when no sharing actually occurs. */
2601 void dd_setup_dlb_resource_sharing(t_commrec *cr,
2605 int physicalnode_id_hash;
2607 MPI_Comm mpi_comm_pp_physicalnode;
2609 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
2611 /* Only ranks with short-ranged tasks (currently) use GPUs.
2612 * If we don't have GPUs assigned, there are no resources to share.
2617 physicalnode_id_hash = gmx_physicalnode_id_hash();
2623 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
2624 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
2625 dd->rank, physicalnode_id_hash, gpu_id);
2627 /* Split the PP communicator over the physical nodes */
2628 /* TODO: See if we should store this (before), as it's also used for
2629 * for the nodecomm summation.
2631 // TODO PhysicalNodeCommunicator could be extended/used to handle
2632 // the need for per-node per-group communicators.
2633 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
2634 &mpi_comm_pp_physicalnode);
2635 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
2636 &dd->comm->mpi_comm_gpu_shared);
/* The intermediate per-node communicator is no longer needed. */
2637 MPI_Comm_free(&mpi_comm_pp_physicalnode);
2638 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
2642 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
2645 /* Note that some ranks could share a GPU, while others don't */
2647 if (dd->comm->nrank_gpu_shared == 1)
2649 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
2652 GMX_UNUSED_VALUE(cr);
2653 GMX_UNUSED_VALUE(gpu_id);
/*! \brief Creates the load communicators for all DD dimensions.
 *
 * Allocates the per-dimension load buffers, then calls
 * make_load_communicator() once per dimension — for dimension 1 once per
 * cell along dim0, and for dimension 2 once per (dim0, dim1) cell pair. */
2657 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
2660 int dim0, dim1, i, j;
2665 fprintf(debug, "Making load communicators\n");
2668 snew(dd->comm->load, std::max(dd->ndim, 1));
2669 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
2677 make_load_communicator(dd, 0, loc);
2681 for (i = 0; i < dd->nc[dim0]; i++)
2684 make_load_communicator(dd, 1, loc);
2690 for (i = 0; i < dd->nc[dim0]; i++)
2694 for (j = 0; j < dd->nc[dim1]; j++)
2697 make_load_communicator(dd, 2, loc);
2704 fprintf(debug, "Finished making load communicators\n");
2709 /*! \brief Sets up the relation between neighboring domains and zones */
2710 static void setup_neighbor_relations(gmx_domdec_t *dd)
2712 int d, dim, i, j, m;
2714 gmx_domdec_zones_t *zones;
2715 gmx_domdec_ns_ranges_t *izone;
/* Forward/backward neighbor rank in each decomposed dimension,
 * with periodic wrapping of the cell coordinate. */
2717 for (d = 0; d < dd->ndim; d++)
2720 copy_ivec(dd->ci, tmp);
2721 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
2722 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
2723 copy_ivec(dd->ci, tmp);
2724 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
2725 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
2728 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
2731 dd->neighbor[d][1]);
/* 2^ndim zones; 2^(ndim-1) i-zones for the nonbonded pair search. */
2735 int nzone = (1 << dd->ndim);
2736 int nizone = (1 << std::max(dd->ndim - 1, 0));
2737 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
2739 zones = &dd->comm->zones;
2741 for (i = 0; i < nzone; i++)
2744 clear_ivec(zones->shift[i]);
2745 for (d = 0; d < dd->ndim; d++)
2747 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
2752 for (i = 0; i < nzone; i++)
2754 for (d = 0; d < DIM; d++)
2756 s[d] = dd->ci[d] - zones->shift[i][d];
2761 else if (s[d] >= dd->nc[d])
2767 zones->nizone = nizone;
2768 for (i = 0; i < zones->nizone; i++)
2770 assert(ddNonbondedZonePairRanges[i][0] == i);
2772 izone = &zones->izone[i];
2773 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
2774 * j-zones up to nzone.
2776 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
2777 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
2778 for (dim = 0; dim < DIM; dim++)
2780 if (dd->nc[dim] == 1)
2782 /* All shifts should be allowed */
2783 izone->shift0[dim] = -1;
2784 izone->shift1[dim] = 1;
2788 /* Determine the min/max j-zone shift wrt the i-zone */
/* Start inverted ([1,-1]) so the loop below narrows to actual bounds. */
2789 izone->shift0[dim] = 1;
2790 izone->shift1[dim] = -1;
2791 for (j = izone->j0; j < izone->j1; j++)
2793 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
2794 if (shift_diff < izone->shift0[dim])
2796 izone->shift0[dim] = shift_diff;
2798 if (shift_diff > izone->shift1[dim])
2800 izone->shift1[dim] = shift_diff;
2807 if (!isDlbDisabled(dd->comm))
2809 dd->comm->cellsizesWithDlb.resize(dd->ndim);
2812 if (dd->comm->bRecordLoad)
2814 make_load_communicators(dd);
/*! \brief Creates the PP (particle-particle) communicator and rank/coordinate mappings.
 *
 * With Cartesian PP layout, replaces the group communicator by a Cartesian
 * one and builds the ddindex<->rank/nodeid tables needed later; otherwise
 * derives the cell coordinates directly from the rank. Also determines the
 * DD master rank/coordinates and logs this rank's cell position. */
2818 static void make_pp_communicator(FILE *fplog,
2820 t_commrec gmx_unused *cr,
2821 int gmx_unused reorder)
2824 gmx_domdec_comm_t *comm;
2831 if (comm->bCartesianPP)
2833 /* Set up cartesian communication for the particle-particle part */
2836 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
2837 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
2840 for (int i = 0; i < DIM; i++)
2844 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
2846 /* We overwrite the old communicator with the new cartesian one */
2847 cr->mpi_comm_mygroup = comm_cart;
2850 dd->mpi_comm_all = cr->mpi_comm_mygroup;
2851 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
2853 if (comm->bCartesianPP_PME)
2855 /* Since we want to use the original cartesian setup for sim,
2856 * and not the one after split, we need to make an index.
2858 snew(comm->ddindex2ddnodeid, dd->nnodes);
/* Each rank fills its own slot; gmx_sumi distributes the table. */
2859 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
2860 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
2861 /* Get the rank of the DD master,
2862 * above we made sure that the master node is a PP node.
2872 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
2874 else if (comm->bCartesianPP)
2876 if (cr->npmenodes == 0)
2878 /* The PP communicator is also
2879 * the communicator for this simulation
2881 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
2883 cr->nodeid = dd->rank;
2885 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
2887 /* We need to make an index to go from the coordinates
2888 * to the nodeid of this simulation.
2890 snew(comm->ddindex2simnodeid, dd->nnodes);
2891 snew(buf, dd->nnodes);
2892 if (thisRankHasDuty(cr, DUTY_PP))
2894 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
2896 /* Communicate the ddindex to simulation nodeid index */
2897 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
2898 cr->mpi_comm_mysim);
2901 /* Determine the master coordinates and rank.
2902 * The DD master should be the same node as the master of this sim.
2904 for (int i = 0; i < dd->nnodes; i++)
2906 if (comm->ddindex2simnodeid[i] == 0)
2908 ddindex2xyz(dd->nc, i, dd->master_ci);
2909 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
2914 fprintf(debug, "The master rank is %d\n", dd->masterrank);
2919 /* No Cartesian communicators */
2920 /* We use the rank in dd->comm->all as DD index */
2921 ddindex2xyz(dd->nc, dd->rank, dd->ci);
2922 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
2924 clear_ivec(dd->master_ci);
2931 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
2932 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2937 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
2938 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/*! \brief Build the ddindex -> simulation-nodeid table on ranks that
 * did not construct it in make_pp_communicator.
 *
 * Only needed with a Cartesian PP communicator but without a combined
 * Cartesian PP+PME setup; each PP rank contributes its own entry and
 * the table is completed with a sum-reduction over the sim communicator.
 */
2942 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
2946 gmx_domdec_comm_t *comm = dd->comm;
2948 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
2951 snew(comm->ddindex2simnodeid, dd->nnodes);
2952 snew(buf, dd->nnodes);
2953 if (thisRankHasDuty(cr, DUTY_PP))
2955 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
2957 /* Communicate the ddindex to simulation nodeid index */
2958 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
2959 cr->mpi_comm_mysim);
/* Non-MPI (or non-Cartesian) builds: silence unused-parameter warnings. */
2963 GMX_UNUSED_VALUE(dd);
2964 GMX_UNUSED_VALUE(cr);
/*! \brief Split the simulation communicator into PP and PME-only groups.
 *
 * With Cartesian rank order, tries to embed the PME-only ranks into a
 * Cartesian grid (stacking them along y or z, whichever divides evenly
 * and disturbs PP communication least); otherwise assigns duties by the
 * requested rank order (pp_pme or interleave) and splits with
 * MPI_Comm_split. Sets cr->duty, cr->mpi_comm_mygroup and cr->nodeid.
 * NOTE(review): sampled extract — some lines are missing between the
 * numbered statements.
 */
2968 static void split_communicator(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
2969 DdRankOrder gmx_unused rankOrder,
2970 int gmx_unused reorder)
2972 gmx_domdec_comm_t *comm;
2981 if (comm->bCartesianPP)
/* A Cartesian PP+PME grid is only possible when the PME ranks divide
 * evenly over a y- or z-slab of the PP grid. */
2983 for (i = 1; i < DIM; i++)
2985 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
2987 if (bDiv[YY] || bDiv[ZZ])
2989 comm->bCartesianPP_PME = TRUE;
2990 /* If we have 2D PME decomposition, which is always in x+y,
2991 * we stack the PME only nodes in z.
2992 * Otherwise we choose the direction that provides the thinnest slab
2993 * of PME only nodes as this will have the least effect
2994 * on the PP communication.
2995 * But for the PME communication the opposite might be better.
2997 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
2999 dd->nc[YY] > dd->nc[ZZ]))
3001 comm->cartpmedim = ZZ;
3005 comm->cartpmedim = YY;
3007 comm->ntot[comm->cartpmedim]
3008 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
3012 fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
3014 "Will not use a Cartesian communicator for PP <-> PME\n\n");
3018 if (comm->bCartesianPP_PME)
3026 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
3029 for (i = 0; i < DIM; i++)
3033 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
3035 MPI_Comm_rank(comm_cart, &rank);
3036 if (MASTER(cr) && rank != 0)
3038 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
3041 /* With this assignment we lose the link to the original communicator
3042 * which will usually be MPI_COMM_WORLD, unless have multisim.
3044 cr->mpi_comm_mysim = comm_cart;
3045 cr->sim_nodeid = rank;
3047 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
3051 fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
3052 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Duty assignment: ranks beyond the PP slab along cartpmedim do PME. */
3055 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
3059 if (cr->npmenodes == 0 ||
3060 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
3062 cr->duty = DUTY_PME;
3065 /* Split the sim communicator into PP and PME only nodes */
3066 MPI_Comm_split(cr->mpi_comm_mysim,
3067 getThisRankDuties(cr),
3068 dd_index(comm->ntot, dd->ci),
3069 &cr->mpi_comm_mygroup);
/* Non-Cartesian rank orders: decide duties from the rank order enum. */
3076 case DdRankOrder::pp_pme:
3079 fprintf(fplog, "Order of the ranks: PP first, PME last\n");
3082 case DdRankOrder::interleave:
3083 /* Interleave the PP-only and PME-only ranks */
3086 fprintf(fplog, "Interleaving PP and PME ranks\n");
3088 comm->pmenodes = dd_interleaved_pme_ranks(dd);
3090 case DdRankOrder::cartesian:
3093 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(rankOrder));
3096 if (dd_simnode2pmenode(dd, cr, cr->sim_nodeid) == -1)
3098 cr->duty = DUTY_PME;
3105 /* Split the sim communicator into PP and PME only nodes */
3106 MPI_Comm_split(cr->mpi_comm_mysim,
3107 getThisRankDuties(cr),
3109 &cr->mpi_comm_mygroup);
3110 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
3116 fprintf(fplog, "This rank does only %s work.\n\n",
3117 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
/*! \brief Generates the MPI communicators for domain decomposition.
 *
 * Splits off PME-only ranks if requested, creates the PP communicator
 * (honoring the GMX_NO_CART_REORDER env.var.), resolves each PP rank's
 * peer PME rank, and allocates the master-rank atom distribution.
 */
3122 static void make_dd_communicators(FILE *fplog, t_commrec *cr,
3123 gmx_domdec_t *dd, DdRankOrder ddRankOrder)
3125 gmx_domdec_comm_t *comm;
3130 copy_ivec(dd->nc, comm->ntot);
3132 comm->bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
3133 comm->bCartesianPP_PME = FALSE;
3135 /* Reorder the nodes by default. This might change the MPI ranks.
3136 * Real reordering is only supported on very few architectures,
3137 * Blue Gene is one of them.
3139 CartReorder = (getenv("GMX_NO_CART_REORDER") == nullptr);
3141 if (cr->npmenodes > 0)
3143 /* Split the communicator into a PP and PME part */
3144 split_communicator(fplog, cr, dd, ddRankOrder, CartReorder);
3145 if (comm->bCartesianPP_PME)
3147 /* We (possibly) reordered the nodes in split_communicator,
3148 * so it is no longer required in make_pp_communicator.
3150 CartReorder = FALSE;
3155 /* All nodes do PP and PME */
3157 /* We do not require separate communicators */
3158 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
3162 if (thisRankHasDuty(cr, DUTY_PP))
3164 /* Copy or make a new PP communicator */
3165 make_pp_communicator(fplog, dd, cr, CartReorder)/* NOTE(review): extract shows no ';' — line likely truncated */;
3169 receive_ddindex2simnodeid(dd, cr);
3172 if (!thisRankHasDuty(cr, DUTY_PME))
3174 /* Set up the communication to our PME node */
3175 dd->pme_nodeid = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
3176 dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
3179 fprintf(debug, "My pme_nodeid %d receive ener %s\n",
3180 dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
3185 dd->pme_nodeid = -1;
/* Master rank allocates the structure used to distribute atoms. */
3190 dd->ma = gmx::compat::make_unique<AtomDistribution>(dd->nc,
3192 comm->cgs_gl.index[comm->cgs_gl.nr]);
/*! \brief Parse user-supplied static load-balancing cell fractions.
 *
 * \p size_string holds nc whitespace-separated relative sizes for
 * direction \p dir; returns a newly allocated array of normalized
 * fractions, or (presumably) nullptr when nc <= 1 or no string is given
 * — TODO confirm, the early-return lines are missing from this extract.
 */
3196 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
3198 real *slb_frac, tot;
3203 if (nc > 1 && size_string != nullptr)
3207 fprintf(fplog, "Using static load balancing for the %s direction\n",
3212 for (i = 0; i < nc; i++)
/* %20lf bounds the field width; %n records how many chars were consumed
 * so the parse position can be advanced. */
3215 sscanf(size_string, "%20lf%n", &dbl, &n);
3218 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
3227 fprintf(fplog, "Relative cell sizes:");
3229 for (i = 0; i < nc; i++)
3234 fprintf(fplog, " %5.3f", slb_frac[i]);
3239 fprintf(fplog, "\n");
/*! \brief Count the multi-body (>2 atoms) bonded interactions in the
 * topology, summed over all molecules.
 */
3246 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
3249 gmx_mtop_ilistloop_t iloop;
3253 iloop = gmx_mtop_ilistloop_init(mtop);
3254 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
3256 for (ftype = 0; ftype < F_NRE; ftype++)
/* Only bonded function types; the (missing) condition presumably also
 * restricts to >2 atoms per interaction — TODO confirm. */
3258 if ((interaction_function[ftype].flags & IF_BOND) &&
/* il[ftype].nr counts parameter+atom entries; divide by entry size. */
3261 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
/*! \brief Read an integer from environment variable \p env_var,
 * returning \p def when it is unset or unparsable; logs the value used.
 */
3269 static int dd_getenv(FILE *fplog, const char *env_var, int def)
3275 val = getenv(env_var);
3278 if (sscanf(val, "%20d", &nst) <= 0)
3284 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
/*! \brief Print a warning to stderr (on the master, presumably — the
 * guarding condition is missing from this extract) and to the log file.
 */
3292 static void dd_warning(const t_commrec *cr, FILE *fplog, const char *warn_string)
3296 fprintf(stderr, "\n%s\n", warn_string);
3300 fprintf(fplog, "\n%s\n", warn_string);
/*! \brief Abort on run settings that domain decomposition cannot handle
 * (screw PBC beyond 1D-x, simple neighbor search, nstlist=0), and warn
 * about angular COM removal with PBC.
 */
3304 static void check_dd_restrictions(t_commrec *cr, const gmx_domdec_t *dd,
3305 const t_inputrec *ir, FILE *fplog)
3307 if (ir->ePBC == epbcSCREW &&
3308 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
3310 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
3313 if (ir->ns_type == ensSIMPLE)
3315 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
3318 if (ir->nstlist == 0)
3320 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
3323 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
3325 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
/*! \brief Return the smallest average cell size over the decomposed
 * dimensions, using the initial (skew-corrected) box.
 */
3329 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3334 r = ddbox->box_size[XX];
3335 for (di = 0; di < dd->ndim; di++)
3338 /* Check using the initial average cell size */
3339 r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
/*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
 *
 * Fatal error when the user explicitly requested DLB (-dlb yes);
 * a logged NOTE when DLB was merely in auto mode.
3347 static int forceDlbOffOrBail(int cmdlineDlbState,
3348 const std::string &reasonStr,
3352 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
3353 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
3355 if (cmdlineDlbState == edlbsOnUser)
3357 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
3359 else if (cmdlineDlbState == edlbsOffCanTurnOn)
3361 dd_warning(cr, fplog, (dlbDisableNote + reasonStr + "\n").c_str());
3363 return edlbsOffForever;
/*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
 *
 * This function parses the parameters of "-dlb" command line option setting
 * corresponding state values. Then it checks the consistency of the determined
 * state with other run parameters and settings. As a result, the initial state
 * may be altered or an error may be thrown if incompatibility of options is detected.
 *
 * \param [in] fplog Pointer to mdrun log file.
 * \param [in] cr Pointer to MPI communication object.
 * \param [in] dlbOption Enum value for the DLB option.
 * \param [in] bRecordLoad True if the load balancer is recording load information.
 * \param [in] mdrunOptions Options for mdrun.
 * \param [in] ir Pointer to mdrun input parameters.
 * \returns DLB initial/startup state.
3381 static int determineInitialDlbState(FILE *fplog, t_commrec *cr,
3382 DlbOption dlbOption, gmx_bool bRecordLoad,
3383 const MdrunOptions &mdrunOptions,
3384 const t_inputrec *ir)
3386 int dlbState = edlbsOffCanTurnOn;
3390 case DlbOption::turnOnWhenUseful: dlbState = edlbsOffCanTurnOn; break;
3391 case DlbOption::no: dlbState = edlbsOffUser; break;
3392 case DlbOption::yes: dlbState = edlbsOnUser; break;
3393 default: gmx_incons("Invalid dlbOption enum value");
3396 /* Reruns don't support DLB: bail or override auto mode */
3397 if (mdrunOptions.rerun)
3399 std::string reasonStr = "it is not supported in reruns.";
3400 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3403 /* Unsupported integrators */
3404 if (!EI_DYNAMICS(ir->eI))
3406 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
3407 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3410 /* Without cycle counters we can't time work to balance on */
3413 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
3414 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3417 if (mdrunOptions.reproducible)
3419 std::string reasonStr = "you started a reproducible run.";
/* With -reprod, DLB must be off; which action depends on current state. */
3424 case edlbsOffForever:
3425 GMX_RELEASE_ASSERT(false, "edlbsOffForever is not a valid initial state");
3427 case edlbsOffCanTurnOn:
3428 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3429 case edlbsOnCanTurnOff:
/* NOTE(review): the assert message below says "edlbsOffCanTurnOff" but
 * this case is edlbsOnCanTurnOff — message typo, fix in a code change. */
3430 GMX_RELEASE_ASSERT(false, "edlbsOffCanTurnOff is not a valid initial state");
3433 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", cr, fplog);
3435 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", dlbState);
/*! \brief Fill dd->dim[] with the decomposed dimensions, in x,y,z order
 * by default or z,y,x when GMX_DD_ORDER_ZYX is set; only dimensions
 * with more than one cell are included.
 */
3442 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
3445 if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
3447 /* Decomposition order z,y,x */
3450 fprintf(fplog, "Using domain decomposition order z, y, x\n");
3452 for (int dim = DIM-1; dim >= 0; dim--)
3454 if (dd->nc[dim] > 1)
3456 dd->dim[dd->ndim++] = dim;
3462 /* Decomposition order x,y,z */
3463 for (int dim = 0; dim < DIM; dim++)
3465 if (dd->nc[dim] > 1)
3467 dd->dim[dd->ndim++] = dim;
3474 /* Set dim[0] to avoid extra checks on ndim in several places */
/*! \brief Allocate and zero-initialize the DD communication struct;
 * the caller owns the returned pointer.
 */
3479 static gmx_domdec_comm_t *init_dd_comm()
3481 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
3483 comm->n_load_have = 0;
3484 comm->n_load_collect = 0;
3486 comm->haveTurnedOffDlb = false;
3488 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
3490 comm->sum_nat[i] = 0;
3494 comm->load_step = 0;
3497 clear_ivec(comm->load_lim);
3501 /* This should be replaced by a unique pointer */
3502 comm->balanceRegion = ddBalanceRegionAllocate();
/*! \brief Set the cell size and interaction limits, as well as the DD grid */
3508 static void set_dd_limits_and_grid(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
3509 const DomdecOptions &options,
3510 const MdrunOptions &mdrunOptions,
3511 const gmx_mtop_t *mtop,
3512 const t_inputrec *ir,
3514 gmx::ArrayRef<const gmx::RVec> xGlobal,
/* Determines the DLB state, the communication cut-offs (non-bonded,
 * bonded "mbody", constraints), the minimum cell size, and then either
 * validates a user-specified DD grid or chooses one automatically.
 * NOTE(review): sampled extract — declarations and braces are missing
 * between the numbered statements. */
3518 real r_bonded_limit = -1;
3519 const real tenPercentMargin = 1.1;
3520 gmx_domdec_comm_t *comm = dd->comm;
3522 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
3523 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
3525 dd->pme_recv_f_alloc = 0;
3526 dd->pme_recv_f_buf = nullptr;
3528 /* Initialize to GPU share count to 0, might change later */
3529 comm->nrank_gpu_shared = 0;
3531 comm->dlbState = determineInitialDlbState(fplog, cr, options.dlbOption, comm->bRecordLoad, mdrunOptions, ir);
3532 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
3533 /* To consider turning DLB on after 2*nstlist steps we need to check
3534 * at partitioning count 3. Thus we need to increase the first count by 2.
3536 comm->ddPartioningCountFirstDlbOff += 2;
3540 fprintf(fplog, "Dynamic load balancing: %s\n",
3541 edlbs_names[comm->dlbState]);
3543 comm->bPMELoadBalDLBLimits = FALSE;
3545 /* Allocate the charge group/atom sorting struct */
3546 comm->sort = gmx::compat::make_unique<gmx_domdec_sort_t>();
/* bCGs: true when charge groups contain more than one atom. */
3548 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
3550 comm->bInterCGBondeds = ((ncg_mtop(mtop) > gmx_mtop_num_molecules(*mtop)) ||
3551 mtop->bIntermolecularInteractions);
3552 if (comm->bInterCGBondeds)
3554 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
3558 comm->bInterCGMultiBody = FALSE;
3561 dd->bInterCGcons = gmx::inter_charge_group_constraints(*mtop);
3562 dd->bInterCGsettles = gmx::inter_charge_group_settles(*mtop);
3566 /* Set the cut-off to some very large value,
3567 * so we don't need if statements everywhere in the code.
3568 * We use sqrt, since the cut-off is squared in some places.
3570 comm->cutoff = GMX_CUTOFF_INF;
3574 comm->cutoff = ir->rlist;
3576 comm->cutoff_mbody = 0;
3578 comm->cellsize_limit = 0;
3579 comm->bBondComm = FALSE;
3581 /* Atoms should be able to move by up to half the list buffer size (if > 0)
3582 * within nstlist steps. Since boundaries are allowed to displace by half
3583 * a cell size, DD cells should be at least the size of the list buffer.
3585 comm->cellsize_limit = std::max(comm->cellsize_limit,
3586 ir->rlist - std::max(ir->rvdw, ir->rcoulomb));
3588 if (comm->bInterCGBondeds)
3590 if (options.minimumCommunicationRange > 0)
/* User supplied -rdd: use it directly as the bonded cut-off. */
3592 comm->cutoff_mbody = options.minimumCommunicationRange;
3593 if (options.useBondedCommunication)
3595 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
3599 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3601 r_bonded_limit = comm->cutoff_mbody;
3603 else if (ir->bPeriodicMols)
3605 /* Can not easily determine the required cut-off */
3606 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
3607 comm->cutoff_mbody = comm->cutoff/2;
3608 r_bonded_limit = comm->cutoff_mbody;
/* Otherwise: measure the actual bonded distances (r_2b two-body,
 * r_mb multi-body) on the master and broadcast them. */
3616 dd_bonded_cg_distance(fplog, mtop, ir, as_rvec_array(xGlobal.data()), box,
3617 options.checkBondedInteractions,
3620 gmx_bcast(sizeof(r_2b), &r_2b, cr);
3621 gmx_bcast(sizeof(r_mb), &r_mb, cr);
3623 /* We use an initial margin of 10% for the minimum cell size,
3624 * except when we are just below the non-bonded cut-off.
3626 if (options.useBondedCommunication)
3628 if (std::max(r_2b, r_mb) > comm->cutoff)
3630 r_bonded = std::max(r_2b, r_mb);
3631 r_bonded_limit = tenPercentMargin*r_bonded;
3632 comm->bBondComm = TRUE;
3637 r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
3639 /* We determine cutoff_mbody later */
3643 /* No special bonded communication,
3644 * simply increase the DD cut-off.
3646 r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb);
3647 comm->cutoff_mbody = r_bonded_limit;
3648 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3654 "Minimum cell size due to bonded interactions: %.3f nm\n",
3657 comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
3661 if (dd->bInterCGcons && options.constraintCommunicationRange <= 0)
3663 /* There is a cell size limit due to the constraints (P-LINCS) */
3664 rconstr = gmx::constr_r_max(fplog, mtop, ir);
3668 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
3670 if (rconstr > comm->cellsize_limit)
3672 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
3676 else if (options.constraintCommunicationRange > 0 && fplog)
3678 /* Here we do not check for dd->bInterCGcons,
3679 * because one can also set a cell size limit for virtual sites only
3680 * and at this point we don't know yet if there are intercg v-sites.
3683 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
3684 options.constraintCommunicationRange);
3685 rconstr = options.constraintCommunicationRange;
3687 comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
3689 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
3691 if (options.numCells[XX] > 0)
/* User supplied an explicit DD grid via -dd. */
3693 copy_ivec(options.numCells, dd->nc);
3694 set_dd_dim(fplog, dd);
3695 set_ddbox_cr(cr, &dd->nc, ir, box, xGlobal, ddbox);
3697 if (options.numPmeRanks >= 0)
3699 cr->npmenodes = options.numPmeRanks;
3703 /* When the DD grid is set explicitly and -npme is set to auto,
3704 * don't use PME ranks. We check later if the DD grid is
3705 * compatible with the total number of ranks.
3710 real acs = average_cellsize_min(dd, ddbox);
3711 if (acs < comm->cellsize_limit)
3715 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
3717 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3718 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
3719 acs, comm->cellsize_limit);
/* No explicit grid: let GROMACS choose the DD grid and PME rank count. */
3724 set_ddbox_cr(cr, nullptr, ir, box, xGlobal, ddbox);
3726 /* We need to choose the optimal DD grid and possibly PME nodes */
3728 dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
3729 options.numPmeRanks,
3730 !isDlbDisabled(comm),
3732 comm->cellsize_limit, comm->cutoff,
3733 comm->bInterCGBondeds);
3735 if (dd->nc[XX] == 0)
3738 gmx_bool bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
3739 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
3740 !bC ? "-rdd" : "-rcon",
3741 comm->dlbState != edlbsOffUser ? " or -dds" : "",
3742 bC ? " or your LINCS settings" : "");
3744 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3745 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
3747 "Look in the log file for details on the domain decomposition",
3748 cr->nnodes-cr->npmenodes, limit, buf);
3750 set_dd_dim(fplog, dd);
3756 "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
3757 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
3760 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
3761 if (cr->nnodes - dd->nnodes != cr->npmenodes)
3763 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3764 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
3765 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
3767 if (cr->npmenodes > dd->nnodes)
3769 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3770 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
3772 if (cr->npmenodes > 0)
3774 comm->npmenodes = cr->npmenodes;
3778 comm->npmenodes = dd->nnodes;
3781 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
3783 /* The following choices should match those
3784 * in comm_cost_est in domdec_setup.c.
3785 * Note that here the checks have to take into account
3786 * that the decomposition might occur in a different order than xyz
3787 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
3788 * in which case they will not match those in comm_cost_est,
3789 * but since that is mainly for testing purposes that's fine.
3791 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
3792 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
3793 getenv("GMX_PMEONEDD") == nullptr)
3795 comm->npmedecompdim = 2;
3796 comm->npmenodes_x = dd->nc[XX];
3797 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
3801 /* In case nc is 1 in both x and y we could still choose to
3802 * decompose pme in y instead of x, but we use x for simplicity.
3804 comm->npmedecompdim = 1;
3805 if (dd->dim[0] == YY)
3807 comm->npmenodes_x = 1;
3808 comm->npmenodes_y = comm->npmenodes;
3812 comm->npmenodes_x = comm->npmenodes;
3813 comm->npmenodes_y = 1;
3818 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
3819 comm->npmenodes_x, comm->npmenodes_y, 1);
3824 comm->npmedecompdim = 0;
3825 comm->npmenodes_x = 0;
3826 comm->npmenodes_y = 0;
3829 snew(comm->slb_frac, DIM);
3830 if (isDlbDisabled(comm))
3832 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], options.cellSizeX);
3833 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], options.cellSizeY);
3834 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], options.cellSizeZ);
3837 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
3839 if (comm->bBondComm || !isDlbDisabled(comm))
3841 /* Set the bonded communication distance to halfway
3842 * the minimum and the maximum,
3843 * since the extra communication cost is nearly zero.
3845 real acs = average_cellsize_min(dd, ddbox);
3846 comm->cutoff_mbody = 0.5*(r_bonded + acs);
3847 if (!isDlbDisabled(comm))
3849 /* Check if this does not limit the scaling */
3850 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
3851 options.dlbScaling*acs);
3853 if (!comm->bBondComm)
3855 /* Without bBondComm do not go beyond the n.b. cut-off */
3856 comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
3857 if (comm->cellsize_limit >= comm->cutoff)
3859 /* We don't lose a lot of efficiency
3860 * when increasing it to the n.b. cut-off.
3861 * It can even be slightly faster, because we need
3862 * less checks for the communication setup.
3864 comm->cutoff_mbody = comm->cutoff;
3867 /* Check if we did not end up below our original limit */
3868 comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
3870 if (comm->cutoff_mbody > comm->cellsize_limit)
3872 comm->cellsize_limit = comm->cutoff_mbody;
3875 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
3880 fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
3881 "cellsize limit %f\n",
3882 gmx::boolToString(comm->bBondComm), comm->cellsize_limit);
3887 check_dd_restrictions(cr, dd, ir, fplog);
/*! \brief Apply the DLB pulse counts and minimum cell sizes that were
 * precomputed for the DLB-on case to the active communication setup.
 */
3891 static void set_dlb_limits(gmx_domdec_t *dd)
3894 for (int d = 0; d < dd->ndim; d++)
3896 /* Set the number of pulses to the value for DLB */
3897 dd->comm->cd[d].ind.resize(dd->comm->cd[d].np_dlb);
3899 dd->comm->cellsize_min[dd->dim[d]] =
3900 dd->comm->cellsize_min_dlb[dd->dim[d]];
/*! \brief Switch dynamic load balancing on at run time.
 *
 * Refuses (switching to edlbsOffForever) when the current minimum cell
 * size is within 5% of the cell size limit; otherwise records the
 * pre-DLB cycle rate and initializes uniform DLB cell fractions.
 */
3905 static void turn_on_dlb(FILE *fplog, const t_commrec *cr, int64_t step)
3907 gmx_domdec_t *dd = cr->dd;
3908 gmx_domdec_comm_t *comm = dd->comm;
3910 real cellsize_min = comm->cellsize_min[dd->dim[0]];
3911 for (int d = 1; d < dd->ndim; d++)
3913 cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
3916 /* Turn off DLB if we're too close to the cell size limit. */
3917 if (cellsize_min < comm->cellsize_limit*1.05)
3919 auto str = gmx::formatString("step %" PRId64 " Measured %.1f %% performance loss due to load imbalance, "
3920 "but the minimum cell size is smaller than 1.05 times the cell size limit."
3921 "Will no longer try dynamic load balancing.\n", step, dd_force_imb_perf_loss(dd)*100);
3922 dd_warning(cr, fplog, str.c_str());
3924 comm->dlbState = edlbsOffForever;
3929 sprintf(buf, "step %" PRId64 " Turning on dynamic load balancing, because the performance loss due to load imbalance is %.1f %%.\n", step, dd_force_imb_perf_loss(dd)*100);
3930 dd_warning(cr, fplog, buf);
3931 comm->dlbState = edlbsOnCanTurnOff;
3933 /* Store the non-DLB performance, so we can check if DLB actually
3934 * improves performance.
3936 GMX_RELEASE_ASSERT(comm->cycl_n[ddCyclStep] > 0, "When we turned on DLB, we should have measured cycles");
3937 comm->cyclesPerStepBeforeDLB = comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
3941 /* We can set the required cell size info here,
3942 * so we do not need to communicate this.
3943 * The grid is completely uniform.
3945 for (int d = 0; d < dd->ndim; d++)
3947 RowMaster *rowMaster = comm->cellsizesWithDlb[d].rowMaster.get();
3951 comm->load[d].sum_m = comm->load[d].sum;
3953 int nc = dd->nc[dd->dim[d]];
3954 for (int i = 0; i < nc; i++)
/* Uniform grid: cell i starts at fraction i/nc. */
3956 rowMaster->cellFrac[i] = i/static_cast<real>(nc);
3959 rowMaster->bounds[i].cellFracLowerMax = i /static_cast<real>(nc);
3960 rowMaster->bounds[i].cellFracUpperMin = (i + 1)/static_cast<real>(nc);
3963 rowMaster->cellFrac[nc] = 1.0;
/*! \brief Switch DLB off at run time (it may be turned on again later);
 * records the partitioning count for the off-check bookkeeping.
 */
3968 static void turn_off_dlb(FILE *fplog, const t_commrec *cr, int64_t step)
3970 gmx_domdec_t *dd = cr->dd;
3973 sprintf(buf, "step %" PRId64 " Turning off dynamic load balancing, because it is degrading performance.\n", step);
3974 dd_warning(cr, fplog, buf);
3975 dd->comm->dlbState = edlbsOffCanTurnOn;
3976 dd->comm->haveTurnedOffDlb = true;
3977 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
/*! \brief Permanently disable DLB for the rest of the run; only valid
 * from the edlbsOffCanTurnOn state.
 */
3980 static void turn_off_dlb_forever(FILE *fplog, const t_commrec *cr, int64_t step)
3982 GMX_RELEASE_ASSERT(cr->dd->comm->dlbState == edlbsOffCanTurnOn, "Can only turn off DLB forever when it was in the can-turn-on state");
3984 sprintf(buf, "step %" PRId64 " Will no longer try dynamic load balancing, as it degraded performance.\n", step);
3985 dd_warning(cr, fplog, buf);
3986 cr->dd->comm->dlbState = edlbsOffForever;
/*! \brief Allocate and return a per-charge-group locality flag array,
 * initialized to FALSE for every charge group; caller owns the memory.
 */
3989 static char *init_bLocalCG(const gmx_mtop_t *mtop)
3994 ncg = ncg_mtop(mtop);
3995 snew(bLocalCG, ncg);
3996 for (cg = 0; cg < ncg; cg++)
3998 bLocalCG[cg] = FALSE;
/*! \brief Initialize data structures for bonded interactions under DD:
 * builds the reverse topology and, when bonded communication beyond the
 * cut-off is enabled, the charge-group link and locality tables.
 */
4004 void dd_init_bondeds(FILE *fplog,
4006 const gmx_mtop_t *mtop,
4007 const gmx_vsite_t *vsite,
4008 const t_inputrec *ir,
4009 gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
4011 gmx_domdec_comm_t *comm;
4013 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
4017 if (comm->bBondComm)
4019 /* Communicate atoms beyond the cut-off for bonded interactions */
4022 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
4024 comm->bLocalCG = init_bLocalCG(mtop);
4028 /* Only communicate atoms based on cut-off */
4029 comm->cglink = nullptr;
4030 comm->bLocalCG = nullptr;
/*! \brief Print a summary of the DD setup to the log file: pulse counts,
 * cell size limits/shrink factors (with DLB) and the maximum allowed
 * interaction distances. No-op when fplog is null.
 */
4034 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
4035 const gmx_mtop_t *mtop, const t_inputrec *ir,
4036 gmx_bool bDynLoadBal, real dlb_scale,
4037 const gmx_ddbox_t *ddbox)
4039 gmx_domdec_comm_t *comm;
4045 if (fplog == nullptr)
/* DLB branch: report pulses and allowed cell shrink per dimension. */
4054 fprintf(fplog, "The maximum number of communication pulses is:");
4055 for (d = 0; d < dd->ndim; d++)
4057 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
4059 fprintf(fplog, "\n");
4060 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
4061 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
4062 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
4063 for (d = 0; d < DIM; d++)
4067 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
4074 comm->cellsize_min_dlb[d]/
4075 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
4077 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
4080 fprintf(fplog, "\n");
/* Static (non-DLB) branch: report initial pulses and cell sizes. */
4084 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
4085 fprintf(fplog, "The initial number of communication pulses is:");
4086 for (d = 0; d < dd->ndim; d++)
4088 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
4090 fprintf(fplog, "\n");
4091 fprintf(fplog, "The initial domain decomposition cell size is:");
4092 for (d = 0; d < DIM; d++)
4096 fprintf(fplog, " %c %.2f nm",
4097 dim2char(d), dd->comm->cellsize_min[d]);
4100 fprintf(fplog, "\n\n");
4103 gmx_bool bInterCGVsites = count_intercg_vsites(mtop);
4105 if (comm->bInterCGBondeds ||
4107 dd->bInterCGcons || dd->bInterCGsettles)
4109 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
4110 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4111 "non-bonded interactions", "", comm->cutoff);
4115 limit = dd->comm->cellsize_limit;
4119 if (dynamic_dd_box(ddbox, ir))
4121 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
4123 limit = dd->comm->cellsize_min[XX];
4124 for (d = 1; d < DIM; d++)
4126 limit = std::min(limit, dd->comm->cellsize_min[d]);
4130 if (comm->bInterCGBondeds)
4132 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4133 "two-body bonded interactions", "(-rdd)",
4134 std::max(comm->cutoff, comm->cutoff_mbody));
4135 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4136 "multi-body bonded interactions", "(-rdd)",
4137 (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
4141 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4142 "virtual site constructions", "(-rcon)", limit);
4144 if (dd->bInterCGcons || dd->bInterCGsettles)
4146 sprintf(buf, "atoms separated by up to %d constraints",
4148 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4149 buf, "(-rcon)", limit);
4151 fprintf(fplog, "\n");
/*! \brief Compute the DLB pulse counts and per-dimension minimum cell
 * sizes (cellsize_min_dlb), taking the -dds shrink factor and the
 * GMX_DD_NPULSE override into account.
 */
4157 static void set_cell_limits_dlb(gmx_domdec_t *dd,
4159 const t_inputrec *ir,
4160 const gmx_ddbox_t *ddbox)
4162 gmx_domdec_comm_t *comm;
4163 int d, dim, npulse, npulse_d_max, npulse_d;
/* "No cut-off" here means an infinite interaction range (rvdw/rcoulomb
 * of zero), in which case pulses are bounded only by the grid size. */
4168 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
4170 /* Determine the maximum number of comm. pulses in one dimension */
4172 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4174 /* Determine the maximum required number of grid pulses */
4175 if (comm->cellsize_limit >= comm->cutoff)
4177 /* Only a single pulse is required */
4180 else if (!bNoCutOff && comm->cellsize_limit > 0)
4182 /* We round down slightly here to avoid overhead due to the latency
4183 * of extra communication calls when the cut-off
4184 * would be only slightly longer than the cell size.
4185 * Later cellsize_limit is redetermined,
4186 * so we can not miss interactions due to this rounding.
4188 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
4192 /* There is no cell size limit */
4193 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
4196 if (!bNoCutOff && npulse > 1)
4198 /* See if we can do with less pulses, based on dlb_scale */
4200 for (d = 0; d < dd->ndim; d++)
4203 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
4204 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
4205 npulse_d_max = std::max(npulse_d_max, npulse_d);
4207 npulse = std::min(npulse, npulse_d_max);
4210 /* This env var can override npulse */
4211 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
/* In vacuum (no PBC) DLB has no pulse-imposed limit unless a dimension
 * caps the pulse count below nc-1. */
4218 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
4219 for (d = 0; d < dd->ndim; d++)
4221 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
4222 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
4223 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
4225 comm->bVacDLBNoLimit = FALSE;
4229 /* cellsize_limit is set for LINCS in init_domain_decomposition */
4230 if (!comm->bVacDLBNoLimit)
4232 comm->cellsize_limit = std::max(comm->cellsize_limit,
4233 comm->cutoff/comm->maxpulse);
4235 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4236 /* Set the minimum cell size for each DD dimension */
4237 for (d = 0; d < dd->ndim; d++)
4239 if (comm->bVacDLBNoLimit ||
4240 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
4242 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
4246 comm->cellsize_min_dlb[dd->dim[d]] =
4247 comm->cutoff/comm->cd[d].np_dlb;
4250 if (comm->cutoff_mbody <= 0)
4252 comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
/* Returns whether PBC must be taken into account for bonded interactions.
 * Requires actual PBC and bonded interactions spanning charge groups;
 * the remaining visible term checks decomposition cell counts per
 * dimension.  NOTE(review): part of the return expression is elided in
 * this listing.
 */
4260 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
4262 /* If each molecule is a single charge group
4263 * or we use domain decomposition for each periodic dimension,
4264 * we do not need to take pbc into account for the bonded interactions.
4266 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
4269 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
4272 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
/* Initializes PME decomposition data when PME electrostatics/VdW is used,
 * errors out on separate PME ranks without PME, applies DLB cell limits,
 * prints the DD settings, estimates the local zone volume fraction and
 * sizes the global-to-local atom lookup (ga2la) accordingly.
 * NOTE(review): intermediate source lines are elided in this listing.
 */
4273 static void set_ddgrid_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
4274 const gmx_mtop_t *mtop, const t_inputrec *ir,
4275 const gmx_ddbox_t *ddbox)
4277 gmx_domdec_comm_t *comm;
4283 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
4285 init_ddpme(dd, &comm->ddpme[0], 0);
4286 if (comm->npmedecompdim >= 2)
4288 init_ddpme(dd, &comm->ddpme[1], 1);
/* No PME: having a PME rank assigned is a fatal configuration error */
4293 comm->npmenodes = 0;
4294 if (dd->pme_nodeid >= 0)
4296 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
4297 "Can not have separate PME ranks without PME electrostatics");
4303 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
4305 if (!isDlbDisabled(comm))
4307 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
4310 print_dd_settings(fplog, dd, mtop, ir, isDlbOn(comm), dlb_scale, ddbox);
4311 if (comm->dlbState == edlbsOffCanTurnOn)
/* DLB may switch on later; show users the settings that would then apply */
4315 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
4317 print_dd_settings(fplog, dd, mtop, ir, TRUE, dlb_scale, ddbox);
4320 if (ir->ePBC == epbcNONE)
4322 vol_frac = 1 - 1/(double)dd->nnodes;
4327 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
4331 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
/* Total atom count from the last entry of the global charge-group index */
4333 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
/* Size ga2la by the expected local fraction of atoms */
4335 dd->ga2la = ga2la_init(natoms_tot, static_cast<int>(vol_frac*natoms_tot));
4338 /*! \brief Set some important DD parameters that can be modified by env.vars */
/* Reads GMX_DD_* / GMX_DLB_* environment variables into the comm struct
 * and decides whether load recording is enabled.
 * NOTE(review): intermediate source lines are elided in this listing.
 */
4339 static void set_dd_envvar_options(FILE *fplog, gmx_domdec_t *dd, int rank_mysim)
4341 gmx_domdec_comm_t *comm = dd->comm;
4343 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
4344 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
4345 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
4346 int recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
4347 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
4348 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
4349 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
4351 if (dd->bSendRecv2 && fplog)
4353 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n")
4360 fprintf(fplog, "Will load balance based on FLOP count\n");
4362 if (comm->eFlop > 1)
/* eFlop > 1: seed the RNG per rank, presumably to randomize FLOP-based
 * load — TODO confirm against the elided code using rand() */
4364 srand(1 + rank_mysim);
4366 comm->bRecordLoad = TRUE;
/* Load recording requires a working cycle counter and GMX_DD_RECORD_LOAD > 0 */
4370 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
/* Default constructor: enables bonded-interaction checking and bonded
 * communication, defaults to pp_pme rank ordering, zero (automatic)
 * communication ranges, DLB on-when-useful, and a cleared cell grid.
 * NOTE(review): some initializer lines are elided in this listing.
 */
4374 DomdecOptions::DomdecOptions() :
4375 checkBondedInteractions(TRUE),
4376 useBondedCommunication(TRUE),
4378 rankOrder(DdRankOrder::pp_pme),
4379 minimumCommunicationRange(0),
4380 constraintCommunicationRange(0),
4381 dlbOption(DlbOption::turnOnWhenUseful),
/* numCells = {0,0,0} means the grid is chosen automatically */
4387 clear_ivec(numCells);
/* Allocates and initializes the domain decomposition master struct.
 *
 * Applies environment-variable overrides, sets grid/limits, creates the
 * DD communicators, and for PP ranks sets grid parameters and neighbor
 * relations.  Returns the new gmx_domdec_t (ownership passes to caller).
 * NOTE(review): intermediate source lines are elided in this listing.
 */
4390 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
4391 const DomdecOptions &options,
4392 const MdrunOptions &mdrunOptions,
4393 const gmx_mtop_t *mtop,
4394 const t_inputrec *ir,
4396 gmx::ArrayRef<const gmx::RVec> xGlobal,
4397 gmx::LocalAtomSetManager *atomSets)
4404 "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
4407 dd = new gmx_domdec_t;
4409 dd->comm = init_dd_comm();
4411 /* Initialize DD paritioning counters */
/* INT_MIN marks "no partitioning done yet" */
4412 dd->comm->partition_step = INT_MIN;
4415 set_dd_envvar_options(fplog, dd, cr->nodeid);
4417 gmx_ddbox_t ddbox = {0};
4418 set_dd_limits_and_grid(fplog, cr, dd, options, mdrunOptions,
4423 make_dd_communicators(fplog, cr, dd, options.rankOrder);
4425 if (thisRankHasDuty(cr, DUTY_PP))
4427 set_ddgrid_parameters(fplog, dd, options.dlbScaling, mtop, ir, &ddbox);
4429 setup_neighbor_relations(dd);
4432 /* Set overallocation to avoid frequent reallocation of arrays */
4433 set_over_alloc_dd(TRUE);
4435 clear_dd_cycle_counts(dd);
4437 dd->atomSets = atomSets;
/* Tests whether the requested cut-off fits the current DD grid.
 *
 * For each DD dimension computes the number of pulses the new cut-off
 * would need and, with DLB, checks it against the allowed pulse count and
 * current cell sizes; also checks grid jumps when DLB is active.  The
 * "locally limited" state is summed over all ranks so the decision is
 * collective.  NOTE(review): intermediate source lines (return
 * statements, brace structure) are elided in this listing.
 */
4442 static gmx_bool test_dd_cutoff(t_commrec *cr,
4443 t_state *state, const t_inputrec *ir,
4454 set_ddbox(dd, false, ir, state->box, true, state->x, &ddbox);
4458 for (d = 0; d < dd->ndim; d++)
/* DD_CELL_MARGIN guards against cells shrinking below the cut-off */
4462 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
4463 if (dynamic_dd_box(&ddbox, ir))
/* Extra margin when pressure coupling can shrink the box */
4465 inv_cell_size *= DD_PRES_SCALE_MARGIN;
4468 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
4470 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
4472 if (np > dd->comm->cd[d].np_dlb)
4477 /* If a current local cell size is smaller than the requested
4478 * cut-off, we could still fix it, but this gets very complicated.
4479 * Without fixing here, we might actually need more checks.
4481 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
4488 if (!isDlbDisabled(dd->comm))
4490 /* If DLB is not active yet, we don't need to check the grid jumps.
4491 * Actually we shouldn't, because then the grid jump data is not set.
4493 if (isDlbOn(dd->comm) &&
4494 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
/* Collective decision: any rank being limited blocks the change */
4499 gmx_sumi(1, &LocallyLimited, cr);
4501 if (LocallyLimited > 0)
/* Changes the DD communication cut-off to cutoff_req if the current grid
 * allows it; returns whether the change was applied.  NOTE(review): the
 * conditional guarding the assignment is elided in this listing.
 */
4510 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, const t_inputrec *ir,
4513 gmx_bool bCutoffAllowed;
4515 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
4519 cr->dd->comm->cutoff = cutoff_req;
4522 return bCutoffAllowed;
/* Enables the PME-load-balancing limit on DLB staggering and records the
 * cut-off that must keep fitting after DLB adjustments.
 */
4525 void set_dd_dlb_max_cutoff(t_commrec *cr, real cutoff)
4527 gmx_domdec_comm_t *comm;
4529 comm = cr->dd->comm;
4531 /* Turn on the DLB limiting (might have been on already) */
4532 comm->bPMELoadBalDLBLimits = TRUE;
4534 /* Change the cut-off limit */
4535 comm->PMELoadBal_max_cutoff = cutoff;
4539 fprintf(debug, "PME load balancing set a limit to the DLB staggering such that a %f cut-off will continue to fit\n",
4540 comm->PMELoadBal_max_cutoff);
4544 /* Sets whether we should later check the load imbalance data, so that
4545 * we can trigger dynamic load balancing if enough imbalance has
4548 * Used after PME load balancing unlocks DLB, so that the check
4549 * whether DLB will be useful can happen immediately.
/* Only meaningful in the "off but can turn on" DLB state; otherwise a no-op. */
4551 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue)
4553 if (dd->comm->dlbState == edlbsOffCanTurnOn)
4555 dd->comm->bCheckWhetherToTurnDlbOn = bValue;
4559 /* Store the DD partitioning count, so we can ignore cycle counts
4560 * over the next nstlist steps, which are often slower.
4562 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
4567 /* Returns if we should check whether there has been enough load
4568 * imbalance to trigger dynamic load balancing.
/* Returns FALSE when DLB cannot turn on, during the warm-up window after
 * the first partitioning/unlock, or when no steps were timed.  Returns
 * TRUE immediately after a PME-load-balancing unlock, else periodically
 * every c_checkTurnDlbOnInterval partitionings.
 */
4570 static gmx_bool dd_dlb_get_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd)
4572 if (dd->comm->dlbState != edlbsOffCanTurnOn)
4577 if (dd->ddp_count <= dd->comm->ddPartioningCountFirstDlbOff)
4579 /* We ignore the first nstlist steps at the start of the run
4580 * or after PME load balancing or after turning DLB off, since
4581 * these often have extra allocation or cache miss overhead.
4586 if (dd->comm->cycl_n[ddCyclStep] == 0)
4588 /* We can have zero timed steps when dd_partition_system is called
4589 * more than once at the same step, e.g. with replica exchange.
4590 * Turning on DLB would trigger an assertion failure later, but is
4591 * also useless right after exchanging replicas.
4596 /* We should check whether we should use DLB directly after
4598 if (dd->comm->bCheckWhetherToTurnDlbOn)
4600 /* This flag was set when the PME load-balancing routines
4601 unlocked DLB, and should now be cleared. */
4602 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, FALSE);
4605 /* We check whether we should use DLB every c_checkTurnDlbOnInterval
4606 * partitionings (we do not do this every partioning, so that we
4607 * avoid excessive communication). */
4608 return dd->comm->n_load_have % c_checkTurnDlbOnInterval == c_checkTurnDlbOnInterval - 1;
/* Returns whether dynamic load balancing is currently active. */
4611 gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
4613 return isDlbOn(dd->comm);
/* Returns whether DLB is temporarily locked off (e.g. during PME tuning). */
4616 gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
4618 return (dd->comm->dlbState == edlbsOffTemporarilyLocked);
/* Temporarily locks DLB off; only valid from the auto (off-can-turn-on)
 * state, otherwise a no-op.
 */
4621 void dd_dlb_lock(gmx_domdec_t *dd)
4623 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4624 if (dd->comm->dlbState == edlbsOffCanTurnOn)
4626 dd->comm->dlbState = edlbsOffTemporarilyLocked;
/* Releases a temporary DLB lock, returning to the auto state and arming
 * an immediate imbalance check; no-op from any other state.
 */
4630 void dd_dlb_unlock(gmx_domdec_t *dd)
4632 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4633 if (dd->comm->dlbState == edlbsOffTemporarilyLocked)
4635 dd->comm->dlbState = edlbsOffCanTurnOn;
4636 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
/* Merges received charge-group communication buffers into the local
 * per-zone arrays for a given pulse.
 *
 * First shifts already-stored charge groups (and the send indices of
 * later pulses) to make room, then copies the received global indices,
 * positions and cginfo into place and extends the charge-group index.
 * NOTE(review): intermediate source lines (brace structure, some
 * initializations such as cg0 for the receive loop) are elided in this
 * listing.
 */
4640 static void merge_cg_buffers(int ncell,
4641 gmx_domdec_comm_dim_t *cd, int pulse,
4643 gmx::ArrayRef<int> index_gl,
4645 rvec *cg_cm, rvec *recv_vr,
4646 gmx::ArrayRef<int> cgindex,
4647 cginfo_mb_t *cginfo_mb, int *cginfo)
4649 gmx_domdec_ind_t *ind, *ind_p;
4650 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
4651 int shift, shift_at;
4653 ind = &cd->ind[pulse];
4655 /* First correct the already stored data */
/* shift starts as the total receive count and shrinks per cell, so each
 * cell is moved by the count of cells received after it */
4656 shift = ind->nrecv[ncell];
4657 for (cell = ncell-1; cell >= 0; cell--)
4659 shift -= ind->nrecv[cell];
4662 /* Move the cg's present from previous grid pulses */
4663 cg0 = ncg_cell[ncell+cell];
4664 cg1 = ncg_cell[ncell+cell+1];
/* Iterate backwards so shifted entries do not overwrite unread ones */
4665 cgindex[cg1+shift] = cgindex[cg1];
4666 for (cg = cg1-1; cg >= cg0; cg--)
4668 index_gl[cg+shift] = index_gl[cg];
4669 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
4670 cgindex[cg+shift] = cgindex[cg];
4671 cginfo[cg+shift] = cginfo[cg];
4673 /* Correct the already stored send indices for the shift */
4674 for (p = 1; p <= pulse; p++)
4676 ind_p = &cd->ind[p];
4678 for (c = 0; c < cell; c++)
4680 cg0 += ind_p->nsend[c];
4682 cg1 = cg0 + ind_p->nsend[cell];
4683 for (cg = cg0; cg < cg1; cg++)
4685 ind_p->index[cg] += shift;
4691 /* Merge in the communicated buffers */
4695 for (cell = 0; cell < ncell; cell++)
4697 cg1 = ncg_cell[ncell+cell+1] + shift;
4700 /* Correct the old cg indices */
4701 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
4703 cgindex[cg+1] += shift_at;
4706 for (cg = 0; cg < ind->nrecv[cell]; cg++)
4708 /* Copy this charge group from the buffer */
4709 index_gl[cg1] = recv_i[cg0];
4710 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
4711 /* Add it to the cgindex */
4712 cg_gl = index_gl[cg1];
4713 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
4714 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
4715 cgindex[cg1+1] = cgindex[cg1] + nat;
4720 shift += ind->nrecv[cell];
4721 ncg_cell[ncell+cell+1] = cg1;
/* Stores per-zone atom block boundaries (cell2at0/cell2at1) for each
 * communication pulse, used for out-of-place receive buffer copying.
 */
4725 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
4728 const gmx::RangePartitioning &atomGroups)
4730 /* Store the atom block boundaries for easy copying of communication buffers
/* g tracks the running atom-group offset across all pulses and zones */
4732 int g = atomGroupStart;
4733 for (int zone = 0; zone < nzone; zone++)
4735 for (gmx_domdec_ind_t &ind : cd->ind)
4737 const auto range = atomGroups.subRange(g, g + ind.nrecv[zone]);
4738 ind.cell2at0[zone] = range.begin();
4739 ind.cell2at1[zone] = range.end();
4740 g += ind.nrecv[zone];
/* Returns whether charge group cg_gl is bond-linked to any charge group
 * that is not present locally (bLocalCG false for it).  NOTE(review):
 * the return statements are elided in this listing.
 */
4745 static gmx_bool missing_link(t_blocka *link, int cg_gl, const char *bLocalCG)
4751 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
4753 if (!bLocalCG[link->a[i]])
4762 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
/* Plain aggregate filled by set_dd_corners() and read by
 * get_zone_pulse_cgs() when computing communication distances. */
4764 real c[DIM][4]; /* the corners for the non-bonded communication */
4765 real cr0; /* corner for rounding */
4766 real cr1[4]; /* corners for rounding */
4767 real bc[DIM]; /* corners for bounded communication */
4768 real bcr1; /* corner for rounding for bonded communication */
4771 /* Determine the corners of the domain(s) we are communicating with */
/* Fills a dd_corners_t with the lower zone boundaries per DD dimension;
 * with DLB the staggered neighbor-domain extremes (zone_d1/zone_d2) are
 * folded in, and separate corners are kept for multi-body bonded
 * communication (bc, bcr1) and for distance rounding (cr0, cr1).
 * NOTE(review): intermediate source lines are elided in this listing.
 */
4773 set_dd_corners(const gmx_domdec_t *dd,
4774 int dim0, int dim1, int dim2,
4778 const gmx_domdec_comm_t *comm;
4779 const gmx_domdec_zones_t *zones;
4784 zones = &comm->zones;
4786 /* Keep the compiler happy */
4790 /* The first dimension is equal for all cells */
4791 c->c[0][0] = comm->cell_x0[dim0];
4794 c->bc[0] = c->c[0][0];
4799 /* This cell row is only seen from the first row */
4800 c->c[1][0] = comm->cell_x0[dim1];
4801 /* All rows can see this row */
4802 c->c[1][1] = comm->cell_x0[dim1];
4803 if (isDlbOn(dd->comm))
4805 c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
4808 /* For the multi-body distance we need the maximum */
4809 c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
4812 /* Set the upper-right corner for rounding */
4813 c->cr0 = comm->cell_x1[dim0];
4818 for (j = 0; j < 4; j++)
4820 c->c[2][j] = comm->cell_x0[dim2];
4822 if (isDlbOn(dd->comm))
4824 /* Use the maximum of the i-cells that see a j-cell */
4825 for (i = 0; i < zones->nizone; i++)
4827 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
4832 std::max(c->c[2][j-4],
4833 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
4839 /* For the multi-body distance we need the maximum */
4840 c->bc[2] = comm->cell_x0[dim2];
4841 for (i = 0; i < 2; i++)
4843 for (j = 0; j < 2; j++)
4845 c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0);
4851 /* Set the upper-right corner for rounding */
4852 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
4853 * Only cell (0,0,0) can see cell 7 (1,1,1)
4855 c->cr1[0] = comm->cell_x1[dim1];
4856 c->cr1[3] = comm->cell_x1[dim1];
4857 if (isDlbOn(dd->comm))
4859 c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
4862 /* For the multi-body distance we need the maximum */
4863 c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
4870 /* Add the atom groups we need to send in this pulse from this zone to
4871 * \p localAtomGroups and \p work
/* For each charge group in [cg0, cg1) computes its distance to the
 * receiving zone's corner — via the simple rectangular form or the full
 * triclinic form with rounding corrections — and appends groups within
 * the non-bonded (r_comm2) or bonded (r_bcomm2) cut-off to the send
 * buffers, applying PBC/screw corrections to the stored position.
 * NOTE(review): this listing elides many source lines (brace structure,
 * the r2/rb2 rectangular accumulation, the final distance test's first
 * clause); comments describe only visible statements.
 */
4874 get_zone_pulse_cgs(gmx_domdec_t *dd,
4875 int zonei, int zone,
4877 gmx::ArrayRef<const int> globalAtomGroupIndices,
4878 const gmx::RangePartitioning &atomGroups,
4879 int dim, int dim_ind,
4880 int dim0, int dim1, int dim2,
4881 real r_comm2, real r_bcomm2,
4883 bool distanceIsTriclinic,
4885 real skew_fac2_d, real skew_fac_01,
4886 rvec *v_d, rvec *v_0, rvec *v_1,
4887 const dd_corners_t *c,
4888 const rvec sf2_round,
4889 gmx_bool bDistBonded,
4895 std::vector<int> *localAtomGroups,
4896 dd_comm_setup_work_t *work)
4898 gmx_domdec_comm_t *comm;
4900 gmx_bool bDistMB_pulse;
4902 real r2, rb2, r, tric_sh;
4909 bScrew = (dd->bScrewPBC && dim == XX);
/* Multi-body distances only apply on bonded-distance pulses */
4911 bDistMB_pulse = (bDistMB && bDistBonded);
4913 /* Unpack the work data */
4914 std::vector<int> &ibuf = work->atomGroupBuffer;
4915 std::vector<gmx::RVec> &vbuf = work->positionBuffer;
4919 for (cg = cg0; cg < cg1; cg++)
4923 if (!distanceIsTriclinic)
4925 /* Rectangular direction, easy */
4926 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
4933 r = cg_cm[cg][dim] - c->bc[dim_ind];
4939 /* Rounding gives at most a 16% reduction
4940 * in communicated atoms
4942 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
4944 r = cg_cm[cg][dim0] - c->cr0;
4945 /* This is the first dimension, so always r >= 0 */
4952 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
4954 r = cg_cm[cg][dim1] - c->cr1[zone];
4961 r = cg_cm[cg][dim1] - c->bcr1;
4971 /* Triclinic direction, more complicated */
4974 /* Rounding, conservative as the skew_fac multiplication
4975 * will slightly underestimate the distance.
4977 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
4979 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
4980 for (i = dim0+1; i < DIM; i++)
/* Project out the triclinic box-vector components along dim0 */
4982 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
4984 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
4987 rb[dim0] = rn[dim0];
4990 /* Take care that the cell planes along dim0 might not
4991 * be orthogonal to those along dim1 and dim2.
4993 for (i = 1; i <= dim_ind; i++)
4996 if (normal[dim0][dimd] > 0)
4998 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
5001 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
5006 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
5008 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
5010 for (i = dim1+1; i < DIM; i++)
5012 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
5014 rn[dim1] += tric_sh;
5017 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
5018 /* Take care of coupling of the distances
5019 * to the planes along dim0 and dim1 through dim2.
5021 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
5022 /* Take care that the cell planes along dim1
5023 * might not be orthogonal to that along dim2.
5025 if (normal[dim1][dim2] > 0)
5027 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
5033 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
5036 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
5037 /* Take care of coupling of the distances
5038 * to the planes along dim0 and dim1 through dim2.
5040 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
5041 /* Take care that the cell planes along dim1
5042 * might not be orthogonal to that along dim2.
5044 if (normal[dim1][dim2] > 0)
5046 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
5051 /* The distance along the communication direction */
5052 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
5054 for (i = dim+1; i < DIM; i++)
5056 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
5061 r2 += rn[dim]*rn[dim]*skew_fac2_d;
5062 /* Take care of coupling of the distances
5063 * to the planes along dim0 and dim1 through dim2.
5065 if (dim_ind == 1 && zonei == 1)
5067 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
5073 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
5076 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
5077 /* Take care of coupling of the distances
5078 * to the planes along dim0 and dim1 through dim2.
5080 if (dim_ind == 1 && zonei == 1)
5082 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
/* Bonded-distance acceptance: within the bonded cut-off AND the group has
 * inter-CG bonded interactions with a non-local partner */
5090 ((bDistMB && rb2 < r_bcomm2) ||
5091 (bDist2B && r2 < r_bcomm2)) &&
5093 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
5094 missing_link(comm->cglink, globalAtomGroupIndices[cg],
5097 /* Store the local and global atom group indices and position */
5098 localAtomGroups->push_back(cg);
5099 ibuf.push_back(globalAtomGroupIndices[cg]);
5103 if (dd->ci[dim] == 0)
5105 /* Correct cg_cm for pbc */
5106 rvec_add(cg_cm[cg], box[dim], posPbc);
/* Screw PBC: mirror y and z when shifting across the x boundary */
5109 posPbc[YY] = box[YY][YY] - posPbc[YY];
5110 posPbc[ZZ] = box[ZZ][ZZ] - posPbc[ZZ];
5115 copy_rvec(cg_cm[cg], posPbc);
5117 vbuf.emplace_back(posPbc[XX], posPbc[YY], posPbc[ZZ]);
/* Accumulate the number of atoms sent (groups may have several atoms) */
5119 nat += atomGroups.block(cg).size();
5124 work->nsend_zone = nsend_z;
/* Resets a per-thread communication-setup work buffer: empties the index
 * and position buffers and zeroes the per-zone send count.
 */
5127 static void clearCommSetupData(dd_comm_setup_work_t *work)
5129 work->localAtomGroupBuffer.clear();
5130 work->atomGroupBuffer.clear();
5131 work->positionBuffer.clear();
5133 work->nsend_zone = 0;
/* Sets up the halo communication for all DD dimensions and pulses.
 *
 * For every dimension/pulse it selects the charge groups to send (via
 * get_zone_pulse_cgs, parallelized over OpenMP threads), exchanges send
 * counts and then global indices and positions with the neighbor rank,
 * and merges received data into the local zone ranges (in place when
 * possible, otherwise via merge_cg_buffers).  Finally updates the zone
 * atom range and local charge-group presence flags.
 * NOTE(review): this listing elides many source lines (brace structure,
 * several initializations such as nzone/nzone_send and skew_fac_01);
 * comments describe only visible statements.
 */
5136 static void setup_dd_communication(gmx_domdec_t *dd,
5137 matrix box, gmx_ddbox_t *ddbox,
5139 t_state *state, PaddedRVecVector *f)
5141 int dim_ind, dim, dim0, dim1, dim2, dimd, nat_tot;
5142 int nzone, nzone_send, zone, zonei, cg0, cg1;
5143 int c, i, cg, cg_gl, nrcg;
5144 int *zone_cg_range, pos_cg;
5145 gmx_domdec_comm_t *comm;
5146 gmx_domdec_zones_t *zones;
5147 gmx_domdec_comm_dim_t *cd;
5148 cginfo_mb_t *cginfo_mb;
5149 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
5150 real r_comm2, r_bcomm2;
5151 dd_corners_t corners;
5152 rvec *cg_cm, *normal, *v_d, *v_0 = nullptr, *v_1 = nullptr;
5153 real skew_fac2_d, skew_fac_01;
5158 fprintf(debug, "Setting up DD communication\n");
5163 if (comm->dth.empty())
5165 /* Initialize the thread data.
5166 * This can not be done in init_domain_decomposition,
5167 * as the numbers of threads is determined later.
5169 int numThreads = gmx_omp_nthreads_get(emntDomdec);
5170 comm->dth.resize(numThreads);
/* With the Verlet scheme, positions serve directly as group centers */
5173 switch (fr->cutoff_scheme)
5179 cg_cm = as_rvec_array(state->x.data());
5182 gmx_incons("unimplemented");
5185 bBondComm = comm->bBondComm;
5187 /* Do we need to determine extra distances for multi-body bondeds? */
5188 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5190 /* Do we need to determine extra distances for only two-body bondeds? */
5191 bDist2B = (bBondComm && !bDistMB);
/* Squared cut-offs, compared against squared distances later */
5193 r_comm2 = gmx::square(comm->cutoff);
5194 r_bcomm2 = gmx::square(comm->cutoff_mbody);
5198 fprintf(debug, "bBondComm %s, r_bc %f\n", gmx::boolToString(bBondComm), std::sqrt(r_bcomm2));
5201 zones = &comm->zones;
5204 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
5205 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
5207 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
5209 /* Triclinic stuff */
5210 normal = ddbox->normal;
5214 v_0 = ddbox->v[dim0];
5215 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
5217 /* Determine the coupling coefficient for the distances
5218 * to the cell planes along dim0 and dim1 through dim2.
5219 * This is required for correct rounding.
5222 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
5225 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
5231 v_1 = ddbox->v[dim1];
5234 zone_cg_range = zones->cg_range;
5235 cginfo_mb = fr->cginfo_mb;
/* Zone 0 is the home zone; further zones are filled per pulse below */
5237 zone_cg_range[0] = 0;
5238 zone_cg_range[1] = dd->ncg_home;
5239 comm->zone_ncg1[0] = dd->ncg_home;
5240 pos_cg = dd->ncg_home;
5242 nat_tot = comm->atomRanges.numHomeAtoms();
5244 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
5246 dim = dd->dim[dim_ind];
5247 cd = &comm->cd[dim_ind];
5249 /* Check if we need to compute triclinic distances along this dim */
5250 bool distanceIsTriclinic = false;
5251 for (i = 0; i <= dim_ind; i++)
5253 if (ddbox->tric_dir[dd->dim[i]])
5255 distanceIsTriclinic = true;
5259 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
5261 /* No pbc in this dimension, the first node should not comm. */
5269 v_d = ddbox->v[dim];
5270 skew_fac2_d = gmx::square(ddbox->skew_fac[dim]);
5272 cd->receiveInPlace = true;
5273 for (int p = 0; p < cd->numPulses(); p++)
5275 /* Only atoms communicated in the first pulse are used
5276 * for multi-body bonded interactions or for bBondComm.
5278 bDistBonded = ((bDistMB || bDist2B) && p == 0);
5280 gmx_domdec_ind_t *ind = &cd->ind[p];
5282 /* Thread 0 writes in the global index array */
5284 clearCommSetupData(&comm->dth[0]);
5286 for (zone = 0; zone < nzone_send; zone++)
5288 if (dim_ind > 0 && distanceIsTriclinic)
5290 /* Determine slightly more optimized skew_fac's
5292 * This reduces the number of communicated atoms
5293 * by about 10% for 3D DD of rhombic dodecahedra.
5295 for (dimd = 0; dimd < dim; dimd++)
5297 sf2_round[dimd] = 1;
5298 if (ddbox->tric_dir[dimd])
5300 for (i = dd->dim[dimd]+1; i < DIM; i++)
5302 /* If we are shifted in dimension i
5303 * and the cell plane is tilted forward
5304 * in dimension i, skip this coupling.
5306 if (!(zones->shift[nzone+zone][i] &&
5307 ddbox->v[dimd][i][dimd] >= 0))
5310 gmx::square(ddbox->v[dimd][i][dimd]);
5313 sf2_round[dimd] = 1/sf2_round[dimd];
5318 zonei = zone_perm[dim_ind][zone];
5321 /* Here we permutate the zones to obtain a convenient order
5322 * for neighbor searching
5324 cg0 = zone_cg_range[zonei];
5325 cg1 = zone_cg_range[zonei+1];
5329 /* Look only at the cg's received in the previous grid pulse
5331 cg1 = zone_cg_range[nzone+zone+1];
5332 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
5335 const int numThreads = static_cast<int>(comm->dth.size());
5336 #pragma omp parallel for num_threads(numThreads) schedule(static)
5337 for (int th = 0; th < numThreads; th++)
5341 dd_comm_setup_work_t &work = comm->dth[th];
5343 /* Retain data accumulated into buffers of thread 0 */
5346 clearCommSetupData(&work);
/* Static split of the [cg0, cg1) range over threads */
5349 int cg0_th = cg0 + ((cg1 - cg0)* th )/numThreads;
5350 int cg1_th = cg0 + ((cg1 - cg0)*(th+1))/numThreads;
5352 /* Get the cg's for this pulse in this zone */
5353 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
5354 dd->globalAtomGroupIndices,
5356 dim, dim_ind, dim0, dim1, dim2,
5358 box, distanceIsTriclinic,
5359 normal, skew_fac2_d, skew_fac_01,
5360 v_d, v_0, v_1, &corners, sf2_round,
5361 bDistBonded, bBondComm,
5364 th == 0 ? &ind->index : &work.localAtomGroupBuffer,
5367 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
5370 std::vector<int> &atomGroups = comm->dth[0].atomGroupBuffer;
5371 std::vector<gmx::RVec> &positions = comm->dth[0].positionBuffer;
5372 ind->nsend[zone] = comm->dth[0].nsend_zone;
5373 /* Append data of threads>=1 to the communication buffers */
5374 for (int th = 1; th < numThreads; th++)
5376 const dd_comm_setup_work_t &dth = comm->dth[th];
5378 ind->index.insert(ind->index.end(), dth.localAtomGroupBuffer.begin(), dth.localAtomGroupBuffer.end());
5379 atomGroups.insert(atomGroups.end(), dth.atomGroupBuffer.begin(), dth.atomGroupBuffer.end());
5380 positions.insert(positions.end(), dth.positionBuffer.begin(), dth.positionBuffer.end());
5381 comm->dth[0].nat += dth.nat;
5382 ind->nsend[zone] += dth.nsend_zone;
5385 /* Clear the counts in case we do not have pbc */
5386 for (zone = nzone_send; zone < nzone; zone++)
5388 ind->nsend[zone] = 0;
/* Slots nzone and nzone+1 carry totals: group count and atom count */
5390 ind->nsend[nzone] = ind->index.size();
5391 ind->nsend[nzone + 1] = comm->dth[0].nat;
5392 /* Communicate the number of cg's and atoms to receive */
5393 ddSendrecv(dd, dim_ind, dddirBackward,
5394 ind->nsend, nzone+2,
5395 ind->nrecv, nzone+2);
5399 /* We can receive in place if only the last zone is not empty */
5400 for (zone = 0; zone < nzone-1; zone++)
5402 if (ind->nrecv[zone] > 0)
5404 cd->receiveInPlace = false;
5409 int receiveBufferSize = 0;
5410 if (!cd->receiveInPlace)
5412 receiveBufferSize = ind->nrecv[nzone];
5414 /* These buffer are actually only needed with in-place */
5415 DDBufferAccess<int> globalAtomGroupBuffer(comm->intBuffer, receiveBufferSize);
5416 DDBufferAccess<gmx::RVec> rvecBuffer(comm->rvecBuffer, receiveBufferSize);
5418 dd_comm_setup_work_t &work = comm->dth[0];
5420 /* Make space for the global cg indices */
5421 int numAtomGroupsNew = pos_cg + ind->nrecv[nzone];
5422 dd->globalAtomGroupIndices.resize(numAtomGroupsNew);
5423 /* Communicate the global cg indices */
5424 gmx::ArrayRef<int> integerBufferRef;
5425 if (cd->receiveInPlace)
5427 integerBufferRef = gmx::arrayRefFromArray(dd->globalAtomGroupIndices.data() + pos_cg, ind->nrecv[nzone]);
5431 integerBufferRef = globalAtomGroupBuffer.buffer;
5433 ddSendrecv<int>(dd, dim_ind, dddirBackward,
5434 work.atomGroupBuffer, integerBufferRef);
5436 /* Make space for cg_cm */
5437 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
5438 if (fr->cutoff_scheme == ecutsGROUP)
/* Reacquire cg_cm: the state position array may have been reallocated */
5444 cg_cm = as_rvec_array(state->x.data());
5446 /* Communicate cg_cm */
5447 gmx::ArrayRef<gmx::RVec> rvecBufferRef;
5448 if (cd->receiveInPlace)
5450 rvecBufferRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cg_cm + pos_cg), ind->nrecv[nzone]);
5454 rvecBufferRef = rvecBuffer.buffer;
5456 ddSendrecv<gmx::RVec>(dd, dim_ind, dddirBackward,
5457 work.positionBuffer, rvecBufferRef);
5459 /* Make the charge group index */
5460 if (cd->receiveInPlace)
5462 zone = (p == 0 ? 0 : nzone - 1);
5463 while (zone < nzone)
5465 for (cg = 0; cg < ind->nrecv[zone]; cg++)
5467 cg_gl = dd->globalAtomGroupIndices[pos_cg];
5468 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
5469 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
5470 dd->atomGrouping_.appendBlock(nrcg);
5473 /* Update the charge group presence,
5474 * so we can use it in the next pass of the loop.
5476 comm->bLocalCG[cg_gl] = TRUE;
5482 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
5485 zone_cg_range[nzone+zone] = pos_cg;
5490 /* This part of the code is never executed with bBondComm. */
5491 std::vector<int> &atomGroupsIndex = dd->atomGrouping_.rawIndex();
5492 atomGroupsIndex.resize(numAtomGroupsNew + 1);
5494 merge_cg_buffers(nzone, cd, p, zone_cg_range,
5495 dd->globalAtomGroupIndices, integerBufferRef.data(),
5496 cg_cm, as_rvec_array(rvecBufferRef.data()),
5498 fr->cginfo_mb, fr->cginfo);
5499 pos_cg += ind->nrecv[nzone];
5501 nat_tot += ind->nrecv[nzone+1];
5503 if (!cd->receiveInPlace)
5505 /* Store the atom block for easy copying of communication buffers */
5506 make_cell2at_index(cd, nzone, zone_cg_range[nzone], dd->atomGrouping());
5511 comm->atomRanges.setEnd(DDAtomRanges::Type::Zones, nat_tot);
5515 /* We don't need to update cginfo, since that was alrady done above.
5516 * So we pass NULL for the forcerec.
5518 dd_set_cginfo(dd->globalAtomGroupIndices,
5519 dd->ncg_home, dd->globalAtomGroupIndices.size(),
5520 nullptr, comm->bLocalCG);
5525 fprintf(debug, "Finished setting up DD communication, zones:");
5526 for (c = 0; c < zones->n; c++)
5528 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
5530 fprintf(debug, "\n");
/* Derives per-i-zone charge-group boundaries (cg1, jcg0, jcg1) from the
 * zone charge-group ranges.
 */
5534 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
5538 for (c = 0; c < zones->nizone; c++)
5540 zones->izone[c].cg1 = zones->cg_range[c+1];
5541 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
5542 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
5546 /* \brief Set zone dimensions for zones \p zone_start to \p zone_end-1
5548 * Also sets the atom density for the home zone when \p zone_start=0.
5549 * For this \p numMovedChargeGroupsInHomeZone needs to be passed to tell
5550 * how many charge groups will move but are still part of the current range.
5551 * \todo When converting domdec to use proper classes, all these variables
5552 * should be private and a method should return the correct count
5553 * depending on an internal state.
5555 * \param[in,out] dd The domain decomposition struct
5556 * \param[in] box The box
5557 * \param[in] ddbox The domain decomposition box struct
5558 * \param[in] zone_start The start of the zone range to set sizes for
5559 * \param[in] zone_end The end of the zone range to set sizes for
5560 * \param[in] numMovedChargeGroupsInHomeZone The number of charge groups in the home zone that should moved but are still present in dd->comm->zones.cg_range
5562 static void set_zones_size(gmx_domdec_t *dd,
5563 matrix box, const gmx_ddbox_t *ddbox,
5564 int zone_start, int zone_end,
5565 int numMovedChargeGroupsInHomeZone)
5567 gmx_domdec_comm_t *comm;
5568 gmx_domdec_zones_t *zones;
5577 zones = &comm->zones;
5579 /* Do we need to determine extra distances for multi-body bondeds? */
5580 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5582 for (z = zone_start; z < zone_end; z++)
5584 /* Copy cell limits to zone limits.
5585 * Valid for non-DD dims and non-shifted dims.
5587 copy_rvec(comm->cell_x0, zones->size[z].x0);
5588 copy_rvec(comm->cell_x1, zones->size[z].x1);
5591 for (d = 0; d < dd->ndim; d++)
5595 for (z = 0; z < zones->n; z++)
5597 /* With a staggered grid we have different sizes
5598 * for non-shifted dimensions.
5600 if (isDlbOn(dd->comm) && zones->shift[z][dim] == 0)
5604 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
5605 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
5609 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
5610 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
5616 rcmbs = comm->cutoff_mbody;
5617 if (ddbox->tric_dir[dim])
5619 rcs /= ddbox->skew_fac[dim];
5620 rcmbs /= ddbox->skew_fac[dim];
5623 /* Set the lower limit for the shifted zone dimensions */
5624 for (z = zone_start; z < zone_end; z++)
5626 if (zones->shift[z][dim] > 0)
5629 if (!isDlbOn(dd->comm) || d == 0)
5631 zones->size[z].x0[dim] = comm->cell_x1[dim];
5632 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
5636 /* Here we take the lower limit of the zone from
5637 * the lowest domain of the zone below.
5641 zones->size[z].x0[dim] =
5642 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
5648 zones->size[z].x0[dim] =
5649 zones->size[zone_perm[2][z-4]].x0[dim];
5653 zones->size[z].x0[dim] =
5654 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
5657 /* A temporary limit, is updated below */
5658 zones->size[z].x1[dim] = zones->size[z].x0[dim];
5662 for (zi = 0; zi < zones->nizone; zi++)
5664 if (zones->shift[zi][dim] == 0)
5666 /* This takes the whole zone into account.
5667 * With multiple pulses this will lead
5668 * to a larger zone then strictly necessary.
5670 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5671 zones->size[zi].x1[dim]+rcmbs);
5679 /* Loop over the i-zones to set the upper limit of each
5682 for (zi = 0; zi < zones->nizone; zi++)
5684 if (zones->shift[zi][dim] == 0)
5686 /* We should only use zones up to zone_end */
5687 int jZoneEnd = std::min(zones->izone[zi].j1, zone_end);
5688 for (z = zones->izone[zi].j0; z < jZoneEnd; z++)
5690 if (zones->shift[z][dim] > 0)
5692 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5693 zones->size[zi].x1[dim]+rcs);
5700 for (z = zone_start; z < zone_end; z++)
5702 /* Initialization only required to keep the compiler happy */
5703 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
5706 /* To determine the bounding box for a zone we need to find
5707 * the extreme corners of 4, 2 or 1 corners.
5709 nc = 1 << (ddbox->nboundeddim - 1);
5711 for (c = 0; c < nc; c++)
5713 /* Set up a zone corner at x=0, ignoring trilinic couplings */
5717 corner[YY] = zones->size[z].x0[YY];
5721 corner[YY] = zones->size[z].x1[YY];
5725 corner[ZZ] = zones->size[z].x0[ZZ];
5729 corner[ZZ] = zones->size[z].x1[ZZ];
5731 if (dd->ndim == 1 && dd->dim[0] < ZZ && ZZ < dd->npbcdim &&
5732 box[ZZ][1 - dd->dim[0]] != 0)
5734 /* With 1D domain decomposition the cg's are not in
5735 * the triclinic box, but triclinic x-y and rectangular y/x-z.
5736 * Shift the corner of the z-vector back to along the box
5737 * vector of dimension d, so it will later end up at 0 along d.
5738 * This can affect the location of this corner along dd->dim[0]
5739 * through the matrix operation below if box[d][dd->dim[0]]!=0.
5741 int d = 1 - dd->dim[0];
5743 corner[d] -= corner[ZZ]*box[ZZ][d]/box[ZZ][ZZ];
5745 /* Apply the triclinic couplings */
5746 assert(ddbox->npbcdim <= DIM);
5747 for (i = YY; i < ddbox->npbcdim; i++)
5749 for (j = XX; j < i; j++)
5751 corner[j] += corner[i]*box[i][j]/box[i][i];
5756 copy_rvec(corner, corner_min);
5757 copy_rvec(corner, corner_max);
5761 for (i = 0; i < DIM; i++)
5763 corner_min[i] = std::min(corner_min[i], corner[i]);
5764 corner_max[i] = std::max(corner_max[i], corner[i]);
5768 /* Copy the extreme cornes without offset along x */
5769 for (i = 0; i < DIM; i++)
5771 zones->size[z].bb_x0[i] = corner_min[i];
5772 zones->size[z].bb_x1[i] = corner_max[i];
5774 /* Add the offset along x */
5775 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
5776 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
5779 if (zone_start == 0)
5782 for (dim = 0; dim < DIM; dim++)
5784 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
5786 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0] - numMovedChargeGroupsInHomeZone)/vol;
5791 for (z = zone_start; z < zone_end; z++)
5793 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5795 zones->size[z].x0[XX], zones->size[z].x1[XX],
5796 zones->size[z].x0[YY], zones->size[z].x1[YY],
5797 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
5798 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5800 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
5801 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
5802 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
5807 static int comp_cgsort(const void *a, const void *b)
5811 gmx_cgsort_t *cga, *cgb;
5812 cga = (gmx_cgsort_t *)a;
5813 cgb = (gmx_cgsort_t *)b;
5815 comp = cga->nsc - cgb->nsc;
5818 comp = cga->ind_gl - cgb->ind_gl;
/* Order data in \p dataToSort according to \p sort
 *
 * Note: both buffers should have at least \p sort.size() elements.
 */
template <typename T>
orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
            gmx::ArrayRef<T> dataToSort,
            gmx::ArrayRef<T> sortBuffer)
    GMX_ASSERT(dataToSort.size() >= sort.size(), "The vector needs to be sufficiently large");
    GMX_ASSERT(sortBuffer.size() >= sort.size(), "The sorting buffer needs to be sufficiently large");

    /* Order the data into the temporary buffer, gathering by each
     * entry's pre-sort index */
    for (const gmx_cgsort_t &entry : sort)
        sortBuffer[i++] = dataToSort[entry.ind];

    /* Copy back to the original array */
    std::copy(sortBuffer.begin(), sortBuffer.begin() + sort.size(),
              dataToSort.begin());
/* Order data in \p dataToSort according to \p sort
 *
 * Note: \p vectorToSort should have at least \p sort.size() elements,
 * \p workVector is resized when it is too small.
 */
template <typename T>
orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
            gmx::ArrayRef<T> vectorToSort,
            std::vector<T> *workVector)
    /* Grow the scratch buffer on demand, then delegate to the
     * two-buffer overload above */
    if (gmx::index(workVector->size()) < sort.size())
        workVector->resize(sort.size());
    orderVector<T>(sort, vectorToSort, *workVector);
/* Reorders the per-atom rvec data \p v according to \p sort, expanding
 * each sorted charge-group entry to its atoms via \p atomGroups.
 * When \p atomGroups is nullptr, groups are single atoms and the plain
 * vector reordering is used instead. \p buf is scratch space. */
static void order_vec_atom(const gmx::RangePartitioning *atomGroups,
                           gmx::ArrayRef<const gmx_cgsort_t> sort,
                           gmx::ArrayRef<gmx::RVec> v,
                           gmx::ArrayRef<gmx::RVec> buf)
    if (atomGroups == nullptr)
        /* Avoid the useless loop of the atoms within a cg */
        orderVector(sort, v, buf);

        /* Order the data into buf, atom by atom within each group */
        for (const gmx_cgsort_t &entry : sort)
            for (int i : atomGroups->block(entry.ind))
                copy_rvec(v[i], buf[a]);

        /* Copy back to the original array */
        for (int a = 0; a < atot; a++)
            copy_rvec(buf[a], v[a]);
5899 /* Returns whether a < b */
5900 static bool compareCgsort(const gmx_cgsort_t &a,
5901 const gmx_cgsort_t &b)
5903 return (a.nsc < b.nsc ||
5904 (a.nsc == b.nsc && a.ind_gl < b.ind_gl));
/* Produces the full sorted order in \p sort1 by qsort-ing the (unordered)
 * \p moved entries and merging them with the already-ordered \p stationary
 * entries; both inputs use the (nsc, ind_gl) ordering of comp_cgsort. */
static void orderedSort(gmx::ArrayRef<const gmx_cgsort_t> stationary,
                        gmx::ArrayRef<gmx_cgsort_t> moved,
                        std::vector<gmx_cgsort_t> *sort1)
    /* The new indices are not very ordered, so we qsort them */
    gmx_qsort_threadsafe(moved.data(), moved.size(), sizeof(moved[0]), comp_cgsort);

    /* stationary is already ordered, so now we can merge the two arrays */
    sort1->resize(stationary.size() + moved.size());
    std::merge(stationary.begin(), stationary.end(),
               moved.begin(), moved.end(),
/* Set the sorting order for systems with charge groups, returned in sort->sort.
 * The order is according to the global charge group index.
 * This adds and removes charge groups that moved between domains.
 */
static void dd_sort_order(const gmx_domdec_t *dd,
                          const t_forcerec *fr,
                          gmx_domdec_sort_t *sort)
    /* a[i] is the ns grid cell of home charge group i; cells at or above
     * movedValue flag groups that moved to another domain */
    const int *a = fr->ns->grid->cell_index;

    const int movedValue = NSGRID_SIGNAL_MOVED_FAC*fr->ns->grid->ncells;

    if (ncg_home_old >= 0)
        /* Incremental path: reuse the previous sorted order */
        std::vector<gmx_cgsort_t> &stationary = sort->stationary;
        std::vector<gmx_cgsort_t> &moved = sort->moved;

        /* The charge groups that remained in the same ns grid cell
         * are completely ordered. So we can sort efficiently by sorting
         * the charge groups that did move into the stationary list.
         * Note: push_back() seems to be slightly slower than direct access.
         */
        for (int i = 0; i < dd->ncg_home; i++)
            /* Check if this cg did not move to another node */
            if (a[i] < movedValue)
                entry.ind_gl = dd->globalAtomGroupIndices[i];
                if (i >= ncg_home_old || a[i] != sort->sorted[i].nsc)
                    /* This cg is new on this node or moved ns grid cell */
                    moved.push_back(entry);
                    /* This cg did not move */
                    stationary.push_back(entry);

        fprintf(debug, "ordered sort cgs: stationary %zu moved %zu\n",
                stationary.size(), moved.size());
        /* Sort efficiently */
        orderedSort(stationary, moved, &sort->sorted);
        /* Full path: sort everything from scratch */
        std::vector<gmx_cgsort_t> &cgsort = sort->sorted;
        cgsort.reserve(dd->ncg_home);
        for (int i = 0; i < dd->ncg_home; i++)
            /* Sort on the ns grid cell indices
             * and the global topology index
             */
            entry.ind_gl = dd->globalAtomGroupIndices[i];
            cgsort.push_back(entry);
            if (cgsort[i].nsc < movedValue)
        fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, numCGNew);
        /* Determine the order of the charge groups using qsort */
        gmx_qsort_threadsafe(cgsort.data(), dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
        /* Remove the charge groups which are no longer at home here */
        cgsort.resize(numCGNew);
/* Returns the sorting order for atoms based on the nbnxn grid order in sort */
static void dd_sort_order_nbnxn(const t_forcerec *fr,
                                std::vector<gmx_cgsort_t> *sort)
    gmx::ArrayRef<const int> atomOrder = nbnxn_get_atomorder(fr->nbv->nbs.get());

    /* Using push_back() instead of this resize results in much slower code */
    sort->resize(atomOrder.size());
    gmx::ArrayRef<gmx_cgsort_t> buffer = *sort;
    size_t numSorted = 0;
    for (int i : atomOrder)
        /* The values of nsc and ind_gl are not used in this case */
        buffer[numSorted++].ind = i;
    /* Shrink to the number of entries actually filled */
    sort->resize(numSorted);
/* Sorts the local state (x, v, cg_p, cg_cm, indices, cginfo) into the
 * new charge-group order determined by the active cutoff scheme, and
 * updates the home atom/group counts accordingly. */
static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
    gmx_domdec_sort_t *sort = dd->comm->sort.get();

    /* Determine the new order, scheme-dependent */
    switch (fr->cutoff_scheme)
        dd_sort_order(dd, fr, ncg_home_old, sort);
        dd_sort_order_nbnxn(fr, &sort->sorted);
        gmx_incons("unimplemented");

    const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();

    /* We alloc with the old size, since cgindex is still old */
    GMX_ASSERT(atomGrouping.numBlocks() == dd->ncg_home, "atomGroups and dd should be consistent");
    DDBufferAccess<gmx::RVec> rvecBuffer(dd->comm->rvecBuffer, atomGrouping.fullRange().end());

    const gmx::RangePartitioning *atomGroupsPtr = (dd->comm->bCGs ? &atomGrouping : nullptr);

    /* Set the new home atom/charge group count */
    dd->ncg_home = sort->sorted.size();
    fprintf(debug, "Set the new home charge group count to %d\n",

    /* Reorder the state */
    gmx::ArrayRef<const gmx_cgsort_t> cgsort = sort->sorted;
    GMX_RELEASE_ASSERT(cgsort.size() == dd->ncg_home, "We should sort all the home atom groups");

    if (state->flags & (1 << estX))
        order_vec_atom(atomGroupsPtr, cgsort, state->x, rvecBuffer.buffer);
    if (state->flags & (1 << estV))
        order_vec_atom(atomGroupsPtr, cgsort, state->v, rvecBuffer.buffer);
    if (state->flags & (1 << estCGP))
        order_vec_atom(atomGroupsPtr, cgsort, state->cg_p, rvecBuffer.buffer);

    if (fr->cutoff_scheme == ecutsGROUP)
        /* Reorder the charge group centers of mass */
        gmx::ArrayRef<gmx::RVec> cgcmRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cgcm[0]), cgsort.size());
        orderVector(cgsort, cgcmRef, rvecBuffer.buffer);

    /* Reorder the global cg index */
    orderVector<int>(cgsort, dd->globalAtomGroupIndices, &sort->intBuffer);
    /* Reorder the cginfo */
    orderVector<int>(cgsort, gmx::arrayRefFromArray(fr->cginfo, cgsort.size()), &sort->intBuffer);
    /* Rebuild the local cg index */
    /* We make a new, ordered atomGroups object and assign it to
     * the old one. This causes some allocation overhead, but saves
     * a copy back of the whole index.
     */
    gmx::RangePartitioning ordered;
    for (const gmx_cgsort_t &entry : cgsort)
        ordered.appendBlock(atomGrouping.block(entry.ind).size());
    dd->atomGrouping_ = ordered;
    dd->atomGrouping_.setAllBlocksSizeOne(dd->ncg_home);

    /* Set the home atom number */
    dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, dd->atomGrouping().fullRange().end());

    if (fr->cutoff_scheme == ecutsVERLET)
        /* The atoms are now exactly in grid order, update the grid order */
        nbnxn_set_atomorder(fr->nbv->nbs.get());
        /* Copy the sorted ns cell indices back to the ns grid struct */
        for (gmx::index i = 0; i < cgsort.size(); i++)
            fr->ns->grid->cell_index[i] = cgsort[i].nsc;
        fr->ns->grid->nr = cgsort.size();
/* Accumulates the per-range atom counts of this partitioning into the
 * running DD statistics in comm. */
static void add_dd_statistics(gmx_domdec_t *dd)
    gmx_domdec_comm_t *comm = dd->comm;

    for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
        auto range = static_cast<DDAtomRanges::Type>(i);
        /* Size of this atom range = end - start */
        comm->atomRanges.end(range) - comm->atomRanges.start(range);
/* Resets the accumulated DD statistics and load counters, e.g. at the
 * start of the production part of a run. */
void reset_dd_statistics_counters(gmx_domdec_t *dd)
    gmx_domdec_comm_t *comm = dd->comm;

    /* Reset all the statistics and counters for total run counting */
    for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
        comm->sum_nat[i] = 0;
    comm->load_step = 0;
    clear_ivec(comm->load_lim);
/* Sums the DD communication statistics over all ranks and prints the
 * per-step averages (force, vsite, LINCS communication) to the log file. */
void print_dd_statistics(const t_commrec *cr, const t_inputrec *ir, FILE *fplog)
    gmx_domdec_comm_t *comm = cr->dd->comm;

    /* Global reduction of the per-range atom counts */
    const int numRanges = static_cast<int>(DDAtomRanges::Type::Number);
    gmx_sumd(numRanges, comm->sum_nat, cr);

    if (fplog == nullptr)

    fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");

    for (int i = static_cast<int>(DDAtomRanges::Type::Zones); i < numRanges; i++)
        auto range = static_cast<DDAtomRanges::Type>(i);
        double av = comm->sum_nat[i]/comm->ndecomp;
        case DDAtomRanges::Type::Zones:
            " av. #atoms communicated per step for force: %d x %.1f\n",
        case DDAtomRanges::Type::Vsites:
            if (cr->dd->vsite_comm)
                /* Ewald/PME needs an extra communication direction */
                " av. #atoms communicated per step for vsites: %d x %.1f\n",
                (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
        case DDAtomRanges::Type::Constraints:
            if (cr->dd->constraint_comm)
                " av. #atoms communicated per step for LINCS: %d x %.1f\n",
                1 + ir->nLincsIter, av);
            gmx_incons(" Unknown type for DD statistics");
    fprintf(fplog, "\n");

    if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
        print_dd_load_av(fplog, cr->dd);
/* Top-level domain decomposition repartitioning entry point: decides on
 * dynamic load balancing, (re)distributes charge groups/atoms over the
 * domains, sorts the local state, rebuilds the local topology, indices
 * and special-atom (vsite/constraint) communication, and updates all
 * dependent modules (PME, pull, rotation, IMD, mdatoms).
 * NOTE(review): brace-only and blank lines are missing from this listing;
 * statements are kept verbatim, only comments were added/repaired. */
void dd_partition_system(FILE *fplog,
                         const t_commrec *cr,
                         gmx_bool bMasterState,
                         t_state *state_global,
                         const gmx_mtop_t *top_global,
                         const t_inputrec *ir,
                         gmx_enfrot *enforcedRotation,
                         t_state *state_local,
                         PaddedRVecVector *f,
                         gmx::MDAtoms *mdAtoms,
                         gmx_localtop_t *top_local,
                         gmx::Constraints *constr,
                         gmx_wallcycle *wcycle,
    gmx_domdec_comm_t *comm;
    gmx_ddbox_t ddbox = {0};
    int64_t step_pcoupl;
    rvec cell_ns_x0, cell_ns_x1;
    int ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
    gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckWhetherToTurnDlbOn, bLogLoad;
    gmx_bool bRedist, bSortCG, bResortAll;
    ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;

    wallcycle_start(wcycle, ewcDOMDEC);

    // TODO if the update code becomes accessible here, use
    // upd->deform for this logic.
    bBoxChanged = (bMasterState || inputrecDeform(ir));
    if (ir->epc != epcNO)
        /* With nstpcouple > 1 pressure coupling happens.
         * one step after calculating the pressure.
         * Box scaling happens at the end of the MD step,
         * after the DD partitioning.
         * We therefore have to do DLB in the first partitioning
         * after an MD step where P-coupling occurred.
         * We need to determine the last step in which p-coupling occurred.
         * MRS -- need to validate this for vv?
         */
        int n = ir->nstpcouple;
        step_pcoupl = step - 1;
        step_pcoupl = ((step - 1)/n)*n + 1;
        if (step_pcoupl >= comm->partition_step)

    bNStGlobalComm = (step % nstglobalcomm == 0);

    /* Should we do dynamic load balancing this step?
     * Since it requires (possibly expensive) global communication,
     * we might want to do DLB less frequently.
     */
    if (bBoxChanged || ir->epc != epcNO)
        bDoDLB = bBoxChanged;
        bDoDLB = bNStGlobalComm;

    /* Check if we have recorded loads on the nodes */
    if (comm->bRecordLoad && dd_load_count(comm) > 0)
        bCheckWhetherToTurnDlbOn = dd_dlb_get_should_check_whether_to_turn_dlb_on(dd);

        /* Print load every nstlog, first and last step to the log file */
        bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
                    comm->n_load_collect == 0 ||
                    (step + ir->nstlist > ir->init_step + ir->nsteps)));

        /* Avoid extra communication due to verbose screen output
         * when nstglobalcomm is set.
         */
        if (bDoDLB || bLogLoad || bCheckWhetherToTurnDlbOn ||
            (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
            get_load_distribution(dd, wcycle);
                dd_print_load(fplog, dd, step-1);
                dd_print_load_verbose(dd);
            comm->n_load_collect++;

                /* Add the measured cycles to the running average */
                const float averageFactor = 0.1f;
                comm->cyclesPerStepDlbExpAverage =
                    (1 - averageFactor)*comm->cyclesPerStepDlbExpAverage +
                    averageFactor*comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
            if (comm->dlbState == edlbsOnCanTurnOff &&
                dd->comm->n_load_have % c_checkTurnDlbOffInterval == c_checkTurnDlbOffInterval - 1)
                gmx_bool turnOffDlb;
                    /* If the running averaged cycles with DLB are more
                     * than before we turned on DLB, turn off DLB.
                     * We will again run and check the cycles without DLB
                     * and we can then decide if to turn off DLB forever.
                     */
                    turnOffDlb = (comm->cyclesPerStepDlbExpAverage >
                                  comm->cyclesPerStepBeforeDLB);
                dd_bcast(dd, sizeof(turnOffDlb), &turnOffDlb);
                    /* To turn off DLB, we need to redistribute the atoms */
                    dd_collect_state(dd, state_local, state_global);
                    bMasterState = TRUE;
                    turn_off_dlb(fplog, cr, step);
            else if (bCheckWhetherToTurnDlbOn)
                gmx_bool turnOffDlbForever = FALSE;
                gmx_bool turnOnDlb = FALSE;

                /* Since the timings are node dependent, the master decides */
                    /* If we recently turned off DLB, we want to check if
                     * performance is better without DLB. We want to do this
                     * ASAP to minimize the chance that external factors that
                     * slowed down the DLB step are gone here and we
                     * incorrectly conclude that DLB was causing the slowdown.
                     * So we measure one nstlist block, no running average.
                     */
                    if (comm->haveTurnedOffDlb &&
                        comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep] <
                        comm->cyclesPerStepDlbExpAverage)
                        /* After turning off DLB we ran nstlist steps in fewer
                         * cycles than with DLB. This likely means that DLB
                         * is not beneficial, but this could be due to a one
                         * time unlucky fluctuation, so we require two such
                         * observations in close succession to turn off DLB
                         */
                        if (comm->dlbSlowerPartitioningCount > 0 &&
                            dd->ddp_count < comm->dlbSlowerPartitioningCount + 10*c_checkTurnDlbOnInterval)
                            turnOffDlbForever = TRUE;
                        comm->haveTurnedOffDlb = false;
                        /* Register when we last measured DLB slowdown */
                        comm->dlbSlowerPartitioningCount = dd->ddp_count;
                        /* Here we check if the max PME rank load is more than 0.98
                         * the max PP force load. If so, PP DLB will not help,
                         * since we are (almost) limited by PME. Furthermore,
                         * DLB will cause a significant extra x/f redistribution
                         * cost on the PME ranks, which will then surely result
                         * in lower total performance.
                         */
                        if (cr->npmenodes > 0 &&
                            dd_pme_f_ratio(dd) > 1 - DD_PERF_LOSS_DLB_ON)
                            turnOnDlb = (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
                    gmx_bool turnOffDlbForever;
                    turnOffDlbForever, turnOnDlb
                dd_bcast(dd, sizeof(bools), &bools);
                if (bools.turnOffDlbForever)
                    turn_off_dlb_forever(fplog, cr, step);
                else if (bools.turnOnDlb)
                    turn_on_dlb(fplog, cr, step);
        comm->n_load_have++;

    cgs_gl = &comm->cgs_gl;

        /* Clear the old state */
        clearDDStateIndices(dd, 0, 0);

        auto xGlobal = positionsFromStatePointer(state_global);

        set_ddbox(dd, true, ir,
                  DDMASTER(dd) ? state_global->box : nullptr,

        distributeState(fplog, dd, state_global, ddbox, state_local, f);

        dd_make_local_cgs(dd, &top_local->cgs);

        /* Ensure that we have space for the new distribution */
        dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);

        if (fr->cutoff_scheme == ecutsGROUP)
            calc_cgcm(fplog, 0, dd->ncg_home,
                      &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);

        inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());

        dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
    else if (state_local->ddp_count != dd->ddp_count)
        /* The local state is stale: rebuild from the stored indices */
        if (state_local->ddp_count > dd->ddp_count)
            gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%ld)", state_local->ddp_count, dd->ddp_count);

        if (state_local->ddp_count_cg_gl != state_local->ddp_count)
            gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);

        /* Clear the old state */
        clearDDStateIndices(dd, 0, 0);

        /* Restore the atom group indices from state_local */
        restoreAtomGroups(dd, cgs_gl->index, state_local);
        make_dd_indices(dd, cgs_gl->index, 0);
        ncgindex_set = dd->ncg_home;

        if (fr->cutoff_scheme == ecutsGROUP)
            /* Redetermine the cg COMs */
            calc_cgcm(fplog, 0, dd->ncg_home,
                      &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);

        inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());

        dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);

        set_ddbox(dd, bMasterState, ir, state_local->box,
                  true, state_local->x, &ddbox);

        bRedist = isDlbOn(comm);
        /* We have the full state, only redistribute the cgs */

        /* Clear the non-home indices */
        clearDDStateIndices(dd, dd->ncg_home, comm->atomRanges.numHomeAtoms());

        /* Avoid global communication for dim's without pbc and -gcom */
        if (!bNStGlobalComm)
            copy_rvec(comm->box0, ddbox.box0    );
            copy_rvec(comm->box_size, ddbox.box_size);
        set_ddbox(dd, bMasterState, ir, state_local->box,
                  bNStGlobalComm, state_local->x, &ddbox);

        /* For dim's without pbc and -gcom */
        copy_rvec(ddbox.box0, comm->box0    );
        copy_rvec(ddbox.box_size, comm->box_size);

    set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,

    if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
        write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);

    /* Check if we should sort the charge groups */
    bSortCG = (bMasterState || bRedist);

    ncg_home_old = dd->ncg_home;

    /* When repartitioning we mark charge groups that will move to neighboring
     * DD cells, but we do not move them right away for performance reasons.
     * Thus we need to keep track of how many charge groups will move for
     * obtaining correct local charge group / atom counts.
     */
        wallcycle_sub_start(wcycle, ewcsDD_REDIST);

        dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
                           !bSortCG, nrnb, &ncgindex_set, &ncg_moved);

        wallcycle_sub_stop(wcycle, ewcsDD_REDIST);

        /* Determine the neighbor-search grid boundaries */
        get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
                              &comm->cell_x0, &comm->cell_x1,
                              dd->ncg_home, fr->cg_cm,
                              cell_ns_x0, cell_ns_x1, &grid_density);

            comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);

    switch (fr->cutoff_scheme)
            copy_ivec(fr->ns->grid->n, ncells_old);
            grid_first(fplog, fr->ns->grid, dd, &ddbox,
                       state_local->box, cell_ns_x0, cell_ns_x1,
                       fr->rlist, grid_density);
            nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_old[XX], &ncells_old[YY]);
            gmx_incons("unimplemented");
    /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
    copy_ivec(ddbox.tric_dir, comm->tric_dir);

        wallcycle_sub_start(wcycle, ewcsDD_GRID);

        /* Sort the state on charge group position.
         * This enables exact restarts from this step.
         * It also improves performance by about 15% with larger numbers
         * of atoms per node.
         */
        /* Fill the ns grid with the home cell,
         * so we can sort with the indices.
         */
        set_zones_ncg_home(dd);

        switch (fr->cutoff_scheme)
                set_zones_size(dd, state_local->box, &ddbox, 0, 1, ncg_moved);

                nbnxn_put_on_grid(fr->nbv->nbs.get(), fr->ePBC, state_local->box,
                                  comm->zones.size[0].bb_x0,
                                  comm->zones.size[0].bb_x1,
                                  comm->zones.dens_zone0,
                                  as_rvec_array(state_local->x.data()),
                                  ncg_moved, bRedist ? comm->movedBuffer.data() : nullptr,
                                  fr->nbv->grp[eintLocal].kernel_type,

                nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_new[XX], &ncells_new[YY]);
                fill_grid(&comm->zones, fr->ns->grid, dd->ncg_home,
                          0, dd->ncg_home, fr->cg_cm);

                copy_ivec(fr->ns->grid->n, ncells_new);
                gmx_incons("unimplemented");

        bResortAll = bMasterState;

        /* Check if we can use the old order and ns grid cell indices
         * of the charge groups to sort the charge groups efficiently.
         */
        if (ncells_new[XX] != ncells_old[XX] ||
            ncells_new[YY] != ncells_old[YY] ||
            ncells_new[ZZ] != ncells_old[ZZ])

            fprintf(debug, "Step %s, sorting the %d home charge groups\n",
                    gmx_step_str(step, sbuf), dd->ncg_home);
        dd_sort_state(dd, fr->cg_cm, fr, state_local,
                      bResortAll ? -1 : ncg_home_old);

        /* After sorting and compacting we set the correct size */
        dd_resize_state(state_local, f, comm->atomRanges.numHomeAtoms());

        /* Rebuild all the indices */
        ga2la_clear(dd->ga2la);

        wallcycle_sub_stop(wcycle, ewcsDD_GRID);

    wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);

    /* Set up the communication and communicate the coordinates */
    setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);

    /* Set the indices */
    make_dd_indices(dd, cgs_gl->index, ncgindex_set);

    /* Set the charge group boundaries for neighbor searching */
    set_cg_boundaries(&comm->zones);

    if (fr->cutoff_scheme == ecutsVERLET)
        /* When bSortCG=true, we have already set the size for zone 0 */
        set_zones_size(dd, state_local->box, &ddbox,
                       bSortCG ? 1 : 0, comm->zones.n,

    wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);

        write_dd_pdb("dd_home",step,"dump",top_global,cr,
                     -1,as_rvec_array(state_local->x.data()),state_local->box);

    wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);

    /* Extract a local topology from the global topology */
    for (int i = 0; i < dd->ndim; i++)
        np[dd->dim[i]] = comm->cd[i].numPulses();
    dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
                      comm->cellsize_min, np,
                      fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : as_rvec_array(state_local->x.data()),
                      vsite, top_global, top_local);

    wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);

    wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);

    /* Set up the special atom communication */
    int n = comm->atomRanges.end(DDAtomRanges::Type::Zones);
    for (int i = static_cast<int>(DDAtomRanges::Type::Zones) + 1; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
        auto range = static_cast<DDAtomRanges::Type>(i);
            case DDAtomRanges::Type::Vsites:
                if (vsite && vsite->n_intercg_vsite)
                    n = dd_make_local_vsites(dd, n, top_local->idef.il);
            case DDAtomRanges::Type::Constraints:
                if (dd->bInterCGcons || dd->bInterCGsettles)
                    /* Only for inter-cg constraints we need special code */
                    n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
                                                  constr, ir->nProjOrder,
                                                  top_local->idef.il);
                gmx_incons("Unknown special atom type setup");
        comm->atomRanges.setEnd(range, n);

    wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);

    wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);

    /* Make space for the extra coordinates for virtual site
     * or constraint communication.
     */
    state_local->natoms = comm->atomRanges.numAtomsTotal();

    dd_resize_state(state_local, f, state_local->natoms);

    if (fr->haveDirectVirialContributions)
        if (vsite && vsite->n_intercg_vsite)
            nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Vsites);
            if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
                nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Zones);
                nat_f_novirsum = comm->atomRanges.numHomeAtoms();

    /* Set the number of atoms required for the force calculation.
     * Forces need to be constrained when doing energy
     * minimization. For simple simulations we could avoid some
     * allocation, zeroing and copying, but this is probably not worth
     * the complications and checking.
     */
    forcerec_set_ranges(fr, dd->ncg_home, dd->globalAtomGroupIndices.size(),
                        comm->atomRanges.end(DDAtomRanges::Type::Zones),
                        comm->atomRanges.end(DDAtomRanges::Type::Constraints),

    /* Update atom data for mdatoms and several algorithms */
    mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr,
                              nullptr, mdAtoms, constr, vsite, nullptr);

    auto mdatoms = mdAtoms->mdatoms();
    if (!thisRankHasDuty(cr, DUTY_PME))
        /* Send the charges and/or c6/sigmas to our PME only node */
        gmx_pme_send_parameters(cr,
                                mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
                                mdatoms->chargeA, mdatoms->chargeB,
                                mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
                                mdatoms->sigmaA, mdatoms->sigmaB,
                                dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));

        /* Update the local pull groups */
        dd_make_local_pull_groups(cr, ir->pull_work);

        /* Update the local rotation groups */
        dd_make_local_rotation_groups(dd, enforcedRotation);

    if (dd->atomSets != nullptr)
        /* Update the local atom sets */
        dd->atomSets->setIndicesInDomainDecomposition(*(dd->ga2la));

    /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
    dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);

    add_dd_statistics(dd);

    /* Make sure we only count the cycles for this DD partitioning */
    clear_dd_cycle_counts(dd);

    /* Because the order of the atoms might have changed since
     * the last vsite construction, we need to communicate the constructing
     * atom coordinates again (for spreading the forces this MD step).
     */
    dd_move_x_vsites(dd, state_local->box, as_rvec_array(state_local->x.data()));

    wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);

    if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
        dd_move_x(dd, state_local->box, state_local->x, nullWallcycle);
        write_dd_pdb("dd_dump", step, "dump", top_global, cr,
                     -1, as_rvec_array(state_local->x.data()), state_local->box);

    /* Store the partitioning step */
    comm->partition_step = step;

    /* Increase the DD partitioning counter */
    /* The state currently matches this DD partitioning count, store it */
    state_local->ddp_count = dd->ddp_count;
        /* The DD master node knows the complete cg distribution,
         * store the count so we can possibly skip the cg info communication.
         */
        comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);

    if (comm->DD_debug > 0)
        /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
        check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
                                "after partitioning");

    wallcycle_stop(wcycle, ewcDOMDEC);
/*! \brief Check whether bonded interactions are missing, if appropriate */
void checkNumberOfBondedInteractions(FILE *fplog,
                                     int totalNumberOfBondedInteractions,
                                     const gmx_mtop_t *top_global,
                                     const gmx_localtop_t *top_local,
                                     const t_state *state,
                                     bool *shouldCheckNumberOfBondedInteractions)
    if (*shouldCheckNumberOfBondedInteractions)
        /* A mismatch means some bonded interactions were lost across
         * domain boundaries; report them and abort. */
        if (totalNumberOfBondedInteractions != cr->dd->nbonded_global)
            dd_print_missing_interactions(fplog, cr, totalNumberOfBondedInteractions, top_global, top_local, state); // Does not return
        /* Only check once per (re)partitioning */
        *shouldCheckNumberOfBondedInteractions = false;