2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
51 #include "gromacs/compat/make_unique.h"
52 #include "gromacs/domdec/collect.h"
53 #include "gromacs/domdec/dlbtiming.h"
54 #include "gromacs/domdec/domdec_network.h"
55 #include "gromacs/domdec/ga2la.h"
56 #include "gromacs/domdec/localatomsetmanager.h"
57 #include "gromacs/ewald/pme.h"
58 #include "gromacs/fileio/gmxfio.h"
59 #include "gromacs/fileio/pdbio.h"
60 #include "gromacs/gmxlib/chargegroup.h"
61 #include "gromacs/gmxlib/network.h"
62 #include "gromacs/gmxlib/nrnb.h"
63 #include "gromacs/gpu_utils/gpu_utils.h"
64 #include "gromacs/hardware/hw_info.h"
65 #include "gromacs/imd/imd.h"
66 #include "gromacs/listed-forces/manage-threading.h"
67 #include "gromacs/math/functions.h"
68 #include "gromacs/math/vec.h"
69 #include "gromacs/math/vectypes.h"
70 #include "gromacs/mdlib/constr.h"
71 #include "gromacs/mdlib/constraintrange.h"
72 #include "gromacs/mdlib/forcerec.h"
73 #include "gromacs/mdlib/gmx_omp_nthreads.h"
74 #include "gromacs/mdlib/lincs.h"
75 #include "gromacs/mdlib/mdatoms.h"
76 #include "gromacs/mdlib/mdrun.h"
77 #include "gromacs/mdlib/mdsetup.h"
78 #include "gromacs/mdlib/nb_verlet.h"
79 #include "gromacs/mdlib/nbnxn_grid.h"
80 #include "gromacs/mdlib/nsgrid.h"
81 #include "gromacs/mdlib/vsite.h"
82 #include "gromacs/mdtypes/commrec.h"
83 #include "gromacs/mdtypes/df_history.h"
84 #include "gromacs/mdtypes/forcerec.h"
85 #include "gromacs/mdtypes/inputrec.h"
86 #include "gromacs/mdtypes/md_enums.h"
87 #include "gromacs/mdtypes/mdatom.h"
88 #include "gromacs/mdtypes/nblist.h"
89 #include "gromacs/mdtypes/state.h"
90 #include "gromacs/pbcutil/ishift.h"
91 #include "gromacs/pbcutil/pbc.h"
92 #include "gromacs/pulling/pull.h"
93 #include "gromacs/timing/wallcycle.h"
94 #include "gromacs/topology/block.h"
95 #include "gromacs/topology/idef.h"
96 #include "gromacs/topology/ifunc.h"
97 #include "gromacs/topology/mtop_lookup.h"
98 #include "gromacs/topology/mtop_util.h"
99 #include "gromacs/topology/topology.h"
100 #include "gromacs/utility/basedefinitions.h"
101 #include "gromacs/utility/basenetwork.h"
102 #include "gromacs/utility/cstringutil.h"
103 #include "gromacs/utility/exceptions.h"
104 #include "gromacs/utility/fatalerror.h"
105 #include "gromacs/utility/gmxmpi.h"
106 #include "gromacs/utility/real.h"
107 #include "gromacs/utility/smalloc.h"
108 #include "gromacs/utility/strconvert.h"
109 #include "gromacs/utility/stringutil.h"
111 #include "atomdistribution.h"
112 #include "cellsizes.h"
113 #include "distribute.h"
114 #include "domdec_constraints.h"
115 #include "domdec_internal.h"
116 #include "domdec_vsite.h"
117 #include "redistribute.h"
120 #define DD_NLOAD_MAX 9
122 static const char *edlbs_names[edlbsNR] = { "off", "auto", "locked", "on", "on" };
124 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
127 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
128 #define DD_FLAG_NRCG 65535
129 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
130 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
132 /* The DD zone order */
133 static const ivec dd_zo[DD_MAXZONE] =
134 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
136 /* The non-bonded zone-pair setup for domain decomposition
137 * The first number is the i-zone, the second number the first j-zone seen by
138 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
139 * As is, this is for 3D decomposition, where there are 4 i-zones.
140 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
141 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
144 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
149 /* Turn on DLB when the load imbalance causes this amount of total loss.
150 * There is a bit of overhead with DLB and it's difficult to achieve
151 * a load imbalance of less than 2% with DLB.
153 #define DD_PERF_LOSS_DLB_ON 0.02
155 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
156 #define DD_PERF_LOSS_WARN 0.05
159 /* We check if to turn on DLB at the first and every 100 DD partitionings.
160 * With large imbalance DLB will turn on at the first step, so we can
161 * make the interval so large that the MPI overhead of the check is negligible.
163 static const int c_checkTurnDlbOnInterval = 100;
164 /* We need to check if DLB results in worse performance and then turn it off.
165 * We check this more often then for turning DLB on, because the DLB can scale
166 * the domains very rapidly, so if unlucky the load imbalance can go up quickly
167 * and furthermore, we are already synchronizing often with DLB, so
168 * the overhead of the MPI Bcast is not that high.
170 static const int c_checkTurnDlbOffInterval = 20;
172 /* Forward declaration */
173 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue);
177 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
179 static void index2xyz(ivec nc,int ind,ivec xyz)
181 xyz[XX] = ind % nc[XX];
182 xyz[YY] = (ind / nc[XX]) % nc[YY];
183 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
187 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
189 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
190 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
191 xyz[ZZ] = ind % nc[ZZ];
194 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
199 ddindex = dd_index(dd->nc, c);
200 if (dd->comm->bCartesianPP_PME)
202 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
204 else if (dd->comm->bCartesianPP)
207 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
218 static gmx_bool dynamic_dd_box(const gmx_ddbox_t *ddbox, const t_inputrec *ir)
220 return (ddbox->nboundeddim < DIM || inputrecDynamicBox(ir));
223 int ddglatnr(const gmx_domdec_t *dd, int i)
233 if (i >= dd->comm->atomRanges.numAtomsTotal())
235 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
237 atnr = dd->globalAtomIndices[i] + 1;
243 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
245 return &dd->comm->cgs_gl;
248 void dd_store_state(gmx_domdec_t *dd, t_state *state)
252 if (state->ddp_count != dd->ddp_count)
254 gmx_incons("The MD state does not match the domain decomposition state");
257 state->cg_gl.resize(dd->ncg_home);
258 for (i = 0; i < dd->ncg_home; i++)
260 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
263 state->ddp_count_cg_gl = dd->ddp_count;
266 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
268 return &dd->comm->zones;
271 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
272 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
274 gmx_domdec_zones_t *zones;
277 zones = &dd->comm->zones;
280 while (icg >= zones->izone[izone].cg1)
289 else if (izone < zones->nizone)
291 *jcg0 = zones->izone[izone].jcg0;
295 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
296 icg, izone, zones->nizone);
299 *jcg1 = zones->izone[izone].jcg1;
301 for (d = 0; d < dd->ndim; d++)
304 shift0[dim] = zones->izone[izone].shift0[dim];
305 shift1[dim] = zones->izone[izone].shift1[dim];
306 if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
308 /* A conservative approach, this can be optimized */
315 int dd_numHomeAtoms(const gmx_domdec_t &dd)
317 return dd.comm->atomRanges.numHomeAtoms();
320 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
322 /* We currently set mdatoms entries for all atoms:
323 * local + non-local + communicated for vsite + constraints
326 return dd->comm->atomRanges.numAtomsTotal();
329 int dd_natoms_vsite(const gmx_domdec_t *dd)
331 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
334 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
336 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
337 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
340 void dd_move_x(gmx_domdec_t *dd,
342 gmx::ArrayRef<gmx::RVec> x,
343 gmx_wallcycle *wcycle)
345 wallcycle_start(wcycle, ewcMOVEX);
348 gmx_domdec_comm_t *comm;
349 gmx_domdec_comm_dim_t *cd;
350 rvec shift = {0, 0, 0};
351 gmx_bool bPBC, bScrew;
355 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
358 nat_tot = comm->atomRanges.numHomeAtoms();
359 for (int d = 0; d < dd->ndim; d++)
361 bPBC = (dd->ci[dd->dim[d]] == 0);
362 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
365 copy_rvec(box[dd->dim[d]], shift);
368 for (const gmx_domdec_ind_t &ind : cd->ind)
370 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
371 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
375 for (int g : ind.index)
377 for (int j : atomGrouping.block(g))
379 sendBuffer[n] = x[j];
386 for (int g : ind.index)
388 for (int j : atomGrouping.block(g))
390 /* We need to shift the coordinates */
391 for (int d = 0; d < DIM; d++)
393 sendBuffer[n][d] = x[j][d] + shift[d];
401 for (int g : ind.index)
403 for (int j : atomGrouping.block(g))
406 sendBuffer[n][XX] = x[j][XX] + shift[XX];
408 * This operation requires a special shift force
409 * treatment, which is performed in calc_vir.
411 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
412 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
418 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
420 gmx::ArrayRef<gmx::RVec> receiveBuffer;
421 if (cd->receiveInPlace)
423 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
427 receiveBuffer = receiveBufferAccess.buffer;
429 /* Send and receive the coordinates */
430 ddSendrecv(dd, d, dddirBackward,
431 sendBuffer, receiveBuffer);
433 if (!cd->receiveInPlace)
436 for (int zone = 0; zone < nzone; zone++)
438 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
440 x[i] = receiveBuffer[j++];
444 nat_tot += ind.nrecv[nzone+1];
449 wallcycle_stop(wcycle, ewcMOVEX);
452 void dd_move_f(gmx_domdec_t *dd,
453 gmx::ArrayRef<gmx::RVec> f,
455 gmx_wallcycle *wcycle)
457 wallcycle_start(wcycle, ewcMOVEF);
460 gmx_domdec_comm_t *comm;
461 gmx_domdec_comm_dim_t *cd;
464 gmx_bool bShiftForcesNeedPbc, bScrew;
468 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
470 nzone = comm->zones.n/2;
471 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
472 for (int d = dd->ndim-1; d >= 0; d--)
474 /* Only forces in domains near the PBC boundaries need to
475 consider PBC in the treatment of fshift */
476 bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
477 bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
478 if (fshift == nullptr && !bScrew)
480 bShiftForcesNeedPbc = FALSE;
482 /* Determine which shift vector we need */
488 for (int p = cd->numPulses() - 1; p >= 0; p--)
490 const gmx_domdec_ind_t &ind = cd->ind[p];
491 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
492 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
494 nat_tot -= ind.nrecv[nzone+1];
496 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
498 gmx::ArrayRef<gmx::RVec> sendBuffer;
499 if (cd->receiveInPlace)
501 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
505 sendBuffer = sendBufferAccess.buffer;
507 for (int zone = 0; zone < nzone; zone++)
509 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
511 sendBuffer[j++] = f[i];
515 /* Communicate the forces */
516 ddSendrecv(dd, d, dddirForward,
517 sendBuffer, receiveBuffer);
518 /* Add the received forces */
520 if (!bShiftForcesNeedPbc)
522 for (int g : ind.index)
524 for (int j : atomGrouping.block(g))
526 for (int d = 0; d < DIM; d++)
528 f[j][d] += receiveBuffer[n][d];
536 /* fshift should always be defined if this function is
537 * called when bShiftForcesNeedPbc is true */
538 assert(nullptr != fshift);
539 for (int g : ind.index)
541 for (int j : atomGrouping.block(g))
543 for (int d = 0; d < DIM; d++)
545 f[j][d] += receiveBuffer[n][d];
547 /* Add this force to the shift force */
548 for (int d = 0; d < DIM; d++)
550 fshift[is][d] += receiveBuffer[n][d];
558 for (int g : ind.index)
560 for (int j : atomGrouping.block(g))
562 /* Rotate the force */
563 f[j][XX] += receiveBuffer[n][XX];
564 f[j][YY] -= receiveBuffer[n][YY];
565 f[j][ZZ] -= receiveBuffer[n][ZZ];
568 /* Add this force to the shift force */
569 for (int d = 0; d < DIM; d++)
571 fshift[is][d] += receiveBuffer[n][d];
581 wallcycle_stop(wcycle, ewcMOVEF);
584 /* Convenience function for extracting a real buffer from an rvec buffer
586 * To reduce the number of temporary communication buffers and avoid
587 * cache polution, we reuse gmx::RVec buffers for storing reals.
588 * This functions return a real buffer reference with the same number
589 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
591 static gmx::ArrayRef<real>
592 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
594 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
598 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
601 gmx_domdec_comm_t *comm;
602 gmx_domdec_comm_dim_t *cd;
606 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
609 nat_tot = comm->atomRanges.numHomeAtoms();
610 for (int d = 0; d < dd->ndim; d++)
613 for (const gmx_domdec_ind_t &ind : cd->ind)
615 /* Note: We provision for RVec instead of real, so a factor of 3
616 * more than needed. The buffer actually already has this size
617 * and we pass a plain pointer below, so this does not matter.
619 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
620 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
622 for (int g : ind.index)
624 for (int j : atomGrouping.block(g))
626 sendBuffer[n++] = v[j];
630 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
632 gmx::ArrayRef<real> receiveBuffer;
633 if (cd->receiveInPlace)
635 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
639 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
641 /* Send and receive the data */
642 ddSendrecv(dd, d, dddirBackward,
643 sendBuffer, receiveBuffer);
644 if (!cd->receiveInPlace)
647 for (int zone = 0; zone < nzone; zone++)
649 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
651 v[i] = receiveBuffer[j++];
655 nat_tot += ind.nrecv[nzone+1];
661 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
664 gmx_domdec_comm_t *comm;
665 gmx_domdec_comm_dim_t *cd;
669 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
671 nzone = comm->zones.n/2;
672 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
673 for (int d = dd->ndim-1; d >= 0; d--)
676 for (int p = cd->numPulses() - 1; p >= 0; p--)
678 const gmx_domdec_ind_t &ind = cd->ind[p];
680 /* Note: We provision for RVec instead of real, so a factor of 3
681 * more than needed. The buffer actually already has this size
682 * and we typecast, so this works as intended.
684 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
685 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
686 nat_tot -= ind.nrecv[nzone + 1];
688 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
690 gmx::ArrayRef<real> sendBuffer;
691 if (cd->receiveInPlace)
693 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
697 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
699 for (int zone = 0; zone < nzone; zone++)
701 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
703 sendBuffer[j++] = v[i];
707 /* Communicate the forces */
708 ddSendrecv(dd, d, dddirForward,
709 sendBuffer, receiveBuffer);
710 /* Add the received forces */
712 for (int g : ind.index)
714 for (int j : atomGrouping.block(g))
716 v[j] += receiveBuffer[n];
725 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
727 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
729 zone->min0, zone->max1,
730 zone->mch0, zone->mch0,
731 zone->p1_0, zone->p1_1);
734 /* Using the home grid size as input in cell_ns_x0 and cell_ns_x1
735 * takes the extremes over all home and remote zones in the halo
736 * and returns the results in cell_ns_x0 and cell_ns_x1.
737 * Note: only used with the group cut-off scheme.
739 static void dd_move_cellx(gmx_domdec_t *dd,
740 const gmx_ddbox_t *ddbox,
744 constexpr int c_ddZoneCommMaxNumZones = 5;
745 gmx_ddzone_t buf_s[c_ddZoneCommMaxNumZones];
746 gmx_ddzone_t buf_r[c_ddZoneCommMaxNumZones];
747 gmx_ddzone_t buf_e[c_ddZoneCommMaxNumZones];
748 gmx_domdec_comm_t *comm = dd->comm;
752 for (int d = 1; d < dd->ndim; d++)
754 int dim = dd->dim[d];
755 gmx_ddzone_t &zp = (d == 1) ? comm->zone_d1[0] : comm->zone_d2[0][0];
757 /* Copy the base sizes of the home zone */
758 zp.min0 = cell_ns_x0[dim];
759 zp.max1 = cell_ns_x1[dim];
760 zp.min1 = cell_ns_x1[dim];
761 zp.mch0 = cell_ns_x0[dim];
762 zp.mch1 = cell_ns_x1[dim];
763 zp.p1_0 = cell_ns_x0[dim];
764 zp.p1_1 = cell_ns_x1[dim];
767 gmx::ArrayRef<DDCellsizesWithDlb> cellsizes = comm->cellsizesWithDlb;
769 /* Loop backward over the dimensions and aggregate the extremes
772 for (int d = dd->ndim - 2; d >= 0; d--)
774 const int dim = dd->dim[d];
775 const bool applyPbc = (dim < ddbox->npbcdim);
777 /* Use an rvec to store two reals */
778 extr_s[d][0] = cellsizes[d + 1].fracLower;
779 extr_s[d][1] = cellsizes[d + 1].fracUpper;
780 extr_s[d][2] = cellsizes[d + 1].fracUpper;
783 GMX_ASSERT(pos < c_ddZoneCommMaxNumZones, "The buffers should be sufficiently large");
784 /* Store the extremes in the backward sending buffer,
785 * so they get updated separately from the forward communication.
787 for (int d1 = d; d1 < dd->ndim-1; d1++)
789 /* We invert the order to be able to use the same loop for buf_e */
790 buf_s[pos].min0 = extr_s[d1][1];
791 buf_s[pos].max1 = extr_s[d1][0];
792 buf_s[pos].min1 = extr_s[d1][2];
795 /* Store the cell corner of the dimension we communicate along */
796 buf_s[pos].p1_0 = comm->cell_x0[dim];
801 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
804 if (dd->ndim == 3 && d == 0)
806 buf_s[pos] = comm->zone_d2[0][1];
808 buf_s[pos] = comm->zone_d1[0];
812 /* We only need to communicate the extremes
813 * in the forward direction
815 int numPulses = comm->cd[d].numPulses();
819 /* Take the minimum to avoid double communication */
820 numPulsesMin = std::min(numPulses, dd->nc[dim] - 1 - numPulses);
824 /* Without PBC we should really not communicate over
825 * the boundaries, but implementing that complicates
826 * the communication setup and therefore we simply
827 * do all communication, but ignore some data.
829 numPulsesMin = numPulses;
831 for (int pulse = 0; pulse < numPulsesMin; pulse++)
833 /* Communicate the extremes forward */
834 bool receiveValidData = (applyPbc || dd->ci[dim] > 0);
836 int numElements = dd->ndim - d - 1;
837 ddSendrecv(dd, d, dddirForward,
838 extr_s + d, numElements,
839 extr_r + d, numElements);
841 if (receiveValidData)
843 for (int d1 = d; d1 < dd->ndim - 1; d1++)
845 extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
846 extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
847 extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]);
852 const int numElementsInBuffer = pos;
853 for (int pulse = 0; pulse < numPulses; pulse++)
855 /* Communicate all the zone information backward */
856 bool receiveValidData = (applyPbc || dd->ci[dim] < dd->nc[dim] - 1);
858 static_assert(sizeof(gmx_ddzone_t) == c_ddzoneNumReals*sizeof(real), "Here we expect gmx_ddzone_t to consist of c_ddzoneNumReals reals (only)");
860 int numReals = numElementsInBuffer*c_ddzoneNumReals;
861 ddSendrecv(dd, d, dddirBackward,
862 gmx::arrayRefFromArray(&buf_s[0].min0, numReals),
863 gmx::arrayRefFromArray(&buf_r[0].min0, numReals));
868 for (int d1 = d + 1; d1 < dd->ndim; d1++)
870 /* Determine the decrease of maximum required
871 * communication height along d1 due to the distance along d,
872 * this avoids a lot of useless atom communication.
874 real dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
877 if (ddbox->tric_dir[dim])
879 /* c is the off-diagonal coupling between the cell planes
880 * along directions d and d1.
882 c = ddbox->v[dim][dd->dim[d1]][dim];
888 real det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
891 dh[d1] = comm->cutoff - (c*dist_d + std::sqrt(det))/(1 + c*c);
895 /* A negative value signals out of range */
901 /* Accumulate the extremes over all pulses */
902 for (int i = 0; i < numElementsInBuffer; i++)
910 if (receiveValidData)
912 buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
913 buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
914 buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
918 if (dd->ndim == 3 && d == 0 && i == numElementsInBuffer - 1)
926 if (receiveValidData && dh[d1] >= 0)
928 buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
929 buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
932 /* Copy the received buffer to the send buffer,
933 * to pass the data through with the next pulse.
937 if (((applyPbc || dd->ci[dim] + numPulses < dd->nc[dim]) && pulse == numPulses - 1) ||
938 (!applyPbc && dd->ci[dim] + 1 + pulse == dd->nc[dim] - 1))
940 /* Store the extremes */
943 for (int d1 = d; d1 < dd->ndim-1; d1++)
945 extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
946 extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
947 extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1);
951 if (d == 1 || (d == 0 && dd->ndim == 3))
953 for (int i = d; i < 2; i++)
955 comm->zone_d2[1-d][i] = buf_e[pos];
961 comm->zone_d1[1] = buf_e[pos];
970 int dim = dd->dim[1];
971 for (int i = 0; i < 2; i++)
975 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
977 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0);
978 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1);
983 int dim = dd->dim[2];
984 for (int i = 0; i < 2; i++)
986 for (int j = 0; j < 2; j++)
990 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
992 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
993 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
997 for (int d = 1; d < dd->ndim; d++)
999 cellsizes[d].fracLowerMax = extr_s[d-1][0];
1000 cellsizes[d].fracUpperMin = extr_s[d-1][1];
1003 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1004 d, cellsizes[d].fracLowerMax, cellsizes[d].fracUpperMin);
1009 static void write_dd_grid_pdb(const char *fn, int64_t step,
1010 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1012 rvec grid_s[2], *grid_r = nullptr, cx, r;
1013 char fname[STRLEN], buf[22];
1015 int a, i, d, z, y, x;
1019 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1020 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1024 snew(grid_r, 2*dd->nnodes);
1027 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : nullptr);
1031 for (d = 0; d < DIM; d++)
1033 for (i = 0; i < DIM; i++)
1041 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1043 tric[d][i] = box[i][d]/box[i][i];
1052 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1053 out = gmx_fio_fopen(fname, "w");
1054 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1056 for (i = 0; i < dd->nnodes; i++)
1058 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1059 for (d = 0; d < DIM; d++)
1061 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1063 for (z = 0; z < 2; z++)
1065 for (y = 0; y < 2; y++)
1067 for (x = 0; x < 2; x++)
1069 cx[XX] = grid_r[i*2+x][XX];
1070 cx[YY] = grid_r[i*2+y][YY];
1071 cx[ZZ] = grid_r[i*2+z][ZZ];
1073 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1074 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
1078 for (d = 0; d < DIM; d++)
1080 for (x = 0; x < 4; x++)
1084 case 0: y = 1 + i*8 + 2*x; break;
1085 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1086 case 2: y = 1 + i*8 + x; break;
1088 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
1092 gmx_fio_fclose(out);
1097 void write_dd_pdb(const char *fn, int64_t step, const char *title,
1098 const gmx_mtop_t *mtop, const t_commrec *cr,
1099 int natoms, const rvec x[], const matrix box)
1101 char fname[STRLEN], buf[22];
1104 const char *atomname, *resname;
1110 natoms = dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
1113 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
1115 out = gmx_fio_fopen(fname, "w");
1117 fprintf(out, "TITLE %s\n", title);
1118 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1120 for (int i = 0; i < natoms; i++)
1122 int ii = dd->globalAtomIndices[i];
1123 mtopGetAtomAndResidueName(mtop, ii, &molb, &atomname, &resnr, &resname, nullptr);
1126 if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Zones))
1129 while (i >= dd->atomGrouping().subRange(0, dd->comm->zones.cg_range[c + 1]).end())
1135 else if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites))
1137 b = dd->comm->zones.n;
1141 b = dd->comm->zones.n + 1;
1143 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
1144 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
1146 fprintf(out, "TER\n");
1148 gmx_fio_fclose(out);
1151 real dd_cutoff_multibody(const gmx_domdec_t *dd)
1153 gmx_domdec_comm_t *comm;
1160 if (comm->bInterCGBondeds)
1162 if (comm->cutoff_mbody > 0)
1164 r = comm->cutoff_mbody;
1168 /* cutoff_mbody=0 means we do not have DLB */
1169 r = comm->cellsize_min[dd->dim[0]];
1170 for (di = 1; di < dd->ndim; di++)
1172 r = std::min(r, comm->cellsize_min[dd->dim[di]]);
1174 if (comm->bBondComm)
1176 r = std::max(r, comm->cutoff_mbody);
1180 r = std::min(r, comm->cutoff);
1188 real dd_cutoff_twobody(const gmx_domdec_t *dd)
1192 r_mb = dd_cutoff_multibody(dd);
1194 return std::max(dd->comm->cutoff, r_mb);
1198 static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
1203 nc = dd->nc[dd->comm->cartpmedim];
1204 ntot = dd->comm->ntot[dd->comm->cartpmedim];
1205 copy_ivec(coord, coord_pme);
1206 coord_pme[dd->comm->cartpmedim] =
1207 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
1210 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
1215 npme = dd->comm->npmenodes;
1217 /* Here we assign a PME node to communicate with this DD node
1218 * by assuming that the major index of both is x.
1219 * We add cr->npmenodes/2 to obtain an even distribution.
1221 return (ddindex*npme + npme/2)/npp;
1224 static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
1229 snew(pme_rank, dd->comm->npmenodes);
1231 for (i = 0; i < dd->nnodes; i++)
1233 p0 = ddindex2pmeindex(dd, i);
1234 p1 = ddindex2pmeindex(dd, i+1);
1235 if (i+1 == dd->nnodes || p1 > p0)
1239 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
1241 pme_rank[n] = i + 1 + n;
1249 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
1257 if (dd->comm->bCartesian) {
1258 gmx_ddindex2xyz(dd->nc,ddindex,coords);
1259 dd_coords2pmecoords(dd,coords,coords_pme);
1260 copy_ivec(dd->ntot,nc);
1261 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1262 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1264 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
1266 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
1272 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
1277 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
1279 gmx_domdec_comm_t *comm;
1281 int ddindex, nodeid = -1;
1283 comm = cr->dd->comm;
1288 if (comm->bCartesianPP_PME)
1291 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
1296 ddindex = dd_index(cr->dd->nc, coords);
1297 if (comm->bCartesianPP)
1299 nodeid = comm->ddindex2simnodeid[ddindex];
1305 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
1317 static int dd_simnode2pmenode(const gmx_domdec_t *dd,
1318 const t_commrec gmx_unused *cr,
1323 const gmx_domdec_comm_t *comm = dd->comm;
1325 /* This assumes a uniform x domain decomposition grid cell size */
1326 if (comm->bCartesianPP_PME)
1329 ivec coord, coord_pme;
1330 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
1331 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1333 /* This is a PP node */
1334 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1335 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
1339 else if (comm->bCartesianPP)
1341 if (sim_nodeid < dd->nnodes)
1343 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1348 /* This assumes DD cells with identical x coordinates
1349 * are numbered sequentially.
1351 if (dd->comm->pmenodes == nullptr)
1353 if (sim_nodeid < dd->nnodes)
1355 /* The DD index equals the nodeid */
1356 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1362 while (sim_nodeid > dd->comm->pmenodes[i])
1366 if (sim_nodeid < dd->comm->pmenodes[i])
1368 pmenode = dd->comm->pmenodes[i];
1376 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
1381 dd->comm->npmenodes_x, dd->comm->npmenodes_y
1392 std::vector<int> get_pme_ddranks(const t_commrec *cr, int pmenodeid)
1396 ivec coord, coord_pme;
1400 std::vector<int> ddranks;
1401 ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
1403 for (x = 0; x < dd->nc[XX]; x++)
1405 for (y = 0; y < dd->nc[YY]; y++)
1407 for (z = 0; z < dd->nc[ZZ]; z++)
1409 if (dd->comm->bCartesianPP_PME)
1414 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1415 if (dd->ci[XX] == coord_pme[XX] &&
1416 dd->ci[YY] == coord_pme[YY] &&
1417 dd->ci[ZZ] == coord_pme[ZZ])
1419 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
1424 /* The slab corresponds to the nodeid in the PME group */
1425 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
1427 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
1436 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
1438 gmx_bool bReceive = TRUE;
1440 if (cr->npmenodes < dd->nnodes)
1442 gmx_domdec_comm_t *comm = dd->comm;
1443 if (comm->bCartesianPP_PME)
1446 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1448 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
1449 coords[comm->cartpmedim]++;
1450 if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1453 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
1454 if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
1456 /* This is not the last PP node for pmenode */
1461 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
1466 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1467 if (cr->sim_nodeid+1 < cr->nnodes &&
1468 dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
1470 /* This is not the last PP node for pmenode */
1479 static void set_zones_ncg_home(gmx_domdec_t *dd)
1481 gmx_domdec_zones_t *zones;
1484 zones = &dd->comm->zones;
1486 zones->cg_range[0] = 0;
1487 for (i = 1; i < zones->n+1; i++)
1489 zones->cg_range[i] = dd->ncg_home;
1491 /* zone_ncg1[0] should always be equal to ncg_home */
1492 dd->comm->zone_ncg1[0] = dd->ncg_home;
1495 static void restoreAtomGroups(gmx_domdec_t *dd,
1496 const int *gcgs_index, const t_state *state)
1498 gmx::ArrayRef<const int> atomGroupsState = state->cg_gl;
1500 std::vector<int> &globalAtomGroupIndices = dd->globalAtomGroupIndices;
1501 gmx::RangePartitioning &atomGrouping = dd->atomGrouping_;
1503 globalAtomGroupIndices.resize(atomGroupsState.size());
1504 atomGrouping.clear();
1506 /* Copy back the global charge group indices from state
1507 * and rebuild the local charge group to atom index.
1509 for (gmx::index i = 0; i < atomGroupsState.size(); i++)
1511 const int atomGroupGlobal = atomGroupsState[i];
1512 const int groupSize = gcgs_index[atomGroupGlobal + 1] - gcgs_index[atomGroupGlobal];
1513 globalAtomGroupIndices[i] = atomGroupGlobal;
1514 atomGrouping.appendBlock(groupSize);
1517 dd->ncg_home = atomGroupsState.size();
1518 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, atomGrouping.fullRange().end());
1520 set_zones_ncg_home(dd);
1523 static void dd_set_cginfo(gmx::ArrayRef<const int> index_gl, int cg0, int cg1,
1524 t_forcerec *fr, char *bLocalCG)
1526 cginfo_mb_t *cginfo_mb;
1532 cginfo_mb = fr->cginfo_mb;
1533 cginfo = fr->cginfo;
1535 for (cg = cg0; cg < cg1; cg++)
1537 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
1541 if (bLocalCG != nullptr)
1543 for (cg = cg0; cg < cg1; cg++)
1545 bLocalCG[index_gl[cg]] = TRUE;
1550 static void make_dd_indices(gmx_domdec_t *dd,
1551 const int *gcgs_index, int cg_start)
1553 const int numZones = dd->comm->zones.n;
1554 const int *zone2cg = dd->comm->zones.cg_range;
1555 const int *zone_ncg1 = dd->comm->zone_ncg1;
1556 gmx::ArrayRef<const int> globalAtomGroupIndices = dd->globalAtomGroupIndices;
1557 const gmx_bool bCGs = dd->comm->bCGs;
1559 std::vector<int> &globalAtomIndices = dd->globalAtomIndices;
1560 gmx_ga2la_t &ga2la = *dd->ga2la;
1562 if (zone2cg[1] != dd->ncg_home)
1564 gmx_incons("dd->ncg_zone is not up to date");
1567 /* Make the local to global and global to local atom index */
1568 int a = dd->atomGrouping().subRange(cg_start, cg_start).begin();
1569 globalAtomIndices.resize(a);
1570 for (int zone = 0; zone < numZones; zone++)
1579 cg0 = zone2cg[zone];
1581 int cg1 = zone2cg[zone+1];
1582 int cg1_p1 = cg0 + zone_ncg1[zone];
1584 for (int cg = cg0; cg < cg1; cg++)
1589 /* Signal that this cg is from more than one pulse away */
1592 int cg_gl = globalAtomGroupIndices[cg];
1595 for (int a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
1597 globalAtomIndices.push_back(a_gl);
1598 ga2la.insert(a_gl, {a, zone1});
1604 globalAtomIndices.push_back(cg_gl);
1605 ga2la.insert(cg_gl, {a, zone1});
1612 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
1616 if (bLocalCG == nullptr)
1620 for (size_t i = 0; i < dd->globalAtomGroupIndices.size(); i++)
1622 if (!bLocalCG[dd->globalAtomGroupIndices[i]])
1625 "DD rank %d, %s: atom group %zu, global atom group %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i + 1, dd->globalAtomGroupIndices[i] + 1, dd->ncg_home);
1630 for (int i = 0; i < ncg_sys; i++)
1637 if (ngl != dd->globalAtomGroupIndices.size())
1639 fprintf(stderr, "DD rank %d, %s: In bLocalCG %zu atom groups are marked as local, whereas there are %zu\n", dd->rank, where, ngl, dd->globalAtomGroupIndices.size());
1646 static void check_index_consistency(gmx_domdec_t *dd,
1647 int natoms_sys, int ncg_sys,
1652 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1654 if (dd->comm->DD_debug > 1)
1656 std::vector<int> have(natoms_sys);
1657 for (int a = 0; a < numAtomsInZones; a++)
1659 int globalAtomIndex = dd->globalAtomIndices[a];
1660 if (have[globalAtomIndex] > 0)
1662 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, globalAtomIndex + 1, have[globalAtomIndex], a+1);
1666 have[globalAtomIndex] = a + 1;
1671 std::vector<int> have(numAtomsInZones);
1674 for (int i = 0; i < natoms_sys; i++)
1676 if (const auto entry = dd->ga2la->find(i))
1678 const int a = entry->la;
1679 if (a >= numAtomsInZones)
1681 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, numAtomsInZones);
1687 if (dd->globalAtomIndices[a] != i)
1689 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->globalAtomIndices[a]+1);
1696 if (ngl != numAtomsInZones)
1699 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
1700 dd->rank, where, ngl, numAtomsInZones);
1702 for (int a = 0; a < numAtomsInZones; a++)
1707 "DD rank %d, %s: local atom %d, global %d has no global index\n",
1708 dd->rank, where, a + 1, dd->globalAtomIndices[a] + 1);
1712 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
1716 gmx_fatal(FARGS, "DD rank %d, %s: %d atom(group) index inconsistencies",
1717 dd->rank, where, nerr);
1721 /* Clear all DD global state indices, starting from \p atomGroupStart and \p atomStart */
1722 static void clearDDStateIndices(gmx_domdec_t *dd,
1726 gmx_ga2la_t &ga2la = *dd->ga2la;
1730 /* Clear the whole list without the overhead of searching */
1735 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1736 for (int i = 0; i < numAtomsInZones; i++)
1738 ga2la.erase(dd->globalAtomIndices[i]);
1742 char *bLocalCG = dd->comm->bLocalCG;
1745 for (size_t atomGroup = atomGroupStart; atomGroup < dd->globalAtomGroupIndices.size(); atomGroup++)
1747 bLocalCG[dd->globalAtomGroupIndices[atomGroup]] = FALSE;
1751 dd_clear_local_vsite_indices(dd);
1753 if (dd->constraints)
1755 dd_clear_local_constraint_indices(dd);
1759 static bool check_grid_jump(int64_t step,
1760 const gmx_domdec_t *dd,
1762 const gmx_ddbox_t *ddbox,
1765 gmx_domdec_comm_t *comm = dd->comm;
1766 bool invalid = false;
1768 for (int d = 1; d < dd->ndim; d++)
1770 const DDCellsizesWithDlb &cellsizes = comm->cellsizesWithDlb[d];
1771 const int dim = dd->dim[d];
1772 const real limit = grid_jump_limit(comm, cutoff, d);
1773 real bfac = ddbox->box_size[dim];
1774 if (ddbox->tric_dir[dim])
1776 bfac *= ddbox->skew_fac[dim];
1778 if ((cellsizes.fracUpper - cellsizes.fracLowerMax)*bfac < limit ||
1779 (cellsizes.fracLower - cellsizes.fracUpperMin)*bfac > -limit)
1787 /* This error should never be triggered under normal
1788 * circumstances, but you never know ...
1790 gmx_fatal(FARGS, "step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
1791 gmx_step_str(step, buf),
1792 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1800 static float dd_force_load(gmx_domdec_comm_t *comm)
1807 if (comm->eFlop > 1)
1809 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
1814 load = comm->cycl[ddCyclF];
1815 if (comm->cycl_n[ddCyclF] > 1)
1817 /* Subtract the maximum of the last n cycle counts
1818 * to get rid of possible high counts due to other sources,
1819 * for instance system activity, that would otherwise
1820 * affect the dynamic load balancing.
1822 load -= comm->cycl_max[ddCyclF];
1826 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
1828 float gpu_wait, gpu_wait_sum;
1830 gpu_wait = comm->cycl[ddCyclWaitGPU];
1831 if (comm->cycl_n[ddCyclF] > 1)
1833 /* We should remove the WaitGPU time of the same MD step
1834 * as the one with the maximum F time, since the F time
1835 * and the wait time are not independent.
1836 * Furthermore, the step for the max F time should be chosen
1837 * the same on all ranks that share the same GPU.
1838 * But to keep the code simple, we remove the average instead.
1839 * The main reason for artificially long times at some steps
1840 * is spurious CPU activity or MPI time, so we don't expect
1841 * that changes in the GPU wait time matter a lot here.
1843 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/static_cast<float>(comm->cycl_n[ddCyclF]);
1845 /* Sum the wait times over the ranks that share the same GPU */
1846 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
1847 comm->mpi_comm_gpu_shared);
1848 /* Replace the wait time by the average over the ranks */
1849 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
1857 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
1859 gmx_domdec_comm_t *comm;
1864 snew(*dim_f, dd->nc[dim]+1);
1866 for (i = 1; i < dd->nc[dim]; i++)
1868 if (comm->slb_frac[dim])
1870 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
1874 (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
1877 (*dim_f)[dd->nc[dim]] = 1;
1880 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
1882 int pmeindex, slab, nso, i;
1885 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
1891 ddpme->dim = dimind;
1893 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1895 ddpme->nslab = (ddpme->dim == 0 ?
1896 dd->comm->npmenodes_x :
1897 dd->comm->npmenodes_y);
1899 if (ddpme->nslab <= 1)
1904 nso = dd->comm->npmenodes/ddpme->nslab;
1905 /* Determine for each PME slab the PP location range for dimension dim */
1906 snew(ddpme->pp_min, ddpme->nslab);
1907 snew(ddpme->pp_max, ddpme->nslab);
1908 for (slab = 0; slab < ddpme->nslab; slab++)
1910 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1911 ddpme->pp_max[slab] = 0;
1913 for (i = 0; i < dd->nnodes; i++)
1915 ddindex2xyz(dd->nc, i, xyz);
1916 /* For y only use our y/z slab.
1917 * This assumes that the PME x grid size matches the DD grid size.
1919 if (dimind == 0 || xyz[XX] == dd->ci[XX])
1921 pmeindex = ddindex2pmeindex(dd, i);
1924 slab = pmeindex/nso;
1928 slab = pmeindex % ddpme->nslab;
1930 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1931 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1935 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1938 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1940 if (dd->comm->ddpme[0].dim == XX)
1942 return dd->comm->ddpme[0].maxshift;
1950 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1952 if (dd->comm->ddpme[0].dim == YY)
1954 return dd->comm->ddpme[0].maxshift;
1956 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
1958 return dd->comm->ddpme[1].maxshift;
1966 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
1968 rvec cell_ns_x0, rvec cell_ns_x1,
1971 gmx_domdec_comm_t *comm;
1976 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
1978 dim = dd->dim[dim_ind];
1980 /* Without PBC we don't have restrictions on the outer cells */
1981 if (!(dim >= ddbox->npbcdim &&
1982 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
1984 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
1985 comm->cellsize_min[dim])
1988 gmx_fatal(FARGS, "step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
1989 gmx_step_str(step, buf), dim2char(dim),
1990 comm->cell_x1[dim] - comm->cell_x0[dim],
1991 ddbox->skew_fac[dim],
1992 dd->comm->cellsize_min[dim],
1993 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1997 if ((isDlbOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
1999 /* Communicate the boundaries and update cell_ns_x0/1 */
2000 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
2001 if (isDlbOn(dd->comm) && dd->ndim > 1)
2003 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
2008 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
2010 /* Note that the cycles value can be incorrect, either 0 or some
2011 * extremely large value, when our thread migrated to another core
2012 * with an unsynchronized cycle counter. If this happens less often
2013 * that once per nstlist steps, this will not cause issues, since
2014 * we later subtract the maximum value from the sum over nstlist steps.
2015 * A zero count will slightly lower the total, but that's a small effect.
2016 * Note that the main purpose of the subtraction of the maximum value
2017 * is to avoid throwing off the load balancing when stalls occur due
2018 * e.g. system activity or network congestion.
2020 dd->comm->cycl[ddCycl] += cycles;
2021 dd->comm->cycl_n[ddCycl]++;
2022 if (cycles > dd->comm->cycl_max[ddCycl])
2024 dd->comm->cycl_max[ddCycl] = cycles;
2028 static double force_flop_count(t_nrnb *nrnb)
2035 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
2037 /* To get closer to the real timings, we half the count
2038 * for the normal loops and again half it for water loops.
2041 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2043 sum += nrnb->n[i]*0.25*cost_nrnb(i);
2047 sum += nrnb->n[i]*0.50*cost_nrnb(i);
2050 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
2053 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2055 sum += nrnb->n[i]*cost_nrnb(i);
2058 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
2060 sum += nrnb->n[i]*cost_nrnb(i);
2066 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
2068 if (dd->comm->eFlop)
2070 dd->comm->flop -= force_flop_count(nrnb);
2073 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
2075 if (dd->comm->eFlop)
2077 dd->comm->flop += force_flop_count(nrnb);
2082 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
2086 for (i = 0; i < ddCyclNr; i++)
2088 dd->comm->cycl[i] = 0;
2089 dd->comm->cycl_n[i] = 0;
2090 dd->comm->cycl_max[i] = 0;
2093 dd->comm->flop_n = 0;
2096 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
2098 gmx_domdec_comm_t *comm;
2099 domdec_load_t *load;
2100 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
2105 fprintf(debug, "get_load_distribution start\n");
2108 wallcycle_start(wcycle, ewcDDCOMMLOAD);
2112 bSepPME = (dd->pme_nodeid >= 0);
2114 if (dd->ndim == 0 && bSepPME)
2116 /* Without decomposition, but with PME nodes, we need the load */
2117 comm->load[0].mdf = comm->cycl[ddCyclPPduringPME];
2118 comm->load[0].pme = comm->cycl[ddCyclPME];
2121 for (int d = dd->ndim - 1; d >= 0; d--)
2123 const DDCellsizesWithDlb &cellsizes = comm->cellsizesWithDlb[d];
2124 const int dim = dd->dim[d];
2125 /* Check if we participate in the communication in this dimension */
2126 if (d == dd->ndim-1 ||
2127 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
2129 load = &comm->load[d];
2130 if (isDlbOn(dd->comm))
2132 cell_frac = cellsizes.fracUpper - cellsizes.fracLower;
2135 if (d == dd->ndim-1)
2137 sbuf[pos++] = dd_force_load(comm);
2138 sbuf[pos++] = sbuf[0];
2139 if (isDlbOn(dd->comm))
2141 sbuf[pos++] = sbuf[0];
2142 sbuf[pos++] = cell_frac;
2145 sbuf[pos++] = cellsizes.fracLowerMax;
2146 sbuf[pos++] = cellsizes.fracUpperMin;
2151 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
2152 sbuf[pos++] = comm->cycl[ddCyclPME];
2157 sbuf[pos++] = comm->load[d+1].sum;
2158 sbuf[pos++] = comm->load[d+1].max;
2159 if (isDlbOn(dd->comm))
2161 sbuf[pos++] = comm->load[d+1].sum_m;
2162 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
2163 sbuf[pos++] = comm->load[d+1].flags;
2166 sbuf[pos++] = cellsizes.fracLowerMax;
2167 sbuf[pos++] = cellsizes.fracUpperMin;
2172 sbuf[pos++] = comm->load[d+1].mdf;
2173 sbuf[pos++] = comm->load[d+1].pme;
2177 /* Communicate a row in DD direction d.
2178 * The communicators are setup such that the root always has rank 0.
2181 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
2182 load->load, load->nload*sizeof(float), MPI_BYTE,
2183 0, comm->mpi_comm_load[d]);
2185 if (dd->ci[dim] == dd->master_ci[dim])
2187 /* We are the master along this row, process this row */
2188 RowMaster *rowMaster = nullptr;
2192 rowMaster = cellsizes.rowMaster.get();
2202 for (int i = 0; i < dd->nc[dim]; i++)
2204 load->sum += load->load[pos++];
2205 load->max = std::max(load->max, load->load[pos]);
2207 if (isDlbOn(dd->comm))
2209 if (rowMaster->dlbIsLimited)
2211 /* This direction could not be load balanced properly,
2212 * therefore we need to use the maximum iso the average load.
2214 load->sum_m = std::max(load->sum_m, load->load[pos]);
2218 load->sum_m += load->load[pos];
2221 load->cvol_min = std::min(load->cvol_min, load->load[pos]);
2225 load->flags = static_cast<int>(load->load[pos++] + 0.5);
2229 rowMaster->bounds[i].cellFracLowerMax = load->load[pos++];
2230 rowMaster->bounds[i].cellFracUpperMin = load->load[pos++];
2235 load->mdf = std::max(load->mdf, load->load[pos]);
2237 load->pme = std::max(load->pme, load->load[pos]);
2241 if (isDlbOn(comm) && rowMaster->dlbIsLimited)
2243 load->sum_m *= dd->nc[dim];
2244 load->flags |= (1<<d);
2252 comm->nload += dd_load_count(comm);
2253 comm->load_step += comm->cycl[ddCyclStep];
2254 comm->load_sum += comm->load[0].sum;
2255 comm->load_max += comm->load[0].max;
2258 for (int d = 0; d < dd->ndim; d++)
2260 if (comm->load[0].flags & (1<<d))
2262 comm->load_lim[d]++;
2268 comm->load_mdf += comm->load[0].mdf;
2269 comm->load_pme += comm->load[0].pme;
2273 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
2277 fprintf(debug, "get_load_distribution finished\n");
2281 static float dd_force_load_fraction(gmx_domdec_t *dd)
2283 /* Return the relative performance loss on the total run time
2284 * due to the force calculation load imbalance.
2286 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2288 return dd->comm->load_sum/(dd->comm->load_step*dd->nnodes);
2296 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
2298 /* Return the relative performance loss on the total run time
2299 * due to the force calculation load imbalance.
2301 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2304 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
2305 (dd->comm->load_step*dd->nnodes);
2313 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
2315 gmx_domdec_comm_t *comm = dd->comm;
2317 /* Only the master rank prints loads and only if we measured loads */
2318 if (!DDMASTER(dd) || comm->nload == 0)
2324 int numPpRanks = dd->nnodes;
2325 int numPmeRanks = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
2326 int numRanks = numPpRanks + numPmeRanks;
2327 float lossFraction = 0;
2329 /* Print the average load imbalance and performance loss */
2330 if (dd->nnodes > 1 && comm->load_sum > 0)
2332 float imbalance = comm->load_max*numPpRanks/comm->load_sum - 1;
2333 lossFraction = dd_force_imb_perf_loss(dd);
2335 std::string msg = "\n Dynamic load balancing report:\n";
2336 std::string dlbStateStr;
2338 switch (dd->comm->dlbState)
2341 dlbStateStr = "DLB was off during the run per user request.";
2343 case edlbsOffForever:
2344 /* Currectly this can happen due to performance loss observed, cell size
2345 * limitations or incompatibility with other settings observed during
2346 * determineInitialDlbState(). */
2347 dlbStateStr = "DLB got disabled because it was unsuitable to use.";
2349 case edlbsOffCanTurnOn:
2350 dlbStateStr = "DLB was off during the run due to low measured imbalance.";
2352 case edlbsOffTemporarilyLocked:
2353 dlbStateStr = "DLB was locked at the end of the run due to unfinished PP-PME balancing.";
2355 case edlbsOnCanTurnOff:
2356 dlbStateStr = "DLB was turned on during the run due to measured imbalance.";
2359 dlbStateStr = "DLB was permanently on during the run per user request.";
2362 GMX_ASSERT(false, "Undocumented DLB state");
2365 msg += " " + dlbStateStr + "\n";
2366 msg += gmx::formatString(" Average load imbalance: %.1f%%.\n", imbalance*100);
2367 msg += gmx::formatString(" The balanceable part of the MD step is %d%%, load imbalance is computed from this.\n",
2368 static_cast<int>(dd_force_load_fraction(dd)*100 + 0.5));
2369 msg += gmx::formatString(" Part of the total run time spent waiting due to load imbalance: %.1f%%.\n",
2371 fprintf(fplog, "%s", msg.c_str());
2372 fprintf(stderr, "%s", msg.c_str());
2375 /* Print during what percentage of steps the load balancing was limited */
2376 bool dlbWasLimited = false;
2379 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
2380 for (int d = 0; d < dd->ndim; d++)
2382 int limitPercentage = (200*comm->load_lim[d] + 1)/(2*comm->nload);
2383 sprintf(buf+strlen(buf), " %c %d %%",
2384 dim2char(dd->dim[d]), limitPercentage);
2385 if (limitPercentage >= 50)
2387 dlbWasLimited = true;
2390 sprintf(buf + strlen(buf), "\n");
2391 fprintf(fplog, "%s", buf);
2392 fprintf(stderr, "%s", buf);
2395 /* Print the performance loss due to separate PME - PP rank imbalance */
2396 float lossFractionPme = 0;
2397 if (numPmeRanks > 0 && comm->load_mdf > 0 && comm->load_step > 0)
2399 float pmeForceRatio = comm->load_pme/comm->load_mdf;
2400 lossFractionPme = (comm->load_pme - comm->load_mdf)/comm->load_step;
2401 if (lossFractionPme <= 0)
2403 lossFractionPme *= numPmeRanks/static_cast<float>(numRanks);
2407 lossFractionPme *= numPpRanks/static_cast<float>(numRanks);
2409 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pmeForceRatio);
2410 fprintf(fplog, "%s", buf);
2411 fprintf(stderr, "%s", buf);
2412 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", std::fabs(lossFractionPme)*100);
2413 fprintf(fplog, "%s", buf);
2414 fprintf(stderr, "%s", buf);
2416 fprintf(fplog, "\n");
2417 fprintf(stderr, "\n");
2419 if (lossFraction >= DD_PERF_LOSS_WARN)
2422 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
2423 " in the domain decomposition.\n", lossFraction*100);
2426 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
2428 else if (dlbWasLimited)
2430 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
2432 fprintf(fplog, "%s\n", buf);
2433 fprintf(stderr, "%s\n", buf);
2435 if (numPmeRanks > 0 && std::fabs(lossFractionPme) >= DD_PERF_LOSS_WARN)
2438 "NOTE: %.1f %% performance was lost because the PME ranks\n"
2439 " had %s work to do than the PP ranks.\n"
2440 " You might want to %s the number of PME ranks\n"
2441 " or %s the cut-off and the grid spacing.\n",
2442 std::fabs(lossFractionPme*100),
2443 (lossFractionPme < 0) ? "less" : "more",
2444 (lossFractionPme < 0) ? "decrease" : "increase",
2445 (lossFractionPme < 0) ? "decrease" : "increase");
2446 fprintf(fplog, "%s\n", buf);
2447 fprintf(stderr, "%s\n", buf);
2451 static float dd_vol_min(gmx_domdec_t *dd)
2453 return dd->comm->load[0].cvol_min*dd->nnodes;
2456 static int dd_load_flags(gmx_domdec_t *dd)
2458 return dd->comm->load[0].flags;
2461 static float dd_f_imbal(gmx_domdec_t *dd)
2463 if (dd->comm->load[0].sum > 0)
2465 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1.0f;
2469 /* Something is wrong in the cycle counting, report no load imbalance */
2474 float dd_pme_f_ratio(gmx_domdec_t *dd)
2476 /* Should only be called on the DD master rank */
2477 assert(DDMASTER(dd));
2479 if (dd->comm->load[0].mdf > 0 && dd->comm->cycl_n[ddCyclPME] > 0)
2481 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
2489 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, int64_t step)
2494 flags = dd_load_flags(dd);
2498 "DD load balancing is limited by minimum cell size in dimension");
2499 for (d = 0; d < dd->ndim; d++)
2503 fprintf(fplog, " %c", dim2char(dd->dim[d]));
2506 fprintf(fplog, "\n");
2508 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
2509 if (isDlbOn(dd->comm))
2511 fprintf(fplog, " vol min/aver %5.3f%c",
2512 dd_vol_min(dd), flags ? '!' : ' ');
2516 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
2518 if (dd->comm->cycl_n[ddCyclPME])
2520 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
2522 fprintf(fplog, "\n\n");
2525 static void dd_print_load_verbose(gmx_domdec_t *dd)
2527 if (isDlbOn(dd->comm))
2529 fprintf(stderr, "vol %4.2f%c ",
2530 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
2534 fprintf(stderr, "imb F %2d%% ", static_cast<int>(dd_f_imbal(dd)*100+0.5));
2536 if (dd->comm->cycl_n[ddCyclPME])
2538 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
2543 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
2548 gmx_bool bPartOfGroup = FALSE;
2550 dim = dd->dim[dim_ind];
2551 copy_ivec(loc, loc_c);
2552 for (i = 0; i < dd->nc[dim]; i++)
2555 rank = dd_index(dd->nc, loc_c);
2556 if (rank == dd->rank)
2558 /* This process is part of the group */
2559 bPartOfGroup = TRUE;
2562 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
2566 dd->comm->mpi_comm_load[dim_ind] = c_row;
2567 if (!isDlbDisabled(dd->comm))
2569 DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
2571 if (dd->ci[dim] == dd->master_ci[dim])
2573 /* This is the root process of this row */
2574 cellsizes.rowMaster = gmx::compat::make_unique<RowMaster>();
2576 RowMaster &rowMaster = *cellsizes.rowMaster;
2577 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
2578 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
2579 rowMaster.isCellMin.resize(dd->nc[dim]);
2582 rowMaster.bounds.resize(dd->nc[dim]);
2584 rowMaster.buf_ncd.resize(dd->nc[dim]);
2588 /* This is not a root process, we only need to receive cell_f */
2589 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
2592 if (dd->ci[dim] == dd->master_ci[dim])
2594 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
2600 void dd_setup_dlb_resource_sharing(t_commrec *cr,
2604 int physicalnode_id_hash;
2606 MPI_Comm mpi_comm_pp_physicalnode;
2608 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
2610 /* Only ranks with short-ranged tasks (currently) use GPUs.
2611 * If we don't have GPUs assigned, there are no resources to share.
2616 physicalnode_id_hash = gmx_physicalnode_id_hash();
2622 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
2623 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
2624 dd->rank, physicalnode_id_hash, gpu_id);
2626 /* Split the PP communicator over the physical nodes */
2627 /* TODO: See if we should store this (before), as it's also used for
2628 * for the nodecomm summation.
2630 // TODO PhysicalNodeCommunicator could be extended/used to handle
2631 // the need for per-node per-group communicators.
2632 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
2633 &mpi_comm_pp_physicalnode);
2634 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
2635 &dd->comm->mpi_comm_gpu_shared);
2636 MPI_Comm_free(&mpi_comm_pp_physicalnode);
2637 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
2641 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
2644 /* Note that some ranks could share a GPU, while others don't */
2646 if (dd->comm->nrank_gpu_shared == 1)
2648 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
2651 GMX_UNUSED_VALUE(cr);
2652 GMX_UNUSED_VALUE(gpu_id);
2656 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
2659 int dim0, dim1, i, j;
2664 fprintf(debug, "Making load communicators\n");
2667 snew(dd->comm->load, std::max(dd->ndim, 1));
2668 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
2676 make_load_communicator(dd, 0, loc);
2680 for (i = 0; i < dd->nc[dim0]; i++)
2683 make_load_communicator(dd, 1, loc);
2689 for (i = 0; i < dd->nc[dim0]; i++)
2693 for (j = 0; j < dd->nc[dim1]; j++)
2696 make_load_communicator(dd, 2, loc);
2703 fprintf(debug, "Finished making load communicators\n");
2708 /*! \brief Sets up the relation between neighboring domains and zones */
2709 static void setup_neighbor_relations(gmx_domdec_t *dd)
2711 int d, dim, i, j, m;
2713 gmx_domdec_zones_t *zones;
2714 gmx_domdec_ns_ranges_t *izone;
2716 for (d = 0; d < dd->ndim; d++)
2719 copy_ivec(dd->ci, tmp);
2720 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
2721 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
2722 copy_ivec(dd->ci, tmp);
2723 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
2724 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
2727 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
2730 dd->neighbor[d][1]);
2734 int nzone = (1 << dd->ndim);
2735 int nizone = (1 << std::max(dd->ndim - 1, 0));
2736 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
2738 zones = &dd->comm->zones;
2740 for (i = 0; i < nzone; i++)
2743 clear_ivec(zones->shift[i]);
2744 for (d = 0; d < dd->ndim; d++)
2746 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
2751 for (i = 0; i < nzone; i++)
2753 for (d = 0; d < DIM; d++)
2755 s[d] = dd->ci[d] - zones->shift[i][d];
2760 else if (s[d] >= dd->nc[d])
2766 zones->nizone = nizone;
2767 for (i = 0; i < zones->nizone; i++)
2769 assert(ddNonbondedZonePairRanges[i][0] == i);
2771 izone = &zones->izone[i];
2772 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
2773 * j-zones up to nzone.
2775 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
2776 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
2777 for (dim = 0; dim < DIM; dim++)
2779 if (dd->nc[dim] == 1)
2781 /* All shifts should be allowed */
2782 izone->shift0[dim] = -1;
2783 izone->shift1[dim] = 1;
2787 /* Determine the min/max j-zone shift wrt the i-zone */
2788 izone->shift0[dim] = 1;
2789 izone->shift1[dim] = -1;
2790 for (j = izone->j0; j < izone->j1; j++)
2792 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
2793 if (shift_diff < izone->shift0[dim])
2795 izone->shift0[dim] = shift_diff;
2797 if (shift_diff > izone->shift1[dim])
2799 izone->shift1[dim] = shift_diff;
2806 if (!isDlbDisabled(dd->comm))
2808 dd->comm->cellsizesWithDlb.resize(dd->ndim);
2811 if (dd->comm->bRecordLoad)
2813 make_load_communicators(dd);
2817 static void make_pp_communicator(FILE *fplog,
2819 t_commrec gmx_unused *cr,
2820 bool gmx_unused reorder)
2823 gmx_domdec_comm_t *comm;
2830 if (comm->bCartesianPP)
2832 /* Set up cartesian communication for the particle-particle part */
2835 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
2836 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
2839 for (int i = 0; i < DIM; i++)
2843 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
2845 /* We overwrite the old communicator with the new cartesian one */
2846 cr->mpi_comm_mygroup = comm_cart;
2849 dd->mpi_comm_all = cr->mpi_comm_mygroup;
2850 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
2852 if (comm->bCartesianPP_PME)
2854 /* Since we want to use the original cartesian setup for sim,
2855 * and not the one after split, we need to make an index.
2857 snew(comm->ddindex2ddnodeid, dd->nnodes);
2858 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
2859 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
2860 /* Get the rank of the DD master,
2861 * above we made sure that the master node is a PP node.
2871 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
2873 else if (comm->bCartesianPP)
2875 if (cr->npmenodes == 0)
2877 /* The PP communicator is also
2878 * the communicator for this simulation
2880 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
2882 cr->nodeid = dd->rank;
2884 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
2886 /* We need to make an index to go from the coordinates
2887 * to the nodeid of this simulation.
2889 snew(comm->ddindex2simnodeid, dd->nnodes);
2890 snew(buf, dd->nnodes);
2891 if (thisRankHasDuty(cr, DUTY_PP))
2893 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
2895 /* Communicate the ddindex to simulation nodeid index */
2896 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
2897 cr->mpi_comm_mysim);
2900 /* Determine the master coordinates and rank.
2901 * The DD master should be the same node as the master of this sim.
2903 for (int i = 0; i < dd->nnodes; i++)
2905 if (comm->ddindex2simnodeid[i] == 0)
2907 ddindex2xyz(dd->nc, i, dd->master_ci);
2908 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
2913 fprintf(debug, "The master rank is %d\n", dd->masterrank);
2918 /* No Cartesian communicators */
2919 /* We use the rank in dd->comm->all as DD index */
2920 ddindex2xyz(dd->nc, dd->rank, dd->ci);
2921 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
2923 clear_ivec(dd->master_ci);
2930 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
2931 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2936 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
2937 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2941 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
2945 gmx_domdec_comm_t *comm = dd->comm;
2947 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
2950 snew(comm->ddindex2simnodeid, dd->nnodes);
2951 snew(buf, dd->nnodes);
2952 if (thisRankHasDuty(cr, DUTY_PP))
2954 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
2956 /* Communicate the ddindex to simulation nodeid index */
2957 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
2958 cr->mpi_comm_mysim);
2962 GMX_UNUSED_VALUE(dd);
2963 GMX_UNUSED_VALUE(cr);
2967 static void split_communicator(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
2968 DdRankOrder gmx_unused rankOrder,
2969 bool gmx_unused reorder)
2971 gmx_domdec_comm_t *comm;
2980 if (comm->bCartesianPP)
2982 for (i = 1; i < DIM; i++)
2984 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
2986 if (bDiv[YY] || bDiv[ZZ])
2988 comm->bCartesianPP_PME = TRUE;
2989 /* If we have 2D PME decomposition, which is always in x+y,
2990 * we stack the PME only nodes in z.
2991 * Otherwise we choose the direction that provides the thinnest slab
2992 * of PME only nodes as this will have the least effect
2993 * on the PP communication.
2994 * But for the PME communication the opposite might be better.
2996 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
2998 dd->nc[YY] > dd->nc[ZZ]))
3000 comm->cartpmedim = ZZ;
3004 comm->cartpmedim = YY;
3006 comm->ntot[comm->cartpmedim]
3007 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
3011 fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
3013 "Will not use a Cartesian communicator for PP <-> PME\n\n");
3017 if (comm->bCartesianPP_PME)
3025 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
3028 for (i = 0; i < DIM; i++)
3032 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, static_cast<int>(reorder),
3034 MPI_Comm_rank(comm_cart, &rank);
3035 if (MASTER(cr) && rank != 0)
3037 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
3040 /* With this assigment we loose the link to the original communicator
3041 * which will usually be MPI_COMM_WORLD, unless have multisim.
3043 cr->mpi_comm_mysim = comm_cart;
3044 cr->sim_nodeid = rank;
3046 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
3050 fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
3051 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3054 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
3058 if (cr->npmenodes == 0 ||
3059 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
3061 cr->duty = DUTY_PME;
3064 /* Split the sim communicator into PP and PME only nodes */
3065 MPI_Comm_split(cr->mpi_comm_mysim,
3066 getThisRankDuties(cr),
3067 dd_index(comm->ntot, dd->ci),
3068 &cr->mpi_comm_mygroup);
3075 case DdRankOrder::pp_pme:
3078 fprintf(fplog, "Order of the ranks: PP first, PME last\n");
3081 case DdRankOrder::interleave:
3082 /* Interleave the PP-only and PME-only ranks */
3085 fprintf(fplog, "Interleaving PP and PME ranks\n");
3087 comm->pmenodes = dd_interleaved_pme_ranks(dd);
3089 case DdRankOrder::cartesian:
3092 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(rankOrder));
3095 if (dd_simnode2pmenode(dd, cr, cr->sim_nodeid) == -1)
3097 cr->duty = DUTY_PME;
3104 /* Split the sim communicator into PP and PME only nodes */
3105 MPI_Comm_split(cr->mpi_comm_mysim,
3106 getThisRankDuties(cr),
3108 &cr->mpi_comm_mygroup);
3109 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
3115 fprintf(fplog, "This rank does only %s work.\n\n",
3116 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
3120 /*! \brief Generates the MPI communicators for domain decomposition */
3121 static void make_dd_communicators(FILE *fplog, t_commrec *cr,
3122 gmx_domdec_t *dd, DdRankOrder ddRankOrder)
3124 gmx_domdec_comm_t *comm;
3129 copy_ivec(dd->nc, comm->ntot);
3131 comm->bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
3132 comm->bCartesianPP_PME = FALSE;
3134 /* Reorder the nodes by default. This might change the MPI ranks.
3135 * Real reordering is only supported on very few architectures,
3136 * Blue Gene is one of them.
3138 CartReorder = getenv("GMX_NO_CART_REORDER") == nullptr;
3140 if (cr->npmenodes > 0)
3142 /* Split the communicator into a PP and PME part */
3143 split_communicator(fplog, cr, dd, ddRankOrder, CartReorder);
3144 if (comm->bCartesianPP_PME)
3146 /* We (possibly) reordered the nodes in split_communicator,
3147 * so it is no longer required in make_pp_communicator.
3149 CartReorder = FALSE;
3154 /* All nodes do PP and PME */
3156 /* We do not require separate communicators */
3157 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
3161 if (thisRankHasDuty(cr, DUTY_PP))
3163 /* Copy or make a new PP communicator */
3164 make_pp_communicator(fplog, dd, cr, CartReorder);
3168 receive_ddindex2simnodeid(dd, cr);
3171 if (!thisRankHasDuty(cr, DUTY_PME))
3173 /* Set up the commnuication to our PME node */
3174 dd->pme_nodeid = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
3175 dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
3178 fprintf(debug, "My pme_nodeid %d receive ener %s\n",
3179 dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
3184 dd->pme_nodeid = -1;
3189 dd->ma = gmx::compat::make_unique<AtomDistribution>(dd->nc,
3191 comm->cgs_gl.index[comm->cgs_gl.nr]);
3195 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
3197 real *slb_frac, tot;
3202 if (nc > 1 && size_string != nullptr)
3206 fprintf(fplog, "Using static load balancing for the %s direction\n",
3211 for (i = 0; i < nc; i++)
3214 sscanf(size_string, "%20lf%n", &dbl, &n);
3217 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
3226 fprintf(fplog, "Relative cell sizes:");
3228 for (i = 0; i < nc; i++)
3233 fprintf(fplog, " %5.3f", slb_frac[i]);
3238 fprintf(fplog, "\n");
3245 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
3248 gmx_mtop_ilistloop_t iloop;
3252 iloop = gmx_mtop_ilistloop_init(mtop);
3253 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
3255 for (ftype = 0; ftype < F_NRE; ftype++)
3257 if ((interaction_function[ftype].flags & IF_BOND) &&
3260 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
3268 static int dd_getenv(FILE *fplog, const char *env_var, int def)
3274 val = getenv(env_var);
3277 if (sscanf(val, "%20d", &nst) <= 0)
3283 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
3291 static void dd_warning(const t_commrec *cr, FILE *fplog, const char *warn_string)
3295 fprintf(stderr, "\n%s\n", warn_string);
3299 fprintf(fplog, "\n%s\n", warn_string);
3303 static void check_dd_restrictions(t_commrec *cr, const gmx_domdec_t *dd,
3304 const t_inputrec *ir, FILE *fplog)
3306 if (ir->ePBC == epbcSCREW &&
3307 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
3309 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
3312 if (ir->ns_type == ensSIMPLE)
3314 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
3317 if (ir->nstlist == 0)
3319 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
3322 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
3324 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
3328 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3333 r = ddbox->box_size[XX];
3334 for (di = 0; di < dd->ndim; di++)
3337 /* Check using the initial average cell size */
3338 r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
3344 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
3346 static int forceDlbOffOrBail(int cmdlineDlbState,
3347 const std::string &reasonStr,
3351 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
3352 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
3354 if (cmdlineDlbState == edlbsOnUser)
3356 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
3358 else if (cmdlineDlbState == edlbsOffCanTurnOn)
3360 dd_warning(cr, fplog, (dlbDisableNote + reasonStr + "\n").c_str());
3362 return edlbsOffForever;
3365 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
3367 * This function parses the parameters of "-dlb" command line option setting
3368 * corresponding state values. Then it checks the consistency of the determined
3369 * state with other run parameters and settings. As a result, the initial state
3370 * may be altered or an error may be thrown if incompatibility of options is detected.
3372 * \param [in] fplog Pointer to mdrun log file.
3373 * \param [in] cr Pointer to MPI communication object.
3374 * \param [in] dlbOption Enum value for the DLB option.
3375 * \param [in] bRecordLoad True if the load balancer is recording load information.
3376 * \param [in] mdrunOptions Options for mdrun.
3377 * \param [in] ir Pointer mdrun to input parameters.
3378 * \returns DLB initial/startup state.
3380 static int determineInitialDlbState(FILE *fplog, t_commrec *cr,
3381 DlbOption dlbOption, gmx_bool bRecordLoad,
3382 const MdrunOptions &mdrunOptions,
3383 const t_inputrec *ir)
3385 int dlbState = edlbsOffCanTurnOn;
3389 case DlbOption::turnOnWhenUseful: dlbState = edlbsOffCanTurnOn; break;
3390 case DlbOption::no: dlbState = edlbsOffUser; break;
3391 case DlbOption::yes: dlbState = edlbsOnUser; break;
3392 default: gmx_incons("Invalid dlbOption enum value");
3395 /* Reruns don't support DLB: bail or override auto mode */
3396 if (mdrunOptions.rerun)
3398 std::string reasonStr = "it is not supported in reruns.";
3399 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3402 /* Unsupported integrators */
3403 if (!EI_DYNAMICS(ir->eI))
3405 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
3406 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3409 /* Without cycle counters we can't time work to balance on */
3412 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
3413 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3416 if (mdrunOptions.reproducible)
3418 std::string reasonStr = "you started a reproducible run.";
3423 case edlbsOffForever:
3424 GMX_RELEASE_ASSERT(false, "edlbsOffForever is not a valid initial state");
3426 case edlbsOffCanTurnOn:
3427 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3428 case edlbsOnCanTurnOff:
3429 GMX_RELEASE_ASSERT(false, "edlbsOffCanTurnOff is not a valid initial state");
3432 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", cr, fplog);
3434 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", dlbState);
3441 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
3444 if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
3446 /* Decomposition order z,y,x */
3449 fprintf(fplog, "Using domain decomposition order z, y, x\n");
3451 for (int dim = DIM-1; dim >= 0; dim--)
3453 if (dd->nc[dim] > 1)
3455 dd->dim[dd->ndim++] = dim;
3461 /* Decomposition order x,y,z */
3462 for (int dim = 0; dim < DIM; dim++)
3464 if (dd->nc[dim] > 1)
3466 dd->dim[dd->ndim++] = dim;
3473 /* Set dim[0] to avoid extra checks on ndim in several places */
3478 static gmx_domdec_comm_t *init_dd_comm()
3480 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
3482 comm->n_load_have = 0;
3483 comm->n_load_collect = 0;
3485 comm->haveTurnedOffDlb = false;
3487 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
3489 comm->sum_nat[i] = 0;
3493 comm->load_step = 0;
3496 clear_ivec(comm->load_lim);
3500 /* This should be replaced by a unique pointer */
3501 comm->balanceRegion = ddBalanceRegionAllocate();
3506 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
3507 static void set_dd_limits_and_grid(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
3508 const DomdecOptions &options,
3509 const MdrunOptions &mdrunOptions,
3510 const gmx_mtop_t *mtop,
3511 const t_inputrec *ir,
3513 gmx::ArrayRef<const gmx::RVec> xGlobal,
3517 real r_bonded_limit = -1;
3518 const real tenPercentMargin = 1.1;
3519 gmx_domdec_comm_t *comm = dd->comm;
3521 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
3522 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
3524 dd->pme_recv_f_alloc = 0;
3525 dd->pme_recv_f_buf = nullptr;
3527 /* Initialize to GPU share count to 0, might change later */
3528 comm->nrank_gpu_shared = 0;
3530 comm->dlbState = determineInitialDlbState(fplog, cr, options.dlbOption, comm->bRecordLoad, mdrunOptions, ir);
3531 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
3532 /* To consider turning DLB on after 2*nstlist steps we need to check
3533 * at partitioning count 3. Thus we need to increase the first count by 2.
3535 comm->ddPartioningCountFirstDlbOff += 2;
3539 fprintf(fplog, "Dynamic load balancing: %s\n",
3540 edlbs_names[comm->dlbState]);
3542 comm->bPMELoadBalDLBLimits = FALSE;
3544 /* Allocate the charge group/atom sorting struct */
3545 comm->sort = gmx::compat::make_unique<gmx_domdec_sort_t>();
3547 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
3549 comm->bInterCGBondeds = ((ncg_mtop(mtop) > gmx_mtop_num_molecules(*mtop)) ||
3550 mtop->bIntermolecularInteractions);
3551 if (comm->bInterCGBondeds)
3553 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
3557 comm->bInterCGMultiBody = FALSE;
3560 dd->bInterCGcons = gmx::inter_charge_group_constraints(*mtop);
3561 dd->bInterCGsettles = gmx::inter_charge_group_settles(*mtop);
3565 /* Set the cut-off to some very large value,
3566 * so we don't need if statements everywhere in the code.
3567 * We use sqrt, since the cut-off is squared in some places.
3569 comm->cutoff = GMX_CUTOFF_INF;
3573 comm->cutoff = ir->rlist;
3575 comm->cutoff_mbody = 0;
3577 comm->cellsize_limit = 0;
3578 comm->bBondComm = FALSE;
3580 /* Atoms should be able to move by up to half the list buffer size (if > 0)
3581 * within nstlist steps. Since boundaries are allowed to displace by half
3582 * a cell size, DD cells should be at least the size of the list buffer.
3584 comm->cellsize_limit = std::max(comm->cellsize_limit,
3585 ir->rlist - std::max(ir->rvdw, ir->rcoulomb));
3587 if (comm->bInterCGBondeds)
3589 if (options.minimumCommunicationRange > 0)
3591 comm->cutoff_mbody = options.minimumCommunicationRange;
3592 if (options.useBondedCommunication)
3594 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
3598 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3600 r_bonded_limit = comm->cutoff_mbody;
3602 else if (ir->bPeriodicMols)
3604 /* Can not easily determine the required cut-off */
3605 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
3606 comm->cutoff_mbody = comm->cutoff/2;
3607 r_bonded_limit = comm->cutoff_mbody;
3615 dd_bonded_cg_distance(fplog, mtop, ir, as_rvec_array(xGlobal.data()), box,
3616 options.checkBondedInteractions,
3619 gmx_bcast(sizeof(r_2b), &r_2b, cr);
3620 gmx_bcast(sizeof(r_mb), &r_mb, cr);
3622 /* We use an initial margin of 10% for the minimum cell size,
3623 * except when we are just below the non-bonded cut-off.
3625 if (options.useBondedCommunication)
3627 if (std::max(r_2b, r_mb) > comm->cutoff)
3629 r_bonded = std::max(r_2b, r_mb);
3630 r_bonded_limit = tenPercentMargin*r_bonded;
3631 comm->bBondComm = TRUE;
3636 r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
3638 /* We determine cutoff_mbody later */
3642 /* No special bonded communication,
3643 * simply increase the DD cut-off.
3645 r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb);
3646 comm->cutoff_mbody = r_bonded_limit;
3647 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3653 "Minimum cell size due to bonded interactions: %.3f nm\n",
3656 comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
3660 if (dd->bInterCGcons && options.constraintCommunicationRange <= 0)
3662 /* There is a cell size limit due to the constraints (P-LINCS) */
3663 rconstr = gmx::constr_r_max(fplog, mtop, ir);
3667 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
3669 if (rconstr > comm->cellsize_limit)
3671 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
3675 else if (options.constraintCommunicationRange > 0 && fplog)
3677 /* Here we do not check for dd->bInterCGcons,
3678 * because one can also set a cell size limit for virtual sites only
3679 * and at this point we don't know yet if there are intercg v-sites.
3682 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
3683 options.constraintCommunicationRange);
3684 rconstr = options.constraintCommunicationRange;
3686 comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
3688 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
3690 if (options.numCells[XX] > 0)
3692 copy_ivec(options.numCells, dd->nc);
3693 set_dd_dim(fplog, dd);
3694 set_ddbox_cr(cr, &dd->nc, ir, box, xGlobal, ddbox);
3696 if (options.numPmeRanks >= 0)
3698 cr->npmenodes = options.numPmeRanks;
3702 /* When the DD grid is set explicitly and -npme is set to auto,
3703 * don't use PME ranks. We check later if the DD grid is
3704 * compatible with the total number of ranks.
3709 real acs = average_cellsize_min(dd, ddbox);
3710 if (acs < comm->cellsize_limit)
3714 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
3716 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3717 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
3718 acs, comm->cellsize_limit);
3723 set_ddbox_cr(cr, nullptr, ir, box, xGlobal, ddbox);
3725 /* We need to choose the optimal DD grid and possibly PME nodes */
3727 dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
3728 options.numPmeRanks,
3729 !isDlbDisabled(comm),
3731 comm->cellsize_limit, comm->cutoff,
3732 comm->bInterCGBondeds);
3734 if (dd->nc[XX] == 0)
3737 gmx_bool bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
3738 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
3739 !bC ? "-rdd" : "-rcon",
3740 comm->dlbState != edlbsOffUser ? " or -dds" : "",
3741 bC ? " or your LINCS settings" : "");
3743 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3744 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
3746 "Look in the log file for details on the domain decomposition",
3747 cr->nnodes-cr->npmenodes, limit, buf);
3749 set_dd_dim(fplog, dd);
3755 "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
3756 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
3759 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
3760 if (cr->nnodes - dd->nnodes != cr->npmenodes)
3762 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3763 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
3764 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
3766 if (cr->npmenodes > dd->nnodes)
3768 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3769 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
3771 if (cr->npmenodes > 0)
3773 comm->npmenodes = cr->npmenodes;
3777 comm->npmenodes = dd->nnodes;
3780 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
3782 /* The following choices should match those
3783 * in comm_cost_est in domdec_setup.c.
3784 * Note that here the checks have to take into account
3785 * that the decomposition might occur in a different order than xyz
3786 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
3787 * in which case they will not match those in comm_cost_est,
3788 * but since that is mainly for testing purposes that's fine.
3790 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
3791 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
3792 getenv("GMX_PMEONEDD") == nullptr)
3794 comm->npmedecompdim = 2;
3795 comm->npmenodes_x = dd->nc[XX];
3796 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
3800 /* In case nc is 1 in both x and y we could still choose to
3801 * decompose pme in y instead of x, but we use x for simplicity.
3803 comm->npmedecompdim = 1;
3804 if (dd->dim[0] == YY)
3806 comm->npmenodes_x = 1;
3807 comm->npmenodes_y = comm->npmenodes;
3811 comm->npmenodes_x = comm->npmenodes;
3812 comm->npmenodes_y = 1;
3817 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
3818 comm->npmenodes_x, comm->npmenodes_y, 1);
3823 comm->npmedecompdim = 0;
3824 comm->npmenodes_x = 0;
3825 comm->npmenodes_y = 0;
3828 snew(comm->slb_frac, DIM);
3829 if (isDlbDisabled(comm))
3831 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], options.cellSizeX);
3832 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], options.cellSizeY);
3833 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], options.cellSizeZ);
3836 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
3838 if (comm->bBondComm || !isDlbDisabled(comm))
3840 /* Set the bonded communication distance to halfway
3841 * the minimum and the maximum,
3842 * since the extra communication cost is nearly zero.
3844 real acs = average_cellsize_min(dd, ddbox);
3845 comm->cutoff_mbody = 0.5*(r_bonded + acs);
3846 if (!isDlbDisabled(comm))
3848 /* Check if this does not limit the scaling */
3849 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
3850 options.dlbScaling*acs);
3852 if (!comm->bBondComm)
3854 /* Without bBondComm do not go beyond the n.b. cut-off */
3855 comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
3856 if (comm->cellsize_limit >= comm->cutoff)
3858 /* We don't loose a lot of efficieny
3859 * when increasing it to the n.b. cut-off.
3860 * It can even be slightly faster, because we need
3861 * less checks for the communication setup.
3863 comm->cutoff_mbody = comm->cutoff;
3866 /* Check if we did not end up below our original limit */
3867 comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
3869 if (comm->cutoff_mbody > comm->cellsize_limit)
3871 comm->cellsize_limit = comm->cutoff_mbody;
3874 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
3879 fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
3880 "cellsize limit %f\n",
3881 gmx::boolToString(comm->bBondComm), comm->cellsize_limit);
3886 check_dd_restrictions(cr, dd, ir, fplog);
3890 static void set_dlb_limits(gmx_domdec_t *dd)
3893 for (int d = 0; d < dd->ndim; d++)
3895 /* Set the number of pulses to the value for DLB */
3896 dd->comm->cd[d].ind.resize(dd->comm->cd[d].np_dlb);
3898 dd->comm->cellsize_min[dd->dim[d]] =
3899 dd->comm->cellsize_min_dlb[dd->dim[d]];
3904 static void turn_on_dlb(FILE *fplog, const t_commrec *cr, int64_t step)
3906 gmx_domdec_t *dd = cr->dd;
3907 gmx_domdec_comm_t *comm = dd->comm;
3909 real cellsize_min = comm->cellsize_min[dd->dim[0]];
3910 for (int d = 1; d < dd->ndim; d++)
3912 cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
3915 /* Turn off DLB if we're too close to the cell size limit. */
3916 if (cellsize_min < comm->cellsize_limit*1.05)
3918 auto str = gmx::formatString("step %" PRId64 " Measured %.1f %% performance loss due to load imbalance, "
3919 "but the minimum cell size is smaller than 1.05 times the cell size limit."
3920 "Will no longer try dynamic load balancing.\n", step, dd_force_imb_perf_loss(dd)*100);
3921 dd_warning(cr, fplog, str.c_str());
3923 comm->dlbState = edlbsOffForever;
3928 sprintf(buf, "step %" PRId64 " Turning on dynamic load balancing, because the performance loss due to load imbalance is %.1f %%.\n", step, dd_force_imb_perf_loss(dd)*100);
3929 dd_warning(cr, fplog, buf);
3930 comm->dlbState = edlbsOnCanTurnOff;
3932 /* Store the non-DLB performance, so we can check if DLB actually
3933 * improves performance.
3935 GMX_RELEASE_ASSERT(comm->cycl_n[ddCyclStep] > 0, "When we turned on DLB, we should have measured cycles");
3936 comm->cyclesPerStepBeforeDLB = comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
3940 /* We can set the required cell size info here,
3941 * so we do not need to communicate this.
3942 * The grid is completely uniform.
3944 for (int d = 0; d < dd->ndim; d++)
3946 RowMaster *rowMaster = comm->cellsizesWithDlb[d].rowMaster.get();
3950 comm->load[d].sum_m = comm->load[d].sum;
3952 int nc = dd->nc[dd->dim[d]];
3953 for (int i = 0; i < nc; i++)
3955 rowMaster->cellFrac[i] = i/static_cast<real>(nc);
3958 rowMaster->bounds[i].cellFracLowerMax = i /static_cast<real>(nc);
3959 rowMaster->bounds[i].cellFracUpperMin = (i + 1)/static_cast<real>(nc);
3962 rowMaster->cellFrac[nc] = 1.0;
3967 static void turn_off_dlb(FILE *fplog, const t_commrec *cr, int64_t step)
3969 gmx_domdec_t *dd = cr->dd;
3972 sprintf(buf, "step %" PRId64 " Turning off dynamic load balancing, because it is degrading performance.\n", step);
3973 dd_warning(cr, fplog, buf);
3974 dd->comm->dlbState = edlbsOffCanTurnOn;
3975 dd->comm->haveTurnedOffDlb = true;
3976 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
3979 static void turn_off_dlb_forever(FILE *fplog, const t_commrec *cr, int64_t step)
3981 GMX_RELEASE_ASSERT(cr->dd->comm->dlbState == edlbsOffCanTurnOn, "Can only turn off DLB forever when it was in the can-turn-on state");
3983 sprintf(buf, "step %" PRId64 " Will no longer try dynamic load balancing, as it degraded performance.\n", step);
3984 dd_warning(cr, fplog, buf);
3985 cr->dd->comm->dlbState = edlbsOffForever;
3988 static char *init_bLocalCG(const gmx_mtop_t *mtop)
3993 ncg = ncg_mtop(mtop);
3994 snew(bLocalCG, ncg);
3995 for (cg = 0; cg < ncg; cg++)
3997 bLocalCG[cg] = FALSE;
4003 void dd_init_bondeds(FILE *fplog,
4005 const gmx_mtop_t *mtop,
4006 const gmx_vsite_t *vsite,
4007 const t_inputrec *ir,
4008 gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
4010 gmx_domdec_comm_t *comm;
4012 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
4016 if (comm->bBondComm)
4018 /* Communicate atoms beyond the cut-off for bonded interactions */
4021 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
4023 comm->bLocalCG = init_bLocalCG(mtop);
4027 /* Only communicate atoms based on cut-off */
4028 comm->cglink = nullptr;
4029 comm->bLocalCG = nullptr;
4033 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
4034 const gmx_mtop_t *mtop, const t_inputrec *ir,
4035 gmx_bool bDynLoadBal, real dlb_scale,
4036 const gmx_ddbox_t *ddbox)
4038 gmx_domdec_comm_t *comm;
4044 if (fplog == nullptr)
4053 fprintf(fplog, "The maximum number of communication pulses is:");
4054 for (d = 0; d < dd->ndim; d++)
4056 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
4058 fprintf(fplog, "\n");
4059 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
4060 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
4061 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
4062 for (d = 0; d < DIM; d++)
4066 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
4073 comm->cellsize_min_dlb[d]/
4074 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
4076 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
4079 fprintf(fplog, "\n");
4083 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
4084 fprintf(fplog, "The initial number of communication pulses is:");
4085 for (d = 0; d < dd->ndim; d++)
4087 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
4089 fprintf(fplog, "\n");
4090 fprintf(fplog, "The initial domain decomposition cell size is:");
4091 for (d = 0; d < DIM; d++)
4095 fprintf(fplog, " %c %.2f nm",
4096 dim2char(d), dd->comm->cellsize_min[d]);
4099 fprintf(fplog, "\n\n");
4102 gmx_bool bInterCGVsites = count_intercg_vsites(mtop) != 0;
4104 if (comm->bInterCGBondeds ||
4106 dd->bInterCGcons || dd->bInterCGsettles)
4108 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
4109 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4110 "non-bonded interactions", "", comm->cutoff);
4114 limit = dd->comm->cellsize_limit;
4118 if (dynamic_dd_box(ddbox, ir))
4120 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
4122 limit = dd->comm->cellsize_min[XX];
4123 for (d = 1; d < DIM; d++)
4125 limit = std::min(limit, dd->comm->cellsize_min[d]);
4129 if (comm->bInterCGBondeds)
4131 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4132 "two-body bonded interactions", "(-rdd)",
4133 std::max(comm->cutoff, comm->cutoff_mbody));
4134 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4135 "multi-body bonded interactions", "(-rdd)",
4136 (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
4140 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4141 "virtual site constructions", "(-rcon)", limit);
4143 if (dd->bInterCGcons || dd->bInterCGsettles)
4145 sprintf(buf, "atoms separated by up to %d constraints",
4147 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4148 buf, "(-rcon)", limit);
4150 fprintf(fplog, "\n");
4156 static void set_cell_limits_dlb(gmx_domdec_t *dd,
4158 const t_inputrec *ir,
4159 const gmx_ddbox_t *ddbox)
4161 gmx_domdec_comm_t *comm;
4162 int d, dim, npulse, npulse_d_max, npulse_d;
4167 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
4169 /* Determine the maximum number of comm. pulses in one dimension */
4171 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4173 /* Determine the maximum required number of grid pulses */
4174 if (comm->cellsize_limit >= comm->cutoff)
4176 /* Only a single pulse is required */
4179 else if (!bNoCutOff && comm->cellsize_limit > 0)
4181 /* We round down slightly here to avoid overhead due to the latency
4182 * of extra communication calls when the cut-off
4183 * would be only slightly longer than the cell size.
4184 * Later cellsize_limit is redetermined,
4185 * so we can not miss interactions due to this rounding.
4187 npulse = static_cast<int>(0.96 + comm->cutoff/comm->cellsize_limit);
4191 /* There is no cell size limit */
4192 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
4195 if (!bNoCutOff && npulse > 1)
4197 /* See if we can do with less pulses, based on dlb_scale */
4199 for (d = 0; d < dd->ndim; d++)
4202 npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->cutoff
4203 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
4204 npulse_d_max = std::max(npulse_d_max, npulse_d);
4206 npulse = std::min(npulse, npulse_d_max);
4209 /* This env var can override npulse */
4210 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
4217 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
4218 for (d = 0; d < dd->ndim; d++)
4220 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
4221 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
4222 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
4224 comm->bVacDLBNoLimit = FALSE;
4228 /* cellsize_limit is set for LINCS in init_domain_decomposition */
4229 if (!comm->bVacDLBNoLimit)
4231 comm->cellsize_limit = std::max(comm->cellsize_limit,
4232 comm->cutoff/comm->maxpulse);
4234 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4235 /* Set the minimum cell size for each DD dimension */
4236 for (d = 0; d < dd->ndim; d++)
4238 if (comm->bVacDLBNoLimit ||
4239 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
4241 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
4245 comm->cellsize_min_dlb[dd->dim[d]] =
4246 comm->cutoff/comm->cd[d].np_dlb;
4249 if (comm->cutoff_mbody <= 0)
4251 comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
4259 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
4261 /* If each molecule is a single charge group
4262 * or we use domain decomposition for each periodic dimension,
4263 * we do not need to take pbc into account for the bonded interactions.
4265 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
4268 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
4271 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
4272 static void set_ddgrid_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
4273 const gmx_mtop_t *mtop, const t_inputrec *ir,
4274 const gmx_ddbox_t *ddbox)
4276 gmx_domdec_comm_t *comm;
4282 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
4284 init_ddpme(dd, &comm->ddpme[0], 0);
4285 if (comm->npmedecompdim >= 2)
4287 init_ddpme(dd, &comm->ddpme[1], 1);
4292 comm->npmenodes = 0;
4293 if (dd->pme_nodeid >= 0)
4295 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
4296 "Can not have separate PME ranks without PME electrostatics");
4302 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
4304 if (!isDlbDisabled(comm))
4306 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
4309 print_dd_settings(fplog, dd, mtop, ir, isDlbOn(comm), dlb_scale, ddbox);
4310 if (comm->dlbState == edlbsOffCanTurnOn)
4314 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
4316 print_dd_settings(fplog, dd, mtop, ir, TRUE, dlb_scale, ddbox);
4319 if (ir->ePBC == epbcNONE)
4321 vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
4326 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/static_cast<double>(dd->nnodes);
4330 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
4332 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
4334 dd->ga2la = new gmx_ga2la_t(natoms_tot,
4335 static_cast<int>(vol_frac*natoms_tot));
4338 /*! \brief Set some important DD parameters that can be modified by env.vars */
4339 static void set_dd_envvar_options(FILE *fplog, gmx_domdec_t *dd, int rank_mysim)
4341 gmx_domdec_comm_t *comm = dd->comm;
4343 dd->bSendRecv2 = (dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0) != 0);
4344 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
4345 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
4346 int recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
4347 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
4348 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
4349 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
4351 if (dd->bSendRecv2 && fplog)
4353 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
4360 fprintf(fplog, "Will load balance based on FLOP count\n");
4362 if (comm->eFlop > 1)
4364 srand(1 + rank_mysim);
4366 comm->bRecordLoad = TRUE;
4370 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
4374 DomdecOptions::DomdecOptions() :
4375 checkBondedInteractions(TRUE),
4376 useBondedCommunication(TRUE),
4378 rankOrder(DdRankOrder::pp_pme),
4379 minimumCommunicationRange(0),
4380 constraintCommunicationRange(0),
4381 dlbOption(DlbOption::turnOnWhenUseful),
4387 clear_ivec(numCells);
4390 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
4391 const DomdecOptions &options,
4392 const MdrunOptions &mdrunOptions,
4393 const gmx_mtop_t *mtop,
4394 const t_inputrec *ir,
4396 gmx::ArrayRef<const gmx::RVec> xGlobal,
4397 gmx::LocalAtomSetManager *atomSets)
4404 "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
4407 dd = new gmx_domdec_t;
4409 dd->comm = init_dd_comm();
4411 /* Initialize DD paritioning counters */
4412 dd->comm->partition_step = INT_MIN;
4415 set_dd_envvar_options(fplog, dd, cr->nodeid);
4417 gmx_ddbox_t ddbox = {0};
4418 set_dd_limits_and_grid(fplog, cr, dd, options, mdrunOptions,
4423 make_dd_communicators(fplog, cr, dd, options.rankOrder);
4425 if (thisRankHasDuty(cr, DUTY_PP))
4427 set_ddgrid_parameters(fplog, dd, options.dlbScaling, mtop, ir, &ddbox);
4429 setup_neighbor_relations(dd);
4432 /* Set overallocation to avoid frequent reallocation of arrays */
4433 set_over_alloc_dd(TRUE);
4435 clear_dd_cycle_counts(dd);
4437 dd->atomSets = atomSets;
4442 static gmx_bool test_dd_cutoff(t_commrec *cr,
4443 t_state *state, const t_inputrec *ir,
4454 set_ddbox(dd, false, ir, state->box, true, state->x, &ddbox);
4458 for (d = 0; d < dd->ndim; d++)
4462 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
4463 if (dynamic_dd_box(&ddbox, ir))
4465 inv_cell_size *= DD_PRES_SCALE_MARGIN;
4468 np = 1 + static_cast<int>(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
4470 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
4472 if (np > dd->comm->cd[d].np_dlb)
4477 /* If a current local cell size is smaller than the requested
4478 * cut-off, we could still fix it, but this gets very complicated.
4479 * Without fixing here, we might actually need more checks.
4481 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
4488 if (!isDlbDisabled(dd->comm))
4490 /* If DLB is not active yet, we don't need to check the grid jumps.
4491 * Actually we shouldn't, because then the grid jump data is not set.
4493 if (isDlbOn(dd->comm) &&
4494 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
4499 gmx_sumi(1, &LocallyLimited, cr);
4501 if (LocallyLimited > 0)
4510 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, const t_inputrec *ir,
4513 gmx_bool bCutoffAllowed;
4515 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
4519 cr->dd->comm->cutoff = cutoff_req;
4522 return bCutoffAllowed;
4525 void set_dd_dlb_max_cutoff(t_commrec *cr, real cutoff)
4527 gmx_domdec_comm_t *comm;
4529 comm = cr->dd->comm;
4531 /* Turn on the DLB limiting (might have been on already) */
4532 comm->bPMELoadBalDLBLimits = TRUE;
4534 /* Change the cut-off limit */
4535 comm->PMELoadBal_max_cutoff = cutoff;
4539 fprintf(debug, "PME load balancing set a limit to the DLB staggering such that a %f cut-off will continue to fit\n",
4540 comm->PMELoadBal_max_cutoff);
4544 /* Sets whether we should later check the load imbalance data, so that
4545 * we can trigger dynamic load balancing if enough imbalance has
4548 * Used after PME load balancing unlocks DLB, so that the check
4549 * whether DLB will be useful can happen immediately.
4551 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue)
4553 if (dd->comm->dlbState == edlbsOffCanTurnOn)
4555 dd->comm->bCheckWhetherToTurnDlbOn = bValue;
4559 /* Store the DD partitioning count, so we can ignore cycle counts
4560 * over the next nstlist steps, which are often slower.
4562 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
4567 /* Returns if we should check whether there has been enough load
4568 * imbalance to trigger dynamic load balancing.
4570 static gmx_bool dd_dlb_get_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd)
4572 if (dd->comm->dlbState != edlbsOffCanTurnOn)
4577 if (dd->ddp_count <= dd->comm->ddPartioningCountFirstDlbOff)
4579 /* We ignore the first nstlist steps at the start of the run
4580 * or after PME load balancing or after turning DLB off, since
4581 * these often have extra allocation or cache miss overhead.
4586 if (dd->comm->cycl_n[ddCyclStep] == 0)
4588 /* We can have zero timed steps when dd_partition_system is called
4589 * more than once at the same step, e.g. with replica exchange.
4590 * Turning on DLB would trigger an assertion failure later, but is
4591 * also useless right after exchanging replicas.
4596 /* We should check whether we should use DLB directly after
4598 if (dd->comm->bCheckWhetherToTurnDlbOn)
4600 /* This flag was set when the PME load-balancing routines
4601 unlocked DLB, and should now be cleared. */
4602 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, FALSE);
4605 /* We check whether we should use DLB every c_checkTurnDlbOnInterval
4606 * partitionings (we do not do this every partioning, so that we
4607 * avoid excessive communication). */
4608 return dd->comm->n_load_have % c_checkTurnDlbOnInterval == c_checkTurnDlbOnInterval - 1;
4611 gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
4613 return isDlbOn(dd->comm);
4616 gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
4618 return (dd->comm->dlbState == edlbsOffTemporarilyLocked);
4621 void dd_dlb_lock(gmx_domdec_t *dd)
4623 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4624 if (dd->comm->dlbState == edlbsOffCanTurnOn)
4626 dd->comm->dlbState = edlbsOffTemporarilyLocked;
4630 void dd_dlb_unlock(gmx_domdec_t *dd)
4632 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4633 if (dd->comm->dlbState == edlbsOffTemporarilyLocked)
4635 dd->comm->dlbState = edlbsOffCanTurnOn;
4636 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
4640 static void merge_cg_buffers(int ncell,
4641 gmx_domdec_comm_dim_t *cd, int pulse,
4643 gmx::ArrayRef<int> index_gl,
4645 rvec *cg_cm, rvec *recv_vr,
4646 gmx::ArrayRef<int> cgindex,
4647 cginfo_mb_t *cginfo_mb, int *cginfo)
4649 gmx_domdec_ind_t *ind, *ind_p;
4650 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
4651 int shift, shift_at;
4653 ind = &cd->ind[pulse];
4655 /* First correct the already stored data */
4656 shift = ind->nrecv[ncell];
4657 for (cell = ncell-1; cell >= 0; cell--)
4659 shift -= ind->nrecv[cell];
4662 /* Move the cg's present from previous grid pulses */
4663 cg0 = ncg_cell[ncell+cell];
4664 cg1 = ncg_cell[ncell+cell+1];
4665 cgindex[cg1+shift] = cgindex[cg1];
4666 for (cg = cg1-1; cg >= cg0; cg--)
4668 index_gl[cg+shift] = index_gl[cg];
4669 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
4670 cgindex[cg+shift] = cgindex[cg];
4671 cginfo[cg+shift] = cginfo[cg];
4673 /* Correct the already stored send indices for the shift */
4674 for (p = 1; p <= pulse; p++)
4676 ind_p = &cd->ind[p];
4678 for (c = 0; c < cell; c++)
4680 cg0 += ind_p->nsend[c];
4682 cg1 = cg0 + ind_p->nsend[cell];
4683 for (cg = cg0; cg < cg1; cg++)
4685 ind_p->index[cg] += shift;
4691 /* Merge in the communicated buffers */
4695 for (cell = 0; cell < ncell; cell++)
4697 cg1 = ncg_cell[ncell+cell+1] + shift;
4700 /* Correct the old cg indices */
4701 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
4703 cgindex[cg+1] += shift_at;
4706 for (cg = 0; cg < ind->nrecv[cell]; cg++)
4708 /* Copy this charge group from the buffer */
4709 index_gl[cg1] = recv_i[cg0];
4710 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
4711 /* Add it to the cgindex */
4712 cg_gl = index_gl[cg1];
4713 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
4714 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
4715 cgindex[cg1+1] = cgindex[cg1] + nat;
4720 shift += ind->nrecv[cell];
4721 ncg_cell[ncell+cell+1] = cg1;
4725 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
4728 const gmx::RangePartitioning &atomGroups)
4730 /* Store the atom block boundaries for easy copying of communication buffers
4732 int g = atomGroupStart;
4733 for (int zone = 0; zone < nzone; zone++)
4735 for (gmx_domdec_ind_t &ind : cd->ind)
4737 const auto range = atomGroups.subRange(g, g + ind.nrecv[zone]);
4738 ind.cell2at0[zone] = range.begin();
4739 ind.cell2at1[zone] = range.end();
4740 g += ind.nrecv[zone];
4745 static gmx_bool missing_link(t_blocka *link, int cg_gl, const char *bLocalCG)
4751 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
4753 if (!bLocalCG[link->a[i]])
4762 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
4764 real c[DIM][4]; /* the corners for the non-bonded communication */
4765 real cr0; /* corner for rounding */
4766 real cr1[4]; /* corners for rounding */
4767 real bc[DIM]; /* corners for bounded communication */
4768 real bcr1; /* corner for rounding for bonded communication */
4771 /* Determine the corners of the domain(s) we are communicating with */
4773 set_dd_corners(const gmx_domdec_t *dd,
4774 int dim0, int dim1, int dim2,
4778 const gmx_domdec_comm_t *comm;
4779 const gmx_domdec_zones_t *zones;
4784 zones = &comm->zones;
4786 /* Keep the compiler happy */
4790 /* The first dimension is equal for all cells */
4791 c->c[0][0] = comm->cell_x0[dim0];
4794 c->bc[0] = c->c[0][0];
4799 /* This cell row is only seen from the first row */
4800 c->c[1][0] = comm->cell_x0[dim1];
4801 /* All rows can see this row */
4802 c->c[1][1] = comm->cell_x0[dim1];
4803 if (isDlbOn(dd->comm))
4805 c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
4808 /* For the multi-body distance we need the maximum */
4809 c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
4812 /* Set the upper-right corner for rounding */
4813 c->cr0 = comm->cell_x1[dim0];
4818 for (j = 0; j < 4; j++)
4820 c->c[2][j] = comm->cell_x0[dim2];
4822 if (isDlbOn(dd->comm))
4824 /* Use the maximum of the i-cells that see a j-cell */
4825 for (i = 0; i < zones->nizone; i++)
4827 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
4832 std::max(c->c[2][j-4],
4833 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
4839 /* For the multi-body distance we need the maximum */
4840 c->bc[2] = comm->cell_x0[dim2];
4841 for (i = 0; i < 2; i++)
4843 for (j = 0; j < 2; j++)
4845 c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0);
4851 /* Set the upper-right corner for rounding */
4852 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
4853 * Only cell (0,0,0) can see cell 7 (1,1,1)
4855 c->cr1[0] = comm->cell_x1[dim1];
4856 c->cr1[3] = comm->cell_x1[dim1];
4857 if (isDlbOn(dd->comm))
4859 c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
4862 /* For the multi-body distance we need the maximum */
4863 c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
4870 /* Add the atom groups we need to send in this pulse from this zone to
4871 * \p localAtomGroups and \p work
4874 get_zone_pulse_cgs(gmx_domdec_t *dd,
4875 int zonei, int zone,
4877 gmx::ArrayRef<const int> globalAtomGroupIndices,
4878 const gmx::RangePartitioning &atomGroups,
4879 int dim, int dim_ind,
4880 int dim0, int dim1, int dim2,
4881 real r_comm2, real r_bcomm2,
4883 bool distanceIsTriclinic,
4885 real skew_fac2_d, real skew_fac_01,
4886 rvec *v_d, rvec *v_0, rvec *v_1,
4887 const dd_corners_t *c,
4888 const rvec sf2_round,
4889 gmx_bool bDistBonded,
4895 std::vector<int> *localAtomGroups,
4896 dd_comm_setup_work_t *work)
4898 gmx_domdec_comm_t *comm;
4900 gmx_bool bDistMB_pulse;
4902 real r2, rb2, r, tric_sh;
4909 bScrew = (dd->bScrewPBC && dim == XX);
4911 bDistMB_pulse = (bDistMB && bDistBonded);
4913 /* Unpack the work data */
4914 std::vector<int> &ibuf = work->atomGroupBuffer;
4915 std::vector<gmx::RVec> &vbuf = work->positionBuffer;
4919 for (cg = cg0; cg < cg1; cg++)
4923 if (!distanceIsTriclinic)
4925 /* Rectangular direction, easy */
4926 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
4933 r = cg_cm[cg][dim] - c->bc[dim_ind];
4939 /* Rounding gives at most a 16% reduction
4940 * in communicated atoms
4942 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
4944 r = cg_cm[cg][dim0] - c->cr0;
4945 /* This is the first dimension, so always r >= 0 */
4952 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
4954 r = cg_cm[cg][dim1] - c->cr1[zone];
4961 r = cg_cm[cg][dim1] - c->bcr1;
4971 /* Triclinic direction, more complicated */
4974 /* Rounding, conservative as the skew_fac multiplication
4975 * will slightly underestimate the distance.
4977 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
4979 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
4980 for (i = dim0+1; i < DIM; i++)
4982 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
4984 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
4987 rb[dim0] = rn[dim0];
4990 /* Take care that the cell planes along dim0 might not
4991 * be orthogonal to those along dim1 and dim2.
4993 for (i = 1; i <= dim_ind; i++)
4996 if (normal[dim0][dimd] > 0)
4998 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
5001 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
5006 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
5008 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
5010 for (i = dim1+1; i < DIM; i++)
5012 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
5014 rn[dim1] += tric_sh;
5017 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
5018 /* Take care of coupling of the distances
5019 * to the planes along dim0 and dim1 through dim2.
5021 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
5022 /* Take care that the cell planes along dim1
5023 * might not be orthogonal to that along dim2.
5025 if (normal[dim1][dim2] > 0)
5027 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
5033 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
5036 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
5037 /* Take care of coupling of the distances
5038 * to the planes along dim0 and dim1 through dim2.
5040 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
5041 /* Take care that the cell planes along dim1
5042 * might not be orthogonal to that along dim2.
5044 if (normal[dim1][dim2] > 0)
5046 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
5051 /* The distance along the communication direction */
5052 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
5054 for (i = dim+1; i < DIM; i++)
5056 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
5061 r2 += rn[dim]*rn[dim]*skew_fac2_d;
5062 /* Take care of coupling of the distances
5063 * to the planes along dim0 and dim1 through dim2.
5065 if (dim_ind == 1 && zonei == 1)
5067 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
5073 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
5076 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
5077 /* Take care of coupling of the distances
5078 * to the planes along dim0 and dim1 through dim2.
5080 if (dim_ind == 1 && zonei == 1)
5082 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
5090 ((bDistMB && rb2 < r_bcomm2) ||
5091 (bDist2B && r2 < r_bcomm2)) &&
5093 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
5094 missing_link(comm->cglink, globalAtomGroupIndices[cg],
5097 /* Store the local and global atom group indices and position */
5098 localAtomGroups->push_back(cg);
5099 ibuf.push_back(globalAtomGroupIndices[cg]);
5103 if (dd->ci[dim] == 0)
5105 /* Correct cg_cm for pbc */
5106 rvec_add(cg_cm[cg], box[dim], posPbc);
5109 posPbc[YY] = box[YY][YY] - posPbc[YY];
5110 posPbc[ZZ] = box[ZZ][ZZ] - posPbc[ZZ];
5115 copy_rvec(cg_cm[cg], posPbc);
5117 vbuf.emplace_back(posPbc[XX], posPbc[YY], posPbc[ZZ]);
5119 nat += atomGroups.block(cg).size();
5124 work->nsend_zone = nsend_z;
5127 static void clearCommSetupData(dd_comm_setup_work_t *work)
5129 work->localAtomGroupBuffer.clear();
5130 work->atomGroupBuffer.clear();
5131 work->positionBuffer.clear();
5133 work->nsend_zone = 0;
5136 static void setup_dd_communication(gmx_domdec_t *dd,
5137 matrix box, gmx_ddbox_t *ddbox,
5139 t_state *state, PaddedRVecVector *f)
5141 int dim_ind, dim, dim0, dim1, dim2, dimd, nat_tot;
5142 int nzone, nzone_send, zone, zonei, cg0, cg1;
5143 int c, i, cg, cg_gl, nrcg;
5144 int *zone_cg_range, pos_cg;
5145 gmx_domdec_comm_t *comm;
5146 gmx_domdec_zones_t *zones;
5147 gmx_domdec_comm_dim_t *cd;
5148 cginfo_mb_t *cginfo_mb;
5149 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
5150 real r_comm2, r_bcomm2;
5151 dd_corners_t corners;
5152 rvec *cg_cm, *normal, *v_d, *v_0 = nullptr, *v_1 = nullptr;
5153 real skew_fac2_d, skew_fac_01;
5158 fprintf(debug, "Setting up DD communication\n");
5163 if (comm->dth.empty())
5165 /* Initialize the thread data.
5166 * This can not be done in init_domain_decomposition,
5167 * as the numbers of threads is determined later.
5169 int numThreads = gmx_omp_nthreads_get(emntDomdec);
5170 comm->dth.resize(numThreads);
5173 switch (fr->cutoff_scheme)
5179 cg_cm = as_rvec_array(state->x.data());
5182 gmx_incons("unimplemented");
5185 bBondComm = comm->bBondComm;
5187 /* Do we need to determine extra distances for multi-body bondeds? */
5188 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5190 /* Do we need to determine extra distances for only two-body bondeds? */
5191 bDist2B = (bBondComm && !bDistMB);
5193 r_comm2 = gmx::square(comm->cutoff);
5194 r_bcomm2 = gmx::square(comm->cutoff_mbody);
5198 fprintf(debug, "bBondComm %s, r_bc %f\n", gmx::boolToString(bBondComm), std::sqrt(r_bcomm2));
5201 zones = &comm->zones;
5204 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
5205 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
5207 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
5209 /* Triclinic stuff */
5210 normal = ddbox->normal;
5214 v_0 = ddbox->v[dim0];
5215 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
5217 /* Determine the coupling coefficient for the distances
5218 * to the cell planes along dim0 and dim1 through dim2.
5219 * This is required for correct rounding.
5222 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
5225 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
5231 v_1 = ddbox->v[dim1];
5234 zone_cg_range = zones->cg_range;
5235 cginfo_mb = fr->cginfo_mb;
5237 zone_cg_range[0] = 0;
5238 zone_cg_range[1] = dd->ncg_home;
5239 comm->zone_ncg1[0] = dd->ncg_home;
5240 pos_cg = dd->ncg_home;
5242 nat_tot = comm->atomRanges.numHomeAtoms();
5244 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
5246 dim = dd->dim[dim_ind];
5247 cd = &comm->cd[dim_ind];
5249 /* Check if we need to compute triclinic distances along this dim */
5250 bool distanceIsTriclinic = false;
5251 for (i = 0; i <= dim_ind; i++)
5253 if (ddbox->tric_dir[dd->dim[i]])
5255 distanceIsTriclinic = true;
5259 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
5261 /* No pbc in this dimension, the first node should not comm. */
5269 v_d = ddbox->v[dim];
5270 skew_fac2_d = gmx::square(ddbox->skew_fac[dim]);
5272 cd->receiveInPlace = true;
5273 for (int p = 0; p < cd->numPulses(); p++)
5275 /* Only atoms communicated in the first pulse are used
5276 * for multi-body bonded interactions or for bBondComm.
5278 bDistBonded = ((bDistMB || bDist2B) && p == 0);
5280 gmx_domdec_ind_t *ind = &cd->ind[p];
5282 /* Thread 0 writes in the global index array */
5284 clearCommSetupData(&comm->dth[0]);
5286 for (zone = 0; zone < nzone_send; zone++)
5288 if (dim_ind > 0 && distanceIsTriclinic)
5290 /* Determine slightly more optimized skew_fac's
5292 * This reduces the number of communicated atoms
5293 * by about 10% for 3D DD of rhombic dodecahedra.
5295 for (dimd = 0; dimd < dim; dimd++)
5297 sf2_round[dimd] = 1;
5298 if (ddbox->tric_dir[dimd])
5300 for (i = dd->dim[dimd]+1; i < DIM; i++)
5302 /* If we are shifted in dimension i
5303 * and the cell plane is tilted forward
5304 * in dimension i, skip this coupling.
5306 if (!(zones->shift[nzone+zone][i] &&
5307 ddbox->v[dimd][i][dimd] >= 0))
5310 gmx::square(ddbox->v[dimd][i][dimd]);
5313 sf2_round[dimd] = 1/sf2_round[dimd];
5318 zonei = zone_perm[dim_ind][zone];
5321 /* Here we permutate the zones to obtain a convenient order
5322 * for neighbor searching
5324 cg0 = zone_cg_range[zonei];
5325 cg1 = zone_cg_range[zonei+1];
5329 /* Look only at the cg's received in the previous grid pulse
5331 cg1 = zone_cg_range[nzone+zone+1];
5332 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
5335 const int numThreads = static_cast<int>(comm->dth.size());
5336 #pragma omp parallel for num_threads(numThreads) schedule(static)
5337 for (int th = 0; th < numThreads; th++)
5341 dd_comm_setup_work_t &work = comm->dth[th];
5343 /* Retain data accumulated into buffers of thread 0 */
5346 clearCommSetupData(&work);
5349 int cg0_th = cg0 + ((cg1 - cg0)* th )/numThreads;
5350 int cg1_th = cg0 + ((cg1 - cg0)*(th+1))/numThreads;
5352 /* Get the cg's for this pulse in this zone */
5353 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
5354 dd->globalAtomGroupIndices,
5356 dim, dim_ind, dim0, dim1, dim2,
5358 box, distanceIsTriclinic,
5359 normal, skew_fac2_d, skew_fac_01,
5360 v_d, v_0, v_1, &corners, sf2_round,
5361 bDistBonded, bBondComm,
5364 th == 0 ? &ind->index : &work.localAtomGroupBuffer,
5367 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
5370 std::vector<int> &atomGroups = comm->dth[0].atomGroupBuffer;
5371 std::vector<gmx::RVec> &positions = comm->dth[0].positionBuffer;
5372 ind->nsend[zone] = comm->dth[0].nsend_zone;
5373 /* Append data of threads>=1 to the communication buffers */
5374 for (int th = 1; th < numThreads; th++)
5376 const dd_comm_setup_work_t &dth = comm->dth[th];
5378 ind->index.insert(ind->index.end(), dth.localAtomGroupBuffer.begin(), dth.localAtomGroupBuffer.end());
5379 atomGroups.insert(atomGroups.end(), dth.atomGroupBuffer.begin(), dth.atomGroupBuffer.end());
5380 positions.insert(positions.end(), dth.positionBuffer.begin(), dth.positionBuffer.end());
5381 comm->dth[0].nat += dth.nat;
5382 ind->nsend[zone] += dth.nsend_zone;
5385 /* Clear the counts in case we do not have pbc */
5386 for (zone = nzone_send; zone < nzone; zone++)
5388 ind->nsend[zone] = 0;
5390 ind->nsend[nzone] = ind->index.size();
5391 ind->nsend[nzone + 1] = comm->dth[0].nat;
5392 /* Communicate the number of cg's and atoms to receive */
5393 ddSendrecv(dd, dim_ind, dddirBackward,
5394 ind->nsend, nzone+2,
5395 ind->nrecv, nzone+2);
5399 /* We can receive in place if only the last zone is not empty */
5400 for (zone = 0; zone < nzone-1; zone++)
5402 if (ind->nrecv[zone] > 0)
5404 cd->receiveInPlace = false;
5409 int receiveBufferSize = 0;
5410 if (!cd->receiveInPlace)
5412 receiveBufferSize = ind->nrecv[nzone];
5414 /* These buffer are actually only needed with in-place */
5415 DDBufferAccess<int> globalAtomGroupBuffer(comm->intBuffer, receiveBufferSize);
5416 DDBufferAccess<gmx::RVec> rvecBuffer(comm->rvecBuffer, receiveBufferSize);
5418 dd_comm_setup_work_t &work = comm->dth[0];
5420 /* Make space for the global cg indices */
5421 int numAtomGroupsNew = pos_cg + ind->nrecv[nzone];
5422 dd->globalAtomGroupIndices.resize(numAtomGroupsNew);
5423 /* Communicate the global cg indices */
5424 gmx::ArrayRef<int> integerBufferRef;
5425 if (cd->receiveInPlace)
5427 integerBufferRef = gmx::arrayRefFromArray(dd->globalAtomGroupIndices.data() + pos_cg, ind->nrecv[nzone]);
5431 integerBufferRef = globalAtomGroupBuffer.buffer;
5433 ddSendrecv<int>(dd, dim_ind, dddirBackward,
5434 work.atomGroupBuffer, integerBufferRef);
5436 /* Make space for cg_cm */
5437 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
5438 if (fr->cutoff_scheme == ecutsGROUP)
5444 cg_cm = as_rvec_array(state->x.data());
5446 /* Communicate cg_cm */
5447 gmx::ArrayRef<gmx::RVec> rvecBufferRef;
5448 if (cd->receiveInPlace)
5450 rvecBufferRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cg_cm + pos_cg), ind->nrecv[nzone]);
5454 rvecBufferRef = rvecBuffer.buffer;
5456 ddSendrecv<gmx::RVec>(dd, dim_ind, dddirBackward,
5457 work.positionBuffer, rvecBufferRef);
5459 /* Make the charge group index */
5460 if (cd->receiveInPlace)
5462 zone = (p == 0 ? 0 : nzone - 1);
5463 while (zone < nzone)
5465 for (cg = 0; cg < ind->nrecv[zone]; cg++)
5467 cg_gl = dd->globalAtomGroupIndices[pos_cg];
5468 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
5469 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
5470 dd->atomGrouping_.appendBlock(nrcg);
5473 /* Update the charge group presence,
5474 * so we can use it in the next pass of the loop.
5476 comm->bLocalCG[cg_gl] = TRUE;
5482 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
5485 zone_cg_range[nzone+zone] = pos_cg;
5490 /* This part of the code is never executed with bBondComm. */
5491 std::vector<int> &atomGroupsIndex = dd->atomGrouping_.rawIndex();
5492 atomGroupsIndex.resize(numAtomGroupsNew + 1);
5494 merge_cg_buffers(nzone, cd, p, zone_cg_range,
5495 dd->globalAtomGroupIndices, integerBufferRef.data(),
5496 cg_cm, as_rvec_array(rvecBufferRef.data()),
5498 fr->cginfo_mb, fr->cginfo);
5499 pos_cg += ind->nrecv[nzone];
5501 nat_tot += ind->nrecv[nzone+1];
5503 if (!cd->receiveInPlace)
5505 /* Store the atom block for easy copying of communication buffers */
5506 make_cell2at_index(cd, nzone, zone_cg_range[nzone], dd->atomGrouping());
5511 comm->atomRanges.setEnd(DDAtomRanges::Type::Zones, nat_tot);
5515 /* We don't need to update cginfo, since that was alrady done above.
5516 * So we pass NULL for the forcerec.
5518 dd_set_cginfo(dd->globalAtomGroupIndices,
5519 dd->ncg_home, dd->globalAtomGroupIndices.size(),
5520 nullptr, comm->bLocalCG);
5525 fprintf(debug, "Finished setting up DD communication, zones:");
5526 for (c = 0; c < zones->n; c++)
5528 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
5530 fprintf(debug, "\n");
5534 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
5538 for (c = 0; c < zones->nizone; c++)
5540 zones->izone[c].cg1 = zones->cg_range[c+1];
5541 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
5542 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
5546 /* \brief Set zone dimensions for zones \p zone_start to \p zone_end-1
5548 * Also sets the atom density for the home zone when \p zone_start=0.
5549 * For this \p numMovedChargeGroupsInHomeZone needs to be passed to tell
5550 * how many charge groups will move but are still part of the current range.
5551 * \todo When converting domdec to use proper classes, all these variables
5552 * should be private and a method should return the correct count
5553 * depending on an internal state.
5555 * \param[in,out] dd The domain decomposition struct
5556 * \param[in] box The box
5557 * \param[in] ddbox The domain decomposition box struct
5558 * \param[in] zone_start The start of the zone range to set sizes for
5559 * \param[in] zone_end The end of the zone range to set sizes for
5560 * \param[in] numMovedChargeGroupsInHomeZone The number of charge groups in the home zone that should moved but are still present in dd->comm->zones.cg_range
5562 static void set_zones_size(gmx_domdec_t *dd,
5563 matrix box, const gmx_ddbox_t *ddbox,
5564 int zone_start, int zone_end,
5565 int numMovedChargeGroupsInHomeZone)
5567 gmx_domdec_comm_t *comm;
5568 gmx_domdec_zones_t *zones;
5577 zones = &comm->zones;
5579 /* Do we need to determine extra distances for multi-body bondeds? */
5580 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5582 for (z = zone_start; z < zone_end; z++)
5584 /* Copy cell limits to zone limits.
5585 * Valid for non-DD dims and non-shifted dims.
5587 copy_rvec(comm->cell_x0, zones->size[z].x0);
5588 copy_rvec(comm->cell_x1, zones->size[z].x1);
5591 for (d = 0; d < dd->ndim; d++)
5595 for (z = 0; z < zones->n; z++)
5597 /* With a staggered grid we have different sizes
5598 * for non-shifted dimensions.
5600 if (isDlbOn(dd->comm) && zones->shift[z][dim] == 0)
5604 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
5605 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
5609 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
5610 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
5616 rcmbs = comm->cutoff_mbody;
5617 if (ddbox->tric_dir[dim])
5619 rcs /= ddbox->skew_fac[dim];
5620 rcmbs /= ddbox->skew_fac[dim];
5623 /* Set the lower limit for the shifted zone dimensions */
5624 for (z = zone_start; z < zone_end; z++)
5626 if (zones->shift[z][dim] > 0)
5629 if (!isDlbOn(dd->comm) || d == 0)
5631 zones->size[z].x0[dim] = comm->cell_x1[dim];
5632 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
5636 /* Here we take the lower limit of the zone from
5637 * the lowest domain of the zone below.
5641 zones->size[z].x0[dim] =
5642 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
5648 zones->size[z].x0[dim] =
5649 zones->size[zone_perm[2][z-4]].x0[dim];
5653 zones->size[z].x0[dim] =
5654 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
5657 /* A temporary limit, is updated below */
5658 zones->size[z].x1[dim] = zones->size[z].x0[dim];
5662 for (zi = 0; zi < zones->nizone; zi++)
5664 if (zones->shift[zi][dim] == 0)
5666 /* This takes the whole zone into account.
5667 * With multiple pulses this will lead
5668 * to a larger zone then strictly necessary.
5670 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5671 zones->size[zi].x1[dim]+rcmbs);
5679 /* Loop over the i-zones to set the upper limit of each
5682 for (zi = 0; zi < zones->nizone; zi++)
5684 if (zones->shift[zi][dim] == 0)
5686 /* We should only use zones up to zone_end */
5687 int jZoneEnd = std::min(zones->izone[zi].j1, zone_end);
5688 for (z = zones->izone[zi].j0; z < jZoneEnd; z++)
5690 if (zones->shift[z][dim] > 0)
5692 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5693 zones->size[zi].x1[dim]+rcs);
5700 for (z = zone_start; z < zone_end; z++)
5702 /* Initialization only required to keep the compiler happy */
5703 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
5706 /* To determine the bounding box for a zone we need to find
5707 * the extreme corners of 4, 2 or 1 corners.
5709 nc = 1 << (ddbox->nboundeddim - 1);
5711 for (c = 0; c < nc; c++)
5713 /* Set up a zone corner at x=0, ignoring trilinic couplings */
5717 corner[YY] = zones->size[z].x0[YY];
5721 corner[YY] = zones->size[z].x1[YY];
5725 corner[ZZ] = zones->size[z].x0[ZZ];
5729 corner[ZZ] = zones->size[z].x1[ZZ];
5731 if (dd->ndim == 1 && dd->dim[0] < ZZ && ZZ < dd->npbcdim &&
5732 box[ZZ][1 - dd->dim[0]] != 0)
5734 /* With 1D domain decomposition the cg's are not in
5735 * the triclinic box, but triclinic x-y and rectangular y/x-z.
5736 * Shift the corner of the z-vector back to along the box
5737 * vector of dimension d, so it will later end up at 0 along d.
5738 * This can affect the location of this corner along dd->dim[0]
5739 * through the matrix operation below if box[d][dd->dim[0]]!=0.
5741 int d = 1 - dd->dim[0];
5743 corner[d] -= corner[ZZ]*box[ZZ][d]/box[ZZ][ZZ];
5745 /* Apply the triclinic couplings */
5746 assert(ddbox->npbcdim <= DIM);
5747 for (i = YY; i < ddbox->npbcdim; i++)
5749 for (j = XX; j < i; j++)
5751 corner[j] += corner[i]*box[i][j]/box[i][i];
5756 copy_rvec(corner, corner_min);
5757 copy_rvec(corner, corner_max);
5761 for (i = 0; i < DIM; i++)
5763 corner_min[i] = std::min(corner_min[i], corner[i]);
5764 corner_max[i] = std::max(corner_max[i], corner[i]);
5768 /* Copy the extreme cornes without offset along x */
5769 for (i = 0; i < DIM; i++)
5771 zones->size[z].bb_x0[i] = corner_min[i];
5772 zones->size[z].bb_x1[i] = corner_max[i];
5774 /* Add the offset along x */
5775 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
5776 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
5779 if (zone_start == 0)
5782 for (dim = 0; dim < DIM; dim++)
5784 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
5786 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0] - numMovedChargeGroupsInHomeZone)/vol;
5791 for (z = zone_start; z < zone_end; z++)
5793 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5795 zones->size[z].x0[XX], zones->size[z].x1[XX],
5796 zones->size[z].x0[YY], zones->size[z].x1[YY],
5797 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
5798 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5800 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
5801 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
5802 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
5807 static bool comp_cgsort(const gmx_cgsort_t &a, const gmx_cgsort_t &b)
5812 return a.ind_gl < b.ind_gl;
5814 return a.nsc < b.nsc;
5817 /* Order data in \p dataToSort according to \p sort
5819 * Note: both buffers should have at least \p sort.size() elements.
5821 template <typename T>
5823 orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
5824 gmx::ArrayRef<T> dataToSort,
5825 gmx::ArrayRef<T> sortBuffer)
5827 GMX_ASSERT(dataToSort.size() >= sort.size(), "The vector needs to be sufficiently large");
5828 GMX_ASSERT(sortBuffer.size() >= sort.size(), "The sorting buffer needs to be sufficiently large");
5830 /* Order the data into the temporary buffer */
5832 for (const gmx_cgsort_t &entry : sort)
5834 sortBuffer[i++] = dataToSort[entry.ind];
5837 /* Copy back to the original array */
5838 std::copy(sortBuffer.begin(), sortBuffer.begin() + sort.size(),
5839 dataToSort.begin());
5842 /* Order data in \p dataToSort according to \p sort
5844 * Note: \p vectorToSort should have at least \p sort.size() elements,
5845 * \p workVector is resized when it is too small.
5847 template <typename T>
5849 orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
5850 gmx::ArrayRef<T> vectorToSort,
5851 std::vector<T> *workVector)
5853 if (gmx::index(workVector->size()) < sort.size())
5855 workVector->resize(sort.size());
5857 orderVector<T>(sort, vectorToSort, *workVector);
5860 static void order_vec_atom(const gmx::RangePartitioning *atomGroups,
5861 gmx::ArrayRef<const gmx_cgsort_t> sort,
5862 gmx::ArrayRef<gmx::RVec> v,
5863 gmx::ArrayRef<gmx::RVec> buf)
5865 if (atomGroups == nullptr)
5867 /* Avoid the useless loop of the atoms within a cg */
5868 orderVector(sort, v, buf);
5873 /* Order the data */
5875 for (const gmx_cgsort_t &entry : sort)
5877 for (int i : atomGroups->block(entry.ind))
5879 copy_rvec(v[i], buf[a]);
5885 /* Copy back to the original array */
5886 for (int a = 0; a < atot; a++)
5888 copy_rvec(buf[a], v[a]);
5892 /* Returns whether a < b */
5893 static bool compareCgsort(const gmx_cgsort_t &a,
5894 const gmx_cgsort_t &b)
5896 return (a.nsc < b.nsc ||
5897 (a.nsc == b.nsc && a.ind_gl < b.ind_gl));
5900 static void orderedSort(gmx::ArrayRef<const gmx_cgsort_t> stationary,
5901 gmx::ArrayRef<gmx_cgsort_t> moved,
5902 std::vector<gmx_cgsort_t> *sort1)
5904 /* The new indices are not very ordered, so we qsort them */
5905 std::sort(moved.begin(), moved.end(), comp_cgsort);
5907 /* stationary is already ordered, so now we can merge the two arrays */
5908 sort1->resize(stationary.size() + moved.size());
5909 std::merge(stationary.begin(), stationary.end(),
5910 moved.begin(), moved.end(),
5915 /* Set the sorting order for systems with charge groups, returned in sort->sort.
5916 * The order is according to the global charge group index.
5917 * This adds and removes charge groups that moved between domains.
5919 static void dd_sort_order(const gmx_domdec_t *dd,
5920 const t_forcerec *fr,
5922 gmx_domdec_sort_t *sort)
5924 const int *a = fr->ns->grid->cell_index;
5926 const int movedValue = NSGRID_SIGNAL_MOVED_FAC*fr->ns->grid->ncells;
5928 if (ncg_home_old >= 0)
5930 std::vector<gmx_cgsort_t> &stationary = sort->stationary;
5931 std::vector<gmx_cgsort_t> &moved = sort->moved;
5933 /* The charge groups that remained in the same ns grid cell
5934 * are completely ordered. So we can sort efficiently by sorting
5935 * the charge groups that did move into the stationary list.
5936 * Note: push_back() seems to be slightly slower than direct access.
5940 for (int i = 0; i < dd->ncg_home; i++)
5942 /* Check if this cg did not move to another node */
5943 if (a[i] < movedValue)
5947 entry.ind_gl = dd->globalAtomGroupIndices[i];
5950 if (i >= ncg_home_old || a[i] != sort->sorted[i].nsc)
5952 /* This cg is new on this node or moved ns grid cell */
5953 moved.push_back(entry);
5957 /* This cg did not move */
5958 stationary.push_back(entry);
5965 fprintf(debug, "ordered sort cgs: stationary %zu moved %zu\n",
5966 stationary.size(), moved.size());
5968 /* Sort efficiently */
5969 orderedSort(stationary, moved, &sort->sorted);
5973 std::vector<gmx_cgsort_t> &cgsort = sort->sorted;
5975 cgsort.reserve(dd->ncg_home);
5977 for (int i = 0; i < dd->ncg_home; i++)
5979 /* Sort on the ns grid cell indices
5980 * and the global topology index
5984 entry.ind_gl = dd->globalAtomGroupIndices[i];
5986 cgsort.push_back(entry);
5987 if (cgsort[i].nsc < movedValue)
5994 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, numCGNew);
5996 /* Determine the order of the charge groups using qsort */
5997 std::sort(cgsort.begin(), cgsort.end(), comp_cgsort);
5999 /* Remove the charge groups which are no longer at home here */
6000 cgsort.resize(numCGNew);
6004 /* Returns the sorting order for atoms based on the nbnxn grid order in sort */
6005 static void dd_sort_order_nbnxn(const t_forcerec *fr,
6006 std::vector<gmx_cgsort_t> *sort)
6008 gmx::ArrayRef<const int> atomOrder = nbnxn_get_atomorder(fr->nbv->nbs.get());
6010 /* Using push_back() instead of this resize results in much slower code */
6011 sort->resize(atomOrder.size());
6012 gmx::ArrayRef<gmx_cgsort_t> buffer = *sort;
6013 size_t numSorted = 0;
6014 for (int i : atomOrder)
6018 /* The values of nsc and ind_gl are not used in this case */
6019 buffer[numSorted++].ind = i;
6022 sort->resize(numSorted);
6025 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
6028 gmx_domdec_sort_t *sort = dd->comm->sort.get();
6030 switch (fr->cutoff_scheme)
6033 dd_sort_order(dd, fr, ncg_home_old, sort);
6036 dd_sort_order_nbnxn(fr, &sort->sorted);
6039 gmx_incons("unimplemented");
6042 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
6044 /* We alloc with the old size, since cgindex is still old */
6045 GMX_ASSERT(atomGrouping.numBlocks() == dd->ncg_home, "atomGroups and dd should be consistent");
6046 DDBufferAccess<gmx::RVec> rvecBuffer(dd->comm->rvecBuffer, atomGrouping.fullRange().end());
6048 const gmx::RangePartitioning *atomGroupsPtr = (dd->comm->bCGs ? &atomGrouping : nullptr);
6050 /* Set the new home atom/charge group count */
6051 dd->ncg_home = sort->sorted.size();
6054 fprintf(debug, "Set the new home charge group count to %d\n",
6058 /* Reorder the state */
6059 gmx::ArrayRef<const gmx_cgsort_t> cgsort = sort->sorted;
6060 GMX_RELEASE_ASSERT(cgsort.size() == dd->ncg_home, "We should sort all the home atom groups");
6062 if (state->flags & (1 << estX))
6064 order_vec_atom(atomGroupsPtr, cgsort, state->x, rvecBuffer.buffer);
6066 if (state->flags & (1 << estV))
6068 order_vec_atom(atomGroupsPtr, cgsort, state->v, rvecBuffer.buffer);
6070 if (state->flags & (1 << estCGP))
6072 order_vec_atom(atomGroupsPtr, cgsort, state->cg_p, rvecBuffer.buffer);
6075 if (fr->cutoff_scheme == ecutsGROUP)
6078 gmx::ArrayRef<gmx::RVec> cgcmRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cgcm[0]), cgsort.size());
6079 orderVector(cgsort, cgcmRef, rvecBuffer.buffer);
6082 /* Reorder the global cg index */
6083 orderVector<int>(cgsort, dd->globalAtomGroupIndices, &sort->intBuffer);
6084 /* Reorder the cginfo */
6085 orderVector<int>(cgsort, gmx::arrayRefFromArray(fr->cginfo, cgsort.size()), &sort->intBuffer);
6086 /* Rebuild the local cg index */
6089 /* We make a new, ordered atomGroups object and assign it to
6090 * the old one. This causes some allocation overhead, but saves
6091 * a copy back of the whole index.
6093 gmx::RangePartitioning ordered;
6094 for (const gmx_cgsort_t &entry : cgsort)
6096 ordered.appendBlock(atomGrouping.block(entry.ind).size());
6098 dd->atomGrouping_ = ordered;
6102 dd->atomGrouping_.setAllBlocksSizeOne(dd->ncg_home);
6104 /* Set the home atom number */
6105 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, dd->atomGrouping().fullRange().end());
6107 if (fr->cutoff_scheme == ecutsVERLET)
6109 /* The atoms are now exactly in grid order, update the grid order */
6110 nbnxn_set_atomorder(fr->nbv->nbs.get());
6114 /* Copy the sorted ns cell indices back to the ns grid struct */
6115 for (gmx::index i = 0; i < cgsort.size(); i++)
6117 fr->ns->grid->cell_index[i] = cgsort[i].nsc;
6119 fr->ns->grid->nr = cgsort.size();
6123 static void add_dd_statistics(gmx_domdec_t *dd)
6125 gmx_domdec_comm_t *comm = dd->comm;
6127 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6129 auto range = static_cast<DDAtomRanges::Type>(i);
6131 comm->atomRanges.end(range) - comm->atomRanges.start(range);
6136 void reset_dd_statistics_counters(gmx_domdec_t *dd)
6138 gmx_domdec_comm_t *comm = dd->comm;
6140 /* Reset all the statistics and counters for total run counting */
6141 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6143 comm->sum_nat[i] = 0;
6147 comm->load_step = 0;
6150 clear_ivec(comm->load_lim);
6155 void print_dd_statistics(const t_commrec *cr, const t_inputrec *ir, FILE *fplog)
6157 gmx_domdec_comm_t *comm = cr->dd->comm;
6159 const int numRanges = static_cast<int>(DDAtomRanges::Type::Number);
6160 gmx_sumd(numRanges, comm->sum_nat, cr);
6162 if (fplog == nullptr)
6167 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
6169 for (int i = static_cast<int>(DDAtomRanges::Type::Zones); i < numRanges; i++)
6171 auto range = static_cast<DDAtomRanges::Type>(i);
6172 double av = comm->sum_nat[i]/comm->ndecomp;
6175 case DDAtomRanges::Type::Zones:
6177 " av. #atoms communicated per step for force: %d x %.1f\n",
6180 case DDAtomRanges::Type::Vsites:
6181 if (cr->dd->vsite_comm)
6184 " av. #atoms communicated per step for vsites: %d x %.1f\n",
6185 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
6189 case DDAtomRanges::Type::Constraints:
6190 if (cr->dd->constraint_comm)
6193 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
6194 1 + ir->nLincsIter, av);
6198 gmx_incons(" Unknown type for DD statistics");
6201 fprintf(fplog, "\n");
6203 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
6205 print_dd_load_av(fplog, cr->dd);
6209 void dd_partition_system(FILE *fplog,
6211 const t_commrec *cr,
6212 gmx_bool bMasterState,
6214 t_state *state_global,
6215 const gmx_mtop_t *top_global,
6216 const t_inputrec *ir,
6217 t_state *state_local,
6218 PaddedRVecVector *f,
6219 gmx::MDAtoms *mdAtoms,
6220 gmx_localtop_t *top_local,
6223 gmx::Constraints *constr,
6225 gmx_wallcycle *wcycle,
6229 gmx_domdec_comm_t *comm;
6230 gmx_ddbox_t ddbox = {0};
6232 int64_t step_pcoupl;
6233 rvec cell_ns_x0, cell_ns_x1;
6234 int ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
6235 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckWhetherToTurnDlbOn, bLogLoad;
6236 gmx_bool bRedist, bResortAll;
6237 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
6241 wallcycle_start(wcycle, ewcDOMDEC);
6246 // TODO if the update code becomes accessible here, use
6247 // upd->deform for this logic.
6248 bBoxChanged = (bMasterState || inputrecDeform(ir));
6249 if (ir->epc != epcNO)
6251 /* With nstpcouple > 1 pressure coupling happens.
6252 * one step after calculating the pressure.
6253 * Box scaling happens at the end of the MD step,
6254 * after the DD partitioning.
6255 * We therefore have to do DLB in the first partitioning
6256 * after an MD step where P-coupling occurred.
6257 * We need to determine the last step in which p-coupling occurred.
6258 * MRS -- need to validate this for vv?
6260 int n = ir->nstpcouple;
6263 step_pcoupl = step - 1;
6267 step_pcoupl = ((step - 1)/n)*n + 1;
6269 if (step_pcoupl >= comm->partition_step)
6275 bNStGlobalComm = (step % nstglobalcomm == 0);
6283 /* Should we do dynamic load balacing this step?
6284 * Since it requires (possibly expensive) global communication,
6285 * we might want to do DLB less frequently.
6287 if (bBoxChanged || ir->epc != epcNO)
6289 bDoDLB = bBoxChanged;
6293 bDoDLB = bNStGlobalComm;
6297 /* Check if we have recorded loads on the nodes */
6298 if (comm->bRecordLoad && dd_load_count(comm) > 0)
6300 bCheckWhetherToTurnDlbOn = dd_dlb_get_should_check_whether_to_turn_dlb_on(dd);
6302 /* Print load every nstlog, first and last step to the log file */
6303 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
6304 comm->n_load_collect == 0 ||
6306 (step + ir->nstlist > ir->init_step + ir->nsteps)));
6308 /* Avoid extra communication due to verbose screen output
6309 * when nstglobalcomm is set.
6311 if (bDoDLB || bLogLoad || bCheckWhetherToTurnDlbOn ||
6312 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
6314 get_load_distribution(dd, wcycle);
6319 dd_print_load(fplog, dd, step-1);
6323 dd_print_load_verbose(dd);
6326 comm->n_load_collect++;
6332 /* Add the measured cycles to the running average */
6333 const float averageFactor = 0.1f;
6334 comm->cyclesPerStepDlbExpAverage =
6335 (1 - averageFactor)*comm->cyclesPerStepDlbExpAverage +
6336 averageFactor*comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
6338 if (comm->dlbState == edlbsOnCanTurnOff &&
6339 dd->comm->n_load_have % c_checkTurnDlbOffInterval == c_checkTurnDlbOffInterval - 1)
6341 gmx_bool turnOffDlb;
6344 /* If the running averaged cycles with DLB are more
6345 * than before we turned on DLB, turn off DLB.
6346 * We will again run and check the cycles without DLB
6347 * and we can then decide if to turn off DLB forever.
6349 turnOffDlb = (comm->cyclesPerStepDlbExpAverage >
6350 comm->cyclesPerStepBeforeDLB);
6352 dd_bcast(dd, sizeof(turnOffDlb), &turnOffDlb);
6355 /* To turn off DLB, we need to redistribute the atoms */
6356 dd_collect_state(dd, state_local, state_global);
6357 bMasterState = TRUE;
6358 turn_off_dlb(fplog, cr, step);
6362 else if (bCheckWhetherToTurnDlbOn)
6364 gmx_bool turnOffDlbForever = FALSE;
6365 gmx_bool turnOnDlb = FALSE;
6367 /* Since the timings are node dependent, the master decides */
6370 /* If we recently turned off DLB, we want to check if
6371 * performance is better without DLB. We want to do this
6372 * ASAP to minimize the chance that external factors
6373 * slowed down the DLB step are gone here and we
6374 * incorrectly conclude that DLB was causing the slowdown.
6375 * So we measure one nstlist block, no running average.
6377 if (comm->haveTurnedOffDlb &&
6378 comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep] <
6379 comm->cyclesPerStepDlbExpAverage)
6381 /* After turning off DLB we ran nstlist steps in fewer
6382 * cycles than with DLB. This likely means that DLB
6383 * in not benefical, but this could be due to a one
6384 * time unlucky fluctuation, so we require two such
6385 * observations in close succession to turn off DLB
6388 if (comm->dlbSlowerPartitioningCount > 0 &&
6389 dd->ddp_count < comm->dlbSlowerPartitioningCount + 10*c_checkTurnDlbOnInterval)
6391 turnOffDlbForever = TRUE;
6393 comm->haveTurnedOffDlb = false;
6394 /* Register when we last measured DLB slowdown */
6395 comm->dlbSlowerPartitioningCount = dd->ddp_count;
6399 /* Here we check if the max PME rank load is more than 0.98
6400 * the max PP force load. If so, PP DLB will not help,
6401 * since we are (almost) limited by PME. Furthermore,
6402 * DLB will cause a significant extra x/f redistribution
6403 * cost on the PME ranks, which will then surely result
6404 * in lower total performance.
6406 if (cr->npmenodes > 0 &&
6407 dd_pme_f_ratio(dd) > 1 - DD_PERF_LOSS_DLB_ON)
6413 turnOnDlb = (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
6419 gmx_bool turnOffDlbForever;
6423 turnOffDlbForever, turnOnDlb
6425 dd_bcast(dd, sizeof(bools), &bools);
6426 if (bools.turnOffDlbForever)
6428 turn_off_dlb_forever(fplog, cr, step);
6430 else if (bools.turnOnDlb)
6432 turn_on_dlb(fplog, cr, step);
6437 comm->n_load_have++;
6440 cgs_gl = &comm->cgs_gl;
6445 /* Clear the old state */
6446 clearDDStateIndices(dd, 0, 0);
6449 auto xGlobal = positionsFromStatePointer(state_global);
6451 set_ddbox(dd, true, ir,
6452 DDMASTER(dd) ? state_global->box : nullptr,
6456 distributeState(fplog, dd, state_global, ddbox, state_local, f);
6458 dd_make_local_cgs(dd, &top_local->cgs);
6460 /* Ensure that we have space for the new distribution */
6461 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
6463 if (fr->cutoff_scheme == ecutsGROUP)
6465 calc_cgcm(fplog, 0, dd->ncg_home,
6466 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6469 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6471 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
6473 else if (state_local->ddp_count != dd->ddp_count)
6475 if (state_local->ddp_count > dd->ddp_count)
6477 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%ld)", state_local->ddp_count, dd->ddp_count);
6480 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
6482 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
6485 /* Clear the old state */
6486 clearDDStateIndices(dd, 0, 0);
6488 /* Restore the atom group indices from state_local */
6489 restoreAtomGroups(dd, cgs_gl->index, state_local);
6490 make_dd_indices(dd, cgs_gl->index, 0);
6491 ncgindex_set = dd->ncg_home;
6493 if (fr->cutoff_scheme == ecutsGROUP)
6495 /* Redetermine the cg COMs */
6496 calc_cgcm(fplog, 0, dd->ncg_home,
6497 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6500 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6502 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
6504 set_ddbox(dd, bMasterState, ir, state_local->box,
6505 true, state_local->x, &ddbox);
6507 bRedist = isDlbOn(comm);
6511 /* We have the full state, only redistribute the cgs */
6513 /* Clear the non-home indices */
6514 clearDDStateIndices(dd, dd->ncg_home, comm->atomRanges.numHomeAtoms());
6517 /* Avoid global communication for dim's without pbc and -gcom */
6518 if (!bNStGlobalComm)
6520 copy_rvec(comm->box0, ddbox.box0 );
6521 copy_rvec(comm->box_size, ddbox.box_size);
6523 set_ddbox(dd, bMasterState, ir, state_local->box,
6524 bNStGlobalComm, state_local->x, &ddbox);
6529 /* For dim's without pbc and -gcom */
6530 copy_rvec(ddbox.box0, comm->box0 );
6531 copy_rvec(ddbox.box_size, comm->box_size);
6533 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
6536 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
6538 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
6541 /* Check if we should sort the charge groups */
6542 const bool bSortCG = (bMasterState || bRedist);
6544 ncg_home_old = dd->ncg_home;
6546 /* When repartitioning we mark charge groups that will move to neighboring
6547 * DD cells, but we do not move them right away for performance reasons.
6548 * Thus we need to keep track of how many charge groups will move for
6549 * obtaining correct local charge group / atom counts.
6554 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
6556 ncgindex_set = dd->ncg_home;
6557 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
6561 GMX_RELEASE_ASSERT(bSortCG, "Sorting is required after redistribution");
6563 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
6566 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
6568 &comm->cell_x0, &comm->cell_x1,
6569 dd->ncg_home, fr->cg_cm,
6570 cell_ns_x0, cell_ns_x1, &grid_density);
6574 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
6577 switch (fr->cutoff_scheme)
6580 copy_ivec(fr->ns->grid->n, ncells_old);
6581 grid_first(fplog, fr->ns->grid, dd, &ddbox,
6582 state_local->box, cell_ns_x0, cell_ns_x1,
6583 fr->rlist, grid_density);
6586 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_old[XX], &ncells_old[YY]);
6589 gmx_incons("unimplemented");
6591 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
6592 copy_ivec(ddbox.tric_dir, comm->tric_dir);
6596 wallcycle_sub_start(wcycle, ewcsDD_GRID);
6598 /* Sort the state on charge group position.
6599 * This enables exact restarts from this step.
6600 * It also improves performance by about 15% with larger numbers
6601 * of atoms per node.
6604 /* Fill the ns grid with the home cell,
6605 * so we can sort with the indices.
6607 set_zones_ncg_home(dd);
6609 switch (fr->cutoff_scheme)
6612 set_zones_size(dd, state_local->box, &ddbox, 0, 1, ncg_moved);
6614 nbnxn_put_on_grid(fr->nbv->nbs.get(), fr->ePBC, state_local->box,
6616 comm->zones.size[0].bb_x0,
6617 comm->zones.size[0].bb_x1,
6619 comm->zones.dens_zone0,
6621 as_rvec_array(state_local->x.data()),
6622 ncg_moved, bRedist ? comm->movedBuffer.data() : nullptr,
6623 fr->nbv->grp[eintLocal].kernel_type,
6626 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_new[XX], &ncells_new[YY]);
6629 fill_grid(&comm->zones, fr->ns->grid, dd->ncg_home,
6630 0, dd->ncg_home, fr->cg_cm);
6632 copy_ivec(fr->ns->grid->n, ncells_new);
6635 gmx_incons("unimplemented");
6638 bResortAll = bMasterState;
6640 /* Check if we can user the old order and ns grid cell indices
6641 * of the charge groups to sort the charge groups efficiently.
6643 if (ncells_new[XX] != ncells_old[XX] ||
6644 ncells_new[YY] != ncells_old[YY] ||
6645 ncells_new[ZZ] != ncells_old[ZZ])
6652 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
6653 gmx_step_str(step, sbuf), dd->ncg_home);
6655 dd_sort_state(dd, fr->cg_cm, fr, state_local,
6656 bResortAll ? -1 : ncg_home_old);
6658 /* After sorting and compacting we set the correct size */
6659 dd_resize_state(state_local, f, comm->atomRanges.numHomeAtoms());
6661 /* Rebuild all the indices */
6665 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
6668 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
6670 /* Setup up the communication and communicate the coordinates */
6671 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
6673 /* Set the indices */
6674 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
6676 /* Set the charge group boundaries for neighbor searching */
6677 set_cg_boundaries(&comm->zones);
6679 if (fr->cutoff_scheme == ecutsVERLET)
6681 /* When bSortCG=true, we have already set the size for zone 0 */
6682 set_zones_size(dd, state_local->box, &ddbox,
6683 bSortCG ? 1 : 0, comm->zones.n,
6687 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
6690 write_dd_pdb("dd_home",step,"dump",top_global,cr,
6691 -1,as_rvec_array(state_local->x.data()),state_local->box);
6694 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
6696 /* Extract a local topology from the global topology */
6697 for (int i = 0; i < dd->ndim; i++)
6699 np[dd->dim[i]] = comm->cd[i].numPulses();
6701 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
6702 comm->cellsize_min, np,
6704 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : as_rvec_array(state_local->x.data()),
6705 vsite, top_global, top_local);
6707 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
6709 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
6711 /* Set up the special atom communication */
6712 int n = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6713 for (int i = static_cast<int>(DDAtomRanges::Type::Zones) + 1; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6715 auto range = static_cast<DDAtomRanges::Type>(i);
6718 case DDAtomRanges::Type::Vsites:
6719 if (vsite && vsite->n_intercg_vsite)
6721 n = dd_make_local_vsites(dd, n, top_local->idef.il);
6724 case DDAtomRanges::Type::Constraints:
6725 if (dd->bInterCGcons || dd->bInterCGsettles)
6727 /* Only for inter-cg constraints we need special code */
6728 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
6729 constr, ir->nProjOrder,
6730 top_local->idef.il);
6734 gmx_incons("Unknown special atom type setup");
6736 comm->atomRanges.setEnd(range, n);
6739 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
6741 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
6743 /* Make space for the extra coordinates for virtual site
6744 * or constraint communication.
6746 state_local->natoms = comm->atomRanges.numAtomsTotal();
6748 dd_resize_state(state_local, f, state_local->natoms);
6750 if (fr->haveDirectVirialContributions)
6752 if (vsite && vsite->n_intercg_vsite)
6754 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Vsites);
6758 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
6760 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6764 nat_f_novirsum = comm->atomRanges.numHomeAtoms();
6773 /* Set the number of atoms required for the force calculation.
6774 * Forces need to be constrained when doing energy
6775 * minimization. For simple simulations we could avoid some
6776 * allocation, zeroing and copying, but this is probably not worth
6777 * the complications and checking.
6779 forcerec_set_ranges(fr, dd->ncg_home, dd->globalAtomGroupIndices.size(),
6780 comm->atomRanges.end(DDAtomRanges::Type::Zones),
6781 comm->atomRanges.end(DDAtomRanges::Type::Constraints),
6784 /* Update atom data for mdatoms and several algorithms */
6785 mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr,
6786 nullptr, mdAtoms, constr, vsite, nullptr);
6788 auto mdatoms = mdAtoms->mdatoms();
6789 if (!thisRankHasDuty(cr, DUTY_PME))
6791 /* Send the charges and/or c6/sigmas to our PME only node */
6792 gmx_pme_send_parameters(cr,
6794 mdatoms->nChargePerturbed != 0, mdatoms->nTypePerturbed != 0,
6795 mdatoms->chargeA, mdatoms->chargeB,
6796 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
6797 mdatoms->sigmaA, mdatoms->sigmaB,
6798 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
6803 /* Update the local pull groups */
6804 dd_make_local_pull_groups(cr, ir->pull_work);
6807 if (dd->atomSets != nullptr)
6809 /* Update the local atom sets */
6810 dd->atomSets->setIndicesInDomainDecomposition(*(dd->ga2la));
6813 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
6814 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
6816 add_dd_statistics(dd);
6818 /* Make sure we only count the cycles for this DD partitioning */
6819 clear_dd_cycle_counts(dd);
6821 /* Because the order of the atoms might have changed since
6822 * the last vsite construction, we need to communicate the constructing
6823 * atom coordinates again (for spreading the forces this MD step).
6825 dd_move_x_vsites(dd, state_local->box, as_rvec_array(state_local->x.data()));
6827 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
6829 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
6831 dd_move_x(dd, state_local->box, state_local->x, nullWallcycle);
6832 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
6833 -1, as_rvec_array(state_local->x.data()), state_local->box);
6836 /* Store the partitioning step */
6837 comm->partition_step = step;
6839 /* Increase the DD partitioning counter */
6841 /* The state currently matches this DD partitioning count, store it */
6842 state_local->ddp_count = dd->ddp_count;
6845 /* The DD master node knows the complete cg distribution,
6846 * store the count so we can possibly skip the cg info communication.
6848 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
6851 if (comm->DD_debug > 0)
6853 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
6854 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
6855 "after partitioning");
6858 wallcycle_stop(wcycle, ewcDOMDEC);
6861 /*! \brief Check whether bonded interactions are missing, if appropriate */
6862 void checkNumberOfBondedInteractions(FILE *fplog,
6864 int totalNumberOfBondedInteractions,
6865 const gmx_mtop_t *top_global,
6866 const gmx_localtop_t *top_local,
6867 const t_state *state,
6868 bool *shouldCheckNumberOfBondedInteractions)
6870 if (*shouldCheckNumberOfBondedInteractions)
6872 if (totalNumberOfBondedInteractions != cr->dd->nbonded_global)
6874 dd_print_missing_interactions(fplog, cr, totalNumberOfBondedInteractions, top_global, top_local, state); // Does not return
6876 *shouldCheckNumberOfBondedInteractions = false;