2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
52 #include "gromacs/domdec/domdec_network.h"
53 #include "gromacs/domdec/ga2la.h"
54 #include "gromacs/ewald/pme.h"
55 #include "gromacs/fileio/gmxfio.h"
56 #include "gromacs/fileio/pdbio.h"
57 #include "gromacs/gmxlib/chargegroup.h"
58 #include "gromacs/gmxlib/network.h"
59 #include "gromacs/gmxlib/nrnb.h"
60 #include "gromacs/gpu_utils/gpu_utils.h"
61 #include "gromacs/hardware/hw_info.h"
62 #include "gromacs/imd/imd.h"
63 #include "gromacs/listed-forces/manage-threading.h"
64 #include "gromacs/math/functions.h"
65 #include "gromacs/math/vec.h"
66 #include "gromacs/math/vectypes.h"
67 #include "gromacs/mdlib/constr.h"
68 #include "gromacs/mdlib/force.h"
69 #include "gromacs/mdlib/forcerec.h"
70 #include "gromacs/mdlib/gmx_omp_nthreads.h"
71 #include "gromacs/mdlib/mdatoms.h"
72 #include "gromacs/mdlib/mdrun.h"
73 #include "gromacs/mdlib/mdsetup.h"
74 #include "gromacs/mdlib/nb_verlet.h"
75 #include "gromacs/mdlib/nbnxn_grid.h"
76 #include "gromacs/mdlib/nsgrid.h"
77 #include "gromacs/mdlib/vsite.h"
78 #include "gromacs/mdtypes/commrec.h"
79 #include "gromacs/mdtypes/df_history.h"
80 #include "gromacs/mdtypes/forcerec.h"
81 #include "gromacs/mdtypes/inputrec.h"
82 #include "gromacs/mdtypes/md_enums.h"
83 #include "gromacs/mdtypes/mdatom.h"
84 #include "gromacs/mdtypes/nblist.h"
85 #include "gromacs/mdtypes/state.h"
86 #include "gromacs/pbcutil/ishift.h"
87 #include "gromacs/pbcutil/pbc.h"
88 #include "gromacs/pulling/pull.h"
89 #include "gromacs/pulling/pull_rotation.h"
90 #include "gromacs/swap/swapcoords.h"
91 #include "gromacs/timing/wallcycle.h"
92 #include "gromacs/topology/block.h"
93 #include "gromacs/topology/idef.h"
94 #include "gromacs/topology/ifunc.h"
95 #include "gromacs/topology/mtop_lookup.h"
96 #include "gromacs/topology/mtop_util.h"
97 #include "gromacs/topology/topology.h"
98 #include "gromacs/utility/basedefinitions.h"
99 #include "gromacs/utility/basenetwork.h"
100 #include "gromacs/utility/cstringutil.h"
101 #include "gromacs/utility/exceptions.h"
102 #include "gromacs/utility/fatalerror.h"
103 #include "gromacs/utility/gmxmpi.h"
104 #include "gromacs/utility/qsort_threadsafe.h"
105 #include "gromacs/utility/real.h"
106 #include "gromacs/utility/smalloc.h"
107 #include "gromacs/utility/stringutil.h"
109 #include "domdec_constraints.h"
110 #include "domdec_internal.h"
111 #include "domdec_vsite.h"
113 #define DDRANK(dd, rank) (rank)
114 #define DDMASTERRANK(dd) (dd->masterrank)
116 struct gmx_domdec_master_t
118 /* The cell boundaries */
120 /* The global charge group division */
121 int *ncg; /* Number of home charge groups for each node */
122 int *index; /* Index of nnodes+1 into cg */
123 int *cg; /* Global charge group index */
124 int *nat; /* Number of home atoms for each node. */
125 int *ibuf; /* Buffer for communication */
126 rvec *vbuf; /* Buffer for state scattering and gathering */
129 #define DD_NLOAD_MAX 9
131 const char *edlbs_names[edlbsNR] = { "off", "auto", "locked", "on", "on" };
133 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
136 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
137 #define DD_FLAG_NRCG 65535
138 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
139 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
141 /* The DD zone order */
142 static const ivec dd_zo[DD_MAXZONE] =
143 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
145 /* The non-bonded zone-pair setup for domain decomposition
146 * The first number is the i-zone, the second number the first j-zone seen by
147 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
148 * As is, this is for 3D decomposition, where there are 4 i-zones.
149 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
150 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
153 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
158 /* Factors used to avoid problems due to rounding issues */
159 #define DD_CELL_MARGIN 1.0001
160 #define DD_CELL_MARGIN2 1.00005
161 /* Factor to account for pressure scaling during nstlist steps */
162 #define DD_PRES_SCALE_MARGIN 1.02
164 /* Turn on DLB when the load imbalance causes this amount of total loss.
165 * There is a bit of overhead with DLB and it's difficult to achieve
166 * a load imbalance of less than 2% with DLB.
168 #define DD_PERF_LOSS_DLB_ON 0.02
170 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
171 #define DD_PERF_LOSS_WARN 0.05
173 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
175 /* Use separate MPI send and receive commands
176 * when nnodes <= GMX_DD_NNODES_SENDRECV.
177 * This saves memory (and some copying for small nnodes).
178 * For high parallelization scatter and gather calls are used.
180 #define GMX_DD_NNODES_SENDRECV 4
183 /* We check if to turn on DLB at the first and every 100 DD partitionings.
184 * With large imbalance DLB will turn on at the first step, so we can
185 * make the interval so large that the MPI overhead of the check is negligible.
187 static const int c_checkTurnDlbOnInterval = 100;
188 /* We need to check if DLB results in worse performance and then turn it off.
189 * We check this more often then for turning DLB on, because the DLB can scale
190 * the domains very rapidly, so if unlucky the load imbalance can go up quickly
191 * and furthermore, we are already synchronizing often with DLB, so
192 * the overhead of the MPI Bcast is not that high.
194 static const int c_checkTurnDlbOffInterval = 20;
196 /* Forward declaration */
197 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue);
201 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
203 static void index2xyz(ivec nc,int ind,ivec xyz)
205 xyz[XX] = ind % nc[XX];
206 xyz[YY] = (ind / nc[XX]) % nc[YY];
207 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
211 /* This order is required to minimize the coordinate communication in PME
212 * which uses decomposition in the x direction.
214 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
216 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
218 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
219 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
220 xyz[ZZ] = ind % nc[ZZ];
223 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
228 ddindex = dd_index(dd->nc, c);
229 if (dd->comm->bCartesianPP_PME)
231 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
233 else if (dd->comm->bCartesianPP)
236 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
247 static gmx_bool dynamic_dd_box(const gmx_ddbox_t *ddbox, const t_inputrec *ir)
249 return (ddbox->nboundeddim < DIM || inputrecDynamicBox(ir));
252 int ddglatnr(const gmx_domdec_t *dd, int i)
262 if (i >= dd->comm->nat[ddnatNR-1])
264 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
266 atnr = dd->gatindex[i] + 1;
272 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
274 return &dd->comm->cgs_gl;
277 /*! \brief Returns true if the DLB state indicates that the balancer is on. */
278 static bool isDlbOn(const gmx_domdec_comm_t *comm)
280 return (comm->dlbState == edlbsOnCanTurnOff ||
281 comm->dlbState == edlbsOnUser);
283 /*! \brief Returns true if the DLB state indicates that the balancer is off/disabled.
285 static bool isDlbDisabled(const gmx_domdec_comm_t *comm)
287 return (comm->dlbState == edlbsOffUser ||
288 comm->dlbState == edlbsOffForever);
291 static void vec_rvec_init(vec_rvec_t *v)
297 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
301 v->nalloc = over_alloc_dd(n);
302 srenew(v->v, v->nalloc);
306 void dd_store_state(gmx_domdec_t *dd, t_state *state)
310 if (state->ddp_count != dd->ddp_count)
312 gmx_incons("The MD state does not match the domain decomposition state");
315 state->cg_gl.resize(dd->ncg_home);
316 for (i = 0; i < dd->ncg_home; i++)
318 state->cg_gl[i] = dd->index_gl[i];
321 state->ddp_count_cg_gl = dd->ddp_count;
324 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
326 return &dd->comm->zones;
329 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
330 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
332 gmx_domdec_zones_t *zones;
335 zones = &dd->comm->zones;
338 while (icg >= zones->izone[izone].cg1)
347 else if (izone < zones->nizone)
349 *jcg0 = zones->izone[izone].jcg0;
353 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
354 icg, izone, zones->nizone);
357 *jcg1 = zones->izone[izone].jcg1;
359 for (d = 0; d < dd->ndim; d++)
362 shift0[dim] = zones->izone[izone].shift0[dim];
363 shift1[dim] = zones->izone[izone].shift1[dim];
364 if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
366 /* A conservative approach, this can be optimized */
373 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
375 /* We currently set mdatoms entries for all atoms:
376 * local + non-local + communicated for vsite + constraints
379 return dd->comm->nat[ddnatNR - 1];
382 int dd_natoms_vsite(const gmx_domdec_t *dd)
384 return dd->comm->nat[ddnatVSITE];
387 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
389 *at_start = dd->comm->nat[ddnatCON-1];
390 *at_end = dd->comm->nat[ddnatCON];
393 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
395 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
396 int *index, *cgindex;
397 gmx_domdec_comm_t *comm;
398 gmx_domdec_comm_dim_t *cd;
399 gmx_domdec_ind_t *ind;
400 rvec shift = {0, 0, 0}, *buf, *rbuf;
401 gmx_bool bPBC, bScrew;
405 cgindex = dd->cgindex;
410 nat_tot = dd->nat_home;
411 for (d = 0; d < dd->ndim; d++)
413 bPBC = (dd->ci[dd->dim[d]] == 0);
414 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
417 copy_rvec(box[dd->dim[d]], shift);
420 for (p = 0; p < cd->np; p++)
427 for (i = 0; i < ind->nsend[nzone]; i++)
429 at0 = cgindex[index[i]];
430 at1 = cgindex[index[i]+1];
431 for (j = at0; j < at1; j++)
433 copy_rvec(x[j], buf[n]);
440 for (i = 0; i < ind->nsend[nzone]; i++)
442 at0 = cgindex[index[i]];
443 at1 = cgindex[index[i]+1];
444 for (j = at0; j < at1; j++)
446 /* We need to shift the coordinates */
447 rvec_add(x[j], shift, buf[n]);
454 for (i = 0; i < ind->nsend[nzone]; i++)
456 at0 = cgindex[index[i]];
457 at1 = cgindex[index[i]+1];
458 for (j = at0; j < at1; j++)
461 buf[n][XX] = x[j][XX] + shift[XX];
463 * This operation requires a special shift force
464 * treatment, which is performed in calc_vir.
466 buf[n][YY] = box[YY][YY] - x[j][YY];
467 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
479 rbuf = comm->vbuf2.v;
481 /* Send and receive the coordinates */
482 dd_sendrecv_rvec(dd, d, dddirBackward,
483 buf, ind->nsend[nzone+1],
484 rbuf, ind->nrecv[nzone+1]);
488 for (zone = 0; zone < nzone; zone++)
490 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
492 copy_rvec(rbuf[j], x[i]);
497 nat_tot += ind->nrecv[nzone+1];
503 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
505 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
506 int *index, *cgindex;
507 gmx_domdec_comm_t *comm;
508 gmx_domdec_comm_dim_t *cd;
509 gmx_domdec_ind_t *ind;
513 gmx_bool bShiftForcesNeedPbc, bScrew;
517 cgindex = dd->cgindex;
521 nzone = comm->zones.n/2;
522 nat_tot = dd->nat_tot;
523 for (d = dd->ndim-1; d >= 0; d--)
525 /* Only forces in domains near the PBC boundaries need to
526 consider PBC in the treatment of fshift */
527 bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
528 bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
529 if (fshift == nullptr && !bScrew)
531 bShiftForcesNeedPbc = FALSE;
533 /* Determine which shift vector we need */
539 for (p = cd->np-1; p >= 0; p--)
542 nat_tot -= ind->nrecv[nzone+1];
549 sbuf = comm->vbuf2.v;
551 for (zone = 0; zone < nzone; zone++)
553 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
555 copy_rvec(f[i], sbuf[j]);
560 /* Communicate the forces */
561 dd_sendrecv_rvec(dd, d, dddirForward,
562 sbuf, ind->nrecv[nzone+1],
563 buf, ind->nsend[nzone+1]);
565 /* Add the received forces */
567 if (!bShiftForcesNeedPbc)
569 for (i = 0; i < ind->nsend[nzone]; i++)
571 at0 = cgindex[index[i]];
572 at1 = cgindex[index[i]+1];
573 for (j = at0; j < at1; j++)
575 rvec_inc(f[j], buf[n]);
582 /* fshift should always be defined if this function is
583 * called when bShiftForcesNeedPbc is true */
584 assert(NULL != fshift);
585 for (i = 0; i < ind->nsend[nzone]; i++)
587 at0 = cgindex[index[i]];
588 at1 = cgindex[index[i]+1];
589 for (j = at0; j < at1; j++)
591 rvec_inc(f[j], buf[n]);
592 /* Add this force to the shift force */
593 rvec_inc(fshift[is], buf[n]);
600 for (i = 0; i < ind->nsend[nzone]; i++)
602 at0 = cgindex[index[i]];
603 at1 = cgindex[index[i]+1];
604 for (j = at0; j < at1; j++)
606 /* Rotate the force */
607 f[j][XX] += buf[n][XX];
608 f[j][YY] -= buf[n][YY];
609 f[j][ZZ] -= buf[n][ZZ];
612 /* Add this force to the shift force */
613 rvec_inc(fshift[is], buf[n]);
624 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
626 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
627 int *index, *cgindex;
628 gmx_domdec_comm_t *comm;
629 gmx_domdec_comm_dim_t *cd;
630 gmx_domdec_ind_t *ind;
635 cgindex = dd->cgindex;
637 buf = &comm->vbuf.v[0][0];
640 nat_tot = dd->nat_home;
641 for (d = 0; d < dd->ndim; d++)
644 for (p = 0; p < cd->np; p++)
649 for (i = 0; i < ind->nsend[nzone]; i++)
651 at0 = cgindex[index[i]];
652 at1 = cgindex[index[i]+1];
653 for (j = at0; j < at1; j++)
666 rbuf = &comm->vbuf2.v[0][0];
668 /* Send and receive the coordinates */
669 dd_sendrecv_real(dd, d, dddirBackward,
670 buf, ind->nsend[nzone+1],
671 rbuf, ind->nrecv[nzone+1]);
675 for (zone = 0; zone < nzone; zone++)
677 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
684 nat_tot += ind->nrecv[nzone+1];
690 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
692 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
693 int *index, *cgindex;
694 gmx_domdec_comm_t *comm;
695 gmx_domdec_comm_dim_t *cd;
696 gmx_domdec_ind_t *ind;
701 cgindex = dd->cgindex;
703 buf = &comm->vbuf.v[0][0];
705 nzone = comm->zones.n/2;
706 nat_tot = dd->nat_tot;
707 for (d = dd->ndim-1; d >= 0; d--)
710 for (p = cd->np-1; p >= 0; p--)
713 nat_tot -= ind->nrecv[nzone+1];
720 sbuf = &comm->vbuf2.v[0][0];
722 for (zone = 0; zone < nzone; zone++)
724 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
731 /* Communicate the forces */
732 dd_sendrecv_real(dd, d, dddirForward,
733 sbuf, ind->nrecv[nzone+1],
734 buf, ind->nsend[nzone+1]);
736 /* Add the received forces */
738 for (i = 0; i < ind->nsend[nzone]; i++)
740 at0 = cgindex[index[i]];
741 at1 = cgindex[index[i]+1];
742 for (j = at0; j < at1; j++)
753 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
755 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
757 zone->min0, zone->max1,
758 zone->mch0, zone->mch0,
759 zone->p1_0, zone->p1_1);
763 #define DDZONECOMM_MAXZONE 5
764 #define DDZONECOMM_BUFSIZE 3
766 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
767 int ddimind, int direction,
768 gmx_ddzone_t *buf_s, int n_s,
769 gmx_ddzone_t *buf_r, int n_r)
771 #define ZBS DDZONECOMM_BUFSIZE
772 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
773 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
776 for (i = 0; i < n_s; i++)
778 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
779 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
780 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
781 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
782 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
783 vbuf_s[i*ZBS+1][2] = 0;
784 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
785 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
786 vbuf_s[i*ZBS+2][2] = 0;
789 dd_sendrecv_rvec(dd, ddimind, direction,
793 for (i = 0; i < n_r; i++)
795 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
796 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
797 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
798 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
799 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
800 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
801 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
807 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
808 rvec cell_ns_x0, rvec cell_ns_x1)
810 int d, d1, dim, pos, buf_size, i, j, p, npulse, npulse_min;
812 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
813 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
814 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
815 rvec extr_s[2], extr_r[2];
817 real dist_d, c = 0, det;
818 gmx_domdec_comm_t *comm;
823 for (d = 1; d < dd->ndim; d++)
826 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
827 zp->min0 = cell_ns_x0[dim];
828 zp->max1 = cell_ns_x1[dim];
829 zp->min1 = cell_ns_x1[dim];
830 zp->mch0 = cell_ns_x0[dim];
831 zp->mch1 = cell_ns_x1[dim];
832 zp->p1_0 = cell_ns_x0[dim];
833 zp->p1_1 = cell_ns_x1[dim];
836 for (d = dd->ndim-2; d >= 0; d--)
839 bPBC = (dim < ddbox->npbcdim);
841 /* Use an rvec to store two reals */
842 extr_s[d][0] = comm->cell_f0[d+1];
843 extr_s[d][1] = comm->cell_f1[d+1];
844 extr_s[d][2] = comm->cell_f1[d+1];
847 /* Store the extremes in the backward sending buffer,
848 * so the get updated separately from the forward communication.
850 for (d1 = d; d1 < dd->ndim-1; d1++)
852 /* We invert the order to be able to use the same loop for buf_e */
853 buf_s[pos].min0 = extr_s[d1][1];
854 buf_s[pos].max1 = extr_s[d1][0];
855 buf_s[pos].min1 = extr_s[d1][2];
858 /* Store the cell corner of the dimension we communicate along */
859 buf_s[pos].p1_0 = comm->cell_x0[dim];
864 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
867 if (dd->ndim == 3 && d == 0)
869 buf_s[pos] = comm->zone_d2[0][1];
871 buf_s[pos] = comm->zone_d1[0];
875 /* We only need to communicate the extremes
876 * in the forward direction
878 npulse = comm->cd[d].np;
881 /* Take the minimum to avoid double communication */
882 npulse_min = std::min(npulse, dd->nc[dim]-1-npulse);
886 /* Without PBC we should really not communicate over
887 * the boundaries, but implementing that complicates
888 * the communication setup and therefore we simply
889 * do all communication, but ignore some data.
893 for (p = 0; p < npulse_min; p++)
895 /* Communicate the extremes forward */
896 bUse = (bPBC || dd->ci[dim] > 0);
898 dd_sendrecv_rvec(dd, d, dddirForward,
899 extr_s+d, dd->ndim-d-1,
900 extr_r+d, dd->ndim-d-1);
904 for (d1 = d; d1 < dd->ndim-1; d1++)
906 extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
907 extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
908 extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]);
914 for (p = 0; p < npulse; p++)
916 /* Communicate all the zone information backward */
917 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
919 dd_sendrecv_ddzone(dd, d, dddirBackward,
926 for (d1 = d+1; d1 < dd->ndim; d1++)
928 /* Determine the decrease of maximum required
929 * communication height along d1 due to the distance along d,
930 * this avoids a lot of useless atom communication.
932 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
934 if (ddbox->tric_dir[dim])
936 /* c is the off-diagonal coupling between the cell planes
937 * along directions d and d1.
939 c = ddbox->v[dim][dd->dim[d1]][dim];
945 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
948 dh[d1] = comm->cutoff - (c*dist_d + std::sqrt(det))/(1 + c*c);
952 /* A negative value signals out of range */
958 /* Accumulate the extremes over all pulses */
959 for (i = 0; i < buf_size; i++)
969 buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
970 buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
971 buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
974 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
982 if (bUse && dh[d1] >= 0)
984 buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
985 buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
988 /* Copy the received buffer to the send buffer,
989 * to pass the data through with the next pulse.
993 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
994 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
996 /* Store the extremes */
999 for (d1 = d; d1 < dd->ndim-1; d1++)
1001 extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
1002 extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
1003 extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1);
1007 if (d == 1 || (d == 0 && dd->ndim == 3))
1009 for (i = d; i < 2; i++)
1011 comm->zone_d2[1-d][i] = buf_e[pos];
1017 comm->zone_d1[1] = buf_e[pos];
1027 for (i = 0; i < 2; i++)
1031 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1033 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1034 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1040 for (i = 0; i < 2; i++)
1042 for (j = 0; j < 2; j++)
1046 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1048 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1049 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1053 for (d = 1; d < dd->ndim; d++)
1055 comm->cell_f_max0[d] = extr_s[d-1][0];
1056 comm->cell_f_min1[d] = extr_s[d-1][1];
1059 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1060 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1065 static void dd_collect_cg(gmx_domdec_t *dd,
1066 const t_state *state_local)
1068 gmx_domdec_master_t *ma = nullptr;
1069 int buf2[2], *ibuf, i, ncg_home = 0, nat_home = 0;
1071 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1073 /* The master has the correct distribution */
1079 if (state_local->ddp_count == dd->ddp_count)
1081 /* The local state and DD are in sync, use the DD indices */
1082 ncg_home = dd->ncg_home;
1084 nat_home = dd->nat_home;
1086 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1088 /* The DD is out of sync with the local state, but we have stored
1089 * the cg indices with the local state, so we can use those.
1093 cgs_gl = &dd->comm->cgs_gl;
1095 ncg_home = state_local->cg_gl.size();
1096 cg = state_local->cg_gl.data();
1098 for (i = 0; i < ncg_home; i++)
1100 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1105 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1119 /* Collect the charge group and atom counts on the master */
1120 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1125 for (i = 0; i < dd->nnodes; i++)
1127 ma->ncg[i] = ma->ibuf[2*i];
1128 ma->nat[i] = ma->ibuf[2*i+1];
1129 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1132 /* Make byte counts and indices */
1133 for (i = 0; i < dd->nnodes; i++)
1135 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1136 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1140 fprintf(debug, "Initial charge group distribution: ");
1141 for (i = 0; i < dd->nnodes; i++)
1143 fprintf(debug, " %d", ma->ncg[i]);
1145 fprintf(debug, "\n");
1149 /* Collect the charge group indices on the master */
1151 ncg_home*sizeof(int), cg,
1152 DDMASTER(dd) ? ma->ibuf : nullptr,
1153 DDMASTER(dd) ? ma->ibuf+dd->nnodes : nullptr,
1154 DDMASTER(dd) ? ma->cg : nullptr);
1156 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1159 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1160 gmx::ArrayRef<const gmx::RVec> lv,
1161 gmx::ArrayRef<gmx::RVec> v)
1163 gmx_domdec_master_t *ma;
1164 int n, i, c, a, nalloc = 0;
1165 rvec *buf = nullptr;
1173 MPI_Send(const_cast<void *>(static_cast<const void *>(lv.data())), dd->nat_home*sizeof(rvec), MPI_BYTE,
1174 DDMASTERRANK(dd), dd->rank, dd->mpi_comm_all);
1179 /* Copy the master coordinates to the global array */
1180 cgs_gl = &dd->comm->cgs_gl;
1182 n = DDMASTERRANK(dd);
1184 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1186 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1188 copy_rvec(lv[a++], v[c]);
1192 for (n = 0; n < dd->nnodes; n++)
1196 if (ma->nat[n] > nalloc)
1198 nalloc = over_alloc_dd(ma->nat[n]);
1199 srenew(buf, nalloc);
1202 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1203 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1206 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1208 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1210 copy_rvec(buf[a++], v[c]);
1219 static void get_commbuffer_counts(gmx_domdec_t *dd,
1220 int **counts, int **disps)
1222 gmx_domdec_master_t *ma;
1227 /* Make the rvec count and displacment arrays */
1229 *disps = ma->ibuf + dd->nnodes;
1230 for (n = 0; n < dd->nnodes; n++)
1232 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1233 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1237 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1238 gmx::ArrayRef<const gmx::RVec> lv,
1239 gmx::ArrayRef<gmx::RVec> v)
1241 gmx_domdec_master_t *ma;
1242 int *rcounts = nullptr, *disps = nullptr;
1244 rvec *buf = nullptr;
1251 get_commbuffer_counts(dd, &rcounts, &disps);
1256 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv.data(), rcounts, disps, buf);
1260 cgs_gl = &dd->comm->cgs_gl;
1263 for (n = 0; n < dd->nnodes; n++)
1265 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1267 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1269 copy_rvec(buf[a++], v[c]);
1276 void dd_collect_vec(gmx_domdec_t *dd,
1277 const t_state *state_local,
1278 gmx::ArrayRef<const gmx::RVec> lv,
1279 gmx::ArrayRef<gmx::RVec> v)
1281 dd_collect_cg(dd, state_local);
1283 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1285 dd_collect_vec_sendrecv(dd, lv, v);
1289 dd_collect_vec_gatherv(dd, lv, v);
1294 void dd_collect_state(gmx_domdec_t *dd,
1295 const t_state *state_local, t_state *state)
1297 int nh = state_local->nhchainlength;
1301 GMX_RELEASE_ASSERT(state->nhchainlength == nh, "The global and local Nose-Hoover chain lengths should match");
1303 for (int i = 0; i < efptNR; i++)
1305 state->lambda[i] = state_local->lambda[i];
1307 state->fep_state = state_local->fep_state;
1308 state->veta = state_local->veta;
1309 state->vol0 = state_local->vol0;
1310 copy_mat(state_local->box, state->box);
1311 copy_mat(state_local->boxv, state->boxv);
1312 copy_mat(state_local->svir_prev, state->svir_prev);
1313 copy_mat(state_local->fvir_prev, state->fvir_prev);
1314 copy_mat(state_local->pres_prev, state->pres_prev);
1316 for (int i = 0; i < state_local->ngtc; i++)
1318 for (int j = 0; j < nh; j++)
1320 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1321 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1323 state->therm_integral[i] = state_local->therm_integral[i];
1325 for (int i = 0; i < state_local->nnhpres; i++)
1327 for (int j = 0; j < nh; j++)
1329 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1330 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1333 state->baros_integral = state_local->baros_integral;
1335 if (state_local->flags & (1 << estX))
1337 gmx::ArrayRef<gmx::RVec> globalXRef = state ? gmx::makeArrayRef(state->x) : gmx::EmptyArrayRef();
1338 dd_collect_vec(dd, state_local, state_local->x, globalXRef);
1340 if (state_local->flags & (1 << estV))
1342 gmx::ArrayRef<gmx::RVec> globalVRef = state ? gmx::makeArrayRef(state->v) : gmx::EmptyArrayRef();
1343 dd_collect_vec(dd, state_local, state_local->v, globalVRef);
1345 if (state_local->flags & (1 << estCGP))
1347 gmx::ArrayRef<gmx::RVec> globalCgpRef = state ? gmx::makeArrayRef(state->cg_p) : gmx::EmptyArrayRef();
1348 dd_collect_vec(dd, state_local, state_local->cg_p, globalCgpRef);
1352 static void dd_resize_state(t_state *state, PaddedRVecVector *f, int natoms)
1356 fprintf(debug, "Resizing state: currently %d, required %d\n", state->natoms, natoms);
1359 state_change_natoms(state, natoms);
1363 /* We need to allocate one element extra, since we might use
1364 * (unaligned) 4-wide SIMD loads to access rvec entries.
1366 f->resize(paddedRVecVectorSize(natoms));
1370 static void dd_check_alloc_ncg(t_forcerec *fr,
1372 PaddedRVecVector *f,
1373 int numChargeGroups)
1375 if (numChargeGroups > fr->cg_nalloc)
1379 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, numChargeGroups, over_alloc_dd(numChargeGroups));
1381 fr->cg_nalloc = over_alloc_dd(numChargeGroups);
1382 srenew(fr->cginfo, fr->cg_nalloc);
1383 if (fr->cutoff_scheme == ecutsGROUP)
1385 srenew(fr->cg_cm, fr->cg_nalloc);
1388 if (fr->cutoff_scheme == ecutsVERLET)
1390 /* We don't use charge groups, we use x in state to set up
1391 * the atom communication.
1393 dd_resize_state(state, f, numChargeGroups);
1397 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1398 const rvec *v, rvec *lv)
1400 gmx_domdec_master_t *ma;
1401 int n, i, c, a, nalloc = 0;
1402 rvec *buf = nullptr;
1408 for (n = 0; n < dd->nnodes; n++)
1412 if (ma->nat[n] > nalloc)
1414 nalloc = over_alloc_dd(ma->nat[n]);
1415 srenew(buf, nalloc);
1417 /* Use lv as a temporary buffer */
1419 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1421 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1423 copy_rvec(v[c], buf[a++]);
1426 if (a != ma->nat[n])
1428 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1433 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1434 DDRANK(dd, n), n, dd->mpi_comm_all);
1439 n = DDMASTERRANK(dd);
1441 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1443 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1445 copy_rvec(v[c], lv[a++]);
1452 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1453 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1458 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1459 const rvec *v, rvec *lv)
1461 gmx_domdec_master_t *ma;
1462 int *scounts = nullptr, *disps = nullptr;
1464 rvec *buf = nullptr;
1470 get_commbuffer_counts(dd, &scounts, &disps);
1474 for (n = 0; n < dd->nnodes; n++)
1476 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1478 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1480 copy_rvec(v[c], buf[a++]);
1486 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1489 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs,
1490 const rvec *v, rvec *lv)
1492 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1494 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1498 dd_distribute_vec_scatterv(dd, cgs, v, lv);
1502 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1504 if (dfhist == nullptr)
1509 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1510 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1511 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1513 if (dfhist->nlambda > 0)
1515 int nlam = dfhist->nlambda;
1516 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1517 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1518 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1519 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1520 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1521 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
1523 for (int i = 0; i < nlam; i++)
1525 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1526 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1527 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1528 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1529 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1530 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
1535 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1536 t_state *state, t_state *state_local,
1537 PaddedRVecVector *f)
1539 int nh = state_local->nhchainlength;
1543 GMX_RELEASE_ASSERT(state->nhchainlength == nh, "The global and local Nose-Hoover chain lengths should match");
1545 for (int i = 0; i < efptNR; i++)
1547 state_local->lambda[i] = state->lambda[i];
1549 state_local->fep_state = state->fep_state;
1550 state_local->veta = state->veta;
1551 state_local->vol0 = state->vol0;
1552 copy_mat(state->box, state_local->box);
1553 copy_mat(state->box_rel, state_local->box_rel);
1554 copy_mat(state->boxv, state_local->boxv);
1555 copy_mat(state->svir_prev, state_local->svir_prev);
1556 copy_mat(state->fvir_prev, state_local->fvir_prev);
1557 if (state->dfhist != nullptr)
1559 copy_df_history(state_local->dfhist, state->dfhist);
1561 for (int i = 0; i < state_local->ngtc; i++)
1563 for (int j = 0; j < nh; j++)
1565 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1566 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1568 state_local->therm_integral[i] = state->therm_integral[i];
1570 for (int i = 0; i < state_local->nnhpres; i++)
1572 for (int j = 0; j < nh; j++)
1574 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1575 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1578 state_local->baros_integral = state->baros_integral;
1580 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda.data());
1581 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1582 dd_bcast(dd, sizeof(real), &state_local->veta);
1583 dd_bcast(dd, sizeof(real), &state_local->vol0);
1584 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1585 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1586 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1587 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1588 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1589 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi.data());
1590 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi.data());
1591 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral.data());
1592 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi.data());
1593 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi.data());
1595 /* communicate df_history -- required for restarting from checkpoint */
1596 dd_distribute_dfhist(dd, state_local->dfhist);
1598 dd_resize_state(state_local, f, dd->nat_home);
1600 if (state_local->flags & (1 << estX))
1602 const rvec *xGlobal = (DDMASTER(dd) ? as_rvec_array(state->x.data()) : nullptr);
1603 dd_distribute_vec(dd, cgs, xGlobal, as_rvec_array(state_local->x.data()));
1605 if (state_local->flags & (1 << estV))
1607 const rvec *vGlobal = (DDMASTER(dd) ? as_rvec_array(state->v.data()) : nullptr);
1608 dd_distribute_vec(dd, cgs, vGlobal, as_rvec_array(state_local->v.data()));
1610 if (state_local->flags & (1 << estCGP))
1612 const rvec *cgpGlobal = (DDMASTER(dd) ? as_rvec_array(state->cg_p.data()) : nullptr);
1613 dd_distribute_vec(dd, cgs, cgpGlobal, as_rvec_array(state_local->cg_p.data()));
1617 static char dim2char(int dim)
1623 case XX: c = 'X'; break;
1624 case YY: c = 'Y'; break;
1625 case ZZ: c = 'Z'; break;
1626 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
1632 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1633 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1635 rvec grid_s[2], *grid_r = nullptr, cx, r;
1636 char fname[STRLEN], buf[22];
1638 int a, i, d, z, y, x;
1642 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1643 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1647 snew(grid_r, 2*dd->nnodes);
1650 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : nullptr);
1654 for (d = 0; d < DIM; d++)
1656 for (i = 0; i < DIM; i++)
1664 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1666 tric[d][i] = box[i][d]/box[i][i];
1675 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1676 out = gmx_fio_fopen(fname, "w");
1677 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1679 for (i = 0; i < dd->nnodes; i++)
1681 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1682 for (d = 0; d < DIM; d++)
1684 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1686 for (z = 0; z < 2; z++)
1688 for (y = 0; y < 2; y++)
1690 for (x = 0; x < 2; x++)
1692 cx[XX] = grid_r[i*2+x][XX];
1693 cx[YY] = grid_r[i*2+y][YY];
1694 cx[ZZ] = grid_r[i*2+z][ZZ];
1696 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1697 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
1701 for (d = 0; d < DIM; d++)
1703 for (x = 0; x < 4; x++)
1707 case 0: y = 1 + i*8 + 2*x; break;
1708 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1709 case 2: y = 1 + i*8 + x; break;
1711 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
1715 gmx_fio_fclose(out);
1720 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
1721 const gmx_mtop_t *mtop, t_commrec *cr,
1722 int natoms, rvec x[], matrix box)
1724 char fname[STRLEN], buf[22];
1726 int i, ii, resnr, c;
1727 const char *atomname, *resname;
1734 natoms = dd->comm->nat[ddnatVSITE];
1737 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
1739 out = gmx_fio_fopen(fname, "w");
1741 fprintf(out, "TITLE %s\n", title);
1742 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1744 for (i = 0; i < natoms; i++)
1746 ii = dd->gatindex[i];
1747 mtopGetAtomAndResidueName(mtop, ii, &molb, &atomname, &resnr, &resname, nullptr);
1748 if (i < dd->comm->nat[ddnatZONE])
1751 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
1757 else if (i < dd->comm->nat[ddnatVSITE])
1759 b = dd->comm->zones.n;
1763 b = dd->comm->zones.n + 1;
1765 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
1766 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
1768 fprintf(out, "TER\n");
1770 gmx_fio_fclose(out);
1773 real dd_cutoff_multibody(const gmx_domdec_t *dd)
1775 gmx_domdec_comm_t *comm;
1782 if (comm->bInterCGBondeds)
1784 if (comm->cutoff_mbody > 0)
1786 r = comm->cutoff_mbody;
1790 /* cutoff_mbody=0 means we do not have DLB */
1791 r = comm->cellsize_min[dd->dim[0]];
1792 for (di = 1; di < dd->ndim; di++)
1794 r = std::min(r, comm->cellsize_min[dd->dim[di]]);
1796 if (comm->bBondComm)
1798 r = std::max(r, comm->cutoff_mbody);
1802 r = std::min(r, comm->cutoff);
1810 real dd_cutoff_twobody(const gmx_domdec_t *dd)
1814 r_mb = dd_cutoff_multibody(dd);
1816 return std::max(dd->comm->cutoff, r_mb);
1820 static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
1825 nc = dd->nc[dd->comm->cartpmedim];
1826 ntot = dd->comm->ntot[dd->comm->cartpmedim];
1827 copy_ivec(coord, coord_pme);
1828 coord_pme[dd->comm->cartpmedim] =
1829 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
1832 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
1837 npme = dd->comm->npmenodes;
1839 /* Here we assign a PME node to communicate with this DD node
1840 * by assuming that the major index of both is x.
1841 * We add cr->npmenodes/2 to obtain an even distribution.
1843 return (ddindex*npme + npme/2)/npp;
1846 static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
1851 snew(pme_rank, dd->comm->npmenodes);
1853 for (i = 0; i < dd->nnodes; i++)
1855 p0 = ddindex2pmeindex(dd, i);
1856 p1 = ddindex2pmeindex(dd, i+1);
1857 if (i+1 == dd->nnodes || p1 > p0)
1861 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
1863 pme_rank[n] = i + 1 + n;
1871 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
1879 if (dd->comm->bCartesian) {
1880 gmx_ddindex2xyz(dd->nc,ddindex,coords);
1881 dd_coords2pmecoords(dd,coords,coords_pme);
1882 copy_ivec(dd->ntot,nc);
1883 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1884 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1886 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
1888 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
1894 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
1899 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
1901 gmx_domdec_comm_t *comm;
1903 int ddindex, nodeid = -1;
1905 comm = cr->dd->comm;
1910 if (comm->bCartesianPP_PME)
1913 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
1918 ddindex = dd_index(cr->dd->nc, coords);
1919 if (comm->bCartesianPP)
1921 nodeid = comm->ddindex2simnodeid[ddindex];
1927 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
1939 static int dd_simnode2pmenode(const gmx_domdec_t *dd,
1940 const t_commrec gmx_unused *cr,
1945 const gmx_domdec_comm_t *comm = dd->comm;
1947 /* This assumes a uniform x domain decomposition grid cell size */
1948 if (comm->bCartesianPP_PME)
1951 ivec coord, coord_pme;
1952 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
1953 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1955 /* This is a PP node */
1956 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1957 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
1961 else if (comm->bCartesianPP)
1963 if (sim_nodeid < dd->nnodes)
1965 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1970 /* This assumes DD cells with identical x coordinates
1971 * are numbered sequentially.
1973 if (dd->comm->pmenodes == nullptr)
1975 if (sim_nodeid < dd->nnodes)
1977 /* The DD index equals the nodeid */
1978 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1984 while (sim_nodeid > dd->comm->pmenodes[i])
1988 if (sim_nodeid < dd->comm->pmenodes[i])
1990 pmenode = dd->comm->pmenodes[i];
1998 void get_pme_nnodes(const gmx_domdec_t *dd,
1999 int *npmenodes_x, int *npmenodes_y)
2003 *npmenodes_x = dd->comm->npmenodes_x;
2004 *npmenodes_y = dd->comm->npmenodes_y;
2013 std::vector<int> get_pme_ddranks(t_commrec *cr, int pmenodeid)
2017 ivec coord, coord_pme;
2021 std::vector<int> ddranks;
2022 ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2024 for (x = 0; x < dd->nc[XX]; x++)
2026 for (y = 0; y < dd->nc[YY]; y++)
2028 for (z = 0; z < dd->nc[ZZ]; z++)
2030 if (dd->comm->bCartesianPP_PME)
2035 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2036 if (dd->ci[XX] == coord_pme[XX] &&
2037 dd->ci[YY] == coord_pme[YY] &&
2038 dd->ci[ZZ] == coord_pme[ZZ])
2040 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
2045 /* The slab corresponds to the nodeid in the PME group */
2046 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2048 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
2057 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
2059 gmx_bool bReceive = TRUE;
2061 if (cr->npmenodes < dd->nnodes)
2063 gmx_domdec_comm_t *comm = dd->comm;
2064 if (comm->bCartesianPP_PME)
2067 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
2069 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2070 coords[comm->cartpmedim]++;
2071 if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2074 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2075 if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
2077 /* This is not the last PP node for pmenode */
2082 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
2087 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
2088 if (cr->sim_nodeid+1 < cr->nnodes &&
2089 dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
2091 /* This is not the last PP node for pmenode */
2100 static void set_zones_ncg_home(gmx_domdec_t *dd)
2102 gmx_domdec_zones_t *zones;
2105 zones = &dd->comm->zones;
2107 zones->cg_range[0] = 0;
2108 for (i = 1; i < zones->n+1; i++)
2110 zones->cg_range[i] = dd->ncg_home;
2112 /* zone_ncg1[0] should always be equal to ncg_home */
2113 dd->comm->zone_ncg1[0] = dd->ncg_home;
2116 static void rebuild_cgindex(gmx_domdec_t *dd,
2117 const int *gcgs_index, const t_state *state)
2119 int * gmx_restrict dd_cg_gl = dd->index_gl;
2120 int * gmx_restrict cgindex = dd->cgindex;
2123 /* Copy back the global charge group indices from state
2124 * and rebuild the local charge group to atom index.
2127 for (unsigned int i = 0; i < state->cg_gl.size(); i++)
2130 int cg_gl = state->cg_gl[i];
2131 dd_cg_gl[i] = cg_gl;
2132 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2134 cgindex[state->cg_gl.size()] = nat;
2136 dd->ncg_home = state->cg_gl.size();
2139 set_zones_ncg_home(dd);
2142 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2144 while (cg >= cginfo_mb->cg_end)
2149 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2152 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2153 t_forcerec *fr, char *bLocalCG)
2155 cginfo_mb_t *cginfo_mb;
2161 cginfo_mb = fr->cginfo_mb;
2162 cginfo = fr->cginfo;
2164 for (cg = cg0; cg < cg1; cg++)
2166 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2170 if (bLocalCG != nullptr)
2172 for (cg = cg0; cg < cg1; cg++)
2174 bLocalCG[index_gl[cg]] = TRUE;
2179 static void make_dd_indices(gmx_domdec_t *dd,
2180 const int *gcgs_index, int cg_start)
2182 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2183 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2186 if (dd->nat_tot > dd->gatindex_nalloc)
2188 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2189 srenew(dd->gatindex, dd->gatindex_nalloc);
2192 nzone = dd->comm->zones.n;
2193 zone2cg = dd->comm->zones.cg_range;
2194 zone_ncg1 = dd->comm->zone_ncg1;
2195 index_gl = dd->index_gl;
2196 gatindex = dd->gatindex;
2197 bCGs = dd->comm->bCGs;
2199 if (zone2cg[1] != dd->ncg_home)
2201 gmx_incons("dd->ncg_zone is not up to date");
2204 /* Make the local to global and global to local atom index */
2205 a = dd->cgindex[cg_start];
2206 for (zone = 0; zone < nzone; zone++)
2214 cg0 = zone2cg[zone];
2216 cg1 = zone2cg[zone+1];
2217 cg1_p1 = cg0 + zone_ncg1[zone];
2219 for (cg = cg0; cg < cg1; cg++)
2224 /* Signal that this cg is from more than one pulse away */
2227 cg_gl = index_gl[cg];
2230 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2233 ga2la_set(dd->ga2la, a_gl, a, zone1);
2239 gatindex[a] = cg_gl;
2240 ga2la_set(dd->ga2la, cg_gl, a, zone1);
2247 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2253 if (bLocalCG == nullptr)
2257 for (i = 0; i < dd->ncg_tot; i++)
2259 if (!bLocalCG[dd->index_gl[i]])
2262 "DD rank %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
2267 for (i = 0; i < ncg_sys; i++)
2274 if (ngl != dd->ncg_tot)
2276 fprintf(stderr, "DD rank %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
2283 static void check_index_consistency(gmx_domdec_t *dd,
2284 int natoms_sys, int ncg_sys,
2287 int nerr, ngl, i, a, cell;
2292 if (dd->comm->DD_debug > 1)
2294 snew(have, natoms_sys);
2295 for (a = 0; a < dd->nat_tot; a++)
2297 if (have[dd->gatindex[a]] > 0)
2299 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2303 have[dd->gatindex[a]] = a + 1;
2309 snew(have, dd->nat_tot);
2312 for (i = 0; i < natoms_sys; i++)
2314 if (ga2la_get(dd->ga2la, i, &a, &cell))
2316 if (a >= dd->nat_tot)
2318 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2324 if (dd->gatindex[a] != i)
2326 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2333 if (ngl != dd->nat_tot)
2336 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
2337 dd->rank, where, ngl, dd->nat_tot);
2339 for (a = 0; a < dd->nat_tot; a++)
2344 "DD rank %d, %s: local atom %d, global %d has no global index\n",
2345 dd->rank, where, a+1, dd->gatindex[a]+1);
2350 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2354 gmx_fatal(FARGS, "DD rank %d, %s: %d atom/cg index inconsistencies",
2355 dd->rank, where, nerr);
2359 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2366 /* Clear the whole list without searching */
2367 ga2la_clear(dd->ga2la);
2371 for (i = a_start; i < dd->nat_tot; i++)
2373 ga2la_del(dd->ga2la, dd->gatindex[i]);
2377 bLocalCG = dd->comm->bLocalCG;
2380 for (i = cg_start; i < dd->ncg_tot; i++)
2382 bLocalCG[dd->index_gl[i]] = FALSE;
2386 dd_clear_local_vsite_indices(dd);
2388 if (dd->constraints)
2390 dd_clear_local_constraint_indices(dd);
2394 /* This function should be used for moving the domain boudaries during DLB,
2395 * for obtaining the minimum cell size. It checks the initially set limit
2396 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2397 * and, possibly, a longer cut-off limit set for PME load balancing.
2399 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2403 cellsize_min = comm->cellsize_min[dim];
2405 if (!comm->bVacDLBNoLimit)
2407 /* The cut-off might have changed, e.g. by PME load balacning,
2408 * from the value used to set comm->cellsize_min, so check it.
2410 cellsize_min = std::max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2412 if (comm->bPMELoadBalDLBLimits)
2414 /* Check for the cut-off limit set by the PME load balancing */
2415 cellsize_min = std::max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2419 return cellsize_min;
2422 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2425 real grid_jump_limit;
2427 /* The distance between the boundaries of cells at distance
2428 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2429 * and by the fact that cells should not be shifted by more than
2430 * half their size, such that cg's only shift by one cell
2431 * at redecomposition.
2433 grid_jump_limit = comm->cellsize_limit;
2434 if (!comm->bVacDLBNoLimit)
2436 if (comm->bPMELoadBalDLBLimits)
2438 cutoff = std::max(cutoff, comm->PMELoadBal_max_cutoff);
2440 grid_jump_limit = std::max(grid_jump_limit,
2441 cutoff/comm->cd[dim_ind].np);
2444 return grid_jump_limit;
2447 static gmx_bool check_grid_jump(gmx_int64_t step,
2453 gmx_domdec_comm_t *comm;
2462 for (d = 1; d < dd->ndim; d++)
2465 limit = grid_jump_limit(comm, cutoff, d);
2466 bfac = ddbox->box_size[dim];
2467 if (ddbox->tric_dir[dim])
2469 bfac *= ddbox->skew_fac[dim];
2471 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2472 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2480 /* This error should never be triggered under normal
2481 * circumstances, but you never know ...
2483 gmx_fatal(FARGS, "step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
2484 gmx_step_str(step, buf),
2485 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2493 static int dd_load_count(gmx_domdec_comm_t *comm)
2495 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2498 static float dd_force_load(gmx_domdec_comm_t *comm)
2505 if (comm->eFlop > 1)
2507 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2512 load = comm->cycl[ddCyclF];
2513 if (comm->cycl_n[ddCyclF] > 1)
2515 /* Subtract the maximum of the last n cycle counts
2516 * to get rid of possible high counts due to other sources,
2517 * for instance system activity, that would otherwise
2518 * affect the dynamic load balancing.
2520 load -= comm->cycl_max[ddCyclF];
2524 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2526 float gpu_wait, gpu_wait_sum;
2528 gpu_wait = comm->cycl[ddCyclWaitGPU];
2529 if (comm->cycl_n[ddCyclF] > 1)
2531 /* We should remove the WaitGPU time of the same MD step
2532 * as the one with the maximum F time, since the F time
2533 * and the wait time are not independent.
2534 * Furthermore, the step for the max F time should be chosen
2535 * the same on all ranks that share the same GPU.
2536 * But to keep the code simple, we remove the average instead.
2537 * The main reason for artificially long times at some steps
2538 * is spurious CPU activity or MPI time, so we don't expect
2539 * that changes in the GPU wait time matter a lot here.
2541 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2543 /* Sum the wait times over the ranks that share the same GPU */
2544 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2545 comm->mpi_comm_gpu_shared);
2546 /* Replace the wait time by the average over the ranks */
2547 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
2555 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2557 gmx_domdec_comm_t *comm;
2562 snew(*dim_f, dd->nc[dim]+1);
2564 for (i = 1; i < dd->nc[dim]; i++)
2566 if (comm->slb_frac[dim])
2568 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2572 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2575 (*dim_f)[dd->nc[dim]] = 1;
2578 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2580 int pmeindex, slab, nso, i;
2583 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2589 ddpme->dim = dimind;
2591 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2593 ddpme->nslab = (ddpme->dim == 0 ?
2594 dd->comm->npmenodes_x :
2595 dd->comm->npmenodes_y);
2597 if (ddpme->nslab <= 1)
2602 nso = dd->comm->npmenodes/ddpme->nslab;
2603 /* Determine for each PME slab the PP location range for dimension dim */
2604 snew(ddpme->pp_min, ddpme->nslab);
2605 snew(ddpme->pp_max, ddpme->nslab);
2606 for (slab = 0; slab < ddpme->nslab; slab++)
2608 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2609 ddpme->pp_max[slab] = 0;
2611 for (i = 0; i < dd->nnodes; i++)
2613 ddindex2xyz(dd->nc, i, xyz);
2614 /* For y only use our y/z slab.
2615 * This assumes that the PME x grid size matches the DD grid size.
2617 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2619 pmeindex = ddindex2pmeindex(dd, i);
2622 slab = pmeindex/nso;
2626 slab = pmeindex % ddpme->nslab;
2628 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
2629 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
2633 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
2636 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
2638 if (dd->comm->ddpme[0].dim == XX)
2640 return dd->comm->ddpme[0].maxshift;
2648 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
2650 if (dd->comm->ddpme[0].dim == YY)
2652 return dd->comm->ddpme[0].maxshift;
2654 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2656 return dd->comm->ddpme[1].maxshift;
2664 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
2665 gmx_bool bUniform, const gmx_ddbox_t *ddbox,
2668 gmx_domdec_comm_t *comm;
2671 real range, pme_boundary;
2675 nc = dd->nc[ddpme->dim];
2678 if (!ddpme->dim_match)
2680 /* PP decomposition is not along dim: the worst situation */
2683 else if (ns <= 3 || (bUniform && ns == nc))
2685 /* The optimal situation */
2690 /* We need to check for all pme nodes which nodes they
2691 * could possibly need to communicate with.
2693 xmin = ddpme->pp_min;
2694 xmax = ddpme->pp_max;
2695 /* Allow for atoms to be maximally 2/3 times the cut-off
2696 * out of their DD cell. This is a reasonable balance between
2697 * between performance and support for most charge-group/cut-off
2700 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
2701 /* Avoid extra communication when we are exactly at a boundary */
2705 for (s = 0; s < ns; s++)
2707 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
2708 pme_boundary = (real)s/ns;
2711 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
2713 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
2717 pme_boundary = (real)(s+1)/ns;
2720 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
2722 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
2729 ddpme->maxshift = sh;
2733 fprintf(debug, "PME slab communication range for dim %d is %d\n",
2734 ddpme->dim, ddpme->maxshift);
2738 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
2742 for (d = 0; d < dd->ndim; d++)
2745 if (dim < ddbox->nboundeddim &&
2746 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
2747 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
2749 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
2750 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
2751 dd->nc[dim], dd->comm->cellsize_limit);
2757 setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY
2760 /* Set the domain boundaries. Use for static (or no) load balancing,
2761 * and also for the starting state for dynamic load balancing.
2762 * setmode determine if and where the boundaries are stored, use enum above.
2763 * Returns the number communication pulses in npulse.
2765 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, const gmx_ddbox_t *ddbox,
2766 int setmode, ivec npulse)
2768 gmx_domdec_comm_t *comm;
2771 real *cell_x, cell_dx, cellsize;
2775 for (d = 0; d < DIM; d++)
2777 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
2779 if (dd->nc[d] == 1 || comm->slb_frac[d] == nullptr)
2782 cell_dx = ddbox->box_size[d]/dd->nc[d];
2785 case setcellsizeslbMASTER:
2786 for (j = 0; j < dd->nc[d]+1; j++)
2788 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
2791 case setcellsizeslbLOCAL:
2792 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
2793 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
2798 cellsize = cell_dx*ddbox->skew_fac[d];
2799 while (cellsize*npulse[d] < comm->cutoff)
2803 cellsize_min[d] = cellsize;
2807 /* Statically load balanced grid */
2808 /* Also when we are not doing a master distribution we determine
2809 * all cell borders in a loop to obtain identical values
2810 * to the master distribution case and to determine npulse.
2812 if (setmode == setcellsizeslbMASTER)
2814 cell_x = dd->ma->cell_x[d];
2818 snew(cell_x, dd->nc[d]+1);
2820 cell_x[0] = ddbox->box0[d];
2821 for (j = 0; j < dd->nc[d]; j++)
2823 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
2824 cell_x[j+1] = cell_x[j] + cell_dx;
2825 cellsize = cell_dx*ddbox->skew_fac[d];
2826 while (cellsize*npulse[d] < comm->cutoff &&
2827 npulse[d] < dd->nc[d]-1)
2831 cellsize_min[d] = std::min(cellsize_min[d], cellsize);
2833 if (setmode == setcellsizeslbLOCAL)
2835 comm->cell_x0[d] = cell_x[dd->ci[d]];
2836 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
2838 if (setmode != setcellsizeslbMASTER)
2843 /* The following limitation is to avoid that a cell would receive
2844 * some of its own home charge groups back over the periodic boundary.
2845 * Double charge groups cause trouble with the global indices.
2847 if (d < ddbox->npbcdim &&
2848 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
2850 char error_string[STRLEN];
2852 sprintf(error_string,
2853 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
2854 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
2856 dd->nc[d], dd->nc[d],
2857 dd->nnodes > dd->nc[d] ? "cells" : "ranks");
2859 if (setmode == setcellsizeslbLOCAL)
2861 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2866 gmx_fatal(FARGS, error_string);
2873 copy_rvec(cellsize_min, comm->cellsize_min);
2876 for (d = 0; d < comm->npmedecompdim; d++)
2878 set_pme_maxshift(dd, &comm->ddpme[d],
2879 comm->slb_frac[dd->dim[d]] == nullptr, ddbox,
2880 comm->ddpme[d].slb_dim_f);
2885 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
2886 int d, int dim, domdec_root_t *root,
2887 const gmx_ddbox_t *ddbox,
2888 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
2890 gmx_domdec_comm_t *comm;
2891 int ncd, i, j, nmin, nmin_old;
2892 gmx_bool bLimLo, bLimHi;
2894 real fac, halfway, cellsize_limit_f_i, region_size;
2895 gmx_bool bPBC, bLastHi = FALSE;
2896 int nrange[] = {range[0], range[1]};
2898 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
2904 bPBC = (dim < ddbox->npbcdim);
2906 cell_size = root->buf_ncd;
2910 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
2913 /* First we need to check if the scaling does not make cells
2914 * smaller than the smallest allowed size.
2915 * We need to do this iteratively, since if a cell is too small,
2916 * it needs to be enlarged, which makes all the other cells smaller,
2917 * which could in turn make another cell smaller than allowed.
2919 for (i = range[0]; i < range[1]; i++)
2921 root->bCellMin[i] = FALSE;
2927 /* We need the total for normalization */
2929 for (i = range[0]; i < range[1]; i++)
2931 if (root->bCellMin[i] == FALSE)
2933 fac += cell_size[i];
2936 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
2937 /* Determine the cell boundaries */
2938 for (i = range[0]; i < range[1]; i++)
2940 if (root->bCellMin[i] == FALSE)
2942 cell_size[i] *= fac;
2943 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
2945 cellsize_limit_f_i = 0;
2949 cellsize_limit_f_i = cellsize_limit_f;
2951 if (cell_size[i] < cellsize_limit_f_i)
2953 root->bCellMin[i] = TRUE;
2954 cell_size[i] = cellsize_limit_f_i;
2958 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
2961 while (nmin > nmin_old);
2964 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
2965 /* For this check we should not use DD_CELL_MARGIN,
2966 * but a slightly smaller factor,
2967 * since rounding could get use below the limit.
2969 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
2972 gmx_fatal(FARGS, "step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
2973 gmx_step_str(step, buf),
2974 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
2975 ncd, comm->cellsize_min[dim]);
2978 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
2982 /* Check if the boundary did not displace more than halfway
2983 * each of the cells it bounds, as this could cause problems,
2984 * especially when the differences between cell sizes are large.
2985 * If changes are applied, they will not make cells smaller
2986 * than the cut-off, as we check all the boundaries which
2987 * might be affected by a change and if the old state was ok,
2988 * the cells will at most be shrunk back to their old size.
2990 for (i = range[0]+1; i < range[1]; i++)
2992 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
2993 if (root->cell_f[i] < halfway)
2995 root->cell_f[i] = halfway;
2996 /* Check if the change also causes shifts of the next boundaries */
2997 for (j = i+1; j < range[1]; j++)
2999 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3001 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3005 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3006 if (root->cell_f[i] > halfway)
3008 root->cell_f[i] = halfway;
3009 /* Check if the change also causes shifts of the next boundaries */
3010 for (j = i-1; j >= range[0]+1; j--)
3012 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3014 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3021 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3022 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3023 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3024 * for a and b nrange is used */
3027 /* Take care of the staggering of the cell boundaries */
3030 for (i = range[0]; i < range[1]; i++)
3032 root->cell_f_max0[i] = root->cell_f[i];
3033 root->cell_f_min1[i] = root->cell_f[i+1];
3038 for (i = range[0]+1; i < range[1]; i++)
3040 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3041 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3042 if (bLimLo && bLimHi)
3044 /* Both limits violated, try the best we can */
3045 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3046 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3047 nrange[0] = range[0];
3049 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3052 nrange[1] = range[1];
3053 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3059 /* root->cell_f[i] = root->bound_min[i]; */
3060 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3063 else if (bLimHi && !bLastHi)
3066 if (nrange[1] < range[1]) /* found a LimLo before */
3068 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3069 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3070 nrange[0] = nrange[1];
3072 root->cell_f[i] = root->bound_max[i];
3074 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3076 nrange[1] = range[1];
3079 if (nrange[1] < range[1]) /* found last a LimLo */
3081 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3082 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3083 nrange[0] = nrange[1];
3084 nrange[1] = range[1];
3085 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3087 else if (nrange[0] > range[0]) /* found at least one LimHi */
3089 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3096 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3097 int d, int dim, domdec_root_t *root,
3098 const gmx_ddbox_t *ddbox,
3099 gmx_bool bDynamicBox,
3100 gmx_bool bUniform, gmx_int64_t step)
3102 gmx_domdec_comm_t *comm;
3103 int ncd, d1, i, pos;
3105 real load_aver, load_i, imbalance, change, change_max, sc;
3106 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3110 int range[] = { 0, 0 };
3114 /* Convert the maximum change from the input percentage to a fraction */
3115 change_limit = comm->dlb_scale_lim*0.01;
3119 bPBC = (dim < ddbox->npbcdim);
3121 cell_size = root->buf_ncd;
3123 /* Store the original boundaries */
3124 for (i = 0; i < ncd+1; i++)
3126 root->old_cell_f[i] = root->cell_f[i];
3130 for (i = 0; i < ncd; i++)
3132 cell_size[i] = 1.0/ncd;
3135 else if (dd_load_count(comm) > 0)
3137 load_aver = comm->load[d].sum_m/ncd;
3139 for (i = 0; i < ncd; i++)
3141 /* Determine the relative imbalance of cell i */
3142 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3143 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3144 /* Determine the change of the cell size using underrelaxation */
3145 change = -relax*imbalance;
3146 change_max = std::max(change_max, std::max(change, -change));
3148 /* Limit the amount of scaling.
3149 * We need to use the same rescaling for all cells in one row,
3150 * otherwise the load balancing might not converge.
3153 if (change_max > change_limit)
3155 sc *= change_limit/change_max;
3157 for (i = 0; i < ncd; i++)
3159 /* Determine the relative imbalance of cell i */
3160 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3161 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3162 /* Determine the change of the cell size using underrelaxation */
3163 change = -sc*imbalance;
3164 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3168 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3169 cellsize_limit_f *= DD_CELL_MARGIN;
3170 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3171 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3172 if (ddbox->tric_dir[dim])
3174 cellsize_limit_f /= ddbox->skew_fac[dim];
3175 dist_min_f /= ddbox->skew_fac[dim];
3177 if (bDynamicBox && d > 0)
3179 dist_min_f *= DD_PRES_SCALE_MARGIN;
3181 if (d > 0 && !bUniform)
3183 /* Make sure that the grid is not shifted too much */
3184 for (i = 1; i < ncd; i++)
3186 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3188 gmx_incons("Inconsistent DD boundary staggering limits!");
3190 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3191 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3194 root->bound_min[i] += 0.5*space;
3196 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3197 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3200 root->bound_max[i] += 0.5*space;
3205 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3207 root->cell_f_max0[i-1] + dist_min_f,
3208 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3209 root->cell_f_min1[i] - dist_min_f);
3214 root->cell_f[0] = 0;
3215 root->cell_f[ncd] = 1;
3216 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3219 /* After the checks above, the cells should obey the cut-off
3220 * restrictions, but it does not hurt to check.
3222 for (i = 0; i < ncd; i++)
3226 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3227 dim, i, root->cell_f[i], root->cell_f[i+1]);
3230 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3231 root->cell_f[i+1] - root->cell_f[i] <
3232 cellsize_limit_f/DD_CELL_MARGIN)
3236 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3237 gmx_step_str(step, buf), dim2char(dim), i,
3238 (root->cell_f[i+1] - root->cell_f[i])
3239 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3244 /* Store the cell boundaries of the lower dimensions at the end */
3245 for (d1 = 0; d1 < d; d1++)
3247 root->cell_f[pos++] = comm->cell_f0[d1];
3248 root->cell_f[pos++] = comm->cell_f1[d1];
3251 if (d < comm->npmedecompdim)
3253 /* The master determines the maximum shift for
3254 * the coordinate communication between separate PME nodes.
3256 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
3258 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3261 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3265 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3266 const gmx_ddbox_t *ddbox,
3269 gmx_domdec_comm_t *comm;
3274 /* Set the cell dimensions */
3275 dim = dd->dim[dimind];
3276 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3277 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3278 if (dim >= ddbox->nboundeddim)
3280 comm->cell_x0[dim] += ddbox->box0[dim];
3281 comm->cell_x1[dim] += ddbox->box0[dim];
3285 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3286 int d, int dim, real *cell_f_row,
3287 const gmx_ddbox_t *ddbox)
3289 gmx_domdec_comm_t *comm;
3295 /* Each node would only need to know two fractions,
3296 * but it is probably cheaper to broadcast the whole array.
3298 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3299 0, comm->mpi_comm_load[d]);
3301 /* Copy the fractions for this dimension from the buffer */
3302 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3303 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3304 /* The whole array was communicated, so set the buffer position */
3305 pos = dd->nc[dim] + 1;
3306 for (d1 = 0; d1 <= d; d1++)
3310 /* Copy the cell fractions of the lower dimensions */
3311 comm->cell_f0[d1] = cell_f_row[pos++];
3312 comm->cell_f1[d1] = cell_f_row[pos++];
3314 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3316 /* Convert the communicated shift from float to int */
3317 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3320 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3324 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3325 const gmx_ddbox_t *ddbox,
3326 gmx_bool bDynamicBox,
3327 gmx_bool bUniform, gmx_int64_t step)
3329 gmx_domdec_comm_t *comm;
3331 gmx_bool bRowMember, bRowRoot;
3336 for (d = 0; d < dd->ndim; d++)
3341 for (d1 = d; d1 < dd->ndim; d1++)
3343 if (dd->ci[dd->dim[d1]] > 0)
3356 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3357 ddbox, bDynamicBox, bUniform, step);
3358 cell_f_row = comm->root[d]->cell_f;
3362 cell_f_row = comm->cell_f_row;
3364 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
3369 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd,
3370 const gmx_ddbox_t *ddbox)
3374 /* This function assumes the box is static and should therefore
3375 * not be called when the box has changed since the last
3376 * call to dd_partition_system.
3378 for (d = 0; d < dd->ndim; d++)
3380 relative_to_absolute_cell_bounds(dd, ddbox, d);
3386 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3387 const gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3388 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3389 gmx_wallcycle_t wcycle)
3391 gmx_domdec_comm_t *comm;
3398 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3399 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3400 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3402 else if (bDynamicBox)
3404 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3407 /* Set the dimensions for which no DD is used */
3408 for (dim = 0; dim < DIM; dim++)
3410 if (dd->nc[dim] == 1)
3412 comm->cell_x0[dim] = 0;
3413 comm->cell_x1[dim] = ddbox->box_size[dim];
3414 if (dim >= ddbox->nboundeddim)
3416 comm->cell_x0[dim] += ddbox->box0[dim];
3417 comm->cell_x1[dim] += ddbox->box0[dim];
3423 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3426 gmx_domdec_comm_dim_t *cd;
3428 for (d = 0; d < dd->ndim; d++)
3430 cd = &dd->comm->cd[d];
3431 np = npulse[dd->dim[d]];
3432 if (np > cd->np_nalloc)
3436 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3437 dim2char(dd->dim[d]), np);
3439 if (DDMASTER(dd) && cd->np_nalloc > 0)
3441 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3443 srenew(cd->ind, np);
3444 for (i = cd->np_nalloc; i < np; i++)
3446 cd->ind[i].index = nullptr;
3447 cd->ind[i].nalloc = 0;
3456 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3457 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3458 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3459 gmx_wallcycle_t wcycle)
3461 gmx_domdec_comm_t *comm;
3467 /* Copy the old cell boundaries for the cg displacement check */
3468 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3469 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3475 check_box_size(dd, ddbox);
3477 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3481 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
3482 realloc_comm_ind(dd, npulse);
3487 for (d = 0; d < DIM; d++)
3489 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3490 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
3495 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3497 rvec cell_ns_x0, rvec cell_ns_x1,
3500 gmx_domdec_comm_t *comm;
3505 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3507 dim = dd->dim[dim_ind];
3509 /* Without PBC we don't have restrictions on the outer cells */
3510 if (!(dim >= ddbox->npbcdim &&
3511 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3513 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3514 comm->cellsize_min[dim])
3517 gmx_fatal(FARGS, "step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3518 gmx_step_str(step, buf), dim2char(dim),
3519 comm->cell_x1[dim] - comm->cell_x0[dim],
3520 ddbox->skew_fac[dim],
3521 dd->comm->cellsize_min[dim],
3522 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3526 if ((isDlbOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3528 /* Communicate the boundaries and update cell_ns_x0/1 */
3529 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3530 if (isDlbOn(dd->comm) && dd->ndim > 1)
3532 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
3537 static void make_tric_corr_matrix(int npbcdim, const matrix box, matrix tcm)
3541 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3549 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3550 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3559 static void check_screw_box(const matrix box)
3561 /* Mathematical limitation */
3562 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3564 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3567 /* Limitation due to the asymmetry of the eighth shell method */
3568 if (box[ZZ][YY] != 0)
3570 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
3574 static void distribute_cg(FILE *fplog,
3575 const matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3578 gmx_domdec_master_t *ma;
3579 int **tmp_ind = nullptr, *tmp_nalloc = nullptr;
3580 int i, icg, j, k, k0, k1, d;
3584 real nrcg, inv_ncg, pos_d;
3590 snew(tmp_nalloc, dd->nnodes);
3591 snew(tmp_ind, dd->nnodes);
3592 for (i = 0; i < dd->nnodes; i++)
3594 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3595 snew(tmp_ind[i], tmp_nalloc[i]);
3598 /* Clear the count */
3599 for (i = 0; i < dd->nnodes; i++)
3605 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3607 cgindex = cgs->index;
3609 /* Compute the center of geometry for all charge groups */
3610 for (icg = 0; icg < cgs->nr; icg++)
3613 k1 = cgindex[icg+1];
3617 copy_rvec(pos[k0], cg_cm);
3624 for (k = k0; (k < k1); k++)
3626 rvec_inc(cg_cm, pos[k]);
3628 for (d = 0; (d < DIM); d++)
3630 cg_cm[d] *= inv_ncg;
3633 /* Put the charge group in the box and determine the cell index */
3634 for (d = DIM-1; d >= 0; d--)
3637 if (d < dd->npbcdim)
3639 bScrew = (dd->bScrewPBC && d == XX);
3640 if (tric_dir[d] && dd->nc[d] > 1)
3642 /* Use triclinic coordintates for this dimension */
3643 for (j = d+1; j < DIM; j++)
3645 pos_d += cg_cm[j]*tcm[j][d];
3648 while (pos_d >= box[d][d])
3651 rvec_dec(cg_cm, box[d]);
3654 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3655 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3657 for (k = k0; (k < k1); k++)
3659 rvec_dec(pos[k], box[d]);
3662 pos[k][YY] = box[YY][YY] - pos[k][YY];
3663 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3670 rvec_inc(cg_cm, box[d]);
3673 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3674 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3676 for (k = k0; (k < k1); k++)
3678 rvec_inc(pos[k], box[d]);
3681 pos[k][YY] = box[YY][YY] - pos[k][YY];
3682 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3687 /* This could be done more efficiently */
3689 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
3694 i = dd_index(dd->nc, ind);
3695 if (ma->ncg[i] == tmp_nalloc[i])
3697 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
3698 srenew(tmp_ind[i], tmp_nalloc[i]);
3700 tmp_ind[i][ma->ncg[i]] = icg;
3702 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
3706 for (i = 0; i < dd->nnodes; i++)
3709 for (k = 0; k < ma->ncg[i]; k++)
3711 ma->cg[k1++] = tmp_ind[i][k];
3714 ma->index[dd->nnodes] = k1;
3716 for (i = 0; i < dd->nnodes; i++)
3725 // Use double for the sums to avoid natoms^2 overflowing
3727 int nat_sum, nat_min, nat_max;
3732 nat_min = ma->nat[0];
3733 nat_max = ma->nat[0];
3734 for (i = 0; i < dd->nnodes; i++)
3736 nat_sum += ma->nat[i];
3737 // cast to double to avoid integer overflows when squaring
3738 nat2_sum += gmx::square(static_cast<double>(ma->nat[i]));
3739 nat_min = std::min(nat_min, ma->nat[i]);
3740 nat_max = std::max(nat_max, ma->nat[i]);
3742 nat_sum /= dd->nnodes;
3743 nat2_sum /= dd->nnodes;
3745 fprintf(fplog, "Atom distribution over %d domains: av %d stddev %d min %d max %d\n",
3748 static_cast<int>(std::sqrt(nat2_sum - gmx::square(static_cast<double>(nat_sum)) + 0.5)),
3753 static void get_cg_distribution(FILE *fplog, gmx_domdec_t *dd,
3754 t_block *cgs, const matrix box, gmx_ddbox_t *ddbox,
3757 gmx_domdec_master_t *ma = nullptr;
3760 int *ibuf, buf2[2] = { 0, 0 };
3761 gmx_bool bMaster = DDMASTER(dd);
3769 check_screw_box(box);
3772 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
3774 distribute_cg(fplog, box, ddbox->tric_dir, cgs, pos, dd);
3775 for (i = 0; i < dd->nnodes; i++)
3777 ma->ibuf[2*i] = ma->ncg[i];
3778 ma->ibuf[2*i+1] = ma->nat[i];
3786 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
3788 dd->ncg_home = buf2[0];
3789 dd->nat_home = buf2[1];
3790 dd->ncg_tot = dd->ncg_home;
3791 dd->nat_tot = dd->nat_home;
3792 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
3794 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
3795 srenew(dd->index_gl, dd->cg_nalloc);
3796 srenew(dd->cgindex, dd->cg_nalloc+1);
3800 for (i = 0; i < dd->nnodes; i++)
3802 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
3803 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
3808 bMaster ? ma->ibuf : nullptr,
3809 bMaster ? ma->ibuf+dd->nnodes : nullptr,
3810 bMaster ? ma->cg : nullptr,
3811 dd->ncg_home*sizeof(int), dd->index_gl);
3813 /* Determine the home charge group sizes */
3815 for (i = 0; i < dd->ncg_home; i++)
3817 cg_gl = dd->index_gl[i];
3819 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
3824 fprintf(debug, "Home charge groups:\n");
3825 for (i = 0; i < dd->ncg_home; i++)
3827 fprintf(debug, " %d", dd->index_gl[i]);
3830 fprintf(debug, "\n");
3833 fprintf(debug, "\n");
3837 static int compact_and_copy_vec_at(int ncg, int *move,
3840 rvec *src, gmx_domdec_comm_t *comm,
3843 int m, icg, i, i0, i1, nrcg;
3849 for (m = 0; m < DIM*2; m++)
3855 for (icg = 0; icg < ncg; icg++)
3857 i1 = cgindex[icg+1];
3863 /* Compact the home array in place */
3864 for (i = i0; i < i1; i++)
3866 copy_rvec(src[i], src[home_pos++]);
3872 /* Copy to the communication buffer */
3874 pos_vec[m] += 1 + vec*nrcg;
3875 for (i = i0; i < i1; i++)
3877 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
3879 pos_vec[m] += (nvec - vec - 1)*nrcg;
3883 home_pos += i1 - i0;
3891 static int compact_and_copy_vec_cg(int ncg, int *move,
3893 int nvec, rvec *src, gmx_domdec_comm_t *comm,
3896 int m, icg, i0, i1, nrcg;
3902 for (m = 0; m < DIM*2; m++)
3908 for (icg = 0; icg < ncg; icg++)
3910 i1 = cgindex[icg+1];
3916 /* Compact the home array in place */
3917 copy_rvec(src[icg], src[home_pos++]);
3923 /* Copy to the communication buffer */
3924 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
3925 pos_vec[m] += 1 + nrcg*nvec;
3937 static int compact_ind(int ncg, int *move,
3938 int *index_gl, int *cgindex,
3940 gmx_ga2la_t *ga2la, char *bLocalCG,
3943 int cg, nat, a0, a1, a, a_gl;
3948 for (cg = 0; cg < ncg; cg++)
3954 /* Compact the home arrays in place.
3955 * Anything that can be done here avoids access to global arrays.
3957 cgindex[home_pos] = nat;
3958 for (a = a0; a < a1; a++)
3961 gatindex[nat] = a_gl;
3962 /* The cell number stays 0, so we don't need to set it */
3963 ga2la_change_la(ga2la, a_gl, nat);
3966 index_gl[home_pos] = index_gl[cg];
3967 cginfo[home_pos] = cginfo[cg];
3968 /* The charge group remains local, so bLocalCG does not change */
3973 /* Clear the global indices */
3974 for (a = a0; a < a1; a++)
3976 ga2la_del(ga2la, gatindex[a]);
3980 bLocalCG[index_gl[cg]] = FALSE;
3984 cgindex[home_pos] = nat;
3989 static void clear_and_mark_ind(int ncg, int *move,
3990 int *index_gl, int *cgindex, int *gatindex,
3991 gmx_ga2la_t *ga2la, char *bLocalCG,
3996 for (cg = 0; cg < ncg; cg++)
4002 /* Clear the global indices */
4003 for (a = a0; a < a1; a++)
4005 ga2la_del(ga2la, gatindex[a]);
4009 bLocalCG[index_gl[cg]] = FALSE;
4011 /* Signal that this cg has moved using the ns cell index.
4012 * Here we set it to -1. fill_grid will change it
4013 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4015 cell_index[cg] = -1;
4020 static void print_cg_move(FILE *fplog,
4022 gmx_int64_t step, int cg, int dim, int dir,
4023 gmx_bool bHaveCgcmOld, real limitd,
4024 rvec cm_old, rvec cm_new, real pos_d)
4026 gmx_domdec_comm_t *comm;
4031 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4034 fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4035 dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
4036 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4040 /* We don't have a limiting distance available: don't print it */
4041 fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition in direction %c\n",
4042 dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
4043 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4045 fprintf(fplog, "distance out of cell %f\n",
4046 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4049 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4050 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4052 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4053 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4054 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4056 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4057 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4059 comm->cell_x0[dim], comm->cell_x1[dim]);
4062 static void cg_move_error(FILE *fplog,
4064 gmx_int64_t step, int cg, int dim, int dir,
4065 gmx_bool bHaveCgcmOld, real limitd,
4066 rvec cm_old, rvec cm_new, real pos_d)
4070 print_cg_move(fplog, dd, step, cg, dim, dir,
4071 bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
4073 print_cg_move(stderr, dd, step, cg, dim, dir,
4074 bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
4076 "%s moved too far between two domain decomposition steps\n"
4077 "This usually means that your system is not well equilibrated",
4078 dd->comm->bCGs ? "A charge group" : "An atom");
4081 static void rotate_state_atom(t_state *state, int a)
4083 if (state->flags & (1 << estX))
4085 /* Rotate the complete state; for a rectangular box only */
4086 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4087 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4089 if (state->flags & (1 << estV))
4091 state->v[a][YY] = -state->v[a][YY];
4092 state->v[a][ZZ] = -state->v[a][ZZ];
4094 if (state->flags & (1 << estCGP))
4096 state->cg_p[a][YY] = -state->cg_p[a][YY];
4097 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4101 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4103 if (natoms > comm->moved_nalloc)
4105 /* Contents should be preserved here */
4106 comm->moved_nalloc = over_alloc_dd(natoms);
4107 srenew(comm->moved, comm->moved_nalloc);
4113 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4116 ivec tric_dir, matrix tcm,
4117 rvec cell_x0, rvec cell_x1,
4118 rvec limitd, rvec limit0, rvec limit1,
4120 int cg_start, int cg_end,
4125 int cg, k, k0, k1, d, dim, d2;
4130 real inv_ncg, pos_d;
4133 npbcdim = dd->npbcdim;
4135 for (cg = cg_start; cg < cg_end; cg++)
4142 copy_rvec(state->x[k0], cm_new);
4149 for (k = k0; (k < k1); k++)
4151 rvec_inc(cm_new, state->x[k]);
4153 for (d = 0; (d < DIM); d++)
4155 cm_new[d] = inv_ncg*cm_new[d];
4160 /* Do pbc and check DD cell boundary crossings */
4161 for (d = DIM-1; d >= 0; d--)
4165 bScrew = (dd->bScrewPBC && d == XX);
4166 /* Determine the location of this cg in lattice coordinates */
4170 for (d2 = d+1; d2 < DIM; d2++)
4172 pos_d += cm_new[d2]*tcm[d2][d];
4175 /* Put the charge group in the triclinic unit-cell */
4176 if (pos_d >= cell_x1[d])
4178 if (pos_d >= limit1[d])
4180 cg_move_error(fplog, dd, step, cg, d, 1,
4181 cg_cm != as_rvec_array(state->x.data()), limitd[d],
4182 cg_cm[cg], cm_new, pos_d);
4185 if (dd->ci[d] == dd->nc[d] - 1)
4187 rvec_dec(cm_new, state->box[d]);
4190 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4191 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4193 for (k = k0; (k < k1); k++)
4195 rvec_dec(state->x[k], state->box[d]);
4198 rotate_state_atom(state, k);
4203 else if (pos_d < cell_x0[d])
4205 if (pos_d < limit0[d])
4207 cg_move_error(fplog, dd, step, cg, d, -1,
4208 cg_cm != as_rvec_array(state->x.data()), limitd[d],
4209 cg_cm[cg], cm_new, pos_d);
4214 rvec_inc(cm_new, state->box[d]);
4217 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4218 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4220 for (k = k0; (k < k1); k++)
4222 rvec_inc(state->x[k], state->box[d]);
4225 rotate_state_atom(state, k);
4231 else if (d < npbcdim)
4233 /* Put the charge group in the rectangular unit-cell */
4234 while (cm_new[d] >= state->box[d][d])
4236 rvec_dec(cm_new, state->box[d]);
4237 for (k = k0; (k < k1); k++)
4239 rvec_dec(state->x[k], state->box[d]);
4242 while (cm_new[d] < 0)
4244 rvec_inc(cm_new, state->box[d]);
4245 for (k = k0; (k < k1); k++)
4247 rvec_inc(state->x[k], state->box[d]);
4253 copy_rvec(cm_new, cg_cm[cg]);
4255 /* Determine where this cg should go */
4258 for (d = 0; d < dd->ndim; d++)
4263 flag |= DD_FLAG_FW(d);
4269 else if (dev[dim] == -1)
4271 flag |= DD_FLAG_BW(d);
4274 if (dd->nc[dim] > 2)
4285 /* Temporarily store the flag in move */
4286 move[cg] = mc + flag;
4290 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4291 gmx_domdec_t *dd, ivec tric_dir,
4292 t_state *state, PaddedRVecVector *f,
4301 int ncg[DIM*2] = { 0 }, nat[DIM*2] = { 0 };
4302 int i, cg, k, d, dim, dim2, dir, d2, d3;
4303 int mc, cdd, nrcg, ncg_recv, nvs, nvr, nvec, vec;
4304 int sbuf[2], rbuf[2];
4305 int home_pos_cg, home_pos_at, buf_pos;
4309 rvec *cg_cm = nullptr, cell_x0, cell_x1, limitd, limit0, limit1;
4311 cginfo_mb_t *cginfo_mb;
4312 gmx_domdec_comm_t *comm;
4314 int nthread, thread;
4318 check_screw_box(state->box);
4322 if (fr->cutoff_scheme == ecutsGROUP)
4327 // Positions are always present, so there's nothing to flag
4328 bool bV = state->flags & (1<<estV);
4329 bool bCGP = state->flags & (1<<estCGP);
4331 if (dd->ncg_tot > comm->nalloc_int)
4333 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4334 srenew(comm->buf_int, comm->nalloc_int);
4336 move = comm->buf_int;
4338 npbcdim = dd->npbcdim;
4340 for (d = 0; (d < DIM); d++)
4342 limitd[d] = dd->comm->cellsize_min[d];
4343 if (d >= npbcdim && dd->ci[d] == 0)
4345 cell_x0[d] = -GMX_FLOAT_MAX;
4349 cell_x0[d] = comm->cell_x0[d];
4351 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4353 cell_x1[d] = GMX_FLOAT_MAX;
4357 cell_x1[d] = comm->cell_x1[d];
4361 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4362 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4366 /* We check after communication if a charge group moved
4367 * more than one cell. Set the pre-comm check limit to float_max.
4369 limit0[d] = -GMX_FLOAT_MAX;
4370 limit1[d] = GMX_FLOAT_MAX;
4374 make_tric_corr_matrix(npbcdim, state->box, tcm);
4376 cgindex = dd->cgindex;
4378 nthread = gmx_omp_nthreads_get(emntDomdec);
4380 /* Compute the center of geometry for all home charge groups
4381 * and put them in the box and determine where they should go.
4383 #pragma omp parallel for num_threads(nthread) schedule(static)
4384 for (thread = 0; thread < nthread; thread++)
4388 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4389 cell_x0, cell_x1, limitd, limit0, limit1,
4391 ( thread *dd->ncg_home)/nthread,
4392 ((thread+1)*dd->ncg_home)/nthread,
4393 fr->cutoff_scheme == ecutsGROUP ? cg_cm : as_rvec_array(state->x.data()),
4396 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
4399 for (cg = 0; cg < dd->ncg_home; cg++)
4404 flag = mc & ~DD_FLAG_NRCG;
4405 mc = mc & DD_FLAG_NRCG;
4408 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4410 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4411 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4413 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4414 /* We store the cg size in the lower 16 bits
4415 * and the place where the charge group should go
4416 * in the next 6 bits. This saves some communication volume.
4418 nrcg = cgindex[cg+1] - cgindex[cg];
4419 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4425 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4426 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4429 for (i = 0; i < dd->ndim*2; i++)
4431 *ncg_moved += ncg[i];
4444 /* Make sure the communication buffers are large enough */
4445 for (mc = 0; mc < dd->ndim*2; mc++)
4447 nvr = ncg[mc] + nat[mc]*nvec;
4448 if (nvr > comm->cgcm_state_nalloc[mc])
4450 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4451 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4455 switch (fr->cutoff_scheme)
4458 /* Recalculating cg_cm might be cheaper than communicating,
4459 * but that could give rise to rounding issues.
4462 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4463 nvec, cg_cm, comm, bCompact);
4466 /* Without charge groups we send the moved atom coordinates
4467 * over twice. This is so the code below can be used without
4468 * many conditionals for both for with and without charge groups.
4471 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4472 nvec, as_rvec_array(state->x.data()), comm, FALSE);
4475 home_pos_cg -= *ncg_moved;
4479 gmx_incons("unimplemented");
4485 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4486 nvec, vec++, as_rvec_array(state->x.data()),
4490 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4491 nvec, vec++, as_rvec_array(state->v.data()),
4496 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4497 nvec, vec++, as_rvec_array(state->cg_p.data()),
4503 compact_ind(dd->ncg_home, move,
4504 dd->index_gl, dd->cgindex, dd->gatindex,
4505 dd->ga2la, comm->bLocalCG,
4510 if (fr->cutoff_scheme == ecutsVERLET)
4512 moved = get_moved(comm, dd->ncg_home);
4514 for (k = 0; k < dd->ncg_home; k++)
4521 moved = fr->ns->grid->cell_index;
4524 clear_and_mark_ind(dd->ncg_home, move,
4525 dd->index_gl, dd->cgindex, dd->gatindex,
4526 dd->ga2la, comm->bLocalCG,
4530 cginfo_mb = fr->cginfo_mb;
4532 *ncg_stay_home = home_pos_cg;
4533 for (d = 0; d < dd->ndim; d++)
4538 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4541 /* Communicate the cg and atom counts */
4546 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4547 d, dir, sbuf[0], sbuf[1]);
4549 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4551 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4553 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4554 srenew(comm->buf_int, comm->nalloc_int);
4557 /* Communicate the charge group indices, sizes and flags */
4558 dd_sendrecv_int(dd, d, dir,
4559 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4560 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4562 nvs = ncg[cdd] + nat[cdd]*nvec;
4563 i = rbuf[0] + rbuf[1] *nvec;
4564 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4566 /* Communicate cgcm and state */
4567 dd_sendrecv_rvec(dd, d, dir,
4568 comm->cgcm_state[cdd], nvs,
4569 comm->vbuf.v+nvr, i);
4570 ncg_recv += rbuf[0];
4574 dd_check_alloc_ncg(fr, state, f, home_pos_cg + ncg_recv);
4575 if (fr->cutoff_scheme == ecutsGROUP)
4577 /* Here we resize to more than necessary and shrink later */
4578 dd_resize_state(state, f, home_pos_at + ncg_recv*MAX_CGCGSIZE);
4581 /* Process the received charge groups */
4583 for (cg = 0; cg < ncg_recv; cg++)
4585 flag = comm->buf_int[cg*DD_CGIBS+1];
4587 if (dim >= npbcdim && dd->nc[dim] > 2)
4589 /* No pbc in this dim and more than one domain boundary.
4590 * We do a separate check if a charge group didn't move too far.
4592 if (((flag & DD_FLAG_FW(d)) &&
4593 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4594 ((flag & DD_FLAG_BW(d)) &&
4595 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4597 cg_move_error(fplog, dd, step, cg, dim,
4598 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4599 fr->cutoff_scheme == ecutsGROUP, 0,
4600 comm->vbuf.v[buf_pos],
4601 comm->vbuf.v[buf_pos],
4602 comm->vbuf.v[buf_pos][dim]);
4609 /* Check which direction this cg should go */
4610 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4612 if (isDlbOn(dd->comm))
4614 /* The cell boundaries for dimension d2 are not equal
4615 * for each cell row of the lower dimension(s),
4616 * therefore we might need to redetermine where
4617 * this cg should go.
4620 /* If this cg crosses the box boundary in dimension d2
4621 * we can use the communicated flag, so we do not
4622 * have to worry about pbc.
4624 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4625 (flag & DD_FLAG_FW(d2))) ||
4626 (dd->ci[dim2] == 0 &&
4627 (flag & DD_FLAG_BW(d2)))))
4629 /* Clear the two flags for this dimension */
4630 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4631 /* Determine the location of this cg
4632 * in lattice coordinates
4634 pos_d = comm->vbuf.v[buf_pos][dim2];
4637 for (d3 = dim2+1; d3 < DIM; d3++)
4640 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4643 /* Check of we are not at the box edge.
4644 * pbc is only handled in the first step above,
4645 * but this check could move over pbc while
4646 * the first step did not due to different rounding.
4648 if (pos_d >= cell_x1[dim2] &&
4649 dd->ci[dim2] != dd->nc[dim2]-1)
4651 flag |= DD_FLAG_FW(d2);
4653 else if (pos_d < cell_x0[dim2] &&
4656 flag |= DD_FLAG_BW(d2);
4658 comm->buf_int[cg*DD_CGIBS+1] = flag;
4661 /* Set to which neighboring cell this cg should go */
4662 if (flag & DD_FLAG_FW(d2))
4666 else if (flag & DD_FLAG_BW(d2))
4668 if (dd->nc[dd->dim[d2]] > 2)
4680 nrcg = flag & DD_FLAG_NRCG;
4683 if (home_pos_cg+1 > dd->cg_nalloc)
4685 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
4686 srenew(dd->index_gl, dd->cg_nalloc);
4687 srenew(dd->cgindex, dd->cg_nalloc+1);
4689 /* Set the global charge group index and size */
4690 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
4691 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
4692 /* Copy the state from the buffer */
4693 if (fr->cutoff_scheme == ecutsGROUP)
4696 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
4700 /* Set the cginfo */
4701 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
4702 dd->index_gl[home_pos_cg]);
4705 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
4708 for (i = 0; i < nrcg; i++)
4710 copy_rvec(comm->vbuf.v[buf_pos++],
4711 state->x[home_pos_at+i]);
4715 for (i = 0; i < nrcg; i++)
4717 copy_rvec(comm->vbuf.v[buf_pos++],
4718 state->v[home_pos_at+i]);
4723 for (i = 0; i < nrcg; i++)
4725 copy_rvec(comm->vbuf.v[buf_pos++],
4726 state->cg_p[home_pos_at+i]);
4730 home_pos_at += nrcg;
4734 /* Reallocate the buffers if necessary */
4735 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4737 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4738 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4740 nvr = ncg[mc] + nat[mc]*nvec;
4741 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
4743 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
4744 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4746 /* Copy from the receive to the send buffers */
4747 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
4748 comm->buf_int + cg*DD_CGIBS,
4749 DD_CGIBS*sizeof(int));
4750 memcpy(comm->cgcm_state[mc][nvr],
4751 comm->vbuf.v[buf_pos],
4752 (1+nrcg*nvec)*sizeof(rvec));
4753 buf_pos += 1 + nrcg*nvec;
4760 /* With sorting (!bCompact) the indices are now only partially up to date
4761 * and ncg_home and nat_home are not the real count, since there are
4762 * "holes" in the arrays for the charge groups that moved to neighbors.
4764 if (fr->cutoff_scheme == ecutsVERLET)
4766 moved = get_moved(comm, home_pos_cg);
4768 for (i = dd->ncg_home; i < home_pos_cg; i++)
4773 dd->ncg_home = home_pos_cg;
4774 dd->nat_home = home_pos_at;
4776 if (fr->cutoff_scheme == ecutsGROUP && !bCompact)
4778 /* We overallocated before, we need to set the right size here */
4779 dd_resize_state(state, f, dd->nat_home);
4785 "Finished repartitioning: cgs moved out %d, new home %d\n",
4786 *ncg_moved, dd->ncg_home-*ncg_moved);
4791 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
4793 /* Note that the cycles value can be incorrect, either 0 or some
4794 * extremely large value, when our thread migrated to another core
4795 * with an unsynchronized cycle counter. If this happens less often
4796 * that once per nstlist steps, this will not cause issues, since
4797 * we later subtract the maximum value from the sum over nstlist steps.
4798 * A zero count will slightly lower the total, but that's a small effect.
4799 * Note that the main purpose of the subtraction of the maximum value
4800 * is to avoid throwing off the load balancing when stalls occur due
4801 * e.g. system activity or network congestion.
4803 dd->comm->cycl[ddCycl] += cycles;
4804 dd->comm->cycl_n[ddCycl]++;
4805 if (cycles > dd->comm->cycl_max[ddCycl])
4807 dd->comm->cycl_max[ddCycl] = cycles;
4811 static double force_flop_count(t_nrnb *nrnb)
4818 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
4820 /* To get closer to the real timings, we half the count
4821 * for the normal loops and again half it for water loops.
4824 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
4826 sum += nrnb->n[i]*0.25*cost_nrnb(i);
4830 sum += nrnb->n[i]*0.50*cost_nrnb(i);
4833 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
4836 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
4838 sum += nrnb->n[i]*cost_nrnb(i);
4841 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
4843 sum += nrnb->n[i]*cost_nrnb(i);
4849 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
4851 if (dd->comm->eFlop)
4853 dd->comm->flop -= force_flop_count(nrnb);
4856 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
4858 if (dd->comm->eFlop)
4860 dd->comm->flop += force_flop_count(nrnb);
4865 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
4869 for (i = 0; i < ddCyclNr; i++)
4871 dd->comm->cycl[i] = 0;
4872 dd->comm->cycl_n[i] = 0;
4873 dd->comm->cycl_max[i] = 0;
4876 dd->comm->flop_n = 0;
4879 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
4881 gmx_domdec_comm_t *comm;
4882 domdec_load_t *load;
4883 domdec_root_t *root = nullptr;
4885 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
4890 fprintf(debug, "get_load_distribution start\n");
4893 wallcycle_start(wcycle, ewcDDCOMMLOAD);
4897 bSepPME = (dd->pme_nodeid >= 0);
4899 if (dd->ndim == 0 && bSepPME)
4901 /* Without decomposition, but with PME nodes, we need the load */
4902 comm->load[0].mdf = comm->cycl[ddCyclPPduringPME];
4903 comm->load[0].pme = comm->cycl[ddCyclPME];
4906 for (d = dd->ndim-1; d >= 0; d--)
4909 /* Check if we participate in the communication in this dimension */
4910 if (d == dd->ndim-1 ||
4911 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
4913 load = &comm->load[d];
4914 if (isDlbOn(dd->comm))
4916 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
4919 if (d == dd->ndim-1)
4921 sbuf[pos++] = dd_force_load(comm);
4922 sbuf[pos++] = sbuf[0];
4923 if (isDlbOn(dd->comm))
4925 sbuf[pos++] = sbuf[0];
4926 sbuf[pos++] = cell_frac;
4929 sbuf[pos++] = comm->cell_f_max0[d];
4930 sbuf[pos++] = comm->cell_f_min1[d];
4935 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
4936 sbuf[pos++] = comm->cycl[ddCyclPME];
4941 sbuf[pos++] = comm->load[d+1].sum;
4942 sbuf[pos++] = comm->load[d+1].max;
4943 if (isDlbOn(dd->comm))
4945 sbuf[pos++] = comm->load[d+1].sum_m;
4946 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
4947 sbuf[pos++] = comm->load[d+1].flags;
4950 sbuf[pos++] = comm->cell_f_max0[d];
4951 sbuf[pos++] = comm->cell_f_min1[d];
4956 sbuf[pos++] = comm->load[d+1].mdf;
4957 sbuf[pos++] = comm->load[d+1].pme;
4961 /* Communicate a row in DD direction d.
4962 * The communicators are setup such that the root always has rank 0.
4965 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
4966 load->load, load->nload*sizeof(float), MPI_BYTE,
4967 0, comm->mpi_comm_load[d]);
4969 if (dd->ci[dim] == dd->master_ci[dim])
4971 /* We are the root, process this row */
4974 root = comm->root[d];
4984 for (i = 0; i < dd->nc[dim]; i++)
4986 load->sum += load->load[pos++];
4987 load->max = std::max(load->max, load->load[pos]);
4989 if (isDlbOn(dd->comm))
4993 /* This direction could not be load balanced properly,
4994 * therefore we need to use the maximum iso the average load.
4996 load->sum_m = std::max(load->sum_m, load->load[pos]);
5000 load->sum_m += load->load[pos];
5003 load->cvol_min = std::min(load->cvol_min, load->load[pos]);
5007 load->flags = (int)(load->load[pos++] + 0.5);
5011 root->cell_f_max0[i] = load->load[pos++];
5012 root->cell_f_min1[i] = load->load[pos++];
5017 load->mdf = std::max(load->mdf, load->load[pos]);
5019 load->pme = std::max(load->pme, load->load[pos]);
5023 if (isDlbOn(comm) && root->bLimited)
5025 load->sum_m *= dd->nc[dim];
5026 load->flags |= (1<<d);
5034 comm->nload += dd_load_count(comm);
5035 comm->load_step += comm->cycl[ddCyclStep];
5036 comm->load_sum += comm->load[0].sum;
5037 comm->load_max += comm->load[0].max;
5040 for (d = 0; d < dd->ndim; d++)
5042 if (comm->load[0].flags & (1<<d))
5044 comm->load_lim[d]++;
5050 comm->load_mdf += comm->load[0].mdf;
5051 comm->load_pme += comm->load[0].pme;
5055 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5059 fprintf(debug, "get_load_distribution finished\n");
5063 static float dd_force_load_fraction(gmx_domdec_t *dd)
5065 /* Return the relative performance loss on the total run time
5066 * due to the force calculation load imbalance.
5068 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
5070 return dd->comm->load_sum/(dd->comm->load_step*dd->nnodes);
5078 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5080 /* Return the relative performance loss on the total run time
5081 * due to the force calculation load imbalance.
5083 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
5086 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5087 (dd->comm->load_step*dd->nnodes);
5095 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5097 gmx_domdec_comm_t *comm = dd->comm;
5099 /* Only the master rank prints loads and only if we measured loads */
5100 if (!DDMASTER(dd) || comm->nload == 0)
5106 int numPpRanks = dd->nnodes;
5107 int numPmeRanks = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5108 int numRanks = numPpRanks + numPmeRanks;
5109 float lossFraction = 0;
5111 /* Print the average load imbalance and performance loss */
5112 if (dd->nnodes > 1 && comm->load_sum > 0)
5114 float imbalance = comm->load_max*numPpRanks/comm->load_sum - 1;
5115 lossFraction = dd_force_imb_perf_loss(dd);
5117 std::string msg = "\n Dynamic load balancing report:\n";
5118 std::string dlbStateStr = "";
5120 switch (dd->comm->dlbState)
5123 dlbStateStr = "DLB was off during the run per user request.";
5125 case edlbsOffForever:
5126 /* Currectly this can happen due to performance loss observed, cell size
5127 * limitations or incompatibility with other settings observed during
5128 * determineInitialDlbState(). */
5129 dlbStateStr = "DLB got disabled because it was unsuitable to use.";
5131 case edlbsOffCanTurnOn:
5132 dlbStateStr = "DLB was off during the run due to low measured imbalance.";
5134 case edlbsOffTemporarilyLocked:
5135 dlbStateStr = "DLB was locked at the end of the run due to unfinished PP-PME balancing.";
5137 case edlbsOnCanTurnOff:
5138 dlbStateStr = "DLB was turned on during the run due to measured imbalance.";
5141 dlbStateStr = "DLB was permanently on during the run per user request.";
5144 GMX_ASSERT(false, "Undocumented DLB state");
5147 msg += " " + dlbStateStr + "\n";
5148 msg += gmx::formatString(" Average load imbalance: %.1f%%.\n", imbalance*100);
5149 msg += gmx::formatString(" The balanceable part of the MD step is %d%%, load imbalance is computed from this.\n",
5150 static_cast<int>(dd_force_load_fraction(dd)*100 + 0.5));
5151 msg += gmx::formatString(" Part of the total run time spent waiting due to load imbalance: %.1f%%.\n",
5153 fprintf(fplog, "%s", msg.c_str());
5154 fprintf(stderr, "%s", msg.c_str());
5157 /* Print during what percentage of steps the load balancing was limited */
5158 bool dlbWasLimited = false;
5161 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5162 for (int d = 0; d < dd->ndim; d++)
5164 int limitPercentage = (200*comm->load_lim[d] + 1)/(2*comm->nload);
5165 sprintf(buf+strlen(buf), " %c %d %%",
5166 dim2char(dd->dim[d]), limitPercentage);
5167 if (limitPercentage >= 50)
5169 dlbWasLimited = true;
5172 sprintf(buf + strlen(buf), "\n");
5173 fprintf(fplog, "%s", buf);
5174 fprintf(stderr, "%s", buf);
5177 /* Print the performance loss due to separate PME - PP rank imbalance */
5178 float lossFractionPme = 0;
5179 if (numPmeRanks > 0 && comm->load_mdf > 0 && comm->load_step > 0)
5181 float pmeForceRatio = comm->load_pme/comm->load_mdf;
5182 lossFractionPme = (comm->load_pme - comm->load_mdf)/comm->load_step;
5183 if (lossFractionPme <= 0)
5185 lossFractionPme *= numPmeRanks/static_cast<float>(numRanks);
5189 lossFractionPme *= numPpRanks/static_cast<float>(numRanks);
5191 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pmeForceRatio);
5192 fprintf(fplog, "%s", buf);
5193 fprintf(stderr, "%s", buf);
5194 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossFractionPme)*100);
5195 fprintf(fplog, "%s", buf);
5196 fprintf(stderr, "%s", buf);
5198 fprintf(fplog, "\n");
5199 fprintf(stderr, "\n");
5201 if (lossFraction >= DD_PERF_LOSS_WARN)
5204 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5205 " in the domain decomposition.\n", lossFraction*100);
5208 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5210 else if (dlbWasLimited)
5212 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5214 fprintf(fplog, "%s\n", buf);
5215 fprintf(stderr, "%s\n", buf);
5217 if (numPmeRanks > 0 && fabs(lossFractionPme) >= DD_PERF_LOSS_WARN)
5220 "NOTE: %.1f %% performance was lost because the PME ranks\n"
5221 " had %s work to do than the PP ranks.\n"
5222 " You might want to %s the number of PME ranks\n"
5223 " or %s the cut-off and the grid spacing.\n",
5224 fabs(lossFractionPme*100),
5225 (lossFractionPme < 0) ? "less" : "more",
5226 (lossFractionPme < 0) ? "decrease" : "increase",
5227 (lossFractionPme < 0) ? "decrease" : "increase");
5228 fprintf(fplog, "%s\n", buf);
5229 fprintf(stderr, "%s\n", buf);
5233 static float dd_vol_min(gmx_domdec_t *dd)
5235 return dd->comm->load[0].cvol_min*dd->nnodes;
5238 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5240 return dd->comm->load[0].flags;
5243 static float dd_f_imbal(gmx_domdec_t *dd)
5245 if (dd->comm->load[0].sum > 0)
5247 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1.0f;
5251 /* Something is wrong in the cycle counting, report no load imbalance */
5256 float dd_pme_f_ratio(gmx_domdec_t *dd)
5258 /* Should only be called on the DD master rank */
5259 assert(DDMASTER(dd));
5261 if (dd->comm->load[0].mdf > 0 && dd->comm->cycl_n[ddCyclPME] > 0)
5263 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5271 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5276 flags = dd_load_flags(dd);
5280 "DD load balancing is limited by minimum cell size in dimension");
5281 for (d = 0; d < dd->ndim; d++)
5285 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5288 fprintf(fplog, "\n");
5290 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5291 if (isDlbOn(dd->comm))
5293 fprintf(fplog, " vol min/aver %5.3f%c",
5294 dd_vol_min(dd), flags ? '!' : ' ');
5298 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5300 if (dd->comm->cycl_n[ddCyclPME])
5302 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5304 fprintf(fplog, "\n\n");
5307 static void dd_print_load_verbose(gmx_domdec_t *dd)
5309 if (isDlbOn(dd->comm))
5311 fprintf(stderr, "vol %4.2f%c ",
5312 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5316 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5318 if (dd->comm->cycl_n[ddCyclPME])
5320 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
5325 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5330 domdec_root_t *root;
5331 gmx_bool bPartOfGroup = FALSE;
5333 dim = dd->dim[dim_ind];
5334 copy_ivec(loc, loc_c);
5335 for (i = 0; i < dd->nc[dim]; i++)
5338 rank = dd_index(dd->nc, loc_c);
5339 if (rank == dd->rank)
5341 /* This process is part of the group */
5342 bPartOfGroup = TRUE;
5345 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5349 dd->comm->mpi_comm_load[dim_ind] = c_row;
5350 if (!isDlbDisabled(dd->comm))
5352 if (dd->ci[dim] == dd->master_ci[dim])
5354 /* This is the root process of this row */
5355 snew(dd->comm->root[dim_ind], 1);
5356 root = dd->comm->root[dim_ind];
5357 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5358 snew(root->old_cell_f, dd->nc[dim]+1);
5359 snew(root->bCellMin, dd->nc[dim]);
5362 snew(root->cell_f_max0, dd->nc[dim]);
5363 snew(root->cell_f_min1, dd->nc[dim]);
5364 snew(root->bound_min, dd->nc[dim]);
5365 snew(root->bound_max, dd->nc[dim]);
5367 snew(root->buf_ncd, dd->nc[dim]);
5371 /* This is not a root process, we only need to receive cell_f */
5372 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5375 if (dd->ci[dim] == dd->master_ci[dim])
5377 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
5383 void dd_setup_dlb_resource_sharing(t_commrec *cr,
5387 int physicalnode_id_hash;
5389 MPI_Comm mpi_comm_pp_physicalnode;
5391 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
5393 /* Only ranks with short-ranged tasks (currently) use GPUs.
5394 * If we don't have GPUs assigned, there are no resources to share.
5399 physicalnode_id_hash = gmx_physicalnode_id_hash();
5405 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5406 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5407 dd->rank, physicalnode_id_hash, gpu_id);
5409 /* Split the PP communicator over the physical nodes */
5410 /* TODO: See if we should store this (before), as it's also used for
5411 * for the nodecomm summution.
5413 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5414 &mpi_comm_pp_physicalnode);
5415 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5416 &dd->comm->mpi_comm_gpu_shared);
5417 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5418 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5422 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5425 /* Note that some ranks could share a GPU, while others don't */
5427 if (dd->comm->nrank_gpu_shared == 1)
5429 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
5432 GMX_UNUSED_VALUE(cr);
5433 GMX_UNUSED_VALUE(gpu_id);
5437 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5440 int dim0, dim1, i, j;
5445 fprintf(debug, "Making load communicators\n");
5448 snew(dd->comm->load, std::max(dd->ndim, 1));
5449 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
5457 make_load_communicator(dd, 0, loc);
5461 for (i = 0; i < dd->nc[dim0]; i++)
5464 make_load_communicator(dd, 1, loc);
5470 for (i = 0; i < dd->nc[dim0]; i++)
5474 for (j = 0; j < dd->nc[dim1]; j++)
5477 make_load_communicator(dd, 2, loc);
5484 fprintf(debug, "Finished making load communicators\n");
5489 /*! \brief Sets up the relation between neighboring domains and zones */
5490 static void setup_neighbor_relations(gmx_domdec_t *dd)
5492 int d, dim, i, j, m;
5494 gmx_domdec_zones_t *zones;
5495 gmx_domdec_ns_ranges_t *izone;
5497 for (d = 0; d < dd->ndim; d++)
5500 copy_ivec(dd->ci, tmp);
5501 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5502 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5503 copy_ivec(dd->ci, tmp);
5504 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5505 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5508 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5511 dd->neighbor[d][1]);
5515 int nzone = (1 << dd->ndim);
5516 int nizone = (1 << std::max(dd->ndim - 1, 0));
5517 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
5519 zones = &dd->comm->zones;
5521 for (i = 0; i < nzone; i++)
5524 clear_ivec(zones->shift[i]);
5525 for (d = 0; d < dd->ndim; d++)
5527 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5532 for (i = 0; i < nzone; i++)
5534 for (d = 0; d < DIM; d++)
5536 s[d] = dd->ci[d] - zones->shift[i][d];
5541 else if (s[d] >= dd->nc[d])
5547 zones->nizone = nizone;
5548 for (i = 0; i < zones->nizone; i++)
5550 assert(ddNonbondedZonePairRanges[i][0] == i);
5552 izone = &zones->izone[i];
5553 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
5554 * j-zones up to nzone.
5556 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
5557 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
5558 for (dim = 0; dim < DIM; dim++)
5560 if (dd->nc[dim] == 1)
5562 /* All shifts should be allowed */
5563 izone->shift0[dim] = -1;
5564 izone->shift1[dim] = 1;
5568 /* Determine the min/max j-zone shift wrt the i-zone */
5569 izone->shift0[dim] = 1;
5570 izone->shift1[dim] = -1;
5571 for (j = izone->j0; j < izone->j1; j++)
5573 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5574 if (shift_diff < izone->shift0[dim])
5576 izone->shift0[dim] = shift_diff;
5578 if (shift_diff > izone->shift1[dim])
5580 izone->shift1[dim] = shift_diff;
5587 if (!isDlbDisabled(dd->comm))
5589 snew(dd->comm->root, dd->ndim);
5592 if (dd->comm->bRecordLoad)
5594 make_load_communicators(dd);
5598 static void make_pp_communicator(FILE *fplog,
5600 t_commrec gmx_unused *cr,
5601 int gmx_unused reorder)
5604 gmx_domdec_comm_t *comm;
5611 if (comm->bCartesianPP)
5613 /* Set up cartesian communication for the particle-particle part */
5616 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5617 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5620 for (int i = 0; i < DIM; i++)
5624 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5626 /* We overwrite the old communicator with the new cartesian one */
5627 cr->mpi_comm_mygroup = comm_cart;
5630 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5631 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5633 if (comm->bCartesianPP_PME)
5635 /* Since we want to use the original cartesian setup for sim,
5636 * and not the one after split, we need to make an index.
5638 snew(comm->ddindex2ddnodeid, dd->nnodes);
5639 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5640 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5641 /* Get the rank of the DD master,
5642 * above we made sure that the master node is a PP node.
5652 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5654 else if (comm->bCartesianPP)
5656 if (cr->npmenodes == 0)
5658 /* The PP communicator is also
5659 * the communicator for this simulation
5661 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5663 cr->nodeid = dd->rank;
5665 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5667 /* We need to make an index to go from the coordinates
5668 * to the nodeid of this simulation.
5670 snew(comm->ddindex2simnodeid, dd->nnodes);
5671 snew(buf, dd->nnodes);
5672 if (thisRankHasDuty(cr, DUTY_PP))
5674 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5676 /* Communicate the ddindex to simulation nodeid index */
5677 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5678 cr->mpi_comm_mysim);
5681 /* Determine the master coordinates and rank.
5682 * The DD master should be the same node as the master of this sim.
5684 for (int i = 0; i < dd->nnodes; i++)
5686 if (comm->ddindex2simnodeid[i] == 0)
5688 ddindex2xyz(dd->nc, i, dd->master_ci);
5689 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
5694 fprintf(debug, "The master rank is %d\n", dd->masterrank);
5699 /* No Cartesian communicators */
5700 /* We use the rank in dd->comm->all as DD index */
5701 ddindex2xyz(dd->nc, dd->rank, dd->ci);
5702 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
5704 clear_ivec(dd->master_ci);
5711 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
5712 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5717 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
5718 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5722 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
5726 gmx_domdec_comm_t *comm = dd->comm;
5728 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
5731 snew(comm->ddindex2simnodeid, dd->nnodes);
5732 snew(buf, dd->nnodes);
5733 if (thisRankHasDuty(cr, DUTY_PP))
5735 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5737 /* Communicate the ddindex to simulation nodeid index */
5738 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5739 cr->mpi_comm_mysim);
5743 GMX_UNUSED_VALUE(dd);
5744 GMX_UNUSED_VALUE(cr);
5748 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
5749 int ncg, int natoms)
5751 gmx_domdec_master_t *ma;
5756 snew(ma->ncg, dd->nnodes);
5757 snew(ma->index, dd->nnodes+1);
5759 snew(ma->nat, dd->nnodes);
5760 snew(ma->ibuf, dd->nnodes*2);
5761 snew(ma->cell_x, DIM);
5762 for (i = 0; i < DIM; i++)
5764 snew(ma->cell_x[i], dd->nc[i]+1);
5767 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
5773 snew(ma->vbuf, natoms);
5779 static void split_communicator(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
5780 DdRankOrder gmx_unused rankOrder,
5781 int gmx_unused reorder)
5783 gmx_domdec_comm_t *comm;
5792 if (comm->bCartesianPP)
5794 for (i = 1; i < DIM; i++)
5796 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
5798 if (bDiv[YY] || bDiv[ZZ])
5800 comm->bCartesianPP_PME = TRUE;
5801 /* If we have 2D PME decomposition, which is always in x+y,
5802 * we stack the PME only nodes in z.
5803 * Otherwise we choose the direction that provides the thinnest slab
5804 * of PME only nodes as this will have the least effect
5805 * on the PP communication.
5806 * But for the PME communication the opposite might be better.
5808 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
5810 dd->nc[YY] > dd->nc[ZZ]))
5812 comm->cartpmedim = ZZ;
5816 comm->cartpmedim = YY;
5818 comm->ntot[comm->cartpmedim]
5819 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
5823 fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
5825 "Will not use a Cartesian communicator for PP <-> PME\n\n");
5829 if (comm->bCartesianPP_PME)
5837 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
5840 for (i = 0; i < DIM; i++)
5844 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
5846 MPI_Comm_rank(comm_cart, &rank);
5847 if (MASTER(cr) && rank != 0)
5849 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
5852 /* With this assigment we loose the link to the original communicator
5853 * which will usually be MPI_COMM_WORLD, unless have multisim.
5855 cr->mpi_comm_mysim = comm_cart;
5856 cr->sim_nodeid = rank;
5858 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
5862 fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
5863 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5866 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
5870 if (cr->npmenodes == 0 ||
5871 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
5873 cr->duty = DUTY_PME;
5876 /* Split the sim communicator into PP and PME only nodes */
5877 MPI_Comm_split(cr->mpi_comm_mysim,
5878 getThisRankDuties(cr),
5879 dd_index(comm->ntot, dd->ci),
5880 &cr->mpi_comm_mygroup);
5887 case DdRankOrder::pp_pme:
5890 fprintf(fplog, "Order of the ranks: PP first, PME last\n");
5893 case DdRankOrder::interleave:
5894 /* Interleave the PP-only and PME-only ranks */
5897 fprintf(fplog, "Interleaving PP and PME ranks\n");
5899 comm->pmenodes = dd_interleaved_pme_ranks(dd);
5901 case DdRankOrder::cartesian:
5904 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(rankOrder));
5907 if (dd_simnode2pmenode(dd, cr, cr->sim_nodeid) == -1)
5909 cr->duty = DUTY_PME;
5916 /* Split the sim communicator into PP and PME only nodes */
5917 MPI_Comm_split(cr->mpi_comm_mysim,
5918 getThisRankDuties(cr),
5920 &cr->mpi_comm_mygroup);
5921 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
5927 fprintf(fplog, "This rank does only %s work.\n\n",
5928 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
5932 /*! \brief Generates the MPI communicators for domain decomposition */
5933 static void make_dd_communicators(FILE *fplog, t_commrec *cr,
5934 gmx_domdec_t *dd, DdRankOrder ddRankOrder)
5936 gmx_domdec_comm_t *comm;
5941 copy_ivec(dd->nc, comm->ntot);
5943 comm->bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
5944 comm->bCartesianPP_PME = FALSE;
5946 /* Reorder the nodes by default. This might change the MPI ranks.
5947 * Real reordering is only supported on very few architectures,
5948 * Blue Gene is one of them.
5950 CartReorder = (getenv("GMX_NO_CART_REORDER") == nullptr);
5952 if (cr->npmenodes > 0)
5954 /* Split the communicator into a PP and PME part */
5955 split_communicator(fplog, cr, dd, ddRankOrder, CartReorder);
5956 if (comm->bCartesianPP_PME)
5958 /* We (possibly) reordered the nodes in split_communicator,
5959 * so it is no longer required in make_pp_communicator.
5961 CartReorder = FALSE;
5966 /* All nodes do PP and PME */
5968 /* We do not require separate communicators */
5969 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
5973 if (thisRankHasDuty(cr, DUTY_PP))
5975 /* Copy or make a new PP communicator */
5976 make_pp_communicator(fplog, dd, cr, CartReorder);
5980 receive_ddindex2simnodeid(dd, cr);
5983 if (!thisRankHasDuty(cr, DUTY_PME))
5985 /* Set up the commnuication to our PME node */
5986 dd->pme_nodeid = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
5987 dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
5990 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
5991 dd->pme_nodeid, dd->pme_receive_vir_ener);
5996 dd->pme_nodeid = -1;
6001 dd->ma = init_gmx_domdec_master_t(dd,
6003 comm->cgs_gl.index[comm->cgs_gl.nr]);
6007 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6009 real *slb_frac, tot;
6014 if (nc > 1 && size_string != nullptr)
6018 fprintf(fplog, "Using static load balancing for the %s direction\n",
6023 for (i = 0; i < nc; i++)
6026 sscanf(size_string, "%20lf%n", &dbl, &n);
6029 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6038 fprintf(fplog, "Relative cell sizes:");
6040 for (i = 0; i < nc; i++)
6045 fprintf(fplog, " %5.3f", slb_frac[i]);
6050 fprintf(fplog, "\n");
6057 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
6060 gmx_mtop_ilistloop_t iloop;
6064 iloop = gmx_mtop_ilistloop_init(mtop);
6065 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6067 for (ftype = 0; ftype < F_NRE; ftype++)
6069 if ((interaction_function[ftype].flags & IF_BOND) &&
6072 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
6080 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6086 val = getenv(env_var);
6089 if (sscanf(val, "%20d", &nst) <= 0)
6095 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6103 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6107 fprintf(stderr, "\n%s\n", warn_string);
6111 fprintf(fplog, "\n%s\n", warn_string);
6115 static void check_dd_restrictions(t_commrec *cr, const gmx_domdec_t *dd,
6116 const t_inputrec *ir, FILE *fplog)
6118 if (ir->ePBC == epbcSCREW &&
6119 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6121 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6124 if (ir->ns_type == ensSIMPLE)
6126 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6129 if (ir->nstlist == 0)
6131 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6134 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6136 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6140 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6145 r = ddbox->box_size[XX];
6146 for (di = 0; di < dd->ndim; di++)
6149 /* Check using the initial average cell size */
6150 r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6156 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
6158 static int forceDlbOffOrBail(int cmdlineDlbState,
6159 const std::string &reasonStr,
6163 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
6164 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
6166 if (cmdlineDlbState == edlbsOnUser)
6168 gmx_fatal(FARGS, (dlbNotSupportedErr + reasonStr).c_str());
6170 else if (cmdlineDlbState == edlbsOffCanTurnOn)
6172 dd_warning(cr, fplog, (dlbDisableNote + reasonStr + "\n").c_str());
6174 return edlbsOffForever;
6177 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
6179 * This function parses the parameters of "-dlb" command line option setting
6180 * corresponding state values. Then it checks the consistency of the determined
6181 * state with other run parameters and settings. As a result, the initial state
6182 * may be altered or an error may be thrown if incompatibility of options is detected.
6184 * \param [in] fplog Pointer to mdrun log file.
6185 * \param [in] cr Pointer to MPI communication object.
6186 * \param [in] dlbOption Enum value for the DLB option.
6187 * \param [in] bRecordLoad True if the load balancer is recording load information.
6188 * \param [in] mdrunOptions Options for mdrun.
6189 * \param [in] ir Pointer mdrun to input parameters.
6190 * \returns DLB initial/startup state.
6192 static int determineInitialDlbState(FILE *fplog, t_commrec *cr,
6193 DlbOption dlbOption, gmx_bool bRecordLoad,
6194 const MdrunOptions &mdrunOptions,
6195 const t_inputrec *ir)
6197 int dlbState = edlbsOffCanTurnOn;
6201 case DlbOption::turnOnWhenUseful: dlbState = edlbsOffCanTurnOn; break;
6202 case DlbOption::no: dlbState = edlbsOffUser; break;
6203 case DlbOption::yes: dlbState = edlbsOnUser; break;
6204 default: gmx_incons("Invalid dlbOption enum value");
6207 /* Reruns don't support DLB: bail or override auto mode */
6208 if (mdrunOptions.rerun)
6210 std::string reasonStr = "it is not supported in reruns.";
6211 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
6214 /* Unsupported integrators */
6215 if (!EI_DYNAMICS(ir->eI))
6217 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
6218 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
6221 /* Without cycle counters we can't time work to balance on */
6224 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
6225 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
6228 if (mdrunOptions.reproducible)
6230 std::string reasonStr = "you started a reproducible run.";
6235 case edlbsOffForever:
6236 GMX_RELEASE_ASSERT(false, "edlbsOffForever is not a valid initial state");
6238 case edlbsOffCanTurnOn:
6239 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
6241 case edlbsOnCanTurnOff:
6242 GMX_RELEASE_ASSERT(false, "edlbsOffCanTurnOff is not a valid initial state");
6245 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", cr, fplog);
6248 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", dlbState);
6256 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6261 if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
6263 /* Decomposition order z,y,x */
6266 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6268 for (dim = DIM-1; dim >= 0; dim--)
6270 if (dd->nc[dim] > 1)
6272 dd->dim[dd->ndim++] = dim;
6278 /* Decomposition order x,y,z */
6279 for (dim = 0; dim < DIM; dim++)
6281 if (dd->nc[dim] > 1)
6283 dd->dim[dd->ndim++] = dim;
6289 static gmx_domdec_comm_t *init_dd_comm()
6291 gmx_domdec_comm_t *comm;
6295 snew(comm->cggl_flag, DIM*2);
6296 snew(comm->cgcm_state, DIM*2);
6297 for (i = 0; i < DIM*2; i++)
6299 comm->cggl_flag_nalloc[i] = 0;
6300 comm->cgcm_state_nalloc[i] = 0;
6303 comm->nalloc_int = 0;
6304 comm->buf_int = nullptr;
6306 vec_rvec_init(&comm->vbuf);
6308 comm->n_load_have = 0;
6309 comm->n_load_collect = 0;
6311 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6313 comm->sum_nat[i] = 0;
6317 comm->load_step = 0;
6320 clear_ivec(comm->load_lim);
6324 /* This should be replaced by a unique pointer */
6325 comm->balanceRegion = ddBalanceRegionAllocate();
6330 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
6331 static void set_dd_limits_and_grid(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
6332 const DomdecOptions &options,
6333 const MdrunOptions &mdrunOptions,
6334 const gmx_mtop_t *mtop,
6335 const t_inputrec *ir,
6336 const matrix box, const rvec *xGlobal,
6338 int *npme_x, int *npme_y)
6341 real r_bonded_limit = -1;
6342 const real tenPercentMargin = 1.1;
6343 gmx_domdec_comm_t *comm = dd->comm;
6345 snew(comm->cggl_flag, DIM*2);
6346 snew(comm->cgcm_state, DIM*2);
6348 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6349 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6351 dd->pme_recv_f_alloc = 0;
6352 dd->pme_recv_f_buf = nullptr;
6354 /* Initialize to GPU share count to 0, might change later */
6355 comm->nrank_gpu_shared = 0;
6357 comm->dlbState = determineInitialDlbState(fplog, cr, options.dlbOption, comm->bRecordLoad, mdrunOptions, ir);
6358 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
6359 /* To consider turning DLB on after 2*nstlist steps we need to check
6360 * at partitioning count 3. Thus we need to increase the first count by 2.
6362 comm->ddPartioningCountFirstDlbOff += 2;
6366 fprintf(fplog, "Dynamic load balancing: %s\n",
6367 edlbs_names[comm->dlbState]);
6369 comm->bPMELoadBalDLBLimits = FALSE;
6371 /* Allocate the charge group/atom sorting struct */
6372 snew(comm->sort, 1);
6374 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6376 comm->bInterCGBondeds = ((ncg_mtop(mtop) > mtop->mols.nr) ||
6377 mtop->bIntermolecularInteractions);
6378 if (comm->bInterCGBondeds)
6380 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6384 comm->bInterCGMultiBody = FALSE;
6387 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6388 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6392 /* Set the cut-off to some very large value,
6393 * so we don't need if statements everywhere in the code.
6394 * We use sqrt, since the cut-off is squared in some places.
6396 comm->cutoff = GMX_CUTOFF_INF;
6400 comm->cutoff = ir->rlist;
6402 comm->cutoff_mbody = 0;
6404 comm->cellsize_limit = 0;
6405 comm->bBondComm = FALSE;
6407 /* Atoms should be able to move by up to half the list buffer size (if > 0)
6408 * within nstlist steps. Since boundaries are allowed to displace by half
6409 * a cell size, DD cells should be at least the size of the list buffer.
6411 comm->cellsize_limit = std::max(comm->cellsize_limit,
6412 ir->rlist - std::max(ir->rvdw, ir->rcoulomb));
6414 if (comm->bInterCGBondeds)
6416 if (options.minimumCommunicationRange > 0)
6418 comm->cutoff_mbody = options.minimumCommunicationRange;
6419 if (options.useBondedCommunication)
6421 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6425 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
6427 r_bonded_limit = comm->cutoff_mbody;
6429 else if (ir->bPeriodicMols)
6431 /* Can not easily determine the required cut-off */
6432 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6433 comm->cutoff_mbody = comm->cutoff/2;
6434 r_bonded_limit = comm->cutoff_mbody;
6442 dd_bonded_cg_distance(fplog, mtop, ir, xGlobal, box,
6443 options.checkBondedInteractions,
6446 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6447 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6449 /* We use an initial margin of 10% for the minimum cell size,
6450 * except when we are just below the non-bonded cut-off.
6452 if (options.useBondedCommunication)
6454 if (std::max(r_2b, r_mb) > comm->cutoff)
6456 r_bonded = std::max(r_2b, r_mb);
6457 r_bonded_limit = tenPercentMargin*r_bonded;
6458 comm->bBondComm = TRUE;
6463 r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
6465 /* We determine cutoff_mbody later */
6469 /* No special bonded communication,
6470 * simply increase the DD cut-off.
6472 r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb);
6473 comm->cutoff_mbody = r_bonded_limit;
6474 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
6480 "Minimum cell size due to bonded interactions: %.3f nm\n",
6483 comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
6487 if (dd->bInterCGcons && options.constraintCommunicationRange <= 0)
6489 /* There is a cell size limit due to the constraints (P-LINCS) */
6490 rconstr = constr_r_max(fplog, mtop, ir);
6494 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6496 if (rconstr > comm->cellsize_limit)
6498 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6502 else if (options.constraintCommunicationRange > 0 && fplog)
6504 /* Here we do not check for dd->bInterCGcons,
6505 * because one can also set a cell size limit for virtual sites only
6506 * and at this point we don't know yet if there are intercg v-sites.
6509 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6510 options.constraintCommunicationRange);
6511 rconstr = options.constraintCommunicationRange;
6513 comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
6515 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6517 if (options.numCells[XX] > 0)
6519 copy_ivec(options.numCells, dd->nc);
6520 set_dd_dim(fplog, dd);
6521 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, xGlobal, ddbox);
6523 if (options.numPmeRanks >= 0)
6525 cr->npmenodes = options.numPmeRanks;
6529 /* When the DD grid is set explicitly and -npme is set to auto,
6530 * don't use PME ranks. We check later if the DD grid is
6531 * compatible with the total number of ranks.
6536 real acs = average_cellsize_min(dd, ddbox);
6537 if (acs < comm->cellsize_limit)
6541 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6543 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
6544 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6545 acs, comm->cellsize_limit);
6550 set_ddbox_cr(cr, nullptr, ir, box, &comm->cgs_gl, xGlobal, ddbox);
6552 /* We need to choose the optimal DD grid and possibly PME nodes */
6554 dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6555 options.numPmeRanks,
6556 !isDlbDisabled(comm),
6558 comm->cellsize_limit, comm->cutoff,
6559 comm->bInterCGBondeds);
6561 if (dd->nc[XX] == 0)
6564 gmx_bool bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6565 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
6566 !bC ? "-rdd" : "-rcon",
6567 comm->dlbState != edlbsOffUser ? " or -dds" : "",
6568 bC ? " or your LINCS settings" : "");
6570 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
6571 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
6573 "Look in the log file for details on the domain decomposition",
6574 cr->nnodes-cr->npmenodes, limit, buf);
6576 set_dd_dim(fplog, dd);
6582 "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
6583 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6586 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6587 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6589 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
6590 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
6591 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6593 if (cr->npmenodes > dd->nnodes)
6595 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
6596 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6598 if (cr->npmenodes > 0)
6600 comm->npmenodes = cr->npmenodes;
6604 comm->npmenodes = dd->nnodes;
6607 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6609 /* The following choices should match those
6610 * in comm_cost_est in domdec_setup.c.
6611 * Note that here the checks have to take into account
6612 * that the decomposition might occur in a different order than xyz
6613 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6614 * in which case they will not match those in comm_cost_est,
6615 * but since that is mainly for testing purposes that's fine.
6617 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6618 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6619 getenv("GMX_PMEONEDD") == nullptr)
6621 comm->npmedecompdim = 2;
6622 comm->npmenodes_x = dd->nc[XX];
6623 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6627 /* In case nc is 1 in both x and y we could still choose to
6628 * decompose pme in y instead of x, but we use x for simplicity.
6630 comm->npmedecompdim = 1;
6631 if (dd->dim[0] == YY)
6633 comm->npmenodes_x = 1;
6634 comm->npmenodes_y = comm->npmenodes;
6638 comm->npmenodes_x = comm->npmenodes;
6639 comm->npmenodes_y = 1;
6644 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6645 comm->npmenodes_x, comm->npmenodes_y, 1);
6650 comm->npmedecompdim = 0;
6651 comm->npmenodes_x = 0;
6652 comm->npmenodes_y = 0;
6655 /* Technically we don't need both of these,
6656 * but it simplifies code not having to recalculate it.
6658 *npme_x = comm->npmenodes_x;
6659 *npme_y = comm->npmenodes_y;
6661 snew(comm->slb_frac, DIM);
6662 if (isDlbDisabled(comm))
6664 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], options.cellSizeX);
6665 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], options.cellSizeY);
6666 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], options.cellSizeZ);
6669 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6671 if (comm->bBondComm || !isDlbDisabled(comm))
6673 /* Set the bonded communication distance to halfway
6674 * the minimum and the maximum,
6675 * since the extra communication cost is nearly zero.
6677 real acs = average_cellsize_min(dd, ddbox);
6678 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6679 if (!isDlbDisabled(comm))
6681 /* Check if this does not limit the scaling */
6682 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
6683 options.dlbScaling*acs);
6685 if (!comm->bBondComm)
6687 /* Without bBondComm do not go beyond the n.b. cut-off */
6688 comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
6689 if (comm->cellsize_limit >= comm->cutoff)
6691 /* We don't loose a lot of efficieny
6692 * when increasing it to the n.b. cut-off.
6693 * It can even be slightly faster, because we need
6694 * less checks for the communication setup.
6696 comm->cutoff_mbody = comm->cutoff;
6699 /* Check if we did not end up below our original limit */
6700 comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
6702 if (comm->cutoff_mbody > comm->cellsize_limit)
6704 comm->cellsize_limit = comm->cutoff_mbody;
6707 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
6712 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
6713 "cellsize limit %f\n",
6714 comm->bBondComm, comm->cellsize_limit);
6719 check_dd_restrictions(cr, dd, ir, fplog);
6723 static void set_dlb_limits(gmx_domdec_t *dd)
6728 for (d = 0; d < dd->ndim; d++)
6730 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
6731 dd->comm->cellsize_min[dd->dim[d]] =
6732 dd->comm->cellsize_min_dlb[dd->dim[d]];
6737 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
6740 gmx_domdec_comm_t *comm;
6747 cellsize_min = comm->cellsize_min[dd->dim[0]];
6748 for (d = 1; d < dd->ndim; d++)
6750 cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
6753 /* Turn off DLB if we're too close to the cell size limit. */
6754 if (cellsize_min < comm->cellsize_limit*1.05)
6756 auto str = gmx::formatString("step %" GMX_PRId64 " Measured %.1f %% performance loss due to load imbalance, "
6757 "but the minimum cell size is smaller than 1.05 times the cell size limit."
6758 "Will no longer try dynamic load balancing.\n", step, dd_force_imb_perf_loss(dd)*100);
6759 dd_warning(cr, fplog, str.c_str());
6761 comm->dlbState = edlbsOffForever;
6766 sprintf(buf, "step %" GMX_PRId64 " Turning on dynamic load balancing, because the performance loss due to load imbalance is %.1f %%.\n", step, dd_force_imb_perf_loss(dd)*100);
6767 dd_warning(cr, fplog, buf);
6768 comm->dlbState = edlbsOnCanTurnOff;
6770 /* Store the non-DLB performance, so we can check if DLB actually
6771 * improves performance.
6773 GMX_RELEASE_ASSERT(comm->cycl_n[ddCyclStep] > 0, "When we turned on DLB, we should have measured cycles");
6774 comm->cyclesPerStepBeforeDLB = comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
6778 /* We can set the required cell size info here,
6779 * so we do not need to communicate this.
6780 * The grid is completely uniform.
6782 for (d = 0; d < dd->ndim; d++)
6786 comm->load[d].sum_m = comm->load[d].sum;
6788 nc = dd->nc[dd->dim[d]];
6789 for (i = 0; i < nc; i++)
6791 comm->root[d]->cell_f[i] = i/(real)nc;
6794 comm->root[d]->cell_f_max0[i] = i /(real)nc;
6795 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
6798 comm->root[d]->cell_f[nc] = 1.0;
6803 static void turn_off_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
6805 gmx_domdec_t *dd = cr->dd;
6808 sprintf(buf, "step %" GMX_PRId64 " Turning off dynamic load balancing, because it is degrading performance.\n", step);
6809 dd_warning(cr, fplog, buf);
6810 dd->comm->dlbState = edlbsOffCanTurnOn;
6811 dd->comm->haveTurnedOffDlb = true;
6812 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
6815 static void turn_off_dlb_forever(FILE *fplog, t_commrec *cr, gmx_int64_t step)
6817 GMX_RELEASE_ASSERT(cr->dd->comm->dlbState == edlbsOffCanTurnOn, "Can only turn off DLB forever when it was in the can-turn-on state");
6819 sprintf(buf, "step %" GMX_PRId64 " Will no longer try dynamic load balancing, as it degraded performance.\n", step);
6820 dd_warning(cr, fplog, buf);
6821 cr->dd->comm->dlbState = edlbsOffForever;
6824 static char *init_bLocalCG(const gmx_mtop_t *mtop)
6829 ncg = ncg_mtop(mtop);
6830 snew(bLocalCG, ncg);
6831 for (cg = 0; cg < ncg; cg++)
6833 bLocalCG[cg] = FALSE;
6839 void dd_init_bondeds(FILE *fplog,
6841 const gmx_mtop_t *mtop,
6842 const gmx_vsite_t *vsite,
6843 const t_inputrec *ir,
6844 gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
6846 gmx_domdec_comm_t *comm;
6848 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
6852 if (comm->bBondComm)
6854 /* Communicate atoms beyond the cut-off for bonded interactions */
6857 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
6859 comm->bLocalCG = init_bLocalCG(mtop);
6863 /* Only communicate atoms based on cut-off */
6864 comm->cglink = nullptr;
6865 comm->bLocalCG = nullptr;
6869 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
6870 const gmx_mtop_t *mtop, const t_inputrec *ir,
6871 gmx_bool bDynLoadBal, real dlb_scale,
6872 const gmx_ddbox_t *ddbox)
6874 gmx_domdec_comm_t *comm;
6880 if (fplog == nullptr)
6889 fprintf(fplog, "The maximum number of communication pulses is:");
6890 for (d = 0; d < dd->ndim; d++)
6892 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
6894 fprintf(fplog, "\n");
6895 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
6896 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
6897 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
6898 for (d = 0; d < DIM; d++)
6902 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
6909 comm->cellsize_min_dlb[d]/
6910 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6912 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
6915 fprintf(fplog, "\n");
6919 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
6920 fprintf(fplog, "The initial number of communication pulses is:");
6921 for (d = 0; d < dd->ndim; d++)
6923 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
6925 fprintf(fplog, "\n");
6926 fprintf(fplog, "The initial domain decomposition cell size is:");
6927 for (d = 0; d < DIM; d++)
6931 fprintf(fplog, " %c %.2f nm",
6932 dim2char(d), dd->comm->cellsize_min[d]);
6935 fprintf(fplog, "\n\n");
6938 gmx_bool bInterCGVsites = count_intercg_vsites(mtop);
6940 if (comm->bInterCGBondeds ||
6942 dd->bInterCGcons || dd->bInterCGsettles)
6944 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
6945 fprintf(fplog, "%40s %-7s %6.3f nm\n",
6946 "non-bonded interactions", "", comm->cutoff);
6950 limit = dd->comm->cellsize_limit;
6954 if (dynamic_dd_box(ddbox, ir))
6956 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
6958 limit = dd->comm->cellsize_min[XX];
6959 for (d = 1; d < DIM; d++)
6961 limit = std::min(limit, dd->comm->cellsize_min[d]);
6965 if (comm->bInterCGBondeds)
6967 fprintf(fplog, "%40s %-7s %6.3f nm\n",
6968 "two-body bonded interactions", "(-rdd)",
6969 std::max(comm->cutoff, comm->cutoff_mbody));
6970 fprintf(fplog, "%40s %-7s %6.3f nm\n",
6971 "multi-body bonded interactions", "(-rdd)",
6972 (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
6976 fprintf(fplog, "%40s %-7s %6.3f nm\n",
6977 "virtual site constructions", "(-rcon)", limit);
6979 if (dd->bInterCGcons || dd->bInterCGsettles)
6981 sprintf(buf, "atoms separated by up to %d constraints",
6983 fprintf(fplog, "%40s %-7s %6.3f nm\n",
6984 buf, "(-rcon)", limit);
6986 fprintf(fplog, "\n");
6992 static void set_cell_limits_dlb(gmx_domdec_t *dd,
6994 const t_inputrec *ir,
6995 const gmx_ddbox_t *ddbox)
6997 gmx_domdec_comm_t *comm;
6998 int d, dim, npulse, npulse_d_max, npulse_d;
7003 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7005 /* Determine the maximum number of comm. pulses in one dimension */
7007 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
7009 /* Determine the maximum required number of grid pulses */
7010 if (comm->cellsize_limit >= comm->cutoff)
7012 /* Only a single pulse is required */
7015 else if (!bNoCutOff && comm->cellsize_limit > 0)
7017 /* We round down slightly here to avoid overhead due to the latency
7018 * of extra communication calls when the cut-off
7019 * would be only slightly longer than the cell size.
7020 * Later cellsize_limit is redetermined,
7021 * so we can not miss interactions due to this rounding.
7023 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7027 /* There is no cell size limit */
7028 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7031 if (!bNoCutOff && npulse > 1)
7033 /* See if we can do with less pulses, based on dlb_scale */
7035 for (d = 0; d < dd->ndim; d++)
7038 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7039 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7040 npulse_d_max = std::max(npulse_d_max, npulse_d);
7042 npulse = std::min(npulse, npulse_d_max);
7045 /* This env var can override npulse */
7046 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
7053 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7054 for (d = 0; d < dd->ndim; d++)
7056 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
7057 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7058 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7059 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
7060 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7062 comm->bVacDLBNoLimit = FALSE;
7066 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7067 if (!comm->bVacDLBNoLimit)
7069 comm->cellsize_limit = std::max(comm->cellsize_limit,
7070 comm->cutoff/comm->maxpulse);
7072 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
7073 /* Set the minimum cell size for each DD dimension */
7074 for (d = 0; d < dd->ndim; d++)
7076 if (comm->bVacDLBNoLimit ||
7077 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7079 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7083 comm->cellsize_min_dlb[dd->dim[d]] =
7084 comm->cutoff/comm->cd[d].np_dlb;
7087 if (comm->cutoff_mbody <= 0)
7089 comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
7097 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
7099 /* If each molecule is a single charge group
7100 * or we use domain decomposition for each periodic dimension,
7101 * we do not need to take pbc into account for the bonded interactions.
7103 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7106 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
7109 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
7110 static void set_ddgrid_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7111 const gmx_mtop_t *mtop, const t_inputrec *ir,
7112 const gmx_ddbox_t *ddbox)
7114 gmx_domdec_comm_t *comm;
7120 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7122 init_ddpme(dd, &comm->ddpme[0], 0);
7123 if (comm->npmedecompdim >= 2)
7125 init_ddpme(dd, &comm->ddpme[1], 1);
7130 comm->npmenodes = 0;
7131 if (dd->pme_nodeid >= 0)
7133 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
7134 "Can not have separate PME ranks without PME electrostatics");
7140 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7142 if (!isDlbDisabled(comm))
7144 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7147 print_dd_settings(fplog, dd, mtop, ir, isDlbOn(comm), dlb_scale, ddbox);
7148 if (comm->dlbState == edlbsOffCanTurnOn)
7152 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7154 print_dd_settings(fplog, dd, mtop, ir, TRUE, dlb_scale, ddbox);
7157 if (ir->ePBC == epbcNONE)
7159 vol_frac = 1 - 1/(double)dd->nnodes;
7164 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7168 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7170 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7172 dd->ga2la = ga2la_init(natoms_tot, static_cast<int>(vol_frac*natoms_tot));
7175 /*! \brief Set some important DD parameters that can be modified by env.vars */
7176 static void set_dd_envvar_options(FILE *fplog, gmx_domdec_t *dd, int rank_mysim)
7178 gmx_domdec_comm_t *comm = dd->comm;
7180 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
7181 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
7182 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
7183 int recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
7184 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
7185 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
7186 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
7188 if (dd->bSendRecv2 && fplog)
7190 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
7197 fprintf(fplog, "Will load balance based on FLOP count\n");
7199 if (comm->eFlop > 1)
7201 srand(1 + rank_mysim);
7203 comm->bRecordLoad = TRUE;
7207 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
7211 DomdecOptions::DomdecOptions() :
7212 checkBondedInteractions(TRUE),
7213 useBondedCommunication(TRUE),
7215 rankOrder(DdRankOrder::pp_pme),
7216 minimumCommunicationRange(0),
7217 constraintCommunicationRange(0),
7218 dlbOption(DlbOption::turnOnWhenUseful),
7224 clear_ivec(numCells);
7227 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
7228 const DomdecOptions &options,
7229 const MdrunOptions &mdrunOptions,
7230 const gmx_mtop_t *mtop,
7231 const t_inputrec *ir,
7233 const rvec *xGlobal,
7235 int *npme_x, int *npme_y)
7242 "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
7247 dd->comm = init_dd_comm();
7249 set_dd_envvar_options(fplog, dd, cr->nodeid);
7251 set_dd_limits_and_grid(fplog, cr, dd, options, mdrunOptions,
7257 make_dd_communicators(fplog, cr, dd, options.rankOrder);
7259 if (thisRankHasDuty(cr, DUTY_PP))
7261 set_ddgrid_parameters(fplog, dd, options.dlbScaling, mtop, ir, ddbox);
7263 setup_neighbor_relations(dd);
7266 /* Set overallocation to avoid frequent reallocation of arrays */
7267 set_over_alloc_dd(TRUE);
7269 /* Initialize DD paritioning counters */
7270 dd->comm->partition_step = INT_MIN;
7273 /* We currently don't know the number of threads yet, we set this later */
7276 clear_dd_cycle_counts(dd);
7281 static gmx_bool test_dd_cutoff(t_commrec *cr,
7282 t_state *state, const t_inputrec *ir,
7293 set_ddbox(dd, FALSE, cr, ir, state->box,
7294 TRUE, &dd->comm->cgs_gl, as_rvec_array(state->x.data()), &ddbox);
7298 for (d = 0; d < dd->ndim; d++)
7302 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7303 if (dynamic_dd_box(&ddbox, ir))
7305 inv_cell_size *= DD_PRES_SCALE_MARGIN;
7308 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7310 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
7312 if (np > dd->comm->cd[d].np_dlb)
7317 /* If a current local cell size is smaller than the requested
7318 * cut-off, we could still fix it, but this gets very complicated.
7319 * Without fixing here, we might actually need more checks.
7321 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7328 if (!isDlbDisabled(dd->comm))
7330 /* If DLB is not active yet, we don't need to check the grid jumps.
7331 * Actually we shouldn't, because then the grid jump data is not set.
7333 if (isDlbOn(dd->comm) &&
7334 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7339 gmx_sumi(1, &LocallyLimited, cr);
7341 if (LocallyLimited > 0)
7350 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, const t_inputrec *ir,
7353 gmx_bool bCutoffAllowed;
7355 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7359 cr->dd->comm->cutoff = cutoff_req;
7362 return bCutoffAllowed;
7365 void set_dd_dlb_max_cutoff(t_commrec *cr, real cutoff)
7367 gmx_domdec_comm_t *comm;
7369 comm = cr->dd->comm;
7371 /* Turn on the DLB limiting (might have been on already) */
7372 comm->bPMELoadBalDLBLimits = TRUE;
7374 /* Change the cut-off limit */
7375 comm->PMELoadBal_max_cutoff = cutoff;
7379 fprintf(debug, "PME load balancing set a limit to the DLB staggering such that a %f cut-off will continue to fit\n",
7380 comm->PMELoadBal_max_cutoff);
7384 /* Sets whether we should later check the load imbalance data, so that
7385 * we can trigger dynamic load balancing if enough imbalance has
7388 * Used after PME load balancing unlocks DLB, so that the check
7389 * whether DLB will be useful can happen immediately.
7391 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue)
7393 if (dd->comm->dlbState == edlbsOffCanTurnOn)
7395 dd->comm->bCheckWhetherToTurnDlbOn = bValue;
7399 /* Store the DD partitioning count, so we can ignore cycle counts
7400 * over the next nstlist steps, which are often slower.
7402 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
7407 /* Returns if we should check whether there has been enough load
7408 * imbalance to trigger dynamic load balancing.
7410 static gmx_bool dd_dlb_get_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd)
7412 if (dd->comm->dlbState != edlbsOffCanTurnOn)
7417 if (dd->ddp_count <= dd->comm->ddPartioningCountFirstDlbOff)
7419 /* We ignore the first nstlist steps at the start of the run
7420 * or after PME load balancing or after turning DLB off, since
7421 * these often have extra allocation or cache miss overhead.
7426 if (dd->comm->cycl_n[ddCyclStep] == 0)
7428 /* We can have zero timed steps when dd_partition_system is called
7429 * more than once at the same step, e.g. with replica exchange.
7430 * Turning on DLB would trigger an assertion failure later, but is
7431 * also useless right after exchanging replicas.
7436 /* We should check whether we should use DLB directly after
7438 if (dd->comm->bCheckWhetherToTurnDlbOn)
7440 /* This flag was set when the PME load-balancing routines
7441 unlocked DLB, and should now be cleared. */
7442 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, FALSE);
7445 /* We check whether we should use DLB every c_checkTurnDlbOnInterval
7446 * partitionings (we do not do this every partioning, so that we
7447 * avoid excessive communication). */
7448 if (dd->comm->n_load_have % c_checkTurnDlbOnInterval == c_checkTurnDlbOnInterval - 1)
7456 gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
7458 return isDlbOn(dd->comm);
7461 gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
7463 return (dd->comm->dlbState == edlbsOffTemporarilyLocked);
7466 void dd_dlb_lock(gmx_domdec_t *dd)
7468 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
7469 if (dd->comm->dlbState == edlbsOffCanTurnOn)
7471 dd->comm->dlbState = edlbsOffTemporarilyLocked;
7475 void dd_dlb_unlock(gmx_domdec_t *dd)
7477 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
7478 if (dd->comm->dlbState == edlbsOffTemporarilyLocked)
7480 dd->comm->dlbState = edlbsOffCanTurnOn;
7481 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
7485 static void merge_cg_buffers(int ncell,
7486 gmx_domdec_comm_dim_t *cd, int pulse,
7488 int *index_gl, int *recv_i,
7489 rvec *cg_cm, rvec *recv_vr,
7491 cginfo_mb_t *cginfo_mb, int *cginfo)
7493 gmx_domdec_ind_t *ind, *ind_p;
7494 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7495 int shift, shift_at;
7497 ind = &cd->ind[pulse];
7499 /* First correct the already stored data */
7500 shift = ind->nrecv[ncell];
7501 for (cell = ncell-1; cell >= 0; cell--)
7503 shift -= ind->nrecv[cell];
7506 /* Move the cg's present from previous grid pulses */
7507 cg0 = ncg_cell[ncell+cell];
7508 cg1 = ncg_cell[ncell+cell+1];
7509 cgindex[cg1+shift] = cgindex[cg1];
7510 for (cg = cg1-1; cg >= cg0; cg--)
7512 index_gl[cg+shift] = index_gl[cg];
7513 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7514 cgindex[cg+shift] = cgindex[cg];
7515 cginfo[cg+shift] = cginfo[cg];
7517 /* Correct the already stored send indices for the shift */
7518 for (p = 1; p <= pulse; p++)
7520 ind_p = &cd->ind[p];
7522 for (c = 0; c < cell; c++)
7524 cg0 += ind_p->nsend[c];
7526 cg1 = cg0 + ind_p->nsend[cell];
7527 for (cg = cg0; cg < cg1; cg++)
7529 ind_p->index[cg] += shift;
7535 /* Merge in the communicated buffers */
7539 for (cell = 0; cell < ncell; cell++)
7541 cg1 = ncg_cell[ncell+cell+1] + shift;
7544 /* Correct the old cg indices */
7545 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7547 cgindex[cg+1] += shift_at;
7550 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7552 /* Copy this charge group from the buffer */
7553 index_gl[cg1] = recv_i[cg0];
7554 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7555 /* Add it to the cgindex */
7556 cg_gl = index_gl[cg1];
7557 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7558 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7559 cgindex[cg1+1] = cgindex[cg1] + nat;
7564 shift += ind->nrecv[cell];
7565 ncg_cell[ncell+cell+1] = cg1;
7569 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7570 int nzone, int cg0, const int *cgindex)
7574 /* Store the atom block boundaries for easy copying of communication buffers
7577 for (zone = 0; zone < nzone; zone++)
7579 for (p = 0; p < cd->np; p++)
7581 cd->ind[p].cell2at0[zone] = cgindex[cg];
7582 cg += cd->ind[p].nrecv[zone];
7583 cd->ind[p].cell2at1[zone] = cgindex[cg];
7588 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7594 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7596 if (!bLocalCG[link->a[i]])
7605 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7607 real c[DIM][4]; /* the corners for the non-bonded communication */
7608 real cr0; /* corner for rounding */
7609 real cr1[4]; /* corners for rounding */
7610 real bc[DIM]; /* corners for bounded communication */
7611 real bcr1; /* corner for rounding for bonded communication */
7614 /* Determine the corners of the domain(s) we are communicating with */
7616 set_dd_corners(const gmx_domdec_t *dd,
7617 int dim0, int dim1, int dim2,
7621 const gmx_domdec_comm_t *comm;
7622 const gmx_domdec_zones_t *zones;
7627 zones = &comm->zones;
7629 /* Keep the compiler happy */
7633 /* The first dimension is equal for all cells */
7634 c->c[0][0] = comm->cell_x0[dim0];
7637 c->bc[0] = c->c[0][0];
7642 /* This cell row is only seen from the first row */
7643 c->c[1][0] = comm->cell_x0[dim1];
7644 /* All rows can see this row */
7645 c->c[1][1] = comm->cell_x0[dim1];
7646 if (isDlbOn(dd->comm))
7648 c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7651 /* For the multi-body distance we need the maximum */
7652 c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7655 /* Set the upper-right corner for rounding */
7656 c->cr0 = comm->cell_x1[dim0];
7661 for (j = 0; j < 4; j++)
7663 c->c[2][j] = comm->cell_x0[dim2];
7665 if (isDlbOn(dd->comm))
7667 /* Use the maximum of the i-cells that see a j-cell */
7668 for (i = 0; i < zones->nizone; i++)
7670 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7675 std::max(c->c[2][j-4],
7676 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7682 /* For the multi-body distance we need the maximum */
7683 c->bc[2] = comm->cell_x0[dim2];
7684 for (i = 0; i < 2; i++)
7686 for (j = 0; j < 2; j++)
7688 c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0);
7694 /* Set the upper-right corner for rounding */
7695 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7696 * Only cell (0,0,0) can see cell 7 (1,1,1)
7698 c->cr1[0] = comm->cell_x1[dim1];
7699 c->cr1[3] = comm->cell_x1[dim1];
7700 if (isDlbOn(dd->comm))
7702 c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7705 /* For the multi-body distance we need the maximum */
7706 c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7713 /* Determine which cg's we need to send in this pulse from this zone */
7715 get_zone_pulse_cgs(gmx_domdec_t *dd,
7716 int zonei, int zone,
7718 const int *index_gl,
7720 int dim, int dim_ind,
7721 int dim0, int dim1, int dim2,
7722 real r_comm2, real r_bcomm2,
7726 real skew_fac2_d, real skew_fac_01,
7727 rvec *v_d, rvec *v_0, rvec *v_1,
7728 const dd_corners_t *c,
7730 gmx_bool bDistBonded,
7736 gmx_domdec_ind_t *ind,
7737 int **ibuf, int *ibuf_nalloc,
7743 gmx_domdec_comm_t *comm;
7745 gmx_bool bDistMB_pulse;
7747 real r2, rb2, r, tric_sh;
7750 int nsend_z, nsend, nat;
7754 bScrew = (dd->bScrewPBC && dim == XX);
7756 bDistMB_pulse = (bDistMB && bDistBonded);
7762 for (cg = cg0; cg < cg1; cg++)
7766 if (tric_dist[dim_ind] == 0)
7768 /* Rectangular direction, easy */
7769 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7776 r = cg_cm[cg][dim] - c->bc[dim_ind];
7782 /* Rounding gives at most a 16% reduction
7783 * in communicated atoms
7785 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7787 r = cg_cm[cg][dim0] - c->cr0;
7788 /* This is the first dimension, so always r >= 0 */
7795 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7797 r = cg_cm[cg][dim1] - c->cr1[zone];
7804 r = cg_cm[cg][dim1] - c->bcr1;
7814 /* Triclinic direction, more complicated */
7817 /* Rounding, conservative as the skew_fac multiplication
7818 * will slightly underestimate the distance.
7820 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7822 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7823 for (i = dim0+1; i < DIM; i++)
7825 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7827 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7830 rb[dim0] = rn[dim0];
7833 /* Take care that the cell planes along dim0 might not
7834 * be orthogonal to those along dim1 and dim2.
7836 for (i = 1; i <= dim_ind; i++)
7839 if (normal[dim0][dimd] > 0)
7841 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7844 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7849 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7851 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7853 for (i = dim1+1; i < DIM; i++)
7855 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7857 rn[dim1] += tric_sh;
7860 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7861 /* Take care of coupling of the distances
7862 * to the planes along dim0 and dim1 through dim2.
7864 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7865 /* Take care that the cell planes along dim1
7866 * might not be orthogonal to that along dim2.
7868 if (normal[dim1][dim2] > 0)
7870 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7876 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7879 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7880 /* Take care of coupling of the distances
7881 * to the planes along dim0 and dim1 through dim2.
7883 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7884 /* Take care that the cell planes along dim1
7885 * might not be orthogonal to that along dim2.
7887 if (normal[dim1][dim2] > 0)
7889 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7894 /* The distance along the communication direction */
7895 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7897 for (i = dim+1; i < DIM; i++)
7899 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7904 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7905 /* Take care of coupling of the distances
7906 * to the planes along dim0 and dim1 through dim2.
7908 if (dim_ind == 1 && zonei == 1)
7910 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
7916 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
7919 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
7920 /* Take care of coupling of the distances
7921 * to the planes along dim0 and dim1 through dim2.
7923 if (dim_ind == 1 && zonei == 1)
7925 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
7933 ((bDistMB && rb2 < r_bcomm2) ||
7934 (bDist2B && r2 < r_bcomm2)) &&
7936 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
7937 missing_link(comm->cglink, index_gl[cg],
7940 /* Make an index to the local charge groups */
7941 if (nsend+1 > ind->nalloc)
7943 ind->nalloc = over_alloc_large(nsend+1);
7944 srenew(ind->index, ind->nalloc);
7946 if (nsend+1 > *ibuf_nalloc)
7948 *ibuf_nalloc = over_alloc_large(nsend+1);
7949 srenew(*ibuf, *ibuf_nalloc);
7951 ind->index[nsend] = cg;
7952 (*ibuf)[nsend] = index_gl[cg];
7954 vec_rvec_check_alloc(vbuf, nsend+1);
7956 if (dd->ci[dim] == 0)
7958 /* Correct cg_cm for pbc */
7959 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
7962 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
7963 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
7968 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
7971 nat += cgindex[cg+1] - cgindex[cg];
7977 *nsend_z_ptr = nsend_z;
7980 static void setup_dd_communication(gmx_domdec_t *dd,
7981 matrix box, gmx_ddbox_t *ddbox,
7983 t_state *state, PaddedRVecVector *f)
7985 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
7986 int nzone, nzone_send, zone, zonei, cg0, cg1;
7987 int c, i, cg, cg_gl, nrcg;
7988 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
7989 gmx_domdec_comm_t *comm;
7990 gmx_domdec_zones_t *zones;
7991 gmx_domdec_comm_dim_t *cd;
7992 gmx_domdec_ind_t *ind;
7993 cginfo_mb_t *cginfo_mb;
7994 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
7995 real r_comm2, r_bcomm2;
7996 dd_corners_t corners;
7998 rvec *cg_cm, *normal, *v_d, *v_0 = nullptr, *v_1 = nullptr, *recv_vr;
7999 real skew_fac2_d, skew_fac_01;
8006 fprintf(debug, "Setting up DD communication\n");
8013 /* Initialize the thread data.
8014 * This can not be done in init_domain_decomposition,
8015 * as the numbers of threads is determined later.
8017 comm->nth = gmx_omp_nthreads_get(emntDomdec);
8020 snew(comm->dth, comm->nth);
8024 switch (fr->cutoff_scheme)
8030 cg_cm = as_rvec_array(state->x.data());
8033 gmx_incons("unimplemented");
8037 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8039 /* Check if we need to use triclinic distances */
8040 tric_dist[dim_ind] = 0;
8041 for (i = 0; i <= dim_ind; i++)
8043 if (ddbox->tric_dir[dd->dim[i]])
8045 tric_dist[dim_ind] = 1;
8050 bBondComm = comm->bBondComm;
8052 /* Do we need to determine extra distances for multi-body bondeds? */
8053 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
8055 /* Do we need to determine extra distances for only two-body bondeds? */
8056 bDist2B = (bBondComm && !bDistMB);
8058 r_comm2 = gmx::square(comm->cutoff);
8059 r_bcomm2 = gmx::square(comm->cutoff_mbody);
8063 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, std::sqrt(r_bcomm2));
8066 zones = &comm->zones;
8069 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8070 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8072 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8074 /* Triclinic stuff */
8075 normal = ddbox->normal;
8079 v_0 = ddbox->v[dim0];
8080 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8082 /* Determine the coupling coefficient for the distances
8083 * to the cell planes along dim0 and dim1 through dim2.
8084 * This is required for correct rounding.
8087 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8090 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8096 v_1 = ddbox->v[dim1];
8099 zone_cg_range = zones->cg_range;
8100 index_gl = dd->index_gl;
8101 cgindex = dd->cgindex;
8102 cginfo_mb = fr->cginfo_mb;
8104 zone_cg_range[0] = 0;
8105 zone_cg_range[1] = dd->ncg_home;
8106 comm->zone_ncg1[0] = dd->ncg_home;
8107 pos_cg = dd->ncg_home;
8109 nat_tot = dd->nat_home;
8111 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8113 dim = dd->dim[dim_ind];
8114 cd = &comm->cd[dim_ind];
8116 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8118 /* No pbc in this dimension, the first node should not comm. */
8126 v_d = ddbox->v[dim];
8127 skew_fac2_d = gmx::square(ddbox->skew_fac[dim]);
8129 cd->bInPlace = TRUE;
8130 for (p = 0; p < cd->np; p++)
8132 /* Only atoms communicated in the first pulse are used
8133 * for multi-body bonded interactions or for bBondComm.
8135 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8140 for (zone = 0; zone < nzone_send; zone++)
8142 if (tric_dist[dim_ind] && dim_ind > 0)
8144 /* Determine slightly more optimized skew_fac's
8146 * This reduces the number of communicated atoms
8147 * by about 10% for 3D DD of rhombic dodecahedra.
8149 for (dimd = 0; dimd < dim; dimd++)
8151 sf2_round[dimd] = 1;
8152 if (ddbox->tric_dir[dimd])
8154 for (i = dd->dim[dimd]+1; i < DIM; i++)
8156 /* If we are shifted in dimension i
8157 * and the cell plane is tilted forward
8158 * in dimension i, skip this coupling.
8160 if (!(zones->shift[nzone+zone][i] &&
8161 ddbox->v[dimd][i][dimd] >= 0))
8164 gmx::square(ddbox->v[dimd][i][dimd]);
8167 sf2_round[dimd] = 1/sf2_round[dimd];
8172 zonei = zone_perm[dim_ind][zone];
8175 /* Here we permutate the zones to obtain a convenient order
8176 * for neighbor searching
8178 cg0 = zone_cg_range[zonei];
8179 cg1 = zone_cg_range[zonei+1];
8183 /* Look only at the cg's received in the previous grid pulse
8185 cg1 = zone_cg_range[nzone+zone+1];
8186 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
8189 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8190 for (th = 0; th < comm->nth; th++)
8194 gmx_domdec_ind_t *ind_p;
8195 int **ibuf_p, *ibuf_nalloc_p;
8197 int *nsend_p, *nat_p;
8203 /* Thread 0 writes in the comm buffers */
8205 ibuf_p = &comm->buf_int;
8206 ibuf_nalloc_p = &comm->nalloc_int;
8207 vbuf_p = &comm->vbuf;
8210 nsend_zone_p = &ind->nsend[zone];
8214 /* Other threads write into temp buffers */
8215 ind_p = &comm->dth[th].ind;
8216 ibuf_p = &comm->dth[th].ibuf;
8217 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8218 vbuf_p = &comm->dth[th].vbuf;
8219 nsend_p = &comm->dth[th].nsend;
8220 nat_p = &comm->dth[th].nat;
8221 nsend_zone_p = &comm->dth[th].nsend_zone;
8223 comm->dth[th].nsend = 0;
8224 comm->dth[th].nat = 0;
8225 comm->dth[th].nsend_zone = 0;
8235 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8236 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8239 /* Get the cg's for this pulse in this zone */
8240 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8242 dim, dim_ind, dim0, dim1, dim2,
8245 normal, skew_fac2_d, skew_fac_01,
8246 v_d, v_0, v_1, &corners, sf2_round,
8247 bDistBonded, bBondComm,
8251 ibuf_p, ibuf_nalloc_p,
8256 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
8259 /* Append data of threads>=1 to the communication buffers */
8260 for (th = 1; th < comm->nth; th++)
8262 dd_comm_setup_work_t *dth;
8265 dth = &comm->dth[th];
8267 ns1 = nsend + dth->nsend_zone;
8268 if (ns1 > ind->nalloc)
8270 ind->nalloc = over_alloc_dd(ns1);
8271 srenew(ind->index, ind->nalloc);
8273 if (ns1 > comm->nalloc_int)
8275 comm->nalloc_int = over_alloc_dd(ns1);
8276 srenew(comm->buf_int, comm->nalloc_int);
8278 if (ns1 > comm->vbuf.nalloc)
8280 comm->vbuf.nalloc = over_alloc_dd(ns1);
8281 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8284 for (i = 0; i < dth->nsend_zone; i++)
8286 ind->index[nsend] = dth->ind.index[i];
8287 comm->buf_int[nsend] = dth->ibuf[i];
8288 copy_rvec(dth->vbuf.v[i],
8289 comm->vbuf.v[nsend]);
8293 ind->nsend[zone] += dth->nsend_zone;
8296 /* Clear the counts in case we do not have pbc */
8297 for (zone = nzone_send; zone < nzone; zone++)
8299 ind->nsend[zone] = 0;
8301 ind->nsend[nzone] = nsend;
8302 ind->nsend[nzone+1] = nat;
8303 /* Communicate the number of cg's and atoms to receive */
8304 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8305 ind->nsend, nzone+2,
8306 ind->nrecv, nzone+2);
8308 /* The rvec buffer is also required for atom buffers of size nsend
8309 * in dd_move_x and dd_move_f.
8311 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8315 /* We can receive in place if only the last zone is not empty */
8316 for (zone = 0; zone < nzone-1; zone++)
8318 if (ind->nrecv[zone] > 0)
8320 cd->bInPlace = FALSE;
8325 /* The int buffer is only required here for the cg indices */
8326 if (ind->nrecv[nzone] > comm->nalloc_int2)
8328 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8329 srenew(comm->buf_int2, comm->nalloc_int2);
8331 /* The rvec buffer is also required for atom buffers
8332 * of size nrecv in dd_move_x and dd_move_f.
8334 i = std::max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8335 vec_rvec_check_alloc(&comm->vbuf2, i);
8339 /* Make space for the global cg indices */
8340 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8341 || dd->cg_nalloc == 0)
8343 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8344 srenew(index_gl, dd->cg_nalloc);
8345 srenew(cgindex, dd->cg_nalloc+1);
8347 /* Communicate the global cg indices */
8350 recv_i = index_gl + pos_cg;
8354 recv_i = comm->buf_int2;
8356 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8357 comm->buf_int, nsend,
8358 recv_i, ind->nrecv[nzone]);
8360 /* Make space for cg_cm */
8361 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8362 if (fr->cutoff_scheme == ecutsGROUP)
8368 cg_cm = as_rvec_array(state->x.data());
8370 /* Communicate cg_cm */
8373 recv_vr = cg_cm + pos_cg;
8377 recv_vr = comm->vbuf2.v;
8379 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8380 comm->vbuf.v, nsend,
8381 recv_vr, ind->nrecv[nzone]);
8383 /* Make the charge group index */
8386 zone = (p == 0 ? 0 : nzone - 1);
8387 while (zone < nzone)
8389 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8391 cg_gl = index_gl[pos_cg];
8392 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8393 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8394 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8397 /* Update the charge group presence,
8398 * so we can use it in the next pass of the loop.
8400 comm->bLocalCG[cg_gl] = TRUE;
8406 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8409 zone_cg_range[nzone+zone] = pos_cg;
8414 /* This part of the code is never executed with bBondComm. */
8415 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8416 index_gl, recv_i, cg_cm, recv_vr,
8417 cgindex, fr->cginfo_mb, fr->cginfo);
8418 pos_cg += ind->nrecv[nzone];
8420 nat_tot += ind->nrecv[nzone+1];
8424 /* Store the atom block for easy copying of communication buffers */
8425 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
8429 dd->index_gl = index_gl;
8430 dd->cgindex = cgindex;
8432 dd->ncg_tot = zone_cg_range[zones->n];
8433 dd->nat_tot = nat_tot;
8434 comm->nat[ddnatHOME] = dd->nat_home;
8435 for (i = ddnatZONE; i < ddnatNR; i++)
8437 comm->nat[i] = dd->nat_tot;
8442 /* We don't need to update cginfo, since that was alrady done above.
8443 * So we pass NULL for the forcerec.
8445 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8446 nullptr, comm->bLocalCG);
8451 fprintf(debug, "Finished setting up DD communication, zones:");
8452 for (c = 0; c < zones->n; c++)
8454 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8456 fprintf(debug, "\n");
8460 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8464 for (c = 0; c < zones->nizone; c++)
8466 zones->izone[c].cg1 = zones->cg_range[c+1];
8467 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8468 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
8472 /* \brief Set zone dimensions for zones \p zone_start to \p zone_end-1
8474 * Also sets the atom density for the home zone when \p zone_start=0.
8475 * For this \p numMovedChargeGroupsInHomeZone needs to be passed to tell
8476 * how many charge groups will move but are still part of the current range.
8477 * \todo When converting domdec to use proper classes, all these variables
8478 * should be private and a method should return the correct count
8479 * depending on an internal state.
8481 * \param[in,out] dd The domain decomposition struct
8482 * \param[in] box The box
8483 * \param[in] ddbox The domain decomposition box struct
8484 * \param[in] zone_start The start of the zone range to set sizes for
8485 * \param[in] zone_end The end of the zone range to set sizes for
8486 * \param[in] numMovedChargeGroupsInHomeZone The number of charge groups in the home zone that should moved but are still present in dd->comm->zones.cg_range
8488 static void set_zones_size(gmx_domdec_t *dd,
8489 matrix box, const gmx_ddbox_t *ddbox,
8490 int zone_start, int zone_end,
8491 int numMovedChargeGroupsInHomeZone)
8493 gmx_domdec_comm_t *comm;
8494 gmx_domdec_zones_t *zones;
8503 zones = &comm->zones;
8505 /* Do we need to determine extra distances for multi-body bondeds? */
8506 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
8508 for (z = zone_start; z < zone_end; z++)
8510 /* Copy cell limits to zone limits.
8511 * Valid for non-DD dims and non-shifted dims.
8513 copy_rvec(comm->cell_x0, zones->size[z].x0);
8514 copy_rvec(comm->cell_x1, zones->size[z].x1);
8517 for (d = 0; d < dd->ndim; d++)
8521 for (z = 0; z < zones->n; z++)
8523 /* With a staggered grid we have different sizes
8524 * for non-shifted dimensions.
8526 if (isDlbOn(dd->comm) && zones->shift[z][dim] == 0)
8530 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8531 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8535 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8536 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
8542 rcmbs = comm->cutoff_mbody;
8543 if (ddbox->tric_dir[dim])
8545 rcs /= ddbox->skew_fac[dim];
8546 rcmbs /= ddbox->skew_fac[dim];
8549 /* Set the lower limit for the shifted zone dimensions */
8550 for (z = zone_start; z < zone_end; z++)
8552 if (zones->shift[z][dim] > 0)
8555 if (!isDlbOn(dd->comm) || d == 0)
8557 zones->size[z].x0[dim] = comm->cell_x1[dim];
8558 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8562 /* Here we take the lower limit of the zone from
8563 * the lowest domain of the zone below.
8567 zones->size[z].x0[dim] =
8568 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8574 zones->size[z].x0[dim] =
8575 zones->size[zone_perm[2][z-4]].x0[dim];
8579 zones->size[z].x0[dim] =
8580 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8583 /* A temporary limit, is updated below */
8584 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8588 for (zi = 0; zi < zones->nizone; zi++)
8590 if (zones->shift[zi][dim] == 0)
8592 /* This takes the whole zone into account.
8593 * With multiple pulses this will lead
8594 * to a larger zone then strictly necessary.
8596 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
8597 zones->size[zi].x1[dim]+rcmbs);
8605 /* Loop over the i-zones to set the upper limit of each
8608 for (zi = 0; zi < zones->nizone; zi++)
8610 if (zones->shift[zi][dim] == 0)
8612 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8614 if (zones->shift[z][dim] > 0)
8616 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
8617 zones->size[zi].x1[dim]+rcs);
8624 for (z = zone_start; z < zone_end; z++)
8626 /* Initialization only required to keep the compiler happy */
8627 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8630 /* To determine the bounding box for a zone we need to find
8631 * the extreme corners of 4, 2 or 1 corners.
8633 nc = 1 << (ddbox->nboundeddim - 1);
8635 for (c = 0; c < nc; c++)
8637 /* Set up a zone corner at x=0, ignoring trilinic couplings */
8641 corner[YY] = zones->size[z].x0[YY];
8645 corner[YY] = zones->size[z].x1[YY];
8649 corner[ZZ] = zones->size[z].x0[ZZ];
8653 corner[ZZ] = zones->size[z].x1[ZZ];
8655 if (dd->ndim == 1 && dd->dim[0] < ZZ && ZZ < dd->npbcdim &&
8656 box[ZZ][1 - dd->dim[0]] != 0)
8658 /* With 1D domain decomposition the cg's are not in
8659 * the triclinic box, but triclinic x-y and rectangular y/x-z.
8660 * Shift the corner of the z-vector back to along the box
8661 * vector of dimension d, so it will later end up at 0 along d.
8662 * This can affect the location of this corner along dd->dim[0]
8663 * through the matrix operation below if box[d][dd->dim[0]]!=0.
8665 int d = 1 - dd->dim[0];
8667 corner[d] -= corner[ZZ]*box[ZZ][d]/box[ZZ][ZZ];
8669 /* Apply the triclinic couplings */
8670 assert(ddbox->npbcdim <= DIM);
8671 for (i = YY; i < ddbox->npbcdim; i++)
8673 for (j = XX; j < i; j++)
8675 corner[j] += corner[i]*box[i][j]/box[i][i];
8680 copy_rvec(corner, corner_min);
8681 copy_rvec(corner, corner_max);
8685 for (i = 0; i < DIM; i++)
8687 corner_min[i] = std::min(corner_min[i], corner[i]);
8688 corner_max[i] = std::max(corner_max[i], corner[i]);
8692 /* Copy the extreme cornes without offset along x */
8693 for (i = 0; i < DIM; i++)
8695 zones->size[z].bb_x0[i] = corner_min[i];
8696 zones->size[z].bb_x1[i] = corner_max[i];
8698 /* Add the offset along x */
8699 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8700 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
8703 if (zone_start == 0)
8706 for (dim = 0; dim < DIM; dim++)
8708 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8710 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0] - numMovedChargeGroupsInHomeZone)/vol;
8715 for (z = zone_start; z < zone_end; z++)
8717 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8719 zones->size[z].x0[XX], zones->size[z].x1[XX],
8720 zones->size[z].x0[YY], zones->size[z].x1[YY],
8721 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8722 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8724 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8725 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8726 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
8731 static int comp_cgsort(const void *a, const void *b)
8735 gmx_cgsort_t *cga, *cgb;
8736 cga = (gmx_cgsort_t *)a;
8737 cgb = (gmx_cgsort_t *)b;
8739 comp = cga->nsc - cgb->nsc;
8742 comp = cga->ind_gl - cgb->ind_gl;
8748 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8753 /* Order the data */
8754 for (i = 0; i < n; i++)
8756 buf[i] = a[sort[i].ind];
8759 /* Copy back to the original array */
8760 for (i = 0; i < n; i++)
8766 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8771 /* Order the data */
8772 for (i = 0; i < n; i++)
8774 copy_rvec(v[sort[i].ind], buf[i]);
8777 /* Copy back to the original array */
8778 for (i = 0; i < n; i++)
8780 copy_rvec(buf[i], v[i]);
8784 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8787 int a, atot, cg, cg0, cg1, i;
8789 if (cgindex == nullptr)
8791 /* Avoid the useless loop of the atoms within a cg */
8792 order_vec_cg(ncg, sort, v, buf);
8797 /* Order the data */
8799 for (cg = 0; cg < ncg; cg++)
8801 cg0 = cgindex[sort[cg].ind];
8802 cg1 = cgindex[sort[cg].ind+1];
8803 for (i = cg0; i < cg1; i++)
8805 copy_rvec(v[i], buf[a]);
8811 /* Copy back to the original array */
8812 for (a = 0; a < atot; a++)
8814 copy_rvec(buf[a], v[a]);
8818 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8819 int nsort_new, gmx_cgsort_t *sort_new,
8820 gmx_cgsort_t *sort1)
8824 /* The new indices are not very ordered, so we qsort them */
8825 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8827 /* sort2 is already ordered, so now we can merge the two arrays */
8831 while (i2 < nsort2 || i_new < nsort_new)
8835 sort1[i1++] = sort_new[i_new++];
8837 else if (i_new == nsort_new)
8839 sort1[i1++] = sort2[i2++];
8841 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8842 (sort2[i2].nsc == sort_new[i_new].nsc &&
8843 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8845 sort1[i1++] = sort2[i2++];
8849 sort1[i1++] = sort_new[i_new++];
8854 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8856 gmx_domdec_sort_t *sort;
8857 gmx_cgsort_t *cgsort, *sort_i;
8858 int ncg_new, nsort2, nsort_new, i, *a, moved;
8860 sort = dd->comm->sort;
8862 a = fr->ns->grid->cell_index;
8864 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns->grid->ncells;
8866 if (ncg_home_old >= 0)
8868 /* The charge groups that remained in the same ns grid cell
8869 * are completely ordered. So we can sort efficiently by sorting
8870 * the charge groups that did move into the stationary list.
8875 for (i = 0; i < dd->ncg_home; i++)
8877 /* Check if this cg did not move to another node */
8880 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8882 /* This cg is new on this node or moved ns grid cell */
8883 if (nsort_new >= sort->sort_new_nalloc)
8885 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8886 srenew(sort->sort_new, sort->sort_new_nalloc);
8888 sort_i = &(sort->sort_new[nsort_new++]);
8892 /* This cg did not move */
8893 sort_i = &(sort->sort2[nsort2++]);
8895 /* Sort on the ns grid cell indices
8896 * and the global topology index.
8897 * index_gl is irrelevant with cell ns,
8898 * but we set it here anyhow to avoid a conditional.
8901 sort_i->ind_gl = dd->index_gl[i];
8908 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8911 /* Sort efficiently */
8912 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8917 cgsort = sort->sort;
8919 for (i = 0; i < dd->ncg_home; i++)
8921 /* Sort on the ns grid cell indices
8922 * and the global topology index
8924 cgsort[i].nsc = a[i];
8925 cgsort[i].ind_gl = dd->index_gl[i];
8927 if (cgsort[i].nsc < moved)
8934 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8936 /* Determine the order of the charge groups using qsort */
8937 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
8943 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
8949 sort = dd->comm->sort->sort;
8951 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
8954 for (i = 0; i < na; i++)
8958 sort[ncg_new].ind = a[i];
8966 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
8969 gmx_domdec_sort_t *sort;
8970 gmx_cgsort_t *cgsort;
8972 int ncg_new, i, *ibuf, cgsize;
8975 sort = dd->comm->sort;
8977 if (dd->ncg_home > sort->sort_nalloc)
8979 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
8980 srenew(sort->sort, sort->sort_nalloc);
8981 srenew(sort->sort2, sort->sort_nalloc);
8983 cgsort = sort->sort;
8985 switch (fr->cutoff_scheme)
8988 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
8991 ncg_new = dd_sort_order_nbnxn(dd, fr);
8994 gmx_incons("unimplemented");
8998 /* We alloc with the old size, since cgindex is still old */
8999 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9000 vbuf = dd->comm->vbuf.v;
9004 cgindex = dd->cgindex;
9011 /* Remove the charge groups which are no longer at home here */
9012 dd->ncg_home = ncg_new;
9015 fprintf(debug, "Set the new home charge group count to %d\n",
9019 /* Reorder the state */
9020 if (state->flags & (1 << estX))
9022 order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->x.data()), vbuf);
9024 if (state->flags & (1 << estV))
9026 order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->v.data()), vbuf);
9028 if (state->flags & (1 << estCGP))
9030 order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->cg_p.data()), vbuf);
9033 if (fr->cutoff_scheme == ecutsGROUP)
9036 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9039 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9041 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9042 srenew(sort->ibuf, sort->ibuf_nalloc);
9045 /* Reorder the global cg index */
9046 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9047 /* Reorder the cginfo */
9048 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9049 /* Rebuild the local cg index */
9053 for (i = 0; i < dd->ncg_home; i++)
9055 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9056 ibuf[i+1] = ibuf[i] + cgsize;
9058 for (i = 0; i < dd->ncg_home+1; i++)
9060 dd->cgindex[i] = ibuf[i];
9065 for (i = 0; i < dd->ncg_home+1; i++)
9070 /* Set the home atom number */
9071 dd->nat_home = dd->cgindex[dd->ncg_home];
9073 if (fr->cutoff_scheme == ecutsVERLET)
9075 /* The atoms are now exactly in grid order, update the grid order */
9076 nbnxn_set_atomorder(fr->nbv->nbs);
9080 /* Copy the sorted ns cell indices back to the ns grid struct */
9081 for (i = 0; i < dd->ncg_home; i++)
9083 fr->ns->grid->cell_index[i] = cgsort[i].nsc;
9085 fr->ns->grid->nr = dd->ncg_home;
9089 static void add_dd_statistics(gmx_domdec_t *dd)
9091 gmx_domdec_comm_t *comm;
9096 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9098 comm->sum_nat[ddnat-ddnatZONE] +=
9099 comm->nat[ddnat] - comm->nat[ddnat-1];
9104 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9106 gmx_domdec_comm_t *comm;
9111 /* Reset all the statistics and counters for total run counting */
9112 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9114 comm->sum_nat[ddnat-ddnatZONE] = 0;
9118 comm->load_step = 0;
9121 clear_ivec(comm->load_lim);
9126 void print_dd_statistics(t_commrec *cr, const t_inputrec *ir, FILE *fplog)
9128 gmx_domdec_comm_t *comm;
9132 comm = cr->dd->comm;
9134 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9136 if (fplog == nullptr)
9141 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9143 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9145 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9150 " av. #atoms communicated per step for force: %d x %.1f\n",
9154 if (cr->dd->vsite_comm)
9157 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9158 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9163 if (cr->dd->constraint_comm)
9166 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9167 1 + ir->nLincsIter, av);
9171 gmx_incons(" Unknown type for DD statistics");
9174 fprintf(fplog, "\n");
9176 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9178 print_dd_load_av(fplog, cr->dd);
9182 void dd_partition_system(FILE *fplog,
9185 gmx_bool bMasterState,
9187 t_state *state_global,
9188 const gmx_mtop_t *top_global,
9189 const t_inputrec *ir,
9190 t_state *state_local,
9191 PaddedRVecVector *f,
9192 gmx::MDAtoms *mdAtoms,
9193 gmx_localtop_t *top_local,
9196 gmx_constr_t constr,
9198 gmx_wallcycle_t wcycle,
9202 gmx_domdec_comm_t *comm;
9203 gmx_ddbox_t ddbox = {0};
9205 gmx_int64_t step_pcoupl;
9206 rvec cell_ns_x0, cell_ns_x1;
9207 int i, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9208 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckWhetherToTurnDlbOn, bLogLoad;
9209 gmx_bool bRedist, bSortCG, bResortAll;
9210 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9214 wallcycle_start(wcycle, ewcDOMDEC);
9219 bBoxChanged = (bMasterState || inputrecDeform(ir));
9220 if (ir->epc != epcNO)
9222 /* With nstpcouple > 1 pressure coupling happens.
9223 * one step after calculating the pressure.
9224 * Box scaling happens at the end of the MD step,
9225 * after the DD partitioning.
9226 * We therefore have to do DLB in the first partitioning
9227 * after an MD step where P-coupling occurred.
9228 * We need to determine the last step in which p-coupling occurred.
9229 * MRS -- need to validate this for vv?
9234 step_pcoupl = step - 1;
9238 step_pcoupl = ((step - 1)/n)*n + 1;
9240 if (step_pcoupl >= comm->partition_step)
9246 bNStGlobalComm = (step % nstglobalcomm == 0);
9254 /* Should we do dynamic load balacing this step?
9255 * Since it requires (possibly expensive) global communication,
9256 * we might want to do DLB less frequently.
9258 if (bBoxChanged || ir->epc != epcNO)
9260 bDoDLB = bBoxChanged;
9264 bDoDLB = bNStGlobalComm;
9268 /* Check if we have recorded loads on the nodes */
9269 if (comm->bRecordLoad && dd_load_count(comm) > 0)
9271 bCheckWhetherToTurnDlbOn = dd_dlb_get_should_check_whether_to_turn_dlb_on(dd);
9273 /* Print load every nstlog, first and last step to the log file */
9274 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9275 comm->n_load_collect == 0 ||
9277 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9279 /* Avoid extra communication due to verbose screen output
9280 * when nstglobalcomm is set.
9282 if (bDoDLB || bLogLoad || bCheckWhetherToTurnDlbOn ||
9283 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9285 get_load_distribution(dd, wcycle);
9290 dd_print_load(fplog, dd, step-1);
9294 dd_print_load_verbose(dd);
9297 comm->n_load_collect++;
9303 /* Add the measured cycles to the running average */
9304 const float averageFactor = 0.1f;
9305 comm->cyclesPerStepDlbExpAverage =
9306 (1 - averageFactor)*comm->cyclesPerStepDlbExpAverage +
9307 averageFactor*comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
9309 if (comm->dlbState == edlbsOnCanTurnOff &&
9310 dd->comm->n_load_have % c_checkTurnDlbOffInterval == c_checkTurnDlbOffInterval - 1)
9312 gmx_bool turnOffDlb;
9315 /* If the running averaged cycles with DLB are more
9316 * than before we turned on DLB, turn off DLB.
9317 * We will again run and check the cycles without DLB
9318 * and we can then decide if to turn off DLB forever.
9320 turnOffDlb = (comm->cyclesPerStepDlbExpAverage >
9321 comm->cyclesPerStepBeforeDLB);
9323 dd_bcast(dd, sizeof(turnOffDlb), &turnOffDlb);
9326 /* To turn off DLB, we need to redistribute the atoms */
9327 dd_collect_state(dd, state_local, state_global);
9328 bMasterState = TRUE;
9329 turn_off_dlb(fplog, cr, step);
9333 else if (bCheckWhetherToTurnDlbOn)
9335 gmx_bool turnOffDlbForever = FALSE;
9336 gmx_bool turnOnDlb = FALSE;
9338 /* Since the timings are node dependent, the master decides */
9341 /* If we recently turned off DLB, we want to check if
9342 * performance is better without DLB. We want to do this
9343 * ASAP to minimize the chance that external factors
9344 * slowed down the DLB step are gone here and we
9345 * incorrectly conclude that DLB was causing the slowdown.
9346 * So we measure one nstlist block, no running average.
9348 if (comm->haveTurnedOffDlb &&
9349 comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep] <
9350 comm->cyclesPerStepDlbExpAverage)
9352 /* After turning off DLB we ran nstlist steps in fewer
9353 * cycles than with DLB. This likely means that DLB
9354 * in not benefical, but this could be due to a one
9355 * time unlucky fluctuation, so we require two such
9356 * observations in close succession to turn off DLB
9359 if (comm->dlbSlowerPartitioningCount > 0 &&
9360 dd->ddp_count < comm->dlbSlowerPartitioningCount + 10*c_checkTurnDlbOnInterval)
9362 turnOffDlbForever = TRUE;
9364 comm->haveTurnedOffDlb = false;
9365 /* Register when we last measured DLB slowdown */
9366 comm->dlbSlowerPartitioningCount = dd->ddp_count;
9370 /* Here we check if the max PME rank load is more than 0.98
9371 * the max PP force load. If so, PP DLB will not help,
9372 * since we are (almost) limited by PME. Furthermore,
9373 * DLB will cause a significant extra x/f redistribution
9374 * cost on the PME ranks, which will then surely result
9375 * in lower total performance.
9377 if (cr->npmenodes > 0 &&
9378 dd_pme_f_ratio(dd) > 1 - DD_PERF_LOSS_DLB_ON)
9384 turnOnDlb = (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
9390 gmx_bool turnOffDlbForever;
9394 turnOffDlbForever, turnOnDlb
9396 dd_bcast(dd, sizeof(bools), &bools);
9397 if (bools.turnOffDlbForever)
9399 turn_off_dlb_forever(fplog, cr, step);
9401 else if (bools.turnOnDlb)
9403 turn_on_dlb(fplog, cr, step);
9408 comm->n_load_have++;
9411 cgs_gl = &comm->cgs_gl;
9416 /* Clear the old state */
9417 clear_dd_indices(dd, 0, 0);
9420 rvec *xGlobal = (SIMMASTER(cr) ? as_rvec_array(state_global->x.data()) : nullptr);
9422 set_ddbox(dd, bMasterState, cr, ir,
9423 SIMMASTER(cr) ? state_global->box : nullptr,
9424 TRUE, cgs_gl, xGlobal,
9427 get_cg_distribution(fplog, dd, cgs_gl,
9428 SIMMASTER(cr) ? state_global->box : nullptr,
9431 dd_distribute_state(dd, cgs_gl,
9432 state_global, state_local, f);
9434 dd_make_local_cgs(dd, &top_local->cgs);
9436 /* Ensure that we have space for the new distribution */
9437 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9439 if (fr->cutoff_scheme == ecutsGROUP)
9441 calc_cgcm(fplog, 0, dd->ncg_home,
9442 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
9445 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9447 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9449 else if (state_local->ddp_count != dd->ddp_count)
9451 if (state_local->ddp_count > dd->ddp_count)
9453 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9456 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9458 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9461 /* Clear the old state */
9462 clear_dd_indices(dd, 0, 0);
9464 /* Build the new indices */
9465 rebuild_cgindex(dd, cgs_gl->index, state_local);
9466 make_dd_indices(dd, cgs_gl->index, 0);
9467 ncgindex_set = dd->ncg_home;
9469 if (fr->cutoff_scheme == ecutsGROUP)
9471 /* Redetermine the cg COMs */
9472 calc_cgcm(fplog, 0, dd->ncg_home,
9473 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
9476 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9478 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9480 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9481 TRUE, &top_local->cgs, as_rvec_array(state_local->x.data()), &ddbox);
9483 bRedist = isDlbOn(comm);
9487 /* We have the full state, only redistribute the cgs */
9489 /* Clear the non-home indices */
9490 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9493 /* Avoid global communication for dim's without pbc and -gcom */
9494 if (!bNStGlobalComm)
9496 copy_rvec(comm->box0, ddbox.box0 );
9497 copy_rvec(comm->box_size, ddbox.box_size);
9499 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9500 bNStGlobalComm, &top_local->cgs, as_rvec_array(state_local->x.data()), &ddbox);
9505 /* For dim's without pbc and -gcom */
9506 copy_rvec(ddbox.box0, comm->box0 );
9507 copy_rvec(ddbox.box_size, comm->box_size);
9509 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9512 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9514 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9517 /* Check if we should sort the charge groups */
9518 bSortCG = (bMasterState || bRedist);
9520 ncg_home_old = dd->ncg_home;
9522 /* When repartitioning we mark charge groups that will move to neighboring
9523 * DD cells, but we do not move them right away for performance reasons.
9524 * Thus we need to keep track of how many charge groups will move for
9525 * obtaining correct local charge group / atom counts.
9530 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9532 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9534 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9536 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9539 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9541 &comm->cell_x0, &comm->cell_x1,
9542 dd->ncg_home, fr->cg_cm,
9543 cell_ns_x0, cell_ns_x1, &grid_density);
9547 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9550 switch (fr->cutoff_scheme)
9553 copy_ivec(fr->ns->grid->n, ncells_old);
9554 grid_first(fplog, fr->ns->grid, dd, &ddbox,
9555 state_local->box, cell_ns_x0, cell_ns_x1,
9556 fr->rlist, grid_density);
9559 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9562 gmx_incons("unimplemented");
9564 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9565 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9569 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9571 /* Sort the state on charge group position.
9572 * This enables exact restarts from this step.
9573 * It also improves performance by about 15% with larger numbers
9574 * of atoms per node.
9577 /* Fill the ns grid with the home cell,
9578 * so we can sort with the indices.
9580 set_zones_ncg_home(dd);
9582 switch (fr->cutoff_scheme)
9585 set_zones_size(dd, state_local->box, &ddbox, 0, 1, ncg_moved);
9587 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9589 comm->zones.size[0].bb_x0,
9590 comm->zones.size[0].bb_x1,
9592 comm->zones.dens_zone0,
9594 as_rvec_array(state_local->x.data()),
9595 ncg_moved, bRedist ? comm->moved : nullptr,
9596 fr->nbv->grp[eintLocal].kernel_type,
9599 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9602 fill_grid(&comm->zones, fr->ns->grid, dd->ncg_home,
9603 0, dd->ncg_home, fr->cg_cm);
9605 copy_ivec(fr->ns->grid->n, ncells_new);
9608 gmx_incons("unimplemented");
9611 bResortAll = bMasterState;
9613 /* Check if we can user the old order and ns grid cell indices
9614 * of the charge groups to sort the charge groups efficiently.
9616 if (ncells_new[XX] != ncells_old[XX] ||
9617 ncells_new[YY] != ncells_old[YY] ||
9618 ncells_new[ZZ] != ncells_old[ZZ])
9625 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9626 gmx_step_str(step, sbuf), dd->ncg_home);
9628 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9629 bResortAll ? -1 : ncg_home_old);
9631 /* After sorting and compacting we set the correct size */
9632 dd_resize_state(state_local, f, dd->nat_home);
9634 /* Rebuild all the indices */
9635 ga2la_clear(dd->ga2la);
9638 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9641 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9643 /* Setup up the communication and communicate the coordinates */
9644 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9646 /* Set the indices */
9647 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9649 /* Set the charge group boundaries for neighbor searching */
9650 set_cg_boundaries(&comm->zones);
9652 if (fr->cutoff_scheme == ecutsVERLET)
9654 set_zones_size(dd, state_local->box, &ddbox,
9655 bSortCG ? 1 : 0, comm->zones.n,
9659 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9662 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9663 -1,as_rvec_array(state_local->x.data()),state_local->box);
9666 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9668 /* Extract a local topology from the global topology */
9669 for (i = 0; i < dd->ndim; i++)
9671 np[dd->dim[i]] = comm->cd[i].np;
9673 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9674 comm->cellsize_min, np,
9676 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : as_rvec_array(state_local->x.data()),
9677 vsite, top_global, top_local);
9679 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9681 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9683 /* Set up the special atom communication */
9684 n = comm->nat[ddnatZONE];
9685 for (i = ddnatZONE+1; i < ddnatNR; i++)
9690 if (vsite && vsite->n_intercg_vsite)
9692 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9696 if (dd->bInterCGcons || dd->bInterCGsettles)
9698 /* Only for inter-cg constraints we need special code */
9699 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9700 constr, ir->nProjOrder,
9701 top_local->idef.il);
9705 gmx_incons("Unknown special atom type setup");
9710 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9712 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9714 /* Make space for the extra coordinates for virtual site
9715 * or constraint communication.
9717 state_local->natoms = comm->nat[ddnatNR-1];
9719 dd_resize_state(state_local, f, state_local->natoms);
9721 if (fr->haveDirectVirialContributions)
9723 if (vsite && vsite->n_intercg_vsite)
9725 nat_f_novirsum = comm->nat[ddnatVSITE];
9729 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9731 nat_f_novirsum = dd->nat_tot;
9735 nat_f_novirsum = dd->nat_home;
9744 /* Set the number of atoms required for the force calculation.
9745 * Forces need to be constrained when doing energy
9746 * minimization. For simple simulations we could avoid some
9747 * allocation, zeroing and copying, but this is probably not worth
9748 * the complications and checking.
9750 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9751 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9753 /* Update atom data for mdatoms and several algorithms */
9754 mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr,
9755 nullptr, mdAtoms, vsite, nullptr);
9757 auto mdatoms = mdAtoms->mdatoms();
9758 if (!thisRankHasDuty(cr, DUTY_PME))
9760 /* Send the charges and/or c6/sigmas to our PME only node */
9761 gmx_pme_send_parameters(cr,
9763 mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9764 mdatoms->chargeA, mdatoms->chargeB,
9765 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9766 mdatoms->sigmaA, mdatoms->sigmaB,
9767 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9772 set_constraints(constr, top_local, ir, mdatoms, cr);
9777 /* Update the local pull groups */
9778 dd_make_local_pull_groups(cr, ir->pull_work, mdatoms);
9783 /* Update the local rotation groups */
9784 dd_make_local_rotation_groups(dd, ir->rot);
9787 if (ir->eSwapCoords != eswapNO)
9789 /* Update the local groups needed for ion swapping */
9790 dd_make_local_swap_groups(dd, ir->swap);
9793 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9794 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9796 add_dd_statistics(dd);
9798 /* Make sure we only count the cycles for this DD partitioning */
9799 clear_dd_cycle_counts(dd);
9801 /* Because the order of the atoms might have changed since
9802 * the last vsite construction, we need to communicate the constructing
9803 * atom coordinates again (for spreading the forces this MD step).
9805 dd_move_x_vsites(dd, state_local->box, as_rvec_array(state_local->x.data()));
9807 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9809 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9811 dd_move_x(dd, state_local->box, as_rvec_array(state_local->x.data()));
9812 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9813 -1, as_rvec_array(state_local->x.data()), state_local->box);
9816 /* Store the partitioning step */
9817 comm->partition_step = step;
9819 /* Increase the DD partitioning counter */
9821 /* The state currently matches this DD partitioning count, store it */
9822 state_local->ddp_count = dd->ddp_count;
9825 /* The DD master node knows the complete cg distribution,
9826 * store the count so we can possibly skip the cg info communication.
9828 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9831 if (comm->DD_debug > 0)
9833 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9834 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9835 "after partitioning");
9838 wallcycle_stop(wcycle, ewcDOMDEC);