2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
52 #include "gromacs/domdec/domdec_network.h"
53 #include "gromacs/domdec/ga2la.h"
54 #include "gromacs/ewald/pme.h"
55 #include "gromacs/fileio/gmxfio.h"
56 #include "gromacs/fileio/pdbio.h"
57 #include "gromacs/gmxlib/chargegroup.h"
58 #include "gromacs/gmxlib/network.h"
59 #include "gromacs/gmxlib/nrnb.h"
60 #include "gromacs/gpu_utils/gpu_utils.h"
61 #include "gromacs/hardware/hw_info.h"
62 #include "gromacs/imd/imd.h"
63 #include "gromacs/listed-forces/manage-threading.h"
64 #include "gromacs/math/functions.h"
65 #include "gromacs/math/vec.h"
66 #include "gromacs/math/vectypes.h"
67 #include "gromacs/mdlib/constr.h"
68 #include "gromacs/mdlib/force.h"
69 #include "gromacs/mdlib/forcerec.h"
70 #include "gromacs/mdlib/genborn.h"
71 #include "gromacs/mdlib/gmx_omp_nthreads.h"
72 #include "gromacs/mdlib/mdatoms.h"
73 #include "gromacs/mdlib/mdrun.h"
74 #include "gromacs/mdlib/mdsetup.h"
75 #include "gromacs/mdlib/nb_verlet.h"
76 #include "gromacs/mdlib/nbnxn_grid.h"
77 #include "gromacs/mdlib/nsgrid.h"
78 #include "gromacs/mdlib/vsite.h"
79 #include "gromacs/mdtypes/commrec.h"
80 #include "gromacs/mdtypes/df_history.h"
81 #include "gromacs/mdtypes/forcerec.h"
82 #include "gromacs/mdtypes/inputrec.h"
83 #include "gromacs/mdtypes/md_enums.h"
84 #include "gromacs/mdtypes/mdatom.h"
85 #include "gromacs/mdtypes/nblist.h"
86 #include "gromacs/mdtypes/state.h"
87 #include "gromacs/pbcutil/ishift.h"
88 #include "gromacs/pbcutil/pbc.h"
89 #include "gromacs/pulling/pull.h"
90 #include "gromacs/pulling/pull_rotation.h"
91 #include "gromacs/swap/swapcoords.h"
92 #include "gromacs/timing/wallcycle.h"
93 #include "gromacs/topology/block.h"
94 #include "gromacs/topology/idef.h"
95 #include "gromacs/topology/ifunc.h"
96 #include "gromacs/topology/mtop_lookup.h"
97 #include "gromacs/topology/mtop_util.h"
98 #include "gromacs/topology/topology.h"
99 #include "gromacs/utility/basedefinitions.h"
100 #include "gromacs/utility/basenetwork.h"
101 #include "gromacs/utility/cstringutil.h"
102 #include "gromacs/utility/exceptions.h"
103 #include "gromacs/utility/fatalerror.h"
104 #include "gromacs/utility/gmxmpi.h"
105 #include "gromacs/utility/qsort_threadsafe.h"
106 #include "gromacs/utility/real.h"
107 #include "gromacs/utility/smalloc.h"
108 #include "gromacs/utility/stringutil.h"
110 #include "domdec_constraints.h"
111 #include "domdec_internal.h"
112 #include "domdec_vsite.h"
114 #define DDRANK(dd, rank) (rank)
115 #define DDMASTERRANK(dd) (dd->masterrank)
117 struct gmx_domdec_master_t
119 /* The cell boundaries */
121 /* The global charge group division */
122 int *ncg; /* Number of home charge groups for each node */
123 int *index; /* Index of nnodes+1 into cg */
124 int *cg; /* Global charge group index */
125 int *nat; /* Number of home atoms for each node. */
126 int *ibuf; /* Buffer for communication */
127 rvec *vbuf; /* Buffer for state scattering and gathering */
130 #define DD_NLOAD_MAX 9
132 const char *edlbs_names[edlbsNR] = { "off", "auto", "locked", "on", "on" };
134 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
137 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
138 #define DD_FLAG_NRCG 65535
139 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
140 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
142 /* The DD zone order */
143 static const ivec dd_zo[DD_MAXZONE] =
144 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
146 /* The non-bonded zone-pair setup for domain decomposition
147 * The first number is the i-zone, the second number the first j-zone seen by
148 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
149 * As is, this is for 3D decomposition, where there are 4 i-zones.
150 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
151 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
154 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
159 /* Factors used to avoid problems due to rounding issues */
160 #define DD_CELL_MARGIN 1.0001
161 #define DD_CELL_MARGIN2 1.00005
162 /* Factor to account for pressure scaling during nstlist steps */
163 #define DD_PRES_SCALE_MARGIN 1.02
165 /* Turn on DLB when the load imbalance causes this amount of total loss.
166 * There is a bit of overhead with DLB and it's difficult to achieve
167 * a load imbalance of less than 2% with DLB.
169 #define DD_PERF_LOSS_DLB_ON 0.02
171 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
172 #define DD_PERF_LOSS_WARN 0.05
174 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
176 /* Use separate MPI send and receive commands
177 * when nnodes <= GMX_DD_NNODES_SENDRECV.
178 * This saves memory (and some copying for small nnodes).
179 * For high parallelization scatter and gather calls are used.
181 #define GMX_DD_NNODES_SENDRECV 4
184 /* We check if to turn on DLB at the first and every 100 DD partitionings.
185 * With large imbalance DLB will turn on at the first step, so we can
186 * make the interval so large that the MPI overhead of the check is negligible.
188 static const int c_checkTurnDlbOnInterval = 100;
189 /* We need to check if DLB results in worse performance and then turn it off.
190 * We check this more often then for turning DLB on, because the DLB can scale
191 * the domains very rapidly, so if unlucky the load imbalance can go up quickly
192 * and furthermore, we are already synchronizing often with DLB, so
193 * the overhead of the MPI Bcast is not that high.
195 static const int c_checkTurnDlbOffInterval = 20;
197 /* Forward declaration */
198 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue);
202 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
204 static void index2xyz(ivec nc,int ind,ivec xyz)
206 xyz[XX] = ind % nc[XX];
207 xyz[YY] = (ind / nc[XX]) % nc[YY];
208 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
212 /* This order is required to minimize the coordinate communication in PME
213 * which uses decomposition in the x direction.
215 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
217 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
219 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
220 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
221 xyz[ZZ] = ind % nc[ZZ];
224 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
229 ddindex = dd_index(dd->nc, c);
230 if (dd->comm->bCartesianPP_PME)
232 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
234 else if (dd->comm->bCartesianPP)
237 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
248 static gmx_bool dynamic_dd_box(const gmx_ddbox_t *ddbox, const t_inputrec *ir)
250 return (ddbox->nboundeddim < DIM || inputrecDynamicBox(ir));
253 int ddglatnr(const gmx_domdec_t *dd, int i)
263 if (i >= dd->comm->nat[ddnatNR-1])
265 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
267 atnr = dd->gatindex[i] + 1;
273 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
275 return &dd->comm->cgs_gl;
278 /*! \brief Returns true if the DLB state indicates that the balancer is on. */
279 static bool isDlbOn(const gmx_domdec_comm_t *comm)
281 return (comm->dlbState == edlbsOnCanTurnOff ||
282 comm->dlbState == edlbsOnUser);
284 /*! \brief Returns true if the DLB state indicates that the balancer is off/disabled.
286 static bool isDlbDisabled(const gmx_domdec_comm_t *comm)
288 return (comm->dlbState == edlbsOffUser ||
289 comm->dlbState == edlbsOffForever);
292 static void vec_rvec_init(vec_rvec_t *v)
298 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
302 v->nalloc = over_alloc_dd(n);
303 srenew(v->v, v->nalloc);
307 void dd_store_state(gmx_domdec_t *dd, t_state *state)
311 if (state->ddp_count != dd->ddp_count)
313 gmx_incons("The MD state does not match the domain decomposition state");
316 state->cg_gl.resize(dd->ncg_home);
317 for (i = 0; i < dd->ncg_home; i++)
319 state->cg_gl[i] = dd->index_gl[i];
322 state->ddp_count_cg_gl = dd->ddp_count;
325 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
327 return &dd->comm->zones;
330 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
331 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
333 gmx_domdec_zones_t *zones;
336 zones = &dd->comm->zones;
339 while (icg >= zones->izone[izone].cg1)
348 else if (izone < zones->nizone)
350 *jcg0 = zones->izone[izone].jcg0;
354 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
355 icg, izone, zones->nizone);
358 *jcg1 = zones->izone[izone].jcg1;
360 for (d = 0; d < dd->ndim; d++)
363 shift0[dim] = zones->izone[izone].shift0[dim];
364 shift1[dim] = zones->izone[izone].shift1[dim];
365 if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
367 /* A conservative approach, this can be optimized */
374 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
376 /* We currently set mdatoms entries for all atoms:
377 * local + non-local + communicated for vsite + constraints
380 return dd->comm->nat[ddnatNR - 1];
383 int dd_natoms_vsite(const gmx_domdec_t *dd)
385 return dd->comm->nat[ddnatVSITE];
388 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
390 *at_start = dd->comm->nat[ddnatCON-1];
391 *at_end = dd->comm->nat[ddnatCON];
394 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
396 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
397 int *index, *cgindex;
398 gmx_domdec_comm_t *comm;
399 gmx_domdec_comm_dim_t *cd;
400 gmx_domdec_ind_t *ind;
401 rvec shift = {0, 0, 0}, *buf, *rbuf;
402 gmx_bool bPBC, bScrew;
406 cgindex = dd->cgindex;
411 nat_tot = dd->nat_home;
412 for (d = 0; d < dd->ndim; d++)
414 bPBC = (dd->ci[dd->dim[d]] == 0);
415 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
418 copy_rvec(box[dd->dim[d]], shift);
421 for (p = 0; p < cd->np; p++)
428 for (i = 0; i < ind->nsend[nzone]; i++)
430 at0 = cgindex[index[i]];
431 at1 = cgindex[index[i]+1];
432 for (j = at0; j < at1; j++)
434 copy_rvec(x[j], buf[n]);
441 for (i = 0; i < ind->nsend[nzone]; i++)
443 at0 = cgindex[index[i]];
444 at1 = cgindex[index[i]+1];
445 for (j = at0; j < at1; j++)
447 /* We need to shift the coordinates */
448 rvec_add(x[j], shift, buf[n]);
455 for (i = 0; i < ind->nsend[nzone]; i++)
457 at0 = cgindex[index[i]];
458 at1 = cgindex[index[i]+1];
459 for (j = at0; j < at1; j++)
462 buf[n][XX] = x[j][XX] + shift[XX];
464 * This operation requires a special shift force
465 * treatment, which is performed in calc_vir.
467 buf[n][YY] = box[YY][YY] - x[j][YY];
468 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
480 rbuf = comm->vbuf2.v;
482 /* Send and receive the coordinates */
483 dd_sendrecv_rvec(dd, d, dddirBackward,
484 buf, ind->nsend[nzone+1],
485 rbuf, ind->nrecv[nzone+1]);
489 for (zone = 0; zone < nzone; zone++)
491 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
493 copy_rvec(rbuf[j], x[i]);
498 nat_tot += ind->nrecv[nzone+1];
504 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
506 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
507 int *index, *cgindex;
508 gmx_domdec_comm_t *comm;
509 gmx_domdec_comm_dim_t *cd;
510 gmx_domdec_ind_t *ind;
514 gmx_bool bShiftForcesNeedPbc, bScrew;
518 cgindex = dd->cgindex;
522 nzone = comm->zones.n/2;
523 nat_tot = dd->nat_tot;
524 for (d = dd->ndim-1; d >= 0; d--)
526 /* Only forces in domains near the PBC boundaries need to
527 consider PBC in the treatment of fshift */
528 bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
529 bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
530 if (fshift == nullptr && !bScrew)
532 bShiftForcesNeedPbc = FALSE;
534 /* Determine which shift vector we need */
540 for (p = cd->np-1; p >= 0; p--)
543 nat_tot -= ind->nrecv[nzone+1];
550 sbuf = comm->vbuf2.v;
552 for (zone = 0; zone < nzone; zone++)
554 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
556 copy_rvec(f[i], sbuf[j]);
561 /* Communicate the forces */
562 dd_sendrecv_rvec(dd, d, dddirForward,
563 sbuf, ind->nrecv[nzone+1],
564 buf, ind->nsend[nzone+1]);
566 /* Add the received forces */
568 if (!bShiftForcesNeedPbc)
570 for (i = 0; i < ind->nsend[nzone]; i++)
572 at0 = cgindex[index[i]];
573 at1 = cgindex[index[i]+1];
574 for (j = at0; j < at1; j++)
576 rvec_inc(f[j], buf[n]);
583 /* fshift should always be defined if this function is
584 * called when bShiftForcesNeedPbc is true */
585 assert(NULL != fshift);
586 for (i = 0; i < ind->nsend[nzone]; i++)
588 at0 = cgindex[index[i]];
589 at1 = cgindex[index[i]+1];
590 for (j = at0; j < at1; j++)
592 rvec_inc(f[j], buf[n]);
593 /* Add this force to the shift force */
594 rvec_inc(fshift[is], buf[n]);
601 for (i = 0; i < ind->nsend[nzone]; i++)
603 at0 = cgindex[index[i]];
604 at1 = cgindex[index[i]+1];
605 for (j = at0; j < at1; j++)
607 /* Rotate the force */
608 f[j][XX] += buf[n][XX];
609 f[j][YY] -= buf[n][YY];
610 f[j][ZZ] -= buf[n][ZZ];
613 /* Add this force to the shift force */
614 rvec_inc(fshift[is], buf[n]);
625 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
627 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
628 int *index, *cgindex;
629 gmx_domdec_comm_t *comm;
630 gmx_domdec_comm_dim_t *cd;
631 gmx_domdec_ind_t *ind;
636 cgindex = dd->cgindex;
638 buf = &comm->vbuf.v[0][0];
641 nat_tot = dd->nat_home;
642 for (d = 0; d < dd->ndim; d++)
645 for (p = 0; p < cd->np; p++)
650 for (i = 0; i < ind->nsend[nzone]; i++)
652 at0 = cgindex[index[i]];
653 at1 = cgindex[index[i]+1];
654 for (j = at0; j < at1; j++)
667 rbuf = &comm->vbuf2.v[0][0];
669 /* Send and receive the coordinates */
670 dd_sendrecv_real(dd, d, dddirBackward,
671 buf, ind->nsend[nzone+1],
672 rbuf, ind->nrecv[nzone+1]);
676 for (zone = 0; zone < nzone; zone++)
678 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
685 nat_tot += ind->nrecv[nzone+1];
691 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
693 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
694 int *index, *cgindex;
695 gmx_domdec_comm_t *comm;
696 gmx_domdec_comm_dim_t *cd;
697 gmx_domdec_ind_t *ind;
702 cgindex = dd->cgindex;
704 buf = &comm->vbuf.v[0][0];
706 nzone = comm->zones.n/2;
707 nat_tot = dd->nat_tot;
708 for (d = dd->ndim-1; d >= 0; d--)
711 for (p = cd->np-1; p >= 0; p--)
714 nat_tot -= ind->nrecv[nzone+1];
721 sbuf = &comm->vbuf2.v[0][0];
723 for (zone = 0; zone < nzone; zone++)
725 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
732 /* Communicate the forces */
733 dd_sendrecv_real(dd, d, dddirForward,
734 sbuf, ind->nrecv[nzone+1],
735 buf, ind->nsend[nzone+1]);
737 /* Add the received forces */
739 for (i = 0; i < ind->nsend[nzone]; i++)
741 at0 = cgindex[index[i]];
742 at1 = cgindex[index[i]+1];
743 for (j = at0; j < at1; j++)
754 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
756 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
758 zone->min0, zone->max1,
759 zone->mch0, zone->mch0,
760 zone->p1_0, zone->p1_1);
764 #define DDZONECOMM_MAXZONE 5
765 #define DDZONECOMM_BUFSIZE 3
767 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
768 int ddimind, int direction,
769 gmx_ddzone_t *buf_s, int n_s,
770 gmx_ddzone_t *buf_r, int n_r)
772 #define ZBS DDZONECOMM_BUFSIZE
773 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
774 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
777 for (i = 0; i < n_s; i++)
779 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
780 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
781 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
782 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
783 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
784 vbuf_s[i*ZBS+1][2] = 0;
785 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
786 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
787 vbuf_s[i*ZBS+2][2] = 0;
790 dd_sendrecv_rvec(dd, ddimind, direction,
794 for (i = 0; i < n_r; i++)
796 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
797 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
798 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
799 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
800 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
801 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
802 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
808 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
809 rvec cell_ns_x0, rvec cell_ns_x1)
811 int d, d1, dim, pos, buf_size, i, j, p, npulse, npulse_min;
813 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
814 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
815 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
816 rvec extr_s[2], extr_r[2];
818 real dist_d, c = 0, det;
819 gmx_domdec_comm_t *comm;
824 for (d = 1; d < dd->ndim; d++)
827 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
828 zp->min0 = cell_ns_x0[dim];
829 zp->max1 = cell_ns_x1[dim];
830 zp->min1 = cell_ns_x1[dim];
831 zp->mch0 = cell_ns_x0[dim];
832 zp->mch1 = cell_ns_x1[dim];
833 zp->p1_0 = cell_ns_x0[dim];
834 zp->p1_1 = cell_ns_x1[dim];
837 for (d = dd->ndim-2; d >= 0; d--)
840 bPBC = (dim < ddbox->npbcdim);
842 /* Use an rvec to store two reals */
843 extr_s[d][0] = comm->cell_f0[d+1];
844 extr_s[d][1] = comm->cell_f1[d+1];
845 extr_s[d][2] = comm->cell_f1[d+1];
848 /* Store the extremes in the backward sending buffer,
849 * so the get updated separately from the forward communication.
851 for (d1 = d; d1 < dd->ndim-1; d1++)
853 /* We invert the order to be able to use the same loop for buf_e */
854 buf_s[pos].min0 = extr_s[d1][1];
855 buf_s[pos].max1 = extr_s[d1][0];
856 buf_s[pos].min1 = extr_s[d1][2];
859 /* Store the cell corner of the dimension we communicate along */
860 buf_s[pos].p1_0 = comm->cell_x0[dim];
865 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
868 if (dd->ndim == 3 && d == 0)
870 buf_s[pos] = comm->zone_d2[0][1];
872 buf_s[pos] = comm->zone_d1[0];
876 /* We only need to communicate the extremes
877 * in the forward direction
879 npulse = comm->cd[d].np;
882 /* Take the minimum to avoid double communication */
883 npulse_min = std::min(npulse, dd->nc[dim]-1-npulse);
887 /* Without PBC we should really not communicate over
888 * the boundaries, but implementing that complicates
889 * the communication setup and therefore we simply
890 * do all communication, but ignore some data.
894 for (p = 0; p < npulse_min; p++)
896 /* Communicate the extremes forward */
897 bUse = (bPBC || dd->ci[dim] > 0);
899 dd_sendrecv_rvec(dd, d, dddirForward,
900 extr_s+d, dd->ndim-d-1,
901 extr_r+d, dd->ndim-d-1);
905 for (d1 = d; d1 < dd->ndim-1; d1++)
907 extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
908 extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
909 extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]);
915 for (p = 0; p < npulse; p++)
917 /* Communicate all the zone information backward */
918 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
920 dd_sendrecv_ddzone(dd, d, dddirBackward,
927 for (d1 = d+1; d1 < dd->ndim; d1++)
929 /* Determine the decrease of maximum required
930 * communication height along d1 due to the distance along d,
931 * this avoids a lot of useless atom communication.
933 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
935 if (ddbox->tric_dir[dim])
937 /* c is the off-diagonal coupling between the cell planes
938 * along directions d and d1.
940 c = ddbox->v[dim][dd->dim[d1]][dim];
946 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
949 dh[d1] = comm->cutoff - (c*dist_d + std::sqrt(det))/(1 + c*c);
953 /* A negative value signals out of range */
959 /* Accumulate the extremes over all pulses */
960 for (i = 0; i < buf_size; i++)
970 buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
971 buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
972 buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
975 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
983 if (bUse && dh[d1] >= 0)
985 buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
986 buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
989 /* Copy the received buffer to the send buffer,
990 * to pass the data through with the next pulse.
994 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
995 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
997 /* Store the extremes */
1000 for (d1 = d; d1 < dd->ndim-1; d1++)
1002 extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
1003 extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
1004 extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1);
1008 if (d == 1 || (d == 0 && dd->ndim == 3))
1010 for (i = d; i < 2; i++)
1012 comm->zone_d2[1-d][i] = buf_e[pos];
1018 comm->zone_d1[1] = buf_e[pos];
1028 for (i = 0; i < 2; i++)
1032 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1034 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1035 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1041 for (i = 0; i < 2; i++)
1043 for (j = 0; j < 2; j++)
1047 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1049 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1050 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1054 for (d = 1; d < dd->ndim; d++)
1056 comm->cell_f_max0[d] = extr_s[d-1][0];
1057 comm->cell_f_min1[d] = extr_s[d-1][1];
1060 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1061 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1066 static void dd_collect_cg(gmx_domdec_t *dd,
1067 const t_state *state_local)
1069 gmx_domdec_master_t *ma = nullptr;
1070 int buf2[2], *ibuf, i, ncg_home = 0, nat_home = 0;
1072 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1074 /* The master has the correct distribution */
1080 if (state_local->ddp_count == dd->ddp_count)
1082 /* The local state and DD are in sync, use the DD indices */
1083 ncg_home = dd->ncg_home;
1085 nat_home = dd->nat_home;
1087 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1089 /* The DD is out of sync with the local state, but we have stored
1090 * the cg indices with the local state, so we can use those.
1094 cgs_gl = &dd->comm->cgs_gl;
1096 ncg_home = state_local->cg_gl.size();
1097 cg = state_local->cg_gl.data();
1099 for (i = 0; i < ncg_home; i++)
1101 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1106 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1120 /* Collect the charge group and atom counts on the master */
1121 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1126 for (i = 0; i < dd->nnodes; i++)
1128 ma->ncg[i] = ma->ibuf[2*i];
1129 ma->nat[i] = ma->ibuf[2*i+1];
1130 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1133 /* Make byte counts and indices */
1134 for (i = 0; i < dd->nnodes; i++)
1136 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1137 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1141 fprintf(debug, "Initial charge group distribution: ");
1142 for (i = 0; i < dd->nnodes; i++)
1144 fprintf(debug, " %d", ma->ncg[i]);
1146 fprintf(debug, "\n");
1150 /* Collect the charge group indices on the master */
1152 ncg_home*sizeof(int), cg,
1153 DDMASTER(dd) ? ma->ibuf : nullptr,
1154 DDMASTER(dd) ? ma->ibuf+dd->nnodes : nullptr,
1155 DDMASTER(dd) ? ma->cg : nullptr);
1157 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1160 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1161 gmx::ArrayRef<const gmx::RVec> lv,
1162 gmx::ArrayRef<gmx::RVec> v)
1164 gmx_domdec_master_t *ma;
1165 int n, i, c, a, nalloc = 0;
1166 rvec *buf = nullptr;
1174 MPI_Send(const_cast<void *>(static_cast<const void *>(lv.data())), dd->nat_home*sizeof(rvec), MPI_BYTE,
1175 DDMASTERRANK(dd), dd->rank, dd->mpi_comm_all);
1180 /* Copy the master coordinates to the global array */
1181 cgs_gl = &dd->comm->cgs_gl;
1183 n = DDMASTERRANK(dd);
1185 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1187 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1189 copy_rvec(lv[a++], v[c]);
1193 for (n = 0; n < dd->nnodes; n++)
1197 if (ma->nat[n] > nalloc)
1199 nalloc = over_alloc_dd(ma->nat[n]);
1200 srenew(buf, nalloc);
1203 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1204 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1207 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1209 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1211 copy_rvec(buf[a++], v[c]);
1220 static void get_commbuffer_counts(gmx_domdec_t *dd,
1221 int **counts, int **disps)
1223 gmx_domdec_master_t *ma;
1228 /* Make the rvec count and displacment arrays */
1230 *disps = ma->ibuf + dd->nnodes;
1231 for (n = 0; n < dd->nnodes; n++)
1233 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1234 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1238 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1239 gmx::ArrayRef<const gmx::RVec> lv,
1240 gmx::ArrayRef<gmx::RVec> v)
1242 gmx_domdec_master_t *ma;
1243 int *rcounts = nullptr, *disps = nullptr;
1245 rvec *buf = nullptr;
1252 get_commbuffer_counts(dd, &rcounts, &disps);
1257 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv.data(), rcounts, disps, buf);
1261 cgs_gl = &dd->comm->cgs_gl;
1264 for (n = 0; n < dd->nnodes; n++)
1266 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1268 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1270 copy_rvec(buf[a++], v[c]);
1277 void dd_collect_vec(gmx_domdec_t *dd,
1278 const t_state *state_local,
1279 gmx::ArrayRef<const gmx::RVec> lv,
1280 gmx::ArrayRef<gmx::RVec> v)
1282 dd_collect_cg(dd, state_local);
1284 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1286 dd_collect_vec_sendrecv(dd, lv, v);
1290 dd_collect_vec_gatherv(dd, lv, v);
1295 void dd_collect_state(gmx_domdec_t *dd,
1296 const t_state *state_local, t_state *state)
1298 int nh = state_local->nhchainlength;
1302 GMX_RELEASE_ASSERT(state->nhchainlength == nh, "The global and local Nose-Hoover chain lengths should match");
1304 for (int i = 0; i < efptNR; i++)
1306 state->lambda[i] = state_local->lambda[i];
1308 state->fep_state = state_local->fep_state;
1309 state->veta = state_local->veta;
1310 state->vol0 = state_local->vol0;
1311 copy_mat(state_local->box, state->box);
1312 copy_mat(state_local->boxv, state->boxv);
1313 copy_mat(state_local->svir_prev, state->svir_prev);
1314 copy_mat(state_local->fvir_prev, state->fvir_prev);
1315 copy_mat(state_local->pres_prev, state->pres_prev);
1317 for (int i = 0; i < state_local->ngtc; i++)
1319 for (int j = 0; j < nh; j++)
1321 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1322 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1324 state->therm_integral[i] = state_local->therm_integral[i];
1326 for (int i = 0; i < state_local->nnhpres; i++)
1328 for (int j = 0; j < nh; j++)
1330 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1331 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1334 state->baros_integral = state_local->baros_integral;
1336 if (state_local->flags & (1 << estX))
1338 gmx::ArrayRef<gmx::RVec> globalXRef = state ? gmx::makeArrayRef(state->x) : gmx::EmptyArrayRef();
1339 dd_collect_vec(dd, state_local, state_local->x, globalXRef);
1341 if (state_local->flags & (1 << estV))
1343 gmx::ArrayRef<gmx::RVec> globalVRef = state ? gmx::makeArrayRef(state->v) : gmx::EmptyArrayRef();
1344 dd_collect_vec(dd, state_local, state_local->v, globalVRef);
1346 if (state_local->flags & (1 << estCGP))
1348 gmx::ArrayRef<gmx::RVec> globalCgpRef = state ? gmx::makeArrayRef(state->cg_p) : gmx::EmptyArrayRef();
1349 dd_collect_vec(dd, state_local, state_local->cg_p, globalCgpRef);
1353 static void dd_resize_state(t_state *state, PaddedRVecVector *f, int natoms)
1357 fprintf(debug, "Resizing state: currently %d, required %d\n", state->natoms, natoms);
1360 state_change_natoms(state, natoms);
1364 /* We need to allocate one element extra, since we might use
1365 * (unaligned) 4-wide SIMD loads to access rvec entries.
1367 f->resize(paddedRVecVectorSize(natoms));
1371 static void dd_check_alloc_ncg(t_forcerec *fr,
1373 PaddedRVecVector *f,
1374 int numChargeGroups)
1376 if (numChargeGroups > fr->cg_nalloc)
1380 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, numChargeGroups, over_alloc_dd(numChargeGroups));
1382 fr->cg_nalloc = over_alloc_dd(numChargeGroups);
1383 srenew(fr->cginfo, fr->cg_nalloc);
1384 if (fr->cutoff_scheme == ecutsGROUP)
1386 srenew(fr->cg_cm, fr->cg_nalloc);
1389 if (fr->cutoff_scheme == ecutsVERLET)
1391 /* We don't use charge groups, we use x in state to set up
1392 * the atom communication.
1394 dd_resize_state(state, f, numChargeGroups);
1398 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1399 const rvec *v, rvec *lv)
1401 gmx_domdec_master_t *ma;
1402 int n, i, c, a, nalloc = 0;
1403 rvec *buf = nullptr;
1409 for (n = 0; n < dd->nnodes; n++)
1413 if (ma->nat[n] > nalloc)
1415 nalloc = over_alloc_dd(ma->nat[n]);
1416 srenew(buf, nalloc);
1418 /* Use lv as a temporary buffer */
1420 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1422 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1424 copy_rvec(v[c], buf[a++]);
1427 if (a != ma->nat[n])
1429 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1434 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1435 DDRANK(dd, n), n, dd->mpi_comm_all);
1440 n = DDMASTERRANK(dd);
1442 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1444 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1446 copy_rvec(v[c], lv[a++]);
1453 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1454 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1459 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1460 const rvec *v, rvec *lv)
1462 gmx_domdec_master_t *ma;
1463 int *scounts = nullptr, *disps = nullptr;
1465 rvec *buf = nullptr;
1471 get_commbuffer_counts(dd, &scounts, &disps);
1475 for (n = 0; n < dd->nnodes; n++)
1477 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1479 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1481 copy_rvec(v[c], buf[a++]);
1487 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1490 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs,
1491 const rvec *v, rvec *lv)
1493 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1495 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1499 dd_distribute_vec_scatterv(dd, cgs, v, lv);
1503 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1505 if (dfhist == nullptr)
1510 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1511 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1512 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1514 if (dfhist->nlambda > 0)
1516 int nlam = dfhist->nlambda;
1517 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1518 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1519 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1520 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1521 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1522 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
1524 for (int i = 0; i < nlam; i++)
1526 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1527 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1528 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1529 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1530 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1531 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
1536 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1537 t_state *state, t_state *state_local,
1538 PaddedRVecVector *f)
1540 int nh = state_local->nhchainlength;
1544 GMX_RELEASE_ASSERT(state->nhchainlength == nh, "The global and local Nose-Hoover chain lengths should match");
1546 for (int i = 0; i < efptNR; i++)
1548 state_local->lambda[i] = state->lambda[i];
1550 state_local->fep_state = state->fep_state;
1551 state_local->veta = state->veta;
1552 state_local->vol0 = state->vol0;
1553 copy_mat(state->box, state_local->box);
1554 copy_mat(state->box_rel, state_local->box_rel);
1555 copy_mat(state->boxv, state_local->boxv);
1556 copy_mat(state->svir_prev, state_local->svir_prev);
1557 copy_mat(state->fvir_prev, state_local->fvir_prev);
1558 if (state->dfhist != nullptr)
1560 copy_df_history(state_local->dfhist, state->dfhist);
1562 for (int i = 0; i < state_local->ngtc; i++)
1564 for (int j = 0; j < nh; j++)
1566 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1567 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1569 state_local->therm_integral[i] = state->therm_integral[i];
1571 for (int i = 0; i < state_local->nnhpres; i++)
1573 for (int j = 0; j < nh; j++)
1575 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1576 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1579 state_local->baros_integral = state->baros_integral;
1581 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda.data());
1582 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1583 dd_bcast(dd, sizeof(real), &state_local->veta);
1584 dd_bcast(dd, sizeof(real), &state_local->vol0);
1585 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1586 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1587 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1588 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1589 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1590 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi.data());
1591 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi.data());
1592 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral.data());
1593 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi.data());
1594 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi.data());
1596 /* communicate df_history -- required for restarting from checkpoint */
1597 dd_distribute_dfhist(dd, state_local->dfhist);
1599 dd_resize_state(state_local, f, dd->nat_home);
1601 if (state_local->flags & (1 << estX))
1603 const rvec *xGlobal = (DDMASTER(dd) ? as_rvec_array(state->x.data()) : nullptr);
1604 dd_distribute_vec(dd, cgs, xGlobal, as_rvec_array(state_local->x.data()));
1606 if (state_local->flags & (1 << estV))
1608 const rvec *vGlobal = (DDMASTER(dd) ? as_rvec_array(state->v.data()) : nullptr);
1609 dd_distribute_vec(dd, cgs, vGlobal, as_rvec_array(state_local->v.data()));
1611 if (state_local->flags & (1 << estCGP))
1613 const rvec *cgpGlobal = (DDMASTER(dd) ? as_rvec_array(state->cg_p.data()) : nullptr);
1614 dd_distribute_vec(dd, cgs, cgpGlobal, as_rvec_array(state_local->cg_p.data()));
1618 static char dim2char(int dim)
1624 case XX: c = 'X'; break;
1625 case YY: c = 'Y'; break;
1626 case ZZ: c = 'Z'; break;
1627 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
1633 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1634 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1636 rvec grid_s[2], *grid_r = nullptr, cx, r;
1637 char fname[STRLEN], buf[22];
1639 int a, i, d, z, y, x;
1643 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1644 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1648 snew(grid_r, 2*dd->nnodes);
1651 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : nullptr);
1655 for (d = 0; d < DIM; d++)
1657 for (i = 0; i < DIM; i++)
1665 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1667 tric[d][i] = box[i][d]/box[i][i];
1676 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1677 out = gmx_fio_fopen(fname, "w");
1678 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1680 for (i = 0; i < dd->nnodes; i++)
1682 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1683 for (d = 0; d < DIM; d++)
1685 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1687 for (z = 0; z < 2; z++)
1689 for (y = 0; y < 2; y++)
1691 for (x = 0; x < 2; x++)
1693 cx[XX] = grid_r[i*2+x][XX];
1694 cx[YY] = grid_r[i*2+y][YY];
1695 cx[ZZ] = grid_r[i*2+z][ZZ];
1697 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1698 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
1702 for (d = 0; d < DIM; d++)
1704 for (x = 0; x < 4; x++)
1708 case 0: y = 1 + i*8 + 2*x; break;
1709 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1710 case 2: y = 1 + i*8 + x; break;
1712 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
1716 gmx_fio_fclose(out);
1721 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
1722 const gmx_mtop_t *mtop, t_commrec *cr,
1723 int natoms, rvec x[], matrix box)
1725 char fname[STRLEN], buf[22];
1727 int i, ii, resnr, c;
1728 const char *atomname, *resname;
1735 natoms = dd->comm->nat[ddnatVSITE];
1738 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
1740 out = gmx_fio_fopen(fname, "w");
1742 fprintf(out, "TITLE %s\n", title);
1743 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1745 for (i = 0; i < natoms; i++)
1747 ii = dd->gatindex[i];
1748 mtopGetAtomAndResidueName(mtop, ii, &molb, &atomname, &resnr, &resname, nullptr);
1749 if (i < dd->comm->nat[ddnatZONE])
1752 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
1758 else if (i < dd->comm->nat[ddnatVSITE])
1760 b = dd->comm->zones.n;
1764 b = dd->comm->zones.n + 1;
1766 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
1767 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
1769 fprintf(out, "TER\n");
1771 gmx_fio_fclose(out);
1774 real dd_cutoff_multibody(const gmx_domdec_t *dd)
1776 gmx_domdec_comm_t *comm;
1783 if (comm->bInterCGBondeds)
1785 if (comm->cutoff_mbody > 0)
1787 r = comm->cutoff_mbody;
1791 /* cutoff_mbody=0 means we do not have DLB */
1792 r = comm->cellsize_min[dd->dim[0]];
1793 for (di = 1; di < dd->ndim; di++)
1795 r = std::min(r, comm->cellsize_min[dd->dim[di]]);
1797 if (comm->bBondComm)
1799 r = std::max(r, comm->cutoff_mbody);
1803 r = std::min(r, comm->cutoff);
1811 real dd_cutoff_twobody(const gmx_domdec_t *dd)
1815 r_mb = dd_cutoff_multibody(dd);
1817 return std::max(dd->comm->cutoff, r_mb);
1821 static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
1826 nc = dd->nc[dd->comm->cartpmedim];
1827 ntot = dd->comm->ntot[dd->comm->cartpmedim];
1828 copy_ivec(coord, coord_pme);
1829 coord_pme[dd->comm->cartpmedim] =
1830 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
1833 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
1838 npme = dd->comm->npmenodes;
1840 /* Here we assign a PME node to communicate with this DD node
1841 * by assuming that the major index of both is x.
1842 * We add cr->npmenodes/2 to obtain an even distribution.
1844 return (ddindex*npme + npme/2)/npp;
1847 static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
1852 snew(pme_rank, dd->comm->npmenodes);
1854 for (i = 0; i < dd->nnodes; i++)
1856 p0 = ddindex2pmeindex(dd, i);
1857 p1 = ddindex2pmeindex(dd, i+1);
1858 if (i+1 == dd->nnodes || p1 > p0)
1862 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
1864 pme_rank[n] = i + 1 + n;
1872 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
1880 if (dd->comm->bCartesian) {
1881 gmx_ddindex2xyz(dd->nc,ddindex,coords);
1882 dd_coords2pmecoords(dd,coords,coords_pme);
1883 copy_ivec(dd->ntot,nc);
1884 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1885 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1887 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
1889 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
1895 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
1900 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
1902 gmx_domdec_comm_t *comm;
1904 int ddindex, nodeid = -1;
1906 comm = cr->dd->comm;
1911 if (comm->bCartesianPP_PME)
1914 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
1919 ddindex = dd_index(cr->dd->nc, coords);
1920 if (comm->bCartesianPP)
1922 nodeid = comm->ddindex2simnodeid[ddindex];
1928 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
1940 static int dd_simnode2pmenode(const gmx_domdec_t *dd,
1941 const t_commrec gmx_unused *cr,
1946 const gmx_domdec_comm_t *comm = dd->comm;
1948 /* This assumes a uniform x domain decomposition grid cell size */
1949 if (comm->bCartesianPP_PME)
1952 ivec coord, coord_pme;
1953 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
1954 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1956 /* This is a PP node */
1957 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1958 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
1962 else if (comm->bCartesianPP)
1964 if (sim_nodeid < dd->nnodes)
1966 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1971 /* This assumes DD cells with identical x coordinates
1972 * are numbered sequentially.
1974 if (dd->comm->pmenodes == nullptr)
1976 if (sim_nodeid < dd->nnodes)
1978 /* The DD index equals the nodeid */
1979 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1985 while (sim_nodeid > dd->comm->pmenodes[i])
1989 if (sim_nodeid < dd->comm->pmenodes[i])
1991 pmenode = dd->comm->pmenodes[i];
1999 void get_pme_nnodes(const gmx_domdec_t *dd,
2000 int *npmenodes_x, int *npmenodes_y)
2004 *npmenodes_x = dd->comm->npmenodes_x;
2005 *npmenodes_y = dd->comm->npmenodes_y;
2014 std::vector<int> get_pme_ddranks(t_commrec *cr, int pmenodeid)
2018 ivec coord, coord_pme;
2022 std::vector<int> ddranks;
2023 ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2025 for (x = 0; x < dd->nc[XX]; x++)
2027 for (y = 0; y < dd->nc[YY]; y++)
2029 for (z = 0; z < dd->nc[ZZ]; z++)
2031 if (dd->comm->bCartesianPP_PME)
2036 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2037 if (dd->ci[XX] == coord_pme[XX] &&
2038 dd->ci[YY] == coord_pme[YY] &&
2039 dd->ci[ZZ] == coord_pme[ZZ])
2041 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
2046 /* The slab corresponds to the nodeid in the PME group */
2047 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2049 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
2058 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
2060 gmx_bool bReceive = TRUE;
2062 if (cr->npmenodes < dd->nnodes)
2064 gmx_domdec_comm_t *comm = dd->comm;
2065 if (comm->bCartesianPP_PME)
2068 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
2070 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2071 coords[comm->cartpmedim]++;
2072 if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2075 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2076 if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
2078 /* This is not the last PP node for pmenode */
2083 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
2088 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
2089 if (cr->sim_nodeid+1 < cr->nnodes &&
2090 dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
2092 /* This is not the last PP node for pmenode */
2101 static void set_zones_ncg_home(gmx_domdec_t *dd)
2103 gmx_domdec_zones_t *zones;
2106 zones = &dd->comm->zones;
2108 zones->cg_range[0] = 0;
2109 for (i = 1; i < zones->n+1; i++)
2111 zones->cg_range[i] = dd->ncg_home;
2113 /* zone_ncg1[0] should always be equal to ncg_home */
2114 dd->comm->zone_ncg1[0] = dd->ncg_home;
2117 static void rebuild_cgindex(gmx_domdec_t *dd,
2118 const int *gcgs_index, const t_state *state)
2120 int * gmx_restrict dd_cg_gl = dd->index_gl;
2121 int * gmx_restrict cgindex = dd->cgindex;
2124 /* Copy back the global charge group indices from state
2125 * and rebuild the local charge group to atom index.
2128 for (unsigned int i = 0; i < state->cg_gl.size(); i++)
2131 int cg_gl = state->cg_gl[i];
2132 dd_cg_gl[i] = cg_gl;
2133 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2135 cgindex[state->cg_gl.size()] = nat;
2137 dd->ncg_home = state->cg_gl.size();
2140 set_zones_ncg_home(dd);
2143 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2145 while (cg >= cginfo_mb->cg_end)
2150 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2153 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2154 t_forcerec *fr, char *bLocalCG)
2156 cginfo_mb_t *cginfo_mb;
2162 cginfo_mb = fr->cginfo_mb;
2163 cginfo = fr->cginfo;
2165 for (cg = cg0; cg < cg1; cg++)
2167 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2171 if (bLocalCG != nullptr)
2173 for (cg = cg0; cg < cg1; cg++)
2175 bLocalCG[index_gl[cg]] = TRUE;
2180 static void make_dd_indices(gmx_domdec_t *dd,
2181 const int *gcgs_index, int cg_start)
2183 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2184 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2187 if (dd->nat_tot > dd->gatindex_nalloc)
2189 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2190 srenew(dd->gatindex, dd->gatindex_nalloc);
2193 nzone = dd->comm->zones.n;
2194 zone2cg = dd->comm->zones.cg_range;
2195 zone_ncg1 = dd->comm->zone_ncg1;
2196 index_gl = dd->index_gl;
2197 gatindex = dd->gatindex;
2198 bCGs = dd->comm->bCGs;
2200 if (zone2cg[1] != dd->ncg_home)
2202 gmx_incons("dd->ncg_zone is not up to date");
2205 /* Make the local to global and global to local atom index */
2206 a = dd->cgindex[cg_start];
2207 for (zone = 0; zone < nzone; zone++)
2215 cg0 = zone2cg[zone];
2217 cg1 = zone2cg[zone+1];
2218 cg1_p1 = cg0 + zone_ncg1[zone];
2220 for (cg = cg0; cg < cg1; cg++)
2225 /* Signal that this cg is from more than one pulse away */
2228 cg_gl = index_gl[cg];
2231 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2234 ga2la_set(dd->ga2la, a_gl, a, zone1);
2240 gatindex[a] = cg_gl;
2241 ga2la_set(dd->ga2la, cg_gl, a, zone1);
2248 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2254 if (bLocalCG == nullptr)
2258 for (i = 0; i < dd->ncg_tot; i++)
2260 if (!bLocalCG[dd->index_gl[i]])
2263 "DD rank %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
2268 for (i = 0; i < ncg_sys; i++)
2275 if (ngl != dd->ncg_tot)
2277 fprintf(stderr, "DD rank %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
2284 static void check_index_consistency(gmx_domdec_t *dd,
2285 int natoms_sys, int ncg_sys,
2288 int nerr, ngl, i, a, cell;
2293 if (dd->comm->DD_debug > 1)
2295 snew(have, natoms_sys);
2296 for (a = 0; a < dd->nat_tot; a++)
2298 if (have[dd->gatindex[a]] > 0)
2300 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2304 have[dd->gatindex[a]] = a + 1;
2310 snew(have, dd->nat_tot);
2313 for (i = 0; i < natoms_sys; i++)
2315 if (ga2la_get(dd->ga2la, i, &a, &cell))
2317 if (a >= dd->nat_tot)
2319 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2325 if (dd->gatindex[a] != i)
2327 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2334 if (ngl != dd->nat_tot)
2337 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
2338 dd->rank, where, ngl, dd->nat_tot);
2340 for (a = 0; a < dd->nat_tot; a++)
2345 "DD rank %d, %s: local atom %d, global %d has no global index\n",
2346 dd->rank, where, a+1, dd->gatindex[a]+1);
2351 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2355 gmx_fatal(FARGS, "DD rank %d, %s: %d atom/cg index inconsistencies",
2356 dd->rank, where, nerr);
2360 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2367 /* Clear the whole list without searching */
2368 ga2la_clear(dd->ga2la);
2372 for (i = a_start; i < dd->nat_tot; i++)
2374 ga2la_del(dd->ga2la, dd->gatindex[i]);
2378 bLocalCG = dd->comm->bLocalCG;
2381 for (i = cg_start; i < dd->ncg_tot; i++)
2383 bLocalCG[dd->index_gl[i]] = FALSE;
2387 dd_clear_local_vsite_indices(dd);
2389 if (dd->constraints)
2391 dd_clear_local_constraint_indices(dd);
2395 /* This function should be used for moving the domain boudaries during DLB,
2396 * for obtaining the minimum cell size. It checks the initially set limit
2397 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2398 * and, possibly, a longer cut-off limit set for PME load balancing.
2400 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2404 cellsize_min = comm->cellsize_min[dim];
2406 if (!comm->bVacDLBNoLimit)
2408 /* The cut-off might have changed, e.g. by PME load balacning,
2409 * from the value used to set comm->cellsize_min, so check it.
2411 cellsize_min = std::max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2413 if (comm->bPMELoadBalDLBLimits)
2415 /* Check for the cut-off limit set by the PME load balancing */
2416 cellsize_min = std::max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2420 return cellsize_min;
2423 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2426 real grid_jump_limit;
2428 /* The distance between the boundaries of cells at distance
2429 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2430 * and by the fact that cells should not be shifted by more than
2431 * half their size, such that cg's only shift by one cell
2432 * at redecomposition.
2434 grid_jump_limit = comm->cellsize_limit;
2435 if (!comm->bVacDLBNoLimit)
2437 if (comm->bPMELoadBalDLBLimits)
2439 cutoff = std::max(cutoff, comm->PMELoadBal_max_cutoff);
2441 grid_jump_limit = std::max(grid_jump_limit,
2442 cutoff/comm->cd[dim_ind].np);
2445 return grid_jump_limit;
2448 static gmx_bool check_grid_jump(gmx_int64_t step,
2454 gmx_domdec_comm_t *comm;
2463 for (d = 1; d < dd->ndim; d++)
2466 limit = grid_jump_limit(comm, cutoff, d);
2467 bfac = ddbox->box_size[dim];
2468 if (ddbox->tric_dir[dim])
2470 bfac *= ddbox->skew_fac[dim];
2472 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2473 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2481 /* This error should never be triggered under normal
2482 * circumstances, but you never know ...
2484 gmx_fatal(FARGS, "step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
2485 gmx_step_str(step, buf),
2486 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2494 static int dd_load_count(gmx_domdec_comm_t *comm)
2496 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2499 static float dd_force_load(gmx_domdec_comm_t *comm)
2506 if (comm->eFlop > 1)
2508 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2513 load = comm->cycl[ddCyclF];
2514 if (comm->cycl_n[ddCyclF] > 1)
2516 /* Subtract the maximum of the last n cycle counts
2517 * to get rid of possible high counts due to other sources,
2518 * for instance system activity, that would otherwise
2519 * affect the dynamic load balancing.
2521 load -= comm->cycl_max[ddCyclF];
2525 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2527 float gpu_wait, gpu_wait_sum;
2529 gpu_wait = comm->cycl[ddCyclWaitGPU];
2530 if (comm->cycl_n[ddCyclF] > 1)
2532 /* We should remove the WaitGPU time of the same MD step
2533 * as the one with the maximum F time, since the F time
2534 * and the wait time are not independent.
2535 * Furthermore, the step for the max F time should be chosen
2536 * the same on all ranks that share the same GPU.
2537 * But to keep the code simple, we remove the average instead.
2538 * The main reason for artificially long times at some steps
2539 * is spurious CPU activity or MPI time, so we don't expect
2540 * that changes in the GPU wait time matter a lot here.
2542 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2544 /* Sum the wait times over the ranks that share the same GPU */
2545 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2546 comm->mpi_comm_gpu_shared);
2547 /* Replace the wait time by the average over the ranks */
2548 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
2556 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2558 gmx_domdec_comm_t *comm;
2563 snew(*dim_f, dd->nc[dim]+1);
2565 for (i = 1; i < dd->nc[dim]; i++)
2567 if (comm->slb_frac[dim])
2569 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2573 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2576 (*dim_f)[dd->nc[dim]] = 1;
2579 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2581 int pmeindex, slab, nso, i;
2584 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2590 ddpme->dim = dimind;
2592 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2594 ddpme->nslab = (ddpme->dim == 0 ?
2595 dd->comm->npmenodes_x :
2596 dd->comm->npmenodes_y);
2598 if (ddpme->nslab <= 1)
2603 nso = dd->comm->npmenodes/ddpme->nslab;
2604 /* Determine for each PME slab the PP location range for dimension dim */
2605 snew(ddpme->pp_min, ddpme->nslab);
2606 snew(ddpme->pp_max, ddpme->nslab);
2607 for (slab = 0; slab < ddpme->nslab; slab++)
2609 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2610 ddpme->pp_max[slab] = 0;
2612 for (i = 0; i < dd->nnodes; i++)
2614 ddindex2xyz(dd->nc, i, xyz);
2615 /* For y only use our y/z slab.
2616 * This assumes that the PME x grid size matches the DD grid size.
2618 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2620 pmeindex = ddindex2pmeindex(dd, i);
2623 slab = pmeindex/nso;
2627 slab = pmeindex % ddpme->nslab;
2629 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
2630 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
2634 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
2637 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
2639 if (dd->comm->ddpme[0].dim == XX)
2641 return dd->comm->ddpme[0].maxshift;
2649 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
2651 if (dd->comm->ddpme[0].dim == YY)
2653 return dd->comm->ddpme[0].maxshift;
2655 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2657 return dd->comm->ddpme[1].maxshift;
2665 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
2666 gmx_bool bUniform, const gmx_ddbox_t *ddbox,
2669 gmx_domdec_comm_t *comm;
2672 real range, pme_boundary;
2676 nc = dd->nc[ddpme->dim];
2679 if (!ddpme->dim_match)
2681 /* PP decomposition is not along dim: the worst situation */
2684 else if (ns <= 3 || (bUniform && ns == nc))
2686 /* The optimal situation */
2691 /* We need to check for all pme nodes which nodes they
2692 * could possibly need to communicate with.
2694 xmin = ddpme->pp_min;
2695 xmax = ddpme->pp_max;
2696 /* Allow for atoms to be maximally 2/3 times the cut-off
2697 * out of their DD cell. This is a reasonable balance between
2698 * between performance and support for most charge-group/cut-off
2701 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
2702 /* Avoid extra communication when we are exactly at a boundary */
2706 for (s = 0; s < ns; s++)
2708 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
2709 pme_boundary = (real)s/ns;
2712 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
2714 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
2718 pme_boundary = (real)(s+1)/ns;
2721 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
2723 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
2730 ddpme->maxshift = sh;
2734 fprintf(debug, "PME slab communication range for dim %d is %d\n",
2735 ddpme->dim, ddpme->maxshift);
2739 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
2743 for (d = 0; d < dd->ndim; d++)
2746 if (dim < ddbox->nboundeddim &&
2747 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
2748 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
2750 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
2751 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
2752 dd->nc[dim], dd->comm->cellsize_limit);
2758 setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY
2761 /* Set the domain boundaries. Use for static (or no) load balancing,
2762 * and also for the starting state for dynamic load balancing.
2763 * setmode determine if and where the boundaries are stored, use enum above.
2764 * Returns the number communication pulses in npulse.
2766 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, const gmx_ddbox_t *ddbox,
2767 int setmode, ivec npulse)
2769 gmx_domdec_comm_t *comm;
2772 real *cell_x, cell_dx, cellsize;
2776 for (d = 0; d < DIM; d++)
2778 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
2780 if (dd->nc[d] == 1 || comm->slb_frac[d] == nullptr)
2783 cell_dx = ddbox->box_size[d]/dd->nc[d];
2786 case setcellsizeslbMASTER:
2787 for (j = 0; j < dd->nc[d]+1; j++)
2789 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
2792 case setcellsizeslbLOCAL:
2793 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
2794 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
2799 cellsize = cell_dx*ddbox->skew_fac[d];
2800 while (cellsize*npulse[d] < comm->cutoff)
2804 cellsize_min[d] = cellsize;
2808 /* Statically load balanced grid */
2809 /* Also when we are not doing a master distribution we determine
2810 * all cell borders in a loop to obtain identical values
2811 * to the master distribution case and to determine npulse.
2813 if (setmode == setcellsizeslbMASTER)
2815 cell_x = dd->ma->cell_x[d];
2819 snew(cell_x, dd->nc[d]+1);
2821 cell_x[0] = ddbox->box0[d];
2822 for (j = 0; j < dd->nc[d]; j++)
2824 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
2825 cell_x[j+1] = cell_x[j] + cell_dx;
2826 cellsize = cell_dx*ddbox->skew_fac[d];
2827 while (cellsize*npulse[d] < comm->cutoff &&
2828 npulse[d] < dd->nc[d]-1)
2832 cellsize_min[d] = std::min(cellsize_min[d], cellsize);
2834 if (setmode == setcellsizeslbLOCAL)
2836 comm->cell_x0[d] = cell_x[dd->ci[d]];
2837 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
2839 if (setmode != setcellsizeslbMASTER)
2844 /* The following limitation is to avoid that a cell would receive
2845 * some of its own home charge groups back over the periodic boundary.
2846 * Double charge groups cause trouble with the global indices.
2848 if (d < ddbox->npbcdim &&
2849 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
2851 char error_string[STRLEN];
2853 sprintf(error_string,
2854 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
2855 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
2857 dd->nc[d], dd->nc[d],
2858 dd->nnodes > dd->nc[d] ? "cells" : "ranks");
2860 if (setmode == setcellsizeslbLOCAL)
2862 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2867 gmx_fatal(FARGS, error_string);
2874 copy_rvec(cellsize_min, comm->cellsize_min);
2877 for (d = 0; d < comm->npmedecompdim; d++)
2879 set_pme_maxshift(dd, &comm->ddpme[d],
2880 comm->slb_frac[dd->dim[d]] == nullptr, ddbox,
2881 comm->ddpme[d].slb_dim_f);
2886 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
2887 int d, int dim, domdec_root_t *root,
2888 const gmx_ddbox_t *ddbox,
2889 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
2891 gmx_domdec_comm_t *comm;
2892 int ncd, i, j, nmin, nmin_old;
2893 gmx_bool bLimLo, bLimHi;
2895 real fac, halfway, cellsize_limit_f_i, region_size;
2896 gmx_bool bPBC, bLastHi = FALSE;
2897 int nrange[] = {range[0], range[1]};
2899 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
2905 bPBC = (dim < ddbox->npbcdim);
2907 cell_size = root->buf_ncd;
2911 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
2914 /* First we need to check if the scaling does not make cells
2915 * smaller than the smallest allowed size.
2916 * We need to do this iteratively, since if a cell is too small,
2917 * it needs to be enlarged, which makes all the other cells smaller,
2918 * which could in turn make another cell smaller than allowed.
2920 for (i = range[0]; i < range[1]; i++)
2922 root->bCellMin[i] = FALSE;
2928 /* We need the total for normalization */
2930 for (i = range[0]; i < range[1]; i++)
2932 if (root->bCellMin[i] == FALSE)
2934 fac += cell_size[i];
2937 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
2938 /* Determine the cell boundaries */
2939 for (i = range[0]; i < range[1]; i++)
2941 if (root->bCellMin[i] == FALSE)
2943 cell_size[i] *= fac;
2944 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
2946 cellsize_limit_f_i = 0;
2950 cellsize_limit_f_i = cellsize_limit_f;
2952 if (cell_size[i] < cellsize_limit_f_i)
2954 root->bCellMin[i] = TRUE;
2955 cell_size[i] = cellsize_limit_f_i;
2959 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
2962 while (nmin > nmin_old);
2965 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
2966 /* For this check we should not use DD_CELL_MARGIN,
2967 * but a slightly smaller factor,
2968 * since rounding could get use below the limit.
2970 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
2973 gmx_fatal(FARGS, "step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
2974 gmx_step_str(step, buf),
2975 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
2976 ncd, comm->cellsize_min[dim]);
2979 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
2983 /* Check if the boundary did not displace more than halfway
2984 * each of the cells it bounds, as this could cause problems,
2985 * especially when the differences between cell sizes are large.
2986 * If changes are applied, they will not make cells smaller
2987 * than the cut-off, as we check all the boundaries which
2988 * might be affected by a change and if the old state was ok,
2989 * the cells will at most be shrunk back to their old size.
2991 for (i = range[0]+1; i < range[1]; i++)
2993 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
2994 if (root->cell_f[i] < halfway)
2996 root->cell_f[i] = halfway;
2997 /* Check if the change also causes shifts of the next boundaries */
2998 for (j = i+1; j < range[1]; j++)
3000 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3002 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3006 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3007 if (root->cell_f[i] > halfway)
3009 root->cell_f[i] = halfway;
3010 /* Check if the change also causes shifts of the next boundaries */
3011 for (j = i-1; j >= range[0]+1; j--)
3013 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3015 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3022 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3023 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3024 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3025 * for a and b nrange is used */
3028 /* Take care of the staggering of the cell boundaries */
3031 for (i = range[0]; i < range[1]; i++)
3033 root->cell_f_max0[i] = root->cell_f[i];
3034 root->cell_f_min1[i] = root->cell_f[i+1];
3039 for (i = range[0]+1; i < range[1]; i++)
3041 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3042 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3043 if (bLimLo && bLimHi)
3045 /* Both limits violated, try the best we can */
3046 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3047 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3048 nrange[0] = range[0];
3050 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3053 nrange[1] = range[1];
3054 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3060 /* root->cell_f[i] = root->bound_min[i]; */
3061 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3064 else if (bLimHi && !bLastHi)
3067 if (nrange[1] < range[1]) /* found a LimLo before */
3069 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3070 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3071 nrange[0] = nrange[1];
3073 root->cell_f[i] = root->bound_max[i];
3075 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3077 nrange[1] = range[1];
3080 if (nrange[1] < range[1]) /* found last a LimLo */
3082 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3083 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3084 nrange[0] = nrange[1];
3085 nrange[1] = range[1];
3086 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3088 else if (nrange[0] > range[0]) /* found at least one LimHi */
3090 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3097 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3098 int d, int dim, domdec_root_t *root,
3099 const gmx_ddbox_t *ddbox,
3100 gmx_bool bDynamicBox,
3101 gmx_bool bUniform, gmx_int64_t step)
3103 gmx_domdec_comm_t *comm;
3104 int ncd, d1, i, pos;
3106 real load_aver, load_i, imbalance, change, change_max, sc;
3107 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3111 int range[] = { 0, 0 };
3115 /* Convert the maximum change from the input percentage to a fraction */
3116 change_limit = comm->dlb_scale_lim*0.01;
3120 bPBC = (dim < ddbox->npbcdim);
3122 cell_size = root->buf_ncd;
3124 /* Store the original boundaries */
3125 for (i = 0; i < ncd+1; i++)
3127 root->old_cell_f[i] = root->cell_f[i];
3131 for (i = 0; i < ncd; i++)
3133 cell_size[i] = 1.0/ncd;
3136 else if (dd_load_count(comm) > 0)
3138 load_aver = comm->load[d].sum_m/ncd;
3140 for (i = 0; i < ncd; i++)
3142 /* Determine the relative imbalance of cell i */
3143 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3144 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3145 /* Determine the change of the cell size using underrelaxation */
3146 change = -relax*imbalance;
3147 change_max = std::max(change_max, std::max(change, -change));
3149 /* Limit the amount of scaling.
3150 * We need to use the same rescaling for all cells in one row,
3151 * otherwise the load balancing might not converge.
3154 if (change_max > change_limit)
3156 sc *= change_limit/change_max;
3158 for (i = 0; i < ncd; i++)
3160 /* Determine the relative imbalance of cell i */
3161 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3162 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3163 /* Determine the change of the cell size using underrelaxation */
3164 change = -sc*imbalance;
3165 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3169 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3170 cellsize_limit_f *= DD_CELL_MARGIN;
3171 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3172 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3173 if (ddbox->tric_dir[dim])
3175 cellsize_limit_f /= ddbox->skew_fac[dim];
3176 dist_min_f /= ddbox->skew_fac[dim];
3178 if (bDynamicBox && d > 0)
3180 dist_min_f *= DD_PRES_SCALE_MARGIN;
3182 if (d > 0 && !bUniform)
3184 /* Make sure that the grid is not shifted too much */
3185 for (i = 1; i < ncd; i++)
3187 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3189 gmx_incons("Inconsistent DD boundary staggering limits!");
3191 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3192 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3195 root->bound_min[i] += 0.5*space;
3197 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3198 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3201 root->bound_max[i] += 0.5*space;
3206 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3208 root->cell_f_max0[i-1] + dist_min_f,
3209 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3210 root->cell_f_min1[i] - dist_min_f);
3215 root->cell_f[0] = 0;
3216 root->cell_f[ncd] = 1;
3217 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3220 /* After the checks above, the cells should obey the cut-off
3221 * restrictions, but it does not hurt to check.
3223 for (i = 0; i < ncd; i++)
3227 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3228 dim, i, root->cell_f[i], root->cell_f[i+1]);
3231 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3232 root->cell_f[i+1] - root->cell_f[i] <
3233 cellsize_limit_f/DD_CELL_MARGIN)
3237 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3238 gmx_step_str(step, buf), dim2char(dim), i,
3239 (root->cell_f[i+1] - root->cell_f[i])
3240 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3245 /* Store the cell boundaries of the lower dimensions at the end */
3246 for (d1 = 0; d1 < d; d1++)
3248 root->cell_f[pos++] = comm->cell_f0[d1];
3249 root->cell_f[pos++] = comm->cell_f1[d1];
3252 if (d < comm->npmedecompdim)
3254 /* The master determines the maximum shift for
3255 * the coordinate communication between separate PME nodes.
3257 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
3259 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3262 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3266 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3267 const gmx_ddbox_t *ddbox,
3270 gmx_domdec_comm_t *comm;
3275 /* Set the cell dimensions */
3276 dim = dd->dim[dimind];
3277 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3278 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3279 if (dim >= ddbox->nboundeddim)
3281 comm->cell_x0[dim] += ddbox->box0[dim];
3282 comm->cell_x1[dim] += ddbox->box0[dim];
3286 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3287 int d, int dim, real *cell_f_row,
3288 const gmx_ddbox_t *ddbox)
3290 gmx_domdec_comm_t *comm;
3296 /* Each node would only need to know two fractions,
3297 * but it is probably cheaper to broadcast the whole array.
3299 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3300 0, comm->mpi_comm_load[d]);
3302 /* Copy the fractions for this dimension from the buffer */
3303 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3304 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3305 /* The whole array was communicated, so set the buffer position */
3306 pos = dd->nc[dim] + 1;
3307 for (d1 = 0; d1 <= d; d1++)
3311 /* Copy the cell fractions of the lower dimensions */
3312 comm->cell_f0[d1] = cell_f_row[pos++];
3313 comm->cell_f1[d1] = cell_f_row[pos++];
3315 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3317 /* Convert the communicated shift from float to int */
3318 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3321 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3325 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3326 const gmx_ddbox_t *ddbox,
3327 gmx_bool bDynamicBox,
3328 gmx_bool bUniform, gmx_int64_t step)
3330 gmx_domdec_comm_t *comm;
3332 gmx_bool bRowMember, bRowRoot;
3337 for (d = 0; d < dd->ndim; d++)
3342 for (d1 = d; d1 < dd->ndim; d1++)
3344 if (dd->ci[dd->dim[d1]] > 0)
3357 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3358 ddbox, bDynamicBox, bUniform, step);
3359 cell_f_row = comm->root[d]->cell_f;
3363 cell_f_row = comm->cell_f_row;
3365 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
3370 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd,
3371 const gmx_ddbox_t *ddbox)
3375 /* This function assumes the box is static and should therefore
3376 * not be called when the box has changed since the last
3377 * call to dd_partition_system.
3379 for (d = 0; d < dd->ndim; d++)
3381 relative_to_absolute_cell_bounds(dd, ddbox, d);
3387 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3388 const gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3389 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3390 gmx_wallcycle_t wcycle)
3392 gmx_domdec_comm_t *comm;
3399 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3400 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3401 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3403 else if (bDynamicBox)
3405 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3408 /* Set the dimensions for which no DD is used */
3409 for (dim = 0; dim < DIM; dim++)
3411 if (dd->nc[dim] == 1)
3413 comm->cell_x0[dim] = 0;
3414 comm->cell_x1[dim] = ddbox->box_size[dim];
3415 if (dim >= ddbox->nboundeddim)
3417 comm->cell_x0[dim] += ddbox->box0[dim];
3418 comm->cell_x1[dim] += ddbox->box0[dim];
3424 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3427 gmx_domdec_comm_dim_t *cd;
3429 for (d = 0; d < dd->ndim; d++)
3431 cd = &dd->comm->cd[d];
3432 np = npulse[dd->dim[d]];
3433 if (np > cd->np_nalloc)
3437 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3438 dim2char(dd->dim[d]), np);
3440 if (DDMASTER(dd) && cd->np_nalloc > 0)
3442 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3444 srenew(cd->ind, np);
3445 for (i = cd->np_nalloc; i < np; i++)
3447 cd->ind[i].index = nullptr;
3448 cd->ind[i].nalloc = 0;
3457 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3458 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3459 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3460 gmx_wallcycle_t wcycle)
3462 gmx_domdec_comm_t *comm;
3468 /* Copy the old cell boundaries for the cg displacement check */
3469 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3470 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3476 check_box_size(dd, ddbox);
3478 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3482 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
3483 realloc_comm_ind(dd, npulse);
3488 for (d = 0; d < DIM; d++)
3490 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3491 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
3496 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3498 rvec cell_ns_x0, rvec cell_ns_x1,
3501 gmx_domdec_comm_t *comm;
3506 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3508 dim = dd->dim[dim_ind];
3510 /* Without PBC we don't have restrictions on the outer cells */
3511 if (!(dim >= ddbox->npbcdim &&
3512 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3514 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3515 comm->cellsize_min[dim])
3518 gmx_fatal(FARGS, "step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3519 gmx_step_str(step, buf), dim2char(dim),
3520 comm->cell_x1[dim] - comm->cell_x0[dim],
3521 ddbox->skew_fac[dim],
3522 dd->comm->cellsize_min[dim],
3523 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3527 if ((isDlbOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3529 /* Communicate the boundaries and update cell_ns_x0/1 */
3530 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3531 if (isDlbOn(dd->comm) && dd->ndim > 1)
3533 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
3538 static void make_tric_corr_matrix(int npbcdim, const matrix box, matrix tcm)
3542 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3550 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3551 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3560 static void check_screw_box(const matrix box)
3562 /* Mathematical limitation */
3563 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3565 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3568 /* Limitation due to the asymmetry of the eighth shell method */
3569 if (box[ZZ][YY] != 0)
3571 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
3575 static void distribute_cg(FILE *fplog,
3576 const matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3579 gmx_domdec_master_t *ma;
3580 int **tmp_ind = nullptr, *tmp_nalloc = nullptr;
3581 int i, icg, j, k, k0, k1, d;
3585 real nrcg, inv_ncg, pos_d;
3591 snew(tmp_nalloc, dd->nnodes);
3592 snew(tmp_ind, dd->nnodes);
3593 for (i = 0; i < dd->nnodes; i++)
3595 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3596 snew(tmp_ind[i], tmp_nalloc[i]);
3599 /* Clear the count */
3600 for (i = 0; i < dd->nnodes; i++)
3606 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3608 cgindex = cgs->index;
3610 /* Compute the center of geometry for all charge groups */
3611 for (icg = 0; icg < cgs->nr; icg++)
3614 k1 = cgindex[icg+1];
3618 copy_rvec(pos[k0], cg_cm);
3625 for (k = k0; (k < k1); k++)
3627 rvec_inc(cg_cm, pos[k]);
3629 for (d = 0; (d < DIM); d++)
3631 cg_cm[d] *= inv_ncg;
3634 /* Put the charge group in the box and determine the cell index */
3635 for (d = DIM-1; d >= 0; d--)
3638 if (d < dd->npbcdim)
3640 bScrew = (dd->bScrewPBC && d == XX);
3641 if (tric_dir[d] && dd->nc[d] > 1)
3643 /* Use triclinic coordintates for this dimension */
3644 for (j = d+1; j < DIM; j++)
3646 pos_d += cg_cm[j]*tcm[j][d];
3649 while (pos_d >= box[d][d])
3652 rvec_dec(cg_cm, box[d]);
3655 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3656 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3658 for (k = k0; (k < k1); k++)
3660 rvec_dec(pos[k], box[d]);
3663 pos[k][YY] = box[YY][YY] - pos[k][YY];
3664 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3671 rvec_inc(cg_cm, box[d]);
3674 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3675 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3677 for (k = k0; (k < k1); k++)
3679 rvec_inc(pos[k], box[d]);
3682 pos[k][YY] = box[YY][YY] - pos[k][YY];
3683 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3688 /* This could be done more efficiently */
3690 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
3695 i = dd_index(dd->nc, ind);
3696 if (ma->ncg[i] == tmp_nalloc[i])
3698 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
3699 srenew(tmp_ind[i], tmp_nalloc[i]);
3701 tmp_ind[i][ma->ncg[i]] = icg;
3703 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
3707 for (i = 0; i < dd->nnodes; i++)
3710 for (k = 0; k < ma->ncg[i]; k++)
3712 ma->cg[k1++] = tmp_ind[i][k];
3715 ma->index[dd->nnodes] = k1;
3717 for (i = 0; i < dd->nnodes; i++)
3726 // Use double for the sums to avoid natoms^2 overflowing
3728 int nat_sum, nat_min, nat_max;
3733 nat_min = ma->nat[0];
3734 nat_max = ma->nat[0];
3735 for (i = 0; i < dd->nnodes; i++)
3737 nat_sum += ma->nat[i];
3738 // cast to double to avoid integer overflows when squaring
3739 nat2_sum += gmx::square(static_cast<double>(ma->nat[i]));
3740 nat_min = std::min(nat_min, ma->nat[i]);
3741 nat_max = std::max(nat_max, ma->nat[i]);
3743 nat_sum /= dd->nnodes;
3744 nat2_sum /= dd->nnodes;
3746 fprintf(fplog, "Atom distribution over %d domains: av %d stddev %d min %d max %d\n",
3749 static_cast<int>(std::sqrt(nat2_sum - gmx::square(static_cast<double>(nat_sum)) + 0.5)),
3754 static void get_cg_distribution(FILE *fplog, gmx_domdec_t *dd,
3755 t_block *cgs, const matrix box, gmx_ddbox_t *ddbox,
3758 gmx_domdec_master_t *ma = nullptr;
3761 int *ibuf, buf2[2] = { 0, 0 };
3762 gmx_bool bMaster = DDMASTER(dd);
3770 check_screw_box(box);
3773 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
3775 distribute_cg(fplog, box, ddbox->tric_dir, cgs, pos, dd);
3776 for (i = 0; i < dd->nnodes; i++)
3778 ma->ibuf[2*i] = ma->ncg[i];
3779 ma->ibuf[2*i+1] = ma->nat[i];
3787 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
3789 dd->ncg_home = buf2[0];
3790 dd->nat_home = buf2[1];
3791 dd->ncg_tot = dd->ncg_home;
3792 dd->nat_tot = dd->nat_home;
3793 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
3795 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
3796 srenew(dd->index_gl, dd->cg_nalloc);
3797 srenew(dd->cgindex, dd->cg_nalloc+1);
3801 for (i = 0; i < dd->nnodes; i++)
3803 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
3804 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
3809 bMaster ? ma->ibuf : nullptr,
3810 bMaster ? ma->ibuf+dd->nnodes : nullptr,
3811 bMaster ? ma->cg : nullptr,
3812 dd->ncg_home*sizeof(int), dd->index_gl);
3814 /* Determine the home charge group sizes */
3816 for (i = 0; i < dd->ncg_home; i++)
3818 cg_gl = dd->index_gl[i];
3820 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
3825 fprintf(debug, "Home charge groups:\n");
3826 for (i = 0; i < dd->ncg_home; i++)
3828 fprintf(debug, " %d", dd->index_gl[i]);
3831 fprintf(debug, "\n");
3834 fprintf(debug, "\n");
3838 static int compact_and_copy_vec_at(int ncg, int *move,
3841 rvec *src, gmx_domdec_comm_t *comm,
3844 int m, icg, i, i0, i1, nrcg;
3850 for (m = 0; m < DIM*2; m++)
3856 for (icg = 0; icg < ncg; icg++)
3858 i1 = cgindex[icg+1];
3864 /* Compact the home array in place */
3865 for (i = i0; i < i1; i++)
3867 copy_rvec(src[i], src[home_pos++]);
3873 /* Copy to the communication buffer */
3875 pos_vec[m] += 1 + vec*nrcg;
3876 for (i = i0; i < i1; i++)
3878 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
3880 pos_vec[m] += (nvec - vec - 1)*nrcg;
3884 home_pos += i1 - i0;
3892 static int compact_and_copy_vec_cg(int ncg, int *move,
3894 int nvec, rvec *src, gmx_domdec_comm_t *comm,
3897 int m, icg, i0, i1, nrcg;
3903 for (m = 0; m < DIM*2; m++)
3909 for (icg = 0; icg < ncg; icg++)
3911 i1 = cgindex[icg+1];
3917 /* Compact the home array in place */
3918 copy_rvec(src[icg], src[home_pos++]);
3924 /* Copy to the communication buffer */
3925 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
3926 pos_vec[m] += 1 + nrcg*nvec;
3938 static int compact_ind(int ncg, int *move,
3939 int *index_gl, int *cgindex,
3941 gmx_ga2la_t *ga2la, char *bLocalCG,
3944 int cg, nat, a0, a1, a, a_gl;
3949 for (cg = 0; cg < ncg; cg++)
3955 /* Compact the home arrays in place.
3956 * Anything that can be done here avoids access to global arrays.
3958 cgindex[home_pos] = nat;
3959 for (a = a0; a < a1; a++)
3962 gatindex[nat] = a_gl;
3963 /* The cell number stays 0, so we don't need to set it */
3964 ga2la_change_la(ga2la, a_gl, nat);
3967 index_gl[home_pos] = index_gl[cg];
3968 cginfo[home_pos] = cginfo[cg];
3969 /* The charge group remains local, so bLocalCG does not change */
3974 /* Clear the global indices */
3975 for (a = a0; a < a1; a++)
3977 ga2la_del(ga2la, gatindex[a]);
3981 bLocalCG[index_gl[cg]] = FALSE;
3985 cgindex[home_pos] = nat;
3990 static void clear_and_mark_ind(int ncg, int *move,
3991 int *index_gl, int *cgindex, int *gatindex,
3992 gmx_ga2la_t *ga2la, char *bLocalCG,
3997 for (cg = 0; cg < ncg; cg++)
4003 /* Clear the global indices */
4004 for (a = a0; a < a1; a++)
4006 ga2la_del(ga2la, gatindex[a]);
4010 bLocalCG[index_gl[cg]] = FALSE;
4012 /* Signal that this cg has moved using the ns cell index.
4013 * Here we set it to -1. fill_grid will change it
4014 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4016 cell_index[cg] = -1;
4021 static void print_cg_move(FILE *fplog,
4023 gmx_int64_t step, int cg, int dim, int dir,
4024 gmx_bool bHaveCgcmOld, real limitd,
4025 rvec cm_old, rvec cm_new, real pos_d)
4027 gmx_domdec_comm_t *comm;
4032 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4035 fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4036 dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
4037 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4041 /* We don't have a limiting distance available: don't print it */
4042 fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition in direction %c\n",
4043 dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
4044 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4046 fprintf(fplog, "distance out of cell %f\n",
4047 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4050 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4051 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4053 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4054 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4055 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4057 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4058 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4060 comm->cell_x0[dim], comm->cell_x1[dim]);
4063 static void cg_move_error(FILE *fplog,
4065 gmx_int64_t step, int cg, int dim, int dir,
4066 gmx_bool bHaveCgcmOld, real limitd,
4067 rvec cm_old, rvec cm_new, real pos_d)
4071 print_cg_move(fplog, dd, step, cg, dim, dir,
4072 bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
4074 print_cg_move(stderr, dd, step, cg, dim, dir,
4075 bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
4077 "%s moved too far between two domain decomposition steps\n"
4078 "This usually means that your system is not well equilibrated",
4079 dd->comm->bCGs ? "A charge group" : "An atom");
4082 static void rotate_state_atom(t_state *state, int a)
4084 if (state->flags & (1 << estX))
4086 /* Rotate the complete state; for a rectangular box only */
4087 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4088 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4090 if (state->flags & (1 << estV))
4092 state->v[a][YY] = -state->v[a][YY];
4093 state->v[a][ZZ] = -state->v[a][ZZ];
4095 if (state->flags & (1 << estCGP))
4097 state->cg_p[a][YY] = -state->cg_p[a][YY];
4098 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4102 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4104 if (natoms > comm->moved_nalloc)
4106 /* Contents should be preserved here */
4107 comm->moved_nalloc = over_alloc_dd(natoms);
4108 srenew(comm->moved, comm->moved_nalloc);
4114 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4117 ivec tric_dir, matrix tcm,
4118 rvec cell_x0, rvec cell_x1,
4119 rvec limitd, rvec limit0, rvec limit1,
4121 int cg_start, int cg_end,
4126 int cg, k, k0, k1, d, dim, d2;
4131 real inv_ncg, pos_d;
4134 npbcdim = dd->npbcdim;
4136 for (cg = cg_start; cg < cg_end; cg++)
4143 copy_rvec(state->x[k0], cm_new);
4150 for (k = k0; (k < k1); k++)
4152 rvec_inc(cm_new, state->x[k]);
4154 for (d = 0; (d < DIM); d++)
4156 cm_new[d] = inv_ncg*cm_new[d];
4161 /* Do pbc and check DD cell boundary crossings */
4162 for (d = DIM-1; d >= 0; d--)
4166 bScrew = (dd->bScrewPBC && d == XX);
4167 /* Determine the location of this cg in lattice coordinates */
4171 for (d2 = d+1; d2 < DIM; d2++)
4173 pos_d += cm_new[d2]*tcm[d2][d];
4176 /* Put the charge group in the triclinic unit-cell */
4177 if (pos_d >= cell_x1[d])
4179 if (pos_d >= limit1[d])
4181 cg_move_error(fplog, dd, step, cg, d, 1,
4182 cg_cm != as_rvec_array(state->x.data()), limitd[d],
4183 cg_cm[cg], cm_new, pos_d);
4186 if (dd->ci[d] == dd->nc[d] - 1)
4188 rvec_dec(cm_new, state->box[d]);
4191 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4192 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4194 for (k = k0; (k < k1); k++)
4196 rvec_dec(state->x[k], state->box[d]);
4199 rotate_state_atom(state, k);
4204 else if (pos_d < cell_x0[d])
4206 if (pos_d < limit0[d])
4208 cg_move_error(fplog, dd, step, cg, d, -1,
4209 cg_cm != as_rvec_array(state->x.data()), limitd[d],
4210 cg_cm[cg], cm_new, pos_d);
4215 rvec_inc(cm_new, state->box[d]);
4218 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4219 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4221 for (k = k0; (k < k1); k++)
4223 rvec_inc(state->x[k], state->box[d]);
4226 rotate_state_atom(state, k);
4232 else if (d < npbcdim)
4234 /* Put the charge group in the rectangular unit-cell */
4235 while (cm_new[d] >= state->box[d][d])
4237 rvec_dec(cm_new, state->box[d]);
4238 for (k = k0; (k < k1); k++)
4240 rvec_dec(state->x[k], state->box[d]);
4243 while (cm_new[d] < 0)
4245 rvec_inc(cm_new, state->box[d]);
4246 for (k = k0; (k < k1); k++)
4248 rvec_inc(state->x[k], state->box[d]);
4254 copy_rvec(cm_new, cg_cm[cg]);
4256 /* Determine where this cg should go */
4259 for (d = 0; d < dd->ndim; d++)
4264 flag |= DD_FLAG_FW(d);
4270 else if (dev[dim] == -1)
4272 flag |= DD_FLAG_BW(d);
4275 if (dd->nc[dim] > 2)
4286 /* Temporarily store the flag in move */
4287 move[cg] = mc + flag;
4291 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4292 gmx_domdec_t *dd, ivec tric_dir,
4293 t_state *state, PaddedRVecVector *f,
4302 int ncg[DIM*2] = { 0 }, nat[DIM*2] = { 0 };
4303 int i, cg, k, d, dim, dim2, dir, d2, d3;
4304 int mc, cdd, nrcg, ncg_recv, nvs, nvr, nvec, vec;
4305 int sbuf[2], rbuf[2];
4306 int home_pos_cg, home_pos_at, buf_pos;
4310 rvec *cg_cm = nullptr, cell_x0, cell_x1, limitd, limit0, limit1;
4312 cginfo_mb_t *cginfo_mb;
4313 gmx_domdec_comm_t *comm;
4315 int nthread, thread;
4319 check_screw_box(state->box);
4323 if (fr->cutoff_scheme == ecutsGROUP)
4328 // Positions are always present, so there's nothing to flag
4329 bool bV = state->flags & (1<<estV);
4330 bool bCGP = state->flags & (1<<estCGP);
4332 if (dd->ncg_tot > comm->nalloc_int)
4334 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4335 srenew(comm->buf_int, comm->nalloc_int);
4337 move = comm->buf_int;
4339 npbcdim = dd->npbcdim;
4341 for (d = 0; (d < DIM); d++)
4343 limitd[d] = dd->comm->cellsize_min[d];
4344 if (d >= npbcdim && dd->ci[d] == 0)
4346 cell_x0[d] = -GMX_FLOAT_MAX;
4350 cell_x0[d] = comm->cell_x0[d];
4352 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4354 cell_x1[d] = GMX_FLOAT_MAX;
4358 cell_x1[d] = comm->cell_x1[d];
4362 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4363 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4367 /* We check after communication if a charge group moved
4368 * more than one cell. Set the pre-comm check limit to float_max.
4370 limit0[d] = -GMX_FLOAT_MAX;
4371 limit1[d] = GMX_FLOAT_MAX;
4375 make_tric_corr_matrix(npbcdim, state->box, tcm);
4377 cgindex = dd->cgindex;
4379 nthread = gmx_omp_nthreads_get(emntDomdec);
4381 /* Compute the center of geometry for all home charge groups
4382 * and put them in the box and determine where they should go.
4384 #pragma omp parallel for num_threads(nthread) schedule(static)
4385 for (thread = 0; thread < nthread; thread++)
4389 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4390 cell_x0, cell_x1, limitd, limit0, limit1,
4392 ( thread *dd->ncg_home)/nthread,
4393 ((thread+1)*dd->ncg_home)/nthread,
4394 fr->cutoff_scheme == ecutsGROUP ? cg_cm : as_rvec_array(state->x.data()),
4397 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
4400 for (cg = 0; cg < dd->ncg_home; cg++)
4405 flag = mc & ~DD_FLAG_NRCG;
4406 mc = mc & DD_FLAG_NRCG;
4409 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4411 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4412 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4414 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4415 /* We store the cg size in the lower 16 bits
4416 * and the place where the charge group should go
4417 * in the next 6 bits. This saves some communication volume.
4419 nrcg = cgindex[cg+1] - cgindex[cg];
4420 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4426 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4427 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4430 for (i = 0; i < dd->ndim*2; i++)
4432 *ncg_moved += ncg[i];
4445 /* Make sure the communication buffers are large enough */
4446 for (mc = 0; mc < dd->ndim*2; mc++)
4448 nvr = ncg[mc] + nat[mc]*nvec;
4449 if (nvr > comm->cgcm_state_nalloc[mc])
4451 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4452 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4456 switch (fr->cutoff_scheme)
4459 /* Recalculating cg_cm might be cheaper than communicating,
4460 * but that could give rise to rounding issues.
4463 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4464 nvec, cg_cm, comm, bCompact);
4467 /* Without charge groups we send the moved atom coordinates
4468 * over twice. This is so the code below can be used without
4469 * many conditionals for both for with and without charge groups.
4472 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4473 nvec, as_rvec_array(state->x.data()), comm, FALSE);
4476 home_pos_cg -= *ncg_moved;
4480 gmx_incons("unimplemented");
4486 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4487 nvec, vec++, as_rvec_array(state->x.data()),
4491 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4492 nvec, vec++, as_rvec_array(state->v.data()),
4497 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4498 nvec, vec++, as_rvec_array(state->cg_p.data()),
4504 compact_ind(dd->ncg_home, move,
4505 dd->index_gl, dd->cgindex, dd->gatindex,
4506 dd->ga2la, comm->bLocalCG,
4511 if (fr->cutoff_scheme == ecutsVERLET)
4513 moved = get_moved(comm, dd->ncg_home);
4515 for (k = 0; k < dd->ncg_home; k++)
4522 moved = fr->ns->grid->cell_index;
4525 clear_and_mark_ind(dd->ncg_home, move,
4526 dd->index_gl, dd->cgindex, dd->gatindex,
4527 dd->ga2la, comm->bLocalCG,
4531 cginfo_mb = fr->cginfo_mb;
4533 *ncg_stay_home = home_pos_cg;
4534 for (d = 0; d < dd->ndim; d++)
4539 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4542 /* Communicate the cg and atom counts */
4547 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4548 d, dir, sbuf[0], sbuf[1]);
4550 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4552 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4554 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4555 srenew(comm->buf_int, comm->nalloc_int);
4558 /* Communicate the charge group indices, sizes and flags */
4559 dd_sendrecv_int(dd, d, dir,
4560 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4561 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4563 nvs = ncg[cdd] + nat[cdd]*nvec;
4564 i = rbuf[0] + rbuf[1] *nvec;
4565 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4567 /* Communicate cgcm and state */
4568 dd_sendrecv_rvec(dd, d, dir,
4569 comm->cgcm_state[cdd], nvs,
4570 comm->vbuf.v+nvr, i);
4571 ncg_recv += rbuf[0];
4575 dd_check_alloc_ncg(fr, state, f, home_pos_cg + ncg_recv);
4576 if (fr->cutoff_scheme == ecutsGROUP)
4578 /* Here we resize to more than necessary and shrink later */
4579 dd_resize_state(state, f, home_pos_at + ncg_recv*MAX_CGCGSIZE);
4582 /* Process the received charge groups */
4584 for (cg = 0; cg < ncg_recv; cg++)
4586 flag = comm->buf_int[cg*DD_CGIBS+1];
4588 if (dim >= npbcdim && dd->nc[dim] > 2)
4590 /* No pbc in this dim and more than one domain boundary.
4591 * We do a separate check if a charge group didn't move too far.
4593 if (((flag & DD_FLAG_FW(d)) &&
4594 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4595 ((flag & DD_FLAG_BW(d)) &&
4596 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4598 cg_move_error(fplog, dd, step, cg, dim,
4599 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4600 fr->cutoff_scheme == ecutsGROUP, 0,
4601 comm->vbuf.v[buf_pos],
4602 comm->vbuf.v[buf_pos],
4603 comm->vbuf.v[buf_pos][dim]);
4610 /* Check which direction this cg should go */
4611 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4613 if (isDlbOn(dd->comm))
4615 /* The cell boundaries for dimension d2 are not equal
4616 * for each cell row of the lower dimension(s),
4617 * therefore we might need to redetermine where
4618 * this cg should go.
4621 /* If this cg crosses the box boundary in dimension d2
4622 * we can use the communicated flag, so we do not
4623 * have to worry about pbc.
4625 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4626 (flag & DD_FLAG_FW(d2))) ||
4627 (dd->ci[dim2] == 0 &&
4628 (flag & DD_FLAG_BW(d2)))))
4630 /* Clear the two flags for this dimension */
4631 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4632 /* Determine the location of this cg
4633 * in lattice coordinates
4635 pos_d = comm->vbuf.v[buf_pos][dim2];
4638 for (d3 = dim2+1; d3 < DIM; d3++)
4641 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4644 /* Check of we are not at the box edge.
4645 * pbc is only handled in the first step above,
4646 * but this check could move over pbc while
4647 * the first step did not due to different rounding.
4649 if (pos_d >= cell_x1[dim2] &&
4650 dd->ci[dim2] != dd->nc[dim2]-1)
4652 flag |= DD_FLAG_FW(d2);
4654 else if (pos_d < cell_x0[dim2] &&
4657 flag |= DD_FLAG_BW(d2);
4659 comm->buf_int[cg*DD_CGIBS+1] = flag;
4662 /* Set to which neighboring cell this cg should go */
4663 if (flag & DD_FLAG_FW(d2))
4667 else if (flag & DD_FLAG_BW(d2))
4669 if (dd->nc[dd->dim[d2]] > 2)
4681 nrcg = flag & DD_FLAG_NRCG;
4684 if (home_pos_cg+1 > dd->cg_nalloc)
4686 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
4687 srenew(dd->index_gl, dd->cg_nalloc);
4688 srenew(dd->cgindex, dd->cg_nalloc+1);
4690 /* Set the global charge group index and size */
4691 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
4692 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
4693 /* Copy the state from the buffer */
4694 if (fr->cutoff_scheme == ecutsGROUP)
4697 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
4701 /* Set the cginfo */
4702 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
4703 dd->index_gl[home_pos_cg]);
4706 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
4709 for (i = 0; i < nrcg; i++)
4711 copy_rvec(comm->vbuf.v[buf_pos++],
4712 state->x[home_pos_at+i]);
4716 for (i = 0; i < nrcg; i++)
4718 copy_rvec(comm->vbuf.v[buf_pos++],
4719 state->v[home_pos_at+i]);
4724 for (i = 0; i < nrcg; i++)
4726 copy_rvec(comm->vbuf.v[buf_pos++],
4727 state->cg_p[home_pos_at+i]);
4731 home_pos_at += nrcg;
4735 /* Reallocate the buffers if necessary */
4736 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4738 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4739 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4741 nvr = ncg[mc] + nat[mc]*nvec;
4742 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
4744 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
4745 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4747 /* Copy from the receive to the send buffers */
4748 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
4749 comm->buf_int + cg*DD_CGIBS,
4750 DD_CGIBS*sizeof(int));
4751 memcpy(comm->cgcm_state[mc][nvr],
4752 comm->vbuf.v[buf_pos],
4753 (1+nrcg*nvec)*sizeof(rvec));
4754 buf_pos += 1 + nrcg*nvec;
4761 /* With sorting (!bCompact) the indices are now only partially up to date
4762 * and ncg_home and nat_home are not the real count, since there are
4763 * "holes" in the arrays for the charge groups that moved to neighbors.
4765 if (fr->cutoff_scheme == ecutsVERLET)
4767 moved = get_moved(comm, home_pos_cg);
4769 for (i = dd->ncg_home; i < home_pos_cg; i++)
4774 dd->ncg_home = home_pos_cg;
4775 dd->nat_home = home_pos_at;
4777 if (fr->cutoff_scheme == ecutsGROUP && !bCompact)
4779 /* We overallocated before, we need to set the right size here */
4780 dd_resize_state(state, f, dd->nat_home);
4786 "Finished repartitioning: cgs moved out %d, new home %d\n",
4787 *ncg_moved, dd->ncg_home-*ncg_moved);
4792 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
4794 /* Note that the cycles value can be incorrect, either 0 or some
4795 * extremely large value, when our thread migrated to another core
4796 * with an unsynchronized cycle counter. If this happens less often
4797 * that once per nstlist steps, this will not cause issues, since
4798 * we later subtract the maximum value from the sum over nstlist steps.
4799 * A zero count will slightly lower the total, but that's a small effect.
4800 * Note that the main purpose of the subtraction of the maximum value
4801 * is to avoid throwing off the load balancing when stalls occur due
4802 * e.g. system activity or network congestion.
4804 dd->comm->cycl[ddCycl] += cycles;
4805 dd->comm->cycl_n[ddCycl]++;
4806 if (cycles > dd->comm->cycl_max[ddCycl])
4808 dd->comm->cycl_max[ddCycl] = cycles;
4812 static double force_flop_count(t_nrnb *nrnb)
4819 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
4821 /* To get closer to the real timings, we half the count
4822 * for the normal loops and again half it for water loops.
4825 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
4827 sum += nrnb->n[i]*0.25*cost_nrnb(i);
4831 sum += nrnb->n[i]*0.50*cost_nrnb(i);
4834 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
4837 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
4839 sum += nrnb->n[i]*cost_nrnb(i);
4842 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
4844 sum += nrnb->n[i]*cost_nrnb(i);
4850 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
4852 if (dd->comm->eFlop)
4854 dd->comm->flop -= force_flop_count(nrnb);
4857 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
4859 if (dd->comm->eFlop)
4861 dd->comm->flop += force_flop_count(nrnb);
4866 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
4870 for (i = 0; i < ddCyclNr; i++)
4872 dd->comm->cycl[i] = 0;
4873 dd->comm->cycl_n[i] = 0;
4874 dd->comm->cycl_max[i] = 0;
4877 dd->comm->flop_n = 0;
4880 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
4882 gmx_domdec_comm_t *comm;
4883 domdec_load_t *load;
4884 domdec_root_t *root = nullptr;
4886 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
4891 fprintf(debug, "get_load_distribution start\n");
4894 wallcycle_start(wcycle, ewcDDCOMMLOAD);
4898 bSepPME = (dd->pme_nodeid >= 0);
4900 if (dd->ndim == 0 && bSepPME)
4902 /* Without decomposition, but with PME nodes, we need the load */
4903 comm->load[0].mdf = comm->cycl[ddCyclPPduringPME];
4904 comm->load[0].pme = comm->cycl[ddCyclPME];
4907 for (d = dd->ndim-1; d >= 0; d--)
4910 /* Check if we participate in the communication in this dimension */
4911 if (d == dd->ndim-1 ||
4912 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
4914 load = &comm->load[d];
4915 if (isDlbOn(dd->comm))
4917 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
4920 if (d == dd->ndim-1)
4922 sbuf[pos++] = dd_force_load(comm);
4923 sbuf[pos++] = sbuf[0];
4924 if (isDlbOn(dd->comm))
4926 sbuf[pos++] = sbuf[0];
4927 sbuf[pos++] = cell_frac;
4930 sbuf[pos++] = comm->cell_f_max0[d];
4931 sbuf[pos++] = comm->cell_f_min1[d];
4936 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
4937 sbuf[pos++] = comm->cycl[ddCyclPME];
4942 sbuf[pos++] = comm->load[d+1].sum;
4943 sbuf[pos++] = comm->load[d+1].max;
4944 if (isDlbOn(dd->comm))
4946 sbuf[pos++] = comm->load[d+1].sum_m;
4947 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
4948 sbuf[pos++] = comm->load[d+1].flags;
4951 sbuf[pos++] = comm->cell_f_max0[d];
4952 sbuf[pos++] = comm->cell_f_min1[d];
4957 sbuf[pos++] = comm->load[d+1].mdf;
4958 sbuf[pos++] = comm->load[d+1].pme;
4962 /* Communicate a row in DD direction d.
4963 * The communicators are setup such that the root always has rank 0.
4966 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
4967 load->load, load->nload*sizeof(float), MPI_BYTE,
4968 0, comm->mpi_comm_load[d]);
4970 if (dd->ci[dim] == dd->master_ci[dim])
4972 /* We are the root, process this row */
4975 root = comm->root[d];
4985 for (i = 0; i < dd->nc[dim]; i++)
4987 load->sum += load->load[pos++];
4988 load->max = std::max(load->max, load->load[pos]);
4990 if (isDlbOn(dd->comm))
4994 /* This direction could not be load balanced properly,
4995 * therefore we need to use the maximum iso the average load.
4997 load->sum_m = std::max(load->sum_m, load->load[pos]);
5001 load->sum_m += load->load[pos];
5004 load->cvol_min = std::min(load->cvol_min, load->load[pos]);
5008 load->flags = (int)(load->load[pos++] + 0.5);
5012 root->cell_f_max0[i] = load->load[pos++];
5013 root->cell_f_min1[i] = load->load[pos++];
5018 load->mdf = std::max(load->mdf, load->load[pos]);
5020 load->pme = std::max(load->pme, load->load[pos]);
5024 if (isDlbOn(comm) && root->bLimited)
5026 load->sum_m *= dd->nc[dim];
5027 load->flags |= (1<<d);
5035 comm->nload += dd_load_count(comm);
5036 comm->load_step += comm->cycl[ddCyclStep];
5037 comm->load_sum += comm->load[0].sum;
5038 comm->load_max += comm->load[0].max;
5041 for (d = 0; d < dd->ndim; d++)
5043 if (comm->load[0].flags & (1<<d))
5045 comm->load_lim[d]++;
5051 comm->load_mdf += comm->load[0].mdf;
5052 comm->load_pme += comm->load[0].pme;
5056 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5060 fprintf(debug, "get_load_distribution finished\n");
5064 static float dd_force_load_fraction(gmx_domdec_t *dd)
5066 /* Return the relative performance loss on the total run time
5067 * due to the force calculation load imbalance.
5069 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
5071 return dd->comm->load_sum/(dd->comm->load_step*dd->nnodes);
5079 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5081 /* Return the relative performance loss on the total run time
5082 * due to the force calculation load imbalance.
5084 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
5087 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5088 (dd->comm->load_step*dd->nnodes);
5096 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5098 gmx_domdec_comm_t *comm = dd->comm;
5100 /* Only the master rank prints loads and only if we measured loads */
5101 if (!DDMASTER(dd) || comm->nload == 0)
5107 int numPpRanks = dd->nnodes;
5108 int numPmeRanks = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5109 int numRanks = numPpRanks + numPmeRanks;
5110 float lossFraction = 0;
5112 /* Print the average load imbalance and performance loss */
5113 if (dd->nnodes > 1 && comm->load_sum > 0)
5115 float imbalance = comm->load_max*numPpRanks/comm->load_sum - 1;
5116 lossFraction = dd_force_imb_perf_loss(dd);
5118 std::string msg = "\n Dynamic load balancing report:\n";
5119 std::string dlbStateStr = "";
5121 switch (dd->comm->dlbState)
5124 dlbStateStr = "DLB was off during the run per user request.";
5126 case edlbsOffForever:
5127 /* Currectly this can happen due to performance loss observed, cell size
5128 * limitations or incompatibility with other settings observed during
5129 * determineInitialDlbState(). */
5130 dlbStateStr = "DLB got disabled because it was unsuitable to use.";
5132 case edlbsOffCanTurnOn:
5133 dlbStateStr = "DLB was off during the run due to low measured imbalance.";
5135 case edlbsOffTemporarilyLocked:
5136 dlbStateStr = "DLB was locked at the end of the run due to unfinished PP-PME balancing.";
5138 case edlbsOnCanTurnOff:
5139 dlbStateStr = "DLB was turned on during the run due to measured imbalance.";
5142 dlbStateStr = "DLB was permanently on during the run per user request.";
5145 GMX_ASSERT(false, "Undocumented DLB state");
5148 msg += " " + dlbStateStr + "\n";
5149 msg += gmx::formatString(" Average load imbalance: %.1f%%.\n", imbalance*100);
5150 msg += gmx::formatString(" The balanceable part of the MD step is %d%%, load imbalance is computed from this.\n",
5151 static_cast<int>(dd_force_load_fraction(dd)*100 + 0.5));
5152 msg += gmx::formatString(" Part of the total run time spent waiting due to load imbalance: %.1f%%.\n",
5154 fprintf(fplog, "%s", msg.c_str());
5155 fprintf(stderr, "%s", msg.c_str());
5158 /* Print during what percentage of steps the load balancing was limited */
5159 bool dlbWasLimited = false;
5162 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5163 for (int d = 0; d < dd->ndim; d++)
5165 int limitPercentage = (200*comm->load_lim[d] + 1)/(2*comm->nload);
5166 sprintf(buf+strlen(buf), " %c %d %%",
5167 dim2char(dd->dim[d]), limitPercentage);
5168 if (limitPercentage >= 50)
5170 dlbWasLimited = true;
5173 sprintf(buf + strlen(buf), "\n");
5174 fprintf(fplog, "%s", buf);
5175 fprintf(stderr, "%s", buf);
5178 /* Print the performance loss due to separate PME - PP rank imbalance */
5179 float lossFractionPme = 0;
5180 if (numPmeRanks > 0 && comm->load_mdf > 0 && comm->load_step > 0)
5182 float pmeForceRatio = comm->load_pme/comm->load_mdf;
5183 lossFractionPme = (comm->load_pme - comm->load_mdf)/comm->load_step;
5184 if (lossFractionPme <= 0)
5186 lossFractionPme *= numPmeRanks/static_cast<float>(numRanks);
5190 lossFractionPme *= numPpRanks/static_cast<float>(numRanks);
5192 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pmeForceRatio);
5193 fprintf(fplog, "%s", buf);
5194 fprintf(stderr, "%s", buf);
5195 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossFractionPme)*100);
5196 fprintf(fplog, "%s", buf);
5197 fprintf(stderr, "%s", buf);
5199 fprintf(fplog, "\n");
5200 fprintf(stderr, "\n");
5202 if (lossFraction >= DD_PERF_LOSS_WARN)
5205 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5206 " in the domain decomposition.\n", lossFraction*100);
5209 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5211 else if (dlbWasLimited)
5213 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5215 fprintf(fplog, "%s\n", buf);
5216 fprintf(stderr, "%s\n", buf);
5218 if (numPmeRanks > 0 && fabs(lossFractionPme) >= DD_PERF_LOSS_WARN)
5221 "NOTE: %.1f %% performance was lost because the PME ranks\n"
5222 " had %s work to do than the PP ranks.\n"
5223 " You might want to %s the number of PME ranks\n"
5224 " or %s the cut-off and the grid spacing.\n",
5225 fabs(lossFractionPme*100),
5226 (lossFractionPme < 0) ? "less" : "more",
5227 (lossFractionPme < 0) ? "decrease" : "increase",
5228 (lossFractionPme < 0) ? "decrease" : "increase");
5229 fprintf(fplog, "%s\n", buf);
5230 fprintf(stderr, "%s\n", buf);
5234 static float dd_vol_min(gmx_domdec_t *dd)
5236 return dd->comm->load[0].cvol_min*dd->nnodes;
5239 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5241 return dd->comm->load[0].flags;
5244 static float dd_f_imbal(gmx_domdec_t *dd)
5246 if (dd->comm->load[0].sum > 0)
5248 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1.0f;
5252 /* Something is wrong in the cycle counting, report no load imbalance */
5257 float dd_pme_f_ratio(gmx_domdec_t *dd)
5259 /* Should only be called on the DD master rank */
5260 assert(DDMASTER(dd));
5262 if (dd->comm->load[0].mdf > 0 && dd->comm->cycl_n[ddCyclPME] > 0)
5264 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5272 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5277 flags = dd_load_flags(dd);
5281 "DD load balancing is limited by minimum cell size in dimension");
5282 for (d = 0; d < dd->ndim; d++)
5286 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5289 fprintf(fplog, "\n");
5291 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5292 if (isDlbOn(dd->comm))
5294 fprintf(fplog, " vol min/aver %5.3f%c",
5295 dd_vol_min(dd), flags ? '!' : ' ');
5299 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5301 if (dd->comm->cycl_n[ddCyclPME])
5303 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5305 fprintf(fplog, "\n\n");
5308 static void dd_print_load_verbose(gmx_domdec_t *dd)
5310 if (isDlbOn(dd->comm))
5312 fprintf(stderr, "vol %4.2f%c ",
5313 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5317 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5319 if (dd->comm->cycl_n[ddCyclPME])
5321 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
5326 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5331 domdec_root_t *root;
5332 gmx_bool bPartOfGroup = FALSE;
5334 dim = dd->dim[dim_ind];
5335 copy_ivec(loc, loc_c);
5336 for (i = 0; i < dd->nc[dim]; i++)
5339 rank = dd_index(dd->nc, loc_c);
5340 if (rank == dd->rank)
5342 /* This process is part of the group */
5343 bPartOfGroup = TRUE;
5346 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5350 dd->comm->mpi_comm_load[dim_ind] = c_row;
5351 if (!isDlbDisabled(dd->comm))
5353 if (dd->ci[dim] == dd->master_ci[dim])
5355 /* This is the root process of this row */
5356 snew(dd->comm->root[dim_ind], 1);
5357 root = dd->comm->root[dim_ind];
5358 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5359 snew(root->old_cell_f, dd->nc[dim]+1);
5360 snew(root->bCellMin, dd->nc[dim]);
5363 snew(root->cell_f_max0, dd->nc[dim]);
5364 snew(root->cell_f_min1, dd->nc[dim]);
5365 snew(root->bound_min, dd->nc[dim]);
5366 snew(root->bound_max, dd->nc[dim]);
5368 snew(root->buf_ncd, dd->nc[dim]);
5372 /* This is not a root process, we only need to receive cell_f */
5373 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5376 if (dd->ci[dim] == dd->master_ci[dim])
5378 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
5384 void dd_setup_dlb_resource_sharing(t_commrec *cr,
5388 int physicalnode_id_hash;
5390 MPI_Comm mpi_comm_pp_physicalnode;
5392 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
5394 /* Only ranks with short-ranged tasks (currently) use GPUs.
5395 * If we don't have GPUs assigned, there are no resources to share.
5400 physicalnode_id_hash = gmx_physicalnode_id_hash();
5406 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5407 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5408 dd->rank, physicalnode_id_hash, gpu_id);
5410 /* Split the PP communicator over the physical nodes */
5411 /* TODO: See if we should store this (before), as it's also used for
5412 * for the nodecomm summution.
5414 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5415 &mpi_comm_pp_physicalnode);
5416 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5417 &dd->comm->mpi_comm_gpu_shared);
5418 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5419 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5423 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5426 /* Note that some ranks could share a GPU, while others don't */
5428 if (dd->comm->nrank_gpu_shared == 1)
5430 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
5433 GMX_UNUSED_VALUE(cr);
5434 GMX_UNUSED_VALUE(gpu_id);
5438 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5441 int dim0, dim1, i, j;
5446 fprintf(debug, "Making load communicators\n");
5449 snew(dd->comm->load, std::max(dd->ndim, 1));
5450 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
5458 make_load_communicator(dd, 0, loc);
5462 for (i = 0; i < dd->nc[dim0]; i++)
5465 make_load_communicator(dd, 1, loc);
5471 for (i = 0; i < dd->nc[dim0]; i++)
5475 for (j = 0; j < dd->nc[dim1]; j++)
5478 make_load_communicator(dd, 2, loc);
5485 fprintf(debug, "Finished making load communicators\n");
5490 /*! \brief Sets up the relation between neighboring domains and zones */
5491 static void setup_neighbor_relations(gmx_domdec_t *dd)
5493 int d, dim, i, j, m;
5495 gmx_domdec_zones_t *zones;
5496 gmx_domdec_ns_ranges_t *izone;
5498 for (d = 0; d < dd->ndim; d++)
5501 copy_ivec(dd->ci, tmp);
5502 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5503 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5504 copy_ivec(dd->ci, tmp);
5505 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5506 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5509 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5512 dd->neighbor[d][1]);
5516 int nzone = (1 << dd->ndim);
5517 int nizone = (1 << std::max(dd->ndim - 1, 0));
5518 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
5520 zones = &dd->comm->zones;
5522 for (i = 0; i < nzone; i++)
5525 clear_ivec(zones->shift[i]);
5526 for (d = 0; d < dd->ndim; d++)
5528 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5533 for (i = 0; i < nzone; i++)
5535 for (d = 0; d < DIM; d++)
5537 s[d] = dd->ci[d] - zones->shift[i][d];
5542 else if (s[d] >= dd->nc[d])
5548 zones->nizone = nizone;
5549 for (i = 0; i < zones->nizone; i++)
5551 assert(ddNonbondedZonePairRanges[i][0] == i);
5553 izone = &zones->izone[i];
5554 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
5555 * j-zones up to nzone.
5557 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
5558 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
5559 for (dim = 0; dim < DIM; dim++)
5561 if (dd->nc[dim] == 1)
5563 /* All shifts should be allowed */
5564 izone->shift0[dim] = -1;
5565 izone->shift1[dim] = 1;
5569 /* Determine the min/max j-zone shift wrt the i-zone */
5570 izone->shift0[dim] = 1;
5571 izone->shift1[dim] = -1;
5572 for (j = izone->j0; j < izone->j1; j++)
5574 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5575 if (shift_diff < izone->shift0[dim])
5577 izone->shift0[dim] = shift_diff;
5579 if (shift_diff > izone->shift1[dim])
5581 izone->shift1[dim] = shift_diff;
5588 if (!isDlbDisabled(dd->comm))
5590 snew(dd->comm->root, dd->ndim);
5593 if (dd->comm->bRecordLoad)
5595 make_load_communicators(dd);
5599 static void make_pp_communicator(FILE *fplog,
5601 t_commrec gmx_unused *cr,
5602 int gmx_unused reorder)
5605 gmx_domdec_comm_t *comm;
5612 if (comm->bCartesianPP)
5614 /* Set up cartesian communication for the particle-particle part */
5617 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5618 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5621 for (int i = 0; i < DIM; i++)
5625 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5627 /* We overwrite the old communicator with the new cartesian one */
5628 cr->mpi_comm_mygroup = comm_cart;
5631 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5632 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5634 if (comm->bCartesianPP_PME)
5636 /* Since we want to use the original cartesian setup for sim,
5637 * and not the one after split, we need to make an index.
5639 snew(comm->ddindex2ddnodeid, dd->nnodes);
5640 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5641 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5642 /* Get the rank of the DD master,
5643 * above we made sure that the master node is a PP node.
5653 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5655 else if (comm->bCartesianPP)
5657 if (cr->npmenodes == 0)
5659 /* The PP communicator is also
5660 * the communicator for this simulation
5662 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5664 cr->nodeid = dd->rank;
5666 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5668 /* We need to make an index to go from the coordinates
5669 * to the nodeid of this simulation.
5671 snew(comm->ddindex2simnodeid, dd->nnodes);
5672 snew(buf, dd->nnodes);
5673 if (thisRankHasDuty(cr, DUTY_PP))
5675 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5677 /* Communicate the ddindex to simulation nodeid index */
5678 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5679 cr->mpi_comm_mysim);
5682 /* Determine the master coordinates and rank.
5683 * The DD master should be the same node as the master of this sim.
5685 for (int i = 0; i < dd->nnodes; i++)
5687 if (comm->ddindex2simnodeid[i] == 0)
5689 ddindex2xyz(dd->nc, i, dd->master_ci);
5690 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
5695 fprintf(debug, "The master rank is %d\n", dd->masterrank);
5700 /* No Cartesian communicators */
5701 /* We use the rank in dd->comm->all as DD index */
5702 ddindex2xyz(dd->nc, dd->rank, dd->ci);
5703 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
5705 clear_ivec(dd->master_ci);
5712 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
5713 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5718 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
5719 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5723 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
5727 gmx_domdec_comm_t *comm = dd->comm;
5729 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
5732 snew(comm->ddindex2simnodeid, dd->nnodes);
5733 snew(buf, dd->nnodes);
5734 if (thisRankHasDuty(cr, DUTY_PP))
5736 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5738 /* Communicate the ddindex to simulation nodeid index */
5739 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5740 cr->mpi_comm_mysim);
5744 GMX_UNUSED_VALUE(dd);
5745 GMX_UNUSED_VALUE(cr);
5749 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
5750 int ncg, int natoms)
5752 gmx_domdec_master_t *ma;
5757 snew(ma->ncg, dd->nnodes);
5758 snew(ma->index, dd->nnodes+1);
5760 snew(ma->nat, dd->nnodes);
5761 snew(ma->ibuf, dd->nnodes*2);
5762 snew(ma->cell_x, DIM);
5763 for (i = 0; i < DIM; i++)
5765 snew(ma->cell_x[i], dd->nc[i]+1);
5768 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
5774 snew(ma->vbuf, natoms);
5780 static void split_communicator(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
5781 DdRankOrder gmx_unused rankOrder,
5782 int gmx_unused reorder)
5784 gmx_domdec_comm_t *comm;
5793 if (comm->bCartesianPP)
5795 for (i = 1; i < DIM; i++)
5797 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
5799 if (bDiv[YY] || bDiv[ZZ])
5801 comm->bCartesianPP_PME = TRUE;
5802 /* If we have 2D PME decomposition, which is always in x+y,
5803 * we stack the PME only nodes in z.
5804 * Otherwise we choose the direction that provides the thinnest slab
5805 * of PME only nodes as this will have the least effect
5806 * on the PP communication.
5807 * But for the PME communication the opposite might be better.
5809 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
5811 dd->nc[YY] > dd->nc[ZZ]))
5813 comm->cartpmedim = ZZ;
5817 comm->cartpmedim = YY;
5819 comm->ntot[comm->cartpmedim]
5820 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
5824 fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
5826 "Will not use a Cartesian communicator for PP <-> PME\n\n");
5830 if (comm->bCartesianPP_PME)
5838 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
5841 for (i = 0; i < DIM; i++)
5845 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
5847 MPI_Comm_rank(comm_cart, &rank);
5848 if (MASTER(cr) && rank != 0)
5850 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
5853 /* With this assigment we loose the link to the original communicator
5854 * which will usually be MPI_COMM_WORLD, unless have multisim.
5856 cr->mpi_comm_mysim = comm_cart;
5857 cr->sim_nodeid = rank;
5859 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
5863 fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
5864 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5867 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
5871 if (cr->npmenodes == 0 ||
5872 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
5874 cr->duty = DUTY_PME;
5877 /* Split the sim communicator into PP and PME only nodes */
5878 MPI_Comm_split(cr->mpi_comm_mysim,
5879 getThisRankDuties(cr),
5880 dd_index(comm->ntot, dd->ci),
5881 &cr->mpi_comm_mygroup);
5888 case DdRankOrder::pp_pme:
5891 fprintf(fplog, "Order of the ranks: PP first, PME last\n");
5894 case DdRankOrder::interleave:
5895 /* Interleave the PP-only and PME-only ranks */
5898 fprintf(fplog, "Interleaving PP and PME ranks\n");
5900 comm->pmenodes = dd_interleaved_pme_ranks(dd);
5902 case DdRankOrder::cartesian:
5905 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(rankOrder));
5908 if (dd_simnode2pmenode(dd, cr, cr->sim_nodeid) == -1)
5910 cr->duty = DUTY_PME;
5917 /* Split the sim communicator into PP and PME only nodes */
5918 MPI_Comm_split(cr->mpi_comm_mysim,
5919 getThisRankDuties(cr),
5921 &cr->mpi_comm_mygroup);
5922 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
5928 fprintf(fplog, "This rank does only %s work.\n\n",
5929 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
5933 /*! \brief Generates the MPI communicators for domain decomposition */
5934 static void make_dd_communicators(FILE *fplog, t_commrec *cr,
5935 gmx_domdec_t *dd, DdRankOrder ddRankOrder)
5937 gmx_domdec_comm_t *comm;
5942 copy_ivec(dd->nc, comm->ntot);
5944 comm->bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
5945 comm->bCartesianPP_PME = FALSE;
5947 /* Reorder the nodes by default. This might change the MPI ranks.
5948 * Real reordering is only supported on very few architectures,
5949 * Blue Gene is one of them.
5951 CartReorder = (getenv("GMX_NO_CART_REORDER") == nullptr);
5953 if (cr->npmenodes > 0)
5955 /* Split the communicator into a PP and PME part */
5956 split_communicator(fplog, cr, dd, ddRankOrder, CartReorder);
5957 if (comm->bCartesianPP_PME)
5959 /* We (possibly) reordered the nodes in split_communicator,
5960 * so it is no longer required in make_pp_communicator.
5962 CartReorder = FALSE;
5967 /* All nodes do PP and PME */
5969 /* We do not require separate communicators */
5970 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
5974 if (thisRankHasDuty(cr, DUTY_PP))
5976 /* Copy or make a new PP communicator */
5977 make_pp_communicator(fplog, dd, cr, CartReorder);
5981 receive_ddindex2simnodeid(dd, cr);
5984 if (!thisRankHasDuty(cr, DUTY_PME))
5986 /* Set up the commnuication to our PME node */
5987 dd->pme_nodeid = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
5988 dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
5991 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
5992 dd->pme_nodeid, dd->pme_receive_vir_ener);
5997 dd->pme_nodeid = -1;
6002 dd->ma = init_gmx_domdec_master_t(dd,
6004 comm->cgs_gl.index[comm->cgs_gl.nr]);
6008 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6010 real *slb_frac, tot;
6015 if (nc > 1 && size_string != nullptr)
6019 fprintf(fplog, "Using static load balancing for the %s direction\n",
6024 for (i = 0; i < nc; i++)
6027 sscanf(size_string, "%20lf%n", &dbl, &n);
6030 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6039 fprintf(fplog, "Relative cell sizes:");
6041 for (i = 0; i < nc; i++)
6046 fprintf(fplog, " %5.3f", slb_frac[i]);
6051 fprintf(fplog, "\n");
6058 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
6061 gmx_mtop_ilistloop_t iloop;
6065 iloop = gmx_mtop_ilistloop_init(mtop);
6066 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6068 for (ftype = 0; ftype < F_NRE; ftype++)
6070 if ((interaction_function[ftype].flags & IF_BOND) &&
6073 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
6081 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6087 val = getenv(env_var);
6090 if (sscanf(val, "%20d", &nst) <= 0)
6096 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6104 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6108 fprintf(stderr, "\n%s\n", warn_string);
6112 fprintf(fplog, "\n%s\n", warn_string);
6116 static void check_dd_restrictions(t_commrec *cr, const gmx_domdec_t *dd,
6117 const t_inputrec *ir, FILE *fplog)
6119 if (ir->ePBC == epbcSCREW &&
6120 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6122 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6125 if (ir->ns_type == ensSIMPLE)
6127 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6130 if (ir->nstlist == 0)
6132 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6135 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6137 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6141 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6146 r = ddbox->box_size[XX];
6147 for (di = 0; di < dd->ndim; di++)
6150 /* Check using the initial average cell size */
6151 r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6157 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
6159 static int forceDlbOffOrBail(int cmdlineDlbState,
6160 const std::string &reasonStr,
6164 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
6165 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
6167 if (cmdlineDlbState == edlbsOnUser)
6169 gmx_fatal(FARGS, (dlbNotSupportedErr + reasonStr).c_str());
6171 else if (cmdlineDlbState == edlbsOffCanTurnOn)
6173 dd_warning(cr, fplog, (dlbDisableNote + reasonStr + "\n").c_str());
6175 return edlbsOffForever;
6178 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
6180 * This function parses the parameters of "-dlb" command line option setting
6181 * corresponding state values. Then it checks the consistency of the determined
6182 * state with other run parameters and settings. As a result, the initial state
6183 * may be altered or an error may be thrown if incompatibility of options is detected.
6185 * \param [in] fplog Pointer to mdrun log file.
6186 * \param [in] cr Pointer to MPI communication object.
6187 * \param [in] dlbOption Enum value for the DLB option.
6188 * \param [in] bRecordLoad True if the load balancer is recording load information.
6189 * \param [in] mdrunOptions Options for mdrun.
6190 * \param [in] ir Pointer mdrun to input parameters.
6191 * \returns DLB initial/startup state.
6193 static int determineInitialDlbState(FILE *fplog, t_commrec *cr,
6194 DlbOption dlbOption, gmx_bool bRecordLoad,
6195 const MdrunOptions &mdrunOptions,
6196 const t_inputrec *ir)
6198 int dlbState = edlbsOffCanTurnOn;
6202 case DlbOption::turnOnWhenUseful: dlbState = edlbsOffCanTurnOn; break;
6203 case DlbOption::no: dlbState = edlbsOffUser; break;
6204 case DlbOption::yes: dlbState = edlbsOnUser; break;
6205 default: gmx_incons("Invalid dlbOption enum value");
6208 /* Reruns don't support DLB: bail or override auto mode */
6209 if (mdrunOptions.rerun)
6211 std::string reasonStr = "it is not supported in reruns.";
6212 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
6215 /* Unsupported integrators */
6216 if (!EI_DYNAMICS(ir->eI))
6218 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
6219 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
6222 /* Without cycle counters we can't time work to balance on */
6225 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
6226 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
6229 if (mdrunOptions.reproducible)
6231 std::string reasonStr = "you started a reproducible run.";
6236 case edlbsOffForever:
6237 GMX_RELEASE_ASSERT(false, "edlbsOffForever is not a valid initial state");
6239 case edlbsOffCanTurnOn:
6240 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
6242 case edlbsOnCanTurnOff:
6243 GMX_RELEASE_ASSERT(false, "edlbsOffCanTurnOff is not a valid initial state");
6246 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", cr, fplog);
6249 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", dlbState);
6257 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6262 if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
6264 /* Decomposition order z,y,x */
6267 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6269 for (dim = DIM-1; dim >= 0; dim--)
6271 if (dd->nc[dim] > 1)
6273 dd->dim[dd->ndim++] = dim;
6279 /* Decomposition order x,y,z */
6280 for (dim = 0; dim < DIM; dim++)
6282 if (dd->nc[dim] > 1)
6284 dd->dim[dd->ndim++] = dim;
6290 static gmx_domdec_comm_t *init_dd_comm()
6292 gmx_domdec_comm_t *comm;
6296 snew(comm->cggl_flag, DIM*2);
6297 snew(comm->cgcm_state, DIM*2);
6298 for (i = 0; i < DIM*2; i++)
6300 comm->cggl_flag_nalloc[i] = 0;
6301 comm->cgcm_state_nalloc[i] = 0;
6304 comm->nalloc_int = 0;
6305 comm->buf_int = nullptr;
6307 vec_rvec_init(&comm->vbuf);
6309 comm->n_load_have = 0;
6310 comm->n_load_collect = 0;
6312 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6314 comm->sum_nat[i] = 0;
6318 comm->load_step = 0;
6321 clear_ivec(comm->load_lim);
6325 /* This should be replaced by a unique pointer */
6326 comm->balanceRegion = ddBalanceRegionAllocate();
6331 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
6332 static void set_dd_limits_and_grid(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
6333 const DomdecOptions &options,
6334 const MdrunOptions &mdrunOptions,
6335 const gmx_mtop_t *mtop,
6336 const t_inputrec *ir,
6337 const matrix box, const rvec *xGlobal,
6339 int *npme_x, int *npme_y)
6342 real r_bonded_limit = -1;
6343 const real tenPercentMargin = 1.1;
6344 gmx_domdec_comm_t *comm = dd->comm;
6346 snew(comm->cggl_flag, DIM*2);
6347 snew(comm->cgcm_state, DIM*2);
6349 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6350 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6352 dd->pme_recv_f_alloc = 0;
6353 dd->pme_recv_f_buf = nullptr;
6355 /* Initialize to GPU share count to 0, might change later */
6356 comm->nrank_gpu_shared = 0;
6358 comm->dlbState = determineInitialDlbState(fplog, cr, options.dlbOption, comm->bRecordLoad, mdrunOptions, ir);
6359 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
6360 /* To consider turning DLB on after 2*nstlist steps we need to check
6361 * at partitioning count 3. Thus we need to increase the first count by 2.
6363 comm->ddPartioningCountFirstDlbOff += 2;
6367 fprintf(fplog, "Dynamic load balancing: %s\n",
6368 edlbs_names[comm->dlbState]);
6370 comm->bPMELoadBalDLBLimits = FALSE;
6372 /* Allocate the charge group/atom sorting struct */
6373 snew(comm->sort, 1);
6375 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6377 comm->bInterCGBondeds = ((ncg_mtop(mtop) > mtop->mols.nr) ||
6378 mtop->bIntermolecularInteractions);
6379 if (comm->bInterCGBondeds)
6381 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6385 comm->bInterCGMultiBody = FALSE;
6388 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6389 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6393 /* Set the cut-off to some very large value,
6394 * so we don't need if statements everywhere in the code.
6395 * We use sqrt, since the cut-off is squared in some places.
6397 comm->cutoff = GMX_CUTOFF_INF;
6401 comm->cutoff = ir->rlist;
6403 comm->cutoff_mbody = 0;
6405 comm->cellsize_limit = 0;
6406 comm->bBondComm = FALSE;
6408 /* Atoms should be able to move by up to half the list buffer size (if > 0)
6409 * within nstlist steps. Since boundaries are allowed to displace by half
6410 * a cell size, DD cells should be at least the size of the list buffer.
6412 comm->cellsize_limit = std::max(comm->cellsize_limit,
6413 ir->rlist - std::max(ir->rvdw, ir->rcoulomb));
6415 if (comm->bInterCGBondeds)
6417 if (options.minimumCommunicationRange > 0)
6419 comm->cutoff_mbody = options.minimumCommunicationRange;
6420 if (options.useBondedCommunication)
6422 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6426 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
6428 r_bonded_limit = comm->cutoff_mbody;
6430 else if (ir->bPeriodicMols)
6432 /* Can not easily determine the required cut-off */
6433 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6434 comm->cutoff_mbody = comm->cutoff/2;
6435 r_bonded_limit = comm->cutoff_mbody;
6443 dd_bonded_cg_distance(fplog, mtop, ir, xGlobal, box,
6444 options.checkBondedInteractions,
6447 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6448 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6450 /* We use an initial margin of 10% for the minimum cell size,
6451 * except when we are just below the non-bonded cut-off.
6453 if (options.useBondedCommunication)
6455 if (std::max(r_2b, r_mb) > comm->cutoff)
6457 r_bonded = std::max(r_2b, r_mb);
6458 r_bonded_limit = tenPercentMargin*r_bonded;
6459 comm->bBondComm = TRUE;
6464 r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
6466 /* We determine cutoff_mbody later */
6470 /* No special bonded communication,
6471 * simply increase the DD cut-off.
6473 r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb);
6474 comm->cutoff_mbody = r_bonded_limit;
6475 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
6481 "Minimum cell size due to bonded interactions: %.3f nm\n",
6484 comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
6488 if (dd->bInterCGcons && options.constraintCommunicationRange <= 0)
6490 /* There is a cell size limit due to the constraints (P-LINCS) */
6491 rconstr = constr_r_max(fplog, mtop, ir);
6495 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6497 if (rconstr > comm->cellsize_limit)
6499 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6503 else if (options.constraintCommunicationRange > 0 && fplog)
6505 /* Here we do not check for dd->bInterCGcons,
6506 * because one can also set a cell size limit for virtual sites only
6507 * and at this point we don't know yet if there are intercg v-sites.
6510 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6511 options.constraintCommunicationRange);
6512 rconstr = options.constraintCommunicationRange;
6514 comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
6516 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6518 if (options.numCells[XX] > 0)
6520 copy_ivec(options.numCells, dd->nc);
6521 set_dd_dim(fplog, dd);
6522 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, xGlobal, ddbox);
6524 if (options.numPmeRanks >= 0)
6526 cr->npmenodes = options.numPmeRanks;
6530 /* When the DD grid is set explicitly and -npme is set to auto,
6531 * don't use PME ranks. We check later if the DD grid is
6532 * compatible with the total number of ranks.
6537 real acs = average_cellsize_min(dd, ddbox);
6538 if (acs < comm->cellsize_limit)
6542 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6544 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
6545 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6546 acs, comm->cellsize_limit);
6551 set_ddbox_cr(cr, nullptr, ir, box, &comm->cgs_gl, xGlobal, ddbox);
6553 /* We need to choose the optimal DD grid and possibly PME nodes */
6555 dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6556 options.numPmeRanks,
6557 !isDlbDisabled(comm),
6559 comm->cellsize_limit, comm->cutoff,
6560 comm->bInterCGBondeds);
6562 if (dd->nc[XX] == 0)
6565 gmx_bool bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6566 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
6567 !bC ? "-rdd" : "-rcon",
6568 comm->dlbState != edlbsOffUser ? " or -dds" : "",
6569 bC ? " or your LINCS settings" : "");
6571 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
6572 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
6574 "Look in the log file for details on the domain decomposition",
6575 cr->nnodes-cr->npmenodes, limit, buf);
6577 set_dd_dim(fplog, dd);
6583 "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
6584 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6587 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6588 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6590 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
6591 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
6592 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6594 if (cr->npmenodes > dd->nnodes)
6596 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
6597 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6599 if (cr->npmenodes > 0)
6601 comm->npmenodes = cr->npmenodes;
6605 comm->npmenodes = dd->nnodes;
6608 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6610 /* The following choices should match those
6611 * in comm_cost_est in domdec_setup.c.
6612 * Note that here the checks have to take into account
6613 * that the decomposition might occur in a different order than xyz
6614 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6615 * in which case they will not match those in comm_cost_est,
6616 * but since that is mainly for testing purposes that's fine.
6618 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6619 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6620 getenv("GMX_PMEONEDD") == nullptr)
6622 comm->npmedecompdim = 2;
6623 comm->npmenodes_x = dd->nc[XX];
6624 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6628 /* In case nc is 1 in both x and y we could still choose to
6629 * decompose pme in y instead of x, but we use x for simplicity.
6631 comm->npmedecompdim = 1;
6632 if (dd->dim[0] == YY)
6634 comm->npmenodes_x = 1;
6635 comm->npmenodes_y = comm->npmenodes;
6639 comm->npmenodes_x = comm->npmenodes;
6640 comm->npmenodes_y = 1;
6645 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6646 comm->npmenodes_x, comm->npmenodes_y, 1);
6651 comm->npmedecompdim = 0;
6652 comm->npmenodes_x = 0;
6653 comm->npmenodes_y = 0;
6656 /* Technically we don't need both of these,
6657 * but it simplifies code not having to recalculate it.
6659 *npme_x = comm->npmenodes_x;
6660 *npme_y = comm->npmenodes_y;
6662 snew(comm->slb_frac, DIM);
6663 if (isDlbDisabled(comm))
6665 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], options.cellSizeX);
6666 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], options.cellSizeY);
6667 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], options.cellSizeZ);
6670 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6672 if (comm->bBondComm || !isDlbDisabled(comm))
6674 /* Set the bonded communication distance to halfway
6675 * the minimum and the maximum,
6676 * since the extra communication cost is nearly zero.
6678 real acs = average_cellsize_min(dd, ddbox);
6679 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6680 if (!isDlbDisabled(comm))
6682 /* Check if this does not limit the scaling */
6683 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
6684 options.dlbScaling*acs);
6686 if (!comm->bBondComm)
6688 /* Without bBondComm do not go beyond the n.b. cut-off */
6689 comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
6690 if (comm->cellsize_limit >= comm->cutoff)
6692 /* We don't loose a lot of efficieny
6693 * when increasing it to the n.b. cut-off.
6694 * It can even be slightly faster, because we need
6695 * less checks for the communication setup.
6697 comm->cutoff_mbody = comm->cutoff;
6700 /* Check if we did not end up below our original limit */
6701 comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
6703 if (comm->cutoff_mbody > comm->cellsize_limit)
6705 comm->cellsize_limit = comm->cutoff_mbody;
6708 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
6713 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
6714 "cellsize limit %f\n",
6715 comm->bBondComm, comm->cellsize_limit);
6720 check_dd_restrictions(cr, dd, ir, fplog);
6724 static void set_dlb_limits(gmx_domdec_t *dd)
6729 for (d = 0; d < dd->ndim; d++)
6731 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
6732 dd->comm->cellsize_min[dd->dim[d]] =
6733 dd->comm->cellsize_min_dlb[dd->dim[d]];
6738 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
6741 gmx_domdec_comm_t *comm;
6748 cellsize_min = comm->cellsize_min[dd->dim[0]];
6749 for (d = 1; d < dd->ndim; d++)
6751 cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
6754 /* Turn off DLB if we're too close to the cell size limit. */
6755 if (cellsize_min < comm->cellsize_limit*1.05)
6757 auto str = gmx::formatString("step %" GMX_PRId64 " Measured %.1f %% performance loss due to load imbalance, "
6758 "but the minimum cell size is smaller than 1.05 times the cell size limit."
6759 "Will no longer try dynamic load balancing.\n", step, dd_force_imb_perf_loss(dd)*100);
6760 dd_warning(cr, fplog, str.c_str());
6762 comm->dlbState = edlbsOffForever;
6767 sprintf(buf, "step %" GMX_PRId64 " Turning on dynamic load balancing, because the performance loss due to load imbalance is %.1f %%.\n", step, dd_force_imb_perf_loss(dd)*100);
6768 dd_warning(cr, fplog, buf);
6769 comm->dlbState = edlbsOnCanTurnOff;
6771 /* Store the non-DLB performance, so we can check if DLB actually
6772 * improves performance.
6774 GMX_RELEASE_ASSERT(comm->cycl_n[ddCyclStep] > 0, "When we turned on DLB, we should have measured cycles");
6775 comm->cyclesPerStepBeforeDLB = comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
6779 /* We can set the required cell size info here,
6780 * so we do not need to communicate this.
6781 * The grid is completely uniform.
6783 for (d = 0; d < dd->ndim; d++)
6787 comm->load[d].sum_m = comm->load[d].sum;
6789 nc = dd->nc[dd->dim[d]];
6790 for (i = 0; i < nc; i++)
6792 comm->root[d]->cell_f[i] = i/(real)nc;
6795 comm->root[d]->cell_f_max0[i] = i /(real)nc;
6796 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
6799 comm->root[d]->cell_f[nc] = 1.0;
6804 static void turn_off_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
6806 gmx_domdec_t *dd = cr->dd;
6809 sprintf(buf, "step %" GMX_PRId64 " Turning off dynamic load balancing, because it is degrading performance.\n", step);
6810 dd_warning(cr, fplog, buf);
6811 dd->comm->dlbState = edlbsOffCanTurnOn;
6812 dd->comm->haveTurnedOffDlb = true;
6813 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
6816 static void turn_off_dlb_forever(FILE *fplog, t_commrec *cr, gmx_int64_t step)
6818 GMX_RELEASE_ASSERT(cr->dd->comm->dlbState == edlbsOffCanTurnOn, "Can only turn off DLB forever when it was in the can-turn-on state");
6820 sprintf(buf, "step %" GMX_PRId64 " Will no longer try dynamic load balancing, as it degraded performance.\n", step);
6821 dd_warning(cr, fplog, buf);
6822 cr->dd->comm->dlbState = edlbsOffForever;
6825 static char *init_bLocalCG(const gmx_mtop_t *mtop)
6830 ncg = ncg_mtop(mtop);
6831 snew(bLocalCG, ncg);
6832 for (cg = 0; cg < ncg; cg++)
6834 bLocalCG[cg] = FALSE;
6840 void dd_init_bondeds(FILE *fplog,
6842 const gmx_mtop_t *mtop,
6843 const gmx_vsite_t *vsite,
6844 const t_inputrec *ir,
6845 gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
6847 gmx_domdec_comm_t *comm;
6849 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
6853 if (comm->bBondComm)
6855 /* Communicate atoms beyond the cut-off for bonded interactions */
6858 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
6860 comm->bLocalCG = init_bLocalCG(mtop);
6864 /* Only communicate atoms based on cut-off */
6865 comm->cglink = nullptr;
6866 comm->bLocalCG = nullptr;
6870 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
6871 const gmx_mtop_t *mtop, const t_inputrec *ir,
6872 gmx_bool bDynLoadBal, real dlb_scale,
6873 const gmx_ddbox_t *ddbox)
6875 gmx_domdec_comm_t *comm;
6881 if (fplog == nullptr)
6890 fprintf(fplog, "The maximum number of communication pulses is:");
6891 for (d = 0; d < dd->ndim; d++)
6893 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
6895 fprintf(fplog, "\n");
6896 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
6897 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
6898 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
6899 for (d = 0; d < DIM; d++)
6903 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
6910 comm->cellsize_min_dlb[d]/
6911 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6913 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
6916 fprintf(fplog, "\n");
6920 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
6921 fprintf(fplog, "The initial number of communication pulses is:");
6922 for (d = 0; d < dd->ndim; d++)
6924 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
6926 fprintf(fplog, "\n");
6927 fprintf(fplog, "The initial domain decomposition cell size is:");
6928 for (d = 0; d < DIM; d++)
6932 fprintf(fplog, " %c %.2f nm",
6933 dim2char(d), dd->comm->cellsize_min[d]);
6936 fprintf(fplog, "\n\n");
6939 gmx_bool bInterCGVsites = count_intercg_vsites(mtop);
6941 if (comm->bInterCGBondeds ||
6943 dd->bInterCGcons || dd->bInterCGsettles)
6945 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
6946 fprintf(fplog, "%40s %-7s %6.3f nm\n",
6947 "non-bonded interactions", "", comm->cutoff);
6951 limit = dd->comm->cellsize_limit;
6955 if (dynamic_dd_box(ddbox, ir))
6957 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
6959 limit = dd->comm->cellsize_min[XX];
6960 for (d = 1; d < DIM; d++)
6962 limit = std::min(limit, dd->comm->cellsize_min[d]);
6966 if (comm->bInterCGBondeds)
6968 fprintf(fplog, "%40s %-7s %6.3f nm\n",
6969 "two-body bonded interactions", "(-rdd)",
6970 std::max(comm->cutoff, comm->cutoff_mbody));
6971 fprintf(fplog, "%40s %-7s %6.3f nm\n",
6972 "multi-body bonded interactions", "(-rdd)",
6973 (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
6977 fprintf(fplog, "%40s %-7s %6.3f nm\n",
6978 "virtual site constructions", "(-rcon)", limit);
6980 if (dd->bInterCGcons || dd->bInterCGsettles)
6982 sprintf(buf, "atoms separated by up to %d constraints",
6984 fprintf(fplog, "%40s %-7s %6.3f nm\n",
6985 buf, "(-rcon)", limit);
6987 fprintf(fplog, "\n");
6993 static void set_cell_limits_dlb(gmx_domdec_t *dd,
6995 const t_inputrec *ir,
6996 const gmx_ddbox_t *ddbox)
6998 gmx_domdec_comm_t *comm;
6999 int d, dim, npulse, npulse_d_max, npulse_d;
7004 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7006 /* Determine the maximum number of comm. pulses in one dimension */
7008 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
7010 /* Determine the maximum required number of grid pulses */
7011 if (comm->cellsize_limit >= comm->cutoff)
7013 /* Only a single pulse is required */
7016 else if (!bNoCutOff && comm->cellsize_limit > 0)
7018 /* We round down slightly here to avoid overhead due to the latency
7019 * of extra communication calls when the cut-off
7020 * would be only slightly longer than the cell size.
7021 * Later cellsize_limit is redetermined,
7022 * so we can not miss interactions due to this rounding.
7024 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7028 /* There is no cell size limit */
7029 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7032 if (!bNoCutOff && npulse > 1)
7034 /* See if we can do with less pulses, based on dlb_scale */
7036 for (d = 0; d < dd->ndim; d++)
7039 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7040 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7041 npulse_d_max = std::max(npulse_d_max, npulse_d);
7043 npulse = std::min(npulse, npulse_d_max);
7046 /* This env var can override npulse */
7047 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
7054 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7055 for (d = 0; d < dd->ndim; d++)
7057 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
7058 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7059 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7060 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
7061 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7063 comm->bVacDLBNoLimit = FALSE;
7067 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7068 if (!comm->bVacDLBNoLimit)
7070 comm->cellsize_limit = std::max(comm->cellsize_limit,
7071 comm->cutoff/comm->maxpulse);
7073 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
7074 /* Set the minimum cell size for each DD dimension */
7075 for (d = 0; d < dd->ndim; d++)
7077 if (comm->bVacDLBNoLimit ||
7078 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7080 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7084 comm->cellsize_min_dlb[dd->dim[d]] =
7085 comm->cutoff/comm->cd[d].np_dlb;
7088 if (comm->cutoff_mbody <= 0)
7090 comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
7098 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
7100 /* If each molecule is a single charge group
7101 * or we use domain decomposition for each periodic dimension,
7102 * we do not need to take pbc into account for the bonded interactions.
7104 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7107 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
7110 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
7111 static void set_ddgrid_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7112 const gmx_mtop_t *mtop, const t_inputrec *ir,
7113 const gmx_ddbox_t *ddbox)
7115 gmx_domdec_comm_t *comm;
7121 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7123 init_ddpme(dd, &comm->ddpme[0], 0);
7124 if (comm->npmedecompdim >= 2)
7126 init_ddpme(dd, &comm->ddpme[1], 1);
7131 comm->npmenodes = 0;
7132 if (dd->pme_nodeid >= 0)
7134 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
7135 "Can not have separate PME ranks without PME electrostatics");
7141 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7143 if (!isDlbDisabled(comm))
7145 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7148 print_dd_settings(fplog, dd, mtop, ir, isDlbOn(comm), dlb_scale, ddbox);
7149 if (comm->dlbState == edlbsOffCanTurnOn)
7153 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7155 print_dd_settings(fplog, dd, mtop, ir, TRUE, dlb_scale, ddbox);
7158 if (ir->ePBC == epbcNONE)
7160 vol_frac = 1 - 1/(double)dd->nnodes;
7165 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7169 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7171 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7173 dd->ga2la = ga2la_init(natoms_tot, static_cast<int>(vol_frac*natoms_tot));
7176 /*! \brief Set some important DD parameters that can be modified by env.vars */
7177 static void set_dd_envvar_options(FILE *fplog, gmx_domdec_t *dd, int rank_mysim)
7179 gmx_domdec_comm_t *comm = dd->comm;
7181 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
7182 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
7183 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
7184 int recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
7185 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
7186 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
7187 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
7189 if (dd->bSendRecv2 && fplog)
7191 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
7198 fprintf(fplog, "Will load balance based on FLOP count\n");
7200 if (comm->eFlop > 1)
7202 srand(1 + rank_mysim);
7204 comm->bRecordLoad = TRUE;
7208 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
7212 DomdecOptions::DomdecOptions() :
7213 checkBondedInteractions(TRUE),
7214 useBondedCommunication(TRUE),
7216 rankOrder(DdRankOrder::pp_pme),
7217 minimumCommunicationRange(0),
7218 constraintCommunicationRange(0),
7219 dlbOption(DlbOption::turnOnWhenUseful),
7225 clear_ivec(numCells);
7228 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
7229 const DomdecOptions &options,
7230 const MdrunOptions &mdrunOptions,
7231 const gmx_mtop_t *mtop,
7232 const t_inputrec *ir,
7234 const rvec *xGlobal,
7236 int *npme_x, int *npme_y)
7243 "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
7248 dd->comm = init_dd_comm();
7250 set_dd_envvar_options(fplog, dd, cr->nodeid);
7252 set_dd_limits_and_grid(fplog, cr, dd, options, mdrunOptions,
7258 make_dd_communicators(fplog, cr, dd, options.rankOrder);
7260 if (thisRankHasDuty(cr, DUTY_PP))
7262 set_ddgrid_parameters(fplog, dd, options.dlbScaling, mtop, ir, ddbox);
7264 setup_neighbor_relations(dd);
7267 /* Set overallocation to avoid frequent reallocation of arrays */
7268 set_over_alloc_dd(TRUE);
7270 /* Initialize DD paritioning counters */
7271 dd->comm->partition_step = INT_MIN;
7274 /* We currently don't know the number of threads yet, we set this later */
7277 clear_dd_cycle_counts(dd);
7282 static gmx_bool test_dd_cutoff(t_commrec *cr,
7283 t_state *state, const t_inputrec *ir,
7294 set_ddbox(dd, FALSE, cr, ir, state->box,
7295 TRUE, &dd->comm->cgs_gl, as_rvec_array(state->x.data()), &ddbox);
7299 for (d = 0; d < dd->ndim; d++)
7303 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7304 if (dynamic_dd_box(&ddbox, ir))
7306 inv_cell_size *= DD_PRES_SCALE_MARGIN;
7309 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7311 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
7313 if (np > dd->comm->cd[d].np_dlb)
7318 /* If a current local cell size is smaller than the requested
7319 * cut-off, we could still fix it, but this gets very complicated.
7320 * Without fixing here, we might actually need more checks.
7322 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7329 if (!isDlbDisabled(dd->comm))
7331 /* If DLB is not active yet, we don't need to check the grid jumps.
7332 * Actually we shouldn't, because then the grid jump data is not set.
7334 if (isDlbOn(dd->comm) &&
7335 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7340 gmx_sumi(1, &LocallyLimited, cr);
7342 if (LocallyLimited > 0)
7351 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, const t_inputrec *ir,
7354 gmx_bool bCutoffAllowed;
7356 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7360 cr->dd->comm->cutoff = cutoff_req;
7363 return bCutoffAllowed;
7366 void set_dd_dlb_max_cutoff(t_commrec *cr, real cutoff)
7368 gmx_domdec_comm_t *comm;
7370 comm = cr->dd->comm;
7372 /* Turn on the DLB limiting (might have been on already) */
7373 comm->bPMELoadBalDLBLimits = TRUE;
7375 /* Change the cut-off limit */
7376 comm->PMELoadBal_max_cutoff = cutoff;
7380 fprintf(debug, "PME load balancing set a limit to the DLB staggering such that a %f cut-off will continue to fit\n",
7381 comm->PMELoadBal_max_cutoff);
7385 /* Sets whether we should later check the load imbalance data, so that
7386 * we can trigger dynamic load balancing if enough imbalance has
7389 * Used after PME load balancing unlocks DLB, so that the check
7390 * whether DLB will be useful can happen immediately.
7392 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue)
7394 if (dd->comm->dlbState == edlbsOffCanTurnOn)
7396 dd->comm->bCheckWhetherToTurnDlbOn = bValue;
7400 /* Store the DD partitioning count, so we can ignore cycle counts
7401 * over the next nstlist steps, which are often slower.
7403 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
7408 /* Returns if we should check whether there has been enough load
7409 * imbalance to trigger dynamic load balancing.
7411 static gmx_bool dd_dlb_get_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd)
7413 if (dd->comm->dlbState != edlbsOffCanTurnOn)
7418 if (dd->ddp_count <= dd->comm->ddPartioningCountFirstDlbOff)
7420 /* We ignore the first nstlist steps at the start of the run
7421 * or after PME load balancing or after turning DLB off, since
7422 * these often have extra allocation or cache miss overhead.
7427 if (dd->comm->cycl_n[ddCyclStep] == 0)
7429 /* We can have zero timed steps when dd_partition_system is called
7430 * more than once at the same step, e.g. with replica exchange.
7431 * Turning on DLB would trigger an assertion failure later, but is
7432 * also useless right after exchanging replicas.
7437 /* We should check whether we should use DLB directly after
7439 if (dd->comm->bCheckWhetherToTurnDlbOn)
7441 /* This flag was set when the PME load-balancing routines
7442 unlocked DLB, and should now be cleared. */
7443 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, FALSE);
7446 /* We check whether we should use DLB every c_checkTurnDlbOnInterval
7447 * partitionings (we do not do this every partioning, so that we
7448 * avoid excessive communication). */
7449 if (dd->comm->n_load_have % c_checkTurnDlbOnInterval == c_checkTurnDlbOnInterval - 1)
7457 gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
7459 return isDlbOn(dd->comm);
7462 gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
7464 return (dd->comm->dlbState == edlbsOffTemporarilyLocked);
7467 void dd_dlb_lock(gmx_domdec_t *dd)
7469 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
7470 if (dd->comm->dlbState == edlbsOffCanTurnOn)
7472 dd->comm->dlbState = edlbsOffTemporarilyLocked;
7476 void dd_dlb_unlock(gmx_domdec_t *dd)
7478 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
7479 if (dd->comm->dlbState == edlbsOffTemporarilyLocked)
7481 dd->comm->dlbState = edlbsOffCanTurnOn;
7482 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
7486 static void merge_cg_buffers(int ncell,
7487 gmx_domdec_comm_dim_t *cd, int pulse,
7489 int *index_gl, int *recv_i,
7490 rvec *cg_cm, rvec *recv_vr,
7492 cginfo_mb_t *cginfo_mb, int *cginfo)
7494 gmx_domdec_ind_t *ind, *ind_p;
7495 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7496 int shift, shift_at;
7498 ind = &cd->ind[pulse];
7500 /* First correct the already stored data */
7501 shift = ind->nrecv[ncell];
7502 for (cell = ncell-1; cell >= 0; cell--)
7504 shift -= ind->nrecv[cell];
7507 /* Move the cg's present from previous grid pulses */
7508 cg0 = ncg_cell[ncell+cell];
7509 cg1 = ncg_cell[ncell+cell+1];
7510 cgindex[cg1+shift] = cgindex[cg1];
7511 for (cg = cg1-1; cg >= cg0; cg--)
7513 index_gl[cg+shift] = index_gl[cg];
7514 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7515 cgindex[cg+shift] = cgindex[cg];
7516 cginfo[cg+shift] = cginfo[cg];
7518 /* Correct the already stored send indices for the shift */
7519 for (p = 1; p <= pulse; p++)
7521 ind_p = &cd->ind[p];
7523 for (c = 0; c < cell; c++)
7525 cg0 += ind_p->nsend[c];
7527 cg1 = cg0 + ind_p->nsend[cell];
7528 for (cg = cg0; cg < cg1; cg++)
7530 ind_p->index[cg] += shift;
7536 /* Merge in the communicated buffers */
7540 for (cell = 0; cell < ncell; cell++)
7542 cg1 = ncg_cell[ncell+cell+1] + shift;
7545 /* Correct the old cg indices */
7546 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7548 cgindex[cg+1] += shift_at;
7551 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7553 /* Copy this charge group from the buffer */
7554 index_gl[cg1] = recv_i[cg0];
7555 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7556 /* Add it to the cgindex */
7557 cg_gl = index_gl[cg1];
7558 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7559 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7560 cgindex[cg1+1] = cgindex[cg1] + nat;
7565 shift += ind->nrecv[cell];
7566 ncg_cell[ncell+cell+1] = cg1;
7570 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7571 int nzone, int cg0, const int *cgindex)
7575 /* Store the atom block boundaries for easy copying of communication buffers
7578 for (zone = 0; zone < nzone; zone++)
7580 for (p = 0; p < cd->np; p++)
7582 cd->ind[p].cell2at0[zone] = cgindex[cg];
7583 cg += cd->ind[p].nrecv[zone];
7584 cd->ind[p].cell2at1[zone] = cgindex[cg];
7589 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7595 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7597 if (!bLocalCG[link->a[i]])
7606 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7608 real c[DIM][4]; /* the corners for the non-bonded communication */
7609 real cr0; /* corner for rounding */
7610 real cr1[4]; /* corners for rounding */
7611 real bc[DIM]; /* corners for bounded communication */
7612 real bcr1; /* corner for rounding for bonded communication */
7615 /* Determine the corners of the domain(s) we are communicating with */
7617 set_dd_corners(const gmx_domdec_t *dd,
7618 int dim0, int dim1, int dim2,
7622 const gmx_domdec_comm_t *comm;
7623 const gmx_domdec_zones_t *zones;
7628 zones = &comm->zones;
7630 /* Keep the compiler happy */
7634 /* The first dimension is equal for all cells */
7635 c->c[0][0] = comm->cell_x0[dim0];
7638 c->bc[0] = c->c[0][0];
7643 /* This cell row is only seen from the first row */
7644 c->c[1][0] = comm->cell_x0[dim1];
7645 /* All rows can see this row */
7646 c->c[1][1] = comm->cell_x0[dim1];
7647 if (isDlbOn(dd->comm))
7649 c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7652 /* For the multi-body distance we need the maximum */
7653 c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7656 /* Set the upper-right corner for rounding */
7657 c->cr0 = comm->cell_x1[dim0];
7662 for (j = 0; j < 4; j++)
7664 c->c[2][j] = comm->cell_x0[dim2];
7666 if (isDlbOn(dd->comm))
7668 /* Use the maximum of the i-cells that see a j-cell */
7669 for (i = 0; i < zones->nizone; i++)
7671 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7676 std::max(c->c[2][j-4],
7677 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7683 /* For the multi-body distance we need the maximum */
7684 c->bc[2] = comm->cell_x0[dim2];
7685 for (i = 0; i < 2; i++)
7687 for (j = 0; j < 2; j++)
7689 c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0);
7695 /* Set the upper-right corner for rounding */
7696 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7697 * Only cell (0,0,0) can see cell 7 (1,1,1)
7699 c->cr1[0] = comm->cell_x1[dim1];
7700 c->cr1[3] = comm->cell_x1[dim1];
7701 if (isDlbOn(dd->comm))
7703 c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7706 /* For the multi-body distance we need the maximum */
7707 c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7714 /* Determine which cg's we need to send in this pulse from this zone */
7716 get_zone_pulse_cgs(gmx_domdec_t *dd,
7717 int zonei, int zone,
7719 const int *index_gl,
7721 int dim, int dim_ind,
7722 int dim0, int dim1, int dim2,
7723 real r_comm2, real r_bcomm2,
7727 real skew_fac2_d, real skew_fac_01,
7728 rvec *v_d, rvec *v_0, rvec *v_1,
7729 const dd_corners_t *c,
7731 gmx_bool bDistBonded,
7737 gmx_domdec_ind_t *ind,
7738 int **ibuf, int *ibuf_nalloc,
7744 gmx_domdec_comm_t *comm;
7746 gmx_bool bDistMB_pulse;
7748 real r2, rb2, r, tric_sh;
7751 int nsend_z, nsend, nat;
7755 bScrew = (dd->bScrewPBC && dim == XX);
7757 bDistMB_pulse = (bDistMB && bDistBonded);
7763 for (cg = cg0; cg < cg1; cg++)
7767 if (tric_dist[dim_ind] == 0)
7769 /* Rectangular direction, easy */
7770 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7777 r = cg_cm[cg][dim] - c->bc[dim_ind];
7783 /* Rounding gives at most a 16% reduction
7784 * in communicated atoms
7786 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7788 r = cg_cm[cg][dim0] - c->cr0;
7789 /* This is the first dimension, so always r >= 0 */
7796 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7798 r = cg_cm[cg][dim1] - c->cr1[zone];
7805 r = cg_cm[cg][dim1] - c->bcr1;
7815 /* Triclinic direction, more complicated */
7818 /* Rounding, conservative as the skew_fac multiplication
7819 * will slightly underestimate the distance.
7821 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7823 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7824 for (i = dim0+1; i < DIM; i++)
7826 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7828 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7831 rb[dim0] = rn[dim0];
7834 /* Take care that the cell planes along dim0 might not
7835 * be orthogonal to those along dim1 and dim2.
7837 for (i = 1; i <= dim_ind; i++)
7840 if (normal[dim0][dimd] > 0)
7842 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7845 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7850 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7852 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7854 for (i = dim1+1; i < DIM; i++)
7856 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7858 rn[dim1] += tric_sh;
7861 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7862 /* Take care of coupling of the distances
7863 * to the planes along dim0 and dim1 through dim2.
7865 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7866 /* Take care that the cell planes along dim1
7867 * might not be orthogonal to that along dim2.
7869 if (normal[dim1][dim2] > 0)
7871 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7877 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7880 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7881 /* Take care of coupling of the distances
7882 * to the planes along dim0 and dim1 through dim2.
7884 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7885 /* Take care that the cell planes along dim1
7886 * might not be orthogonal to that along dim2.
7888 if (normal[dim1][dim2] > 0)
7890 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7895 /* The distance along the communication direction */
7896 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7898 for (i = dim+1; i < DIM; i++)
7900 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7905 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7906 /* Take care of coupling of the distances
7907 * to the planes along dim0 and dim1 through dim2.
7909 if (dim_ind == 1 && zonei == 1)
7911 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
7917 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
7920 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
7921 /* Take care of coupling of the distances
7922 * to the planes along dim0 and dim1 through dim2.
7924 if (dim_ind == 1 && zonei == 1)
7926 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
7934 ((bDistMB && rb2 < r_bcomm2) ||
7935 (bDist2B && r2 < r_bcomm2)) &&
7937 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
7938 missing_link(comm->cglink, index_gl[cg],
7941 /* Make an index to the local charge groups */
7942 if (nsend+1 > ind->nalloc)
7944 ind->nalloc = over_alloc_large(nsend+1);
7945 srenew(ind->index, ind->nalloc);
7947 if (nsend+1 > *ibuf_nalloc)
7949 *ibuf_nalloc = over_alloc_large(nsend+1);
7950 srenew(*ibuf, *ibuf_nalloc);
7952 ind->index[nsend] = cg;
7953 (*ibuf)[nsend] = index_gl[cg];
7955 vec_rvec_check_alloc(vbuf, nsend+1);
7957 if (dd->ci[dim] == 0)
7959 /* Correct cg_cm for pbc */
7960 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
7963 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
7964 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
7969 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
7972 nat += cgindex[cg+1] - cgindex[cg];
7978 *nsend_z_ptr = nsend_z;
7981 static void setup_dd_communication(gmx_domdec_t *dd,
7982 matrix box, gmx_ddbox_t *ddbox,
7984 t_state *state, PaddedRVecVector *f)
7986 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
7987 int nzone, nzone_send, zone, zonei, cg0, cg1;
7988 int c, i, cg, cg_gl, nrcg;
7989 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
7990 gmx_domdec_comm_t *comm;
7991 gmx_domdec_zones_t *zones;
7992 gmx_domdec_comm_dim_t *cd;
7993 gmx_domdec_ind_t *ind;
7994 cginfo_mb_t *cginfo_mb;
7995 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
7996 real r_comm2, r_bcomm2;
7997 dd_corners_t corners;
7999 rvec *cg_cm, *normal, *v_d, *v_0 = nullptr, *v_1 = nullptr, *recv_vr;
8000 real skew_fac2_d, skew_fac_01;
8007 fprintf(debug, "Setting up DD communication\n");
8014 /* Initialize the thread data.
8015 * This can not be done in init_domain_decomposition,
8016 * as the numbers of threads is determined later.
8018 comm->nth = gmx_omp_nthreads_get(emntDomdec);
8021 snew(comm->dth, comm->nth);
8025 switch (fr->cutoff_scheme)
8031 cg_cm = as_rvec_array(state->x.data());
8034 gmx_incons("unimplemented");
8038 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8040 /* Check if we need to use triclinic distances */
8041 tric_dist[dim_ind] = 0;
8042 for (i = 0; i <= dim_ind; i++)
8044 if (ddbox->tric_dir[dd->dim[i]])
8046 tric_dist[dim_ind] = 1;
8051 bBondComm = comm->bBondComm;
8053 /* Do we need to determine extra distances for multi-body bondeds? */
8054 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
8056 /* Do we need to determine extra distances for only two-body bondeds? */
8057 bDist2B = (bBondComm && !bDistMB);
8059 r_comm2 = gmx::square(comm->cutoff);
8060 r_bcomm2 = gmx::square(comm->cutoff_mbody);
8064 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, std::sqrt(r_bcomm2));
8067 zones = &comm->zones;
8070 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8071 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8073 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8075 /* Triclinic stuff */
8076 normal = ddbox->normal;
8080 v_0 = ddbox->v[dim0];
8081 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8083 /* Determine the coupling coefficient for the distances
8084 * to the cell planes along dim0 and dim1 through dim2.
8085 * This is required for correct rounding.
8088 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8091 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8097 v_1 = ddbox->v[dim1];
8100 zone_cg_range = zones->cg_range;
8101 index_gl = dd->index_gl;
8102 cgindex = dd->cgindex;
8103 cginfo_mb = fr->cginfo_mb;
8105 zone_cg_range[0] = 0;
8106 zone_cg_range[1] = dd->ncg_home;
8107 comm->zone_ncg1[0] = dd->ncg_home;
8108 pos_cg = dd->ncg_home;
8110 nat_tot = dd->nat_home;
8112 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8114 dim = dd->dim[dim_ind];
8115 cd = &comm->cd[dim_ind];
8117 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8119 /* No pbc in this dimension, the first node should not comm. */
8127 v_d = ddbox->v[dim];
8128 skew_fac2_d = gmx::square(ddbox->skew_fac[dim]);
8130 cd->bInPlace = TRUE;
8131 for (p = 0; p < cd->np; p++)
8133 /* Only atoms communicated in the first pulse are used
8134 * for multi-body bonded interactions or for bBondComm.
8136 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8141 for (zone = 0; zone < nzone_send; zone++)
8143 if (tric_dist[dim_ind] && dim_ind > 0)
8145 /* Determine slightly more optimized skew_fac's
8147 * This reduces the number of communicated atoms
8148 * by about 10% for 3D DD of rhombic dodecahedra.
8150 for (dimd = 0; dimd < dim; dimd++)
8152 sf2_round[dimd] = 1;
8153 if (ddbox->tric_dir[dimd])
8155 for (i = dd->dim[dimd]+1; i < DIM; i++)
8157 /* If we are shifted in dimension i
8158 * and the cell plane is tilted forward
8159 * in dimension i, skip this coupling.
8161 if (!(zones->shift[nzone+zone][i] &&
8162 ddbox->v[dimd][i][dimd] >= 0))
8165 gmx::square(ddbox->v[dimd][i][dimd]);
8168 sf2_round[dimd] = 1/sf2_round[dimd];
8173 zonei = zone_perm[dim_ind][zone];
8176 /* Here we permutate the zones to obtain a convenient order
8177 * for neighbor searching
8179 cg0 = zone_cg_range[zonei];
8180 cg1 = zone_cg_range[zonei+1];
8184 /* Look only at the cg's received in the previous grid pulse
8186 cg1 = zone_cg_range[nzone+zone+1];
8187 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
8190 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8191 for (th = 0; th < comm->nth; th++)
8195 gmx_domdec_ind_t *ind_p;
8196 int **ibuf_p, *ibuf_nalloc_p;
8198 int *nsend_p, *nat_p;
8204 /* Thread 0 writes in the comm buffers */
8206 ibuf_p = &comm->buf_int;
8207 ibuf_nalloc_p = &comm->nalloc_int;
8208 vbuf_p = &comm->vbuf;
8211 nsend_zone_p = &ind->nsend[zone];
8215 /* Other threads write into temp buffers */
8216 ind_p = &comm->dth[th].ind;
8217 ibuf_p = &comm->dth[th].ibuf;
8218 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8219 vbuf_p = &comm->dth[th].vbuf;
8220 nsend_p = &comm->dth[th].nsend;
8221 nat_p = &comm->dth[th].nat;
8222 nsend_zone_p = &comm->dth[th].nsend_zone;
8224 comm->dth[th].nsend = 0;
8225 comm->dth[th].nat = 0;
8226 comm->dth[th].nsend_zone = 0;
8236 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8237 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8240 /* Get the cg's for this pulse in this zone */
8241 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8243 dim, dim_ind, dim0, dim1, dim2,
8246 normal, skew_fac2_d, skew_fac_01,
8247 v_d, v_0, v_1, &corners, sf2_round,
8248 bDistBonded, bBondComm,
8252 ibuf_p, ibuf_nalloc_p,
8257 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
8260 /* Append data of threads>=1 to the communication buffers */
8261 for (th = 1; th < comm->nth; th++)
8263 dd_comm_setup_work_t *dth;
8266 dth = &comm->dth[th];
8268 ns1 = nsend + dth->nsend_zone;
8269 if (ns1 > ind->nalloc)
8271 ind->nalloc = over_alloc_dd(ns1);
8272 srenew(ind->index, ind->nalloc);
8274 if (ns1 > comm->nalloc_int)
8276 comm->nalloc_int = over_alloc_dd(ns1);
8277 srenew(comm->buf_int, comm->nalloc_int);
8279 if (ns1 > comm->vbuf.nalloc)
8281 comm->vbuf.nalloc = over_alloc_dd(ns1);
8282 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8285 for (i = 0; i < dth->nsend_zone; i++)
8287 ind->index[nsend] = dth->ind.index[i];
8288 comm->buf_int[nsend] = dth->ibuf[i];
8289 copy_rvec(dth->vbuf.v[i],
8290 comm->vbuf.v[nsend]);
8294 ind->nsend[zone] += dth->nsend_zone;
8297 /* Clear the counts in case we do not have pbc */
8298 for (zone = nzone_send; zone < nzone; zone++)
8300 ind->nsend[zone] = 0;
8302 ind->nsend[nzone] = nsend;
8303 ind->nsend[nzone+1] = nat;
8304 /* Communicate the number of cg's and atoms to receive */
8305 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8306 ind->nsend, nzone+2,
8307 ind->nrecv, nzone+2);
8309 /* The rvec buffer is also required for atom buffers of size nsend
8310 * in dd_move_x and dd_move_f.
8312 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8316 /* We can receive in place if only the last zone is not empty */
8317 for (zone = 0; zone < nzone-1; zone++)
8319 if (ind->nrecv[zone] > 0)
8321 cd->bInPlace = FALSE;
8326 /* The int buffer is only required here for the cg indices */
8327 if (ind->nrecv[nzone] > comm->nalloc_int2)
8329 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8330 srenew(comm->buf_int2, comm->nalloc_int2);
8332 /* The rvec buffer is also required for atom buffers
8333 * of size nrecv in dd_move_x and dd_move_f.
8335 i = std::max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8336 vec_rvec_check_alloc(&comm->vbuf2, i);
8340 /* Make space for the global cg indices */
8341 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8342 || dd->cg_nalloc == 0)
8344 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8345 srenew(index_gl, dd->cg_nalloc);
8346 srenew(cgindex, dd->cg_nalloc+1);
8348 /* Communicate the global cg indices */
8351 recv_i = index_gl + pos_cg;
8355 recv_i = comm->buf_int2;
8357 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8358 comm->buf_int, nsend,
8359 recv_i, ind->nrecv[nzone]);
8361 /* Make space for cg_cm */
8362 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8363 if (fr->cutoff_scheme == ecutsGROUP)
8369 cg_cm = as_rvec_array(state->x.data());
8371 /* Communicate cg_cm */
8374 recv_vr = cg_cm + pos_cg;
8378 recv_vr = comm->vbuf2.v;
8380 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8381 comm->vbuf.v, nsend,
8382 recv_vr, ind->nrecv[nzone]);
8384 /* Make the charge group index */
8387 zone = (p == 0 ? 0 : nzone - 1);
8388 while (zone < nzone)
8390 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8392 cg_gl = index_gl[pos_cg];
8393 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8394 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8395 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8398 /* Update the charge group presence,
8399 * so we can use it in the next pass of the loop.
8401 comm->bLocalCG[cg_gl] = TRUE;
8407 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8410 zone_cg_range[nzone+zone] = pos_cg;
8415 /* This part of the code is never executed with bBondComm. */
8416 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8417 index_gl, recv_i, cg_cm, recv_vr,
8418 cgindex, fr->cginfo_mb, fr->cginfo);
8419 pos_cg += ind->nrecv[nzone];
8421 nat_tot += ind->nrecv[nzone+1];
8425 /* Store the atom block for easy copying of communication buffers */
8426 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
8430 dd->index_gl = index_gl;
8431 dd->cgindex = cgindex;
8433 dd->ncg_tot = zone_cg_range[zones->n];
8434 dd->nat_tot = nat_tot;
8435 comm->nat[ddnatHOME] = dd->nat_home;
8436 for (i = ddnatZONE; i < ddnatNR; i++)
8438 comm->nat[i] = dd->nat_tot;
8443 /* We don't need to update cginfo, since that was alrady done above.
8444 * So we pass NULL for the forcerec.
8446 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8447 nullptr, comm->bLocalCG);
8452 fprintf(debug, "Finished setting up DD communication, zones:");
8453 for (c = 0; c < zones->n; c++)
8455 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8457 fprintf(debug, "\n");
8461 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8465 for (c = 0; c < zones->nizone; c++)
8467 zones->izone[c].cg1 = zones->cg_range[c+1];
8468 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8469 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
8473 /* \brief Set zone dimensions for zones \p zone_start to \p zone_end-1
8475 * Also sets the atom density for the home zone when \p zone_start=0.
8476 * For this \p numMovedChargeGroupsInHomeZone needs to be passed to tell
8477 * how many charge groups will move but are still part of the current range.
8478 * \todo When converting domdec to use proper classes, all these variables
8479 * should be private and a method should return the correct count
8480 * depending on an internal state.
8482 * \param[in,out] dd The domain decomposition struct
8483 * \param[in] box The box
8484 * \param[in] ddbox The domain decomposition box struct
8485 * \param[in] zone_start The start of the zone range to set sizes for
8486 * \param[in] zone_end The end of the zone range to set sizes for
8487 * \param[in] numMovedChargeGroupsInHomeZone The number of charge groups in the home zone that should moved but are still present in dd->comm->zones.cg_range
8489 static void set_zones_size(gmx_domdec_t *dd,
8490 matrix box, const gmx_ddbox_t *ddbox,
8491 int zone_start, int zone_end,
8492 int numMovedChargeGroupsInHomeZone)
8494 gmx_domdec_comm_t *comm;
8495 gmx_domdec_zones_t *zones;
8504 zones = &comm->zones;
8506 /* Do we need to determine extra distances for multi-body bondeds? */
8507 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
8509 for (z = zone_start; z < zone_end; z++)
8511 /* Copy cell limits to zone limits.
8512 * Valid for non-DD dims and non-shifted dims.
8514 copy_rvec(comm->cell_x0, zones->size[z].x0);
8515 copy_rvec(comm->cell_x1, zones->size[z].x1);
8518 for (d = 0; d < dd->ndim; d++)
8522 for (z = 0; z < zones->n; z++)
8524 /* With a staggered grid we have different sizes
8525 * for non-shifted dimensions.
8527 if (isDlbOn(dd->comm) && zones->shift[z][dim] == 0)
8531 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8532 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8536 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8537 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
8543 rcmbs = comm->cutoff_mbody;
8544 if (ddbox->tric_dir[dim])
8546 rcs /= ddbox->skew_fac[dim];
8547 rcmbs /= ddbox->skew_fac[dim];
8550 /* Set the lower limit for the shifted zone dimensions */
8551 for (z = zone_start; z < zone_end; z++)
8553 if (zones->shift[z][dim] > 0)
8556 if (!isDlbOn(dd->comm) || d == 0)
8558 zones->size[z].x0[dim] = comm->cell_x1[dim];
8559 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8563 /* Here we take the lower limit of the zone from
8564 * the lowest domain of the zone below.
8568 zones->size[z].x0[dim] =
8569 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8575 zones->size[z].x0[dim] =
8576 zones->size[zone_perm[2][z-4]].x0[dim];
8580 zones->size[z].x0[dim] =
8581 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8584 /* A temporary limit, is updated below */
8585 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8589 for (zi = 0; zi < zones->nizone; zi++)
8591 if (zones->shift[zi][dim] == 0)
8593 /* This takes the whole zone into account.
8594 * With multiple pulses this will lead
8595 * to a larger zone then strictly necessary.
8597 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
8598 zones->size[zi].x1[dim]+rcmbs);
8606 /* Loop over the i-zones to set the upper limit of each
8609 for (zi = 0; zi < zones->nizone; zi++)
8611 if (zones->shift[zi][dim] == 0)
8613 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8615 if (zones->shift[z][dim] > 0)
8617 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
8618 zones->size[zi].x1[dim]+rcs);
8625 for (z = zone_start; z < zone_end; z++)
8627 /* Initialization only required to keep the compiler happy */
8628 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8631 /* To determine the bounding box for a zone we need to find
8632 * the extreme corners of 4, 2 or 1 corners.
8634 nc = 1 << (ddbox->nboundeddim - 1);
8636 for (c = 0; c < nc; c++)
8638 /* Set up a zone corner at x=0, ignoring trilinic couplings */
8642 corner[YY] = zones->size[z].x0[YY];
8646 corner[YY] = zones->size[z].x1[YY];
8650 corner[ZZ] = zones->size[z].x0[ZZ];
8654 corner[ZZ] = zones->size[z].x1[ZZ];
8656 if (dd->ndim == 1 && dd->dim[0] < ZZ && ZZ < dd->npbcdim &&
8657 box[ZZ][1 - dd->dim[0]] != 0)
8659 /* With 1D domain decomposition the cg's are not in
8660 * the triclinic box, but triclinic x-y and rectangular y/x-z.
8661 * Shift the corner of the z-vector back to along the box
8662 * vector of dimension d, so it will later end up at 0 along d.
8663 * This can affect the location of this corner along dd->dim[0]
8664 * through the matrix operation below if box[d][dd->dim[0]]!=0.
8666 int d = 1 - dd->dim[0];
8668 corner[d] -= corner[ZZ]*box[ZZ][d]/box[ZZ][ZZ];
8670 /* Apply the triclinic couplings */
8671 assert(ddbox->npbcdim <= DIM);
8672 for (i = YY; i < ddbox->npbcdim; i++)
8674 for (j = XX; j < i; j++)
8676 corner[j] += corner[i]*box[i][j]/box[i][i];
8681 copy_rvec(corner, corner_min);
8682 copy_rvec(corner, corner_max);
8686 for (i = 0; i < DIM; i++)
8688 corner_min[i] = std::min(corner_min[i], corner[i]);
8689 corner_max[i] = std::max(corner_max[i], corner[i]);
8693 /* Copy the extreme cornes without offset along x */
8694 for (i = 0; i < DIM; i++)
8696 zones->size[z].bb_x0[i] = corner_min[i];
8697 zones->size[z].bb_x1[i] = corner_max[i];
8699 /* Add the offset along x */
8700 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8701 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
8704 if (zone_start == 0)
8707 for (dim = 0; dim < DIM; dim++)
8709 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8711 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0] - numMovedChargeGroupsInHomeZone)/vol;
8716 for (z = zone_start; z < zone_end; z++)
8718 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8720 zones->size[z].x0[XX], zones->size[z].x1[XX],
8721 zones->size[z].x0[YY], zones->size[z].x1[YY],
8722 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8723 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8725 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8726 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8727 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
8732 static int comp_cgsort(const void *a, const void *b)
8736 gmx_cgsort_t *cga, *cgb;
8737 cga = (gmx_cgsort_t *)a;
8738 cgb = (gmx_cgsort_t *)b;
8740 comp = cga->nsc - cgb->nsc;
8743 comp = cga->ind_gl - cgb->ind_gl;
8749 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8754 /* Order the data */
8755 for (i = 0; i < n; i++)
8757 buf[i] = a[sort[i].ind];
8760 /* Copy back to the original array */
8761 for (i = 0; i < n; i++)
8767 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8772 /* Order the data */
8773 for (i = 0; i < n; i++)
8775 copy_rvec(v[sort[i].ind], buf[i]);
8778 /* Copy back to the original array */
8779 for (i = 0; i < n; i++)
8781 copy_rvec(buf[i], v[i]);
8785 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8788 int a, atot, cg, cg0, cg1, i;
8790 if (cgindex == nullptr)
8792 /* Avoid the useless loop of the atoms within a cg */
8793 order_vec_cg(ncg, sort, v, buf);
8798 /* Order the data */
8800 for (cg = 0; cg < ncg; cg++)
8802 cg0 = cgindex[sort[cg].ind];
8803 cg1 = cgindex[sort[cg].ind+1];
8804 for (i = cg0; i < cg1; i++)
8806 copy_rvec(v[i], buf[a]);
8812 /* Copy back to the original array */
8813 for (a = 0; a < atot; a++)
8815 copy_rvec(buf[a], v[a]);
8819 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8820 int nsort_new, gmx_cgsort_t *sort_new,
8821 gmx_cgsort_t *sort1)
8825 /* The new indices are not very ordered, so we qsort them */
8826 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8828 /* sort2 is already ordered, so now we can merge the two arrays */
8832 while (i2 < nsort2 || i_new < nsort_new)
8836 sort1[i1++] = sort_new[i_new++];
8838 else if (i_new == nsort_new)
8840 sort1[i1++] = sort2[i2++];
8842 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8843 (sort2[i2].nsc == sort_new[i_new].nsc &&
8844 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8846 sort1[i1++] = sort2[i2++];
8850 sort1[i1++] = sort_new[i_new++];
8855 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8857 gmx_domdec_sort_t *sort;
8858 gmx_cgsort_t *cgsort, *sort_i;
8859 int ncg_new, nsort2, nsort_new, i, *a, moved;
8861 sort = dd->comm->sort;
8863 a = fr->ns->grid->cell_index;
8865 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns->grid->ncells;
8867 if (ncg_home_old >= 0)
8869 /* The charge groups that remained in the same ns grid cell
8870 * are completely ordered. So we can sort efficiently by sorting
8871 * the charge groups that did move into the stationary list.
8876 for (i = 0; i < dd->ncg_home; i++)
8878 /* Check if this cg did not move to another node */
8881 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8883 /* This cg is new on this node or moved ns grid cell */
8884 if (nsort_new >= sort->sort_new_nalloc)
8886 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8887 srenew(sort->sort_new, sort->sort_new_nalloc);
8889 sort_i = &(sort->sort_new[nsort_new++]);
8893 /* This cg did not move */
8894 sort_i = &(sort->sort2[nsort2++]);
8896 /* Sort on the ns grid cell indices
8897 * and the global topology index.
8898 * index_gl is irrelevant with cell ns,
8899 * but we set it here anyhow to avoid a conditional.
8902 sort_i->ind_gl = dd->index_gl[i];
8909 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8912 /* Sort efficiently */
8913 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8918 cgsort = sort->sort;
8920 for (i = 0; i < dd->ncg_home; i++)
8922 /* Sort on the ns grid cell indices
8923 * and the global topology index
8925 cgsort[i].nsc = a[i];
8926 cgsort[i].ind_gl = dd->index_gl[i];
8928 if (cgsort[i].nsc < moved)
8935 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8937 /* Determine the order of the charge groups using qsort */
8938 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
8944 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
8950 sort = dd->comm->sort->sort;
8952 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
8955 for (i = 0; i < na; i++)
8959 sort[ncg_new].ind = a[i];
8967 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
8970 gmx_domdec_sort_t *sort;
8971 gmx_cgsort_t *cgsort;
8973 int ncg_new, i, *ibuf, cgsize;
8976 sort = dd->comm->sort;
8978 if (dd->ncg_home > sort->sort_nalloc)
8980 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
8981 srenew(sort->sort, sort->sort_nalloc);
8982 srenew(sort->sort2, sort->sort_nalloc);
8984 cgsort = sort->sort;
8986 switch (fr->cutoff_scheme)
8989 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
8992 ncg_new = dd_sort_order_nbnxn(dd, fr);
8995 gmx_incons("unimplemented");
8999 /* We alloc with the old size, since cgindex is still old */
9000 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9001 vbuf = dd->comm->vbuf.v;
9005 cgindex = dd->cgindex;
9012 /* Remove the charge groups which are no longer at home here */
9013 dd->ncg_home = ncg_new;
9016 fprintf(debug, "Set the new home charge group count to %d\n",
9020 /* Reorder the state */
9021 if (state->flags & (1 << estX))
9023 order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->x.data()), vbuf);
9025 if (state->flags & (1 << estV))
9027 order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->v.data()), vbuf);
9029 if (state->flags & (1 << estCGP))
9031 order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->cg_p.data()), vbuf);
9034 if (fr->cutoff_scheme == ecutsGROUP)
9037 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9040 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9042 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9043 srenew(sort->ibuf, sort->ibuf_nalloc);
9046 /* Reorder the global cg index */
9047 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9048 /* Reorder the cginfo */
9049 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9050 /* Rebuild the local cg index */
9054 for (i = 0; i < dd->ncg_home; i++)
9056 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9057 ibuf[i+1] = ibuf[i] + cgsize;
9059 for (i = 0; i < dd->ncg_home+1; i++)
9061 dd->cgindex[i] = ibuf[i];
9066 for (i = 0; i < dd->ncg_home+1; i++)
9071 /* Set the home atom number */
9072 dd->nat_home = dd->cgindex[dd->ncg_home];
9074 if (fr->cutoff_scheme == ecutsVERLET)
9076 /* The atoms are now exactly in grid order, update the grid order */
9077 nbnxn_set_atomorder(fr->nbv->nbs);
9081 /* Copy the sorted ns cell indices back to the ns grid struct */
9082 for (i = 0; i < dd->ncg_home; i++)
9084 fr->ns->grid->cell_index[i] = cgsort[i].nsc;
9086 fr->ns->grid->nr = dd->ncg_home;
9090 static void add_dd_statistics(gmx_domdec_t *dd)
9092 gmx_domdec_comm_t *comm;
9097 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9099 comm->sum_nat[ddnat-ddnatZONE] +=
9100 comm->nat[ddnat] - comm->nat[ddnat-1];
9105 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9107 gmx_domdec_comm_t *comm;
9112 /* Reset all the statistics and counters for total run counting */
9113 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9115 comm->sum_nat[ddnat-ddnatZONE] = 0;
9119 comm->load_step = 0;
9122 clear_ivec(comm->load_lim);
9127 void print_dd_statistics(t_commrec *cr, const t_inputrec *ir, FILE *fplog)
9129 gmx_domdec_comm_t *comm;
9133 comm = cr->dd->comm;
9135 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9137 if (fplog == nullptr)
9142 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9144 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9146 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9151 " av. #atoms communicated per step for force: %d x %.1f\n",
9155 if (cr->dd->vsite_comm)
9158 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9159 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9164 if (cr->dd->constraint_comm)
9167 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9168 1 + ir->nLincsIter, av);
9172 gmx_incons(" Unknown type for DD statistics");
9175 fprintf(fplog, "\n");
9177 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9179 print_dd_load_av(fplog, cr->dd);
9183 void dd_partition_system(FILE *fplog,
9186 gmx_bool bMasterState,
9188 t_state *state_global,
9189 const gmx_mtop_t *top_global,
9190 const t_inputrec *ir,
9191 t_state *state_local,
9192 PaddedRVecVector *f,
9193 gmx::MDAtoms *mdAtoms,
9194 gmx_localtop_t *top_local,
9197 gmx_constr_t constr,
9199 gmx_wallcycle_t wcycle,
9203 gmx_domdec_comm_t *comm;
9204 gmx_ddbox_t ddbox = {0};
9206 gmx_int64_t step_pcoupl;
9207 rvec cell_ns_x0, cell_ns_x1;
9208 int i, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9209 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckWhetherToTurnDlbOn, bLogLoad;
9210 gmx_bool bRedist, bSortCG, bResortAll;
9211 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9215 wallcycle_start(wcycle, ewcDOMDEC);
9220 bBoxChanged = (bMasterState || inputrecDeform(ir));
9221 if (ir->epc != epcNO)
9223 /* With nstpcouple > 1 pressure coupling happens.
9224 * one step after calculating the pressure.
9225 * Box scaling happens at the end of the MD step,
9226 * after the DD partitioning.
9227 * We therefore have to do DLB in the first partitioning
9228 * after an MD step where P-coupling occurred.
9229 * We need to determine the last step in which p-coupling occurred.
9230 * MRS -- need to validate this for vv?
9235 step_pcoupl = step - 1;
9239 step_pcoupl = ((step - 1)/n)*n + 1;
9241 if (step_pcoupl >= comm->partition_step)
9247 bNStGlobalComm = (step % nstglobalcomm == 0);
9255 /* Should we do dynamic load balacing this step?
9256 * Since it requires (possibly expensive) global communication,
9257 * we might want to do DLB less frequently.
9259 if (bBoxChanged || ir->epc != epcNO)
9261 bDoDLB = bBoxChanged;
9265 bDoDLB = bNStGlobalComm;
9269 /* Check if we have recorded loads on the nodes */
9270 if (comm->bRecordLoad && dd_load_count(comm) > 0)
9272 bCheckWhetherToTurnDlbOn = dd_dlb_get_should_check_whether_to_turn_dlb_on(dd);
9274 /* Print load every nstlog, first and last step to the log file */
9275 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9276 comm->n_load_collect == 0 ||
9278 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9280 /* Avoid extra communication due to verbose screen output
9281 * when nstglobalcomm is set.
9283 if (bDoDLB || bLogLoad || bCheckWhetherToTurnDlbOn ||
9284 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9286 get_load_distribution(dd, wcycle);
9291 dd_print_load(fplog, dd, step-1);
9295 dd_print_load_verbose(dd);
9298 comm->n_load_collect++;
9304 /* Add the measured cycles to the running average */
9305 const float averageFactor = 0.1f;
9306 comm->cyclesPerStepDlbExpAverage =
9307 (1 - averageFactor)*comm->cyclesPerStepDlbExpAverage +
9308 averageFactor*comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
9310 if (comm->dlbState == edlbsOnCanTurnOff &&
9311 dd->comm->n_load_have % c_checkTurnDlbOffInterval == c_checkTurnDlbOffInterval - 1)
9313 gmx_bool turnOffDlb;
9316 /* If the running averaged cycles with DLB are more
9317 * than before we turned on DLB, turn off DLB.
9318 * We will again run and check the cycles without DLB
9319 * and we can then decide if to turn off DLB forever.
9321 turnOffDlb = (comm->cyclesPerStepDlbExpAverage >
9322 comm->cyclesPerStepBeforeDLB);
9324 dd_bcast(dd, sizeof(turnOffDlb), &turnOffDlb);
9327 /* To turn off DLB, we need to redistribute the atoms */
9328 dd_collect_state(dd, state_local, state_global);
9329 bMasterState = TRUE;
9330 turn_off_dlb(fplog, cr, step);
9334 else if (bCheckWhetherToTurnDlbOn)
9336 gmx_bool turnOffDlbForever = FALSE;
9337 gmx_bool turnOnDlb = FALSE;
9339 /* Since the timings are node dependent, the master decides */
9342 /* If we recently turned off DLB, we want to check if
9343 * performance is better without DLB. We want to do this
9344 * ASAP to minimize the chance that external factors
9345 * slowed down the DLB step are gone here and we
9346 * incorrectly conclude that DLB was causing the slowdown.
9347 * So we measure one nstlist block, no running average.
9349 if (comm->haveTurnedOffDlb &&
9350 comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep] <
9351 comm->cyclesPerStepDlbExpAverage)
9353 /* After turning off DLB we ran nstlist steps in fewer
9354 * cycles than with DLB. This likely means that DLB
9355 * in not benefical, but this could be due to a one
9356 * time unlucky fluctuation, so we require two such
9357 * observations in close succession to turn off DLB
9360 if (comm->dlbSlowerPartitioningCount > 0 &&
9361 dd->ddp_count < comm->dlbSlowerPartitioningCount + 10*c_checkTurnDlbOnInterval)
9363 turnOffDlbForever = TRUE;
9365 comm->haveTurnedOffDlb = false;
9366 /* Register when we last measured DLB slowdown */
9367 comm->dlbSlowerPartitioningCount = dd->ddp_count;
9371 /* Here we check if the max PME rank load is more than 0.98
9372 * the max PP force load. If so, PP DLB will not help,
9373 * since we are (almost) limited by PME. Furthermore,
9374 * DLB will cause a significant extra x/f redistribution
9375 * cost on the PME ranks, which will then surely result
9376 * in lower total performance.
9378 if (cr->npmenodes > 0 &&
9379 dd_pme_f_ratio(dd) > 1 - DD_PERF_LOSS_DLB_ON)
9385 turnOnDlb = (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
9391 gmx_bool turnOffDlbForever;
9395 turnOffDlbForever, turnOnDlb
9397 dd_bcast(dd, sizeof(bools), &bools);
9398 if (bools.turnOffDlbForever)
9400 turn_off_dlb_forever(fplog, cr, step);
9402 else if (bools.turnOnDlb)
9404 turn_on_dlb(fplog, cr, step);
9409 comm->n_load_have++;
9412 cgs_gl = &comm->cgs_gl;
9417 /* Clear the old state */
9418 clear_dd_indices(dd, 0, 0);
9421 rvec *xGlobal = (SIMMASTER(cr) ? as_rvec_array(state_global->x.data()) : nullptr);
9423 set_ddbox(dd, bMasterState, cr, ir,
9424 SIMMASTER(cr) ? state_global->box : nullptr,
9425 TRUE, cgs_gl, xGlobal,
9428 get_cg_distribution(fplog, dd, cgs_gl,
9429 SIMMASTER(cr) ? state_global->box : nullptr,
9432 dd_distribute_state(dd, cgs_gl,
9433 state_global, state_local, f);
9435 dd_make_local_cgs(dd, &top_local->cgs);
9437 /* Ensure that we have space for the new distribution */
9438 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9440 if (fr->cutoff_scheme == ecutsGROUP)
9442 calc_cgcm(fplog, 0, dd->ncg_home,
9443 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
9446 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9448 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9450 else if (state_local->ddp_count != dd->ddp_count)
9452 if (state_local->ddp_count > dd->ddp_count)
9454 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9457 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9459 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9462 /* Clear the old state */
9463 clear_dd_indices(dd, 0, 0);
9465 /* Build the new indices */
9466 rebuild_cgindex(dd, cgs_gl->index, state_local);
9467 make_dd_indices(dd, cgs_gl->index, 0);
9468 ncgindex_set = dd->ncg_home;
9470 if (fr->cutoff_scheme == ecutsGROUP)
9472 /* Redetermine the cg COMs */
9473 calc_cgcm(fplog, 0, dd->ncg_home,
9474 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
9477 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9479 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9481 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9482 TRUE, &top_local->cgs, as_rvec_array(state_local->x.data()), &ddbox);
9484 bRedist = isDlbOn(comm);
9488 /* We have the full state, only redistribute the cgs */
9490 /* Clear the non-home indices */
9491 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9494 /* Avoid global communication for dim's without pbc and -gcom */
9495 if (!bNStGlobalComm)
9497 copy_rvec(comm->box0, ddbox.box0 );
9498 copy_rvec(comm->box_size, ddbox.box_size);
9500 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9501 bNStGlobalComm, &top_local->cgs, as_rvec_array(state_local->x.data()), &ddbox);
9506 /* For dim's without pbc and -gcom */
9507 copy_rvec(ddbox.box0, comm->box0 );
9508 copy_rvec(ddbox.box_size, comm->box_size);
9510 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9513 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9515 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9518 /* Check if we should sort the charge groups */
9519 bSortCG = (bMasterState || bRedist);
9521 ncg_home_old = dd->ncg_home;
9523 /* When repartitioning we mark charge groups that will move to neighboring
9524 * DD cells, but we do not move them right away for performance reasons.
9525 * Thus we need to keep track of how many charge groups will move for
9526 * obtaining correct local charge group / atom counts.
9531 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9533 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9535 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9537 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9540 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9542 &comm->cell_x0, &comm->cell_x1,
9543 dd->ncg_home, fr->cg_cm,
9544 cell_ns_x0, cell_ns_x1, &grid_density);
9548 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9551 switch (fr->cutoff_scheme)
9554 copy_ivec(fr->ns->grid->n, ncells_old);
9555 grid_first(fplog, fr->ns->grid, dd, &ddbox,
9556 state_local->box, cell_ns_x0, cell_ns_x1,
9557 fr->rlist, grid_density);
9560 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9563 gmx_incons("unimplemented");
9565 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9566 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9570 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9572 /* Sort the state on charge group position.
9573 * This enables exact restarts from this step.
9574 * It also improves performance by about 15% with larger numbers
9575 * of atoms per node.
9578 /* Fill the ns grid with the home cell,
9579 * so we can sort with the indices.
9581 set_zones_ncg_home(dd);
9583 switch (fr->cutoff_scheme)
9586 set_zones_size(dd, state_local->box, &ddbox, 0, 1, ncg_moved);
9588 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9590 comm->zones.size[0].bb_x0,
9591 comm->zones.size[0].bb_x1,
9593 comm->zones.dens_zone0,
9595 as_rvec_array(state_local->x.data()),
9596 ncg_moved, bRedist ? comm->moved : nullptr,
9597 fr->nbv->grp[eintLocal].kernel_type,
9600 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9603 fill_grid(&comm->zones, fr->ns->grid, dd->ncg_home,
9604 0, dd->ncg_home, fr->cg_cm);
9606 copy_ivec(fr->ns->grid->n, ncells_new);
9609 gmx_incons("unimplemented");
9612 bResortAll = bMasterState;
9614 /* Check if we can user the old order and ns grid cell indices
9615 * of the charge groups to sort the charge groups efficiently.
9617 if (ncells_new[XX] != ncells_old[XX] ||
9618 ncells_new[YY] != ncells_old[YY] ||
9619 ncells_new[ZZ] != ncells_old[ZZ])
9626 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9627 gmx_step_str(step, sbuf), dd->ncg_home);
9629 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9630 bResortAll ? -1 : ncg_home_old);
9632 /* After sorting and compacting we set the correct size */
9633 dd_resize_state(state_local, f, dd->nat_home);
9635 /* Rebuild all the indices */
9636 ga2la_clear(dd->ga2la);
9639 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9642 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9644 /* Setup up the communication and communicate the coordinates */
9645 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9647 /* Set the indices */
9648 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9650 /* Set the charge group boundaries for neighbor searching */
9651 set_cg_boundaries(&comm->zones);
9653 if (fr->cutoff_scheme == ecutsVERLET)
9655 set_zones_size(dd, state_local->box, &ddbox,
9656 bSortCG ? 1 : 0, comm->zones.n,
9660 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9663 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9664 -1,as_rvec_array(state_local->x.data()),state_local->box);
9667 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9669 /* Extract a local topology from the global topology */
9670 for (i = 0; i < dd->ndim; i++)
9672 np[dd->dim[i]] = comm->cd[i].np;
9674 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9675 comm->cellsize_min, np,
9677 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : as_rvec_array(state_local->x.data()),
9678 vsite, top_global, top_local);
9680 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9682 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9684 /* Set up the special atom communication */
9685 n = comm->nat[ddnatZONE];
9686 for (i = ddnatZONE+1; i < ddnatNR; i++)
9691 if (vsite && vsite->n_intercg_vsite)
9693 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9697 if (dd->bInterCGcons || dd->bInterCGsettles)
9699 /* Only for inter-cg constraints we need special code */
9700 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9701 constr, ir->nProjOrder,
9702 top_local->idef.il);
9706 gmx_incons("Unknown special atom type setup");
9711 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9713 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9715 /* Make space for the extra coordinates for virtual site
9716 * or constraint communication.
9718 state_local->natoms = comm->nat[ddnatNR-1];
9720 dd_resize_state(state_local, f, state_local->natoms);
9722 if (fr->haveDirectVirialContributions)
9724 if (vsite && vsite->n_intercg_vsite)
9726 nat_f_novirsum = comm->nat[ddnatVSITE];
9730 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9732 nat_f_novirsum = dd->nat_tot;
9736 nat_f_novirsum = dd->nat_home;
9745 /* Set the number of atoms required for the force calculation.
9746 * Forces need to be constrained when doing energy
9747 * minimization. For simple simulations we could avoid some
9748 * allocation, zeroing and copying, but this is probably not worth
9749 * the complications and checking.
9751 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9752 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9754 /* Update atom data for mdatoms and several algorithms */
9755 mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr,
9756 nullptr, mdAtoms, vsite, nullptr);
9758 if (ir->implicit_solvent)
9760 make_local_gb(cr, fr->born, ir->gb_algorithm);
9763 auto mdatoms = mdAtoms->mdatoms();
9764 if (!thisRankHasDuty(cr, DUTY_PME))
9766 /* Send the charges and/or c6/sigmas to our PME only node */
9767 gmx_pme_send_parameters(cr,
9769 mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9770 mdatoms->chargeA, mdatoms->chargeB,
9771 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9772 mdatoms->sigmaA, mdatoms->sigmaB,
9773 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9778 set_constraints(constr, top_local, ir, mdatoms, cr);
9783 /* Update the local pull groups */
9784 dd_make_local_pull_groups(cr, ir->pull_work, mdatoms);
9789 /* Update the local rotation groups */
9790 dd_make_local_rotation_groups(dd, ir->rot);
9793 if (ir->eSwapCoords != eswapNO)
9795 /* Update the local groups needed for ion swapping */
9796 dd_make_local_swap_groups(dd, ir->swap);
9799 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9800 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9802 add_dd_statistics(dd);
9804 /* Make sure we only count the cycles for this DD partitioning */
9805 clear_dd_cycle_counts(dd);
9807 /* Because the order of the atoms might have changed since
9808 * the last vsite construction, we need to communicate the constructing
9809 * atom coordinates again (for spreading the forces this MD step).
9811 dd_move_x_vsites(dd, state_local->box, as_rvec_array(state_local->x.data()));
9813 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9815 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9817 dd_move_x(dd, state_local->box, as_rvec_array(state_local->x.data()));
9818 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9819 -1, as_rvec_array(state_local->x.data()), state_local->box);
9822 /* Store the partitioning step */
9823 comm->partition_step = step;
9825 /* Increase the DD partitioning counter */
9827 /* The state currently matches this DD partitioning count, store it */
9828 state_local->ddp_count = dd->ddp_count;
9831 /* The DD master node knows the complete cg distribution,
9832 * store the count so we can possibly skip the cg info communication.
9834 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9837 if (comm->DD_debug > 0)
9839 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9840 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9841 "after partitioning");
9844 wallcycle_stop(wcycle, ewcDOMDEC);