2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
51 #include "gromacs/domdec/domdec_network.h"
52 #include "gromacs/domdec/ga2la.h"
53 #include "gromacs/ewald/pme.h"
54 #include "gromacs/fileio/gmxfio.h"
55 #include "gromacs/fileio/pdbio.h"
56 #include "gromacs/gmxlib/chargegroup.h"
57 #include "gromacs/gmxlib/network.h"
58 #include "gromacs/gmxlib/nrnb.h"
59 #include "gromacs/gpu_utils/gpu_utils.h"
60 #include "gromacs/hardware/hw_info.h"
61 #include "gromacs/imd/imd.h"
62 #include "gromacs/listed-forces/manage-threading.h"
63 #include "gromacs/math/functions.h"
64 #include "gromacs/math/vec.h"
65 #include "gromacs/math/vectypes.h"
66 #include "gromacs/mdlib/constr.h"
67 #include "gromacs/mdlib/force.h"
68 #include "gromacs/mdlib/forcerec.h"
69 #include "gromacs/mdlib/genborn.h"
70 #include "gromacs/mdlib/gmx_omp_nthreads.h"
71 #include "gromacs/mdlib/mdatoms.h"
72 #include "gromacs/mdlib/mdrun.h"
73 #include "gromacs/mdlib/mdsetup.h"
74 #include "gromacs/mdlib/nb_verlet.h"
75 #include "gromacs/mdlib/nbnxn_grid.h"
76 #include "gromacs/mdlib/nsgrid.h"
77 #include "gromacs/mdlib/vsite.h"
78 #include "gromacs/mdtypes/commrec.h"
79 #include "gromacs/mdtypes/df_history.h"
80 #include "gromacs/mdtypes/forcerec.h"
81 #include "gromacs/mdtypes/inputrec.h"
82 #include "gromacs/mdtypes/md_enums.h"
83 #include "gromacs/mdtypes/mdatom.h"
84 #include "gromacs/mdtypes/nblist.h"
85 #include "gromacs/mdtypes/state.h"
86 #include "gromacs/pbcutil/ishift.h"
87 #include "gromacs/pbcutil/pbc.h"
88 #include "gromacs/pulling/pull.h"
89 #include "gromacs/pulling/pull_rotation.h"
90 #include "gromacs/swap/swapcoords.h"
91 #include "gromacs/timing/wallcycle.h"
92 #include "gromacs/topology/block.h"
93 #include "gromacs/topology/idef.h"
94 #include "gromacs/topology/ifunc.h"
95 #include "gromacs/topology/mtop_lookup.h"
96 #include "gromacs/topology/mtop_util.h"
97 #include "gromacs/topology/topology.h"
98 #include "gromacs/utility/basedefinitions.h"
99 #include "gromacs/utility/basenetwork.h"
100 #include "gromacs/utility/cstringutil.h"
101 #include "gromacs/utility/exceptions.h"
102 #include "gromacs/utility/fatalerror.h"
103 #include "gromacs/utility/gmxmpi.h"
104 #include "gromacs/utility/qsort_threadsafe.h"
105 #include "gromacs/utility/real.h"
106 #include "gromacs/utility/smalloc.h"
107 #include "gromacs/utility/stringutil.h"
109 #include "domdec_constraints.h"
110 #include "domdec_internal.h"
111 #include "domdec_vsite.h"
113 #define DDRANK(dd, rank) (rank)
114 #define DDMASTERRANK(dd) (dd->masterrank)
116 struct gmx_domdec_master_t
118 /* The cell boundaries */
120 /* The global charge group division */
121 int *ncg; /* Number of home charge groups for each node */
122 int *index; /* Index of nnodes+1 into cg */
123 int *cg; /* Global charge group index */
124 int *nat; /* Number of home atoms for each node. */
125 int *ibuf; /* Buffer for communication */
126 rvec *vbuf; /* Buffer for state scattering and gathering */
129 #define DD_NLOAD_MAX 9
131 const char *edlbs_names[edlbsNR] = { "off", "auto", "locked", "on", "on" };
133 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
136 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
137 #define DD_FLAG_NRCG 65535
138 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
139 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
141 /* The DD zone order */
142 static const ivec dd_zo[DD_MAXZONE] =
143 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
145 /* The non-bonded zone-pair setup for domain decomposition
146 * The first number is the i-zone, the second number the first j-zone seen by
147 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
148 * As is, this is for 3D decomposition, where there are 4 i-zones.
149 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
150 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
153 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
158 /* Factors used to avoid problems due to rounding issues */
159 #define DD_CELL_MARGIN 1.0001
160 #define DD_CELL_MARGIN2 1.00005
161 /* Factor to account for pressure scaling during nstlist steps */
162 #define DD_PRES_SCALE_MARGIN 1.02
164 /* Turn on DLB when the load imbalance causes this amount of total loss.
165 * There is a bit of overhead with DLB and it's difficult to achieve
166 * a load imbalance of less than 2% with DLB.
168 #define DD_PERF_LOSS_DLB_ON 0.02
170 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
171 #define DD_PERF_LOSS_WARN 0.05
173 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
175 /* Use separate MPI send and receive commands
176 * when nnodes <= GMX_DD_NNODES_SENDRECV.
177 * This saves memory (and some copying for small nnodes).
178 * For high parallelization scatter and gather calls are used.
180 #define GMX_DD_NNODES_SENDRECV 4
183 /* We check if to turn on DLB at the first and every 100 DD partitionings.
184 * With large imbalance DLB will turn on at the first step, so we can
185 * make the interval so large that the MPI overhead of the check is negligible.
187 static const int c_checkTurnDlbOnInterval = 100;
188 /* We need to check if DLB results in worse performance and then turn it off.
189 * We check this more often then for turning DLB on, because the DLB can scale
190 * the domains very rapidly, so if unlucky the load imbalance can go up quickly
191 * and furthermore, we are already synchronizing often with DLB, so
192 * the overhead of the MPI Bcast is not that high.
194 static const int c_checkTurnDlbOffInterval = 20;
196 /* Forward declaration */
197 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue);
201 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
203 static void index2xyz(ivec nc,int ind,ivec xyz)
205 xyz[XX] = ind % nc[XX];
206 xyz[YY] = (ind / nc[XX]) % nc[YY];
207 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
211 /* This order is required to minimize the coordinate communication in PME
212 * which uses decomposition in the x direction.
214 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
216 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
218 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
219 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
220 xyz[ZZ] = ind % nc[ZZ];
223 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
228 ddindex = dd_index(dd->nc, c);
229 if (dd->comm->bCartesianPP_PME)
231 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
233 else if (dd->comm->bCartesianPP)
236 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
247 static gmx_bool dynamic_dd_box(const gmx_ddbox_t *ddbox, const t_inputrec *ir)
249 return (ddbox->nboundeddim < DIM || inputrecDynamicBox(ir));
252 int ddglatnr(const gmx_domdec_t *dd, int i)
262 if (i >= dd->comm->nat[ddnatNR-1])
264 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
266 atnr = dd->gatindex[i] + 1;
272 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
274 return &dd->comm->cgs_gl;
277 /*! \brief Returns true if the DLB state indicates that the balancer is on. */
278 static bool isDlbOn(const gmx_domdec_comm_t *comm)
280 return (comm->dlbState == edlbsOnCanTurnOff ||
281 comm->dlbState == edlbsOnUser);
283 /*! \brief Returns true if the DLB state indicates that the balancer is off/disabled.
285 static bool isDlbDisabled(const gmx_domdec_comm_t *comm)
287 return (comm->dlbState == edlbsOffUser ||
288 comm->dlbState == edlbsOffForever);
291 static void vec_rvec_init(vec_rvec_t *v)
297 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
301 v->nalloc = over_alloc_dd(n);
302 srenew(v->v, v->nalloc);
306 void dd_store_state(gmx_domdec_t *dd, t_state *state)
310 if (state->ddp_count != dd->ddp_count)
312 gmx_incons("The state does not the domain decomposition state");
315 state->cg_gl.resize(dd->ncg_home);
316 for (i = 0; i < dd->ncg_home; i++)
318 state->cg_gl[i] = dd->index_gl[i];
321 state->ddp_count_cg_gl = dd->ddp_count;
324 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
326 return &dd->comm->zones;
329 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
330 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
332 gmx_domdec_zones_t *zones;
335 zones = &dd->comm->zones;
338 while (icg >= zones->izone[izone].cg1)
347 else if (izone < zones->nizone)
349 *jcg0 = zones->izone[izone].jcg0;
353 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
354 icg, izone, zones->nizone);
357 *jcg1 = zones->izone[izone].jcg1;
359 for (d = 0; d < dd->ndim; d++)
362 shift0[dim] = zones->izone[izone].shift0[dim];
363 shift1[dim] = zones->izone[izone].shift1[dim];
364 if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
366 /* A conservative approach, this can be optimized */
373 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
375 /* We currently set mdatoms entries for all atoms:
376 * local + non-local + communicated for vsite + constraints
379 return dd->comm->nat[ddnatNR - 1];
382 int dd_natoms_vsite(const gmx_domdec_t *dd)
384 return dd->comm->nat[ddnatVSITE];
387 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
389 *at_start = dd->comm->nat[ddnatCON-1];
390 *at_end = dd->comm->nat[ddnatCON];
393 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
395 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
396 int *index, *cgindex;
397 gmx_domdec_comm_t *comm;
398 gmx_domdec_comm_dim_t *cd;
399 gmx_domdec_ind_t *ind;
400 rvec shift = {0, 0, 0}, *buf, *rbuf;
401 gmx_bool bPBC, bScrew;
405 cgindex = dd->cgindex;
410 nat_tot = dd->nat_home;
411 for (d = 0; d < dd->ndim; d++)
413 bPBC = (dd->ci[dd->dim[d]] == 0);
414 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
417 copy_rvec(box[dd->dim[d]], shift);
420 for (p = 0; p < cd->np; p++)
427 for (i = 0; i < ind->nsend[nzone]; i++)
429 at0 = cgindex[index[i]];
430 at1 = cgindex[index[i]+1];
431 for (j = at0; j < at1; j++)
433 copy_rvec(x[j], buf[n]);
440 for (i = 0; i < ind->nsend[nzone]; i++)
442 at0 = cgindex[index[i]];
443 at1 = cgindex[index[i]+1];
444 for (j = at0; j < at1; j++)
446 /* We need to shift the coordinates */
447 rvec_add(x[j], shift, buf[n]);
454 for (i = 0; i < ind->nsend[nzone]; i++)
456 at0 = cgindex[index[i]];
457 at1 = cgindex[index[i]+1];
458 for (j = at0; j < at1; j++)
461 buf[n][XX] = x[j][XX] + shift[XX];
463 * This operation requires a special shift force
464 * treatment, which is performed in calc_vir.
466 buf[n][YY] = box[YY][YY] - x[j][YY];
467 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
479 rbuf = comm->vbuf2.v;
481 /* Send and receive the coordinates */
482 dd_sendrecv_rvec(dd, d, dddirBackward,
483 buf, ind->nsend[nzone+1],
484 rbuf, ind->nrecv[nzone+1]);
488 for (zone = 0; zone < nzone; zone++)
490 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
492 copy_rvec(rbuf[j], x[i]);
497 nat_tot += ind->nrecv[nzone+1];
503 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
505 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
506 int *index, *cgindex;
507 gmx_domdec_comm_t *comm;
508 gmx_domdec_comm_dim_t *cd;
509 gmx_domdec_ind_t *ind;
513 gmx_bool bShiftForcesNeedPbc, bScrew;
517 cgindex = dd->cgindex;
521 nzone = comm->zones.n/2;
522 nat_tot = dd->nat_tot;
523 for (d = dd->ndim-1; d >= 0; d--)
525 /* Only forces in domains near the PBC boundaries need to
526 consider PBC in the treatment of fshift */
527 bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
528 bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
529 if (fshift == nullptr && !bScrew)
531 bShiftForcesNeedPbc = FALSE;
533 /* Determine which shift vector we need */
539 for (p = cd->np-1; p >= 0; p--)
542 nat_tot -= ind->nrecv[nzone+1];
549 sbuf = comm->vbuf2.v;
551 for (zone = 0; zone < nzone; zone++)
553 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
555 copy_rvec(f[i], sbuf[j]);
560 /* Communicate the forces */
561 dd_sendrecv_rvec(dd, d, dddirForward,
562 sbuf, ind->nrecv[nzone+1],
563 buf, ind->nsend[nzone+1]);
565 /* Add the received forces */
567 if (!bShiftForcesNeedPbc)
569 for (i = 0; i < ind->nsend[nzone]; i++)
571 at0 = cgindex[index[i]];
572 at1 = cgindex[index[i]+1];
573 for (j = at0; j < at1; j++)
575 rvec_inc(f[j], buf[n]);
582 /* fshift should always be defined if this function is
583 * called when bShiftForcesNeedPbc is true */
584 assert(NULL != fshift);
585 for (i = 0; i < ind->nsend[nzone]; i++)
587 at0 = cgindex[index[i]];
588 at1 = cgindex[index[i]+1];
589 for (j = at0; j < at1; j++)
591 rvec_inc(f[j], buf[n]);
592 /* Add this force to the shift force */
593 rvec_inc(fshift[is], buf[n]);
600 for (i = 0; i < ind->nsend[nzone]; i++)
602 at0 = cgindex[index[i]];
603 at1 = cgindex[index[i]+1];
604 for (j = at0; j < at1; j++)
606 /* Rotate the force */
607 f[j][XX] += buf[n][XX];
608 f[j][YY] -= buf[n][YY];
609 f[j][ZZ] -= buf[n][ZZ];
612 /* Add this force to the shift force */
613 rvec_inc(fshift[is], buf[n]);
624 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
626 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
627 int *index, *cgindex;
628 gmx_domdec_comm_t *comm;
629 gmx_domdec_comm_dim_t *cd;
630 gmx_domdec_ind_t *ind;
635 cgindex = dd->cgindex;
637 buf = &comm->vbuf.v[0][0];
640 nat_tot = dd->nat_home;
641 for (d = 0; d < dd->ndim; d++)
644 for (p = 0; p < cd->np; p++)
649 for (i = 0; i < ind->nsend[nzone]; i++)
651 at0 = cgindex[index[i]];
652 at1 = cgindex[index[i]+1];
653 for (j = at0; j < at1; j++)
666 rbuf = &comm->vbuf2.v[0][0];
668 /* Send and receive the coordinates */
669 dd_sendrecv_real(dd, d, dddirBackward,
670 buf, ind->nsend[nzone+1],
671 rbuf, ind->nrecv[nzone+1]);
675 for (zone = 0; zone < nzone; zone++)
677 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
684 nat_tot += ind->nrecv[nzone+1];
690 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
692 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
693 int *index, *cgindex;
694 gmx_domdec_comm_t *comm;
695 gmx_domdec_comm_dim_t *cd;
696 gmx_domdec_ind_t *ind;
701 cgindex = dd->cgindex;
703 buf = &comm->vbuf.v[0][0];
705 nzone = comm->zones.n/2;
706 nat_tot = dd->nat_tot;
707 for (d = dd->ndim-1; d >= 0; d--)
710 for (p = cd->np-1; p >= 0; p--)
713 nat_tot -= ind->nrecv[nzone+1];
720 sbuf = &comm->vbuf2.v[0][0];
722 for (zone = 0; zone < nzone; zone++)
724 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
731 /* Communicate the forces */
732 dd_sendrecv_real(dd, d, dddirForward,
733 sbuf, ind->nrecv[nzone+1],
734 buf, ind->nsend[nzone+1]);
736 /* Add the received forces */
738 for (i = 0; i < ind->nsend[nzone]; i++)
740 at0 = cgindex[index[i]];
741 at1 = cgindex[index[i]+1];
742 for (j = at0; j < at1; j++)
753 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
755 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
757 zone->min0, zone->max1,
758 zone->mch0, zone->mch0,
759 zone->p1_0, zone->p1_1);
763 #define DDZONECOMM_MAXZONE 5
764 #define DDZONECOMM_BUFSIZE 3
766 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
767 int ddimind, int direction,
768 gmx_ddzone_t *buf_s, int n_s,
769 gmx_ddzone_t *buf_r, int n_r)
771 #define ZBS DDZONECOMM_BUFSIZE
772 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
773 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
776 for (i = 0; i < n_s; i++)
778 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
779 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
780 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
781 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
782 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
783 vbuf_s[i*ZBS+1][2] = 0;
784 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
785 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
786 vbuf_s[i*ZBS+2][2] = 0;
789 dd_sendrecv_rvec(dd, ddimind, direction,
793 for (i = 0; i < n_r; i++)
795 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
796 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
797 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
798 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
799 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
800 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
801 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
807 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
808 rvec cell_ns_x0, rvec cell_ns_x1)
810 int d, d1, dim, pos, buf_size, i, j, p, npulse, npulse_min;
812 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
813 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
814 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
815 rvec extr_s[2], extr_r[2];
817 real dist_d, c = 0, det;
818 gmx_domdec_comm_t *comm;
823 for (d = 1; d < dd->ndim; d++)
826 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
827 zp->min0 = cell_ns_x0[dim];
828 zp->max1 = cell_ns_x1[dim];
829 zp->min1 = cell_ns_x1[dim];
830 zp->mch0 = cell_ns_x0[dim];
831 zp->mch1 = cell_ns_x1[dim];
832 zp->p1_0 = cell_ns_x0[dim];
833 zp->p1_1 = cell_ns_x1[dim];
836 for (d = dd->ndim-2; d >= 0; d--)
839 bPBC = (dim < ddbox->npbcdim);
841 /* Use an rvec to store two reals */
842 extr_s[d][0] = comm->cell_f0[d+1];
843 extr_s[d][1] = comm->cell_f1[d+1];
844 extr_s[d][2] = comm->cell_f1[d+1];
847 /* Store the extremes in the backward sending buffer,
848 * so the get updated separately from the forward communication.
850 for (d1 = d; d1 < dd->ndim-1; d1++)
852 /* We invert the order to be able to use the same loop for buf_e */
853 buf_s[pos].min0 = extr_s[d1][1];
854 buf_s[pos].max1 = extr_s[d1][0];
855 buf_s[pos].min1 = extr_s[d1][2];
858 /* Store the cell corner of the dimension we communicate along */
859 buf_s[pos].p1_0 = comm->cell_x0[dim];
864 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
867 if (dd->ndim == 3 && d == 0)
869 buf_s[pos] = comm->zone_d2[0][1];
871 buf_s[pos] = comm->zone_d1[0];
875 /* We only need to communicate the extremes
876 * in the forward direction
878 npulse = comm->cd[d].np;
881 /* Take the minimum to avoid double communication */
882 npulse_min = std::min(npulse, dd->nc[dim]-1-npulse);
886 /* Without PBC we should really not communicate over
887 * the boundaries, but implementing that complicates
888 * the communication setup and therefore we simply
889 * do all communication, but ignore some data.
893 for (p = 0; p < npulse_min; p++)
895 /* Communicate the extremes forward */
896 bUse = (bPBC || dd->ci[dim] > 0);
898 dd_sendrecv_rvec(dd, d, dddirForward,
899 extr_s+d, dd->ndim-d-1,
900 extr_r+d, dd->ndim-d-1);
904 for (d1 = d; d1 < dd->ndim-1; d1++)
906 extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
907 extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
908 extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]);
914 for (p = 0; p < npulse; p++)
916 /* Communicate all the zone information backward */
917 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
919 dd_sendrecv_ddzone(dd, d, dddirBackward,
926 for (d1 = d+1; d1 < dd->ndim; d1++)
928 /* Determine the decrease of maximum required
929 * communication height along d1 due to the distance along d,
930 * this avoids a lot of useless atom communication.
932 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
934 if (ddbox->tric_dir[dim])
936 /* c is the off-diagonal coupling between the cell planes
937 * along directions d and d1.
939 c = ddbox->v[dim][dd->dim[d1]][dim];
945 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
948 dh[d1] = comm->cutoff - (c*dist_d + std::sqrt(det))/(1 + c*c);
952 /* A negative value signals out of range */
958 /* Accumulate the extremes over all pulses */
959 for (i = 0; i < buf_size; i++)
969 buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
970 buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
971 buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
974 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
982 if (bUse && dh[d1] >= 0)
984 buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
985 buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
988 /* Copy the received buffer to the send buffer,
989 * to pass the data through with the next pulse.
993 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
994 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
996 /* Store the extremes */
999 for (d1 = d; d1 < dd->ndim-1; d1++)
1001 extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
1002 extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
1003 extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1);
1007 if (d == 1 || (d == 0 && dd->ndim == 3))
1009 for (i = d; i < 2; i++)
1011 comm->zone_d2[1-d][i] = buf_e[pos];
1017 comm->zone_d1[1] = buf_e[pos];
1027 for (i = 0; i < 2; i++)
1031 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1033 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1034 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1040 for (i = 0; i < 2; i++)
1042 for (j = 0; j < 2; j++)
1046 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1048 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1049 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1053 for (d = 1; d < dd->ndim; d++)
1055 comm->cell_f_max0[d] = extr_s[d-1][0];
1056 comm->cell_f_min1[d] = extr_s[d-1][1];
1059 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1060 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1065 static void dd_collect_cg(gmx_domdec_t *dd,
1066 t_state *state_local)
1068 gmx_domdec_master_t *ma = nullptr;
1069 int buf2[2], *ibuf, i, ncg_home = 0, *cg = nullptr, nat_home = 0;
1071 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1073 /* The master has the correct distribution */
1077 if (state_local->ddp_count == dd->ddp_count)
1079 /* The local state and DD are in sync, use the DD indices */
1080 ncg_home = dd->ncg_home;
1082 nat_home = dd->nat_home;
1084 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1086 /* The DD is out of sync with the local state, but we have stored
1087 * the cg indices with the local state, so we can use those.
1091 cgs_gl = &dd->comm->cgs_gl;
1093 ncg_home = state_local->cg_gl.size();
1094 cg = state_local->cg_gl.data();
1096 for (i = 0; i < ncg_home; i++)
1098 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1103 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1117 /* Collect the charge group and atom counts on the master */
1118 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1123 for (i = 0; i < dd->nnodes; i++)
1125 ma->ncg[i] = ma->ibuf[2*i];
1126 ma->nat[i] = ma->ibuf[2*i+1];
1127 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1130 /* Make byte counts and indices */
1131 for (i = 0; i < dd->nnodes; i++)
1133 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1134 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1138 fprintf(debug, "Initial charge group distribution: ");
1139 for (i = 0; i < dd->nnodes; i++)
1141 fprintf(debug, " %d", ma->ncg[i]);
1143 fprintf(debug, "\n");
1147 /* Collect the charge group indices on the master */
1149 ncg_home*sizeof(int), cg,
1150 DDMASTER(dd) ? ma->ibuf : nullptr,
1151 DDMASTER(dd) ? ma->ibuf+dd->nnodes : nullptr,
1152 DDMASTER(dd) ? ma->cg : nullptr);
1154 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1157 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1158 const rvec *lv, rvec *v)
1160 gmx_domdec_master_t *ma;
1161 int n, i, c, a, nalloc = 0;
1162 rvec *buf = nullptr;
1170 MPI_Send(const_cast<void *>(static_cast<const void *>(lv)), dd->nat_home*sizeof(rvec), MPI_BYTE,
1171 DDMASTERRANK(dd), dd->rank, dd->mpi_comm_all);
1176 /* Copy the master coordinates to the global array */
1177 cgs_gl = &dd->comm->cgs_gl;
1179 n = DDMASTERRANK(dd);
1181 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1183 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1185 copy_rvec(lv[a++], v[c]);
1189 for (n = 0; n < dd->nnodes; n++)
1193 if (ma->nat[n] > nalloc)
1195 nalloc = over_alloc_dd(ma->nat[n]);
1196 srenew(buf, nalloc);
1199 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1200 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1203 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1205 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1207 copy_rvec(buf[a++], v[c]);
1216 static void get_commbuffer_counts(gmx_domdec_t *dd,
1217 int **counts, int **disps)
1219 gmx_domdec_master_t *ma;
1224 /* Make the rvec count and displacment arrays */
1226 *disps = ma->ibuf + dd->nnodes;
1227 for (n = 0; n < dd->nnodes; n++)
1229 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1230 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1234 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1235 const rvec *lv, rvec *v)
1237 gmx_domdec_master_t *ma;
1238 int *rcounts = nullptr, *disps = nullptr;
1240 rvec *buf = nullptr;
1247 get_commbuffer_counts(dd, &rcounts, &disps);
1252 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1256 cgs_gl = &dd->comm->cgs_gl;
1259 for (n = 0; n < dd->nnodes; n++)
1261 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1263 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1265 copy_rvec(buf[a++], v[c]);
1272 void dd_collect_vec(gmx_domdec_t *dd,
1273 t_state *state_local,
1274 const PaddedRVecVector *localVector,
1277 dd_collect_cg(dd, state_local);
1279 const rvec *lv = as_rvec_array(localVector->data());
1281 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1283 dd_collect_vec_sendrecv(dd, lv, v);
1287 dd_collect_vec_gatherv(dd, lv, v);
1291 void dd_collect_vec(gmx_domdec_t *dd,
1292 t_state *state_local,
1293 const PaddedRVecVector *localVector,
1294 PaddedRVecVector *vector)
1296 dd_collect_vec(dd, state_local, localVector, as_rvec_array(vector->data()));
1300 void dd_collect_state(gmx_domdec_t *dd,
1301 t_state *state_local, t_state *state)
1303 int nh = state->nhchainlength;
1307 for (int i = 0; i < efptNR; i++)
1309 state->lambda[i] = state_local->lambda[i];
1311 state->fep_state = state_local->fep_state;
1312 state->veta = state_local->veta;
1313 state->vol0 = state_local->vol0;
1314 copy_mat(state_local->box, state->box);
1315 copy_mat(state_local->boxv, state->boxv);
1316 copy_mat(state_local->svir_prev, state->svir_prev);
1317 copy_mat(state_local->fvir_prev, state->fvir_prev);
1318 copy_mat(state_local->pres_prev, state->pres_prev);
1320 for (int i = 0; i < state_local->ngtc; i++)
1322 for (int j = 0; j < nh; j++)
1324 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1325 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1327 state->therm_integral[i] = state_local->therm_integral[i];
1329 for (int i = 0; i < state_local->nnhpres; i++)
1331 for (int j = 0; j < nh; j++)
1333 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1334 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1337 state->baros_integral = state_local->baros_integral;
1339 if (state_local->flags & (1 << estX))
1341 dd_collect_vec(dd, state_local, &state_local->x, &state->x);
1343 if (state_local->flags & (1 << estV))
1345 dd_collect_vec(dd, state_local, &state_local->v, &state->v);
1347 if (state_local->flags & (1 << estCGP))
1349 dd_collect_vec(dd, state_local, &state_local->cg_p, &state->cg_p);
1353 static void dd_resize_state(t_state *state, PaddedRVecVector *f, int natoms)
1357 fprintf(debug, "Resizing state: currently %d, required %d\n", state->natoms, natoms);
1360 state_change_natoms(state, natoms);
1364 /* We need to allocate one element extra, since we might use
1365 * (unaligned) 4-wide SIMD loads to access rvec entries.
1367 f->resize(natoms + 1);
1371 static void dd_check_alloc_ncg(t_forcerec *fr,
1373 PaddedRVecVector *f,
1374 int numChargeGroups)
1376 if (numChargeGroups > fr->cg_nalloc)
1380 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, numChargeGroups, over_alloc_dd(numChargeGroups));
1382 fr->cg_nalloc = over_alloc_dd(numChargeGroups);
1383 srenew(fr->cginfo, fr->cg_nalloc);
1384 if (fr->cutoff_scheme == ecutsGROUP)
1386 srenew(fr->cg_cm, fr->cg_nalloc);
1389 if (fr->cutoff_scheme == ecutsVERLET)
1391 /* We don't use charge groups, we use x in state to set up
1392 * the atom communication.
1394 dd_resize_state(state, f, numChargeGroups);
1398 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1401 gmx_domdec_master_t *ma;
1402 int n, i, c, a, nalloc = 0;
1403 rvec *buf = nullptr;
1409 for (n = 0; n < dd->nnodes; n++)
1413 if (ma->nat[n] > nalloc)
1415 nalloc = over_alloc_dd(ma->nat[n]);
1416 srenew(buf, nalloc);
1418 /* Use lv as a temporary buffer */
1420 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1422 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1424 copy_rvec(v[c], buf[a++]);
1427 if (a != ma->nat[n])
1429 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1434 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1435 DDRANK(dd, n), n, dd->mpi_comm_all);
1440 n = DDMASTERRANK(dd);
1442 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1444 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1446 copy_rvec(v[c], lv[a++]);
1453 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1454 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1459 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1462 gmx_domdec_master_t *ma;
1463 int *scounts = nullptr, *disps = nullptr;
1465 rvec *buf = nullptr;
1471 get_commbuffer_counts(dd, &scounts, &disps);
1475 for (n = 0; n < dd->nnodes; n++)
1477 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1479 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1481 copy_rvec(v[c], buf[a++]);
1487 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1490 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1492 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1494 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1498 dd_distribute_vec_scatterv(dd, cgs, v, lv);
1502 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1504 if (dfhist == nullptr)
1509 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1510 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1511 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1513 if (dfhist->nlambda > 0)
1515 int nlam = dfhist->nlambda;
1516 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1517 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1518 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1519 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1520 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1521 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
1523 for (int i = 0; i < nlam; i++)
1525 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1526 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1527 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1528 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1529 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1530 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
1535 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1536 t_state *state, t_state *state_local,
1537 PaddedRVecVector *f)
1539 int nh = state->nhchainlength;
1543 for (int i = 0; i < efptNR; i++)
1545 state_local->lambda[i] = state->lambda[i];
1547 state_local->fep_state = state->fep_state;
1548 state_local->veta = state->veta;
1549 state_local->vol0 = state->vol0;
1550 copy_mat(state->box, state_local->box);
1551 copy_mat(state->box_rel, state_local->box_rel);
1552 copy_mat(state->boxv, state_local->boxv);
1553 copy_mat(state->svir_prev, state_local->svir_prev);
1554 copy_mat(state->fvir_prev, state_local->fvir_prev);
1555 if (state->dfhist != nullptr)
1557 copy_df_history(state_local->dfhist, state->dfhist);
1559 for (int i = 0; i < state_local->ngtc; i++)
1561 for (int j = 0; j < nh; j++)
1563 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1564 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1566 state_local->therm_integral[i] = state->therm_integral[i];
1568 for (int i = 0; i < state_local->nnhpres; i++)
1570 for (int j = 0; j < nh; j++)
1572 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1573 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1576 state_local->baros_integral = state->baros_integral;
1578 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda.data());
1579 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1580 dd_bcast(dd, sizeof(real), &state_local->veta);
1581 dd_bcast(dd, sizeof(real), &state_local->vol0);
1582 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1583 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1584 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1585 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1586 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1587 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi.data());
1588 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi.data());
1589 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral.data());
1590 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi.data());
1591 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi.data());
1593 /* communicate df_history -- required for restarting from checkpoint */
1594 dd_distribute_dfhist(dd, state_local->dfhist);
1596 dd_resize_state(state_local, f, dd->nat_home);
1598 if (state_local->flags & (1 << estX))
1600 dd_distribute_vec(dd, cgs, as_rvec_array(state->x.data()), as_rvec_array(state_local->x.data()));
1602 if (state_local->flags & (1 << estV))
1604 dd_distribute_vec(dd, cgs, as_rvec_array(state->v.data()), as_rvec_array(state_local->v.data()));
1606 if (state_local->flags & (1 << estCGP))
1608 dd_distribute_vec(dd, cgs, as_rvec_array(state->cg_p.data()), as_rvec_array(state_local->cg_p.data()));
1612 static char dim2char(int dim)
1618 case XX: c = 'X'; break;
1619 case YY: c = 'Y'; break;
1620 case ZZ: c = 'Z'; break;
1621 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
1627 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1628 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1630 rvec grid_s[2], *grid_r = nullptr, cx, r;
1631 char fname[STRLEN], buf[22];
1633 int a, i, d, z, y, x;
1637 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1638 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1642 snew(grid_r, 2*dd->nnodes);
1645 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : nullptr);
1649 for (d = 0; d < DIM; d++)
1651 for (i = 0; i < DIM; i++)
1659 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1661 tric[d][i] = box[i][d]/box[i][i];
1670 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1671 out = gmx_fio_fopen(fname, "w");
1672 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1674 for (i = 0; i < dd->nnodes; i++)
1676 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1677 for (d = 0; d < DIM; d++)
1679 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1681 for (z = 0; z < 2; z++)
1683 for (y = 0; y < 2; y++)
1685 for (x = 0; x < 2; x++)
1687 cx[XX] = grid_r[i*2+x][XX];
1688 cx[YY] = grid_r[i*2+y][YY];
1689 cx[ZZ] = grid_r[i*2+z][ZZ];
1691 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1692 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
1696 for (d = 0; d < DIM; d++)
1698 for (x = 0; x < 4; x++)
1702 case 0: y = 1 + i*8 + 2*x; break;
1703 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1704 case 2: y = 1 + i*8 + x; break;
1706 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
1710 gmx_fio_fclose(out);
1715 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
1716 const gmx_mtop_t *mtop, t_commrec *cr,
1717 int natoms, rvec x[], matrix box)
1719 char fname[STRLEN], buf[22];
1721 int i, ii, resnr, c;
1722 const char *atomname, *resname;
1729 natoms = dd->comm->nat[ddnatVSITE];
1732 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
1734 out = gmx_fio_fopen(fname, "w");
1736 fprintf(out, "TITLE %s\n", title);
1737 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1739 for (i = 0; i < natoms; i++)
1741 ii = dd->gatindex[i];
1742 mtopGetAtomAndResidueName(mtop, ii, &molb, &atomname, &resnr, &resname, nullptr);
1743 if (i < dd->comm->nat[ddnatZONE])
1746 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
1752 else if (i < dd->comm->nat[ddnatVSITE])
1754 b = dd->comm->zones.n;
1758 b = dd->comm->zones.n + 1;
1760 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
1761 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
1763 fprintf(out, "TER\n");
1765 gmx_fio_fclose(out);
1768 real dd_cutoff_multibody(const gmx_domdec_t *dd)
1770 gmx_domdec_comm_t *comm;
1777 if (comm->bInterCGBondeds)
1779 if (comm->cutoff_mbody > 0)
1781 r = comm->cutoff_mbody;
1785 /* cutoff_mbody=0 means we do not have DLB */
1786 r = comm->cellsize_min[dd->dim[0]];
1787 for (di = 1; di < dd->ndim; di++)
1789 r = std::min(r, comm->cellsize_min[dd->dim[di]]);
1791 if (comm->bBondComm)
1793 r = std::max(r, comm->cutoff_mbody);
1797 r = std::min(r, comm->cutoff);
1805 real dd_cutoff_twobody(const gmx_domdec_t *dd)
1809 r_mb = dd_cutoff_multibody(dd);
1811 return std::max(dd->comm->cutoff, r_mb);
1815 static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
1820 nc = dd->nc[dd->comm->cartpmedim];
1821 ntot = dd->comm->ntot[dd->comm->cartpmedim];
1822 copy_ivec(coord, coord_pme);
1823 coord_pme[dd->comm->cartpmedim] =
1824 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
1827 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
1832 npme = dd->comm->npmenodes;
1834 /* Here we assign a PME node to communicate with this DD node
1835 * by assuming that the major index of both is x.
1836 * We add cr->npmenodes/2 to obtain an even distribution.
1838 return (ddindex*npme + npme/2)/npp;
1841 static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
1846 snew(pme_rank, dd->comm->npmenodes);
1848 for (i = 0; i < dd->nnodes; i++)
1850 p0 = ddindex2pmeindex(dd, i);
1851 p1 = ddindex2pmeindex(dd, i+1);
1852 if (i+1 == dd->nnodes || p1 > p0)
1856 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
1858 pme_rank[n] = i + 1 + n;
1866 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
1874 if (dd->comm->bCartesian) {
1875 gmx_ddindex2xyz(dd->nc,ddindex,coords);
1876 dd_coords2pmecoords(dd,coords,coords_pme);
1877 copy_ivec(dd->ntot,nc);
1878 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1879 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1881 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
1883 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
1889 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
1894 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
1896 gmx_domdec_comm_t *comm;
1898 int ddindex, nodeid = -1;
1900 comm = cr->dd->comm;
1905 if (comm->bCartesianPP_PME)
1908 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
1913 ddindex = dd_index(cr->dd->nc, coords);
1914 if (comm->bCartesianPP)
1916 nodeid = comm->ddindex2simnodeid[ddindex];
1922 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
1934 static int dd_simnode2pmenode(const gmx_domdec_t *dd,
1935 const t_commrec gmx_unused *cr,
1940 const gmx_domdec_comm_t *comm = dd->comm;
1942 /* This assumes a uniform x domain decomposition grid cell size */
1943 if (comm->bCartesianPP_PME)
1946 ivec coord, coord_pme;
1947 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
1948 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1950 /* This is a PP node */
1951 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1952 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
1956 else if (comm->bCartesianPP)
1958 if (sim_nodeid < dd->nnodes)
1960 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1965 /* This assumes DD cells with identical x coordinates
1966 * are numbered sequentially.
1968 if (dd->comm->pmenodes == nullptr)
1970 if (sim_nodeid < dd->nnodes)
1972 /* The DD index equals the nodeid */
1973 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1979 while (sim_nodeid > dd->comm->pmenodes[i])
1983 if (sim_nodeid < dd->comm->pmenodes[i])
1985 pmenode = dd->comm->pmenodes[i];
1993 void get_pme_nnodes(const gmx_domdec_t *dd,
1994 int *npmenodes_x, int *npmenodes_y)
1998 *npmenodes_x = dd->comm->npmenodes_x;
1999 *npmenodes_y = dd->comm->npmenodes_y;
2008 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2009 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2013 ivec coord, coord_pme;
2017 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2020 for (x = 0; x < dd->nc[XX]; x++)
2022 for (y = 0; y < dd->nc[YY]; y++)
2024 for (z = 0; z < dd->nc[ZZ]; z++)
2026 if (dd->comm->bCartesianPP_PME)
2031 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2032 if (dd->ci[XX] == coord_pme[XX] &&
2033 dd->ci[YY] == coord_pme[YY] &&
2034 dd->ci[ZZ] == coord_pme[ZZ])
2036 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2041 /* The slab corresponds to the nodeid in the PME group */
2042 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2044 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2051 /* The last PP-only node is the peer node */
2052 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2056 fprintf(debug, "Receive coordinates from PP ranks:");
2057 for (x = 0; x < *nmy_ddnodes; x++)
2059 fprintf(debug, " %d", (*my_ddnodes)[x]);
2061 fprintf(debug, "\n");
2065 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
2067 gmx_bool bReceive = TRUE;
2069 if (cr->npmenodes < dd->nnodes)
2071 gmx_domdec_comm_t *comm = dd->comm;
2072 if (comm->bCartesianPP_PME)
2075 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
2077 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2078 coords[comm->cartpmedim]++;
2079 if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2082 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2083 if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
2085 /* This is not the last PP node for pmenode */
2090 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
2095 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
2096 if (cr->sim_nodeid+1 < cr->nnodes &&
2097 dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
2099 /* This is not the last PP node for pmenode */
2108 static void set_zones_ncg_home(gmx_domdec_t *dd)
2110 gmx_domdec_zones_t *zones;
2113 zones = &dd->comm->zones;
2115 zones->cg_range[0] = 0;
2116 for (i = 1; i < zones->n+1; i++)
2118 zones->cg_range[i] = dd->ncg_home;
2120 /* zone_ncg1[0] should always be equal to ncg_home */
2121 dd->comm->zone_ncg1[0] = dd->ncg_home;
2124 static void rebuild_cgindex(gmx_domdec_t *dd,
2125 const int *gcgs_index, const t_state *state)
2127 int * gmx_restrict dd_cg_gl = dd->index_gl;
2128 int * gmx_restrict cgindex = dd->cgindex;
2131 /* Copy back the global charge group indices from state
2132 * and rebuild the local charge group to atom index.
2135 for (unsigned int i = 0; i < state->cg_gl.size(); i++)
2138 int cg_gl = state->cg_gl[i];
2139 dd_cg_gl[i] = cg_gl;
2140 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2142 cgindex[state->cg_gl.size()] = nat;
2144 dd->ncg_home = state->cg_gl.size();
2147 set_zones_ncg_home(dd);
2150 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2152 while (cg >= cginfo_mb->cg_end)
2157 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2160 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2161 t_forcerec *fr, char *bLocalCG)
2163 cginfo_mb_t *cginfo_mb;
2169 cginfo_mb = fr->cginfo_mb;
2170 cginfo = fr->cginfo;
2172 for (cg = cg0; cg < cg1; cg++)
2174 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2178 if (bLocalCG != nullptr)
2180 for (cg = cg0; cg < cg1; cg++)
2182 bLocalCG[index_gl[cg]] = TRUE;
2187 static void make_dd_indices(gmx_domdec_t *dd,
2188 const int *gcgs_index, int cg_start)
2190 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2191 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2194 if (dd->nat_tot > dd->gatindex_nalloc)
2196 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2197 srenew(dd->gatindex, dd->gatindex_nalloc);
2200 nzone = dd->comm->zones.n;
2201 zone2cg = dd->comm->zones.cg_range;
2202 zone_ncg1 = dd->comm->zone_ncg1;
2203 index_gl = dd->index_gl;
2204 gatindex = dd->gatindex;
2205 bCGs = dd->comm->bCGs;
2207 if (zone2cg[1] != dd->ncg_home)
2209 gmx_incons("dd->ncg_zone is not up to date");
2212 /* Make the local to global and global to local atom index */
2213 a = dd->cgindex[cg_start];
2214 for (zone = 0; zone < nzone; zone++)
2222 cg0 = zone2cg[zone];
2224 cg1 = zone2cg[zone+1];
2225 cg1_p1 = cg0 + zone_ncg1[zone];
2227 for (cg = cg0; cg < cg1; cg++)
2232 /* Signal that this cg is from more than one pulse away */
2235 cg_gl = index_gl[cg];
2238 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2241 ga2la_set(dd->ga2la, a_gl, a, zone1);
2247 gatindex[a] = cg_gl;
2248 ga2la_set(dd->ga2la, cg_gl, a, zone1);
2255 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2261 if (bLocalCG == nullptr)
2265 for (i = 0; i < dd->ncg_tot; i++)
2267 if (!bLocalCG[dd->index_gl[i]])
2270 "DD rank %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
2275 for (i = 0; i < ncg_sys; i++)
2282 if (ngl != dd->ncg_tot)
2284 fprintf(stderr, "DD rank %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
2291 static void check_index_consistency(gmx_domdec_t *dd,
2292 int natoms_sys, int ncg_sys,
2295 int nerr, ngl, i, a, cell;
2300 if (dd->comm->DD_debug > 1)
2302 snew(have, natoms_sys);
2303 for (a = 0; a < dd->nat_tot; a++)
2305 if (have[dd->gatindex[a]] > 0)
2307 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2311 have[dd->gatindex[a]] = a + 1;
2317 snew(have, dd->nat_tot);
2320 for (i = 0; i < natoms_sys; i++)
2322 if (ga2la_get(dd->ga2la, i, &a, &cell))
2324 if (a >= dd->nat_tot)
2326 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2332 if (dd->gatindex[a] != i)
2334 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2341 if (ngl != dd->nat_tot)
2344 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
2345 dd->rank, where, ngl, dd->nat_tot);
2347 for (a = 0; a < dd->nat_tot; a++)
2352 "DD rank %d, %s: local atom %d, global %d has no global index\n",
2353 dd->rank, where, a+1, dd->gatindex[a]+1);
2358 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2362 gmx_fatal(FARGS, "DD rank %d, %s: %d atom/cg index inconsistencies",
2363 dd->rank, where, nerr);
2367 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2374 /* Clear the whole list without searching */
2375 ga2la_clear(dd->ga2la);
2379 for (i = a_start; i < dd->nat_tot; i++)
2381 ga2la_del(dd->ga2la, dd->gatindex[i]);
2385 bLocalCG = dd->comm->bLocalCG;
2388 for (i = cg_start; i < dd->ncg_tot; i++)
2390 bLocalCG[dd->index_gl[i]] = FALSE;
2394 dd_clear_local_vsite_indices(dd);
2396 if (dd->constraints)
2398 dd_clear_local_constraint_indices(dd);
2402 /* This function should be used for moving the domain boudaries during DLB,
2403 * for obtaining the minimum cell size. It checks the initially set limit
2404 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2405 * and, possibly, a longer cut-off limit set for PME load balancing.
2407 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2411 cellsize_min = comm->cellsize_min[dim];
2413 if (!comm->bVacDLBNoLimit)
2415 /* The cut-off might have changed, e.g. by PME load balacning,
2416 * from the value used to set comm->cellsize_min, so check it.
2418 cellsize_min = std::max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2420 if (comm->bPMELoadBalDLBLimits)
2422 /* Check for the cut-off limit set by the PME load balancing */
2423 cellsize_min = std::max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2427 return cellsize_min;
2430 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2433 real grid_jump_limit;
2435 /* The distance between the boundaries of cells at distance
2436 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2437 * and by the fact that cells should not be shifted by more than
2438 * half their size, such that cg's only shift by one cell
2439 * at redecomposition.
2441 grid_jump_limit = comm->cellsize_limit;
2442 if (!comm->bVacDLBNoLimit)
2444 if (comm->bPMELoadBalDLBLimits)
2446 cutoff = std::max(cutoff, comm->PMELoadBal_max_cutoff);
2448 grid_jump_limit = std::max(grid_jump_limit,
2449 cutoff/comm->cd[dim_ind].np);
2452 return grid_jump_limit;
2455 static gmx_bool check_grid_jump(gmx_int64_t step,
2461 gmx_domdec_comm_t *comm;
2470 for (d = 1; d < dd->ndim; d++)
2473 limit = grid_jump_limit(comm, cutoff, d);
2474 bfac = ddbox->box_size[dim];
2475 if (ddbox->tric_dir[dim])
2477 bfac *= ddbox->skew_fac[dim];
2479 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2480 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2488 /* This error should never be triggered under normal
2489 * circumstances, but you never know ...
2491 gmx_fatal(FARGS, "step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
2492 gmx_step_str(step, buf),
2493 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2501 static int dd_load_count(gmx_domdec_comm_t *comm)
2503 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2506 static float dd_force_load(gmx_domdec_comm_t *comm)
2513 if (comm->eFlop > 1)
2515 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2520 load = comm->cycl[ddCyclF];
2521 if (comm->cycl_n[ddCyclF] > 1)
2523 /* Subtract the maximum of the last n cycle counts
2524 * to get rid of possible high counts due to other sources,
2525 * for instance system activity, that would otherwise
2526 * affect the dynamic load balancing.
2528 load -= comm->cycl_max[ddCyclF];
2532 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2534 float gpu_wait, gpu_wait_sum;
2536 gpu_wait = comm->cycl[ddCyclWaitGPU];
2537 if (comm->cycl_n[ddCyclF] > 1)
2539 /* We should remove the WaitGPU time of the same MD step
2540 * as the one with the maximum F time, since the F time
2541 * and the wait time are not independent.
2542 * Furthermore, the step for the max F time should be chosen
2543 * the same on all ranks that share the same GPU.
2544 * But to keep the code simple, we remove the average instead.
2545 * The main reason for artificially long times at some steps
2546 * is spurious CPU activity or MPI time, so we don't expect
2547 * that changes in the GPU wait time matter a lot here.
2549 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2551 /* Sum the wait times over the ranks that share the same GPU */
2552 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2553 comm->mpi_comm_gpu_shared);
2554 /* Replace the wait time by the average over the ranks */
2555 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
2563 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2565 gmx_domdec_comm_t *comm;
2570 snew(*dim_f, dd->nc[dim]+1);
2572 for (i = 1; i < dd->nc[dim]; i++)
2574 if (comm->slb_frac[dim])
2576 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2580 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2583 (*dim_f)[dd->nc[dim]] = 1;
2586 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2588 int pmeindex, slab, nso, i;
2591 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2597 ddpme->dim = dimind;
2599 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2601 ddpme->nslab = (ddpme->dim == 0 ?
2602 dd->comm->npmenodes_x :
2603 dd->comm->npmenodes_y);
2605 if (ddpme->nslab <= 1)
2610 nso = dd->comm->npmenodes/ddpme->nslab;
2611 /* Determine for each PME slab the PP location range for dimension dim */
2612 snew(ddpme->pp_min, ddpme->nslab);
2613 snew(ddpme->pp_max, ddpme->nslab);
2614 for (slab = 0; slab < ddpme->nslab; slab++)
2616 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2617 ddpme->pp_max[slab] = 0;
2619 for (i = 0; i < dd->nnodes; i++)
2621 ddindex2xyz(dd->nc, i, xyz);
2622 /* For y only use our y/z slab.
2623 * This assumes that the PME x grid size matches the DD grid size.
2625 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2627 pmeindex = ddindex2pmeindex(dd, i);
2630 slab = pmeindex/nso;
2634 slab = pmeindex % ddpme->nslab;
2636 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
2637 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
2641 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
2644 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
2646 if (dd->comm->ddpme[0].dim == XX)
2648 return dd->comm->ddpme[0].maxshift;
2656 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
2658 if (dd->comm->ddpme[0].dim == YY)
2660 return dd->comm->ddpme[0].maxshift;
2662 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2664 return dd->comm->ddpme[1].maxshift;
2672 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
2673 gmx_bool bUniform, const gmx_ddbox_t *ddbox,
2676 gmx_domdec_comm_t *comm;
2679 real range, pme_boundary;
2683 nc = dd->nc[ddpme->dim];
2686 if (!ddpme->dim_match)
2688 /* PP decomposition is not along dim: the worst situation */
2691 else if (ns <= 3 || (bUniform && ns == nc))
2693 /* The optimal situation */
2698 /* We need to check for all pme nodes which nodes they
2699 * could possibly need to communicate with.
2701 xmin = ddpme->pp_min;
2702 xmax = ddpme->pp_max;
2703 /* Allow for atoms to be maximally 2/3 times the cut-off
2704 * out of their DD cell. This is a reasonable balance between
2705 * between performance and support for most charge-group/cut-off
2708 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
2709 /* Avoid extra communication when we are exactly at a boundary */
2713 for (s = 0; s < ns; s++)
2715 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
2716 pme_boundary = (real)s/ns;
2719 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
2721 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
2725 pme_boundary = (real)(s+1)/ns;
2728 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
2730 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
2737 ddpme->maxshift = sh;
2741 fprintf(debug, "PME slab communication range for dim %d is %d\n",
2742 ddpme->dim, ddpme->maxshift);
2746 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
2750 for (d = 0; d < dd->ndim; d++)
2753 if (dim < ddbox->nboundeddim &&
2754 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
2755 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
2757 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
2758 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
2759 dd->nc[dim], dd->comm->cellsize_limit);
2765 setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY
2768 /* Set the domain boundaries. Use for static (or no) load balancing,
2769 * and also for the starting state for dynamic load balancing.
2770 * setmode determine if and where the boundaries are stored, use enum above.
2771 * Returns the number communication pulses in npulse.
2773 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, const gmx_ddbox_t *ddbox,
2774 int setmode, ivec npulse)
2776 gmx_domdec_comm_t *comm;
2779 real *cell_x, cell_dx, cellsize;
2783 for (d = 0; d < DIM; d++)
2785 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
2787 if (dd->nc[d] == 1 || comm->slb_frac[d] == nullptr)
2790 cell_dx = ddbox->box_size[d]/dd->nc[d];
2793 case setcellsizeslbMASTER:
2794 for (j = 0; j < dd->nc[d]+1; j++)
2796 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
2799 case setcellsizeslbLOCAL:
2800 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
2801 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
2806 cellsize = cell_dx*ddbox->skew_fac[d];
2807 while (cellsize*npulse[d] < comm->cutoff)
2811 cellsize_min[d] = cellsize;
2815 /* Statically load balanced grid */
2816 /* Also when we are not doing a master distribution we determine
2817 * all cell borders in a loop to obtain identical values
2818 * to the master distribution case and to determine npulse.
2820 if (setmode == setcellsizeslbMASTER)
2822 cell_x = dd->ma->cell_x[d];
2826 snew(cell_x, dd->nc[d]+1);
2828 cell_x[0] = ddbox->box0[d];
2829 for (j = 0; j < dd->nc[d]; j++)
2831 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
2832 cell_x[j+1] = cell_x[j] + cell_dx;
2833 cellsize = cell_dx*ddbox->skew_fac[d];
2834 while (cellsize*npulse[d] < comm->cutoff &&
2835 npulse[d] < dd->nc[d]-1)
2839 cellsize_min[d] = std::min(cellsize_min[d], cellsize);
2841 if (setmode == setcellsizeslbLOCAL)
2843 comm->cell_x0[d] = cell_x[dd->ci[d]];
2844 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
2846 if (setmode != setcellsizeslbMASTER)
2851 /* The following limitation is to avoid that a cell would receive
2852 * some of its own home charge groups back over the periodic boundary.
2853 * Double charge groups cause trouble with the global indices.
2855 if (d < ddbox->npbcdim &&
2856 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
2858 char error_string[STRLEN];
2860 sprintf(error_string,
2861 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
2862 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
2864 dd->nc[d], dd->nc[d],
2865 dd->nnodes > dd->nc[d] ? "cells" : "ranks");
2867 if (setmode == setcellsizeslbLOCAL)
2869 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2874 gmx_fatal(FARGS, error_string);
2881 copy_rvec(cellsize_min, comm->cellsize_min);
2884 for (d = 0; d < comm->npmedecompdim; d++)
2886 set_pme_maxshift(dd, &comm->ddpme[d],
2887 comm->slb_frac[dd->dim[d]] == nullptr, ddbox,
2888 comm->ddpme[d].slb_dim_f);
2893 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
2894 int d, int dim, domdec_root_t *root,
2895 const gmx_ddbox_t *ddbox,
2896 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
2898 gmx_domdec_comm_t *comm;
2899 int ncd, i, j, nmin, nmin_old;
2900 gmx_bool bLimLo, bLimHi;
2902 real fac, halfway, cellsize_limit_f_i, region_size;
2903 gmx_bool bPBC, bLastHi = FALSE;
2904 int nrange[] = {range[0], range[1]};
2906 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
2912 bPBC = (dim < ddbox->npbcdim);
2914 cell_size = root->buf_ncd;
2918 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
2921 /* First we need to check if the scaling does not make cells
2922 * smaller than the smallest allowed size.
2923 * We need to do this iteratively, since if a cell is too small,
2924 * it needs to be enlarged, which makes all the other cells smaller,
2925 * which could in turn make another cell smaller than allowed.
2927 for (i = range[0]; i < range[1]; i++)
2929 root->bCellMin[i] = FALSE;
2935 /* We need the total for normalization */
2937 for (i = range[0]; i < range[1]; i++)
2939 if (root->bCellMin[i] == FALSE)
2941 fac += cell_size[i];
2944 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
2945 /* Determine the cell boundaries */
2946 for (i = range[0]; i < range[1]; i++)
2948 if (root->bCellMin[i] == FALSE)
2950 cell_size[i] *= fac;
2951 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
2953 cellsize_limit_f_i = 0;
2957 cellsize_limit_f_i = cellsize_limit_f;
2959 if (cell_size[i] < cellsize_limit_f_i)
2961 root->bCellMin[i] = TRUE;
2962 cell_size[i] = cellsize_limit_f_i;
2966 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
2969 while (nmin > nmin_old);
2972 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
2973 /* For this check we should not use DD_CELL_MARGIN,
2974 * but a slightly smaller factor,
2975 * since rounding could get use below the limit.
2977 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
2980 gmx_fatal(FARGS, "step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
2981 gmx_step_str(step, buf),
2982 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
2983 ncd, comm->cellsize_min[dim]);
2986 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
2990 /* Check if the boundary did not displace more than halfway
2991 * each of the cells it bounds, as this could cause problems,
2992 * especially when the differences between cell sizes are large.
2993 * If changes are applied, they will not make cells smaller
2994 * than the cut-off, as we check all the boundaries which
2995 * might be affected by a change and if the old state was ok,
2996 * the cells will at most be shrunk back to their old size.
2998 for (i = range[0]+1; i < range[1]; i++)
3000 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3001 if (root->cell_f[i] < halfway)
3003 root->cell_f[i] = halfway;
3004 /* Check if the change also causes shifts of the next boundaries */
3005 for (j = i+1; j < range[1]; j++)
3007 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3009 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3013 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3014 if (root->cell_f[i] > halfway)
3016 root->cell_f[i] = halfway;
3017 /* Check if the change also causes shifts of the next boundaries */
3018 for (j = i-1; j >= range[0]+1; j--)
3020 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3022 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3029 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3030 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3031 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3032 * for a and b nrange is used */
3035 /* Take care of the staggering of the cell boundaries */
3038 for (i = range[0]; i < range[1]; i++)
3040 root->cell_f_max0[i] = root->cell_f[i];
3041 root->cell_f_min1[i] = root->cell_f[i+1];
3046 for (i = range[0]+1; i < range[1]; i++)
3048 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3049 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3050 if (bLimLo && bLimHi)
3052 /* Both limits violated, try the best we can */
3053 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3054 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3055 nrange[0] = range[0];
3057 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3060 nrange[1] = range[1];
3061 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3067 /* root->cell_f[i] = root->bound_min[i]; */
3068 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3071 else if (bLimHi && !bLastHi)
3074 if (nrange[1] < range[1]) /* found a LimLo before */
3076 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3077 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3078 nrange[0] = nrange[1];
3080 root->cell_f[i] = root->bound_max[i];
3082 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3084 nrange[1] = range[1];
3087 if (nrange[1] < range[1]) /* found last a LimLo */
3089 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3090 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3091 nrange[0] = nrange[1];
3092 nrange[1] = range[1];
3093 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3095 else if (nrange[0] > range[0]) /* found at least one LimHi */
3097 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3104 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3105 int d, int dim, domdec_root_t *root,
3106 const gmx_ddbox_t *ddbox,
3107 gmx_bool bDynamicBox,
3108 gmx_bool bUniform, gmx_int64_t step)
3110 gmx_domdec_comm_t *comm;
3111 int ncd, d1, i, pos;
3113 real load_aver, load_i, imbalance, change, change_max, sc;
3114 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3118 int range[] = { 0, 0 };
3122 /* Convert the maximum change from the input percentage to a fraction */
3123 change_limit = comm->dlb_scale_lim*0.01;
3127 bPBC = (dim < ddbox->npbcdim);
3129 cell_size = root->buf_ncd;
3131 /* Store the original boundaries */
3132 for (i = 0; i < ncd+1; i++)
3134 root->old_cell_f[i] = root->cell_f[i];
3138 for (i = 0; i < ncd; i++)
3140 cell_size[i] = 1.0/ncd;
3143 else if (dd_load_count(comm) > 0)
3145 load_aver = comm->load[d].sum_m/ncd;
3147 for (i = 0; i < ncd; i++)
3149 /* Determine the relative imbalance of cell i */
3150 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3151 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3152 /* Determine the change of the cell size using underrelaxation */
3153 change = -relax*imbalance;
3154 change_max = std::max(change_max, std::max(change, -change));
3156 /* Limit the amount of scaling.
3157 * We need to use the same rescaling for all cells in one row,
3158 * otherwise the load balancing might not converge.
3161 if (change_max > change_limit)
3163 sc *= change_limit/change_max;
3165 for (i = 0; i < ncd; i++)
3167 /* Determine the relative imbalance of cell i */
3168 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3169 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3170 /* Determine the change of the cell size using underrelaxation */
3171 change = -sc*imbalance;
3172 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3176 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3177 cellsize_limit_f *= DD_CELL_MARGIN;
3178 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3179 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3180 if (ddbox->tric_dir[dim])
3182 cellsize_limit_f /= ddbox->skew_fac[dim];
3183 dist_min_f /= ddbox->skew_fac[dim];
3185 if (bDynamicBox && d > 0)
3187 dist_min_f *= DD_PRES_SCALE_MARGIN;
3189 if (d > 0 && !bUniform)
3191 /* Make sure that the grid is not shifted too much */
3192 for (i = 1; i < ncd; i++)
3194 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3196 gmx_incons("Inconsistent DD boundary staggering limits!");
3198 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3199 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3202 root->bound_min[i] += 0.5*space;
3204 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3205 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3208 root->bound_max[i] += 0.5*space;
3213 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3215 root->cell_f_max0[i-1] + dist_min_f,
3216 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3217 root->cell_f_min1[i] - dist_min_f);
3222 root->cell_f[0] = 0;
3223 root->cell_f[ncd] = 1;
3224 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3227 /* After the checks above, the cells should obey the cut-off
3228 * restrictions, but it does not hurt to check.
3230 for (i = 0; i < ncd; i++)
3234 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3235 dim, i, root->cell_f[i], root->cell_f[i+1]);
3238 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3239 root->cell_f[i+1] - root->cell_f[i] <
3240 cellsize_limit_f/DD_CELL_MARGIN)
3244 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3245 gmx_step_str(step, buf), dim2char(dim), i,
3246 (root->cell_f[i+1] - root->cell_f[i])
3247 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3252 /* Store the cell boundaries of the lower dimensions at the end */
3253 for (d1 = 0; d1 < d; d1++)
3255 root->cell_f[pos++] = comm->cell_f0[d1];
3256 root->cell_f[pos++] = comm->cell_f1[d1];
3259 if (d < comm->npmedecompdim)
3261 /* The master determines the maximum shift for
3262 * the coordinate communication between separate PME nodes.
3264 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
3266 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3269 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3273 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3274 const gmx_ddbox_t *ddbox,
3277 gmx_domdec_comm_t *comm;
3282 /* Set the cell dimensions */
3283 dim = dd->dim[dimind];
3284 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3285 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3286 if (dim >= ddbox->nboundeddim)
3288 comm->cell_x0[dim] += ddbox->box0[dim];
3289 comm->cell_x1[dim] += ddbox->box0[dim];
3293 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3294 int d, int dim, real *cell_f_row,
3295 const gmx_ddbox_t *ddbox)
3297 gmx_domdec_comm_t *comm;
3303 /* Each node would only need to know two fractions,
3304 * but it is probably cheaper to broadcast the whole array.
3306 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3307 0, comm->mpi_comm_load[d]);
3309 /* Copy the fractions for this dimension from the buffer */
3310 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3311 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3312 /* The whole array was communicated, so set the buffer position */
3313 pos = dd->nc[dim] + 1;
3314 for (d1 = 0; d1 <= d; d1++)
3318 /* Copy the cell fractions of the lower dimensions */
3319 comm->cell_f0[d1] = cell_f_row[pos++];
3320 comm->cell_f1[d1] = cell_f_row[pos++];
3322 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3324 /* Convert the communicated shift from float to int */
3325 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3328 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3332 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3333 const gmx_ddbox_t *ddbox,
3334 gmx_bool bDynamicBox,
3335 gmx_bool bUniform, gmx_int64_t step)
3337 gmx_domdec_comm_t *comm;
3339 gmx_bool bRowMember, bRowRoot;
3344 for (d = 0; d < dd->ndim; d++)
3349 for (d1 = d; d1 < dd->ndim; d1++)
3351 if (dd->ci[dd->dim[d1]] > 0)
3364 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3365 ddbox, bDynamicBox, bUniform, step);
3366 cell_f_row = comm->root[d]->cell_f;
3370 cell_f_row = comm->cell_f_row;
3372 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
3377 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd,
3378 const gmx_ddbox_t *ddbox)
3382 /* This function assumes the box is static and should therefore
3383 * not be called when the box has changed since the last
3384 * call to dd_partition_system.
3386 for (d = 0; d < dd->ndim; d++)
3388 relative_to_absolute_cell_bounds(dd, ddbox, d);
3394 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3395 const gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3396 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3397 gmx_wallcycle_t wcycle)
3399 gmx_domdec_comm_t *comm;
3406 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3407 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3408 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3410 else if (bDynamicBox)
3412 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3415 /* Set the dimensions for which no DD is used */
3416 for (dim = 0; dim < DIM; dim++)
3418 if (dd->nc[dim] == 1)
3420 comm->cell_x0[dim] = 0;
3421 comm->cell_x1[dim] = ddbox->box_size[dim];
3422 if (dim >= ddbox->nboundeddim)
3424 comm->cell_x0[dim] += ddbox->box0[dim];
3425 comm->cell_x1[dim] += ddbox->box0[dim];
3431 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3434 gmx_domdec_comm_dim_t *cd;
3436 for (d = 0; d < dd->ndim; d++)
3438 cd = &dd->comm->cd[d];
3439 np = npulse[dd->dim[d]];
3440 if (np > cd->np_nalloc)
3444 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3445 dim2char(dd->dim[d]), np);
3447 if (DDMASTER(dd) && cd->np_nalloc > 0)
3449 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3451 srenew(cd->ind, np);
3452 for (i = cd->np_nalloc; i < np; i++)
3454 cd->ind[i].index = nullptr;
3455 cd->ind[i].nalloc = 0;
3464 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3465 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3466 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3467 gmx_wallcycle_t wcycle)
3469 gmx_domdec_comm_t *comm;
3475 /* Copy the old cell boundaries for the cg displacement check */
3476 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3477 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3483 check_box_size(dd, ddbox);
3485 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3489 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
3490 realloc_comm_ind(dd, npulse);
3495 for (d = 0; d < DIM; d++)
3497 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3498 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
3503 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3505 rvec cell_ns_x0, rvec cell_ns_x1,
3508 gmx_domdec_comm_t *comm;
3513 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3515 dim = dd->dim[dim_ind];
3517 /* Without PBC we don't have restrictions on the outer cells */
3518 if (!(dim >= ddbox->npbcdim &&
3519 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3521 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3522 comm->cellsize_min[dim])
3525 gmx_fatal(FARGS, "step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3526 gmx_step_str(step, buf), dim2char(dim),
3527 comm->cell_x1[dim] - comm->cell_x0[dim],
3528 ddbox->skew_fac[dim],
3529 dd->comm->cellsize_min[dim],
3530 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3534 if ((isDlbOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3536 /* Communicate the boundaries and update cell_ns_x0/1 */
3537 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3538 if (isDlbOn(dd->comm) && dd->ndim > 1)
3540 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
3545 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3549 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3557 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3558 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3567 static void check_screw_box(matrix box)
3569 /* Mathematical limitation */
3570 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3572 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3575 /* Limitation due to the asymmetry of the eighth shell method */
3576 if (box[ZZ][YY] != 0)
3578 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
3582 static void distribute_cg(FILE *fplog,
3583 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3586 gmx_domdec_master_t *ma;
3587 int **tmp_ind = nullptr, *tmp_nalloc = nullptr;
3588 int i, icg, j, k, k0, k1, d;
3592 real nrcg, inv_ncg, pos_d;
3598 snew(tmp_nalloc, dd->nnodes);
3599 snew(tmp_ind, dd->nnodes);
3600 for (i = 0; i < dd->nnodes; i++)
3602 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3603 snew(tmp_ind[i], tmp_nalloc[i]);
3606 /* Clear the count */
3607 for (i = 0; i < dd->nnodes; i++)
3613 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3615 cgindex = cgs->index;
3617 /* Compute the center of geometry for all charge groups */
3618 for (icg = 0; icg < cgs->nr; icg++)
3621 k1 = cgindex[icg+1];
3625 copy_rvec(pos[k0], cg_cm);
3632 for (k = k0; (k < k1); k++)
3634 rvec_inc(cg_cm, pos[k]);
3636 for (d = 0; (d < DIM); d++)
3638 cg_cm[d] *= inv_ncg;
3641 /* Put the charge group in the box and determine the cell index */
3642 for (d = DIM-1; d >= 0; d--)
3645 if (d < dd->npbcdim)
3647 bScrew = (dd->bScrewPBC && d == XX);
3648 if (tric_dir[d] && dd->nc[d] > 1)
3650 /* Use triclinic coordintates for this dimension */
3651 for (j = d+1; j < DIM; j++)
3653 pos_d += cg_cm[j]*tcm[j][d];
3656 while (pos_d >= box[d][d])
3659 rvec_dec(cg_cm, box[d]);
3662 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3663 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3665 for (k = k0; (k < k1); k++)
3667 rvec_dec(pos[k], box[d]);
3670 pos[k][YY] = box[YY][YY] - pos[k][YY];
3671 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3678 rvec_inc(cg_cm, box[d]);
3681 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3682 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3684 for (k = k0; (k < k1); k++)
3686 rvec_inc(pos[k], box[d]);
3689 pos[k][YY] = box[YY][YY] - pos[k][YY];
3690 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3695 /* This could be done more efficiently */
3697 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
3702 i = dd_index(dd->nc, ind);
3703 if (ma->ncg[i] == tmp_nalloc[i])
3705 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
3706 srenew(tmp_ind[i], tmp_nalloc[i]);
3708 tmp_ind[i][ma->ncg[i]] = icg;
3710 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
3714 for (i = 0; i < dd->nnodes; i++)
3717 for (k = 0; k < ma->ncg[i]; k++)
3719 ma->cg[k1++] = tmp_ind[i][k];
3722 ma->index[dd->nnodes] = k1;
3724 for (i = 0; i < dd->nnodes; i++)
3733 // Use double for the sums to avoid natoms^2 overflowing
3735 int nat_sum, nat_min, nat_max;
3740 nat_min = ma->nat[0];
3741 nat_max = ma->nat[0];
3742 for (i = 0; i < dd->nnodes; i++)
3744 nat_sum += ma->nat[i];
3745 // cast to double to avoid integer overflows when squaring
3746 nat2_sum += gmx::square(static_cast<double>(ma->nat[i]));
3747 nat_min = std::min(nat_min, ma->nat[i]);
3748 nat_max = std::max(nat_max, ma->nat[i]);
3750 nat_sum /= dd->nnodes;
3751 nat2_sum /= dd->nnodes;
3753 fprintf(fplog, "Atom distribution over %d domains: av %d stddev %d min %d max %d\n",
3756 static_cast<int>(std::sqrt(nat2_sum - gmx::square(static_cast<double>(nat_sum)) + 0.5)),
3761 static void get_cg_distribution(FILE *fplog, gmx_domdec_t *dd,
3762 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
3765 gmx_domdec_master_t *ma = nullptr;
3768 int *ibuf, buf2[2] = { 0, 0 };
3769 gmx_bool bMaster = DDMASTER(dd);
3777 check_screw_box(box);
3780 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
3782 distribute_cg(fplog, box, ddbox->tric_dir, cgs, pos, dd);
3783 for (i = 0; i < dd->nnodes; i++)
3785 ma->ibuf[2*i] = ma->ncg[i];
3786 ma->ibuf[2*i+1] = ma->nat[i];
3794 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
3796 dd->ncg_home = buf2[0];
3797 dd->nat_home = buf2[1];
3798 dd->ncg_tot = dd->ncg_home;
3799 dd->nat_tot = dd->nat_home;
3800 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
3802 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
3803 srenew(dd->index_gl, dd->cg_nalloc);
3804 srenew(dd->cgindex, dd->cg_nalloc+1);
3808 for (i = 0; i < dd->nnodes; i++)
3810 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
3811 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
3816 bMaster ? ma->ibuf : nullptr,
3817 bMaster ? ma->ibuf+dd->nnodes : nullptr,
3818 bMaster ? ma->cg : nullptr,
3819 dd->ncg_home*sizeof(int), dd->index_gl);
3821 /* Determine the home charge group sizes */
3823 for (i = 0; i < dd->ncg_home; i++)
3825 cg_gl = dd->index_gl[i];
3827 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
3832 fprintf(debug, "Home charge groups:\n");
3833 for (i = 0; i < dd->ncg_home; i++)
3835 fprintf(debug, " %d", dd->index_gl[i]);
3838 fprintf(debug, "\n");
3841 fprintf(debug, "\n");
3845 static int compact_and_copy_vec_at(int ncg, int *move,
3848 rvec *src, gmx_domdec_comm_t *comm,
3851 int m, icg, i, i0, i1, nrcg;
3857 for (m = 0; m < DIM*2; m++)
3863 for (icg = 0; icg < ncg; icg++)
3865 i1 = cgindex[icg+1];
3871 /* Compact the home array in place */
3872 for (i = i0; i < i1; i++)
3874 copy_rvec(src[i], src[home_pos++]);
3880 /* Copy to the communication buffer */
3882 pos_vec[m] += 1 + vec*nrcg;
3883 for (i = i0; i < i1; i++)
3885 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
3887 pos_vec[m] += (nvec - vec - 1)*nrcg;
3891 home_pos += i1 - i0;
3899 static int compact_and_copy_vec_cg(int ncg, int *move,
3901 int nvec, rvec *src, gmx_domdec_comm_t *comm,
3904 int m, icg, i0, i1, nrcg;
3910 for (m = 0; m < DIM*2; m++)
3916 for (icg = 0; icg < ncg; icg++)
3918 i1 = cgindex[icg+1];
3924 /* Compact the home array in place */
3925 copy_rvec(src[icg], src[home_pos++]);
3931 /* Copy to the communication buffer */
3932 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
3933 pos_vec[m] += 1 + nrcg*nvec;
3945 static int compact_ind(int ncg, int *move,
3946 int *index_gl, int *cgindex,
3948 gmx_ga2la_t *ga2la, char *bLocalCG,
3951 int cg, nat, a0, a1, a, a_gl;
3956 for (cg = 0; cg < ncg; cg++)
3962 /* Compact the home arrays in place.
3963 * Anything that can be done here avoids access to global arrays.
3965 cgindex[home_pos] = nat;
3966 for (a = a0; a < a1; a++)
3969 gatindex[nat] = a_gl;
3970 /* The cell number stays 0, so we don't need to set it */
3971 ga2la_change_la(ga2la, a_gl, nat);
3974 index_gl[home_pos] = index_gl[cg];
3975 cginfo[home_pos] = cginfo[cg];
3976 /* The charge group remains local, so bLocalCG does not change */
3981 /* Clear the global indices */
3982 for (a = a0; a < a1; a++)
3984 ga2la_del(ga2la, gatindex[a]);
3988 bLocalCG[index_gl[cg]] = FALSE;
3992 cgindex[home_pos] = nat;
3997 static void clear_and_mark_ind(int ncg, int *move,
3998 int *index_gl, int *cgindex, int *gatindex,
3999 gmx_ga2la_t *ga2la, char *bLocalCG,
4004 for (cg = 0; cg < ncg; cg++)
4010 /* Clear the global indices */
4011 for (a = a0; a < a1; a++)
4013 ga2la_del(ga2la, gatindex[a]);
4017 bLocalCG[index_gl[cg]] = FALSE;
4019 /* Signal that this cg has moved using the ns cell index.
4020 * Here we set it to -1. fill_grid will change it
4021 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4023 cell_index[cg] = -1;
4028 static void print_cg_move(FILE *fplog,
4030 gmx_int64_t step, int cg, int dim, int dir,
4031 gmx_bool bHaveCgcmOld, real limitd,
4032 rvec cm_old, rvec cm_new, real pos_d)
4034 gmx_domdec_comm_t *comm;
4039 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4042 fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4043 dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
4044 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4048 /* We don't have a limiting distance available: don't print it */
4049 fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition in direction %c\n",
4050 dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
4051 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4053 fprintf(fplog, "distance out of cell %f\n",
4054 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4057 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4058 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4060 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4061 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4062 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4064 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4065 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4067 comm->cell_x0[dim], comm->cell_x1[dim]);
4070 static void cg_move_error(FILE *fplog,
4072 gmx_int64_t step, int cg, int dim, int dir,
4073 gmx_bool bHaveCgcmOld, real limitd,
4074 rvec cm_old, rvec cm_new, real pos_d)
4078 print_cg_move(fplog, dd, step, cg, dim, dir,
4079 bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
4081 print_cg_move(stderr, dd, step, cg, dim, dir,
4082 bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
4084 "%s moved too far between two domain decomposition steps\n"
4085 "This usually means that your system is not well equilibrated",
4086 dd->comm->bCGs ? "A charge group" : "An atom");
4089 static void rotate_state_atom(t_state *state, int a)
4091 if (state->flags & (1 << estX))
4093 /* Rotate the complete state; for a rectangular box only */
4094 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4095 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4097 if (state->flags & (1 << estV))
4099 state->v[a][YY] = -state->v[a][YY];
4100 state->v[a][ZZ] = -state->v[a][ZZ];
4102 if (state->flags & (1 << estCGP))
4104 state->cg_p[a][YY] = -state->cg_p[a][YY];
4105 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4109 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4111 if (natoms > comm->moved_nalloc)
4113 /* Contents should be preserved here */
4114 comm->moved_nalloc = over_alloc_dd(natoms);
4115 srenew(comm->moved, comm->moved_nalloc);
4121 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4124 ivec tric_dir, matrix tcm,
4125 rvec cell_x0, rvec cell_x1,
4126 rvec limitd, rvec limit0, rvec limit1,
4128 int cg_start, int cg_end,
4133 int cg, k, k0, k1, d, dim, d2;
4138 real inv_ncg, pos_d;
4141 npbcdim = dd->npbcdim;
4143 for (cg = cg_start; cg < cg_end; cg++)
4150 copy_rvec(state->x[k0], cm_new);
4157 for (k = k0; (k < k1); k++)
4159 rvec_inc(cm_new, state->x[k]);
4161 for (d = 0; (d < DIM); d++)
4163 cm_new[d] = inv_ncg*cm_new[d];
4168 /* Do pbc and check DD cell boundary crossings */
4169 for (d = DIM-1; d >= 0; d--)
4173 bScrew = (dd->bScrewPBC && d == XX);
4174 /* Determine the location of this cg in lattice coordinates */
4178 for (d2 = d+1; d2 < DIM; d2++)
4180 pos_d += cm_new[d2]*tcm[d2][d];
4183 /* Put the charge group in the triclinic unit-cell */
4184 if (pos_d >= cell_x1[d])
4186 if (pos_d >= limit1[d])
4188 cg_move_error(fplog, dd, step, cg, d, 1,
4189 cg_cm != as_rvec_array(state->x.data()), limitd[d],
4190 cg_cm[cg], cm_new, pos_d);
4193 if (dd->ci[d] == dd->nc[d] - 1)
4195 rvec_dec(cm_new, state->box[d]);
4198 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4199 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4201 for (k = k0; (k < k1); k++)
4203 rvec_dec(state->x[k], state->box[d]);
4206 rotate_state_atom(state, k);
4211 else if (pos_d < cell_x0[d])
4213 if (pos_d < limit0[d])
4215 cg_move_error(fplog, dd, step, cg, d, -1,
4216 cg_cm != as_rvec_array(state->x.data()), limitd[d],
4217 cg_cm[cg], cm_new, pos_d);
4222 rvec_inc(cm_new, state->box[d]);
4225 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4226 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4228 for (k = k0; (k < k1); k++)
4230 rvec_inc(state->x[k], state->box[d]);
4233 rotate_state_atom(state, k);
4239 else if (d < npbcdim)
4241 /* Put the charge group in the rectangular unit-cell */
4242 while (cm_new[d] >= state->box[d][d])
4244 rvec_dec(cm_new, state->box[d]);
4245 for (k = k0; (k < k1); k++)
4247 rvec_dec(state->x[k], state->box[d]);
4250 while (cm_new[d] < 0)
4252 rvec_inc(cm_new, state->box[d]);
4253 for (k = k0; (k < k1); k++)
4255 rvec_inc(state->x[k], state->box[d]);
4261 copy_rvec(cm_new, cg_cm[cg]);
4263 /* Determine where this cg should go */
4266 for (d = 0; d < dd->ndim; d++)
4271 flag |= DD_FLAG_FW(d);
4277 else if (dev[dim] == -1)
4279 flag |= DD_FLAG_BW(d);
4282 if (dd->nc[dim] > 2)
4293 /* Temporarily store the flag in move */
4294 move[cg] = mc + flag;
4298 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4299 gmx_domdec_t *dd, ivec tric_dir,
4300 t_state *state, PaddedRVecVector *f,
4309 int ncg[DIM*2] = { 0 }, nat[DIM*2] = { 0 };
4310 int i, cg, k, d, dim, dim2, dir, d2, d3;
4311 int mc, cdd, nrcg, ncg_recv, nvs, nvr, nvec, vec;
4312 int sbuf[2], rbuf[2];
4313 int home_pos_cg, home_pos_at, buf_pos;
4317 rvec *cg_cm = nullptr, cell_x0, cell_x1, limitd, limit0, limit1;
4319 cginfo_mb_t *cginfo_mb;
4320 gmx_domdec_comm_t *comm;
4322 int nthread, thread;
4326 check_screw_box(state->box);
4330 if (fr->cutoff_scheme == ecutsGROUP)
4335 // Positions are always present, so there's nothing to flag
4336 bool bV = state->flags & (1<<estV);
4337 bool bCGP = state->flags & (1<<estCGP);
4339 if (dd->ncg_tot > comm->nalloc_int)
4341 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4342 srenew(comm->buf_int, comm->nalloc_int);
4344 move = comm->buf_int;
4346 npbcdim = dd->npbcdim;
4348 for (d = 0; (d < DIM); d++)
4350 limitd[d] = dd->comm->cellsize_min[d];
4351 if (d >= npbcdim && dd->ci[d] == 0)
4353 cell_x0[d] = -GMX_FLOAT_MAX;
4357 cell_x0[d] = comm->cell_x0[d];
4359 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4361 cell_x1[d] = GMX_FLOAT_MAX;
4365 cell_x1[d] = comm->cell_x1[d];
4369 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4370 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4374 /* We check after communication if a charge group moved
4375 * more than one cell. Set the pre-comm check limit to float_max.
4377 limit0[d] = -GMX_FLOAT_MAX;
4378 limit1[d] = GMX_FLOAT_MAX;
4382 make_tric_corr_matrix(npbcdim, state->box, tcm);
4384 cgindex = dd->cgindex;
4386 nthread = gmx_omp_nthreads_get(emntDomdec);
4388 /* Compute the center of geometry for all home charge groups
4389 * and put them in the box and determine where they should go.
4391 #pragma omp parallel for num_threads(nthread) schedule(static)
4392 for (thread = 0; thread < nthread; thread++)
4396 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4397 cell_x0, cell_x1, limitd, limit0, limit1,
4399 ( thread *dd->ncg_home)/nthread,
4400 ((thread+1)*dd->ncg_home)/nthread,
4401 fr->cutoff_scheme == ecutsGROUP ? cg_cm : as_rvec_array(state->x.data()),
4404 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
4407 for (cg = 0; cg < dd->ncg_home; cg++)
4412 flag = mc & ~DD_FLAG_NRCG;
4413 mc = mc & DD_FLAG_NRCG;
4416 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4418 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4419 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4421 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4422 /* We store the cg size in the lower 16 bits
4423 * and the place where the charge group should go
4424 * in the next 6 bits. This saves some communication volume.
4426 nrcg = cgindex[cg+1] - cgindex[cg];
4427 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4433 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4434 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4437 for (i = 0; i < dd->ndim*2; i++)
4439 *ncg_moved += ncg[i];
4452 /* Make sure the communication buffers are large enough */
4453 for (mc = 0; mc < dd->ndim*2; mc++)
4455 nvr = ncg[mc] + nat[mc]*nvec;
4456 if (nvr > comm->cgcm_state_nalloc[mc])
4458 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4459 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4463 switch (fr->cutoff_scheme)
4466 /* Recalculating cg_cm might be cheaper than communicating,
4467 * but that could give rise to rounding issues.
4470 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4471 nvec, cg_cm, comm, bCompact);
4474 /* Without charge groups we send the moved atom coordinates
4475 * over twice. This is so the code below can be used without
4476 * many conditionals for both for with and without charge groups.
4479 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4480 nvec, as_rvec_array(state->x.data()), comm, FALSE);
4483 home_pos_cg -= *ncg_moved;
4487 gmx_incons("unimplemented");
4493 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4494 nvec, vec++, as_rvec_array(state->x.data()),
4498 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4499 nvec, vec++, as_rvec_array(state->v.data()),
4504 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4505 nvec, vec++, as_rvec_array(state->cg_p.data()),
4511 compact_ind(dd->ncg_home, move,
4512 dd->index_gl, dd->cgindex, dd->gatindex,
4513 dd->ga2la, comm->bLocalCG,
4518 if (fr->cutoff_scheme == ecutsVERLET)
4520 moved = get_moved(comm, dd->ncg_home);
4522 for (k = 0; k < dd->ncg_home; k++)
4529 moved = fr->ns->grid->cell_index;
4532 clear_and_mark_ind(dd->ncg_home, move,
4533 dd->index_gl, dd->cgindex, dd->gatindex,
4534 dd->ga2la, comm->bLocalCG,
4538 cginfo_mb = fr->cginfo_mb;
4540 *ncg_stay_home = home_pos_cg;
4541 for (d = 0; d < dd->ndim; d++)
4546 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4549 /* Communicate the cg and atom counts */
4554 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4555 d, dir, sbuf[0], sbuf[1]);
4557 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4559 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4561 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4562 srenew(comm->buf_int, comm->nalloc_int);
4565 /* Communicate the charge group indices, sizes and flags */
4566 dd_sendrecv_int(dd, d, dir,
4567 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4568 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4570 nvs = ncg[cdd] + nat[cdd]*nvec;
4571 i = rbuf[0] + rbuf[1] *nvec;
4572 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4574 /* Communicate cgcm and state */
4575 dd_sendrecv_rvec(dd, d, dir,
4576 comm->cgcm_state[cdd], nvs,
4577 comm->vbuf.v+nvr, i);
4578 ncg_recv += rbuf[0];
4582 dd_check_alloc_ncg(fr, state, f, home_pos_cg + ncg_recv);
4583 if (fr->cutoff_scheme == ecutsGROUP)
4585 /* Here we resize to more than necessary and shrink later */
4586 dd_resize_state(state, f, home_pos_at + ncg_recv*MAX_CGCGSIZE);
4589 /* Process the received charge groups */
4591 for (cg = 0; cg < ncg_recv; cg++)
4593 flag = comm->buf_int[cg*DD_CGIBS+1];
4595 if (dim >= npbcdim && dd->nc[dim] > 2)
4597 /* No pbc in this dim and more than one domain boundary.
4598 * We do a separate check if a charge group didn't move too far.
4600 if (((flag & DD_FLAG_FW(d)) &&
4601 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4602 ((flag & DD_FLAG_BW(d)) &&
4603 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4605 cg_move_error(fplog, dd, step, cg, dim,
4606 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4607 fr->cutoff_scheme == ecutsGROUP, 0,
4608 comm->vbuf.v[buf_pos],
4609 comm->vbuf.v[buf_pos],
4610 comm->vbuf.v[buf_pos][dim]);
4617 /* Check which direction this cg should go */
4618 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4620 if (isDlbOn(dd->comm))
4622 /* The cell boundaries for dimension d2 are not equal
4623 * for each cell row of the lower dimension(s),
4624 * therefore we might need to redetermine where
4625 * this cg should go.
4628 /* If this cg crosses the box boundary in dimension d2
4629 * we can use the communicated flag, so we do not
4630 * have to worry about pbc.
4632 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4633 (flag & DD_FLAG_FW(d2))) ||
4634 (dd->ci[dim2] == 0 &&
4635 (flag & DD_FLAG_BW(d2)))))
4637 /* Clear the two flags for this dimension */
4638 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4639 /* Determine the location of this cg
4640 * in lattice coordinates
4642 pos_d = comm->vbuf.v[buf_pos][dim2];
4645 for (d3 = dim2+1; d3 < DIM; d3++)
4648 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4651 /* Check of we are not at the box edge.
4652 * pbc is only handled in the first step above,
4653 * but this check could move over pbc while
4654 * the first step did not due to different rounding.
4656 if (pos_d >= cell_x1[dim2] &&
4657 dd->ci[dim2] != dd->nc[dim2]-1)
4659 flag |= DD_FLAG_FW(d2);
4661 else if (pos_d < cell_x0[dim2] &&
4664 flag |= DD_FLAG_BW(d2);
4666 comm->buf_int[cg*DD_CGIBS+1] = flag;
4669 /* Set to which neighboring cell this cg should go */
4670 if (flag & DD_FLAG_FW(d2))
4674 else if (flag & DD_FLAG_BW(d2))
4676 if (dd->nc[dd->dim[d2]] > 2)
4688 nrcg = flag & DD_FLAG_NRCG;
4691 if (home_pos_cg+1 > dd->cg_nalloc)
4693 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
4694 srenew(dd->index_gl, dd->cg_nalloc);
4695 srenew(dd->cgindex, dd->cg_nalloc+1);
4697 /* Set the global charge group index and size */
4698 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
4699 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
4700 /* Copy the state from the buffer */
4701 if (fr->cutoff_scheme == ecutsGROUP)
4704 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
4708 /* Set the cginfo */
4709 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
4710 dd->index_gl[home_pos_cg]);
4713 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
4716 for (i = 0; i < nrcg; i++)
4718 copy_rvec(comm->vbuf.v[buf_pos++],
4719 state->x[home_pos_at+i]);
4723 for (i = 0; i < nrcg; i++)
4725 copy_rvec(comm->vbuf.v[buf_pos++],
4726 state->v[home_pos_at+i]);
4731 for (i = 0; i < nrcg; i++)
4733 copy_rvec(comm->vbuf.v[buf_pos++],
4734 state->cg_p[home_pos_at+i]);
4738 home_pos_at += nrcg;
4742 /* Reallocate the buffers if necessary */
4743 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4745 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4746 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4748 nvr = ncg[mc] + nat[mc]*nvec;
4749 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
4751 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
4752 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4754 /* Copy from the receive to the send buffers */
4755 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
4756 comm->buf_int + cg*DD_CGIBS,
4757 DD_CGIBS*sizeof(int));
4758 memcpy(comm->cgcm_state[mc][nvr],
4759 comm->vbuf.v[buf_pos],
4760 (1+nrcg*nvec)*sizeof(rvec));
4761 buf_pos += 1 + nrcg*nvec;
4768 /* With sorting (!bCompact) the indices are now only partially up to date
4769 * and ncg_home and nat_home are not the real count, since there are
4770 * "holes" in the arrays for the charge groups that moved to neighbors.
4772 if (fr->cutoff_scheme == ecutsVERLET)
4774 moved = get_moved(comm, home_pos_cg);
4776 for (i = dd->ncg_home; i < home_pos_cg; i++)
4781 dd->ncg_home = home_pos_cg;
4782 dd->nat_home = home_pos_at;
4784 if (fr->cutoff_scheme == ecutsGROUP && !bCompact)
4786 /* We overallocated before, we need to set the right size here */
4787 dd_resize_state(state, f, dd->nat_home);
4793 "Finished repartitioning: cgs moved out %d, new home %d\n",
4794 *ncg_moved, dd->ncg_home-*ncg_moved);
4799 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
4801 /* Note that the cycles value can be incorrect, either 0 or some
4802 * extremely large value, when our thread migrated to another core
4803 * with an unsynchronized cycle counter. If this happens less often
4804 * that once per nstlist steps, this will not cause issues, since
4805 * we later subtract the maximum value from the sum over nstlist steps.
4806 * A zero count will slightly lower the total, but that's a small effect.
4807 * Note that the main purpose of the subtraction of the maximum value
4808 * is to avoid throwing off the load balancing when stalls occur due
4809 * e.g. system activity or network congestion.
4811 dd->comm->cycl[ddCycl] += cycles;
4812 dd->comm->cycl_n[ddCycl]++;
4813 if (cycles > dd->comm->cycl_max[ddCycl])
4815 dd->comm->cycl_max[ddCycl] = cycles;
4819 static double force_flop_count(t_nrnb *nrnb)
4826 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
4828 /* To get closer to the real timings, we half the count
4829 * for the normal loops and again half it for water loops.
4832 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
4834 sum += nrnb->n[i]*0.25*cost_nrnb(i);
4838 sum += nrnb->n[i]*0.50*cost_nrnb(i);
4841 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
4844 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
4846 sum += nrnb->n[i]*cost_nrnb(i);
4849 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
4851 sum += nrnb->n[i]*cost_nrnb(i);
4857 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
4859 if (dd->comm->eFlop)
4861 dd->comm->flop -= force_flop_count(nrnb);
4864 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
4866 if (dd->comm->eFlop)
4868 dd->comm->flop += force_flop_count(nrnb);
4873 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
4877 for (i = 0; i < ddCyclNr; i++)
4879 dd->comm->cycl[i] = 0;
4880 dd->comm->cycl_n[i] = 0;
4881 dd->comm->cycl_max[i] = 0;
4884 dd->comm->flop_n = 0;
4887 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
4889 gmx_domdec_comm_t *comm;
4890 domdec_load_t *load;
4891 domdec_root_t *root = nullptr;
4893 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
4898 fprintf(debug, "get_load_distribution start\n");
4901 wallcycle_start(wcycle, ewcDDCOMMLOAD);
4905 bSepPME = (dd->pme_nodeid >= 0);
4907 if (dd->ndim == 0 && bSepPME)
4909 /* Without decomposition, but with PME nodes, we need the load */
4910 comm->load[0].mdf = comm->cycl[ddCyclPPduringPME];
4911 comm->load[0].pme = comm->cycl[ddCyclPME];
4914 for (d = dd->ndim-1; d >= 0; d--)
4917 /* Check if we participate in the communication in this dimension */
4918 if (d == dd->ndim-1 ||
4919 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
4921 load = &comm->load[d];
4922 if (isDlbOn(dd->comm))
4924 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
4927 if (d == dd->ndim-1)
4929 sbuf[pos++] = dd_force_load(comm);
4930 sbuf[pos++] = sbuf[0];
4931 if (isDlbOn(dd->comm))
4933 sbuf[pos++] = sbuf[0];
4934 sbuf[pos++] = cell_frac;
4937 sbuf[pos++] = comm->cell_f_max0[d];
4938 sbuf[pos++] = comm->cell_f_min1[d];
4943 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
4944 sbuf[pos++] = comm->cycl[ddCyclPME];
4949 sbuf[pos++] = comm->load[d+1].sum;
4950 sbuf[pos++] = comm->load[d+1].max;
4951 if (isDlbOn(dd->comm))
4953 sbuf[pos++] = comm->load[d+1].sum_m;
4954 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
4955 sbuf[pos++] = comm->load[d+1].flags;
4958 sbuf[pos++] = comm->cell_f_max0[d];
4959 sbuf[pos++] = comm->cell_f_min1[d];
4964 sbuf[pos++] = comm->load[d+1].mdf;
4965 sbuf[pos++] = comm->load[d+1].pme;
4969 /* Communicate a row in DD direction d.
4970 * The communicators are setup such that the root always has rank 0.
4973 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
4974 load->load, load->nload*sizeof(float), MPI_BYTE,
4975 0, comm->mpi_comm_load[d]);
4977 if (dd->ci[dim] == dd->master_ci[dim])
4979 /* We are the root, process this row */
4982 root = comm->root[d];
4992 for (i = 0; i < dd->nc[dim]; i++)
4994 load->sum += load->load[pos++];
4995 load->max = std::max(load->max, load->load[pos]);
4997 if (isDlbOn(dd->comm))
5001 /* This direction could not be load balanced properly,
5002 * therefore we need to use the maximum iso the average load.
5004 load->sum_m = std::max(load->sum_m, load->load[pos]);
5008 load->sum_m += load->load[pos];
5011 load->cvol_min = std::min(load->cvol_min, load->load[pos]);
5015 load->flags = (int)(load->load[pos++] + 0.5);
5019 root->cell_f_max0[i] = load->load[pos++];
5020 root->cell_f_min1[i] = load->load[pos++];
5025 load->mdf = std::max(load->mdf, load->load[pos]);
5027 load->pme = std::max(load->pme, load->load[pos]);
5031 if (isDlbOn(comm) && root->bLimited)
5033 load->sum_m *= dd->nc[dim];
5034 load->flags |= (1<<d);
5042 comm->nload += dd_load_count(comm);
5043 comm->load_step += comm->cycl[ddCyclStep];
5044 comm->load_sum += comm->load[0].sum;
5045 comm->load_max += comm->load[0].max;
5048 for (d = 0; d < dd->ndim; d++)
5050 if (comm->load[0].flags & (1<<d))
5052 comm->load_lim[d]++;
5058 comm->load_mdf += comm->load[0].mdf;
5059 comm->load_pme += comm->load[0].pme;
5063 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5067 fprintf(debug, "get_load_distribution finished\n");
5071 static float dd_force_load_fraction(gmx_domdec_t *dd)
5073 /* Return the relative performance loss on the total run time
5074 * due to the force calculation load imbalance.
5076 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
5078 return dd->comm->load_sum/(dd->comm->load_step*dd->nnodes);
5086 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5088 /* Return the relative performance loss on the total run time
5089 * due to the force calculation load imbalance.
5091 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
5094 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5095 (dd->comm->load_step*dd->nnodes);
5103 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5105 gmx_domdec_comm_t *comm = dd->comm;
5107 /* Only the master rank prints loads and only if we measured loads */
5108 if (!DDMASTER(dd) || comm->nload == 0)
5114 int numPpRanks = dd->nnodes;
5115 int numPmeRanks = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5116 int numRanks = numPpRanks + numPmeRanks;
5117 float lossFraction = 0;
5119 /* Print the average load imbalance and performance loss */
5120 if (dd->nnodes > 1 && comm->load_sum > 0)
5122 float imbalance = comm->load_max*numPpRanks/comm->load_sum - 1;
5123 lossFraction = dd_force_imb_perf_loss(dd);
5124 fprintf(stderr, "\n");
5126 " Load balancing based on %d %% of the MD step time\n"
5127 " Average load imbalance: %.1f %%\n"
5128 " Part of the total run time spent waiting due to load imbalance: %.1f %%\n",
5129 static_cast<int>(dd_force_load_fraction(dd)*100 + 0.5),
5132 fprintf(fplog, "%s", buf);
5133 fprintf(stderr, "%s", buf);
5136 /* Print during what percentage of steps the load balancing was limited */
5137 bool dlbWasLimited = false;
5140 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5141 for (int d = 0; d < dd->ndim; d++)
5143 int limitPercentage = (200*comm->load_lim[d] + 1)/(2*comm->nload);
5144 sprintf(buf+strlen(buf), " %c %d %%",
5145 dim2char(dd->dim[d]), limitPercentage);
5146 if (limitPercentage >= 50)
5148 dlbWasLimited = true;
5151 sprintf(buf + strlen(buf), "\n");
5152 fprintf(fplog, "%s", buf);
5153 fprintf(stderr, "%s", buf);
5156 /* Print the performance loss due to separate PME - PP rank imbalance */
5157 float lossFractionPme = 0;
5158 if (numPmeRanks > 0 && comm->load_mdf > 0 && comm->load_step > 0)
5160 float pmeForceRatio = comm->load_pme/comm->load_mdf;
5161 lossFractionPme = (comm->load_pme - comm->load_mdf)/comm->load_step;
5162 if (lossFractionPme <= 0)
5164 lossFractionPme *= numPmeRanks/static_cast<float>(numRanks);
5168 lossFractionPme *= numPpRanks/static_cast<float>(numRanks);
5170 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pmeForceRatio);
5171 fprintf(fplog, "%s", buf);
5172 fprintf(stderr, "%s", buf);
5173 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossFractionPme)*100);
5174 fprintf(fplog, "%s", buf);
5175 fprintf(stderr, "%s", buf);
5177 fprintf(fplog, "\n");
5178 fprintf(stderr, "\n");
5180 if (lossFraction >= DD_PERF_LOSS_WARN)
5183 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5184 " in the domain decomposition.\n", lossFraction*100);
5187 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5189 else if (dlbWasLimited)
5191 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5193 fprintf(fplog, "%s\n", buf);
5194 fprintf(stderr, "%s\n", buf);
5196 if (numPmeRanks > 0 && fabs(lossFractionPme) >= DD_PERF_LOSS_WARN)
5199 "NOTE: %.1f %% performance was lost because the PME ranks\n"
5200 " had %s work to do than the PP ranks.\n"
5201 " You might want to %s the number of PME ranks\n"
5202 " or %s the cut-off and the grid spacing.\n",
5203 fabs(lossFractionPme*100),
5204 (lossFractionPme < 0) ? "less" : "more",
5205 (lossFractionPme < 0) ? "decrease" : "increase",
5206 (lossFractionPme < 0) ? "decrease" : "increase");
5207 fprintf(fplog, "%s\n", buf);
5208 fprintf(stderr, "%s\n", buf);
5212 static float dd_vol_min(gmx_domdec_t *dd)
5214 return dd->comm->load[0].cvol_min*dd->nnodes;
5217 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5219 return dd->comm->load[0].flags;
5222 static float dd_f_imbal(gmx_domdec_t *dd)
5224 if (dd->comm->load[0].sum > 0)
5226 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1.0f;
5230 /* Something is wrong in the cycle counting, report no load imbalance */
5235 float dd_pme_f_ratio(gmx_domdec_t *dd)
5237 /* Should only be called on the DD master rank */
5238 assert(DDMASTER(dd));
5240 if (dd->comm->load[0].mdf > 0 && dd->comm->cycl_n[ddCyclPME] > 0)
5242 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5250 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5255 flags = dd_load_flags(dd);
5259 "DD load balancing is limited by minimum cell size in dimension");
5260 for (d = 0; d < dd->ndim; d++)
5264 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5267 fprintf(fplog, "\n");
5269 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5270 if (isDlbOn(dd->comm))
5272 fprintf(fplog, " vol min/aver %5.3f%c",
5273 dd_vol_min(dd), flags ? '!' : ' ');
5277 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5279 if (dd->comm->cycl_n[ddCyclPME])
5281 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5283 fprintf(fplog, "\n\n");
5286 static void dd_print_load_verbose(gmx_domdec_t *dd)
5288 if (isDlbOn(dd->comm))
5290 fprintf(stderr, "vol %4.2f%c ",
5291 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5295 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5297 if (dd->comm->cycl_n[ddCyclPME])
5299 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
5304 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5309 domdec_root_t *root;
5310 gmx_bool bPartOfGroup = FALSE;
5312 dim = dd->dim[dim_ind];
5313 copy_ivec(loc, loc_c);
5314 for (i = 0; i < dd->nc[dim]; i++)
5317 rank = dd_index(dd->nc, loc_c);
5318 if (rank == dd->rank)
5320 /* This process is part of the group */
5321 bPartOfGroup = TRUE;
5324 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5328 dd->comm->mpi_comm_load[dim_ind] = c_row;
5329 if (!isDlbDisabled(dd->comm))
5331 if (dd->ci[dim] == dd->master_ci[dim])
5333 /* This is the root process of this row */
5334 snew(dd->comm->root[dim_ind], 1);
5335 root = dd->comm->root[dim_ind];
5336 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5337 snew(root->old_cell_f, dd->nc[dim]+1);
5338 snew(root->bCellMin, dd->nc[dim]);
5341 snew(root->cell_f_max0, dd->nc[dim]);
5342 snew(root->cell_f_min1, dd->nc[dim]);
5343 snew(root->bound_min, dd->nc[dim]);
5344 snew(root->bound_max, dd->nc[dim]);
5346 snew(root->buf_ncd, dd->nc[dim]);
5350 /* This is not a root process, we only need to receive cell_f */
5351 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5354 if (dd->ci[dim] == dd->master_ci[dim])
5356 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
5362 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5363 const gmx_hw_info_t gmx_unused *hwinfo,
5364 const gmx_hw_opt_t gmx_unused *hw_opt)
5367 int physicalnode_id_hash;
5370 MPI_Comm mpi_comm_pp_physicalnode;
5372 if (!(cr->duty & DUTY_PP) || hw_opt->gpu_opt.n_dev_use == 0)
5374 /* Only PP nodes (currently) use GPUs.
5375 * If we don't have GPUs, there are no resources to share.
5380 physicalnode_id_hash = gmx_physicalnode_id_hash();
5382 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5388 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5389 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5390 dd->rank, physicalnode_id_hash, gpu_id);
5392 /* Split the PP communicator over the physical nodes */
5393 /* TODO: See if we should store this (before), as it's also used for
5394 * for the nodecomm summution.
5396 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5397 &mpi_comm_pp_physicalnode);
5398 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5399 &dd->comm->mpi_comm_gpu_shared);
5400 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5401 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5405 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5408 /* Note that some ranks could share a GPU, while others don't */
5410 if (dd->comm->nrank_gpu_shared == 1)
5412 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
5417 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5420 int dim0, dim1, i, j;
5425 fprintf(debug, "Making load communicators\n");
5428 snew(dd->comm->load, std::max(dd->ndim, 1));
5429 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
5437 make_load_communicator(dd, 0, loc);
5441 for (i = 0; i < dd->nc[dim0]; i++)
5444 make_load_communicator(dd, 1, loc);
5450 for (i = 0; i < dd->nc[dim0]; i++)
5454 for (j = 0; j < dd->nc[dim1]; j++)
5457 make_load_communicator(dd, 2, loc);
5464 fprintf(debug, "Finished making load communicators\n");
5469 /*! \brief Sets up the relation between neighboring domains and zones */
5470 static void setup_neighbor_relations(gmx_domdec_t *dd)
5472 int d, dim, i, j, m;
5474 gmx_domdec_zones_t *zones;
5475 gmx_domdec_ns_ranges_t *izone;
5477 for (d = 0; d < dd->ndim; d++)
5480 copy_ivec(dd->ci, tmp);
5481 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5482 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5483 copy_ivec(dd->ci, tmp);
5484 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5485 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5488 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5491 dd->neighbor[d][1]);
5495 int nzone = (1 << dd->ndim);
5496 int nizone = (1 << std::max(dd->ndim - 1, 0));
5497 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
5499 zones = &dd->comm->zones;
5501 for (i = 0; i < nzone; i++)
5504 clear_ivec(zones->shift[i]);
5505 for (d = 0; d < dd->ndim; d++)
5507 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5512 for (i = 0; i < nzone; i++)
5514 for (d = 0; d < DIM; d++)
5516 s[d] = dd->ci[d] - zones->shift[i][d];
5521 else if (s[d] >= dd->nc[d])
5527 zones->nizone = nizone;
5528 for (i = 0; i < zones->nizone; i++)
5530 assert(ddNonbondedZonePairRanges[i][0] == i);
5532 izone = &zones->izone[i];
5533 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
5534 * j-zones up to nzone.
5536 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
5537 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
5538 for (dim = 0; dim < DIM; dim++)
5540 if (dd->nc[dim] == 1)
5542 /* All shifts should be allowed */
5543 izone->shift0[dim] = -1;
5544 izone->shift1[dim] = 1;
5548 /* Determine the min/max j-zone shift wrt the i-zone */
5549 izone->shift0[dim] = 1;
5550 izone->shift1[dim] = -1;
5551 for (j = izone->j0; j < izone->j1; j++)
5553 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5554 if (shift_diff < izone->shift0[dim])
5556 izone->shift0[dim] = shift_diff;
5558 if (shift_diff > izone->shift1[dim])
5560 izone->shift1[dim] = shift_diff;
5567 if (!isDlbDisabled(dd->comm))
5569 snew(dd->comm->root, dd->ndim);
5572 if (dd->comm->bRecordLoad)
5574 make_load_communicators(dd);
5578 static void make_pp_communicator(FILE *fplog,
5580 t_commrec gmx_unused *cr,
5581 int gmx_unused reorder)
5584 gmx_domdec_comm_t *comm;
5591 if (comm->bCartesianPP)
5593 /* Set up cartesian communication for the particle-particle part */
5596 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5597 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5600 for (int i = 0; i < DIM; i++)
5604 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5606 /* We overwrite the old communicator with the new cartesian one */
5607 cr->mpi_comm_mygroup = comm_cart;
5610 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5611 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5613 if (comm->bCartesianPP_PME)
5615 /* Since we want to use the original cartesian setup for sim,
5616 * and not the one after split, we need to make an index.
5618 snew(comm->ddindex2ddnodeid, dd->nnodes);
5619 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5620 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5621 /* Get the rank of the DD master,
5622 * above we made sure that the master node is a PP node.
5632 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5634 else if (comm->bCartesianPP)
5636 if (cr->npmenodes == 0)
5638 /* The PP communicator is also
5639 * the communicator for this simulation
5641 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5643 cr->nodeid = dd->rank;
5645 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5647 /* We need to make an index to go from the coordinates
5648 * to the nodeid of this simulation.
5650 snew(comm->ddindex2simnodeid, dd->nnodes);
5651 snew(buf, dd->nnodes);
5652 if (cr->duty & DUTY_PP)
5654 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5656 /* Communicate the ddindex to simulation nodeid index */
5657 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5658 cr->mpi_comm_mysim);
5661 /* Determine the master coordinates and rank.
5662 * The DD master should be the same node as the master of this sim.
5664 for (int i = 0; i < dd->nnodes; i++)
5666 if (comm->ddindex2simnodeid[i] == 0)
5668 ddindex2xyz(dd->nc, i, dd->master_ci);
5669 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
5674 fprintf(debug, "The master rank is %d\n", dd->masterrank);
5679 /* No Cartesian communicators */
5680 /* We use the rank in dd->comm->all as DD index */
5681 ddindex2xyz(dd->nc, dd->rank, dd->ci);
5682 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
5684 clear_ivec(dd->master_ci);
5691 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
5692 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5697 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
5698 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5702 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
5706 gmx_domdec_comm_t *comm = dd->comm;
5708 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
5711 snew(comm->ddindex2simnodeid, dd->nnodes);
5712 snew(buf, dd->nnodes);
5713 if (cr->duty & DUTY_PP)
5715 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5717 /* Communicate the ddindex to simulation nodeid index */
5718 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5719 cr->mpi_comm_mysim);
5723 GMX_UNUSED_VALUE(dd);
5724 GMX_UNUSED_VALUE(cr);
5728 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
5729 int ncg, int natoms)
5731 gmx_domdec_master_t *ma;
5736 snew(ma->ncg, dd->nnodes);
5737 snew(ma->index, dd->nnodes+1);
5739 snew(ma->nat, dd->nnodes);
5740 snew(ma->ibuf, dd->nnodes*2);
5741 snew(ma->cell_x, DIM);
5742 for (i = 0; i < DIM; i++)
5744 snew(ma->cell_x[i], dd->nc[i]+1);
5747 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
5753 snew(ma->vbuf, natoms);
5759 static void split_communicator(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
5760 int gmx_unused dd_rank_order,
5761 int gmx_unused reorder)
5763 gmx_domdec_comm_t *comm;
5772 if (comm->bCartesianPP)
5774 for (i = 1; i < DIM; i++)
5776 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
5778 if (bDiv[YY] || bDiv[ZZ])
5780 comm->bCartesianPP_PME = TRUE;
5781 /* If we have 2D PME decomposition, which is always in x+y,
5782 * we stack the PME only nodes in z.
5783 * Otherwise we choose the direction that provides the thinnest slab
5784 * of PME only nodes as this will have the least effect
5785 * on the PP communication.
5786 * But for the PME communication the opposite might be better.
5788 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
5790 dd->nc[YY] > dd->nc[ZZ]))
5792 comm->cartpmedim = ZZ;
5796 comm->cartpmedim = YY;
5798 comm->ntot[comm->cartpmedim]
5799 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
5803 fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
5805 "Will not use a Cartesian communicator for PP <-> PME\n\n");
5810 if (comm->bCartesianPP_PME)
5817 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
5820 for (i = 0; i < DIM; i++)
5824 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
5826 MPI_Comm_rank(comm_cart, &rank);
5827 if (MASTER(cr) && rank != 0)
5829 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
5832 /* With this assigment we loose the link to the original communicator
5833 * which will usually be MPI_COMM_WORLD, unless have multisim.
5835 cr->mpi_comm_mysim = comm_cart;
5836 cr->sim_nodeid = rank;
5838 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
5842 fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
5843 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5846 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
5850 if (cr->npmenodes == 0 ||
5851 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
5853 cr->duty = DUTY_PME;
5856 /* Split the sim communicator into PP and PME only nodes */
5857 MPI_Comm_split(cr->mpi_comm_mysim,
5859 dd_index(comm->ntot, dd->ci),
5860 &cr->mpi_comm_mygroup);
5864 switch (dd_rank_order)
5866 case ddrankorderPP_PME:
5869 fprintf(fplog, "Order of the ranks: PP first, PME last\n");
5872 case ddrankorderINTERLEAVE:
5873 /* Interleave the PP-only and PME-only ranks */
5876 fprintf(fplog, "Interleaving PP and PME ranks\n");
5878 comm->pmenodes = dd_interleaved_pme_ranks(dd);
5880 case ddrankorderCARTESIAN:
5883 gmx_fatal(FARGS, "Unknown dd_rank_order=%d", dd_rank_order);
5886 if (dd_simnode2pmenode(dd, cr, cr->sim_nodeid) == -1)
5888 cr->duty = DUTY_PME;
5895 /* Split the sim communicator into PP and PME only nodes */
5896 MPI_Comm_split(cr->mpi_comm_mysim,
5899 &cr->mpi_comm_mygroup);
5900 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
5906 fprintf(fplog, "This rank does only %s work.\n\n",
5907 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
5911 /*! \brief Generates the MPI communicators for domain decomposition */
5912 static void make_dd_communicators(FILE *fplog, t_commrec *cr,
5913 gmx_domdec_t *dd, int dd_rank_order)
5915 gmx_domdec_comm_t *comm;
5920 copy_ivec(dd->nc, comm->ntot);
5922 comm->bCartesianPP = (dd_rank_order == ddrankorderCARTESIAN);
5923 comm->bCartesianPP_PME = FALSE;
5925 /* Reorder the nodes by default. This might change the MPI ranks.
5926 * Real reordering is only supported on very few architectures,
5927 * Blue Gene is one of them.
5929 CartReorder = (getenv("GMX_NO_CART_REORDER") == nullptr);
5931 if (cr->npmenodes > 0)
5933 /* Split the communicator into a PP and PME part */
5934 split_communicator(fplog, cr, dd, dd_rank_order, CartReorder);
5935 if (comm->bCartesianPP_PME)
5937 /* We (possibly) reordered the nodes in split_communicator,
5938 * so it is no longer required in make_pp_communicator.
5940 CartReorder = FALSE;
5945 /* All nodes do PP and PME */
5947 /* We do not require separate communicators */
5948 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
5952 if (cr->duty & DUTY_PP)
5954 /* Copy or make a new PP communicator */
5955 make_pp_communicator(fplog, dd, cr, CartReorder);
5959 receive_ddindex2simnodeid(dd, cr);
5962 if (!(cr->duty & DUTY_PME))
5964 /* Set up the commnuication to our PME node */
5965 dd->pme_nodeid = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
5966 dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
5969 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
5970 dd->pme_nodeid, dd->pme_receive_vir_ener);
5975 dd->pme_nodeid = -1;
5980 dd->ma = init_gmx_domdec_master_t(dd,
5982 comm->cgs_gl.index[comm->cgs_gl.nr]);
5986 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
5988 real *slb_frac, tot;
5993 if (nc > 1 && size_string != nullptr)
5997 fprintf(fplog, "Using static load balancing for the %s direction\n",
6002 for (i = 0; i < nc; i++)
6005 sscanf(size_string, "%20lf%n", &dbl, &n);
6008 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6017 fprintf(fplog, "Relative cell sizes:");
6019 for (i = 0; i < nc; i++)
6024 fprintf(fplog, " %5.3f", slb_frac[i]);
6029 fprintf(fplog, "\n");
6036 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
6039 gmx_mtop_ilistloop_t iloop;
6043 iloop = gmx_mtop_ilistloop_init(mtop);
6044 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6046 for (ftype = 0; ftype < F_NRE; ftype++)
6048 if ((interaction_function[ftype].flags & IF_BOND) &&
6051 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
6059 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6065 val = getenv(env_var);
6068 if (sscanf(val, "%20d", &nst) <= 0)
6074 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6082 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6086 fprintf(stderr, "\n%s\n", warn_string);
6090 fprintf(fplog, "\n%s\n", warn_string);
6094 static void check_dd_restrictions(t_commrec *cr, const gmx_domdec_t *dd,
6095 const t_inputrec *ir, FILE *fplog)
6097 if (ir->ePBC == epbcSCREW &&
6098 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6100 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6103 if (ir->ns_type == ensSIMPLE)
6105 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6108 if (ir->nstlist == 0)
6110 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6113 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6115 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6119 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6124 r = ddbox->box_size[XX];
6125 for (di = 0; di < dd->ndim; di++)
6128 /* Check using the initial average cell size */
6129 r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6135 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
6137 static int forceDlbOffOrBail(int cmdlineDlbState,
6138 const std::string &reasonStr,
6142 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
6143 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
6145 if (cmdlineDlbState == edlbsOnUser)
6147 gmx_fatal(FARGS, (dlbNotSupportedErr + reasonStr).c_str());
6149 else if (cmdlineDlbState == edlbsOffCanTurnOn)
6151 dd_warning(cr, fplog, (dlbDisableNote + reasonStr + "\n").c_str());
6153 return edlbsOffForever;
6156 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
6158 * This function parses the parameters of "-dlb" command line option setting
6159 * corresponding state values. Then it checks the consistency of the determined
6160 * state with other run parameters and settings. As a result, the initial state
6161 * may be altered or an error may be thrown if incompatibility of options is detected.
6163 * \param [in] fplog Pointer to mdrun log file.
6164 * \param [in] cr Pointer to MPI communication object.
6165 * \param [in] dlb_opt Pointer to the '-dlb' command line argument's option.
6166 * \param [in] bRecordLoad True if the load balancer is recording load information.
6167 * \param [in] Flags Simulation flags passed from main.
6168 * \param [in] ir Pointer mdrun to input parameters.
6169 * \returns DLB initial/startup state.
6171 static int determineInitialDlbState(FILE *fplog, t_commrec *cr,
6172 const char *dlb_opt, gmx_bool bRecordLoad,
6173 unsigned long Flags, const t_inputrec *ir)
6179 case 'a': dlbState = edlbsOffCanTurnOn; break;
6180 case 'n': dlbState = edlbsOffUser; break;
6181 case 'y': dlbState = edlbsOnUser; break;
6182 default: gmx_incons("Unknown dlb_opt");
6185 /* Reruns don't support DLB: bail or override auto mode */
6186 if (Flags & MD_RERUN)
6188 std::string reasonStr = "it is not supported in reruns.";
6189 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
6192 /* Unsupported integrators */
6193 if (!EI_DYNAMICS(ir->eI))
6195 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
6196 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
6199 /* Without cycle counters we can't time work to balance on */
6202 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
6203 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
6206 if (Flags & MD_REPRODUCIBLE)
6208 std::string reasonStr = "you started a reproducible run.";
6213 case edlbsOffForever:
6214 GMX_RELEASE_ASSERT(false, "edlbsOffForever is not a valid initial state");
6216 case edlbsOffCanTurnOn:
6217 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
6219 case edlbsOnCanTurnOff:
6220 GMX_RELEASE_ASSERT(false, "edlbsOffCanTurnOff is not a valid initial state");
6223 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", cr, fplog);
6226 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", dlbState);
6234 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6239 if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
6241 /* Decomposition order z,y,x */
6244 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6246 for (dim = DIM-1; dim >= 0; dim--)
6248 if (dd->nc[dim] > 1)
6250 dd->dim[dd->ndim++] = dim;
6256 /* Decomposition order x,y,z */
6257 for (dim = 0; dim < DIM; dim++)
6259 if (dd->nc[dim] > 1)
6261 dd->dim[dd->ndim++] = dim;
6267 static gmx_domdec_comm_t *init_dd_comm()
6269 gmx_domdec_comm_t *comm;
6273 snew(comm->cggl_flag, DIM*2);
6274 snew(comm->cgcm_state, DIM*2);
6275 for (i = 0; i < DIM*2; i++)
6277 comm->cggl_flag_nalloc[i] = 0;
6278 comm->cgcm_state_nalloc[i] = 0;
6281 comm->nalloc_int = 0;
6282 comm->buf_int = nullptr;
6284 vec_rvec_init(&comm->vbuf);
6286 comm->n_load_have = 0;
6287 comm->n_load_collect = 0;
6289 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6291 comm->sum_nat[i] = 0;
6295 comm->load_step = 0;
6298 clear_ivec(comm->load_lim);
6305 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
6306 static void set_dd_limits_and_grid(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
6307 unsigned long Flags,
6308 ivec nc, int nPmeRanks,
6309 real comm_distance_min, real rconstr,
6310 const char *dlb_opt, real dlb_scale,
6311 const char *sizex, const char *sizey, const char *sizez,
6312 const gmx_mtop_t *mtop,
6313 const t_inputrec *ir,
6314 matrix box, const rvec *x,
6316 int *npme_x, int *npme_y)
6319 real r_bonded_limit = -1;
6320 const real tenPercentMargin = 1.1;
6321 gmx_domdec_comm_t *comm = dd->comm;
6323 snew(comm->cggl_flag, DIM*2);
6324 snew(comm->cgcm_state, DIM*2);
6326 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6327 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6329 dd->pme_recv_f_alloc = 0;
6330 dd->pme_recv_f_buf = nullptr;
6332 /* Initialize to GPU share count to 0, might change later */
6333 comm->nrank_gpu_shared = 0;
6335 comm->dlbState = determineInitialDlbState(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6336 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
6337 /* To consider turning DLB on after 2*nstlist steps we need to check
6338 * at partitioning count 3. Thus we need to increase the first count by 2.
6340 comm->ddPartioningCountFirstDlbOff += 2;
6344 fprintf(fplog, "Dynamic load balancing: %s\n",
6345 edlbs_names[comm->dlbState]);
6347 comm->bPMELoadBalDLBLimits = FALSE;
6349 /* Allocate the charge group/atom sorting struct */
6350 snew(comm->sort, 1);
6352 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6354 comm->bInterCGBondeds = ((ncg_mtop(mtop) > mtop->mols.nr) ||
6355 mtop->bIntermolecularInteractions);
6356 if (comm->bInterCGBondeds)
6358 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6362 comm->bInterCGMultiBody = FALSE;
6365 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6366 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6370 /* Set the cut-off to some very large value,
6371 * so we don't need if statements everywhere in the code.
6372 * We use sqrt, since the cut-off is squared in some places.
6374 comm->cutoff = GMX_CUTOFF_INF;
6378 comm->cutoff = ir->rlist;
6380 comm->cutoff_mbody = 0;
6382 comm->cellsize_limit = 0;
6383 comm->bBondComm = FALSE;
6385 /* Atoms should be able to move by up to half the list buffer size (if > 0)
6386 * within nstlist steps. Since boundaries are allowed to displace by half
6387 * a cell size, DD cells should be at least the size of the list buffer.
6389 comm->cellsize_limit = std::max(comm->cellsize_limit,
6390 ir->rlist - std::max(ir->rvdw, ir->rcoulomb));
6392 if (comm->bInterCGBondeds)
6394 if (comm_distance_min > 0)
6396 comm->cutoff_mbody = comm_distance_min;
6397 if (Flags & MD_DDBONDCOMM)
6399 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6403 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
6405 r_bonded_limit = comm->cutoff_mbody;
6407 else if (ir->bPeriodicMols)
6409 /* Can not easily determine the required cut-off */
6410 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6411 comm->cutoff_mbody = comm->cutoff/2;
6412 r_bonded_limit = comm->cutoff_mbody;
6420 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6421 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6423 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6424 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6426 /* We use an initial margin of 10% for the minimum cell size,
6427 * except when we are just below the non-bonded cut-off.
6429 if (Flags & MD_DDBONDCOMM)
6431 if (std::max(r_2b, r_mb) > comm->cutoff)
6433 r_bonded = std::max(r_2b, r_mb);
6434 r_bonded_limit = tenPercentMargin*r_bonded;
6435 comm->bBondComm = TRUE;
6440 r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
6442 /* We determine cutoff_mbody later */
6446 /* No special bonded communication,
6447 * simply increase the DD cut-off.
6449 r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb);
6450 comm->cutoff_mbody = r_bonded_limit;
6451 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
6457 "Minimum cell size due to bonded interactions: %.3f nm\n",
6460 comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
6463 if (dd->bInterCGcons && rconstr <= 0)
6465 /* There is a cell size limit due to the constraints (P-LINCS) */
6466 rconstr = constr_r_max(fplog, mtop, ir);
6470 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6472 if (rconstr > comm->cellsize_limit)
6474 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6478 else if (rconstr > 0 && fplog)
6480 /* Here we do not check for dd->bInterCGcons,
6481 * because one can also set a cell size limit for virtual sites only
6482 * and at this point we don't know yet if there are intercg v-sites.
6485 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6488 comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
6490 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6494 copy_ivec(nc, dd->nc);
6495 set_dd_dim(fplog, dd);
6496 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6500 cr->npmenodes = nPmeRanks;
6504 /* When the DD grid is set explicitly and -npme is set to auto,
6505 * don't use PME ranks. We check later if the DD grid is
6506 * compatible with the total number of ranks.
6511 real acs = average_cellsize_min(dd, ddbox);
6512 if (acs < comm->cellsize_limit)
6516 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6518 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
6519 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6520 acs, comm->cellsize_limit);
6525 set_ddbox_cr(cr, nullptr, ir, box, &comm->cgs_gl, x, ddbox);
6527 /* We need to choose the optimal DD grid and possibly PME nodes */
6529 dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6531 !isDlbDisabled(comm),
6533 comm->cellsize_limit, comm->cutoff,
6534 comm->bInterCGBondeds);
6536 if (dd->nc[XX] == 0)
6539 gmx_bool bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6540 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
6541 !bC ? "-rdd" : "-rcon",
6542 comm->dlbState != edlbsOffUser ? " or -dds" : "",
6543 bC ? " or your LINCS settings" : "");
6545 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
6546 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
6548 "Look in the log file for details on the domain decomposition",
6549 cr->nnodes-cr->npmenodes, limit, buf);
6551 set_dd_dim(fplog, dd);
6557 "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
6558 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6561 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6562 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6564 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
6565 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
6566 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6568 if (cr->npmenodes > dd->nnodes)
6570 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
6571 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6573 if (cr->npmenodes > 0)
6575 comm->npmenodes = cr->npmenodes;
6579 comm->npmenodes = dd->nnodes;
6582 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6584 /* The following choices should match those
6585 * in comm_cost_est in domdec_setup.c.
6586 * Note that here the checks have to take into account
6587 * that the decomposition might occur in a different order than xyz
6588 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6589 * in which case they will not match those in comm_cost_est,
6590 * but since that is mainly for testing purposes that's fine.
6592 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6593 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6594 getenv("GMX_PMEONEDD") == nullptr)
6596 comm->npmedecompdim = 2;
6597 comm->npmenodes_x = dd->nc[XX];
6598 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6602 /* In case nc is 1 in both x and y we could still choose to
6603 * decompose pme in y instead of x, but we use x for simplicity.
6605 comm->npmedecompdim = 1;
6606 if (dd->dim[0] == YY)
6608 comm->npmenodes_x = 1;
6609 comm->npmenodes_y = comm->npmenodes;
6613 comm->npmenodes_x = comm->npmenodes;
6614 comm->npmenodes_y = 1;
6619 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6620 comm->npmenodes_x, comm->npmenodes_y, 1);
6625 comm->npmedecompdim = 0;
6626 comm->npmenodes_x = 0;
6627 comm->npmenodes_y = 0;
6630 /* Technically we don't need both of these,
6631 * but it simplifies code not having to recalculate it.
6633 *npme_x = comm->npmenodes_x;
6634 *npme_y = comm->npmenodes_y;
6636 snew(comm->slb_frac, DIM);
6637 if (isDlbDisabled(comm))
6639 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6640 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6641 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
6644 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6646 if (comm->bBondComm || !isDlbDisabled(comm))
6648 /* Set the bonded communication distance to halfway
6649 * the minimum and the maximum,
6650 * since the extra communication cost is nearly zero.
6652 real acs = average_cellsize_min(dd, ddbox);
6653 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6654 if (!isDlbDisabled(comm))
6656 /* Check if this does not limit the scaling */
6657 comm->cutoff_mbody = std::min(comm->cutoff_mbody, dlb_scale*acs);
6659 if (!comm->bBondComm)
6661 /* Without bBondComm do not go beyond the n.b. cut-off */
6662 comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
6663 if (comm->cellsize_limit >= comm->cutoff)
6665 /* We don't loose a lot of efficieny
6666 * when increasing it to the n.b. cut-off.
6667 * It can even be slightly faster, because we need
6668 * less checks for the communication setup.
6670 comm->cutoff_mbody = comm->cutoff;
6673 /* Check if we did not end up below our original limit */
6674 comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
6676 if (comm->cutoff_mbody > comm->cellsize_limit)
6678 comm->cellsize_limit = comm->cutoff_mbody;
6681 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
6686 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
6687 "cellsize limit %f\n",
6688 comm->bBondComm, comm->cellsize_limit);
6693 check_dd_restrictions(cr, dd, ir, fplog);
6697 static void set_dlb_limits(gmx_domdec_t *dd)
6702 for (d = 0; d < dd->ndim; d++)
6704 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
6705 dd->comm->cellsize_min[dd->dim[d]] =
6706 dd->comm->cellsize_min_dlb[dd->dim[d]];
6711 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
6714 gmx_domdec_comm_t *comm;
6721 cellsize_min = comm->cellsize_min[dd->dim[0]];
6722 for (d = 1; d < dd->ndim; d++)
6724 cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
6727 /* Turn off DLB if we're too close to the cell size limit. */
6728 if (cellsize_min < comm->cellsize_limit*1.05)
6730 auto str = gmx::formatString("step %" GMX_PRId64 " Measured %.1f %% performance loss due to load imbalance, "
6731 "but the minimum cell size is smaller than 1.05 times the cell size limit."
6732 "Will no longer try dynamic load balancing.\n", step, dd_force_imb_perf_loss(dd)*100);
6733 dd_warning(cr, fplog, str.c_str());
6735 comm->dlbState = edlbsOffForever;
6740 sprintf(buf, "step %" GMX_PRId64 " Turning on dynamic load balancing, because the performance loss due to load imbalance is %.1f %%.\n", step, dd_force_imb_perf_loss(dd)*100);
6741 dd_warning(cr, fplog, buf);
6742 comm->dlbState = edlbsOnCanTurnOff;
6744 /* Store the non-DLB performance, so we can check if DLB actually
6745 * improves performance.
6747 GMX_RELEASE_ASSERT(comm->cycl_n[ddCyclStep] > 0, "When we turned on DLB, we should have measured cycles");
6748 comm->cyclesPerStepBeforeDLB = comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
6752 /* We can set the required cell size info here,
6753 * so we do not need to communicate this.
6754 * The grid is completely uniform.
6756 for (d = 0; d < dd->ndim; d++)
6760 comm->load[d].sum_m = comm->load[d].sum;
6762 nc = dd->nc[dd->dim[d]];
6763 for (i = 0; i < nc; i++)
6765 comm->root[d]->cell_f[i] = i/(real)nc;
6768 comm->root[d]->cell_f_max0[i] = i /(real)nc;
6769 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
6772 comm->root[d]->cell_f[nc] = 1.0;
6777 static void turn_off_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
6779 gmx_domdec_t *dd = cr->dd;
6782 sprintf(buf, "step %" GMX_PRId64 " Turning off dynamic load balancing, because it is degrading performance.\n", step);
6783 dd_warning(cr, fplog, buf);
6784 dd->comm->dlbState = edlbsOffCanTurnOn;
6785 dd->comm->haveTurnedOffDlb = true;
6786 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
6789 static void turn_off_dlb_forever(FILE *fplog, t_commrec *cr, gmx_int64_t step)
6791 GMX_RELEASE_ASSERT(cr->dd->comm->dlbState == edlbsOffCanTurnOn, "Can only turn off DLB forever when it was in the can-turn-on state");
6793 sprintf(buf, "step %" GMX_PRId64 " Will no longer try dynamic load balancing, as it degraded performance.\n", step);
6794 dd_warning(cr, fplog, buf);
6795 cr->dd->comm->dlbState = edlbsOffForever;
6798 static char *init_bLocalCG(const gmx_mtop_t *mtop)
6803 ncg = ncg_mtop(mtop);
6804 snew(bLocalCG, ncg);
6805 for (cg = 0; cg < ncg; cg++)
6807 bLocalCG[cg] = FALSE;
6813 void dd_init_bondeds(FILE *fplog,
6815 const gmx_mtop_t *mtop,
6816 const gmx_vsite_t *vsite,
6817 const t_inputrec *ir,
6818 gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
6820 gmx_domdec_comm_t *comm;
6822 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
6826 if (comm->bBondComm)
6828 /* Communicate atoms beyond the cut-off for bonded interactions */
6831 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
6833 comm->bLocalCG = init_bLocalCG(mtop);
6837 /* Only communicate atoms based on cut-off */
6838 comm->cglink = nullptr;
6839 comm->bLocalCG = nullptr;
6843 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
6844 const gmx_mtop_t *mtop, const t_inputrec *ir,
6845 gmx_bool bDynLoadBal, real dlb_scale,
6846 const gmx_ddbox_t *ddbox)
6848 gmx_domdec_comm_t *comm;
6854 if (fplog == nullptr)
6863 fprintf(fplog, "The maximum number of communication pulses is:");
6864 for (d = 0; d < dd->ndim; d++)
6866 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
6868 fprintf(fplog, "\n");
6869 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
6870 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
6871 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
6872 for (d = 0; d < DIM; d++)
6876 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
6883 comm->cellsize_min_dlb[d]/
6884 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6886 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
6889 fprintf(fplog, "\n");
6893 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
6894 fprintf(fplog, "The initial number of communication pulses is:");
6895 for (d = 0; d < dd->ndim; d++)
6897 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
6899 fprintf(fplog, "\n");
6900 fprintf(fplog, "The initial domain decomposition cell size is:");
6901 for (d = 0; d < DIM; d++)
6905 fprintf(fplog, " %c %.2f nm",
6906 dim2char(d), dd->comm->cellsize_min[d]);
6909 fprintf(fplog, "\n\n");
6912 gmx_bool bInterCGVsites = count_intercg_vsites(mtop);
6914 if (comm->bInterCGBondeds ||
6916 dd->bInterCGcons || dd->bInterCGsettles)
6918 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
6919 fprintf(fplog, "%40s %-7s %6.3f nm\n",
6920 "non-bonded interactions", "", comm->cutoff);
6924 limit = dd->comm->cellsize_limit;
6928 if (dynamic_dd_box(ddbox, ir))
6930 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
6932 limit = dd->comm->cellsize_min[XX];
6933 for (d = 1; d < DIM; d++)
6935 limit = std::min(limit, dd->comm->cellsize_min[d]);
6939 if (comm->bInterCGBondeds)
6941 fprintf(fplog, "%40s %-7s %6.3f nm\n",
6942 "two-body bonded interactions", "(-rdd)",
6943 std::max(comm->cutoff, comm->cutoff_mbody));
6944 fprintf(fplog, "%40s %-7s %6.3f nm\n",
6945 "multi-body bonded interactions", "(-rdd)",
6946 (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
6950 fprintf(fplog, "%40s %-7s %6.3f nm\n",
6951 "virtual site constructions", "(-rcon)", limit);
6953 if (dd->bInterCGcons || dd->bInterCGsettles)
6955 sprintf(buf, "atoms separated by up to %d constraints",
6957 fprintf(fplog, "%40s %-7s %6.3f nm\n",
6958 buf, "(-rcon)", limit);
6960 fprintf(fplog, "\n");
6966 static void set_cell_limits_dlb(gmx_domdec_t *dd,
6968 const t_inputrec *ir,
6969 const gmx_ddbox_t *ddbox)
6971 gmx_domdec_comm_t *comm;
6972 int d, dim, npulse, npulse_d_max, npulse_d;
6977 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
6979 /* Determine the maximum number of comm. pulses in one dimension */
6981 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
6983 /* Determine the maximum required number of grid pulses */
6984 if (comm->cellsize_limit >= comm->cutoff)
6986 /* Only a single pulse is required */
6989 else if (!bNoCutOff && comm->cellsize_limit > 0)
6991 /* We round down slightly here to avoid overhead due to the latency
6992 * of extra communication calls when the cut-off
6993 * would be only slightly longer than the cell size.
6994 * Later cellsize_limit is redetermined,
6995 * so we can not miss interactions due to this rounding.
6997 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7001 /* There is no cell size limit */
7002 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7005 if (!bNoCutOff && npulse > 1)
7007 /* See if we can do with less pulses, based on dlb_scale */
7009 for (d = 0; d < dd->ndim; d++)
7012 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7013 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7014 npulse_d_max = std::max(npulse_d_max, npulse_d);
7016 npulse = std::min(npulse, npulse_d_max);
7019 /* This env var can override npulse */
7020 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
7027 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7028 for (d = 0; d < dd->ndim; d++)
7030 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
7031 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7032 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7033 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
7034 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7036 comm->bVacDLBNoLimit = FALSE;
7040 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7041 if (!comm->bVacDLBNoLimit)
7043 comm->cellsize_limit = std::max(comm->cellsize_limit,
7044 comm->cutoff/comm->maxpulse);
7046 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
7047 /* Set the minimum cell size for each DD dimension */
7048 for (d = 0; d < dd->ndim; d++)
7050 if (comm->bVacDLBNoLimit ||
7051 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7053 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7057 comm->cellsize_min_dlb[dd->dim[d]] =
7058 comm->cutoff/comm->cd[d].np_dlb;
7061 if (comm->cutoff_mbody <= 0)
7063 comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
7071 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
7073 /* If each molecule is a single charge group
7074 * or we use domain decomposition for each periodic dimension,
7075 * we do not need to take pbc into account for the bonded interactions.
7077 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7080 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
7083 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
7084 static void set_ddgrid_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7085 const gmx_mtop_t *mtop, const t_inputrec *ir,
7086 const gmx_ddbox_t *ddbox)
7088 gmx_domdec_comm_t *comm;
7094 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7096 init_ddpme(dd, &comm->ddpme[0], 0);
7097 if (comm->npmedecompdim >= 2)
7099 init_ddpme(dd, &comm->ddpme[1], 1);
7104 comm->npmenodes = 0;
7105 if (dd->pme_nodeid >= 0)
7107 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
7108 "Can not have separate PME ranks without PME electrostatics");
7114 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7116 if (!isDlbDisabled(comm))
7118 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7121 print_dd_settings(fplog, dd, mtop, ir, isDlbOn(comm), dlb_scale, ddbox);
7122 if (comm->dlbState == edlbsOffCanTurnOn)
7126 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7128 print_dd_settings(fplog, dd, mtop, ir, TRUE, dlb_scale, ddbox);
7131 if (ir->ePBC == epbcNONE)
7133 vol_frac = 1 - 1/(double)dd->nnodes;
7138 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7142 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7144 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7146 dd->ga2la = ga2la_init(natoms_tot, static_cast<int>(vol_frac*natoms_tot));
7149 /*! \brief Set some important DD parameters that can be modified by env.vars */
7150 static void set_dd_envvar_options(FILE *fplog, gmx_domdec_t *dd, int rank_mysim)
7152 gmx_domdec_comm_t *comm = dd->comm;
7154 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
7155 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
7156 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
7157 int recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
7158 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
7159 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
7160 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
7162 if (dd->bSendRecv2 && fplog)
7164 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
7171 fprintf(fplog, "Will load balance based on FLOP count\n");
7173 if (comm->eFlop > 1)
7175 srand(1 + rank_mysim);
7177 comm->bRecordLoad = TRUE;
7181 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
7185 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
7186 unsigned long Flags,
7187 ivec nc, int nPmeRanks,
7189 real comm_distance_min, real rconstr,
7190 const char *dlb_opt, real dlb_scale,
7191 const char *sizex, const char *sizey, const char *sizez,
7192 const gmx_mtop_t *mtop,
7193 const t_inputrec *ir,
7194 matrix box, rvec *x,
7196 int *npme_x, int *npme_y)
7203 "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
7208 dd->comm = init_dd_comm();
7210 set_dd_envvar_options(fplog, dd, cr->nodeid);
7212 set_dd_limits_and_grid(fplog, cr, dd, Flags,
7214 comm_distance_min, rconstr,
7216 sizex, sizey, sizez,
7222 make_dd_communicators(fplog, cr, dd, dd_rank_order);
7224 if (cr->duty & DUTY_PP)
7226 set_ddgrid_parameters(fplog, dd, dlb_scale, mtop, ir, ddbox);
7228 setup_neighbor_relations(dd);
7231 /* Set overallocation to avoid frequent reallocation of arrays */
7232 set_over_alloc_dd(TRUE);
7234 /* Initialize DD paritioning counters */
7235 dd->comm->partition_step = INT_MIN;
7238 /* We currently don't know the number of threads yet, we set this later */
7241 clear_dd_cycle_counts(dd);
7246 static gmx_bool test_dd_cutoff(t_commrec *cr,
7247 t_state *state, const t_inputrec *ir,
7258 set_ddbox(dd, FALSE, cr, ir, state->box,
7259 TRUE, &dd->comm->cgs_gl, as_rvec_array(state->x.data()), &ddbox);
7263 for (d = 0; d < dd->ndim; d++)
7267 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7268 if (dynamic_dd_box(&ddbox, ir))
7270 inv_cell_size *= DD_PRES_SCALE_MARGIN;
7273 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7275 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
7277 if (np > dd->comm->cd[d].np_dlb)
7282 /* If a current local cell size is smaller than the requested
7283 * cut-off, we could still fix it, but this gets very complicated.
7284 * Without fixing here, we might actually need more checks.
7286 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7293 if (!isDlbDisabled(dd->comm))
7295 /* If DLB is not active yet, we don't need to check the grid jumps.
7296 * Actually we shouldn't, because then the grid jump data is not set.
7298 if (isDlbOn(dd->comm) &&
7299 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7304 gmx_sumi(1, &LocallyLimited, cr);
7306 if (LocallyLimited > 0)
7315 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, const t_inputrec *ir,
7318 gmx_bool bCutoffAllowed;
7320 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7324 cr->dd->comm->cutoff = cutoff_req;
7327 return bCutoffAllowed;
7330 void set_dd_dlb_max_cutoff(t_commrec *cr, real cutoff)
7332 gmx_domdec_comm_t *comm;
7334 comm = cr->dd->comm;
7336 /* Turn on the DLB limiting (might have been on already) */
7337 comm->bPMELoadBalDLBLimits = TRUE;
7339 /* Change the cut-off limit */
7340 comm->PMELoadBal_max_cutoff = cutoff;
7344 fprintf(debug, "PME load balancing set a limit to the DLB staggering such that a %f cut-off will continue to fit\n",
7345 comm->PMELoadBal_max_cutoff);
7349 /* Sets whether we should later check the load imbalance data, so that
7350 * we can trigger dynamic load balancing if enough imbalance has
7353 * Used after PME load balancing unlocks DLB, so that the check
7354 * whether DLB will be useful can happen immediately.
7356 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue)
7358 if (dd->comm->dlbState == edlbsOffCanTurnOn)
7360 dd->comm->bCheckWhetherToTurnDlbOn = bValue;
7364 /* Store the DD partitioning count, so we can ignore cycle counts
7365 * over the next nstlist steps, which are often slower.
7367 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
7372 /* Returns if we should check whether there has been enough load
7373 * imbalance to trigger dynamic load balancing.
7375 static gmx_bool dd_dlb_get_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd)
7377 if (dd->comm->dlbState != edlbsOffCanTurnOn)
7382 if (dd->ddp_count <= dd->comm->ddPartioningCountFirstDlbOff)
7384 /* We ignore the first nstlist steps at the start of the run
7385 * or after PME load balancing or after turning DLB off, since
7386 * these often have extra allocation or cache miss overhead.
7391 /* We should check whether we should use DLB directly after
7393 if (dd->comm->bCheckWhetherToTurnDlbOn)
7395 /* This flag was set when the PME load-balancing routines
7396 unlocked DLB, and should now be cleared. */
7397 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, FALSE);
7400 /* We check whether we should use DLB every c_checkTurnDlbOnInterval
7401 * partitionings (we do not do this every partioning, so that we
7402 * avoid excessive communication). */
7403 if (dd->comm->n_load_have % c_checkTurnDlbOnInterval == c_checkTurnDlbOnInterval - 1)
7411 gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
7413 return isDlbOn(dd->comm);
7416 gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
7418 return (dd->comm->dlbState == edlbsOffTemporarilyLocked);
7421 void dd_dlb_lock(gmx_domdec_t *dd)
7423 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
7424 if (dd->comm->dlbState == edlbsOffCanTurnOn)
7426 dd->comm->dlbState = edlbsOffTemporarilyLocked;
7430 void dd_dlb_unlock(gmx_domdec_t *dd)
7432 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
7433 if (dd->comm->dlbState == edlbsOffTemporarilyLocked)
7435 dd->comm->dlbState = edlbsOffCanTurnOn;
7436 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
7440 static void merge_cg_buffers(int ncell,
7441 gmx_domdec_comm_dim_t *cd, int pulse,
7443 int *index_gl, int *recv_i,
7444 rvec *cg_cm, rvec *recv_vr,
7446 cginfo_mb_t *cginfo_mb, int *cginfo)
7448 gmx_domdec_ind_t *ind, *ind_p;
7449 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7450 int shift, shift_at;
7452 ind = &cd->ind[pulse];
7454 /* First correct the already stored data */
7455 shift = ind->nrecv[ncell];
7456 for (cell = ncell-1; cell >= 0; cell--)
7458 shift -= ind->nrecv[cell];
7461 /* Move the cg's present from previous grid pulses */
7462 cg0 = ncg_cell[ncell+cell];
7463 cg1 = ncg_cell[ncell+cell+1];
7464 cgindex[cg1+shift] = cgindex[cg1];
7465 for (cg = cg1-1; cg >= cg0; cg--)
7467 index_gl[cg+shift] = index_gl[cg];
7468 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7469 cgindex[cg+shift] = cgindex[cg];
7470 cginfo[cg+shift] = cginfo[cg];
7472 /* Correct the already stored send indices for the shift */
7473 for (p = 1; p <= pulse; p++)
7475 ind_p = &cd->ind[p];
7477 for (c = 0; c < cell; c++)
7479 cg0 += ind_p->nsend[c];
7481 cg1 = cg0 + ind_p->nsend[cell];
7482 for (cg = cg0; cg < cg1; cg++)
7484 ind_p->index[cg] += shift;
7490 /* Merge in the communicated buffers */
7494 for (cell = 0; cell < ncell; cell++)
7496 cg1 = ncg_cell[ncell+cell+1] + shift;
7499 /* Correct the old cg indices */
7500 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7502 cgindex[cg+1] += shift_at;
7505 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7507 /* Copy this charge group from the buffer */
7508 index_gl[cg1] = recv_i[cg0];
7509 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7510 /* Add it to the cgindex */
7511 cg_gl = index_gl[cg1];
7512 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7513 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7514 cgindex[cg1+1] = cgindex[cg1] + nat;
7519 shift += ind->nrecv[cell];
7520 ncg_cell[ncell+cell+1] = cg1;
7524 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7525 int nzone, int cg0, const int *cgindex)
7529 /* Store the atom block boundaries for easy copying of communication buffers
7532 for (zone = 0; zone < nzone; zone++)
7534 for (p = 0; p < cd->np; p++)
7536 cd->ind[p].cell2at0[zone] = cgindex[cg];
7537 cg += cd->ind[p].nrecv[zone];
7538 cd->ind[p].cell2at1[zone] = cgindex[cg];
7543 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7549 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7551 if (!bLocalCG[link->a[i]])
7560 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7562 real c[DIM][4]; /* the corners for the non-bonded communication */
7563 real cr0; /* corner for rounding */
7564 real cr1[4]; /* corners for rounding */
7565 real bc[DIM]; /* corners for bounded communication */
7566 real bcr1; /* corner for rounding for bonded communication */
7569 /* Determine the corners of the domain(s) we are communicating with */
7571 set_dd_corners(const gmx_domdec_t *dd,
7572 int dim0, int dim1, int dim2,
7576 const gmx_domdec_comm_t *comm;
7577 const gmx_domdec_zones_t *zones;
7582 zones = &comm->zones;
7584 /* Keep the compiler happy */
7588 /* The first dimension is equal for all cells */
7589 c->c[0][0] = comm->cell_x0[dim0];
7592 c->bc[0] = c->c[0][0];
7597 /* This cell row is only seen from the first row */
7598 c->c[1][0] = comm->cell_x0[dim1];
7599 /* All rows can see this row */
7600 c->c[1][1] = comm->cell_x0[dim1];
7601 if (isDlbOn(dd->comm))
7603 c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7606 /* For the multi-body distance we need the maximum */
7607 c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7610 /* Set the upper-right corner for rounding */
7611 c->cr0 = comm->cell_x1[dim0];
7616 for (j = 0; j < 4; j++)
7618 c->c[2][j] = comm->cell_x0[dim2];
7620 if (isDlbOn(dd->comm))
7622 /* Use the maximum of the i-cells that see a j-cell */
7623 for (i = 0; i < zones->nizone; i++)
7625 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7630 std::max(c->c[2][j-4],
7631 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7637 /* For the multi-body distance we need the maximum */
7638 c->bc[2] = comm->cell_x0[dim2];
7639 for (i = 0; i < 2; i++)
7641 for (j = 0; j < 2; j++)
7643 c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0);
7649 /* Set the upper-right corner for rounding */
7650 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7651 * Only cell (0,0,0) can see cell 7 (1,1,1)
7653 c->cr1[0] = comm->cell_x1[dim1];
7654 c->cr1[3] = comm->cell_x1[dim1];
7655 if (isDlbOn(dd->comm))
7657 c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7660 /* For the multi-body distance we need the maximum */
7661 c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7668 /* Determine which cg's we need to send in this pulse from this zone */
7670 get_zone_pulse_cgs(gmx_domdec_t *dd,
7671 int zonei, int zone,
7673 const int *index_gl,
7675 int dim, int dim_ind,
7676 int dim0, int dim1, int dim2,
7677 real r_comm2, real r_bcomm2,
7681 real skew_fac2_d, real skew_fac_01,
7682 rvec *v_d, rvec *v_0, rvec *v_1,
7683 const dd_corners_t *c,
7685 gmx_bool bDistBonded,
7691 gmx_domdec_ind_t *ind,
7692 int **ibuf, int *ibuf_nalloc,
7698 gmx_domdec_comm_t *comm;
7700 gmx_bool bDistMB_pulse;
7702 real r2, rb2, r, tric_sh;
7705 int nsend_z, nsend, nat;
7709 bScrew = (dd->bScrewPBC && dim == XX);
7711 bDistMB_pulse = (bDistMB && bDistBonded);
7717 for (cg = cg0; cg < cg1; cg++)
7721 if (tric_dist[dim_ind] == 0)
7723 /* Rectangular direction, easy */
7724 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7731 r = cg_cm[cg][dim] - c->bc[dim_ind];
7737 /* Rounding gives at most a 16% reduction
7738 * in communicated atoms
7740 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7742 r = cg_cm[cg][dim0] - c->cr0;
7743 /* This is the first dimension, so always r >= 0 */
7750 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7752 r = cg_cm[cg][dim1] - c->cr1[zone];
7759 r = cg_cm[cg][dim1] - c->bcr1;
7769 /* Triclinic direction, more complicated */
7772 /* Rounding, conservative as the skew_fac multiplication
7773 * will slightly underestimate the distance.
7775 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7777 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7778 for (i = dim0+1; i < DIM; i++)
7780 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7782 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7785 rb[dim0] = rn[dim0];
7788 /* Take care that the cell planes along dim0 might not
7789 * be orthogonal to those along dim1 and dim2.
7791 for (i = 1; i <= dim_ind; i++)
7794 if (normal[dim0][dimd] > 0)
7796 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7799 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7804 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7806 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7808 for (i = dim1+1; i < DIM; i++)
7810 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7812 rn[dim1] += tric_sh;
7815 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7816 /* Take care of coupling of the distances
7817 * to the planes along dim0 and dim1 through dim2.
7819 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7820 /* Take care that the cell planes along dim1
7821 * might not be orthogonal to that along dim2.
7823 if (normal[dim1][dim2] > 0)
7825 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7831 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7834 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7835 /* Take care of coupling of the distances
7836 * to the planes along dim0 and dim1 through dim2.
7838 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7839 /* Take care that the cell planes along dim1
7840 * might not be orthogonal to that along dim2.
7842 if (normal[dim1][dim2] > 0)
7844 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7849 /* The distance along the communication direction */
7850 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7852 for (i = dim+1; i < DIM; i++)
7854 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7859 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7860 /* Take care of coupling of the distances
7861 * to the planes along dim0 and dim1 through dim2.
7863 if (dim_ind == 1 && zonei == 1)
7865 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
7871 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
7874 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
7875 /* Take care of coupling of the distances
7876 * to the planes along dim0 and dim1 through dim2.
7878 if (dim_ind == 1 && zonei == 1)
7880 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
7888 ((bDistMB && rb2 < r_bcomm2) ||
7889 (bDist2B && r2 < r_bcomm2)) &&
7891 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
7892 missing_link(comm->cglink, index_gl[cg],
7895 /* Make an index to the local charge groups */
7896 if (nsend+1 > ind->nalloc)
7898 ind->nalloc = over_alloc_large(nsend+1);
7899 srenew(ind->index, ind->nalloc);
7901 if (nsend+1 > *ibuf_nalloc)
7903 *ibuf_nalloc = over_alloc_large(nsend+1);
7904 srenew(*ibuf, *ibuf_nalloc);
7906 ind->index[nsend] = cg;
7907 (*ibuf)[nsend] = index_gl[cg];
7909 vec_rvec_check_alloc(vbuf, nsend+1);
7911 if (dd->ci[dim] == 0)
7913 /* Correct cg_cm for pbc */
7914 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
7917 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
7918 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
7923 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
7926 nat += cgindex[cg+1] - cgindex[cg];
7932 *nsend_z_ptr = nsend_z;
7935 static void setup_dd_communication(gmx_domdec_t *dd,
7936 matrix box, gmx_ddbox_t *ddbox,
7938 t_state *state, PaddedRVecVector *f)
7940 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
7941 int nzone, nzone_send, zone, zonei, cg0, cg1;
7942 int c, i, cg, cg_gl, nrcg;
7943 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
7944 gmx_domdec_comm_t *comm;
7945 gmx_domdec_zones_t *zones;
7946 gmx_domdec_comm_dim_t *cd;
7947 gmx_domdec_ind_t *ind;
7948 cginfo_mb_t *cginfo_mb;
7949 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
7950 real r_comm2, r_bcomm2;
7951 dd_corners_t corners;
7953 rvec *cg_cm, *normal, *v_d, *v_0 = nullptr, *v_1 = nullptr, *recv_vr;
7954 real skew_fac2_d, skew_fac_01;
7961 fprintf(debug, "Setting up DD communication\n");
7968 /* Initialize the thread data.
7969 * This can not be done in init_domain_decomposition,
7970 * as the numbers of threads is determined later.
7972 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7975 snew(comm->dth, comm->nth);
7979 switch (fr->cutoff_scheme)
7985 cg_cm = as_rvec_array(state->x.data());
7988 gmx_incons("unimplemented");
7992 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
7994 /* Check if we need to use triclinic distances */
7995 tric_dist[dim_ind] = 0;
7996 for (i = 0; i <= dim_ind; i++)
7998 if (ddbox->tric_dir[dd->dim[i]])
8000 tric_dist[dim_ind] = 1;
8005 bBondComm = comm->bBondComm;
8007 /* Do we need to determine extra distances for multi-body bondeds? */
8008 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
8010 /* Do we need to determine extra distances for only two-body bondeds? */
8011 bDist2B = (bBondComm && !bDistMB);
8013 r_comm2 = gmx::square(comm->cutoff);
8014 r_bcomm2 = gmx::square(comm->cutoff_mbody);
8018 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, std::sqrt(r_bcomm2));
8021 zones = &comm->zones;
8024 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8025 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8027 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8029 /* Triclinic stuff */
8030 normal = ddbox->normal;
8034 v_0 = ddbox->v[dim0];
8035 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8037 /* Determine the coupling coefficient for the distances
8038 * to the cell planes along dim0 and dim1 through dim2.
8039 * This is required for correct rounding.
8042 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8045 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8051 v_1 = ddbox->v[dim1];
8054 zone_cg_range = zones->cg_range;
8055 index_gl = dd->index_gl;
8056 cgindex = dd->cgindex;
8057 cginfo_mb = fr->cginfo_mb;
8059 zone_cg_range[0] = 0;
8060 zone_cg_range[1] = dd->ncg_home;
8061 comm->zone_ncg1[0] = dd->ncg_home;
8062 pos_cg = dd->ncg_home;
8064 nat_tot = dd->nat_home;
8066 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8068 dim = dd->dim[dim_ind];
8069 cd = &comm->cd[dim_ind];
8071 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8073 /* No pbc in this dimension, the first node should not comm. */
8081 v_d = ddbox->v[dim];
8082 skew_fac2_d = gmx::square(ddbox->skew_fac[dim]);
8084 cd->bInPlace = TRUE;
8085 for (p = 0; p < cd->np; p++)
8087 /* Only atoms communicated in the first pulse are used
8088 * for multi-body bonded interactions or for bBondComm.
8090 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8095 for (zone = 0; zone < nzone_send; zone++)
8097 if (tric_dist[dim_ind] && dim_ind > 0)
8099 /* Determine slightly more optimized skew_fac's
8101 * This reduces the number of communicated atoms
8102 * by about 10% for 3D DD of rhombic dodecahedra.
8104 for (dimd = 0; dimd < dim; dimd++)
8106 sf2_round[dimd] = 1;
8107 if (ddbox->tric_dir[dimd])
8109 for (i = dd->dim[dimd]+1; i < DIM; i++)
8111 /* If we are shifted in dimension i
8112 * and the cell plane is tilted forward
8113 * in dimension i, skip this coupling.
8115 if (!(zones->shift[nzone+zone][i] &&
8116 ddbox->v[dimd][i][dimd] >= 0))
8119 gmx::square(ddbox->v[dimd][i][dimd]);
8122 sf2_round[dimd] = 1/sf2_round[dimd];
8127 zonei = zone_perm[dim_ind][zone];
8130 /* Here we permutate the zones to obtain a convenient order
8131 * for neighbor searching
8133 cg0 = zone_cg_range[zonei];
8134 cg1 = zone_cg_range[zonei+1];
8138 /* Look only at the cg's received in the previous grid pulse
8140 cg1 = zone_cg_range[nzone+zone+1];
8141 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
8144 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8145 for (th = 0; th < comm->nth; th++)
8149 gmx_domdec_ind_t *ind_p;
8150 int **ibuf_p, *ibuf_nalloc_p;
8152 int *nsend_p, *nat_p;
8158 /* Thread 0 writes in the comm buffers */
8160 ibuf_p = &comm->buf_int;
8161 ibuf_nalloc_p = &comm->nalloc_int;
8162 vbuf_p = &comm->vbuf;
8165 nsend_zone_p = &ind->nsend[zone];
8169 /* Other threads write into temp buffers */
8170 ind_p = &comm->dth[th].ind;
8171 ibuf_p = &comm->dth[th].ibuf;
8172 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8173 vbuf_p = &comm->dth[th].vbuf;
8174 nsend_p = &comm->dth[th].nsend;
8175 nat_p = &comm->dth[th].nat;
8176 nsend_zone_p = &comm->dth[th].nsend_zone;
8178 comm->dth[th].nsend = 0;
8179 comm->dth[th].nat = 0;
8180 comm->dth[th].nsend_zone = 0;
8190 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8191 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8194 /* Get the cg's for this pulse in this zone */
8195 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8197 dim, dim_ind, dim0, dim1, dim2,
8200 normal, skew_fac2_d, skew_fac_01,
8201 v_d, v_0, v_1, &corners, sf2_round,
8202 bDistBonded, bBondComm,
8206 ibuf_p, ibuf_nalloc_p,
8211 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
8214 /* Append data of threads>=1 to the communication buffers */
8215 for (th = 1; th < comm->nth; th++)
8217 dd_comm_setup_work_t *dth;
8220 dth = &comm->dth[th];
8222 ns1 = nsend + dth->nsend_zone;
8223 if (ns1 > ind->nalloc)
8225 ind->nalloc = over_alloc_dd(ns1);
8226 srenew(ind->index, ind->nalloc);
8228 if (ns1 > comm->nalloc_int)
8230 comm->nalloc_int = over_alloc_dd(ns1);
8231 srenew(comm->buf_int, comm->nalloc_int);
8233 if (ns1 > comm->vbuf.nalloc)
8235 comm->vbuf.nalloc = over_alloc_dd(ns1);
8236 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8239 for (i = 0; i < dth->nsend_zone; i++)
8241 ind->index[nsend] = dth->ind.index[i];
8242 comm->buf_int[nsend] = dth->ibuf[i];
8243 copy_rvec(dth->vbuf.v[i],
8244 comm->vbuf.v[nsend]);
8248 ind->nsend[zone] += dth->nsend_zone;
8251 /* Clear the counts in case we do not have pbc */
8252 for (zone = nzone_send; zone < nzone; zone++)
8254 ind->nsend[zone] = 0;
8256 ind->nsend[nzone] = nsend;
8257 ind->nsend[nzone+1] = nat;
8258 /* Communicate the number of cg's and atoms to receive */
8259 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8260 ind->nsend, nzone+2,
8261 ind->nrecv, nzone+2);
8263 /* The rvec buffer is also required for atom buffers of size nsend
8264 * in dd_move_x and dd_move_f.
8266 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8270 /* We can receive in place if only the last zone is not empty */
8271 for (zone = 0; zone < nzone-1; zone++)
8273 if (ind->nrecv[zone] > 0)
8275 cd->bInPlace = FALSE;
8280 /* The int buffer is only required here for the cg indices */
8281 if (ind->nrecv[nzone] > comm->nalloc_int2)
8283 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8284 srenew(comm->buf_int2, comm->nalloc_int2);
8286 /* The rvec buffer is also required for atom buffers
8287 * of size nrecv in dd_move_x and dd_move_f.
8289 i = std::max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8290 vec_rvec_check_alloc(&comm->vbuf2, i);
8294 /* Make space for the global cg indices */
8295 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8296 || dd->cg_nalloc == 0)
8298 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8299 srenew(index_gl, dd->cg_nalloc);
8300 srenew(cgindex, dd->cg_nalloc+1);
8302 /* Communicate the global cg indices */
8305 recv_i = index_gl + pos_cg;
8309 recv_i = comm->buf_int2;
8311 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8312 comm->buf_int, nsend,
8313 recv_i, ind->nrecv[nzone]);
8315 /* Make space for cg_cm */
8316 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8317 if (fr->cutoff_scheme == ecutsGROUP)
8323 cg_cm = as_rvec_array(state->x.data());
8325 /* Communicate cg_cm */
8328 recv_vr = cg_cm + pos_cg;
8332 recv_vr = comm->vbuf2.v;
8334 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8335 comm->vbuf.v, nsend,
8336 recv_vr, ind->nrecv[nzone]);
8338 /* Make the charge group index */
8341 zone = (p == 0 ? 0 : nzone - 1);
8342 while (zone < nzone)
8344 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8346 cg_gl = index_gl[pos_cg];
8347 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8348 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8349 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8352 /* Update the charge group presence,
8353 * so we can use it in the next pass of the loop.
8355 comm->bLocalCG[cg_gl] = TRUE;
8361 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8364 zone_cg_range[nzone+zone] = pos_cg;
8369 /* This part of the code is never executed with bBondComm. */
8370 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8371 index_gl, recv_i, cg_cm, recv_vr,
8372 cgindex, fr->cginfo_mb, fr->cginfo);
8373 pos_cg += ind->nrecv[nzone];
8375 nat_tot += ind->nrecv[nzone+1];
8379 /* Store the atom block for easy copying of communication buffers */
8380 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
8384 dd->index_gl = index_gl;
8385 dd->cgindex = cgindex;
8387 dd->ncg_tot = zone_cg_range[zones->n];
8388 dd->nat_tot = nat_tot;
8389 comm->nat[ddnatHOME] = dd->nat_home;
8390 for (i = ddnatZONE; i < ddnatNR; i++)
8392 comm->nat[i] = dd->nat_tot;
8397 /* We don't need to update cginfo, since that was alrady done above.
8398 * So we pass NULL for the forcerec.
8400 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8401 nullptr, comm->bLocalCG);
8406 fprintf(debug, "Finished setting up DD communication, zones:");
8407 for (c = 0; c < zones->n; c++)
8409 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8411 fprintf(debug, "\n");
8415 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8419 for (c = 0; c < zones->nizone; c++)
8421 zones->izone[c].cg1 = zones->cg_range[c+1];
8422 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8423 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
8427 static void set_zones_size(gmx_domdec_t *dd,
8428 matrix box, const gmx_ddbox_t *ddbox,
8429 int zone_start, int zone_end)
8431 gmx_domdec_comm_t *comm;
8432 gmx_domdec_zones_t *zones;
8441 zones = &comm->zones;
8443 /* Do we need to determine extra distances for multi-body bondeds? */
8444 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
8446 for (z = zone_start; z < zone_end; z++)
8448 /* Copy cell limits to zone limits.
8449 * Valid for non-DD dims and non-shifted dims.
8451 copy_rvec(comm->cell_x0, zones->size[z].x0);
8452 copy_rvec(comm->cell_x1, zones->size[z].x1);
8455 for (d = 0; d < dd->ndim; d++)
8459 for (z = 0; z < zones->n; z++)
8461 /* With a staggered grid we have different sizes
8462 * for non-shifted dimensions.
8464 if (isDlbOn(dd->comm) && zones->shift[z][dim] == 0)
8468 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8469 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8473 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8474 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
8480 rcmbs = comm->cutoff_mbody;
8481 if (ddbox->tric_dir[dim])
8483 rcs /= ddbox->skew_fac[dim];
8484 rcmbs /= ddbox->skew_fac[dim];
8487 /* Set the lower limit for the shifted zone dimensions */
8488 for (z = zone_start; z < zone_end; z++)
8490 if (zones->shift[z][dim] > 0)
8493 if (!isDlbOn(dd->comm) || d == 0)
8495 zones->size[z].x0[dim] = comm->cell_x1[dim];
8496 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8500 /* Here we take the lower limit of the zone from
8501 * the lowest domain of the zone below.
8505 zones->size[z].x0[dim] =
8506 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8512 zones->size[z].x0[dim] =
8513 zones->size[zone_perm[2][z-4]].x0[dim];
8517 zones->size[z].x0[dim] =
8518 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8521 /* A temporary limit, is updated below */
8522 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8526 for (zi = 0; zi < zones->nizone; zi++)
8528 if (zones->shift[zi][dim] == 0)
8530 /* This takes the whole zone into account.
8531 * With multiple pulses this will lead
8532 * to a larger zone then strictly necessary.
8534 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
8535 zones->size[zi].x1[dim]+rcmbs);
8543 /* Loop over the i-zones to set the upper limit of each
8546 for (zi = 0; zi < zones->nizone; zi++)
8548 if (zones->shift[zi][dim] == 0)
8550 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8552 if (zones->shift[z][dim] > 0)
8554 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
8555 zones->size[zi].x1[dim]+rcs);
8562 for (z = zone_start; z < zone_end; z++)
8564 /* Initialization only required to keep the compiler happy */
8565 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8568 /* To determine the bounding box for a zone we need to find
8569 * the extreme corners of 4, 2 or 1 corners.
8571 nc = 1 << (ddbox->nboundeddim - 1);
8573 for (c = 0; c < nc; c++)
8575 /* Set up a zone corner at x=0, ignoring trilinic couplings */
8579 corner[YY] = zones->size[z].x0[YY];
8583 corner[YY] = zones->size[z].x1[YY];
8587 corner[ZZ] = zones->size[z].x0[ZZ];
8591 corner[ZZ] = zones->size[z].x1[ZZ];
8593 if (dd->ndim == 1 && dd->dim[0] < ZZ && ZZ < dd->npbcdim &&
8594 box[ZZ][1 - dd->dim[0]] != 0)
8596 /* With 1D domain decomposition the cg's are not in
8597 * the triclinic box, but triclinic x-y and rectangular y/x-z.
8598 * Shift the corner of the z-vector back to along the box
8599 * vector of dimension d, so it will later end up at 0 along d.
8600 * This can affect the location of this corner along dd->dim[0]
8601 * through the matrix operation below if box[d][dd->dim[0]]!=0.
8603 int d = 1 - dd->dim[0];
8605 corner[d] -= corner[ZZ]*box[ZZ][d]/box[ZZ][ZZ];
8607 /* Apply the triclinic couplings */
8608 assert(ddbox->npbcdim <= DIM);
8609 for (i = YY; i < ddbox->npbcdim; i++)
8611 for (j = XX; j < i; j++)
8613 corner[j] += corner[i]*box[i][j]/box[i][i];
8618 copy_rvec(corner, corner_min);
8619 copy_rvec(corner, corner_max);
8623 for (i = 0; i < DIM; i++)
8625 corner_min[i] = std::min(corner_min[i], corner[i]);
8626 corner_max[i] = std::max(corner_max[i], corner[i]);
8630 /* Copy the extreme cornes without offset along x */
8631 for (i = 0; i < DIM; i++)
8633 zones->size[z].bb_x0[i] = corner_min[i];
8634 zones->size[z].bb_x1[i] = corner_max[i];
8636 /* Add the offset along x */
8637 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8638 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
8641 if (zone_start == 0)
8644 for (dim = 0; dim < DIM; dim++)
8646 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8648 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8653 for (z = zone_start; z < zone_end; z++)
8655 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8657 zones->size[z].x0[XX], zones->size[z].x1[XX],
8658 zones->size[z].x0[YY], zones->size[z].x1[YY],
8659 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8660 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8662 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8663 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8664 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
8669 static int comp_cgsort(const void *a, const void *b)
8673 gmx_cgsort_t *cga, *cgb;
8674 cga = (gmx_cgsort_t *)a;
8675 cgb = (gmx_cgsort_t *)b;
8677 comp = cga->nsc - cgb->nsc;
8680 comp = cga->ind_gl - cgb->ind_gl;
8686 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8691 /* Order the data */
8692 for (i = 0; i < n; i++)
8694 buf[i] = a[sort[i].ind];
8697 /* Copy back to the original array */
8698 for (i = 0; i < n; i++)
8704 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8709 /* Order the data */
8710 for (i = 0; i < n; i++)
8712 copy_rvec(v[sort[i].ind], buf[i]);
8715 /* Copy back to the original array */
8716 for (i = 0; i < n; i++)
8718 copy_rvec(buf[i], v[i]);
8722 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8725 int a, atot, cg, cg0, cg1, i;
8727 if (cgindex == nullptr)
8729 /* Avoid the useless loop of the atoms within a cg */
8730 order_vec_cg(ncg, sort, v, buf);
8735 /* Order the data */
8737 for (cg = 0; cg < ncg; cg++)
8739 cg0 = cgindex[sort[cg].ind];
8740 cg1 = cgindex[sort[cg].ind+1];
8741 for (i = cg0; i < cg1; i++)
8743 copy_rvec(v[i], buf[a]);
8749 /* Copy back to the original array */
8750 for (a = 0; a < atot; a++)
8752 copy_rvec(buf[a], v[a]);
8756 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8757 int nsort_new, gmx_cgsort_t *sort_new,
8758 gmx_cgsort_t *sort1)
8762 /* The new indices are not very ordered, so we qsort them */
8763 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8765 /* sort2 is already ordered, so now we can merge the two arrays */
8769 while (i2 < nsort2 || i_new < nsort_new)
8773 sort1[i1++] = sort_new[i_new++];
8775 else if (i_new == nsort_new)
8777 sort1[i1++] = sort2[i2++];
8779 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8780 (sort2[i2].nsc == sort_new[i_new].nsc &&
8781 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8783 sort1[i1++] = sort2[i2++];
8787 sort1[i1++] = sort_new[i_new++];
8792 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8794 gmx_domdec_sort_t *sort;
8795 gmx_cgsort_t *cgsort, *sort_i;
8796 int ncg_new, nsort2, nsort_new, i, *a, moved;
8798 sort = dd->comm->sort;
8800 a = fr->ns->grid->cell_index;
8802 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns->grid->ncells;
8804 if (ncg_home_old >= 0)
8806 /* The charge groups that remained in the same ns grid cell
8807 * are completely ordered. So we can sort efficiently by sorting
8808 * the charge groups that did move into the stationary list.
8813 for (i = 0; i < dd->ncg_home; i++)
8815 /* Check if this cg did not move to another node */
8818 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8820 /* This cg is new on this node or moved ns grid cell */
8821 if (nsort_new >= sort->sort_new_nalloc)
8823 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8824 srenew(sort->sort_new, sort->sort_new_nalloc);
8826 sort_i = &(sort->sort_new[nsort_new++]);
8830 /* This cg did not move */
8831 sort_i = &(sort->sort2[nsort2++]);
8833 /* Sort on the ns grid cell indices
8834 * and the global topology index.
8835 * index_gl is irrelevant with cell ns,
8836 * but we set it here anyhow to avoid a conditional.
8839 sort_i->ind_gl = dd->index_gl[i];
8846 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8849 /* Sort efficiently */
8850 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8855 cgsort = sort->sort;
8857 for (i = 0; i < dd->ncg_home; i++)
8859 /* Sort on the ns grid cell indices
8860 * and the global topology index
8862 cgsort[i].nsc = a[i];
8863 cgsort[i].ind_gl = dd->index_gl[i];
8865 if (cgsort[i].nsc < moved)
8872 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8874 /* Determine the order of the charge groups using qsort */
8875 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
8881 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
8887 sort = dd->comm->sort->sort;
8889 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
8892 for (i = 0; i < na; i++)
8896 sort[ncg_new].ind = a[i];
8904 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
8907 gmx_domdec_sort_t *sort;
8908 gmx_cgsort_t *cgsort;
8910 int ncg_new, i, *ibuf, cgsize;
8913 sort = dd->comm->sort;
8915 if (dd->ncg_home > sort->sort_nalloc)
8917 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
8918 srenew(sort->sort, sort->sort_nalloc);
8919 srenew(sort->sort2, sort->sort_nalloc);
8921 cgsort = sort->sort;
8923 switch (fr->cutoff_scheme)
8926 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
8929 ncg_new = dd_sort_order_nbnxn(dd, fr);
8932 gmx_incons("unimplemented");
8936 /* We alloc with the old size, since cgindex is still old */
8937 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
8938 vbuf = dd->comm->vbuf.v;
8942 cgindex = dd->cgindex;
8949 /* Remove the charge groups which are no longer at home here */
8950 dd->ncg_home = ncg_new;
8953 fprintf(debug, "Set the new home charge group count to %d\n",
8957 /* Reorder the state */
8958 if (state->flags & (1 << estX))
8960 order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->x.data()), vbuf);
8962 if (state->flags & (1 << estV))
8964 order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->v.data()), vbuf);
8966 if (state->flags & (1 << estCGP))
8968 order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->cg_p.data()), vbuf);
8971 if (fr->cutoff_scheme == ecutsGROUP)
8974 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
8977 if (dd->ncg_home+1 > sort->ibuf_nalloc)
8979 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
8980 srenew(sort->ibuf, sort->ibuf_nalloc);
8983 /* Reorder the global cg index */
8984 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
8985 /* Reorder the cginfo */
8986 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
8987 /* Rebuild the local cg index */
8991 for (i = 0; i < dd->ncg_home; i++)
8993 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
8994 ibuf[i+1] = ibuf[i] + cgsize;
8996 for (i = 0; i < dd->ncg_home+1; i++)
8998 dd->cgindex[i] = ibuf[i];
9003 for (i = 0; i < dd->ncg_home+1; i++)
9008 /* Set the home atom number */
9009 dd->nat_home = dd->cgindex[dd->ncg_home];
9011 if (fr->cutoff_scheme == ecutsVERLET)
9013 /* The atoms are now exactly in grid order, update the grid order */
9014 nbnxn_set_atomorder(fr->nbv->nbs);
9018 /* Copy the sorted ns cell indices back to the ns grid struct */
9019 for (i = 0; i < dd->ncg_home; i++)
9021 fr->ns->grid->cell_index[i] = cgsort[i].nsc;
9023 fr->ns->grid->nr = dd->ncg_home;
9027 static void add_dd_statistics(gmx_domdec_t *dd)
9029 gmx_domdec_comm_t *comm;
9034 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9036 comm->sum_nat[ddnat-ddnatZONE] +=
9037 comm->nat[ddnat] - comm->nat[ddnat-1];
9042 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9044 gmx_domdec_comm_t *comm;
9049 /* Reset all the statistics and counters for total run counting */
9050 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9052 comm->sum_nat[ddnat-ddnatZONE] = 0;
9056 comm->load_step = 0;
9059 clear_ivec(comm->load_lim);
9064 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9066 gmx_domdec_comm_t *comm;
9070 comm = cr->dd->comm;
9072 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9074 if (fplog == nullptr)
9079 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9081 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9083 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9088 " av. #atoms communicated per step for force: %d x %.1f\n",
9092 if (cr->dd->vsite_comm)
9095 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9096 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9101 if (cr->dd->constraint_comm)
9104 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9105 1 + ir->nLincsIter, av);
9109 gmx_incons(" Unknown type for DD statistics");
9112 fprintf(fplog, "\n");
9114 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9116 print_dd_load_av(fplog, cr->dd);
9120 void dd_partition_system(FILE *fplog,
9123 gmx_bool bMasterState,
9125 t_state *state_global,
9126 const gmx_mtop_t *top_global,
9127 const t_inputrec *ir,
9128 t_state *state_local,
9129 PaddedRVecVector *f,
9131 gmx_localtop_t *top_local,
9134 gmx_constr_t constr,
9136 gmx_wallcycle_t wcycle,
9140 gmx_domdec_comm_t *comm;
9141 gmx_ddbox_t ddbox = {0};
9143 gmx_int64_t step_pcoupl;
9144 rvec cell_ns_x0, cell_ns_x1;
9145 int i, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9146 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckWhetherToTurnDlbOn, bLogLoad;
9147 gmx_bool bRedist, bSortCG, bResortAll;
9148 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9152 wallcycle_start(wcycle, ewcDOMDEC);
9157 bBoxChanged = (bMasterState || inputrecDeform(ir));
9158 if (ir->epc != epcNO)
9160 /* With nstpcouple > 1 pressure coupling happens.
9161 * one step after calculating the pressure.
9162 * Box scaling happens at the end of the MD step,
9163 * after the DD partitioning.
9164 * We therefore have to do DLB in the first partitioning
9165 * after an MD step where P-coupling occurred.
9166 * We need to determine the last step in which p-coupling occurred.
9167 * MRS -- need to validate this for vv?
9172 step_pcoupl = step - 1;
9176 step_pcoupl = ((step - 1)/n)*n + 1;
9178 if (step_pcoupl >= comm->partition_step)
9184 bNStGlobalComm = (step % nstglobalcomm == 0);
9192 /* Should we do dynamic load balacing this step?
9193 * Since it requires (possibly expensive) global communication,
9194 * we might want to do DLB less frequently.
9196 if (bBoxChanged || ir->epc != epcNO)
9198 bDoDLB = bBoxChanged;
9202 bDoDLB = bNStGlobalComm;
9206 /* Check if we have recorded loads on the nodes */
9207 if (comm->bRecordLoad && dd_load_count(comm) > 0)
9209 bCheckWhetherToTurnDlbOn = dd_dlb_get_should_check_whether_to_turn_dlb_on(dd);
9211 /* Print load every nstlog, first and last step to the log file */
9212 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9213 comm->n_load_collect == 0 ||
9215 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9217 /* Avoid extra communication due to verbose screen output
9218 * when nstglobalcomm is set.
9220 if (bDoDLB || bLogLoad || bCheckWhetherToTurnDlbOn ||
9221 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9223 get_load_distribution(dd, wcycle);
9228 dd_print_load(fplog, dd, step-1);
9232 dd_print_load_verbose(dd);
9235 comm->n_load_collect++;
9241 /* Add the measured cycles to the running average */
9242 const float averageFactor = 0.1f;
9243 comm->cyclesPerStepDlbExpAverage =
9244 (1 - averageFactor)*comm->cyclesPerStepDlbExpAverage +
9245 averageFactor*comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
9247 if (comm->dlbState == edlbsOnCanTurnOff &&
9248 dd->comm->n_load_have % c_checkTurnDlbOffInterval == c_checkTurnDlbOffInterval - 1)
9250 gmx_bool turnOffDlb;
9253 /* If the running averaged cycles with DLB are more
9254 * than before we turned on DLB, turn off DLB.
9255 * We will again run and check the cycles without DLB
9256 * and we can then decide if to turn off DLB forever.
9258 turnOffDlb = (comm->cyclesPerStepDlbExpAverage >
9259 comm->cyclesPerStepBeforeDLB);
9261 dd_bcast(dd, sizeof(turnOffDlb), &turnOffDlb);
9264 /* To turn off DLB, we need to redistribute the atoms */
9265 dd_collect_state(dd, state_local, state_global);
9266 bMasterState = TRUE;
9267 turn_off_dlb(fplog, cr, step);
9271 else if (bCheckWhetherToTurnDlbOn)
9273 gmx_bool turnOffDlbForever = FALSE;
9274 gmx_bool turnOnDlb = FALSE;
9276 /* Since the timings are node dependent, the master decides */
9279 /* If we recently turned off DLB, we want to check if
9280 * performance is better without DLB. We want to do this
9281 * ASAP to minimize the chance that external factors
9282 * slowed down the DLB step are gone here and we
9283 * incorrectly conclude that DLB was causing the slowdown.
9284 * So we measure one nstlist block, no running average.
9286 if (comm->haveTurnedOffDlb &&
9287 comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep] <
9288 comm->cyclesPerStepDlbExpAverage)
9290 /* After turning off DLB we ran nstlist steps in fewer
9291 * cycles than with DLB. This likely means that DLB
9292 * in not benefical, but this could be due to a one
9293 * time unlucky fluctuation, so we require two such
9294 * observations in close succession to turn off DLB
9297 if (comm->dlbSlowerPartitioningCount > 0 &&
9298 dd->ddp_count < comm->dlbSlowerPartitioningCount + 10*c_checkTurnDlbOnInterval)
9300 turnOffDlbForever = TRUE;
9302 comm->haveTurnedOffDlb = false;
9303 /* Register when we last measured DLB slowdown */
9304 comm->dlbSlowerPartitioningCount = dd->ddp_count;
9308 /* Here we check if the max PME rank load is more than 0.98
9309 * the max PP force load. If so, PP DLB will not help,
9310 * since we are (almost) limited by PME. Furthermore,
9311 * DLB will cause a significant extra x/f redistribution
9312 * cost on the PME ranks, which will then surely result
9313 * in lower total performance.
9315 if (cr->npmenodes > 0 &&
9316 dd_pme_f_ratio(dd) > 1 - DD_PERF_LOSS_DLB_ON)
9322 turnOnDlb = (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
9328 gmx_bool turnOffDlbForever;
9332 turnOffDlbForever, turnOnDlb
9334 dd_bcast(dd, sizeof(bools), &bools);
9335 if (bools.turnOffDlbForever)
9337 turn_off_dlb_forever(fplog, cr, step);
9339 else if (bools.turnOnDlb)
9341 turn_on_dlb(fplog, cr, step);
9346 comm->n_load_have++;
9349 cgs_gl = &comm->cgs_gl;
9354 /* Clear the old state */
9355 clear_dd_indices(dd, 0, 0);
9358 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9359 TRUE, cgs_gl, as_rvec_array(state_global->x.data()), &ddbox);
9361 get_cg_distribution(fplog, dd, cgs_gl,
9362 state_global->box, &ddbox, as_rvec_array(state_global->x.data()));
9364 dd_distribute_state(dd, cgs_gl,
9365 state_global, state_local, f);
9367 dd_make_local_cgs(dd, &top_local->cgs);
9369 /* Ensure that we have space for the new distribution */
9370 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9372 if (fr->cutoff_scheme == ecutsGROUP)
9374 calc_cgcm(fplog, 0, dd->ncg_home,
9375 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
9378 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9380 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9382 else if (state_local->ddp_count != dd->ddp_count)
9384 if (state_local->ddp_count > dd->ddp_count)
9386 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9389 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9391 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9394 /* Clear the old state */
9395 clear_dd_indices(dd, 0, 0);
9397 /* Build the new indices */
9398 rebuild_cgindex(dd, cgs_gl->index, state_local);
9399 make_dd_indices(dd, cgs_gl->index, 0);
9400 ncgindex_set = dd->ncg_home;
9402 if (fr->cutoff_scheme == ecutsGROUP)
9404 /* Redetermine the cg COMs */
9405 calc_cgcm(fplog, 0, dd->ncg_home,
9406 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
9409 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9411 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9413 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9414 TRUE, &top_local->cgs, as_rvec_array(state_local->x.data()), &ddbox);
9416 bRedist = isDlbOn(comm);
9420 /* We have the full state, only redistribute the cgs */
9422 /* Clear the non-home indices */
9423 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9426 /* Avoid global communication for dim's without pbc and -gcom */
9427 if (!bNStGlobalComm)
9429 copy_rvec(comm->box0, ddbox.box0 );
9430 copy_rvec(comm->box_size, ddbox.box_size);
9432 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9433 bNStGlobalComm, &top_local->cgs, as_rvec_array(state_local->x.data()), &ddbox);
9438 /* For dim's without pbc and -gcom */
9439 copy_rvec(ddbox.box0, comm->box0 );
9440 copy_rvec(ddbox.box_size, comm->box_size);
9442 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9445 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9447 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9450 /* Check if we should sort the charge groups */
9451 bSortCG = (bMasterState || bRedist);
9453 ncg_home_old = dd->ncg_home;
9458 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9460 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9462 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9464 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9467 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9469 &comm->cell_x0, &comm->cell_x1,
9470 dd->ncg_home, fr->cg_cm,
9471 cell_ns_x0, cell_ns_x1, &grid_density);
9475 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9478 switch (fr->cutoff_scheme)
9481 copy_ivec(fr->ns->grid->n, ncells_old);
9482 grid_first(fplog, fr->ns->grid, dd, &ddbox,
9483 state_local->box, cell_ns_x0, cell_ns_x1,
9484 fr->rlist, grid_density);
9487 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9490 gmx_incons("unimplemented");
9492 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9493 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9497 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9499 /* Sort the state on charge group position.
9500 * This enables exact restarts from this step.
9501 * It also improves performance by about 15% with larger numbers
9502 * of atoms per node.
9505 /* Fill the ns grid with the home cell,
9506 * so we can sort with the indices.
9508 set_zones_ncg_home(dd);
9510 switch (fr->cutoff_scheme)
9513 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9515 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9517 comm->zones.size[0].bb_x0,
9518 comm->zones.size[0].bb_x1,
9520 comm->zones.dens_zone0,
9522 as_rvec_array(state_local->x.data()),
9523 ncg_moved, bRedist ? comm->moved : nullptr,
9524 fr->nbv->grp[eintLocal].kernel_type,
9525 fr->nbv->grp[eintLocal].nbat);
9527 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9530 fill_grid(&comm->zones, fr->ns->grid, dd->ncg_home,
9531 0, dd->ncg_home, fr->cg_cm);
9533 copy_ivec(fr->ns->grid->n, ncells_new);
9536 gmx_incons("unimplemented");
9539 bResortAll = bMasterState;
9541 /* Check if we can user the old order and ns grid cell indices
9542 * of the charge groups to sort the charge groups efficiently.
9544 if (ncells_new[XX] != ncells_old[XX] ||
9545 ncells_new[YY] != ncells_old[YY] ||
9546 ncells_new[ZZ] != ncells_old[ZZ])
9553 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9554 gmx_step_str(step, sbuf), dd->ncg_home);
9556 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9557 bResortAll ? -1 : ncg_home_old);
9559 /* After sorting and compacting we set the correct size */
9560 dd_resize_state(state_local, f, dd->nat_home);
9562 /* Rebuild all the indices */
9563 ga2la_clear(dd->ga2la);
9566 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9569 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9571 /* Setup up the communication and communicate the coordinates */
9572 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9574 /* Set the indices */
9575 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9577 /* Set the charge group boundaries for neighbor searching */
9578 set_cg_boundaries(&comm->zones);
9580 if (fr->cutoff_scheme == ecutsVERLET)
9582 set_zones_size(dd, state_local->box, &ddbox,
9583 bSortCG ? 1 : 0, comm->zones.n);
9586 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9589 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9590 -1,as_rvec_array(state_local->x.data()),state_local->box);
9593 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9595 /* Extract a local topology from the global topology */
9596 for (i = 0; i < dd->ndim; i++)
9598 np[dd->dim[i]] = comm->cd[i].np;
9600 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9601 comm->cellsize_min, np,
9603 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : as_rvec_array(state_local->x.data()),
9604 vsite, top_global, top_local);
9606 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9608 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9610 /* Set up the special atom communication */
9611 n = comm->nat[ddnatZONE];
9612 for (i = ddnatZONE+1; i < ddnatNR; i++)
9617 if (vsite && vsite->n_intercg_vsite)
9619 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9623 if (dd->bInterCGcons || dd->bInterCGsettles)
9625 /* Only for inter-cg constraints we need special code */
9626 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9627 constr, ir->nProjOrder,
9628 top_local->idef.il);
9632 gmx_incons("Unknown special atom type setup");
9637 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9639 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9641 /* Make space for the extra coordinates for virtual site
9642 * or constraint communication.
9644 state_local->natoms = comm->nat[ddnatNR-1];
9646 dd_resize_state(state_local, f, state_local->natoms);
9648 if (fr->bF_NoVirSum)
9650 if (vsite && vsite->n_intercg_vsite)
9652 nat_f_novirsum = comm->nat[ddnatVSITE];
9656 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9658 nat_f_novirsum = dd->nat_tot;
9662 nat_f_novirsum = dd->nat_home;
9671 /* Set the number of atoms required for the force calculation.
9672 * Forces need to be constrained when doing energy
9673 * minimization. For simple simulations we could avoid some
9674 * allocation, zeroing and copying, but this is probably not worth
9675 * the complications and checking.
9677 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9678 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9680 /* Update atom data for mdatoms and several algorithms */
9681 mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr,
9682 nullptr, mdatoms, vsite, nullptr);
9684 if (ir->implicit_solvent)
9686 make_local_gb(cr, fr->born, ir->gb_algorithm);
9689 if (!(cr->duty & DUTY_PME))
9691 /* Send the charges and/or c6/sigmas to our PME only node */
9692 gmx_pme_send_parameters(cr,
9694 mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9695 mdatoms->chargeA, mdatoms->chargeB,
9696 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9697 mdatoms->sigmaA, mdatoms->sigmaB,
9698 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9703 set_constraints(constr, top_local, ir, mdatoms, cr);
9708 /* Update the local pull groups */
9709 dd_make_local_pull_groups(cr, ir->pull_work, mdatoms);
9714 /* Update the local rotation groups */
9715 dd_make_local_rotation_groups(dd, ir->rot);
9718 if (ir->eSwapCoords != eswapNO)
9720 /* Update the local groups needed for ion swapping */
9721 dd_make_local_swap_groups(dd, ir->swap);
9724 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9725 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9727 add_dd_statistics(dd);
9729 /* Make sure we only count the cycles for this DD partitioning */
9730 clear_dd_cycle_counts(dd);
9732 /* Because the order of the atoms might have changed since
9733 * the last vsite construction, we need to communicate the constructing
9734 * atom coordinates again (for spreading the forces this MD step).
9736 dd_move_x_vsites(dd, state_local->box, as_rvec_array(state_local->x.data()));
9738 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9740 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9742 dd_move_x(dd, state_local->box, as_rvec_array(state_local->x.data()));
9743 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9744 -1, as_rvec_array(state_local->x.data()), state_local->box);
9747 /* Store the partitioning step */
9748 comm->partition_step = step;
9750 /* Increase the DD partitioning counter */
9752 /* The state currently matches this DD partitioning count, store it */
9753 state_local->ddp_count = dd->ddp_count;
9756 /* The DD master node knows the complete cg distribution,
9757 * store the count so we can possibly skip the cg info communication.
9759 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9762 if (comm->DD_debug > 0)
9764 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9765 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9766 "after partitioning");
9769 wallcycle_stop(wcycle, ewcDOMDEC);