2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2008
5 * Copyright (c) 2012,2013, by the GROMACS development team, led by
6 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
7 * others, as listed in the AUTHORS file in the top-level source
8 * directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gmx_fatal.h"
49 #include "gmx_fatal_collective.h"
52 #include "domdec_network.h"
55 #include "chargegroup.h"
64 #include "pull_rotation.h"
65 #include "gmx_wallcycle.h"
69 #include "mtop_util.h"
71 #include "gmx_ga2la.h"
73 #include "nbnxn_search.h"
75 #include "gmx_omp_nthreads.h"
76 #include "gpu_utils.h"
85 #define DDRANK(dd, rank) (rank)
86 #define DDMASTERRANK(dd) (dd->masterrank)
88 typedef struct gmx_domdec_master
90 /* The cell boundaries */
92 /* The global charge group division */
93 int *ncg; /* Number of home charge groups for each node */
94 int *index; /* Index of nnodes+1 into cg */
95 int *cg; /* Global charge group index */
96 int *nat; /* Number of home atoms for each node. */
97 int *ibuf; /* Buffer for communication */
98 rvec *vbuf; /* Buffer for state scattering and gathering */
99 } gmx_domdec_master_t;
103 /* The numbers of charge groups to send and receive for each cell
104 * that requires communication, the last entry contains the total
105 * number of atoms that needs to be communicated.
107 int nsend[DD_MAXIZONE+2];
108 int nrecv[DD_MAXIZONE+2];
109 /* The charge groups to send */
112 /* The atom range for non-in-place communication */
113 int cell2at0[DD_MAXIZONE];
114 int cell2at1[DD_MAXIZONE];
119 int np; /* Number of grid pulses in this dimension */
120 int np_dlb; /* For dlb, for use with edlbAUTO */
121 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
123 gmx_bool bInPlace; /* Can we communicate in place? */
124 } gmx_domdec_comm_dim_t;
128 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
129 real *cell_f; /* State var.: cell boundaries, box relative */
130 real *old_cell_f; /* Temp. var.: old cell size */
131 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
132 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
133 real *bound_min; /* Temp. var.: lower limit for cell boundary */
134 real *bound_max; /* Temp. var.: upper limit for cell boundary */
135 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
136 real *buf_ncd; /* Temp. var. */
139 #define DD_NLOAD_MAX 9
141 /* Here floats are accurate enough, since these variables
142 * only influence the load balancing, not the actual MD results.
169 gmx_cgsort_t *sort_new;
181 /* This enum determines the order of the coordinates.
182 * ddnatHOME and ddnatZONE should be first and second,
183 * the others can be ordered as wanted.
186 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
190 edlbAUTO, edlbNO, edlbYES, edlbNR
192 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
196 int dim; /* The dimension */
197 gmx_bool dim_match; /* Tells if DD and PME dims match */
198 int nslab; /* The number of PME slabs in this dimension */
199 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
200 int *pp_min; /* The minimum pp node location, size nslab */
201 int *pp_max; /* The maximum pp node location,size nslab */
202 int maxshift; /* The maximum shift for coordinate redistribution in PME */
207 real min0; /* The minimum bottom of this zone */
208 real max1; /* The maximum top of this zone */
209 real min1; /* The minimum top of this zone */
210 real mch0; /* The maximum bottom communicaton height for this zone */
211 real mch1; /* The maximum top communicaton height for this zone */
212 real p1_0; /* The bottom value of the first cell in this zone */
213 real p1_1; /* The top value of the first cell in this zone */
218 gmx_domdec_ind_t ind;
225 } dd_comm_setup_work_t;
227 typedef struct gmx_domdec_comm
229 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
230 * unless stated otherwise.
233 /* The number of decomposition dimensions for PME, 0: no PME */
235 /* The number of nodes doing PME (PP/PME or only PME) */
239 /* The communication setup including the PME only nodes */
240 gmx_bool bCartesianPP_PME;
243 int *pmenodes; /* size npmenodes */
244 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
245 * but with bCartesianPP_PME */
246 gmx_ddpme_t ddpme[2];
248 /* The DD particle-particle nodes only */
249 gmx_bool bCartesianPP;
250 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
252 /* The global charge groups */
255 /* Should we sort the cgs */
257 gmx_domdec_sort_t *sort;
259 /* Are there charge groups? */
262 /* Are there bonded and multi-body interactions between charge groups? */
263 gmx_bool bInterCGBondeds;
264 gmx_bool bInterCGMultiBody;
266 /* Data for the optional bonded interaction atom communication range */
273 /* Are we actually using DLB? */
274 gmx_bool bDynLoadBal;
276 /* Cell sizes for static load balancing, first index cartesian */
279 /* The width of the communicated boundaries */
282 /* The minimum cell size (including triclinic correction) */
284 /* For dlb, for use with edlbAUTO */
285 rvec cellsize_min_dlb;
286 /* The lower limit for the DD cell size with DLB */
288 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
289 gmx_bool bVacDLBNoLimit;
291 /* With PME load balancing we set limits on DLB */
292 gmx_bool bPMELoadBalDLBLimits;
293 /* DLB needs to take into account that we want to allow this maximum
294 * cut-off (for PME load balancing), this could limit cell boundaries.
296 real PMELoadBal_max_cutoff;
298 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
300 /* box0 and box_size are required with dim's without pbc and -gcom */
304 /* The cell boundaries */
308 /* The old location of the cell boundaries, to check cg displacements */
312 /* The communication setup and charge group boundaries for the zones */
313 gmx_domdec_zones_t zones;
315 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
316 * cell boundaries of neighboring cells for dynamic load balancing.
318 gmx_ddzone_t zone_d1[2];
319 gmx_ddzone_t zone_d2[2][2];
321 /* The coordinate/force communication setup and indices */
322 gmx_domdec_comm_dim_t cd[DIM];
323 /* The maximum number of cells to communicate with in one dimension */
326 /* Which cg distribution is stored on the master node */
327 int master_cg_ddp_count;
329 /* The number of cg's received from the direct neighbors */
330 int zone_ncg1[DD_MAXZONE];
332 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
335 /* Array for signalling if atoms have moved to another domain */
339 /* Communication buffer for general use */
343 /* Communication buffer for general use */
346 /* Temporary storage for thread parallel communication setup */
348 dd_comm_setup_work_t *dth;
350 /* Communication buffers only used with multiple grid pulses */
355 /* Communication buffers for local redistribution */
357 int cggl_flag_nalloc[DIM*2];
359 int cgcm_state_nalloc[DIM*2];
361 /* Cell sizes for dynamic load balancing */
362 gmx_domdec_root_t **root;
366 real cell_f_max0[DIM];
367 real cell_f_min1[DIM];
369 /* Stuff for load communication */
370 gmx_bool bRecordLoad;
371 gmx_domdec_load_t *load;
372 int nrank_gpu_shared;
374 MPI_Comm *mpi_comm_load;
375 MPI_Comm mpi_comm_gpu_shared;
378 /* Maximum DLB scaling per load balancing step in percent */
382 float cycl[ddCyclNr];
383 int cycl_n[ddCyclNr];
384 float cycl_max[ddCyclNr];
385 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
389 /* Have often have did we have load measurements */
391 /* Have often have we collected the load measurements */
395 double sum_nat[ddnatNR-ddnatZONE];
405 /* The last partition step */
406 gmx_large_int_t partition_step;
414 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
417 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
418 #define DD_FLAG_NRCG 65535
419 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
420 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
422 /* Zone permutation required to obtain consecutive charge groups
423 * for neighbor searching.
425 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
427 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
428 * components see only j zones with that component 0.
431 /* The DD zone order */
432 static const ivec dd_zo[DD_MAXZONE] =
433 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
438 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
443 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
448 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
450 /* Factors used to avoid problems due to rounding issues */
451 #define DD_CELL_MARGIN 1.0001
452 #define DD_CELL_MARGIN2 1.00005
453 /* Factor to account for pressure scaling during nstlist steps */
454 #define DD_PRES_SCALE_MARGIN 1.02
456 /* Allowed performance loss before we DLB or warn */
457 #define DD_PERF_LOSS 0.05
459 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
461 /* Use separate MPI send and receive commands
462 * when nnodes <= GMX_DD_NNODES_SENDRECV.
463 * This saves memory (and some copying for small nnodes).
464 * For high parallelization scatter and gather calls are used.
466 #define GMX_DD_NNODES_SENDRECV 4
470 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
472 static void index2xyz(ivec nc,int ind,ivec xyz)
474 xyz[XX] = ind % nc[XX];
475 xyz[YY] = (ind / nc[XX]) % nc[YY];
476 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
480 /* This order is required to minimize the coordinate communication in PME
481 * which uses decomposition in the x direction.
483 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
485 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
487 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
488 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
489 xyz[ZZ] = ind % nc[ZZ];
492 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
497 ddindex = dd_index(dd->nc, c);
498 if (dd->comm->bCartesianPP_PME)
500 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
502 else if (dd->comm->bCartesianPP)
505 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
516 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
518 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
521 int ddglatnr(gmx_domdec_t *dd, int i)
531 if (i >= dd->comm->nat[ddnatNR-1])
533 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
535 atnr = dd->gatindex[i] + 1;
541 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
543 return &dd->comm->cgs_gl;
546 static void vec_rvec_init(vec_rvec_t *v)
552 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
556 v->nalloc = over_alloc_dd(n);
557 srenew(v->v, v->nalloc);
561 void dd_store_state(gmx_domdec_t *dd, t_state *state)
565 if (state->ddp_count != dd->ddp_count)
567 gmx_incons("The state does not the domain decomposition state");
570 state->ncg_gl = dd->ncg_home;
571 if (state->ncg_gl > state->cg_gl_nalloc)
573 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
574 srenew(state->cg_gl, state->cg_gl_nalloc);
576 for (i = 0; i < state->ncg_gl; i++)
578 state->cg_gl[i] = dd->index_gl[i];
581 state->ddp_count_cg_gl = dd->ddp_count;
584 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
586 return &dd->comm->zones;
589 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
590 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
592 gmx_domdec_zones_t *zones;
595 zones = &dd->comm->zones;
598 while (icg >= zones->izone[izone].cg1)
607 else if (izone < zones->nizone)
609 *jcg0 = zones->izone[izone].jcg0;
613 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
614 icg, izone, zones->nizone);
617 *jcg1 = zones->izone[izone].jcg1;
619 for (d = 0; d < dd->ndim; d++)
622 shift0[dim] = zones->izone[izone].shift0[dim];
623 shift1[dim] = zones->izone[izone].shift1[dim];
624 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
626 /* A conservative approach, this can be optimized */
633 int dd_natoms_vsite(gmx_domdec_t *dd)
635 return dd->comm->nat[ddnatVSITE];
638 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
640 *at_start = dd->comm->nat[ddnatCON-1];
641 *at_end = dd->comm->nat[ddnatCON];
644 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
646 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
647 int *index, *cgindex;
648 gmx_domdec_comm_t *comm;
649 gmx_domdec_comm_dim_t *cd;
650 gmx_domdec_ind_t *ind;
651 rvec shift = {0, 0, 0}, *buf, *rbuf;
652 gmx_bool bPBC, bScrew;
656 cgindex = dd->cgindex;
661 nat_tot = dd->nat_home;
662 for (d = 0; d < dd->ndim; d++)
664 bPBC = (dd->ci[dd->dim[d]] == 0);
665 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
668 copy_rvec(box[dd->dim[d]], shift);
671 for (p = 0; p < cd->np; p++)
678 for (i = 0; i < ind->nsend[nzone]; i++)
680 at0 = cgindex[index[i]];
681 at1 = cgindex[index[i]+1];
682 for (j = at0; j < at1; j++)
684 copy_rvec(x[j], buf[n]);
691 for (i = 0; i < ind->nsend[nzone]; i++)
693 at0 = cgindex[index[i]];
694 at1 = cgindex[index[i]+1];
695 for (j = at0; j < at1; j++)
697 /* We need to shift the coordinates */
698 rvec_add(x[j], shift, buf[n]);
705 for (i = 0; i < ind->nsend[nzone]; i++)
707 at0 = cgindex[index[i]];
708 at1 = cgindex[index[i]+1];
709 for (j = at0; j < at1; j++)
712 buf[n][XX] = x[j][XX] + shift[XX];
714 * This operation requires a special shift force
715 * treatment, which is performed in calc_vir.
717 buf[n][YY] = box[YY][YY] - x[j][YY];
718 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
730 rbuf = comm->vbuf2.v;
732 /* Send and receive the coordinates */
733 dd_sendrecv_rvec(dd, d, dddirBackward,
734 buf, ind->nsend[nzone+1],
735 rbuf, ind->nrecv[nzone+1]);
739 for (zone = 0; zone < nzone; zone++)
741 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
743 copy_rvec(rbuf[j], x[i]);
748 nat_tot += ind->nrecv[nzone+1];
754 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
756 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
757 int *index, *cgindex;
758 gmx_domdec_comm_t *comm;
759 gmx_domdec_comm_dim_t *cd;
760 gmx_domdec_ind_t *ind;
764 gmx_bool bPBC, bScrew;
768 cgindex = dd->cgindex;
773 nzone = comm->zones.n/2;
774 nat_tot = dd->nat_tot;
775 for (d = dd->ndim-1; d >= 0; d--)
777 bPBC = (dd->ci[dd->dim[d]] == 0);
778 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
779 if (fshift == NULL && !bScrew)
783 /* Determine which shift vector we need */
789 for (p = cd->np-1; p >= 0; p--)
792 nat_tot -= ind->nrecv[nzone+1];
799 sbuf = comm->vbuf2.v;
801 for (zone = 0; zone < nzone; zone++)
803 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
805 copy_rvec(f[i], sbuf[j]);
810 /* Communicate the forces */
811 dd_sendrecv_rvec(dd, d, dddirForward,
812 sbuf, ind->nrecv[nzone+1],
813 buf, ind->nsend[nzone+1]);
815 /* Add the received forces */
819 for (i = 0; i < ind->nsend[nzone]; i++)
821 at0 = cgindex[index[i]];
822 at1 = cgindex[index[i]+1];
823 for (j = at0; j < at1; j++)
825 rvec_inc(f[j], buf[n]);
832 for (i = 0; i < ind->nsend[nzone]; i++)
834 at0 = cgindex[index[i]];
835 at1 = cgindex[index[i]+1];
836 for (j = at0; j < at1; j++)
838 rvec_inc(f[j], buf[n]);
839 /* Add this force to the shift force */
840 rvec_inc(fshift[is], buf[n]);
847 for (i = 0; i < ind->nsend[nzone]; i++)
849 at0 = cgindex[index[i]];
850 at1 = cgindex[index[i]+1];
851 for (j = at0; j < at1; j++)
853 /* Rotate the force */
854 f[j][XX] += buf[n][XX];
855 f[j][YY] -= buf[n][YY];
856 f[j][ZZ] -= buf[n][ZZ];
859 /* Add this force to the shift force */
860 rvec_inc(fshift[is], buf[n]);
871 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
873 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
874 int *index, *cgindex;
875 gmx_domdec_comm_t *comm;
876 gmx_domdec_comm_dim_t *cd;
877 gmx_domdec_ind_t *ind;
882 cgindex = dd->cgindex;
884 buf = &comm->vbuf.v[0][0];
887 nat_tot = dd->nat_home;
888 for (d = 0; d < dd->ndim; d++)
891 for (p = 0; p < cd->np; p++)
896 for (i = 0; i < ind->nsend[nzone]; i++)
898 at0 = cgindex[index[i]];
899 at1 = cgindex[index[i]+1];
900 for (j = at0; j < at1; j++)
913 rbuf = &comm->vbuf2.v[0][0];
915 /* Send and receive the coordinates */
916 dd_sendrecv_real(dd, d, dddirBackward,
917 buf, ind->nsend[nzone+1],
918 rbuf, ind->nrecv[nzone+1]);
922 for (zone = 0; zone < nzone; zone++)
924 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
931 nat_tot += ind->nrecv[nzone+1];
937 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
939 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
940 int *index, *cgindex;
941 gmx_domdec_comm_t *comm;
942 gmx_domdec_comm_dim_t *cd;
943 gmx_domdec_ind_t *ind;
948 cgindex = dd->cgindex;
950 buf = &comm->vbuf.v[0][0];
953 nzone = comm->zones.n/2;
954 nat_tot = dd->nat_tot;
955 for (d = dd->ndim-1; d >= 0; d--)
958 for (p = cd->np-1; p >= 0; p--)
961 nat_tot -= ind->nrecv[nzone+1];
968 sbuf = &comm->vbuf2.v[0][0];
970 for (zone = 0; zone < nzone; zone++)
972 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
979 /* Communicate the forces */
980 dd_sendrecv_real(dd, d, dddirForward,
981 sbuf, ind->nrecv[nzone+1],
982 buf, ind->nsend[nzone+1]);
984 /* Add the received forces */
986 for (i = 0; i < ind->nsend[nzone]; i++)
988 at0 = cgindex[index[i]];
989 at1 = cgindex[index[i]+1];
990 for (j = at0; j < at1; j++)
1001 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
1003 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1005 zone->min0, zone->max1,
1006 zone->mch0, zone->mch0,
1007 zone->p1_0, zone->p1_1);
1011 #define DDZONECOMM_MAXZONE 5
1012 #define DDZONECOMM_BUFSIZE 3
1014 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1015 int ddimind, int direction,
1016 gmx_ddzone_t *buf_s, int n_s,
1017 gmx_ddzone_t *buf_r, int n_r)
1019 #define ZBS DDZONECOMM_BUFSIZE
1020 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1021 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
1024 for (i = 0; i < n_s; i++)
1026 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1027 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1028 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1029 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1030 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1031 vbuf_s[i*ZBS+1][2] = 0;
1032 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1033 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1034 vbuf_s[i*ZBS+2][2] = 0;
1037 dd_sendrecv_rvec(dd, ddimind, direction,
1041 for (i = 0; i < n_r; i++)
1043 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1044 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1045 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1046 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1047 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1048 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1049 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
1055 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1056 rvec cell_ns_x0, rvec cell_ns_x1)
1058 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1060 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1061 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1062 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1063 rvec extr_s[2], extr_r[2];
1065 real dist_d, c = 0, det;
1066 gmx_domdec_comm_t *comm;
1067 gmx_bool bPBC, bUse;
1071 for (d = 1; d < dd->ndim; d++)
1074 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1075 zp->min0 = cell_ns_x0[dim];
1076 zp->max1 = cell_ns_x1[dim];
1077 zp->min1 = cell_ns_x1[dim];
1078 zp->mch0 = cell_ns_x0[dim];
1079 zp->mch1 = cell_ns_x1[dim];
1080 zp->p1_0 = cell_ns_x0[dim];
1081 zp->p1_1 = cell_ns_x1[dim];
1084 for (d = dd->ndim-2; d >= 0; d--)
1087 bPBC = (dim < ddbox->npbcdim);
1089 /* Use an rvec to store two reals */
1090 extr_s[d][0] = comm->cell_f0[d+1];
1091 extr_s[d][1] = comm->cell_f1[d+1];
1092 extr_s[d][2] = comm->cell_f1[d+1];
1095 /* Store the extremes in the backward sending buffer,
1096 * so the get updated separately from the forward communication.
1098 for (d1 = d; d1 < dd->ndim-1; d1++)
1100 /* We invert the order to be able to use the same loop for buf_e */
1101 buf_s[pos].min0 = extr_s[d1][1];
1102 buf_s[pos].max1 = extr_s[d1][0];
1103 buf_s[pos].min1 = extr_s[d1][2];
1104 buf_s[pos].mch0 = 0;
1105 buf_s[pos].mch1 = 0;
1106 /* Store the cell corner of the dimension we communicate along */
1107 buf_s[pos].p1_0 = comm->cell_x0[dim];
1108 buf_s[pos].p1_1 = 0;
1112 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1115 if (dd->ndim == 3 && d == 0)
1117 buf_s[pos] = comm->zone_d2[0][1];
1119 buf_s[pos] = comm->zone_d1[0];
1123 /* We only need to communicate the extremes
1124 * in the forward direction
1126 npulse = comm->cd[d].np;
1129 /* Take the minimum to avoid double communication */
1130 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1134 /* Without PBC we should really not communicate over
1135 * the boundaries, but implementing that complicates
1136 * the communication setup and therefore we simply
1137 * do all communication, but ignore some data.
1139 npulse_min = npulse;
1141 for (p = 0; p < npulse_min; p++)
1143 /* Communicate the extremes forward */
1144 bUse = (bPBC || dd->ci[dim] > 0);
1146 dd_sendrecv_rvec(dd, d, dddirForward,
1147 extr_s+d, dd->ndim-d-1,
1148 extr_r+d, dd->ndim-d-1);
1152 for (d1 = d; d1 < dd->ndim-1; d1++)
1154 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1155 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1156 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1162 for (p = 0; p < npulse; p++)
1164 /* Communicate all the zone information backward */
1165 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1167 dd_sendrecv_ddzone(dd, d, dddirBackward,
1174 for (d1 = d+1; d1 < dd->ndim; d1++)
1176 /* Determine the decrease of maximum required
1177 * communication height along d1 due to the distance along d,
1178 * this avoids a lot of useless atom communication.
1180 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1182 if (ddbox->tric_dir[dim])
1184 /* c is the off-diagonal coupling between the cell planes
1185 * along directions d and d1.
1187 c = ddbox->v[dim][dd->dim[d1]][dim];
1193 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1196 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1200 /* A negative value signals out of range */
1206 /* Accumulate the extremes over all pulses */
1207 for (i = 0; i < buf_size; i++)
1211 buf_e[i] = buf_r[i];
1217 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1218 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1219 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1222 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1230 if (bUse && dh[d1] >= 0)
1232 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1233 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1236 /* Copy the received buffer to the send buffer,
1237 * to pass the data through with the next pulse.
1239 buf_s[i] = buf_r[i];
1241 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1242 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1244 /* Store the extremes */
1247 for (d1 = d; d1 < dd->ndim-1; d1++)
1249 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1250 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1251 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1255 if (d == 1 || (d == 0 && dd->ndim == 3))
1257 for (i = d; i < 2; i++)
1259 comm->zone_d2[1-d][i] = buf_e[pos];
1265 comm->zone_d1[1] = buf_e[pos];
1275 for (i = 0; i < 2; i++)
1279 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1281 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1282 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1288 for (i = 0; i < 2; i++)
1290 for (j = 0; j < 2; j++)
1294 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1296 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1297 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1301 for (d = 1; d < dd->ndim; d++)
1303 comm->cell_f_max0[d] = extr_s[d-1][0];
1304 comm->cell_f_min1[d] = extr_s[d-1][1];
1307 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1308 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1313 static void dd_collect_cg(gmx_domdec_t *dd,
1314 t_state *state_local)
1316 gmx_domdec_master_t *ma = NULL;
1317 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1320 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1322 /* The master has the correct distribution */
1326 if (state_local->ddp_count == dd->ddp_count)
1328 ncg_home = dd->ncg_home;
1330 nat_home = dd->nat_home;
1332 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1334 cgs_gl = &dd->comm->cgs_gl;
1336 ncg_home = state_local->ncg_gl;
1337 cg = state_local->cg_gl;
1339 for (i = 0; i < ncg_home; i++)
1341 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1346 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1349 buf2[0] = dd->ncg_home;
1350 buf2[1] = dd->nat_home;
1360 /* Collect the charge group and atom counts on the master */
1361 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1366 for (i = 0; i < dd->nnodes; i++)
1368 ma->ncg[i] = ma->ibuf[2*i];
1369 ma->nat[i] = ma->ibuf[2*i+1];
1370 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1373 /* Make byte counts and indices */
1374 for (i = 0; i < dd->nnodes; i++)
1376 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1377 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1381 fprintf(debug, "Initial charge group distribution: ");
1382 for (i = 0; i < dd->nnodes; i++)
1384 fprintf(debug, " %d", ma->ncg[i]);
1386 fprintf(debug, "\n");
1390 /* Collect the charge group indices on the master */
1392 dd->ncg_home*sizeof(int), dd->index_gl,
1393 DDMASTER(dd) ? ma->ibuf : NULL,
1394 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1395 DDMASTER(dd) ? ma->cg : NULL);
1397 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1400 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1403 gmx_domdec_master_t *ma;
1404 int n, i, c, a, nalloc = 0;
1413 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1414 dd->rank, dd->mpi_comm_all);
1419 /* Copy the master coordinates to the global array */
1420 cgs_gl = &dd->comm->cgs_gl;
1422 n = DDMASTERRANK(dd);
1424 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1426 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1428 copy_rvec(lv[a++], v[c]);
1432 for (n = 0; n < dd->nnodes; n++)
1436 if (ma->nat[n] > nalloc)
1438 nalloc = over_alloc_dd(ma->nat[n]);
1439 srenew(buf, nalloc);
1442 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1443 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1446 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1448 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1450 copy_rvec(buf[a++], v[c]);
1459 static void get_commbuffer_counts(gmx_domdec_t *dd,
1460 int **counts, int **disps)
1462 gmx_domdec_master_t *ma;
1467 /* Make the rvec count and displacment arrays */
1469 *disps = ma->ibuf + dd->nnodes;
1470 for (n = 0; n < dd->nnodes; n++)
1472 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1473 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1477 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1480 gmx_domdec_master_t *ma;
1481 int *rcounts = NULL, *disps = NULL;
1490 get_commbuffer_counts(dd, &rcounts, &disps);
1495 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1499 cgs_gl = &dd->comm->cgs_gl;
1502 for (n = 0; n < dd->nnodes; n++)
1504 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1506 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1508 copy_rvec(buf[a++], v[c]);
1515 void dd_collect_vec(gmx_domdec_t *dd,
1516 t_state *state_local, rvec *lv, rvec *v)
1518 gmx_domdec_master_t *ma;
1519 int n, i, c, a, nalloc = 0;
1522 dd_collect_cg(dd, state_local);
1524 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1526 dd_collect_vec_sendrecv(dd, lv, v);
1530 dd_collect_vec_gatherv(dd, lv, v);
1535 void dd_collect_state(gmx_domdec_t *dd,
1536 t_state *state_local, t_state *state)
1540 nh = state->nhchainlength;
1544 for (i = 0; i < efptNR; i++)
1546 state->lambda[i] = state_local->lambda[i];
1548 state->fep_state = state_local->fep_state;
1549 state->veta = state_local->veta;
1550 state->vol0 = state_local->vol0;
1551 copy_mat(state_local->box, state->box);
1552 copy_mat(state_local->boxv, state->boxv);
1553 copy_mat(state_local->svir_prev, state->svir_prev);
1554 copy_mat(state_local->fvir_prev, state->fvir_prev);
1555 copy_mat(state_local->pres_prev, state->pres_prev);
1557 for (i = 0; i < state_local->ngtc; i++)
1559 for (j = 0; j < nh; j++)
1561 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1562 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1564 state->therm_integral[i] = state_local->therm_integral[i];
1566 for (i = 0; i < state_local->nnhpres; i++)
1568 for (j = 0; j < nh; j++)
1570 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1571 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1575 for (est = 0; est < estNR; est++)
1577 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1582 dd_collect_vec(dd, state_local, state_local->x, state->x);
1585 dd_collect_vec(dd, state_local, state_local->v, state->v);
1588 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1591 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1594 if (state->nrngi == 1)
1598 for (i = 0; i < state_local->nrng; i++)
1600 state->ld_rng[i] = state_local->ld_rng[i];
1606 dd_gather(dd, state_local->nrng*sizeof(state->ld_rng[0]),
1607 state_local->ld_rng, state->ld_rng);
1611 if (state->nrngi == 1)
1615 state->ld_rngi[0] = state_local->ld_rngi[0];
1620 dd_gather(dd, sizeof(state->ld_rngi[0]),
1621 state_local->ld_rngi, state->ld_rngi);
1624 case estDISRE_INITF:
1625 case estDISRE_RM3TAV:
1626 case estORIRE_INITF:
1630 gmx_incons("Unknown state entry encountered in dd_collect_state");
1636 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1642 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1645 state->nalloc = over_alloc_dd(nalloc);
1647 for (est = 0; est < estNR; est++)
1649 if (EST_DISTR(est) && (state->flags & (1<<est)))
1654 srenew(state->x, state->nalloc);
1657 srenew(state->v, state->nalloc);
1660 srenew(state->sd_X, state->nalloc);
1663 srenew(state->cg_p, state->nalloc);
1667 case estDISRE_INITF:
1668 case estDISRE_RM3TAV:
1669 case estORIRE_INITF:
1671 /* No reallocation required */
1674 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1681 srenew(*f, state->nalloc);
1685 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1688 if (nalloc > fr->cg_nalloc)
1692 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1694 fr->cg_nalloc = over_alloc_dd(nalloc);
1695 srenew(fr->cginfo, fr->cg_nalloc);
1696 if (fr->cutoff_scheme == ecutsGROUP)
1698 srenew(fr->cg_cm, fr->cg_nalloc);
1701 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1703 /* We don't use charge groups, we use x in state to set up
1704 * the atom communication.
1706 dd_realloc_state(state, f, nalloc);
1710 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1713 gmx_domdec_master_t *ma;
1714 int n, i, c, a, nalloc = 0;
1721 for (n = 0; n < dd->nnodes; n++)
1725 if (ma->nat[n] > nalloc)
1727 nalloc = over_alloc_dd(ma->nat[n]);
1728 srenew(buf, nalloc);
1730 /* Use lv as a temporary buffer */
1732 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1734 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1736 copy_rvec(v[c], buf[a++]);
1739 if (a != ma->nat[n])
1741 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1746 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1747 DDRANK(dd, n), n, dd->mpi_comm_all);
1752 n = DDMASTERRANK(dd);
1754 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1756 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1758 copy_rvec(v[c], lv[a++]);
1765 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1766 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1771 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1774 gmx_domdec_master_t *ma;
1775 int *scounts = NULL, *disps = NULL;
1776 int n, i, c, a, nalloc = 0;
1783 get_commbuffer_counts(dd, &scounts, &disps);
1787 for (n = 0; n < dd->nnodes; n++)
1789 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1791 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1793 copy_rvec(v[c], buf[a++]);
1799 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1802 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1804 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1806 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1810 dd_distribute_vec_scatterv(dd, cgs, v, lv);
1814 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1817 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1818 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1819 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1821 if (dfhist->nlambda > 0)
1823 int nlam = dfhist->nlambda;
1824 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1825 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1826 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1827 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1828 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1829 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
1831 for (i = 0; i<nlam; i++) {
1832 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1833 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1834 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1835 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1836 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1837 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
1842 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1843 t_state *state, t_state *state_local,
1848 nh = state->nhchainlength;
1852 for (i = 0; i < efptNR; i++)
1854 state_local->lambda[i] = state->lambda[i];
1856 state_local->fep_state = state->fep_state;
1857 state_local->veta = state->veta;
1858 state_local->vol0 = state->vol0;
1859 copy_mat(state->box, state_local->box);
1860 copy_mat(state->box_rel, state_local->box_rel);
1861 copy_mat(state->boxv, state_local->boxv);
1862 copy_mat(state->svir_prev, state_local->svir_prev);
1863 copy_mat(state->fvir_prev, state_local->fvir_prev);
1864 copy_df_history(&state_local->dfhist,&state->dfhist);
1865 for (i = 0; i < state_local->ngtc; i++)
1867 for (j = 0; j < nh; j++)
1869 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1870 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1872 state_local->therm_integral[i] = state->therm_integral[i];
1874 for (i = 0; i < state_local->nnhpres; i++)
1876 for (j = 0; j < nh; j++)
1878 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1879 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1883 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1884 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1885 dd_bcast(dd, sizeof(real), &state_local->veta);
1886 dd_bcast(dd, sizeof(real), &state_local->vol0);
1887 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1888 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1889 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1890 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1891 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1892 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1893 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1894 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1895 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1896 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1898 /* communicate df_history -- required for restarting from checkpoint */
1899 dd_distribute_dfhist(dd,&state_local->dfhist);
1901 if (dd->nat_home > state_local->nalloc)
1903 dd_realloc_state(state_local, f, dd->nat_home);
1905 for (i = 0; i < estNR; i++)
1907 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1912 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1915 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1918 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1921 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1924 if (state->nrngi == 1)
1927 state_local->nrng*sizeof(state_local->ld_rng[0]),
1928 state->ld_rng, state_local->ld_rng);
1933 state_local->nrng*sizeof(state_local->ld_rng[0]),
1934 state->ld_rng, state_local->ld_rng);
1938 if (state->nrngi == 1)
1940 dd_bcastc(dd, sizeof(state_local->ld_rngi[0]),
1941 state->ld_rngi, state_local->ld_rngi);
1945 dd_scatter(dd, sizeof(state_local->ld_rngi[0]),
1946 state->ld_rngi, state_local->ld_rngi);
1949 case estDISRE_INITF:
1950 case estDISRE_RM3TAV:
1951 case estORIRE_INITF:
1953 /* Not implemented yet */
1956 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1962 static char dim2char(int dim)
1968 case XX: c = 'X'; break;
1969 case YY: c = 'Y'; break;
1970 case ZZ: c = 'Z'; break;
1971 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
1977 static void write_dd_grid_pdb(const char *fn, gmx_large_int_t step,
1978 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1980 rvec grid_s[2], *grid_r = NULL, cx, r;
1981 char fname[STRLEN], format[STRLEN], buf[22];
1983 int a, i, d, z, y, x;
1987 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1988 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1992 snew(grid_r, 2*dd->nnodes);
1995 dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd) ? grid_r[0] : NULL);
1999 for (d = 0; d < DIM; d++)
2001 for (i = 0; i < DIM; i++)
2009 if (d < ddbox->npbcdim && dd->nc[d] > 1)
2011 tric[d][i] = box[i][d]/box[i][i];
2020 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
2021 sprintf(format, "%s%s\n", pdbformat, "%6.2f%6.2f");
2022 out = gmx_fio_fopen(fname, "w");
2023 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2025 for (i = 0; i < dd->nnodes; i++)
2027 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
2028 for (d = 0; d < DIM; d++)
2030 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
2032 for (z = 0; z < 2; z++)
2034 for (y = 0; y < 2; y++)
2036 for (x = 0; x < 2; x++)
2038 cx[XX] = grid_r[i*2+x][XX];
2039 cx[YY] = grid_r[i*2+y][YY];
2040 cx[ZZ] = grid_r[i*2+z][ZZ];
2042 fprintf(out, format, "ATOM", a++, "CA", "GLY", ' ', 1+i,
2043 ' ', 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol);
2047 for (d = 0; d < DIM; d++)
2049 for (x = 0; x < 4; x++)
2053 case 0: y = 1 + i*8 + 2*x; break;
2054 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
2055 case 2: y = 1 + i*8 + x; break;
2057 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2061 gmx_fio_fclose(out);
2066 void write_dd_pdb(const char *fn, gmx_large_int_t step, const char *title,
2067 gmx_mtop_t *mtop, t_commrec *cr,
2068 int natoms, rvec x[], matrix box)
2070 char fname[STRLEN], format[STRLEN], format4[STRLEN], buf[22];
2072 int i, ii, resnr, c;
2073 char *atomname, *resname;
2080 natoms = dd->comm->nat[ddnatVSITE];
2083 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2085 sprintf(format, "%s%s\n", pdbformat, "%6.2f%6.2f");
2086 sprintf(format4, "%s%s\n", pdbformat4, "%6.2f%6.2f");
2088 out = gmx_fio_fopen(fname, "w");
2090 fprintf(out, "TITLE %s\n", title);
2091 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2092 for (i = 0; i < natoms; i++)
2094 ii = dd->gatindex[i];
2095 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
2096 if (i < dd->comm->nat[ddnatZONE])
2099 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2105 else if (i < dd->comm->nat[ddnatVSITE])
2107 b = dd->comm->zones.n;
2111 b = dd->comm->zones.n + 1;
2113 fprintf(out, strlen(atomname) < 4 ? format : format4,
2114 "ATOM", (ii+1)%100000,
2115 atomname, resname, ' ', resnr%10000, ' ',
2116 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b);
2118 fprintf(out, "TER\n");
2120 gmx_fio_fclose(out);
2123 real dd_cutoff_mbody(gmx_domdec_t *dd)
2125 gmx_domdec_comm_t *comm;
2132 if (comm->bInterCGBondeds)
2134 if (comm->cutoff_mbody > 0)
2136 r = comm->cutoff_mbody;
2140 /* cutoff_mbody=0 means we do not have DLB */
2141 r = comm->cellsize_min[dd->dim[0]];
2142 for (di = 1; di < dd->ndim; di++)
2144 r = min(r, comm->cellsize_min[dd->dim[di]]);
2146 if (comm->bBondComm)
2148 r = max(r, comm->cutoff_mbody);
2152 r = min(r, comm->cutoff);
2160 real dd_cutoff_twobody(gmx_domdec_t *dd)
2164 r_mb = dd_cutoff_mbody(dd);
2166 return max(dd->comm->cutoff, r_mb);
2170 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2174 nc = dd->nc[dd->comm->cartpmedim];
2175 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2176 copy_ivec(coord, coord_pme);
2177 coord_pme[dd->comm->cartpmedim] =
2178 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
2181 static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
2183 /* Here we assign a PME node to communicate with this DD node
2184 * by assuming that the major index of both is x.
2185 * We add cr->npmenodes/2 to obtain an even distribution.
2187 return (ddindex*npme + npme/2)/ndd;
2190 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2192 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2195 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2197 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
2200 static int *dd_pmenodes(t_commrec *cr)
2205 snew(pmenodes, cr->npmenodes);
2207 for (i = 0; i < cr->dd->nnodes; i++)
2209 p0 = cr_ddindex2pmeindex(cr, i);
2210 p1 = cr_ddindex2pmeindex(cr, i+1);
2211 if (i+1 == cr->dd->nnodes || p1 > p0)
2215 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2217 pmenodes[n] = i + 1 + n;
2225 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2228 ivec coords, coords_pme, nc;
2233 if (dd->comm->bCartesian) {
2234 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2235 dd_coords2pmecoords(dd,coords,coords_pme);
2236 copy_ivec(dd->ntot,nc);
2237 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2238 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2240 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2242 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2248 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
2253 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2255 gmx_domdec_comm_t *comm;
2257 int ddindex, nodeid = -1;
2259 comm = cr->dd->comm;
2264 if (comm->bCartesianPP_PME)
2267 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2272 ddindex = dd_index(cr->dd->nc, coords);
2273 if (comm->bCartesianPP)
2275 nodeid = comm->ddindex2simnodeid[ddindex];
2281 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
2293 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2296 gmx_domdec_comm_t *comm;
2297 ivec coord, coord_pme;
2304 /* This assumes a uniform x domain decomposition grid cell size */
2305 if (comm->bCartesianPP_PME)
2308 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2309 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2311 /* This is a PP node */
2312 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2313 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2317 else if (comm->bCartesianPP)
2319 if (sim_nodeid < dd->nnodes)
2321 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2326 /* This assumes DD cells with identical x coordinates
2327 * are numbered sequentially.
2329 if (dd->comm->pmenodes == NULL)
2331 if (sim_nodeid < dd->nnodes)
2333 /* The DD index equals the nodeid */
2334 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2340 while (sim_nodeid > dd->comm->pmenodes[i])
2344 if (sim_nodeid < dd->comm->pmenodes[i])
2346 pmenode = dd->comm->pmenodes[i];
2354 void get_pme_nnodes(const gmx_domdec_t *dd,
2355 int *npmenodes_x, int *npmenodes_y)
2359 *npmenodes_x = dd->comm->npmenodes_x;
2360 *npmenodes_y = dd->comm->npmenodes_y;
2369 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2371 gmx_bool bPMEOnlyNode;
2373 if (DOMAINDECOMP(cr))
2375 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2379 bPMEOnlyNode = FALSE;
2382 return bPMEOnlyNode;
2385 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2386 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2390 ivec coord, coord_pme;
2394 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2397 for (x = 0; x < dd->nc[XX]; x++)
2399 for (y = 0; y < dd->nc[YY]; y++)
2401 for (z = 0; z < dd->nc[ZZ]; z++)
2403 if (dd->comm->bCartesianPP_PME)
2408 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2409 if (dd->ci[XX] == coord_pme[XX] &&
2410 dd->ci[YY] == coord_pme[YY] &&
2411 dd->ci[ZZ] == coord_pme[ZZ])
2413 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2418 /* The slab corresponds to the nodeid in the PME group */
2419 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2421 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2428 /* The last PP-only node is the peer node */
2429 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2433 fprintf(debug, "Receive coordinates from PP nodes:");
2434 for (x = 0; x < *nmy_ddnodes; x++)
2436 fprintf(debug, " %d", (*my_ddnodes)[x]);
2438 fprintf(debug, "\n");
2442 static gmx_bool receive_vir_ener(t_commrec *cr)
2444 gmx_domdec_comm_t *comm;
2445 int pmenode, coords[DIM], rank;
2449 if (cr->npmenodes < cr->dd->nnodes)
2451 comm = cr->dd->comm;
2452 if (comm->bCartesianPP_PME)
2454 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2456 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2457 coords[comm->cartpmedim]++;
2458 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2460 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2461 if (dd_simnode2pmenode(cr, rank) == pmenode)
2463 /* This is not the last PP node for pmenode */
2471 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2472 if (cr->sim_nodeid+1 < cr->nnodes &&
2473 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2475 /* This is not the last PP node for pmenode */
2484 static void set_zones_ncg_home(gmx_domdec_t *dd)
2486 gmx_domdec_zones_t *zones;
2489 zones = &dd->comm->zones;
2491 zones->cg_range[0] = 0;
2492 for (i = 1; i < zones->n+1; i++)
2494 zones->cg_range[i] = dd->ncg_home;
2496 /* zone_ncg1[0] should always be equal to ncg_home */
2497 dd->comm->zone_ncg1[0] = dd->ncg_home;
2500 static void rebuild_cgindex(gmx_domdec_t *dd,
2501 const int *gcgs_index, t_state *state)
2503 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2506 dd_cg_gl = dd->index_gl;
2507 cgindex = dd->cgindex;
2510 for (i = 0; i < state->ncg_gl; i++)
2514 dd_cg_gl[i] = cg_gl;
2515 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2519 dd->ncg_home = state->ncg_gl;
2522 set_zones_ncg_home(dd);
2525 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2527 while (cg >= cginfo_mb->cg_end)
2532 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2535 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2536 t_forcerec *fr, char *bLocalCG)
2538 cginfo_mb_t *cginfo_mb;
2544 cginfo_mb = fr->cginfo_mb;
2545 cginfo = fr->cginfo;
2547 for (cg = cg0; cg < cg1; cg++)
2549 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2553 if (bLocalCG != NULL)
2555 for (cg = cg0; cg < cg1; cg++)
2557 bLocalCG[index_gl[cg]] = TRUE;
2562 static void make_dd_indices(gmx_domdec_t *dd,
2563 const int *gcgs_index, int cg_start)
2565 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2566 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2571 bLocalCG = dd->comm->bLocalCG;
2573 if (dd->nat_tot > dd->gatindex_nalloc)
2575 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2576 srenew(dd->gatindex, dd->gatindex_nalloc);
2579 nzone = dd->comm->zones.n;
2580 zone2cg = dd->comm->zones.cg_range;
2581 zone_ncg1 = dd->comm->zone_ncg1;
2582 index_gl = dd->index_gl;
2583 gatindex = dd->gatindex;
2584 bCGs = dd->comm->bCGs;
2586 if (zone2cg[1] != dd->ncg_home)
2588 gmx_incons("dd->ncg_zone is not up to date");
2591 /* Make the local to global and global to local atom index */
2592 a = dd->cgindex[cg_start];
2593 for (zone = 0; zone < nzone; zone++)
2601 cg0 = zone2cg[zone];
2603 cg1 = zone2cg[zone+1];
2604 cg1_p1 = cg0 + zone_ncg1[zone];
2606 for (cg = cg0; cg < cg1; cg++)
2611 /* Signal that this cg is from more than one pulse away */
2614 cg_gl = index_gl[cg];
2617 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2620 ga2la_set(dd->ga2la, a_gl, a, zone1);
2626 gatindex[a] = cg_gl;
2627 ga2la_set(dd->ga2la, cg_gl, a, zone1);
2634 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2637 int ncg, i, ngl, nerr;
2640 if (bLocalCG == NULL)
2644 for (i = 0; i < dd->ncg_tot; i++)
2646 if (!bLocalCG[dd->index_gl[i]])
2649 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
2654 for (i = 0; i < ncg_sys; i++)
2661 if (ngl != dd->ncg_tot)
2663 fprintf(stderr, "DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
2670 static void check_index_consistency(gmx_domdec_t *dd,
2671 int natoms_sys, int ncg_sys,
2674 int nerr, ngl, i, a, cell;
2679 if (dd->comm->DD_debug > 1)
2681 snew(have, natoms_sys);
2682 for (a = 0; a < dd->nat_tot; a++)
2684 if (have[dd->gatindex[a]] > 0)
2686 fprintf(stderr, "DD node %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2690 have[dd->gatindex[a]] = a + 1;
2696 snew(have, dd->nat_tot);
2699 for (i = 0; i < natoms_sys; i++)
2701 if (ga2la_get(dd->ga2la, i, &a, &cell))
2703 if (a >= dd->nat_tot)
2705 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2711 if (dd->gatindex[a] != i)
2713 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2720 if (ngl != dd->nat_tot)
2723 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2724 dd->rank, where, ngl, dd->nat_tot);
2726 for (a = 0; a < dd->nat_tot; a++)
2731 "DD node %d, %s: local atom %d, global %d has no global index\n",
2732 dd->rank, where, a+1, dd->gatindex[a]+1);
2737 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2741 gmx_fatal(FARGS, "DD node %d, %s: %d atom/cg index inconsistencies",
2742 dd->rank, where, nerr);
2746 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2753 /* Clear the whole list without searching */
2754 ga2la_clear(dd->ga2la);
2758 for (i = a_start; i < dd->nat_tot; i++)
2760 ga2la_del(dd->ga2la, dd->gatindex[i]);
2764 bLocalCG = dd->comm->bLocalCG;
2767 for (i = cg_start; i < dd->ncg_tot; i++)
2769 bLocalCG[dd->index_gl[i]] = FALSE;
2773 dd_clear_local_vsite_indices(dd);
2775 if (dd->constraints)
2777 dd_clear_local_constraint_indices(dd);
2781 /* This function should be used for moving the domain boudaries during DLB,
2782 * for obtaining the minimum cell size. It checks the initially set limit
2783 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2784 * and, possibly, a longer cut-off limit set for PME load balancing.
2786 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2790 cellsize_min = comm->cellsize_min[dim];
2792 if (!comm->bVacDLBNoLimit)
2794 /* The cut-off might have changed, e.g. by PME load balacning,
2795 * from the value used to set comm->cellsize_min, so check it.
2797 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2799 if (comm->bPMELoadBalDLBLimits)
2801 /* Check for the cut-off limit set by the PME load balancing */
2802 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2806 return cellsize_min;
2809 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2812 real grid_jump_limit;
2814 /* The distance between the boundaries of cells at distance
2815 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2816 * and by the fact that cells should not be shifted by more than
2817 * half their size, such that cg's only shift by one cell
2818 * at redecomposition.
2820 grid_jump_limit = comm->cellsize_limit;
2821 if (!comm->bVacDLBNoLimit)
2823 if (comm->bPMELoadBalDLBLimits)
2825 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2827 grid_jump_limit = max(grid_jump_limit,
2828 cutoff/comm->cd[dim_ind].np);
2831 return grid_jump_limit;
2834 static gmx_bool check_grid_jump(gmx_large_int_t step,
2840 gmx_domdec_comm_t *comm;
2849 for (d = 1; d < dd->ndim; d++)
2852 limit = grid_jump_limit(comm, cutoff, d);
2853 bfac = ddbox->box_size[dim];
2854 if (ddbox->tric_dir[dim])
2856 bfac *= ddbox->skew_fac[dim];
2858 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2859 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2867 /* This error should never be triggered under normal
2868 * circumstances, but you never know ...
2870 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
2871 gmx_step_str(step, buf),
2872 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2880 static int dd_load_count(gmx_domdec_comm_t *comm)
2882 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2885 static float dd_force_load(gmx_domdec_comm_t *comm)
2892 if (comm->eFlop > 1)
2894 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2899 load = comm->cycl[ddCyclF];
2900 if (comm->cycl_n[ddCyclF] > 1)
2902 /* Subtract the maximum of the last n cycle counts
2903 * to get rid of possible high counts due to other sources,
2904 * for instance system activity, that would otherwise
2905 * affect the dynamic load balancing.
2907 load -= comm->cycl_max[ddCyclF];
2911 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2913 float gpu_wait, gpu_wait_sum;
2915 gpu_wait = comm->cycl[ddCyclWaitGPU];
2916 if (comm->cycl_n[ddCyclF] > 1)
2918 /* We should remove the WaitGPU time of the same MD step
2919 * as the one with the maximum F time, since the F time
2920 * and the wait time are not independent.
2921 * Furthermore, the step for the max F time should be chosen
2922 * the same on all ranks that share the same GPU.
2923 * But to keep the code simple, we remove the average instead.
2924 * The main reason for artificially long times at some steps
2925 * is spurious CPU activity or MPI time, so we don't expect
2926 * that changes in the GPU wait time matter a lot here.
2928 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2930 /* Sum the wait times over the ranks that share the same GPU */
2931 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2932 comm->mpi_comm_gpu_shared);
2933 /* Replace the wait time by the average over the ranks */
2934 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
2942 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2944 gmx_domdec_comm_t *comm;
2949 snew(*dim_f, dd->nc[dim]+1);
2951 for (i = 1; i < dd->nc[dim]; i++)
2953 if (comm->slb_frac[dim])
2955 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2959 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2962 (*dim_f)[dd->nc[dim]] = 1;
2965 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2967 int pmeindex, slab, nso, i;
2970 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2976 ddpme->dim = dimind;
2978 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2980 ddpme->nslab = (ddpme->dim == 0 ?
2981 dd->comm->npmenodes_x :
2982 dd->comm->npmenodes_y);
2984 if (ddpme->nslab <= 1)
2989 nso = dd->comm->npmenodes/ddpme->nslab;
2990 /* Determine for each PME slab the PP location range for dimension dim */
2991 snew(ddpme->pp_min, ddpme->nslab);
2992 snew(ddpme->pp_max, ddpme->nslab);
2993 for (slab = 0; slab < ddpme->nslab; slab++)
2995 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2996 ddpme->pp_max[slab] = 0;
2998 for (i = 0; i < dd->nnodes; i++)
3000 ddindex2xyz(dd->nc, i, xyz);
3001 /* For y only use our y/z slab.
3002 * This assumes that the PME x grid size matches the DD grid size.
3004 if (dimind == 0 || xyz[XX] == dd->ci[XX])
3006 pmeindex = ddindex2pmeindex(dd, i);
3009 slab = pmeindex/nso;
3013 slab = pmeindex % ddpme->nslab;
3015 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
3016 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
3020 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
3023 int dd_pme_maxshift_x(gmx_domdec_t *dd)
3025 if (dd->comm->ddpme[0].dim == XX)
3027 return dd->comm->ddpme[0].maxshift;
3035 int dd_pme_maxshift_y(gmx_domdec_t *dd)
3037 if (dd->comm->ddpme[0].dim == YY)
3039 return dd->comm->ddpme[0].maxshift;
3041 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
3043 return dd->comm->ddpme[1].maxshift;
3051 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
3052 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
3054 gmx_domdec_comm_t *comm;
3057 real range, pme_boundary;
3061 nc = dd->nc[ddpme->dim];
3064 if (!ddpme->dim_match)
3066 /* PP decomposition is not along dim: the worst situation */
3069 else if (ns <= 3 || (bUniform && ns == nc))
3071 /* The optimal situation */
3076 /* We need to check for all pme nodes which nodes they
3077 * could possibly need to communicate with.
3079 xmin = ddpme->pp_min;
3080 xmax = ddpme->pp_max;
3081 /* Allow for atoms to be maximally 2/3 times the cut-off
3082 * out of their DD cell. This is a reasonable balance between
3083 * between performance and support for most charge-group/cut-off
3086 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3087 /* Avoid extra communication when we are exactly at a boundary */
3091 for (s = 0; s < ns; s++)
3093 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3094 pme_boundary = (real)s/ns;
3097 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3099 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3103 pme_boundary = (real)(s+1)/ns;
3106 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3108 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3115 ddpme->maxshift = sh;
3119 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3120 ddpme->dim, ddpme->maxshift);
3124 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3128 for (d = 0; d < dd->ndim; d++)
3131 if (dim < ddbox->nboundeddim &&
3132 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3133 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3135 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3136 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3137 dd->nc[dim], dd->comm->cellsize_limit);
3142 enum { setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY };
3144 /* Set the domain boundaries. Use for static (or no) load balancing,
3145 * and also for the starting state for dynamic load balancing.
3146 * setmode determine if and where the boundaries are stored, use enum above.
3147 * Returns the number communication pulses in npulse.
3149 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3150 int setmode, ivec npulse)
3152 gmx_domdec_comm_t *comm;
3155 real *cell_x, cell_dx, cellsize;
3159 for (d = 0; d < DIM; d++)
3161 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
3163 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3166 cell_dx = ddbox->box_size[d]/dd->nc[d];
3169 case setcellsizeslbMASTER:
3170 for (j = 0; j < dd->nc[d]+1; j++)
3172 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3175 case setcellsizeslbLOCAL:
3176 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3177 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
3182 cellsize = cell_dx*ddbox->skew_fac[d];
3183 while (cellsize*npulse[d] < comm->cutoff)
3187 cellsize_min[d] = cellsize;
3191 /* Statically load balanced grid */
3192 /* Also when we are not doing a master distribution we determine
3193 * all cell borders in a loop to obtain identical values
3194 * to the master distribution case and to determine npulse.
3196 if (setmode == setcellsizeslbMASTER)
3198 cell_x = dd->ma->cell_x[d];
3202 snew(cell_x, dd->nc[d]+1);
3204 cell_x[0] = ddbox->box0[d];
3205 for (j = 0; j < dd->nc[d]; j++)
3207 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3208 cell_x[j+1] = cell_x[j] + cell_dx;
3209 cellsize = cell_dx*ddbox->skew_fac[d];
3210 while (cellsize*npulse[d] < comm->cutoff &&
3211 npulse[d] < dd->nc[d]-1)
3215 cellsize_min[d] = min(cellsize_min[d], cellsize);
3217 if (setmode == setcellsizeslbLOCAL)
3219 comm->cell_x0[d] = cell_x[dd->ci[d]];
3220 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3222 if (setmode != setcellsizeslbMASTER)
3227 /* The following limitation is to avoid that a cell would receive
3228 * some of its own home charge groups back over the periodic boundary.
3229 * Double charge groups cause trouble with the global indices.
3231 if (d < ddbox->npbcdim &&
3232 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3234 char error_string[STRLEN];
3236 sprintf(error_string,
3237 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3238 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3240 dd->nc[d], dd->nc[d],
3241 dd->nnodes > dd->nc[d] ? "cells" : "processors");
3243 if (setmode == setcellsizeslbLOCAL)
3245 gmx_fatal_collective(FARGS, NULL, dd, error_string);
3249 gmx_fatal(FARGS, error_string);
3254 if (!comm->bDynLoadBal)
3256 copy_rvec(cellsize_min, comm->cellsize_min);
3259 for (d = 0; d < comm->npmedecompdim; d++)
3261 set_pme_maxshift(dd, &comm->ddpme[d],
3262 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3263 comm->ddpme[d].slb_dim_f);
3268 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3269 int d, int dim, gmx_domdec_root_t *root,
3271 gmx_bool bUniform, gmx_large_int_t step, real cellsize_limit_f, int range[])
3273 gmx_domdec_comm_t *comm;
3274 int ncd, i, j, nmin, nmin_old;
3275 gmx_bool bLimLo, bLimHi;
3277 real fac, halfway, cellsize_limit_f_i, region_size;
3278 gmx_bool bPBC, bLastHi = FALSE;
3279 int nrange[] = {range[0], range[1]};
3281 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
3287 bPBC = (dim < ddbox->npbcdim);
3289 cell_size = root->buf_ncd;
3293 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3296 /* First we need to check if the scaling does not make cells
3297 * smaller than the smallest allowed size.
3298 * We need to do this iteratively, since if a cell is too small,
3299 * it needs to be enlarged, which makes all the other cells smaller,
3300 * which could in turn make another cell smaller than allowed.
3302 for (i = range[0]; i < range[1]; i++)
3304 root->bCellMin[i] = FALSE;
3310 /* We need the total for normalization */
3312 for (i = range[0]; i < range[1]; i++)
3314 if (root->bCellMin[i] == FALSE)
3316 fac += cell_size[i];
3319 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3320 /* Determine the cell boundaries */
3321 for (i = range[0]; i < range[1]; i++)
3323 if (root->bCellMin[i] == FALSE)
3325 cell_size[i] *= fac;
3326 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3328 cellsize_limit_f_i = 0;
3332 cellsize_limit_f_i = cellsize_limit_f;
3334 if (cell_size[i] < cellsize_limit_f_i)
3336 root->bCellMin[i] = TRUE;
3337 cell_size[i] = cellsize_limit_f_i;
3341 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
3344 while (nmin > nmin_old);
3347 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3348 /* For this check we should not use DD_CELL_MARGIN,
3349 * but a slightly smaller factor,
3350 * since rounding could get use below the limit.
3352 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3355 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3356 gmx_step_str(step, buf),
3357 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3358 ncd, comm->cellsize_min[dim]);
3361 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3365 /* Check if the boundary did not displace more than halfway
3366 * each of the cells it bounds, as this could cause problems,
3367 * especially when the differences between cell sizes are large.
3368 * If changes are applied, they will not make cells smaller
3369 * than the cut-off, as we check all the boundaries which
3370 * might be affected by a change and if the old state was ok,
3371 * the cells will at most be shrunk back to their old size.
3373 for (i = range[0]+1; i < range[1]; i++)
3375 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3376 if (root->cell_f[i] < halfway)
3378 root->cell_f[i] = halfway;
3379 /* Check if the change also causes shifts of the next boundaries */
3380 for (j = i+1; j < range[1]; j++)
3382 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3384 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3388 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3389 if (root->cell_f[i] > halfway)
3391 root->cell_f[i] = halfway;
3392 /* Check if the change also causes shifts of the next boundaries */
3393 for (j = i-1; j >= range[0]+1; j--)
3395 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3397 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3404 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3405 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3406 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3407 * for a and b nrange is used */
3410 /* Take care of the staggering of the cell boundaries */
3413 for (i = range[0]; i < range[1]; i++)
3415 root->cell_f_max0[i] = root->cell_f[i];
3416 root->cell_f_min1[i] = root->cell_f[i+1];
3421 for (i = range[0]+1; i < range[1]; i++)
3423 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3424 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3425 if (bLimLo && bLimHi)
3427 /* Both limits violated, try the best we can */
3428 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3429 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3430 nrange[0] = range[0];
3432 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3435 nrange[1] = range[1];
3436 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3442 /* root->cell_f[i] = root->bound_min[i]; */
3443 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3446 else if (bLimHi && !bLastHi)
3449 if (nrange[1] < range[1]) /* found a LimLo before */
3451 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3452 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3453 nrange[0] = nrange[1];
3455 root->cell_f[i] = root->bound_max[i];
3457 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3459 nrange[1] = range[1];
3462 if (nrange[1] < range[1]) /* found last a LimLo */
3464 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3465 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3466 nrange[0] = nrange[1];
3467 nrange[1] = range[1];
3468 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3470 else if (nrange[0] > range[0]) /* found at least one LimHi */
3472 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3479 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3480 int d, int dim, gmx_domdec_root_t *root,
3481 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3482 gmx_bool bUniform, gmx_large_int_t step)
3484 gmx_domdec_comm_t *comm;
3485 int ncd, d1, i, j, pos;
3487 real load_aver, load_i, imbalance, change, change_max, sc;
3488 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3492 int range[] = { 0, 0 };
3496 /* Convert the maximum change from the input percentage to a fraction */
3497 change_limit = comm->dlb_scale_lim*0.01;
3501 bPBC = (dim < ddbox->npbcdim);
3503 cell_size = root->buf_ncd;
3505 /* Store the original boundaries */
3506 for (i = 0; i < ncd+1; i++)
3508 root->old_cell_f[i] = root->cell_f[i];
3512 for (i = 0; i < ncd; i++)
3514 cell_size[i] = 1.0/ncd;
3517 else if (dd_load_count(comm))
3519 load_aver = comm->load[d].sum_m/ncd;
3521 for (i = 0; i < ncd; i++)
3523 /* Determine the relative imbalance of cell i */
3524 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3525 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3526 /* Determine the change of the cell size using underrelaxation */
3527 change = -relax*imbalance;
3528 change_max = max(change_max, max(change, -change));
3530 /* Limit the amount of scaling.
3531 * We need to use the same rescaling for all cells in one row,
3532 * otherwise the load balancing might not converge.
3535 if (change_max > change_limit)
3537 sc *= change_limit/change_max;
3539 for (i = 0; i < ncd; i++)
3541 /* Determine the relative imbalance of cell i */
3542 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3543 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3544 /* Determine the change of the cell size using underrelaxation */
3545 change = -sc*imbalance;
3546 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3550 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3551 cellsize_limit_f *= DD_CELL_MARGIN;
3552 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3553 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3554 if (ddbox->tric_dir[dim])
3556 cellsize_limit_f /= ddbox->skew_fac[dim];
3557 dist_min_f /= ddbox->skew_fac[dim];
3559 if (bDynamicBox && d > 0)
3561 dist_min_f *= DD_PRES_SCALE_MARGIN;
3563 if (d > 0 && !bUniform)
3565 /* Make sure that the grid is not shifted too much */
3566 for (i = 1; i < ncd; i++)
3568 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3570 gmx_incons("Inconsistent DD boundary staggering limits!");
3572 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3573 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3576 root->bound_min[i] += 0.5*space;
3578 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3579 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3582 root->bound_max[i] += 0.5*space;
3587 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3589 root->cell_f_max0[i-1] + dist_min_f,
3590 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3591 root->cell_f_min1[i] - dist_min_f);
3596 root->cell_f[0] = 0;
3597 root->cell_f[ncd] = 1;
3598 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3601 /* After the checks above, the cells should obey the cut-off
3602 * restrictions, but it does not hurt to check.
3604 for (i = 0; i < ncd; i++)
3608 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3609 dim, i, root->cell_f[i], root->cell_f[i+1]);
3612 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3613 root->cell_f[i+1] - root->cell_f[i] <
3614 cellsize_limit_f/DD_CELL_MARGIN)
3618 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3619 gmx_step_str(step, buf), dim2char(dim), i,
3620 (root->cell_f[i+1] - root->cell_f[i])
3621 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3626 /* Store the cell boundaries of the lower dimensions at the end */
3627 for (d1 = 0; d1 < d; d1++)
3629 root->cell_f[pos++] = comm->cell_f0[d1];
3630 root->cell_f[pos++] = comm->cell_f1[d1];
3633 if (d < comm->npmedecompdim)
3635 /* The master determines the maximum shift for
3636 * the coordinate communication between separate PME nodes.
3638 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
3640 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3643 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3647 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3648 gmx_ddbox_t *ddbox, int dimind)
3650 gmx_domdec_comm_t *comm;
3655 /* Set the cell dimensions */
3656 dim = dd->dim[dimind];
3657 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3658 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3659 if (dim >= ddbox->nboundeddim)
3661 comm->cell_x0[dim] += ddbox->box0[dim];
3662 comm->cell_x1[dim] += ddbox->box0[dim];
3666 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3667 int d, int dim, real *cell_f_row,
3670 gmx_domdec_comm_t *comm;
3676 /* Each node would only need to know two fractions,
3677 * but it is probably cheaper to broadcast the whole array.
3679 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3680 0, comm->mpi_comm_load[d]);
3682 /* Copy the fractions for this dimension from the buffer */
3683 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3684 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3685 /* The whole array was communicated, so set the buffer position */
3686 pos = dd->nc[dim] + 1;
3687 for (d1 = 0; d1 <= d; d1++)
3691 /* Copy the cell fractions of the lower dimensions */
3692 comm->cell_f0[d1] = cell_f_row[pos++];
3693 comm->cell_f1[d1] = cell_f_row[pos++];
3695 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3697 /* Convert the communicated shift from float to int */
3698 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3701 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3705 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3706 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3707 gmx_bool bUniform, gmx_large_int_t step)
3709 gmx_domdec_comm_t *comm;
3711 gmx_bool bRowMember, bRowRoot;
3716 for (d = 0; d < dd->ndim; d++)
3721 for (d1 = d; d1 < dd->ndim; d1++)
3723 if (dd->ci[dd->dim[d1]] > 0)
3736 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3737 ddbox, bDynamicBox, bUniform, step);
3738 cell_f_row = comm->root[d]->cell_f;
3742 cell_f_row = comm->cell_f_row;
3744 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
3749 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3753 /* This function assumes the box is static and should therefore
3754 * not be called when the box has changed since the last
3755 * call to dd_partition_system.
3757 for (d = 0; d < dd->ndim; d++)
3759 relative_to_absolute_cell_bounds(dd, ddbox, d);
3765 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3766 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3767 gmx_bool bUniform, gmx_bool bDoDLB, gmx_large_int_t step,
3768 gmx_wallcycle_t wcycle)
3770 gmx_domdec_comm_t *comm;
3777 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3778 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3779 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3781 else if (bDynamicBox)
3783 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3786 /* Set the dimensions for which no DD is used */
3787 for (dim = 0; dim < DIM; dim++)
3789 if (dd->nc[dim] == 1)
3791 comm->cell_x0[dim] = 0;
3792 comm->cell_x1[dim] = ddbox->box_size[dim];
3793 if (dim >= ddbox->nboundeddim)
3795 comm->cell_x0[dim] += ddbox->box0[dim];
3796 comm->cell_x1[dim] += ddbox->box0[dim];
3802 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3805 gmx_domdec_comm_dim_t *cd;
3807 for (d = 0; d < dd->ndim; d++)
3809 cd = &dd->comm->cd[d];
3810 np = npulse[dd->dim[d]];
3811 if (np > cd->np_nalloc)
3815 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3816 dim2char(dd->dim[d]), np);
3818 if (DDMASTER(dd) && cd->np_nalloc > 0)
3820 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3822 srenew(cd->ind, np);
3823 for (i = cd->np_nalloc; i < np; i++)
3825 cd->ind[i].index = NULL;
3826 cd->ind[i].nalloc = 0;
3835 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3836 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3837 gmx_bool bUniform, gmx_bool bDoDLB, gmx_large_int_t step,
3838 gmx_wallcycle_t wcycle)
3840 gmx_domdec_comm_t *comm;
3846 /* Copy the old cell boundaries for the cg displacement check */
3847 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3848 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3850 if (comm->bDynLoadBal)
3854 check_box_size(dd, ddbox);
3856 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3860 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
3861 realloc_comm_ind(dd, npulse);
3866 for (d = 0; d < DIM; d++)
3868 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3869 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
3874 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3876 rvec cell_ns_x0, rvec cell_ns_x1,
3877 gmx_large_int_t step)
3879 gmx_domdec_comm_t *comm;
3884 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3886 dim = dd->dim[dim_ind];
3888 /* Without PBC we don't have restrictions on the outer cells */
3889 if (!(dim >= ddbox->npbcdim &&
3890 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3891 comm->bDynLoadBal &&
3892 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3893 comm->cellsize_min[dim])
3896 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3897 gmx_step_str(step, buf), dim2char(dim),
3898 comm->cell_x1[dim] - comm->cell_x0[dim],
3899 ddbox->skew_fac[dim],
3900 dd->comm->cellsize_min[dim],
3901 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3905 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3907 /* Communicate the boundaries and update cell_ns_x0/1 */
3908 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3909 if (dd->bGridJump && dd->ndim > 1)
3911 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
3916 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3920 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3928 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3929 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3938 static void check_screw_box(matrix box)
3940 /* Mathematical limitation */
3941 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3943 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3946 /* Limitation due to the asymmetry of the eighth shell method */
3947 if (box[ZZ][YY] != 0)
3949 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
3953 static void distribute_cg(FILE *fplog, gmx_large_int_t step,
3954 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3957 gmx_domdec_master_t *ma;
3958 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3959 int i, icg, j, k, k0, k1, d, npbcdim;
3961 rvec box_size, cg_cm;
3963 real nrcg, inv_ncg, pos_d;
3965 gmx_bool bUnbounded, bScrew;
3969 if (tmp_ind == NULL)
3971 snew(tmp_nalloc, dd->nnodes);
3972 snew(tmp_ind, dd->nnodes);
3973 for (i = 0; i < dd->nnodes; i++)
3975 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3976 snew(tmp_ind[i], tmp_nalloc[i]);
3980 /* Clear the count */
3981 for (i = 0; i < dd->nnodes; i++)
3987 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3989 cgindex = cgs->index;
3991 /* Compute the center of geometry for all charge groups */
3992 for (icg = 0; icg < cgs->nr; icg++)
3995 k1 = cgindex[icg+1];
3999 copy_rvec(pos[k0], cg_cm);
4006 for (k = k0; (k < k1); k++)
4008 rvec_inc(cg_cm, pos[k]);
4010 for (d = 0; (d < DIM); d++)
4012 cg_cm[d] *= inv_ncg;
4015 /* Put the charge group in the box and determine the cell index */
4016 for (d = DIM-1; d >= 0; d--)
4019 if (d < dd->npbcdim)
4021 bScrew = (dd->bScrewPBC && d == XX);
4022 if (tric_dir[d] && dd->nc[d] > 1)
4024 /* Use triclinic coordintates for this dimension */
4025 for (j = d+1; j < DIM; j++)
4027 pos_d += cg_cm[j]*tcm[j][d];
4030 while (pos_d >= box[d][d])
4033 rvec_dec(cg_cm, box[d]);
4036 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
4037 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
4039 for (k = k0; (k < k1); k++)
4041 rvec_dec(pos[k], box[d]);
4044 pos[k][YY] = box[YY][YY] - pos[k][YY];
4045 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4052 rvec_inc(cg_cm, box[d]);
4055 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
4056 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
4058 for (k = k0; (k < k1); k++)
4060 rvec_inc(pos[k], box[d]);
4063 pos[k][YY] = box[YY][YY] - pos[k][YY];
4064 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4069 /* This could be done more efficiently */
4071 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
4076 i = dd_index(dd->nc, ind);
4077 if (ma->ncg[i] == tmp_nalloc[i])
4079 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
4080 srenew(tmp_ind[i], tmp_nalloc[i]);
4082 tmp_ind[i][ma->ncg[i]] = icg;
4084 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
4088 for (i = 0; i < dd->nnodes; i++)
4091 for (k = 0; k < ma->ncg[i]; k++)
4093 ma->cg[k1++] = tmp_ind[i][k];
4096 ma->index[dd->nnodes] = k1;
4098 for (i = 0; i < dd->nnodes; i++)
4108 fprintf(fplog, "Charge group distribution at step %s:",
4109 gmx_step_str(step, buf));
4110 for (i = 0; i < dd->nnodes; i++)
4112 fprintf(fplog, " %d", ma->ncg[i]);
4114 fprintf(fplog, "\n");
4118 static void get_cg_distribution(FILE *fplog, gmx_large_int_t step, gmx_domdec_t *dd,
4119 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4122 gmx_domdec_master_t *ma = NULL;
4125 int *ibuf, buf2[2] = { 0, 0 };
4126 gmx_bool bMaster = DDMASTER(dd);
4134 check_screw_box(box);
4137 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
4139 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
4140 for (i = 0; i < dd->nnodes; i++)
4142 ma->ibuf[2*i] = ma->ncg[i];
4143 ma->ibuf[2*i+1] = ma->nat[i];
4151 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4153 dd->ncg_home = buf2[0];
4154 dd->nat_home = buf2[1];
4155 dd->ncg_tot = dd->ncg_home;
4156 dd->nat_tot = dd->nat_home;
4157 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4159 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4160 srenew(dd->index_gl, dd->cg_nalloc);
4161 srenew(dd->cgindex, dd->cg_nalloc+1);
4165 for (i = 0; i < dd->nnodes; i++)
4167 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4168 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4173 DDMASTER(dd) ? ma->ibuf : NULL,
4174 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4175 DDMASTER(dd) ? ma->cg : NULL,
4176 dd->ncg_home*sizeof(int), dd->index_gl);
4178 /* Determine the home charge group sizes */
4180 for (i = 0; i < dd->ncg_home; i++)
4182 cg_gl = dd->index_gl[i];
4184 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4189 fprintf(debug, "Home charge groups:\n");
4190 for (i = 0; i < dd->ncg_home; i++)
4192 fprintf(debug, " %d", dd->index_gl[i]);
4195 fprintf(debug, "\n");
4198 fprintf(debug, "\n");
4202 static int compact_and_copy_vec_at(int ncg, int *move,
4205 rvec *src, gmx_domdec_comm_t *comm,
4208 int m, icg, i, i0, i1, nrcg;
4214 for (m = 0; m < DIM*2; m++)
4220 for (icg = 0; icg < ncg; icg++)
4222 i1 = cgindex[icg+1];
4228 /* Compact the home array in place */
4229 for (i = i0; i < i1; i++)
4231 copy_rvec(src[i], src[home_pos++]);
4237 /* Copy to the communication buffer */
4239 pos_vec[m] += 1 + vec*nrcg;
4240 for (i = i0; i < i1; i++)
4242 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4244 pos_vec[m] += (nvec - vec - 1)*nrcg;
4248 home_pos += i1 - i0;
4256 static int compact_and_copy_vec_cg(int ncg, int *move,
4258 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4261 int m, icg, i0, i1, nrcg;
4267 for (m = 0; m < DIM*2; m++)
4273 for (icg = 0; icg < ncg; icg++)
4275 i1 = cgindex[icg+1];
4281 /* Compact the home array in place */
4282 copy_rvec(src[icg], src[home_pos++]);
4288 /* Copy to the communication buffer */
4289 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
4290 pos_vec[m] += 1 + nrcg*nvec;
4302 static int compact_ind(int ncg, int *move,
4303 int *index_gl, int *cgindex,
4305 gmx_ga2la_t ga2la, char *bLocalCG,
4308 int cg, nat, a0, a1, a, a_gl;
4313 for (cg = 0; cg < ncg; cg++)
4319 /* Compact the home arrays in place.
4320 * Anything that can be done here avoids access to global arrays.
4322 cgindex[home_pos] = nat;
4323 for (a = a0; a < a1; a++)
4326 gatindex[nat] = a_gl;
4327 /* The cell number stays 0, so we don't need to set it */
4328 ga2la_change_la(ga2la, a_gl, nat);
4331 index_gl[home_pos] = index_gl[cg];
4332 cginfo[home_pos] = cginfo[cg];
4333 /* The charge group remains local, so bLocalCG does not change */
4338 /* Clear the global indices */
4339 for (a = a0; a < a1; a++)
4341 ga2la_del(ga2la, gatindex[a]);
4345 bLocalCG[index_gl[cg]] = FALSE;
4349 cgindex[home_pos] = nat;
4354 static void clear_and_mark_ind(int ncg, int *move,
4355 int *index_gl, int *cgindex, int *gatindex,
4356 gmx_ga2la_t ga2la, char *bLocalCG,
4361 for (cg = 0; cg < ncg; cg++)
4367 /* Clear the global indices */
4368 for (a = a0; a < a1; a++)
4370 ga2la_del(ga2la, gatindex[a]);
4374 bLocalCG[index_gl[cg]] = FALSE;
4376 /* Signal that this cg has moved using the ns cell index.
4377 * Here we set it to -1. fill_grid will change it
4378 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4380 cell_index[cg] = -1;
4385 static void print_cg_move(FILE *fplog,
4387 gmx_large_int_t step, int cg, int dim, int dir,
4388 gmx_bool bHaveLimitdAndCMOld, real limitd,
4389 rvec cm_old, rvec cm_new, real pos_d)
4391 gmx_domdec_comm_t *comm;
4396 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4397 if (bHaveLimitdAndCMOld)
4399 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4400 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4404 fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4405 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4407 fprintf(fplog, "distance out of cell %f\n",
4408 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4409 if (bHaveLimitdAndCMOld)
4411 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4412 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4414 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4415 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4416 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4418 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4419 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4421 comm->cell_x0[dim], comm->cell_x1[dim]);
4424 static void cg_move_error(FILE *fplog,
4426 gmx_large_int_t step, int cg, int dim, int dir,
4427 gmx_bool bHaveLimitdAndCMOld, real limitd,
4428 rvec cm_old, rvec cm_new, real pos_d)
4432 print_cg_move(fplog, dd, step, cg, dim, dir,
4433 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4435 print_cg_move(stderr, dd, step, cg, dim, dir,
4436 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4438 "A charge group moved too far between two domain decomposition steps\n"
4439 "This usually means that your system is not well equilibrated");
4442 static void rotate_state_atom(t_state *state, int a)
4446 for (est = 0; est < estNR; est++)
4448 if (EST_DISTR(est) && (state->flags & (1<<est)))
4453 /* Rotate the complete state; for a rectangular box only */
4454 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4455 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4458 state->v[a][YY] = -state->v[a][YY];
4459 state->v[a][ZZ] = -state->v[a][ZZ];
4462 state->sd_X[a][YY] = -state->sd_X[a][YY];
4463 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4466 state->cg_p[a][YY] = -state->cg_p[a][YY];
4467 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4469 case estDISRE_INITF:
4470 case estDISRE_RM3TAV:
4471 case estORIRE_INITF:
4473 /* These are distances, so not affected by rotation */
4476 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4482 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4484 if (natoms > comm->moved_nalloc)
4486 /* Contents should be preserved here */
4487 comm->moved_nalloc = over_alloc_dd(natoms);
4488 srenew(comm->moved, comm->moved_nalloc);
4494 static void calc_cg_move(FILE *fplog, gmx_large_int_t step,
4497 ivec tric_dir, matrix tcm,
4498 rvec cell_x0, rvec cell_x1,
4499 rvec limitd, rvec limit0, rvec limit1,
4501 int cg_start, int cg_end,
4506 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4507 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4511 real inv_ncg, pos_d;
4514 npbcdim = dd->npbcdim;
4516 for (cg = cg_start; cg < cg_end; cg++)
4523 copy_rvec(state->x[k0], cm_new);
4530 for (k = k0; (k < k1); k++)
4532 rvec_inc(cm_new, state->x[k]);
4534 for (d = 0; (d < DIM); d++)
4536 cm_new[d] = inv_ncg*cm_new[d];
4541 /* Do pbc and check DD cell boundary crossings */
4542 for (d = DIM-1; d >= 0; d--)
4546 bScrew = (dd->bScrewPBC && d == XX);
4547 /* Determine the location of this cg in lattice coordinates */
4551 for (d2 = d+1; d2 < DIM; d2++)
4553 pos_d += cm_new[d2]*tcm[d2][d];
4556 /* Put the charge group in the triclinic unit-cell */
4557 if (pos_d >= cell_x1[d])
4559 if (pos_d >= limit1[d])
4561 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4562 cg_cm[cg], cm_new, pos_d);
4565 if (dd->ci[d] == dd->nc[d] - 1)
4567 rvec_dec(cm_new, state->box[d]);
4570 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4571 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4573 for (k = k0; (k < k1); k++)
4575 rvec_dec(state->x[k], state->box[d]);
4578 rotate_state_atom(state, k);
4583 else if (pos_d < cell_x0[d])
4585 if (pos_d < limit0[d])
4587 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4588 cg_cm[cg], cm_new, pos_d);
4593 rvec_inc(cm_new, state->box[d]);
4596 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4597 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4599 for (k = k0; (k < k1); k++)
4601 rvec_inc(state->x[k], state->box[d]);
4604 rotate_state_atom(state, k);
4610 else if (d < npbcdim)
4612 /* Put the charge group in the rectangular unit-cell */
4613 while (cm_new[d] >= state->box[d][d])
4615 rvec_dec(cm_new, state->box[d]);
4616 for (k = k0; (k < k1); k++)
4618 rvec_dec(state->x[k], state->box[d]);
4621 while (cm_new[d] < 0)
4623 rvec_inc(cm_new, state->box[d]);
4624 for (k = k0; (k < k1); k++)
4626 rvec_inc(state->x[k], state->box[d]);
4632 copy_rvec(cm_new, cg_cm[cg]);
4634 /* Determine where this cg should go */
4637 for (d = 0; d < dd->ndim; d++)
4642 flag |= DD_FLAG_FW(d);
4648 else if (dev[dim] == -1)
4650 flag |= DD_FLAG_BW(d);
4653 if (dd->nc[dim] > 2)
4664 /* Temporarily store the flag in move */
4665 move[cg] = mc + flag;
4669 static void dd_redistribute_cg(FILE *fplog, gmx_large_int_t step,
4670 gmx_domdec_t *dd, ivec tric_dir,
4671 t_state *state, rvec **f,
4672 t_forcerec *fr, t_mdatoms *md,
4680 int ncg[DIM*2], nat[DIM*2];
4681 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4682 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4683 int sbuf[2], rbuf[2];
4684 int home_pos_cg, home_pos_at, buf_pos;
4686 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4689 real inv_ncg, pos_d;
4691 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4693 cginfo_mb_t *cginfo_mb;
4694 gmx_domdec_comm_t *comm;
4696 int nthread, thread;
4700 check_screw_box(state->box);
4704 if (fr->cutoff_scheme == ecutsGROUP)
4709 for (i = 0; i < estNR; i++)
4715 case estX: /* Always present */ break;
4716 case estV: bV = (state->flags & (1<<i)); break;
4717 case estSDX: bSDX = (state->flags & (1<<i)); break;
4718 case estCGP: bCGP = (state->flags & (1<<i)); break;
4721 case estDISRE_INITF:
4722 case estDISRE_RM3TAV:
4723 case estORIRE_INITF:
4725 /* No processing required */
4728 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4733 if (dd->ncg_tot > comm->nalloc_int)
4735 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4736 srenew(comm->buf_int, comm->nalloc_int);
4738 move = comm->buf_int;
4740 /* Clear the count */
4741 for (c = 0; c < dd->ndim*2; c++)
4747 npbcdim = dd->npbcdim;
4749 for (d = 0; (d < DIM); d++)
4751 limitd[d] = dd->comm->cellsize_min[d];
4752 if (d >= npbcdim && dd->ci[d] == 0)
4754 cell_x0[d] = -GMX_FLOAT_MAX;
4758 cell_x0[d] = comm->cell_x0[d];
4760 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4762 cell_x1[d] = GMX_FLOAT_MAX;
4766 cell_x1[d] = comm->cell_x1[d];
4770 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4771 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4775 /* We check after communication if a charge group moved
4776 * more than one cell. Set the pre-comm check limit to float_max.
4778 limit0[d] = -GMX_FLOAT_MAX;
4779 limit1[d] = GMX_FLOAT_MAX;
4783 make_tric_corr_matrix(npbcdim, state->box, tcm);
4785 cgindex = dd->cgindex;
4787 nthread = gmx_omp_nthreads_get(emntDomdec);
4789 /* Compute the center of geometry for all home charge groups
4790 * and put them in the box and determine where they should go.
4792 #pragma omp parallel for num_threads(nthread) schedule(static)
4793 for (thread = 0; thread < nthread; thread++)
4795 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4796 cell_x0, cell_x1, limitd, limit0, limit1,
4798 ( thread *dd->ncg_home)/nthread,
4799 ((thread+1)*dd->ncg_home)/nthread,
4800 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4804 for (cg = 0; cg < dd->ncg_home; cg++)
4809 flag = mc & ~DD_FLAG_NRCG;
4810 mc = mc & DD_FLAG_NRCG;
4813 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4815 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4816 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4818 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4819 /* We store the cg size in the lower 16 bits
4820 * and the place where the charge group should go
4821 * in the next 6 bits. This saves some communication volume.
4823 nrcg = cgindex[cg+1] - cgindex[cg];
4824 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4830 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4831 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4834 for (i = 0; i < dd->ndim*2; i++)
4836 *ncg_moved += ncg[i];
4853 /* Make sure the communication buffers are large enough */
4854 for (mc = 0; mc < dd->ndim*2; mc++)
4856 nvr = ncg[mc] + nat[mc]*nvec;
4857 if (nvr > comm->cgcm_state_nalloc[mc])
4859 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4860 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4864 switch (fr->cutoff_scheme)
4867 /* Recalculating cg_cm might be cheaper than communicating,
4868 * but that could give rise to rounding issues.
4871 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4872 nvec, cg_cm, comm, bCompact);
4875 /* Without charge groups we send the moved atom coordinates
4876 * over twice. This is so the code below can be used without
4877 * many conditionals for both for with and without charge groups.
4880 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4881 nvec, state->x, comm, FALSE);
4884 home_pos_cg -= *ncg_moved;
4888 gmx_incons("unimplemented");
4894 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4895 nvec, vec++, state->x, comm, bCompact);
4898 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4899 nvec, vec++, state->v, comm, bCompact);
4903 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4904 nvec, vec++, state->sd_X, comm, bCompact);
4908 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4909 nvec, vec++, state->cg_p, comm, bCompact);
4914 compact_ind(dd->ncg_home, move,
4915 dd->index_gl, dd->cgindex, dd->gatindex,
4916 dd->ga2la, comm->bLocalCG,
4921 if (fr->cutoff_scheme == ecutsVERLET)
4923 moved = get_moved(comm, dd->ncg_home);
4925 for (k = 0; k < dd->ncg_home; k++)
4932 moved = fr->ns.grid->cell_index;
4935 clear_and_mark_ind(dd->ncg_home, move,
4936 dd->index_gl, dd->cgindex, dd->gatindex,
4937 dd->ga2la, comm->bLocalCG,
4941 cginfo_mb = fr->cginfo_mb;
4943 *ncg_stay_home = home_pos_cg;
4944 for (d = 0; d < dd->ndim; d++)
4950 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4953 /* Communicate the cg and atom counts */
4958 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4959 d, dir, sbuf[0], sbuf[1]);
4961 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4963 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4965 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4966 srenew(comm->buf_int, comm->nalloc_int);
4969 /* Communicate the charge group indices, sizes and flags */
4970 dd_sendrecv_int(dd, d, dir,
4971 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4972 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4974 nvs = ncg[cdd] + nat[cdd]*nvec;
4975 i = rbuf[0] + rbuf[1] *nvec;
4976 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4978 /* Communicate cgcm and state */
4979 dd_sendrecv_rvec(dd, d, dir,
4980 comm->cgcm_state[cdd], nvs,
4981 comm->vbuf.v+nvr, i);
4982 ncg_recv += rbuf[0];
4983 nat_recv += rbuf[1];
4987 /* Process the received charge groups */
4989 for (cg = 0; cg < ncg_recv; cg++)
4991 flag = comm->buf_int[cg*DD_CGIBS+1];
4993 if (dim >= npbcdim && dd->nc[dim] > 2)
4995 /* No pbc in this dim and more than one domain boundary.
4996 * We do a separate check if a charge group didn't move too far.
4998 if (((flag & DD_FLAG_FW(d)) &&
4999 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
5000 ((flag & DD_FLAG_BW(d)) &&
5001 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
5003 cg_move_error(fplog, dd, step, cg, dim,
5004 (flag & DD_FLAG_FW(d)) ? 1 : 0,
5006 comm->vbuf.v[buf_pos],
5007 comm->vbuf.v[buf_pos],
5008 comm->vbuf.v[buf_pos][dim]);
5015 /* Check which direction this cg should go */
5016 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
5020 /* The cell boundaries for dimension d2 are not equal
5021 * for each cell row of the lower dimension(s),
5022 * therefore we might need to redetermine where
5023 * this cg should go.
5026 /* If this cg crosses the box boundary in dimension d2
5027 * we can use the communicated flag, so we do not
5028 * have to worry about pbc.
5030 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
5031 (flag & DD_FLAG_FW(d2))) ||
5032 (dd->ci[dim2] == 0 &&
5033 (flag & DD_FLAG_BW(d2)))))
5035 /* Clear the two flags for this dimension */
5036 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
5037 /* Determine the location of this cg
5038 * in lattice coordinates
5040 pos_d = comm->vbuf.v[buf_pos][dim2];
5043 for (d3 = dim2+1; d3 < DIM; d3++)
5046 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
5049 /* Check of we are not at the box edge.
5050 * pbc is only handled in the first step above,
5051 * but this check could move over pbc while
5052 * the first step did not due to different rounding.
5054 if (pos_d >= cell_x1[dim2] &&
5055 dd->ci[dim2] != dd->nc[dim2]-1)
5057 flag |= DD_FLAG_FW(d2);
5059 else if (pos_d < cell_x0[dim2] &&
5062 flag |= DD_FLAG_BW(d2);
5064 comm->buf_int[cg*DD_CGIBS+1] = flag;
5067 /* Set to which neighboring cell this cg should go */
5068 if (flag & DD_FLAG_FW(d2))
5072 else if (flag & DD_FLAG_BW(d2))
5074 if (dd->nc[dd->dim[d2]] > 2)
5086 nrcg = flag & DD_FLAG_NRCG;
5089 if (home_pos_cg+1 > dd->cg_nalloc)
5091 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5092 srenew(dd->index_gl, dd->cg_nalloc);
5093 srenew(dd->cgindex, dd->cg_nalloc+1);
5095 /* Set the global charge group index and size */
5096 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5097 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5098 /* Copy the state from the buffer */
5099 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5100 if (fr->cutoff_scheme == ecutsGROUP)
5103 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5107 /* Set the cginfo */
5108 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5109 dd->index_gl[home_pos_cg]);
5112 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5115 if (home_pos_at+nrcg > state->nalloc)
5117 dd_realloc_state(state, f, home_pos_at+nrcg);
5119 for (i = 0; i < nrcg; i++)
5121 copy_rvec(comm->vbuf.v[buf_pos++],
5122 state->x[home_pos_at+i]);
5126 for (i = 0; i < nrcg; i++)
5128 copy_rvec(comm->vbuf.v[buf_pos++],
5129 state->v[home_pos_at+i]);
5134 for (i = 0; i < nrcg; i++)
5136 copy_rvec(comm->vbuf.v[buf_pos++],
5137 state->sd_X[home_pos_at+i]);
5142 for (i = 0; i < nrcg; i++)
5144 copy_rvec(comm->vbuf.v[buf_pos++],
5145 state->cg_p[home_pos_at+i]);
5149 home_pos_at += nrcg;
5153 /* Reallocate the buffers if necessary */
5154 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5156 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5157 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5159 nvr = ncg[mc] + nat[mc]*nvec;
5160 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5162 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5163 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5165 /* Copy from the receive to the send buffers */
5166 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5167 comm->buf_int + cg*DD_CGIBS,
5168 DD_CGIBS*sizeof(int));
5169 memcpy(comm->cgcm_state[mc][nvr],
5170 comm->vbuf.v[buf_pos],
5171 (1+nrcg*nvec)*sizeof(rvec));
5172 buf_pos += 1 + nrcg*nvec;
5179 /* With sorting (!bCompact) the indices are now only partially up to date
5180 * and ncg_home and nat_home are not the real count, since there are
5181 * "holes" in the arrays for the charge groups that moved to neighbors.
5183 if (fr->cutoff_scheme == ecutsVERLET)
5185 moved = get_moved(comm, home_pos_cg);
5187 for (i = dd->ncg_home; i < home_pos_cg; i++)
5192 dd->ncg_home = home_pos_cg;
5193 dd->nat_home = home_pos_at;
5198 "Finished repartitioning: cgs moved out %d, new home %d\n",
5199 *ncg_moved, dd->ncg_home-*ncg_moved);
5204 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5206 dd->comm->cycl[ddCycl] += cycles;
5207 dd->comm->cycl_n[ddCycl]++;
5208 if (cycles > dd->comm->cycl_max[ddCycl])
5210 dd->comm->cycl_max[ddCycl] = cycles;
5214 static double force_flop_count(t_nrnb *nrnb)
5221 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5223 /* To get closer to the real timings, we half the count
5224 * for the normal loops and again half it for water loops.
5227 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5229 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5233 sum += nrnb->n[i]*0.50*cost_nrnb(i);
5236 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5239 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5241 sum += nrnb->n[i]*cost_nrnb(i);
5244 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5246 sum += nrnb->n[i]*cost_nrnb(i);
5252 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5254 if (dd->comm->eFlop)
5256 dd->comm->flop -= force_flop_count(nrnb);
5259 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5261 if (dd->comm->eFlop)
5263 dd->comm->flop += force_flop_count(nrnb);
5268 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5272 for (i = 0; i < ddCyclNr; i++)
5274 dd->comm->cycl[i] = 0;
5275 dd->comm->cycl_n[i] = 0;
5276 dd->comm->cycl_max[i] = 0;
5279 dd->comm->flop_n = 0;
5282 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5284 gmx_domdec_comm_t *comm;
5285 gmx_domdec_load_t *load;
5286 gmx_domdec_root_t *root = NULL;
5287 int d, dim, cid, i, pos;
5288 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5293 fprintf(debug, "get_load_distribution start\n");
5296 wallcycle_start(wcycle, ewcDDCOMMLOAD);
5300 bSepPME = (dd->pme_nodeid >= 0);
5302 for (d = dd->ndim-1; d >= 0; d--)
5305 /* Check if we participate in the communication in this dimension */
5306 if (d == dd->ndim-1 ||
5307 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5309 load = &comm->load[d];
5312 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
5315 if (d == dd->ndim-1)
5317 sbuf[pos++] = dd_force_load(comm);
5318 sbuf[pos++] = sbuf[0];
5321 sbuf[pos++] = sbuf[0];
5322 sbuf[pos++] = cell_frac;
5325 sbuf[pos++] = comm->cell_f_max0[d];
5326 sbuf[pos++] = comm->cell_f_min1[d];
5331 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5332 sbuf[pos++] = comm->cycl[ddCyclPME];
5337 sbuf[pos++] = comm->load[d+1].sum;
5338 sbuf[pos++] = comm->load[d+1].max;
5341 sbuf[pos++] = comm->load[d+1].sum_m;
5342 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5343 sbuf[pos++] = comm->load[d+1].flags;
5346 sbuf[pos++] = comm->cell_f_max0[d];
5347 sbuf[pos++] = comm->cell_f_min1[d];
5352 sbuf[pos++] = comm->load[d+1].mdf;
5353 sbuf[pos++] = comm->load[d+1].pme;
5357 /* Communicate a row in DD direction d.
5358 * The communicators are setup such that the root always has rank 0.
5361 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5362 load->load, load->nload*sizeof(float), MPI_BYTE,
5363 0, comm->mpi_comm_load[d]);
5365 if (dd->ci[dim] == dd->master_ci[dim])
5367 /* We are the root, process this row */
5368 if (comm->bDynLoadBal)
5370 root = comm->root[d];
5380 for (i = 0; i < dd->nc[dim]; i++)
5382 load->sum += load->load[pos++];
5383 load->max = max(load->max, load->load[pos]);
5389 /* This direction could not be load balanced properly,
5390 * therefore we need to use the maximum iso the average load.
5392 load->sum_m = max(load->sum_m, load->load[pos]);
5396 load->sum_m += load->load[pos];
5399 load->cvol_min = min(load->cvol_min, load->load[pos]);
5403 load->flags = (int)(load->load[pos++] + 0.5);
5407 root->cell_f_max0[i] = load->load[pos++];
5408 root->cell_f_min1[i] = load->load[pos++];
5413 load->mdf = max(load->mdf, load->load[pos]);
5415 load->pme = max(load->pme, load->load[pos]);
5419 if (comm->bDynLoadBal && root->bLimited)
5421 load->sum_m *= dd->nc[dim];
5422 load->flags |= (1<<d);
5430 comm->nload += dd_load_count(comm);
5431 comm->load_step += comm->cycl[ddCyclStep];
5432 comm->load_sum += comm->load[0].sum;
5433 comm->load_max += comm->load[0].max;
5434 if (comm->bDynLoadBal)
5436 for (d = 0; d < dd->ndim; d++)
5438 if (comm->load[0].flags & (1<<d))
5440 comm->load_lim[d]++;
5446 comm->load_mdf += comm->load[0].mdf;
5447 comm->load_pme += comm->load[0].pme;
5451 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5455 fprintf(debug, "get_load_distribution finished\n");
5459 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5461 /* Return the relative performance loss on the total run time
5462 * due to the force calculation load imbalance.
5464 if (dd->comm->nload > 0)
5467 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5468 (dd->comm->load_step*dd->nnodes);
5476 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5479 int npp, npme, nnodes, d, limp;
5480 float imbal, pme_f_ratio, lossf, lossp = 0;
5482 gmx_domdec_comm_t *comm;
5485 if (DDMASTER(dd) && comm->nload > 0)
5488 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5489 nnodes = npp + npme;
5490 imbal = comm->load_max*npp/comm->load_sum - 1;
5491 lossf = dd_force_imb_perf_loss(dd);
5492 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5493 fprintf(fplog, "%s", buf);
5494 fprintf(stderr, "\n");
5495 fprintf(stderr, "%s", buf);
5496 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5497 fprintf(fplog, "%s", buf);
5498 fprintf(stderr, "%s", buf);
5500 if (comm->bDynLoadBal)
5502 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5503 for (d = 0; d < dd->ndim; d++)
5505 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5506 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5512 sprintf(buf+strlen(buf), "\n");
5513 fprintf(fplog, "%s", buf);
5514 fprintf(stderr, "%s", buf);
5518 pme_f_ratio = comm->load_pme/comm->load_mdf;
5519 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5522 lossp *= (float)npme/(float)nnodes;
5526 lossp *= (float)npp/(float)nnodes;
5528 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5529 fprintf(fplog, "%s", buf);
5530 fprintf(stderr, "%s", buf);
5531 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5532 fprintf(fplog, "%s", buf);
5533 fprintf(stderr, "%s", buf);
5535 fprintf(fplog, "\n");
5536 fprintf(stderr, "\n");
5538 if (lossf >= DD_PERF_LOSS)
5541 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5542 " in the domain decomposition.\n", lossf*100);
5543 if (!comm->bDynLoadBal)
5545 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5549 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5551 fprintf(fplog, "%s\n", buf);
5552 fprintf(stderr, "%s\n", buf);
5554 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
5557 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5558 " had %s work to do than the PP nodes.\n"
5559 " You might want to %s the number of PME nodes\n"
5560 " or %s the cut-off and the grid spacing.\n",
5562 (lossp < 0) ? "less" : "more",
5563 (lossp < 0) ? "decrease" : "increase",
5564 (lossp < 0) ? "decrease" : "increase");
5565 fprintf(fplog, "%s\n", buf);
5566 fprintf(stderr, "%s\n", buf);
5571 static float dd_vol_min(gmx_domdec_t *dd)
5573 return dd->comm->load[0].cvol_min*dd->nnodes;
5576 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5578 return dd->comm->load[0].flags;
5581 static float dd_f_imbal(gmx_domdec_t *dd)
5583 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5586 float dd_pme_f_ratio(gmx_domdec_t *dd)
5588 if (dd->comm->cycl_n[ddCyclPME] > 0)
5590 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5598 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_large_int_t step)
5603 flags = dd_load_flags(dd);
5607 "DD load balancing is limited by minimum cell size in dimension");
5608 for (d = 0; d < dd->ndim; d++)
5612 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5615 fprintf(fplog, "\n");
5617 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5618 if (dd->comm->bDynLoadBal)
5620 fprintf(fplog, " vol min/aver %5.3f%c",
5621 dd_vol_min(dd), flags ? '!' : ' ');
5623 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5624 if (dd->comm->cycl_n[ddCyclPME])
5626 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5628 fprintf(fplog, "\n\n");
5631 static void dd_print_load_verbose(gmx_domdec_t *dd)
5633 if (dd->comm->bDynLoadBal)
5635 fprintf(stderr, "vol %4.2f%c ",
5636 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5638 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5639 if (dd->comm->cycl_n[ddCyclPME])
5641 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
5646 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5651 gmx_domdec_root_t *root;
5652 gmx_bool bPartOfGroup = FALSE;
5654 dim = dd->dim[dim_ind];
5655 copy_ivec(loc, loc_c);
5656 for (i = 0; i < dd->nc[dim]; i++)
5659 rank = dd_index(dd->nc, loc_c);
5660 if (rank == dd->rank)
5662 /* This process is part of the group */
5663 bPartOfGroup = TRUE;
5666 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5670 dd->comm->mpi_comm_load[dim_ind] = c_row;
5671 if (dd->comm->eDLB != edlbNO)
5673 if (dd->ci[dim] == dd->master_ci[dim])
5675 /* This is the root process of this row */
5676 snew(dd->comm->root[dim_ind], 1);
5677 root = dd->comm->root[dim_ind];
5678 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5679 snew(root->old_cell_f, dd->nc[dim]+1);
5680 snew(root->bCellMin, dd->nc[dim]);
5683 snew(root->cell_f_max0, dd->nc[dim]);
5684 snew(root->cell_f_min1, dd->nc[dim]);
5685 snew(root->bound_min, dd->nc[dim]);
5686 snew(root->bound_max, dd->nc[dim]);
5688 snew(root->buf_ncd, dd->nc[dim]);
5692 /* This is not a root process, we only need to receive cell_f */
5693 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5696 if (dd->ci[dim] == dd->master_ci[dim])
5698 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
5704 void dd_setup_dlb_resource_sharing(t_commrec *cr,
5705 const gmx_hw_info_t *hwinfo,
5706 const gmx_hw_opt_t *hw_opt)
5709 int physicalnode_id_hash;
5712 MPI_Comm mpi_comm_pp_physicalnode;
5714 if (!(cr->duty & DUTY_PP) ||
5715 hw_opt->gpu_opt.ncuda_dev_use == 0)
5717 /* Only PP nodes (currently) use GPUs.
5718 * If we don't have GPUs, there are no resources to share.
5723 physicalnode_id_hash = gmx_physicalnode_id_hash();
5725 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5731 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5732 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5733 dd->rank, physicalnode_id_hash, gpu_id);
5735 /* Split the PP communicator over the physical nodes */
5736 /* TODO: See if we should store this (before), as it's also used for
5737 * for the nodecomm summution.
5739 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5740 &mpi_comm_pp_physicalnode);
5741 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5742 &dd->comm->mpi_comm_gpu_shared);
5743 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5744 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5748 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5751 /* Note that some ranks could share a GPU, while others don't */
5753 if (dd->comm->nrank_gpu_shared == 1)
5755 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
5760 static void make_load_communicators(gmx_domdec_t *dd)
5763 int dim0, dim1, i, j;
5768 fprintf(debug, "Making load communicators\n");
5771 snew(dd->comm->load, dd->ndim);
5772 snew(dd->comm->mpi_comm_load, dd->ndim);
5775 make_load_communicator(dd, 0, loc);
5779 for (i = 0; i < dd->nc[dim0]; i++)
5782 make_load_communicator(dd, 1, loc);
5788 for (i = 0; i < dd->nc[dim0]; i++)
5792 for (j = 0; j < dd->nc[dim1]; j++)
5795 make_load_communicator(dd, 2, loc);
5802 fprintf(debug, "Finished making load communicators\n");
5807 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5810 int d, dim, i, j, m;
5813 ivec dd_zp[DD_MAXIZONE];
5814 gmx_domdec_zones_t *zones;
5815 gmx_domdec_ns_ranges_t *izone;
5817 for (d = 0; d < dd->ndim; d++)
5820 copy_ivec(dd->ci, tmp);
5821 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5822 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5823 copy_ivec(dd->ci, tmp);
5824 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5825 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5828 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5831 dd->neighbor[d][1]);
5837 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5839 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5840 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5847 for (i = 0; i < nzonep; i++)
5849 copy_ivec(dd_zp3[i], dd_zp[i]);
5855 for (i = 0; i < nzonep; i++)
5857 copy_ivec(dd_zp2[i], dd_zp[i]);
5863 for (i = 0; i < nzonep; i++)
5865 copy_ivec(dd_zp1[i], dd_zp[i]);
5869 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5874 zones = &dd->comm->zones;
5876 for (i = 0; i < nzone; i++)
5879 clear_ivec(zones->shift[i]);
5880 for (d = 0; d < dd->ndim; d++)
5882 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5887 for (i = 0; i < nzone; i++)
5889 for (d = 0; d < DIM; d++)
5891 s[d] = dd->ci[d] - zones->shift[i][d];
5896 else if (s[d] >= dd->nc[d])
5902 zones->nizone = nzonep;
5903 for (i = 0; i < zones->nizone; i++)
5905 if (dd_zp[i][0] != i)
5907 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5909 izone = &zones->izone[i];
5910 izone->j0 = dd_zp[i][1];
5911 izone->j1 = dd_zp[i][2];
5912 for (dim = 0; dim < DIM; dim++)
5914 if (dd->nc[dim] == 1)
5916 /* All shifts should be allowed */
5917 izone->shift0[dim] = -1;
5918 izone->shift1[dim] = 1;
5923 izone->shift0[d] = 0;
5924 izone->shift1[d] = 0;
5925 for(j=izone->j0; j<izone->j1; j++) {
5926 if (dd->shift[j][d] > dd->shift[i][d])
5927 izone->shift0[d] = -1;
5928 if (dd->shift[j][d] < dd->shift[i][d])
5929 izone->shift1[d] = 1;
5935 /* Assume the shift are not more than 1 cell */
5936 izone->shift0[dim] = 1;
5937 izone->shift1[dim] = -1;
5938 for (j = izone->j0; j < izone->j1; j++)
5940 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5941 if (shift_diff < izone->shift0[dim])
5943 izone->shift0[dim] = shift_diff;
5945 if (shift_diff > izone->shift1[dim])
5947 izone->shift1[dim] = shift_diff;
5954 if (dd->comm->eDLB != edlbNO)
5956 snew(dd->comm->root, dd->ndim);
5959 if (dd->comm->bRecordLoad)
5961 make_load_communicators(dd);
5965 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int reorder)
5968 gmx_domdec_comm_t *comm;
5979 if (comm->bCartesianPP)
5981 /* Set up cartesian communication for the particle-particle part */
5984 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5985 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5988 for (i = 0; i < DIM; i++)
5992 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5994 /* We overwrite the old communicator with the new cartesian one */
5995 cr->mpi_comm_mygroup = comm_cart;
5998 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5999 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
6001 if (comm->bCartesianPP_PME)
6003 /* Since we want to use the original cartesian setup for sim,
6004 * and not the one after split, we need to make an index.
6006 snew(comm->ddindex2ddnodeid, dd->nnodes);
6007 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
6008 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
6009 /* Get the rank of the DD master,
6010 * above we made sure that the master node is a PP node.
6020 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
6022 else if (comm->bCartesianPP)
6024 if (cr->npmenodes == 0)
6026 /* The PP communicator is also
6027 * the communicator for this simulation
6029 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
6031 cr->nodeid = dd->rank;
6033 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
6035 /* We need to make an index to go from the coordinates
6036 * to the nodeid of this simulation.
6038 snew(comm->ddindex2simnodeid, dd->nnodes);
6039 snew(buf, dd->nnodes);
6040 if (cr->duty & DUTY_PP)
6042 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6044 /* Communicate the ddindex to simulation nodeid index */
6045 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6046 cr->mpi_comm_mysim);
6049 /* Determine the master coordinates and rank.
6050 * The DD master should be the same node as the master of this sim.
6052 for (i = 0; i < dd->nnodes; i++)
6054 if (comm->ddindex2simnodeid[i] == 0)
6056 ddindex2xyz(dd->nc, i, dd->master_ci);
6057 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
6062 fprintf(debug, "The master rank is %d\n", dd->masterrank);
6067 /* No Cartesian communicators */
6068 /* We use the rank in dd->comm->all as DD index */
6069 ddindex2xyz(dd->nc, dd->rank, dd->ci);
6070 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
6072 clear_ivec(dd->master_ci);
6079 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6080 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6085 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6086 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6090 static void receive_ddindex2simnodeid(t_commrec *cr)
6094 gmx_domdec_comm_t *comm;
6101 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6103 snew(comm->ddindex2simnodeid, dd->nnodes);
6104 snew(buf, dd->nnodes);
6105 if (cr->duty & DUTY_PP)
6107 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6110 /* Communicate the ddindex to simulation nodeid index */
6111 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6112 cr->mpi_comm_mysim);
6119 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6120 int ncg, int natoms)
6122 gmx_domdec_master_t *ma;
6127 snew(ma->ncg, dd->nnodes);
6128 snew(ma->index, dd->nnodes+1);
6130 snew(ma->nat, dd->nnodes);
6131 snew(ma->ibuf, dd->nnodes*2);
6132 snew(ma->cell_x, DIM);
6133 for (i = 0; i < DIM; i++)
6135 snew(ma->cell_x[i], dd->nc[i]+1);
6138 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6144 snew(ma->vbuf, natoms);
6150 static void split_communicator(FILE *fplog, t_commrec *cr, int dd_node_order,
6154 gmx_domdec_comm_t *comm;
6165 if (comm->bCartesianPP)
6167 for (i = 1; i < DIM; i++)
6169 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6171 if (bDiv[YY] || bDiv[ZZ])
6173 comm->bCartesianPP_PME = TRUE;
6174 /* If we have 2D PME decomposition, which is always in x+y,
6175 * we stack the PME only nodes in z.
6176 * Otherwise we choose the direction that provides the thinnest slab
6177 * of PME only nodes as this will have the least effect
6178 * on the PP communication.
6179 * But for the PME communication the opposite might be better.
6181 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6183 dd->nc[YY] > dd->nc[ZZ]))
6185 comm->cartpmedim = ZZ;
6189 comm->cartpmedim = YY;
6191 comm->ntot[comm->cartpmedim]
6192 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6196 fprintf(fplog, "#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6198 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6203 if (comm->bCartesianPP_PME)
6207 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6210 for (i = 0; i < DIM; i++)
6214 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6217 MPI_Comm_rank(comm_cart, &rank);
6218 if (MASTERNODE(cr) && rank != 0)
6220 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6223 /* With this assigment we loose the link to the original communicator
6224 * which will usually be MPI_COMM_WORLD, unless have multisim.
6226 cr->mpi_comm_mysim = comm_cart;
6227 cr->sim_nodeid = rank;
6229 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6233 fprintf(fplog, "Cartesian nodeid %d, coordinates %d %d %d\n\n",
6234 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6237 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6241 if (cr->npmenodes == 0 ||
6242 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6244 cr->duty = DUTY_PME;
6247 /* Split the sim communicator into PP and PME only nodes */
6248 MPI_Comm_split(cr->mpi_comm_mysim,
6250 dd_index(comm->ntot, dd->ci),
6251 &cr->mpi_comm_mygroup);
6255 switch (dd_node_order)
6260 fprintf(fplog, "Order of the nodes: PP first, PME last\n");
6263 case ddnoINTERLEAVE:
6264 /* Interleave the PP-only and PME-only nodes,
6265 * as on clusters with dual-core machines this will double
6266 * the communication bandwidth of the PME processes
6267 * and thus speed up the PP <-> PME and inter PME communication.
6271 fprintf(fplog, "Interleaving PP and PME nodes\n");
6273 comm->pmenodes = dd_pmenodes(cr);
6278 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6281 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6283 cr->duty = DUTY_PME;
6290 /* Split the sim communicator into PP and PME only nodes */
6291 MPI_Comm_split(cr->mpi_comm_mysim,
6294 &cr->mpi_comm_mygroup);
6295 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6301 fprintf(fplog, "This is a %s only node\n\n",
6302 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
6306 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6309 gmx_domdec_comm_t *comm;
6315 copy_ivec(dd->nc, comm->ntot);
6317 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6318 comm->bCartesianPP_PME = FALSE;
6320 /* Reorder the nodes by default. This might change the MPI ranks.
6321 * Real reordering is only supported on very few architectures,
6322 * Blue Gene is one of them.
6324 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6326 if (cr->npmenodes > 0)
6328 /* Split the communicator into a PP and PME part */
6329 split_communicator(fplog, cr, dd_node_order, CartReorder);
6330 if (comm->bCartesianPP_PME)
6332 /* We (possibly) reordered the nodes in split_communicator,
6333 * so it is no longer required in make_pp_communicator.
6335 CartReorder = FALSE;
6340 /* All nodes do PP and PME */
6342 /* We do not require separate communicators */
6343 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6347 if (cr->duty & DUTY_PP)
6349 /* Copy or make a new PP communicator */
6350 make_pp_communicator(fplog, cr, CartReorder);
6354 receive_ddindex2simnodeid(cr);
6357 if (!(cr->duty & DUTY_PME))
6359 /* Set up the commnuication to our PME node */
6360 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6361 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6364 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6365 dd->pme_nodeid, dd->pme_receive_vir_ener);
6370 dd->pme_nodeid = -1;
6375 dd->ma = init_gmx_domdec_master_t(dd,
6377 comm->cgs_gl.index[comm->cgs_gl.nr]);
6381 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6383 real *slb_frac, tot;
6388 if (nc > 1 && size_string != NULL)
6392 fprintf(fplog, "Using static load balancing for the %s direction\n",
6397 for (i = 0; i < nc; i++)
6400 sscanf(size_string, "%lf%n", &dbl, &n);
6403 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6412 fprintf(fplog, "Relative cell sizes:");
6414 for (i = 0; i < nc; i++)
6419 fprintf(fplog, " %5.3f", slb_frac[i]);
6424 fprintf(fplog, "\n");
6431 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6434 gmx_mtop_ilistloop_t iloop;
6438 iloop = gmx_mtop_ilistloop_init(mtop);
6439 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6441 for (ftype = 0; ftype < F_NRE; ftype++)
6443 if ((interaction_function[ftype].flags & IF_BOND) &&
6446 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
6454 static int dd_nst_env(FILE *fplog, const char *env_var, int def)
6460 val = getenv(env_var);
6463 if (sscanf(val, "%d", &nst) <= 0)
6469 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6477 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6481 fprintf(stderr, "\n%s\n", warn_string);
6485 fprintf(fplog, "\n%s\n", warn_string);
6489 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6490 t_inputrec *ir, FILE *fplog)
6492 if (ir->ePBC == epbcSCREW &&
6493 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6495 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6498 if (ir->ns_type == ensSIMPLE)
6500 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
6503 if (ir->nstlist == 0)
6505 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6508 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6510 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6514 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6519 r = ddbox->box_size[XX];
6520 for (di = 0; di < dd->ndim; di++)
6523 /* Check using the initial average cell size */
6524 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6530 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6531 const char *dlb_opt, gmx_bool bRecordLoad,
6532 unsigned long Flags, t_inputrec *ir)
6540 case 'a': eDLB = edlbAUTO; break;
6541 case 'n': eDLB = edlbNO; break;
6542 case 'y': eDLB = edlbYES; break;
6543 default: gmx_incons("Unknown dlb_opt");
6546 if (Flags & MD_RERUN)
6551 if (!EI_DYNAMICS(ir->eI))
6553 if (eDLB == edlbYES)
6555 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6556 dd_warning(cr, fplog, buf);
6564 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6569 if (Flags & MD_REPRODUCIBLE)
6576 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6580 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6583 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
6591 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6596 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6598 /* Decomposition order z,y,x */
6601 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6603 for (dim = DIM-1; dim >= 0; dim--)
6605 if (dd->nc[dim] > 1)
6607 dd->dim[dd->ndim++] = dim;
6613 /* Decomposition order x,y,z */
6614 for (dim = 0; dim < DIM; dim++)
6616 if (dd->nc[dim] > 1)
6618 dd->dim[dd->ndim++] = dim;
6624 static gmx_domdec_comm_t *init_dd_comm()
6626 gmx_domdec_comm_t *comm;
6630 snew(comm->cggl_flag, DIM*2);
6631 snew(comm->cgcm_state, DIM*2);
6632 for (i = 0; i < DIM*2; i++)
6634 comm->cggl_flag_nalloc[i] = 0;
6635 comm->cgcm_state_nalloc[i] = 0;
6638 comm->nalloc_int = 0;
6639 comm->buf_int = NULL;
6641 vec_rvec_init(&comm->vbuf);
6643 comm->n_load_have = 0;
6644 comm->n_load_collect = 0;
6646 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6648 comm->sum_nat[i] = 0;
6652 comm->load_step = 0;
6655 clear_ivec(comm->load_lim);
6662 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6663 unsigned long Flags,
6665 real comm_distance_min, real rconstr,
6666 const char *dlb_opt, real dlb_scale,
6667 const char *sizex, const char *sizey, const char *sizez,
6668 gmx_mtop_t *mtop, t_inputrec *ir,
6669 matrix box, rvec *x,
6671 int *npme_x, int *npme_y)
6674 gmx_domdec_comm_t *comm;
6677 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6684 "\nInitializing Domain Decomposition on %d nodes\n", cr->nnodes);
6689 dd->comm = init_dd_comm();
6691 snew(comm->cggl_flag, DIM*2);
6692 snew(comm->cgcm_state, DIM*2);
6694 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6695 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6697 dd->bSendRecv2 = dd_nst_env(fplog, "GMX_DD_SENDRECV2", 0);
6698 comm->dlb_scale_lim = dd_nst_env(fplog, "GMX_DLB_MAX", 10);
6699 comm->eFlop = dd_nst_env(fplog, "GMX_DLB_FLOP", 0);
6700 recload = dd_nst_env(fplog, "GMX_DD_LOAD", 1);
6701 comm->nstSortCG = dd_nst_env(fplog, "GMX_DD_SORT", 1);
6702 comm->nstDDDump = dd_nst_env(fplog, "GMX_DD_DUMP", 0);
6703 comm->nstDDDumpGrid = dd_nst_env(fplog, "GMX_DD_DUMP_GRID", 0);
6704 comm->DD_debug = dd_nst_env(fplog, "GMX_DD_DEBUG", 0);
6706 dd->pme_recv_f_alloc = 0;
6707 dd->pme_recv_f_buf = NULL;
6709 if (dd->bSendRecv2 && fplog)
6711 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6717 fprintf(fplog, "Will load balance based on FLOP count\n");
6719 if (comm->eFlop > 1)
6721 srand(1+cr->nodeid);
6723 comm->bRecordLoad = TRUE;
6727 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6731 /* Initialize to GPU share count to 0, might change later */
6732 comm->nrank_gpu_shared = 0;
6734 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6736 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6739 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6741 dd->bGridJump = comm->bDynLoadBal;
6742 comm->bPMELoadBalDLBLimits = FALSE;
6744 if (comm->nstSortCG)
6748 if (comm->nstSortCG == 1)
6750 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6754 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6758 snew(comm->sort, 1);
6764 fprintf(fplog, "Will not sort the charge groups\n");
6768 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6770 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6771 if (comm->bInterCGBondeds)
6773 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6777 comm->bInterCGMultiBody = FALSE;
6780 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6781 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6783 if (ir->rlistlong == 0)
6785 /* Set the cut-off to some very large value,
6786 * so we don't need if statements everywhere in the code.
6787 * We use sqrt, since the cut-off is squared in some places.
6789 comm->cutoff = GMX_CUTOFF_INF;
6793 comm->cutoff = ir->rlistlong;
6795 comm->cutoff_mbody = 0;
6797 comm->cellsize_limit = 0;
6798 comm->bBondComm = FALSE;
6800 if (comm->bInterCGBondeds)
6802 if (comm_distance_min > 0)
6804 comm->cutoff_mbody = comm_distance_min;
6805 if (Flags & MD_DDBONDCOMM)
6807 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6811 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6813 r_bonded_limit = comm->cutoff_mbody;
6815 else if (ir->bPeriodicMols)
6817 /* Can not easily determine the required cut-off */
6818 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6819 comm->cutoff_mbody = comm->cutoff/2;
6820 r_bonded_limit = comm->cutoff_mbody;
6826 dd_bonded_cg_distance(fplog, dd, mtop, ir, x, box,
6827 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6829 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6830 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6832 /* We use an initial margin of 10% for the minimum cell size,
6833 * except when we are just below the non-bonded cut-off.
6835 if (Flags & MD_DDBONDCOMM)
6837 if (max(r_2b, r_mb) > comm->cutoff)
6839 r_bonded = max(r_2b, r_mb);
6840 r_bonded_limit = 1.1*r_bonded;
6841 comm->bBondComm = TRUE;
6846 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6848 /* We determine cutoff_mbody later */
6852 /* No special bonded communication,
6853 * simply increase the DD cut-off.
6855 r_bonded_limit = 1.1*max(r_2b, r_mb);
6856 comm->cutoff_mbody = r_bonded_limit;
6857 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6860 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6864 "Minimum cell size due to bonded interactions: %.3f nm\n",
6865 comm->cellsize_limit);
6869 if (dd->bInterCGcons && rconstr <= 0)
6871 /* There is a cell size limit due to the constraints (P-LINCS) */
6872 rconstr = constr_r_max(fplog, mtop, ir);
6876 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6878 if (rconstr > comm->cellsize_limit)
6880 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6884 else if (rconstr > 0 && fplog)
6886 /* Here we do not check for dd->bInterCGcons,
6887 * because one can also set a cell size limit for virtual sites only
6888 * and at this point we don't know yet if there are intercg v-sites.
6891 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6894 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6896 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6900 copy_ivec(nc, dd->nc);
6901 set_dd_dim(fplog, dd);
6902 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6904 if (cr->npmenodes == -1)
6908 acs = average_cellsize_min(dd, ddbox);
6909 if (acs < comm->cellsize_limit)
6913 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6915 gmx_fatal_collective(FARGS, cr, NULL,
6916 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6917 acs, comm->cellsize_limit);
6922 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6924 /* We need to choose the optimal DD grid and possibly PME nodes */
6925 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6926 comm->eDLB != edlbNO, dlb_scale,
6927 comm->cellsize_limit, comm->cutoff,
6928 comm->bInterCGBondeds, comm->bInterCGMultiBody);
6930 if (dd->nc[XX] == 0)
6932 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6933 sprintf(buf, "Change the number of nodes or mdrun option %s%s%s",
6934 !bC ? "-rdd" : "-rcon",
6935 comm->eDLB != edlbNO ? " or -dds" : "",
6936 bC ? " or your LINCS settings" : "");
6938 gmx_fatal_collective(FARGS, cr, NULL,
6939 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6941 "Look in the log file for details on the domain decomposition",
6942 cr->nnodes-cr->npmenodes, limit, buf);
6944 set_dd_dim(fplog, dd);
6950 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6951 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6954 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6955 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6957 gmx_fatal_collective(FARGS, cr, NULL,
6958 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6959 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6961 if (cr->npmenodes > dd->nnodes)
6963 gmx_fatal_collective(FARGS, cr, NULL,
6964 "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6966 if (cr->npmenodes > 0)
6968 comm->npmenodes = cr->npmenodes;
6972 comm->npmenodes = dd->nnodes;
6975 if (EEL_PME(ir->coulombtype))
6977 /* The following choices should match those
6978 * in comm_cost_est in domdec_setup.c.
6979 * Note that here the checks have to take into account
6980 * that the decomposition might occur in a different order than xyz
6981 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6982 * in which case they will not match those in comm_cost_est,
6983 * but since that is mainly for testing purposes that's fine.
6985 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6986 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6987 getenv("GMX_PMEONEDD") == NULL)
6989 comm->npmedecompdim = 2;
6990 comm->npmenodes_x = dd->nc[XX];
6991 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6995 /* In case nc is 1 in both x and y we could still choose to
6996 * decompose pme in y instead of x, but we use x for simplicity.
6998 comm->npmedecompdim = 1;
6999 if (dd->dim[0] == YY)
7001 comm->npmenodes_x = 1;
7002 comm->npmenodes_y = comm->npmenodes;
7006 comm->npmenodes_x = comm->npmenodes;
7007 comm->npmenodes_y = 1;
7012 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
7013 comm->npmenodes_x, comm->npmenodes_y, 1);
7018 comm->npmedecompdim = 0;
7019 comm->npmenodes_x = 0;
7020 comm->npmenodes_y = 0;
7023 /* Technically we don't need both of these,
7024 * but it simplifies code not having to recalculate it.
7026 *npme_x = comm->npmenodes_x;
7027 *npme_y = comm->npmenodes_y;
7029 snew(comm->slb_frac, DIM);
7030 if (comm->eDLB == edlbNO)
7032 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
7033 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
7034 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
7037 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
7039 if (comm->bBondComm || comm->eDLB != edlbNO)
7041 /* Set the bonded communication distance to halfway
7042 * the minimum and the maximum,
7043 * since the extra communication cost is nearly zero.
7045 acs = average_cellsize_min(dd, ddbox);
7046 comm->cutoff_mbody = 0.5*(r_bonded + acs);
7047 if (comm->eDLB != edlbNO)
7049 /* Check if this does not limit the scaling */
7050 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
7052 if (!comm->bBondComm)
7054 /* Without bBondComm do not go beyond the n.b. cut-off */
7055 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
7056 if (comm->cellsize_limit >= comm->cutoff)
7058 /* We don't loose a lot of efficieny
7059 * when increasing it to the n.b. cut-off.
7060 * It can even be slightly faster, because we need
7061 * less checks for the communication setup.
7063 comm->cutoff_mbody = comm->cutoff;
7066 /* Check if we did not end up below our original limit */
7067 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
7069 if (comm->cutoff_mbody > comm->cellsize_limit)
7071 comm->cellsize_limit = comm->cutoff_mbody;
7074 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
7079 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
7080 "cellsize limit %f\n",
7081 comm->bBondComm, comm->cellsize_limit);
7086 check_dd_restrictions(cr, dd, ir, fplog);
7089 comm->partition_step = INT_MIN;
7092 clear_dd_cycle_counts(dd);
7097 static void set_dlb_limits(gmx_domdec_t *dd)
7102 for (d = 0; d < dd->ndim; d++)
7104 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7105 dd->comm->cellsize_min[dd->dim[d]] =
7106 dd->comm->cellsize_min_dlb[dd->dim[d]];
7111 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_large_int_t step)
7114 gmx_domdec_comm_t *comm;
7124 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
7127 cellsize_min = comm->cellsize_min[dd->dim[0]];
7128 for (d = 1; d < dd->ndim; d++)
7130 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7133 if (cellsize_min < comm->cellsize_limit*1.05)
7135 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7137 /* Change DLB from "auto" to "no". */
7138 comm->eDLB = edlbNO;
7143 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7144 comm->bDynLoadBal = TRUE;
7145 dd->bGridJump = TRUE;
7149 /* We can set the required cell size info here,
7150 * so we do not need to communicate this.
7151 * The grid is completely uniform.
7153 for (d = 0; d < dd->ndim; d++)
7157 comm->load[d].sum_m = comm->load[d].sum;
7159 nc = dd->nc[dd->dim[d]];
7160 for (i = 0; i < nc; i++)
7162 comm->root[d]->cell_f[i] = i/(real)nc;
7165 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7166 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7169 comm->root[d]->cell_f[nc] = 1.0;
7174 static char *init_bLocalCG(gmx_mtop_t *mtop)
7179 ncg = ncg_mtop(mtop);
7180 snew(bLocalCG, ncg);
7181 for (cg = 0; cg < ncg; cg++)
7183 bLocalCG[cg] = FALSE;
7189 void dd_init_bondeds(FILE *fplog,
7190 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7191 gmx_vsite_t *vsite, gmx_constr_t constr,
7192 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7194 gmx_domdec_comm_t *comm;
7198 dd_make_reverse_top(fplog, dd, mtop, vsite, constr, ir, bBCheck);
7202 if (comm->bBondComm)
7204 /* Communicate atoms beyond the cut-off for bonded interactions */
7207 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7209 comm->bLocalCG = init_bLocalCG(mtop);
7213 /* Only communicate atoms based on cut-off */
7214 comm->cglink = NULL;
7215 comm->bLocalCG = NULL;
7219 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7221 gmx_bool bDynLoadBal, real dlb_scale,
7224 gmx_domdec_comm_t *comm;
7239 fprintf(fplog, "The maximum number of communication pulses is:");
7240 for (d = 0; d < dd->ndim; d++)
7242 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7244 fprintf(fplog, "\n");
7245 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7246 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7247 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7248 for (d = 0; d < DIM; d++)
7252 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7259 comm->cellsize_min_dlb[d]/
7260 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7262 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7265 fprintf(fplog, "\n");
7269 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
7270 fprintf(fplog, "The initial number of communication pulses is:");
7271 for (d = 0; d < dd->ndim; d++)
7273 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7275 fprintf(fplog, "\n");
7276 fprintf(fplog, "The initial domain decomposition cell size is:");
7277 for (d = 0; d < DIM; d++)
7281 fprintf(fplog, " %c %.2f nm",
7282 dim2char(d), dd->comm->cellsize_min[d]);
7285 fprintf(fplog, "\n\n");
7288 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7290 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7291 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7292 "non-bonded interactions", "", comm->cutoff);
7296 limit = dd->comm->cellsize_limit;
7300 if (dynamic_dd_box(ddbox, ir))
7302 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7304 limit = dd->comm->cellsize_min[XX];
7305 for (d = 1; d < DIM; d++)
7307 limit = min(limit, dd->comm->cellsize_min[d]);
7311 if (comm->bInterCGBondeds)
7313 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7314 "two-body bonded interactions", "(-rdd)",
7315 max(comm->cutoff, comm->cutoff_mbody));
7316 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7317 "multi-body bonded interactions", "(-rdd)",
7318 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7322 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7323 "virtual site constructions", "(-rcon)", limit);
7325 if (dd->constraint_comm)
7327 sprintf(buf, "atoms separated by up to %d constraints",
7329 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7330 buf, "(-rcon)", limit);
7332 fprintf(fplog, "\n");
7338 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7340 const t_inputrec *ir,
7341 const gmx_ddbox_t *ddbox)
7343 gmx_domdec_comm_t *comm;
7344 int d, dim, npulse, npulse_d_max, npulse_d;
7349 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7351 /* Determine the maximum number of comm. pulses in one dimension */
7353 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7355 /* Determine the maximum required number of grid pulses */
7356 if (comm->cellsize_limit >= comm->cutoff)
7358 /* Only a single pulse is required */
7361 else if (!bNoCutOff && comm->cellsize_limit > 0)
7363 /* We round down slightly here to avoid overhead due to the latency
7364 * of extra communication calls when the cut-off
7365 * would be only slightly longer than the cell size.
7366 * Later cellsize_limit is redetermined,
7367 * so we can not miss interactions due to this rounding.
7369 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7373 /* There is no cell size limit */
7374 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7377 if (!bNoCutOff && npulse > 1)
7379 /* See if we can do with less pulses, based on dlb_scale */
7381 for (d = 0; d < dd->ndim; d++)
7384 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7385 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7386 npulse_d_max = max(npulse_d_max, npulse_d);
7388 npulse = min(npulse, npulse_d_max);
7391 /* This env var can override npulse */
7392 d = dd_nst_env(debug, "GMX_DD_NPULSE", 0);
7399 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7400 for (d = 0; d < dd->ndim; d++)
7402 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7403 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7404 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7405 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7406 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7408 comm->bVacDLBNoLimit = FALSE;
7412 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7413 if (!comm->bVacDLBNoLimit)
7415 comm->cellsize_limit = max(comm->cellsize_limit,
7416 comm->cutoff/comm->maxpulse);
7418 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7419 /* Set the minimum cell size for each DD dimension */
7420 for (d = 0; d < dd->ndim; d++)
7422 if (comm->bVacDLBNoLimit ||
7423 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7425 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7429 comm->cellsize_min_dlb[dd->dim[d]] =
7430 comm->cutoff/comm->cd[d].np_dlb;
7433 if (comm->cutoff_mbody <= 0)
7435 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7437 if (comm->bDynLoadBal)
7443 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7445 /* If each molecule is a single charge group
7446 * or we use domain decomposition for each periodic dimension,
7447 * we do not need to take pbc into account for the bonded interactions.
7449 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7452 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
7455 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7456 t_inputrec *ir, t_forcerec *fr,
7459 gmx_domdec_comm_t *comm;
7465 /* Initialize the thread data.
7466 * This can not be done in init_domain_decomposition,
7467 * as the numbers of threads is determined later.
7469 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7472 snew(comm->dth, comm->nth);
7475 if (EEL_PME(ir->coulombtype))
7477 init_ddpme(dd, &comm->ddpme[0], 0);
7478 if (comm->npmedecompdim >= 2)
7480 init_ddpme(dd, &comm->ddpme[1], 1);
7485 comm->npmenodes = 0;
7486 if (dd->pme_nodeid >= 0)
7488 gmx_fatal_collective(FARGS, NULL, dd,
7489 "Can not have separate PME nodes without PME electrostatics");
7495 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7497 if (comm->eDLB != edlbNO)
7499 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7502 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7503 if (comm->eDLB == edlbAUTO)
7507 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7509 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
7512 if (ir->ePBC == epbcNONE)
7514 vol_frac = 1 - 1/(double)dd->nnodes;
7519 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7523 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7525 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7527 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
7530 static gmx_bool test_dd_cutoff(t_commrec *cr,
7531 t_state *state, t_inputrec *ir,
7542 set_ddbox(dd, FALSE, cr, ir, state->box,
7543 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7547 for (d = 0; d < dd->ndim; d++)
7551 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7552 if (dynamic_dd_box(&ddbox, ir))
7554 inv_cell_size *= DD_PRES_SCALE_MARGIN;
7557 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7559 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7560 dd->comm->cd[d].np_dlb > 0)
7562 if (np > dd->comm->cd[d].np_dlb)
7567 /* If a current local cell size is smaller than the requested
7568 * cut-off, we could still fix it, but this gets very complicated.
7569 * Without fixing here, we might actually need more checks.
7571 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7578 if (dd->comm->eDLB != edlbNO)
7580 /* If DLB is not active yet, we don't need to check the grid jumps.
7581 * Actually we shouldn't, because then the grid jump data is not set.
7583 if (dd->comm->bDynLoadBal &&
7584 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7589 gmx_sumi(1, &LocallyLimited, cr);
7591 if (LocallyLimited > 0)
7600 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7603 gmx_bool bCutoffAllowed;
7605 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7609 cr->dd->comm->cutoff = cutoff_req;
7612 return bCutoffAllowed;
7615 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7617 gmx_domdec_comm_t *comm;
7619 comm = cr->dd->comm;
7621 /* Turn on the DLB limiting (might have been on already) */
7622 comm->bPMELoadBalDLBLimits = TRUE;
7624 /* Change the cut-off limit */
7625 comm->PMELoadBal_max_cutoff = comm->cutoff;
7628 static void merge_cg_buffers(int ncell,
7629 gmx_domdec_comm_dim_t *cd, int pulse,
7631 int *index_gl, int *recv_i,
7632 rvec *cg_cm, rvec *recv_vr,
7634 cginfo_mb_t *cginfo_mb, int *cginfo)
7636 gmx_domdec_ind_t *ind, *ind_p;
7637 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7638 int shift, shift_at;
7640 ind = &cd->ind[pulse];
7642 /* First correct the already stored data */
7643 shift = ind->nrecv[ncell];
7644 for (cell = ncell-1; cell >= 0; cell--)
7646 shift -= ind->nrecv[cell];
7649 /* Move the cg's present from previous grid pulses */
7650 cg0 = ncg_cell[ncell+cell];
7651 cg1 = ncg_cell[ncell+cell+1];
7652 cgindex[cg1+shift] = cgindex[cg1];
7653 for (cg = cg1-1; cg >= cg0; cg--)
7655 index_gl[cg+shift] = index_gl[cg];
7656 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7657 cgindex[cg+shift] = cgindex[cg];
7658 cginfo[cg+shift] = cginfo[cg];
7660 /* Correct the already stored send indices for the shift */
7661 for (p = 1; p <= pulse; p++)
7663 ind_p = &cd->ind[p];
7665 for (c = 0; c < cell; c++)
7667 cg0 += ind_p->nsend[c];
7669 cg1 = cg0 + ind_p->nsend[cell];
7670 for (cg = cg0; cg < cg1; cg++)
7672 ind_p->index[cg] += shift;
7678 /* Merge in the communicated buffers */
7682 for (cell = 0; cell < ncell; cell++)
7684 cg1 = ncg_cell[ncell+cell+1] + shift;
7687 /* Correct the old cg indices */
7688 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7690 cgindex[cg+1] += shift_at;
7693 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7695 /* Copy this charge group from the buffer */
7696 index_gl[cg1] = recv_i[cg0];
7697 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7698 /* Add it to the cgindex */
7699 cg_gl = index_gl[cg1];
7700 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7701 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7702 cgindex[cg1+1] = cgindex[cg1] + nat;
7707 shift += ind->nrecv[cell];
7708 ncg_cell[ncell+cell+1] = cg1;
7712 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7713 int nzone, int cg0, const int *cgindex)
7717 /* Store the atom block boundaries for easy copying of communication buffers
7720 for (zone = 0; zone < nzone; zone++)
7722 for (p = 0; p < cd->np; p++)
7724 cd->ind[p].cell2at0[zone] = cgindex[cg];
7725 cg += cd->ind[p].nrecv[zone];
7726 cd->ind[p].cell2at1[zone] = cgindex[cg];
7731 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7737 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7739 if (!bLocalCG[link->a[i]])
7748 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7750 real c[DIM][4]; /* the corners for the non-bonded communication */
7751 real cr0; /* corner for rounding */
7752 real cr1[4]; /* corners for rounding */
7753 real bc[DIM]; /* corners for bounded communication */
7754 real bcr1; /* corner for rounding for bonded communication */
7757 /* Determine the corners of the domain(s) we are communicating with */
7759 set_dd_corners(const gmx_domdec_t *dd,
7760 int dim0, int dim1, int dim2,
7764 const gmx_domdec_comm_t *comm;
7765 const gmx_domdec_zones_t *zones;
7770 zones = &comm->zones;
7772 /* Keep the compiler happy */
7776 /* The first dimension is equal for all cells */
7777 c->c[0][0] = comm->cell_x0[dim0];
7780 c->bc[0] = c->c[0][0];
7785 /* This cell row is only seen from the first row */
7786 c->c[1][0] = comm->cell_x0[dim1];
7787 /* All rows can see this row */
7788 c->c[1][1] = comm->cell_x0[dim1];
7791 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7794 /* For the multi-body distance we need the maximum */
7795 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7798 /* Set the upper-right corner for rounding */
7799 c->cr0 = comm->cell_x1[dim0];
7804 for (j = 0; j < 4; j++)
7806 c->c[2][j] = comm->cell_x0[dim2];
7810 /* Use the maximum of the i-cells that see a j-cell */
7811 for (i = 0; i < zones->nizone; i++)
7813 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7819 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7825 /* For the multi-body distance we need the maximum */
7826 c->bc[2] = comm->cell_x0[dim2];
7827 for (i = 0; i < 2; i++)
7829 for (j = 0; j < 2; j++)
7831 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7837 /* Set the upper-right corner for rounding */
7838 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7839 * Only cell (0,0,0) can see cell 7 (1,1,1)
7841 c->cr1[0] = comm->cell_x1[dim1];
7842 c->cr1[3] = comm->cell_x1[dim1];
7845 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7848 /* For the multi-body distance we need the maximum */
7849 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7856 /* Determine which cg's we need to send in this pulse from this zone */
7858 get_zone_pulse_cgs(gmx_domdec_t *dd,
7859 int zonei, int zone,
7861 const int *index_gl,
7863 int dim, int dim_ind,
7864 int dim0, int dim1, int dim2,
7865 real r_comm2, real r_bcomm2,
7869 real skew_fac2_d, real skew_fac_01,
7870 rvec *v_d, rvec *v_0, rvec *v_1,
7871 const dd_corners_t *c,
7873 gmx_bool bDistBonded,
7879 gmx_domdec_ind_t *ind,
7880 int **ibuf, int *ibuf_nalloc,
7886 gmx_domdec_comm_t *comm;
7888 gmx_bool bDistMB_pulse;
7890 real r2, rb2, r, tric_sh;
7893 int nsend_z, nsend, nat;
7897 bScrew = (dd->bScrewPBC && dim == XX);
7899 bDistMB_pulse = (bDistMB && bDistBonded);
7905 for (cg = cg0; cg < cg1; cg++)
7909 if (tric_dist[dim_ind] == 0)
7911 /* Rectangular direction, easy */
7912 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7919 r = cg_cm[cg][dim] - c->bc[dim_ind];
7925 /* Rounding gives at most a 16% reduction
7926 * in communicated atoms
7928 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7930 r = cg_cm[cg][dim0] - c->cr0;
7931 /* This is the first dimension, so always r >= 0 */
7938 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7940 r = cg_cm[cg][dim1] - c->cr1[zone];
7947 r = cg_cm[cg][dim1] - c->bcr1;
7957 /* Triclinic direction, more complicated */
7960 /* Rounding, conservative as the skew_fac multiplication
7961 * will slightly underestimate the distance.
7963 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7965 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7966 for (i = dim0+1; i < DIM; i++)
7968 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7970 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7973 rb[dim0] = rn[dim0];
7976 /* Take care that the cell planes along dim0 might not
7977 * be orthogonal to those along dim1 and dim2.
7979 for (i = 1; i <= dim_ind; i++)
7982 if (normal[dim0][dimd] > 0)
7984 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7987 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7992 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7994 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7996 for (i = dim1+1; i < DIM; i++)
7998 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
8000 rn[dim1] += tric_sh;
8003 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
8004 /* Take care of coupling of the distances
8005 * to the planes along dim0 and dim1 through dim2.
8007 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
8008 /* Take care that the cell planes along dim1
8009 * might not be orthogonal to that along dim2.
8011 if (normal[dim1][dim2] > 0)
8013 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
8019 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
8022 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
8023 /* Take care of coupling of the distances
8024 * to the planes along dim0 and dim1 through dim2.
8026 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
8027 /* Take care that the cell planes along dim1
8028 * might not be orthogonal to that along dim2.
8030 if (normal[dim1][dim2] > 0)
8032 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
8037 /* The distance along the communication direction */
8038 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
8040 for (i = dim+1; i < DIM; i++)
8042 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
8047 r2 += rn[dim]*rn[dim]*skew_fac2_d;
8048 /* Take care of coupling of the distances
8049 * to the planes along dim0 and dim1 through dim2.
8051 if (dim_ind == 1 && zonei == 1)
8053 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
8059 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
8062 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
8063 /* Take care of coupling of the distances
8064 * to the planes along dim0 and dim1 through dim2.
8066 if (dim_ind == 1 && zonei == 1)
8068 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
8076 ((bDistMB && rb2 < r_bcomm2) ||
8077 (bDist2B && r2 < r_bcomm2)) &&
8079 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
8080 missing_link(comm->cglink, index_gl[cg],
8083 /* Make an index to the local charge groups */
8084 if (nsend+1 > ind->nalloc)
8086 ind->nalloc = over_alloc_large(nsend+1);
8087 srenew(ind->index, ind->nalloc);
8089 if (nsend+1 > *ibuf_nalloc)
8091 *ibuf_nalloc = over_alloc_large(nsend+1);
8092 srenew(*ibuf, *ibuf_nalloc);
8094 ind->index[nsend] = cg;
8095 (*ibuf)[nsend] = index_gl[cg];
8097 vec_rvec_check_alloc(vbuf, nsend+1);
8099 if (dd->ci[dim] == 0)
8101 /* Correct cg_cm for pbc */
8102 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
8105 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8106 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8111 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8114 nat += cgindex[cg+1] - cgindex[cg];
8120 *nsend_z_ptr = nsend_z;
8123 static void setup_dd_communication(gmx_domdec_t *dd,
8124 matrix box, gmx_ddbox_t *ddbox,
8125 t_forcerec *fr, t_state *state, rvec **f)
8127 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8128 int nzone, nzone_send, zone, zonei, cg0, cg1;
8129 int c, i, j, cg, cg_gl, nrcg;
8130 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8131 gmx_domdec_comm_t *comm;
8132 gmx_domdec_zones_t *zones;
8133 gmx_domdec_comm_dim_t *cd;
8134 gmx_domdec_ind_t *ind;
8135 cginfo_mb_t *cginfo_mb;
8136 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8137 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8138 dd_corners_t corners;
8140 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8141 real skew_fac2_d, skew_fac_01;
8148 fprintf(debug, "Setting up DD communication\n");
8153 switch (fr->cutoff_scheme)
8162 gmx_incons("unimplemented");
8166 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8168 dim = dd->dim[dim_ind];
8170 /* Check if we need to use triclinic distances */
8171 tric_dist[dim_ind] = 0;
8172 for (i = 0; i <= dim_ind; i++)
8174 if (ddbox->tric_dir[dd->dim[i]])
8176 tric_dist[dim_ind] = 1;
8181 bBondComm = comm->bBondComm;
8183 /* Do we need to determine extra distances for multi-body bondeds? */
8184 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8186 /* Do we need to determine extra distances for only two-body bondeds? */
8187 bDist2B = (bBondComm && !bDistMB);
8189 r_comm2 = sqr(comm->cutoff);
8190 r_bcomm2 = sqr(comm->cutoff_mbody);
8194 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8197 zones = &comm->zones;
8200 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8201 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8203 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8205 /* Triclinic stuff */
8206 normal = ddbox->normal;
8210 v_0 = ddbox->v[dim0];
8211 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8213 /* Determine the coupling coefficient for the distances
8214 * to the cell planes along dim0 and dim1 through dim2.
8215 * This is required for correct rounding.
8218 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8221 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8227 v_1 = ddbox->v[dim1];
8230 zone_cg_range = zones->cg_range;
8231 index_gl = dd->index_gl;
8232 cgindex = dd->cgindex;
8233 cginfo_mb = fr->cginfo_mb;
8235 zone_cg_range[0] = 0;
8236 zone_cg_range[1] = dd->ncg_home;
8237 comm->zone_ncg1[0] = dd->ncg_home;
8238 pos_cg = dd->ncg_home;
8240 nat_tot = dd->nat_home;
8242 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8244 dim = dd->dim[dim_ind];
8245 cd = &comm->cd[dim_ind];
8247 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8249 /* No pbc in this dimension, the first node should not comm. */
8257 v_d = ddbox->v[dim];
8258 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8260 cd->bInPlace = TRUE;
8261 for (p = 0; p < cd->np; p++)
8263 /* Only atoms communicated in the first pulse are used
8264 * for multi-body bonded interactions or for bBondComm.
8266 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8271 for (zone = 0; zone < nzone_send; zone++)
8273 if (tric_dist[dim_ind] && dim_ind > 0)
8275 /* Determine slightly more optimized skew_fac's
8277 * This reduces the number of communicated atoms
8278 * by about 10% for 3D DD of rhombic dodecahedra.
8280 for (dimd = 0; dimd < dim; dimd++)
8282 sf2_round[dimd] = 1;
8283 if (ddbox->tric_dir[dimd])
8285 for (i = dd->dim[dimd]+1; i < DIM; i++)
8287 /* If we are shifted in dimension i
8288 * and the cell plane is tilted forward
8289 * in dimension i, skip this coupling.
8291 if (!(zones->shift[nzone+zone][i] &&
8292 ddbox->v[dimd][i][dimd] >= 0))
8295 sqr(ddbox->v[dimd][i][dimd]);
8298 sf2_round[dimd] = 1/sf2_round[dimd];
8303 zonei = zone_perm[dim_ind][zone];
8306 /* Here we permutate the zones to obtain a convenient order
8307 * for neighbor searching
8309 cg0 = zone_cg_range[zonei];
8310 cg1 = zone_cg_range[zonei+1];
8314 /* Look only at the cg's received in the previous grid pulse
8316 cg1 = zone_cg_range[nzone+zone+1];
8317 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
8320 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8321 for (th = 0; th < comm->nth; th++)
8323 gmx_domdec_ind_t *ind_p;
8324 int **ibuf_p, *ibuf_nalloc_p;
8326 int *nsend_p, *nat_p;
8332 /* Thread 0 writes in the comm buffers */
8334 ibuf_p = &comm->buf_int;
8335 ibuf_nalloc_p = &comm->nalloc_int;
8336 vbuf_p = &comm->vbuf;
8339 nsend_zone_p = &ind->nsend[zone];
8343 /* Other threads write into temp buffers */
8344 ind_p = &comm->dth[th].ind;
8345 ibuf_p = &comm->dth[th].ibuf;
8346 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8347 vbuf_p = &comm->dth[th].vbuf;
8348 nsend_p = &comm->dth[th].nsend;
8349 nat_p = &comm->dth[th].nat;
8350 nsend_zone_p = &comm->dth[th].nsend_zone;
8352 comm->dth[th].nsend = 0;
8353 comm->dth[th].nat = 0;
8354 comm->dth[th].nsend_zone = 0;
8364 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8365 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8368 /* Get the cg's for this pulse in this zone */
8369 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8371 dim, dim_ind, dim0, dim1, dim2,
8374 normal, skew_fac2_d, skew_fac_01,
8375 v_d, v_0, v_1, &corners, sf2_round,
8376 bDistBonded, bBondComm,
8380 ibuf_p, ibuf_nalloc_p,
8386 /* Append data of threads>=1 to the communication buffers */
8387 for (th = 1; th < comm->nth; th++)
8389 dd_comm_setup_work_t *dth;
8392 dth = &comm->dth[th];
8394 ns1 = nsend + dth->nsend_zone;
8395 if (ns1 > ind->nalloc)
8397 ind->nalloc = over_alloc_dd(ns1);
8398 srenew(ind->index, ind->nalloc);
8400 if (ns1 > comm->nalloc_int)
8402 comm->nalloc_int = over_alloc_dd(ns1);
8403 srenew(comm->buf_int, comm->nalloc_int);
8405 if (ns1 > comm->vbuf.nalloc)
8407 comm->vbuf.nalloc = over_alloc_dd(ns1);
8408 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8411 for (i = 0; i < dth->nsend_zone; i++)
8413 ind->index[nsend] = dth->ind.index[i];
8414 comm->buf_int[nsend] = dth->ibuf[i];
8415 copy_rvec(dth->vbuf.v[i],
8416 comm->vbuf.v[nsend]);
8420 ind->nsend[zone] += dth->nsend_zone;
8423 /* Clear the counts in case we do not have pbc */
8424 for (zone = nzone_send; zone < nzone; zone++)
8426 ind->nsend[zone] = 0;
8428 ind->nsend[nzone] = nsend;
8429 ind->nsend[nzone+1] = nat;
8430 /* Communicate the number of cg's and atoms to receive */
8431 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8432 ind->nsend, nzone+2,
8433 ind->nrecv, nzone+2);
8435 /* The rvec buffer is also required for atom buffers of size nsend
8436 * in dd_move_x and dd_move_f.
8438 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8442 /* We can receive in place if only the last zone is not empty */
8443 for (zone = 0; zone < nzone-1; zone++)
8445 if (ind->nrecv[zone] > 0)
8447 cd->bInPlace = FALSE;
8452 /* The int buffer is only required here for the cg indices */
8453 if (ind->nrecv[nzone] > comm->nalloc_int2)
8455 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8456 srenew(comm->buf_int2, comm->nalloc_int2);
8458 /* The rvec buffer is also required for atom buffers
8459 * of size nrecv in dd_move_x and dd_move_f.
8461 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8462 vec_rvec_check_alloc(&comm->vbuf2, i);
8466 /* Make space for the global cg indices */
8467 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8468 || dd->cg_nalloc == 0)
8470 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8471 srenew(index_gl, dd->cg_nalloc);
8472 srenew(cgindex, dd->cg_nalloc+1);
8474 /* Communicate the global cg indices */
8477 recv_i = index_gl + pos_cg;
8481 recv_i = comm->buf_int2;
8483 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8484 comm->buf_int, nsend,
8485 recv_i, ind->nrecv[nzone]);
8487 /* Make space for cg_cm */
8488 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8489 if (fr->cutoff_scheme == ecutsGROUP)
8497 /* Communicate cg_cm */
8500 recv_vr = cg_cm + pos_cg;
8504 recv_vr = comm->vbuf2.v;
8506 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8507 comm->vbuf.v, nsend,
8508 recv_vr, ind->nrecv[nzone]);
8510 /* Make the charge group index */
8513 zone = (p == 0 ? 0 : nzone - 1);
8514 while (zone < nzone)
8516 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8518 cg_gl = index_gl[pos_cg];
8519 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8520 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8521 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8524 /* Update the charge group presence,
8525 * so we can use it in the next pass of the loop.
8527 comm->bLocalCG[cg_gl] = TRUE;
8533 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8536 zone_cg_range[nzone+zone] = pos_cg;
8541 /* This part of the code is never executed with bBondComm. */
8542 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8543 index_gl, recv_i, cg_cm, recv_vr,
8544 cgindex, fr->cginfo_mb, fr->cginfo);
8545 pos_cg += ind->nrecv[nzone];
8547 nat_tot += ind->nrecv[nzone+1];
8551 /* Store the atom block for easy copying of communication buffers */
8552 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
8556 dd->index_gl = index_gl;
8557 dd->cgindex = cgindex;
8559 dd->ncg_tot = zone_cg_range[zones->n];
8560 dd->nat_tot = nat_tot;
8561 comm->nat[ddnatHOME] = dd->nat_home;
8562 for (i = ddnatZONE; i < ddnatNR; i++)
8564 comm->nat[i] = dd->nat_tot;
8569 /* We don't need to update cginfo, since that was alrady done above.
8570 * So we pass NULL for the forcerec.
8572 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8573 NULL, comm->bLocalCG);
8578 fprintf(debug, "Finished setting up DD communication, zones:");
8579 for (c = 0; c < zones->n; c++)
8581 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8583 fprintf(debug, "\n");
8587 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8591 for (c = 0; c < zones->nizone; c++)
8593 zones->izone[c].cg1 = zones->cg_range[c+1];
8594 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8595 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
8599 static void set_zones_size(gmx_domdec_t *dd,
8600 matrix box, const gmx_ddbox_t *ddbox,
8601 int zone_start, int zone_end)
8603 gmx_domdec_comm_t *comm;
8604 gmx_domdec_zones_t *zones;
8606 int z, zi, zj0, zj1, d, dim;
8609 real size_j, add_tric;
8614 zones = &comm->zones;
8616 /* Do we need to determine extra distances for multi-body bondeds? */
8617 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8619 for (z = zone_start; z < zone_end; z++)
8621 /* Copy cell limits to zone limits.
8622 * Valid for non-DD dims and non-shifted dims.
8624 copy_rvec(comm->cell_x0, zones->size[z].x0);
8625 copy_rvec(comm->cell_x1, zones->size[z].x1);
8628 for (d = 0; d < dd->ndim; d++)
8632 for (z = 0; z < zones->n; z++)
8634 /* With a staggered grid we have different sizes
8635 * for non-shifted dimensions.
8637 if (dd->bGridJump && zones->shift[z][dim] == 0)
8641 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8642 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8646 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8647 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
8653 rcmbs = comm->cutoff_mbody;
8654 if (ddbox->tric_dir[dim])
8656 rcs /= ddbox->skew_fac[dim];
8657 rcmbs /= ddbox->skew_fac[dim];
8660 /* Set the lower limit for the shifted zone dimensions */
8661 for (z = zone_start; z < zone_end; z++)
8663 if (zones->shift[z][dim] > 0)
8666 if (!dd->bGridJump || d == 0)
8668 zones->size[z].x0[dim] = comm->cell_x1[dim];
8669 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8673 /* Here we take the lower limit of the zone from
8674 * the lowest domain of the zone below.
8678 zones->size[z].x0[dim] =
8679 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8685 zones->size[z].x0[dim] =
8686 zones->size[zone_perm[2][z-4]].x0[dim];
8690 zones->size[z].x0[dim] =
8691 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8694 /* A temporary limit, is updated below */
8695 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8699 for (zi = 0; zi < zones->nizone; zi++)
8701 if (zones->shift[zi][dim] == 0)
8703 /* This takes the whole zone into account.
8704 * With multiple pulses this will lead
8705 * to a larger zone then strictly necessary.
8707 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8708 zones->size[zi].x1[dim]+rcmbs);
8716 /* Loop over the i-zones to set the upper limit of each
8719 for (zi = 0; zi < zones->nizone; zi++)
8721 if (zones->shift[zi][dim] == 0)
8723 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8725 if (zones->shift[z][dim] > 0)
8727 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8728 zones->size[zi].x1[dim]+rcs);
8735 for (z = zone_start; z < zone_end; z++)
8737 /* Initialization only required to keep the compiler happy */
8738 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8741 /* To determine the bounding box for a zone we need to find
8742 * the extreme corners of 4, 2 or 1 corners.
8744 nc = 1 << (ddbox->npbcdim - 1);
8746 for (c = 0; c < nc; c++)
8748 /* Set up a zone corner at x=0, ignoring trilinic couplings */
8752 corner[YY] = zones->size[z].x0[YY];
8756 corner[YY] = zones->size[z].x1[YY];
8760 corner[ZZ] = zones->size[z].x0[ZZ];
8764 corner[ZZ] = zones->size[z].x1[ZZ];
8766 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8768 /* With 1D domain decomposition the cg's are not in
8769 * the triclinic box, but triclinic x-y and rectangular y-z.
8770 * Shift y back, so it will later end up at 0.
8772 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8774 /* Apply the triclinic couplings */
8775 for (i = YY; i < ddbox->npbcdim; i++)
8777 for (j = XX; j < i; j++)
8779 corner[j] += corner[i]*box[i][j]/box[i][i];
8784 copy_rvec(corner, corner_min);
8785 copy_rvec(corner, corner_max);
8789 for (i = 0; i < DIM; i++)
8791 corner_min[i] = min(corner_min[i], corner[i]);
8792 corner_max[i] = max(corner_max[i], corner[i]);
8796 /* Copy the extreme cornes without offset along x */
8797 for (i = 0; i < DIM; i++)
8799 zones->size[z].bb_x0[i] = corner_min[i];
8800 zones->size[z].bb_x1[i] = corner_max[i];
8802 /* Add the offset along x */
8803 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8804 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
8807 if (zone_start == 0)
8810 for (dim = 0; dim < DIM; dim++)
8812 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8814 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8819 for (z = zone_start; z < zone_end; z++)
8821 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8823 zones->size[z].x0[XX], zones->size[z].x1[XX],
8824 zones->size[z].x0[YY], zones->size[z].x1[YY],
8825 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8826 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8828 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8829 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8830 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
8835 static int comp_cgsort(const void *a, const void *b)
8839 gmx_cgsort_t *cga, *cgb;
8840 cga = (gmx_cgsort_t *)a;
8841 cgb = (gmx_cgsort_t *)b;
8843 comp = cga->nsc - cgb->nsc;
8846 comp = cga->ind_gl - cgb->ind_gl;
8852 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8857 /* Order the data */
8858 for (i = 0; i < n; i++)
8860 buf[i] = a[sort[i].ind];
8863 /* Copy back to the original array */
8864 for (i = 0; i < n; i++)
8870 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8875 /* Order the data */
8876 for (i = 0; i < n; i++)
8878 copy_rvec(v[sort[i].ind], buf[i]);
8881 /* Copy back to the original array */
8882 for (i = 0; i < n; i++)
8884 copy_rvec(buf[i], v[i]);
8888 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8891 int a, atot, cg, cg0, cg1, i;
8893 if (cgindex == NULL)
8895 /* Avoid the useless loop of the atoms within a cg */
8896 order_vec_cg(ncg, sort, v, buf);
8901 /* Order the data */
8903 for (cg = 0; cg < ncg; cg++)
8905 cg0 = cgindex[sort[cg].ind];
8906 cg1 = cgindex[sort[cg].ind+1];
8907 for (i = cg0; i < cg1; i++)
8909 copy_rvec(v[i], buf[a]);
8915 /* Copy back to the original array */
8916 for (a = 0; a < atot; a++)
8918 copy_rvec(buf[a], v[a]);
8922 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8923 int nsort_new, gmx_cgsort_t *sort_new,
8924 gmx_cgsort_t *sort1)
8928 /* The new indices are not very ordered, so we qsort them */
8929 qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8931 /* sort2 is already ordered, so now we can merge the two arrays */
8935 while (i2 < nsort2 || i_new < nsort_new)
8939 sort1[i1++] = sort_new[i_new++];
8941 else if (i_new == nsort_new)
8943 sort1[i1++] = sort2[i2++];
8945 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8946 (sort2[i2].nsc == sort_new[i_new].nsc &&
8947 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8949 sort1[i1++] = sort2[i2++];
8953 sort1[i1++] = sort_new[i_new++];
8958 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8960 gmx_domdec_sort_t *sort;
8961 gmx_cgsort_t *cgsort, *sort_i;
8962 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8963 int sort_last, sort_skip;
8965 sort = dd->comm->sort;
8967 a = fr->ns.grid->cell_index;
8969 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8971 if (ncg_home_old >= 0)
8973 /* The charge groups that remained in the same ns grid cell
8974 * are completely ordered. So we can sort efficiently by sorting
8975 * the charge groups that did move into the stationary list.
8980 for (i = 0; i < dd->ncg_home; i++)
8982 /* Check if this cg did not move to another node */
8985 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8987 /* This cg is new on this node or moved ns grid cell */
8988 if (nsort_new >= sort->sort_new_nalloc)
8990 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8991 srenew(sort->sort_new, sort->sort_new_nalloc);
8993 sort_i = &(sort->sort_new[nsort_new++]);
8997 /* This cg did not move */
8998 sort_i = &(sort->sort2[nsort2++]);
9000 /* Sort on the ns grid cell indices
9001 * and the global topology index.
9002 * index_gl is irrelevant with cell ns,
9003 * but we set it here anyhow to avoid a conditional.
9006 sort_i->ind_gl = dd->index_gl[i];
9013 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
9016 /* Sort efficiently */
9017 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
9022 cgsort = sort->sort;
9024 for (i = 0; i < dd->ncg_home; i++)
9026 /* Sort on the ns grid cell indices
9027 * and the global topology index
9029 cgsort[i].nsc = a[i];
9030 cgsort[i].ind_gl = dd->index_gl[i];
9032 if (cgsort[i].nsc < moved)
9039 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
9041 /* Determine the order of the charge groups using qsort */
9042 qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
9048 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
9051 int ncg_new, i, *a, na;
9053 sort = dd->comm->sort->sort;
9055 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
9058 for (i = 0; i < na; i++)
9062 sort[ncg_new].ind = a[i];
9070 static void dd_sort_state(gmx_domdec_t *dd, int ePBC,
9071 rvec *cgcm, t_forcerec *fr, t_state *state,
9074 gmx_domdec_sort_t *sort;
9075 gmx_cgsort_t *cgsort, *sort_i;
9077 int ncg_new, i, *ibuf, cgsize;
9080 sort = dd->comm->sort;
9082 if (dd->ncg_home > sort->sort_nalloc)
9084 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9085 srenew(sort->sort, sort->sort_nalloc);
9086 srenew(sort->sort2, sort->sort_nalloc);
9088 cgsort = sort->sort;
9090 switch (fr->cutoff_scheme)
9093 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9096 ncg_new = dd_sort_order_nbnxn(dd, fr);
9099 gmx_incons("unimplemented");
9103 /* We alloc with the old size, since cgindex is still old */
9104 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9105 vbuf = dd->comm->vbuf.v;
9109 cgindex = dd->cgindex;
9116 /* Remove the charge groups which are no longer at home here */
9117 dd->ncg_home = ncg_new;
9120 fprintf(debug, "Set the new home charge group count to %d\n",
9124 /* Reorder the state */
9125 for (i = 0; i < estNR; i++)
9127 if (EST_DISTR(i) && (state->flags & (1<<i)))
9132 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9135 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9138 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9141 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9145 case estDISRE_INITF:
9146 case estDISRE_RM3TAV:
9147 case estORIRE_INITF:
9149 /* No ordering required */
9152 gmx_incons("Unknown state entry encountered in dd_sort_state");
9157 if (fr->cutoff_scheme == ecutsGROUP)
9160 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9163 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9165 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9166 srenew(sort->ibuf, sort->ibuf_nalloc);
9169 /* Reorder the global cg index */
9170 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9171 /* Reorder the cginfo */
9172 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9173 /* Rebuild the local cg index */
9177 for (i = 0; i < dd->ncg_home; i++)
9179 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9180 ibuf[i+1] = ibuf[i] + cgsize;
9182 for (i = 0; i < dd->ncg_home+1; i++)
9184 dd->cgindex[i] = ibuf[i];
9189 for (i = 0; i < dd->ncg_home+1; i++)
9194 /* Set the home atom number */
9195 dd->nat_home = dd->cgindex[dd->ncg_home];
9197 if (fr->cutoff_scheme == ecutsVERLET)
9199 /* The atoms are now exactly in grid order, update the grid order */
9200 nbnxn_set_atomorder(fr->nbv->nbs);
9204 /* Copy the sorted ns cell indices back to the ns grid struct */
9205 for (i = 0; i < dd->ncg_home; i++)
9207 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9209 fr->ns.grid->nr = dd->ncg_home;
9213 static void add_dd_statistics(gmx_domdec_t *dd)
9215 gmx_domdec_comm_t *comm;
9220 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9222 comm->sum_nat[ddnat-ddnatZONE] +=
9223 comm->nat[ddnat] - comm->nat[ddnat-1];
9228 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9230 gmx_domdec_comm_t *comm;
9235 /* Reset all the statistics and counters for total run counting */
9236 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9238 comm->sum_nat[ddnat-ddnatZONE] = 0;
9242 comm->load_step = 0;
9245 clear_ivec(comm->load_lim);
9250 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9252 gmx_domdec_comm_t *comm;
9256 comm = cr->dd->comm;
9258 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9265 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9267 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9269 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9274 " av. #atoms communicated per step for force: %d x %.1f\n",
9278 if (cr->dd->vsite_comm)
9281 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9282 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9287 if (cr->dd->constraint_comm)
9290 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9291 1 + ir->nLincsIter, av);
9295 gmx_incons(" Unknown type for DD statistics");
9298 fprintf(fplog, "\n");
9300 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9302 print_dd_load_av(fplog, cr->dd);
9306 void dd_partition_system(FILE *fplog,
9307 gmx_large_int_t step,
9309 gmx_bool bMasterState,
9311 t_state *state_global,
9312 gmx_mtop_t *top_global,
9314 t_state *state_local,
9317 gmx_localtop_t *top_local,
9320 gmx_shellfc_t shellfc,
9321 gmx_constr_t constr,
9323 gmx_wallcycle_t wcycle,
9327 gmx_domdec_comm_t *comm;
9328 gmx_ddbox_t ddbox = {0};
9330 gmx_large_int_t step_pcoupl;
9331 rvec cell_ns_x0, cell_ns_x1;
9332 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9333 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9334 gmx_bool bRedist, bSortCG, bResortAll;
9335 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9342 bBoxChanged = (bMasterState || DEFORM(*ir));
9343 if (ir->epc != epcNO)
9345 /* With nstpcouple > 1 pressure coupling happens.
9346 * one step after calculating the pressure.
9347 * Box scaling happens at the end of the MD step,
9348 * after the DD partitioning.
9349 * We therefore have to do DLB in the first partitioning
9350 * after an MD step where P-coupling occured.
9351 * We need to determine the last step in which p-coupling occurred.
9352 * MRS -- need to validate this for vv?
9357 step_pcoupl = step - 1;
9361 step_pcoupl = ((step - 1)/n)*n + 1;
9363 if (step_pcoupl >= comm->partition_step)
9369 bNStGlobalComm = (step % nstglobalcomm == 0);
9371 if (!comm->bDynLoadBal)
9377 /* Should we do dynamic load balacing this step?
9378 * Since it requires (possibly expensive) global communication,
9379 * we might want to do DLB less frequently.
9381 if (bBoxChanged || ir->epc != epcNO)
9383 bDoDLB = bBoxChanged;
9387 bDoDLB = bNStGlobalComm;
9391 /* Check if we have recorded loads on the nodes */
9392 if (comm->bRecordLoad && dd_load_count(comm))
9394 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9396 /* Check if we should use DLB at the second partitioning
9397 * and every 100 partitionings,
9398 * so the extra communication cost is negligible.
9400 n = max(100, nstglobalcomm);
9401 bCheckDLB = (comm->n_load_collect == 0 ||
9402 comm->n_load_have % n == n-1);
9409 /* Print load every nstlog, first and last step to the log file */
9410 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9411 comm->n_load_collect == 0 ||
9413 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9415 /* Avoid extra communication due to verbose screen output
9416 * when nstglobalcomm is set.
9418 if (bDoDLB || bLogLoad || bCheckDLB ||
9419 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9421 get_load_distribution(dd, wcycle);
9426 dd_print_load(fplog, dd, step-1);
9430 dd_print_load_verbose(dd);
9433 comm->n_load_collect++;
9437 /* Since the timings are node dependent, the master decides */
9441 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
9444 fprintf(debug, "step %s, imb loss %f\n",
9445 gmx_step_str(step, sbuf),
9446 dd_force_imb_perf_loss(dd));
9449 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9452 turn_on_dlb(fplog, cr, step);
9457 comm->n_load_have++;
9460 cgs_gl = &comm->cgs_gl;
9465 /* Clear the old state */
9466 clear_dd_indices(dd, 0, 0);
9469 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9470 TRUE, cgs_gl, state_global->x, &ddbox);
9472 get_cg_distribution(fplog, step, dd, cgs_gl,
9473 state_global->box, &ddbox, state_global->x);
9475 dd_distribute_state(dd, cgs_gl,
9476 state_global, state_local, f);
9478 dd_make_local_cgs(dd, &top_local->cgs);
9480 /* Ensure that we have space for the new distribution */
9481 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9483 if (fr->cutoff_scheme == ecutsGROUP)
9485 calc_cgcm(fplog, 0, dd->ncg_home,
9486 &top_local->cgs, state_local->x, fr->cg_cm);
9489 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9491 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9493 else if (state_local->ddp_count != dd->ddp_count)
9495 if (state_local->ddp_count > dd->ddp_count)
9497 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9500 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9502 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9505 /* Clear the old state */
9506 clear_dd_indices(dd, 0, 0);
9508 /* Build the new indices */
9509 rebuild_cgindex(dd, cgs_gl->index, state_local);
9510 make_dd_indices(dd, cgs_gl->index, 0);
9511 ncgindex_set = dd->ncg_home;
9513 if (fr->cutoff_scheme == ecutsGROUP)
9515 /* Redetermine the cg COMs */
9516 calc_cgcm(fplog, 0, dd->ncg_home,
9517 &top_local->cgs, state_local->x, fr->cg_cm);
9520 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9522 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9524 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9525 TRUE, &top_local->cgs, state_local->x, &ddbox);
9527 bRedist = comm->bDynLoadBal;
9531 /* We have the full state, only redistribute the cgs */
9533 /* Clear the non-home indices */
9534 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9537 /* Avoid global communication for dim's without pbc and -gcom */
9538 if (!bNStGlobalComm)
9540 copy_rvec(comm->box0, ddbox.box0 );
9541 copy_rvec(comm->box_size, ddbox.box_size);
9543 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9544 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9549 /* For dim's without pbc and -gcom */
9550 copy_rvec(ddbox.box0, comm->box0 );
9551 copy_rvec(ddbox.box_size, comm->box_size);
9553 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9556 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9558 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9561 /* Check if we should sort the charge groups */
9562 if (comm->nstSortCG > 0)
9564 bSortCG = (bMasterState ||
9565 (bRedist && (step % comm->nstSortCG == 0)));
9572 ncg_home_old = dd->ncg_home;
9577 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9579 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9580 state_local, f, fr, mdatoms,
9581 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9583 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9586 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9588 &comm->cell_x0, &comm->cell_x1,
9589 dd->ncg_home, fr->cg_cm,
9590 cell_ns_x0, cell_ns_x1, &grid_density);
9594 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9597 switch (fr->cutoff_scheme)
9600 copy_ivec(fr->ns.grid->n, ncells_old);
9601 grid_first(fplog, fr->ns.grid, dd, &ddbox, fr->ePBC,
9602 state_local->box, cell_ns_x0, cell_ns_x1,
9603 fr->rlistlong, grid_density);
9606 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9609 gmx_incons("unimplemented");
9611 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9612 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9616 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9618 /* Sort the state on charge group position.
9619 * This enables exact restarts from this step.
9620 * It also improves performance by about 15% with larger numbers
9621 * of atoms per node.
9624 /* Fill the ns grid with the home cell,
9625 * so we can sort with the indices.
9627 set_zones_ncg_home(dd);
9629 switch (fr->cutoff_scheme)
9632 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9634 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9636 comm->zones.size[0].bb_x0,
9637 comm->zones.size[0].bb_x1,
9639 comm->zones.dens_zone0,
9642 ncg_moved, bRedist ? comm->moved : NULL,
9643 fr->nbv->grp[eintLocal].kernel_type,
9644 fr->nbv->grp[eintLocal].nbat);
9646 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9649 fill_grid(fplog, &comm->zones, fr->ns.grid, dd->ncg_home,
9650 0, dd->ncg_home, fr->cg_cm);
9652 copy_ivec(fr->ns.grid->n, ncells_new);
9655 gmx_incons("unimplemented");
9658 bResortAll = bMasterState;
9660 /* Check if we can user the old order and ns grid cell indices
9661 * of the charge groups to sort the charge groups efficiently.
9663 if (ncells_new[XX] != ncells_old[XX] ||
9664 ncells_new[YY] != ncells_old[YY] ||
9665 ncells_new[ZZ] != ncells_old[ZZ])
9672 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9673 gmx_step_str(step, sbuf), dd->ncg_home);
9675 dd_sort_state(dd, ir->ePBC, fr->cg_cm, fr, state_local,
9676 bResortAll ? -1 : ncg_home_old);
9677 /* Rebuild all the indices */
9678 ga2la_clear(dd->ga2la);
9681 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9684 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9686 /* Setup up the communication and communicate the coordinates */
9687 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9689 /* Set the indices */
9690 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9692 /* Set the charge group boundaries for neighbor searching */
9693 set_cg_boundaries(&comm->zones);
9695 if (fr->cutoff_scheme == ecutsVERLET)
9697 set_zones_size(dd, state_local->box, &ddbox,
9698 bSortCG ? 1 : 0, comm->zones.n);
9701 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9704 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9705 -1,state_local->x,state_local->box);
9708 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9710 /* Extract a local topology from the global topology */
9711 for (i = 0; i < dd->ndim; i++)
9713 np[dd->dim[i]] = comm->cd[i].np;
9715 dd_make_local_top(fplog, dd, &comm->zones, dd->npbcdim, state_local->box,
9716 comm->cellsize_min, np,
9718 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9719 vsite, top_global, top_local);
9721 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9723 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9725 /* Set up the special atom communication */
9726 n = comm->nat[ddnatZONE];
9727 for (i = ddnatZONE+1; i < ddnatNR; i++)
9732 if (vsite && vsite->n_intercg_vsite)
9734 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9738 if (dd->bInterCGcons || dd->bInterCGsettles)
9740 /* Only for inter-cg constraints we need special code */
9741 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9742 constr, ir->nProjOrder,
9743 top_local->idef.il);
9747 gmx_incons("Unknown special atom type setup");
9752 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9754 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9756 /* Make space for the extra coordinates for virtual site
9757 * or constraint communication.
9759 state_local->natoms = comm->nat[ddnatNR-1];
9760 if (state_local->natoms > state_local->nalloc)
9762 dd_realloc_state(state_local, f, state_local->natoms);
9765 if (fr->bF_NoVirSum)
9767 if (vsite && vsite->n_intercg_vsite)
9769 nat_f_novirsum = comm->nat[ddnatVSITE];
9773 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9775 nat_f_novirsum = dd->nat_tot;
9779 nat_f_novirsum = dd->nat_home;
9788 /* Set the number of atoms required for the force calculation.
9789 * Forces need to be constrained when using a twin-range setup
9790 * or with energy minimization. For simple simulations we could
9791 * avoid some allocation, zeroing and copying, but this is
9792 * probably not worth the complications ande checking.
9794 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9795 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9797 /* We make the all mdatoms up to nat_tot_con.
9798 * We could save some work by only setting invmass
9799 * between nat_tot and nat_tot_con.
9801 /* This call also sets the new number of home particles to dd->nat_home */
9802 atoms2md(top_global, ir,
9803 comm->nat[ddnatCON], dd->gatindex, 0, dd->nat_home, mdatoms);
9805 /* Now we have the charges we can sort the FE interactions */
9806 dd_sort_local_top(dd, mdatoms, top_local);
9810 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9811 split_vsites_over_threads(top_local->idef.il, top_local->idef.iparams,
9812 mdatoms, FALSE, vsite);
9817 /* Make the local shell stuff, currently no communication is done */
9818 make_local_shells(cr, mdatoms, shellfc);
9821 if (ir->implicit_solvent)
9823 make_local_gb(cr, fr->born, ir->gb_algorithm);
9826 setup_bonded_threading(fr, &top_local->idef);
9828 if (!(cr->duty & DUTY_PME))
9830 /* Send the charges to our PME only node */
9831 gmx_pme_send_q(cr, mdatoms->nChargePerturbed,
9832 mdatoms->chargeA, mdatoms->chargeB,
9833 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9838 set_constraints(constr, top_local, ir, mdatoms, cr);
9841 if (ir->ePull != epullNO)
9843 /* Update the local pull groups */
9844 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9849 /* Update the local rotation groups */
9850 dd_make_local_rotation_groups(dd, ir->rot);
9854 add_dd_statistics(dd);
9856 /* Make sure we only count the cycles for this DD partitioning */
9857 clear_dd_cycle_counts(dd);
9859 /* Because the order of the atoms might have changed since
9860 * the last vsite construction, we need to communicate the constructing
9861 * atom coordinates again (for spreading the forces this MD step).
9863 dd_move_x_vsites(dd, state_local->box, state_local->x);
9865 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9867 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9869 dd_move_x(dd, state_local->box, state_local->x);
9870 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9871 -1, state_local->x, state_local->box);
9874 /* Store the partitioning step */
9875 comm->partition_step = step;
9877 /* Increase the DD partitioning counter */
9879 /* The state currently matches this DD partitioning count, store it */
9880 state_local->ddp_count = dd->ddp_count;
9883 /* The DD master node knows the complete cg distribution,
9884 * store the count so we can possibly skip the cg info communication.
9886 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9889 if (comm->DD_debug > 0)
9891 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9892 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9893 "after partitioning");