2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gromacs/utility/smalloc.h"
49 #include "gromacs/utility/fatalerror.h"
53 #include "domdec_network.h"
56 #include "chargegroup.h"
65 #include "mtop_util.h"
66 #include "gmx_ga2la.h"
68 #include "nbnxn_search.h"
70 #include "gmx_omp_nthreads.h"
71 #include "gpu_utils.h"
73 #include "gromacs/utility/futil.h"
74 #include "gromacs/fileio/gmxfio.h"
75 #include "gromacs/fileio/pdbio.h"
76 #include "gromacs/imd/imd.h"
77 #include "gromacs/pulling/pull.h"
78 #include "gromacs/pulling/pull_rotation.h"
79 #include "gromacs/swap/swapcoords.h"
80 #include "gromacs/timing/wallcycle.h"
81 #include "gromacs/utility/basenetwork.h"
82 #include "gromacs/utility/gmxmpi.h"
83 #include "gromacs/utility/qsort_threadsafe.h"
85 #define DDRANK(dd, rank) (rank)
86 #define DDMASTERRANK(dd) (dd->masterrank)
/* Master-rank bookkeeping used to gather/scatter the global state.
 * All arrays are sized/indexed per DD node unless noted otherwise.
 * NOTE(review): the struct opening brace and the cell-boundary fields
 * are missing from this extract of the file.
 */
88 typedef struct gmx_domdec_master
90 /* The cell boundaries */
92 /* The global charge group division */
93 int *ncg; /* Number of home charge groups for each node */
94 int *index; /* Index of nnodes+1 into cg */
95 int *cg; /* Global charge group index */
96 int *nat; /* Number of home atoms for each node. */
97 int *ibuf; /* Buffer for communication */
98 rvec *vbuf; /* Buffer for state scattering and gathering */
99 } gmx_domdec_master_t;
103 /* The numbers of charge groups to send and receive for each cell
104 * that requires communication, the last entry contains the total
105 * number of atoms that needs to be communicated.
107 int nsend[DD_MAXIZONE+2];
108 int nrecv[DD_MAXIZONE+2];
109 /* The charge groups to send */
112 /* The atom range for non-in-place communication */
113 int cell2at0[DD_MAXIZONE];
114 int cell2at1[DD_MAXIZONE];
119 int np; /* Number of grid pulses in this dimension */
120 int np_dlb; /* For dlb, for use with edlbAUTO */
121 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
123 gmx_bool bInPlace; /* Can we communicate in place? */
124 } gmx_domdec_comm_dim_t;
128 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
129 real *cell_f; /* State var.: cell boundaries, box relative */
130 real *old_cell_f; /* Temp. var.: old cell size */
131 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
132 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
133 real *bound_min; /* Temp. var.: lower limit for cell boundary */
134 real *bound_max; /* Temp. var.: upper limit for cell boundary */
135 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
136 real *buf_ncd; /* Temp. var. */
139 #define DD_NLOAD_MAX 9
141 /* Here floats are accurate enough, since these variables
142 * only influence the load balancing, not the actual MD results.
169 gmx_cgsort_t *sort_new;
181 /* This enum determines the order of the coordinates.
182 * ddnatHOME and ddnatZONE should be first and second,
183 * the others can be ordered as wanted.
186 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
190 edlbAUTO, edlbNO, edlbYES, edlbNR
192 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
196 int dim; /* The dimension */
197 gmx_bool dim_match; /* Tells if DD and PME dims match */
198 int nslab; /* The number of PME slabs in this dimension */
199 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
200 int *pp_min; /* The minimum pp node location, size nslab */
201 int *pp_max; /* The maximum pp node location,size nslab */
202 int maxshift; /* The maximum shift for coordinate redistribution in PME */
207 real min0; /* The minimum bottom of this zone */
208 real max1; /* The maximum top of this zone */
209 real min1; /* The minimum top of this zone */
210 real mch0; /* The maximum bottom communication height for this zone */
211 real mch1; /* The maximum top communication height for this zone */
212 real p1_0; /* The bottom value of the first cell in this zone */
213 real p1_1; /* The top value of the first cell in this zone */
218 gmx_domdec_ind_t ind;
225 } dd_comm_setup_work_t;
227 typedef struct gmx_domdec_comm
229 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
230 * unless stated otherwise.
233 /* The number of decomposition dimensions for PME, 0: no PME */
235 /* The number of nodes doing PME (PP/PME or only PME) */
239 /* The communication setup including the PME only nodes */
240 gmx_bool bCartesianPP_PME;
243 int *pmenodes; /* size npmenodes */
244 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
245 * but with bCartesianPP_PME */
246 gmx_ddpme_t ddpme[2];
248 /* The DD particle-particle nodes only */
249 gmx_bool bCartesianPP;
250 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
252 /* The global charge groups */
255 /* Should we sort the cgs */
257 gmx_domdec_sort_t *sort;
259 /* Are there charge groups? */
262 /* Are there bonded and multi-body interactions between charge groups? */
263 gmx_bool bInterCGBondeds;
264 gmx_bool bInterCGMultiBody;
266 /* Data for the optional bonded interaction atom communication range */
273 /* Are we actually using DLB? */
274 gmx_bool bDynLoadBal;
276 /* Cell sizes for static load balancing, first index cartesian */
279 /* The width of the communicated boundaries */
282 /* The minimum cell size (including triclinic correction) */
284 /* For dlb, for use with edlbAUTO */
285 rvec cellsize_min_dlb;
286 /* The lower limit for the DD cell size with DLB */
288 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
289 gmx_bool bVacDLBNoLimit;
291 /* With PME load balancing we set limits on DLB */
292 gmx_bool bPMELoadBalDLBLimits;
293 /* DLB needs to take into account that we want to allow this maximum
294 * cut-off (for PME load balancing), this could limit cell boundaries.
296 real PMELoadBal_max_cutoff;
298 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
300 /* box0 and box_size are required with dim's without pbc and -gcom */
304 /* The cell boundaries */
308 /* The old location of the cell boundaries, to check cg displacements */
312 /* The communication setup and charge group boundaries for the zones */
313 gmx_domdec_zones_t zones;
315 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
316 * cell boundaries of neighboring cells for dynamic load balancing.
318 gmx_ddzone_t zone_d1[2];
319 gmx_ddzone_t zone_d2[2][2];
321 /* The coordinate/force communication setup and indices */
322 gmx_domdec_comm_dim_t cd[DIM];
323 /* The maximum number of cells to communicate with in one dimension */
326 /* Which cg distribution is stored on the master node */
327 int master_cg_ddp_count;
329 /* The number of cg's received from the direct neighbors */
330 int zone_ncg1[DD_MAXZONE];
332 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
335 /* Array for signalling if atoms have moved to another domain */
339 /* Communication buffer for general use */
343 /* Communication buffer for general use */
346 /* Temporary storage for thread parallel communication setup */
348 dd_comm_setup_work_t *dth;
350 /* Communication buffers only used with multiple grid pulses */
355 /* Communication buffers for local redistribution */
357 int cggl_flag_nalloc[DIM*2];
359 int cgcm_state_nalloc[DIM*2];
361 /* Cell sizes for dynamic load balancing */
362 gmx_domdec_root_t **root;
366 real cell_f_max0[DIM];
367 real cell_f_min1[DIM];
369 /* Stuff for load communication */
370 gmx_bool bRecordLoad;
371 gmx_domdec_load_t *load;
372 int nrank_gpu_shared;
374 MPI_Comm *mpi_comm_load;
375 MPI_Comm mpi_comm_gpu_shared;
378 /* Maximum DLB scaling per load balancing step in percent */
382 float cycl[ddCyclNr];
383 int cycl_n[ddCyclNr];
384 float cycl_max[ddCyclNr];
385 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
389 /* How often we have had load measurements */
391 /* How often we have collected the load measurements */
395 double sum_nat[ddnatNR-ddnatZONE];
405 /* The last partition step */
406 gmx_int64_t partition_step;
414 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
417 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
418 #define DD_FLAG_NRCG 65535
419 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
420 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
422 /* Zone permutation required to obtain consecutive charge groups
423 * for neighbor searching.
425 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
427 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
428 * components see only j zones with that component 0.
431 /* The DD zone order */
432 static const ivec dd_zo[DD_MAXZONE] =
433 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
438 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
443 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
448 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
450 /* Factors used to avoid problems due to rounding issues */
451 #define DD_CELL_MARGIN 1.0001
452 #define DD_CELL_MARGIN2 1.00005
453 /* Factor to account for pressure scaling during nstlist steps */
454 #define DD_PRES_SCALE_MARGIN 1.02
456 /* Allowed performance loss before we DLB or warn */
457 #define DD_PERF_LOSS 0.05
459 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
461 /* Use separate MPI send and receive commands
462 * when nnodes <= GMX_DD_NNODES_SENDRECV.
463 * This saves memory (and some copying for small nnodes).
464 * For high parallelization scatter and gather calls are used.
466 #define GMX_DD_NNODES_SENDRECV 4
470 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
/* NOTE(review): legacy x-major index decomposition; upstream this block is
 * commented out and superseded by the z-fastest dd_index/ddindex2xyz below
 * (the comment delimiters appear to be missing from this extract).
 */
472 static void index2xyz(ivec nc,int ind,ivec xyz)
474 xyz[XX] = ind % nc[XX];
475 xyz[YY] = (ind / nc[XX]) % nc[YY];
476 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
480 /* This order is required to minimize the coordinate communication in PME
481 * which uses decomposition in the x direction.
483 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
485 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
487 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
488 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
489 xyz[ZZ] = ind % nc[ZZ];
/* Map a DD cell coordinate c to the DD rank id owning that cell.
 * With a PP+PME Cartesian setup the lookup table is used; with a pure PP
 * Cartesian communicator the rank comes from MPI_Cart_rank.
 * NOTE(review): the fallback branch and the return statement are missing
 * from this extract of the file.
 */
492 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
497 ddindex = dd_index(dd->nc, c);
498 if (dd->comm->bCartesianPP_PME)
500 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
502 else if (dd->comm->bCartesianPP)
505 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
516 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
518 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
/* Convert a local atom index i into a global atom number (1-based).
 * Fails hard when i is outside the locally known atom range.
 * NOTE(review): the dd == NULL branch and the return statement are
 * missing from this extract of the file.
 */
521 int ddglatnr(gmx_domdec_t *dd, int i)
531 if (i >= dd->comm->nat[ddnatNR-1])
533 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
535 atnr = dd->gatindex[i] + 1;
541 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
543 return &dd->comm->cgs_gl;
546 static void vec_rvec_init(vec_rvec_t *v)
/* Ensure v can hold at least n rvecs, growing with DD over-allocation.
 * NOTE(review): the `if (n > v->nalloc)` guard line is missing from this
 * extract of the file.
 */
552 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
556 v->nalloc = over_alloc_dd(n);
557 srenew(v->v, v->nalloc);
561 void dd_store_state(gmx_domdec_t *dd, t_state *state)
565 if (state->ddp_count != dd->ddp_count)
567 gmx_incons("The state does not the domain decomposition state");
570 state->ncg_gl = dd->ncg_home;
571 if (state->ncg_gl > state->cg_gl_nalloc)
573 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
574 srenew(state->cg_gl, state->cg_gl_nalloc);
576 for (i = 0; i < state->ncg_gl; i++)
578 state->cg_gl[i] = dd->index_gl[i];
581 state->ddp_count_cg_gl = dd->ddp_count;
584 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
586 return &dd->comm->zones;
/* For home charge group icg, return the j-charge-group range
 * [*jcg0, *jcg1) to search against and the allowed shift ranges
 * shift0/shift1 per dimension for neighbor searching.
 * NOTE(review): several interior lines (izone search loop body, brace
 * lines, shift defaults) are missing from this extract of the file.
 */
589 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
590 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
592 gmx_domdec_zones_t *zones;
595 zones = &dd->comm->zones;
/* Find the i-zone that contains icg */
598 while (icg >= zones->izone[izone].cg1)
607 else if (izone < zones->nizone)
609 *jcg0 = zones->izone[izone].jcg0;
613 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
614 icg, izone, zones->nizone);
617 *jcg1 = zones->izone[izone].jcg1;
619 for (d = 0; d < dd->ndim; d++)
622 shift0[dim] = zones->izone[izone].shift0[dim];
623 shift1[dim] = zones->izone[izone].shift1[dim];
624 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
626 /* A conservative approach, this can be optimized */
633 int dd_natoms_vsite(gmx_domdec_t *dd)
635 return dd->comm->nat[ddnatVSITE];
638 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
640 *at_start = dd->comm->nat[ddnatCON-1];
641 *at_end = dd->comm->nat[ddnatCON];
/* Communicate the halo coordinates: for each DD dimension and pulse,
 * pack the coordinates of the charge groups to send (plain copy, copy
 * with a PBC shift, or the screw-PBC rotation around x), exchange them
 * with the neighbor ranks and place the received coordinates after the
 * home atoms in x.
 * NOTE(review): interior lines (brace lines, buffer setup, in-place
 * receive branch) are missing from this extract of the file.
 */
644 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
646 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
647 int *index, *cgindex;
648 gmx_domdec_comm_t *comm;
649 gmx_domdec_comm_dim_t *cd;
650 gmx_domdec_ind_t *ind;
651 rvec shift = {0, 0, 0}, *buf, *rbuf;
652 gmx_bool bPBC, bScrew;
656 cgindex = dd->cgindex;
661 nat_tot = dd->nat_home;
662 for (d = 0; d < dd->ndim; d++)
/* Only the cells at the lower edge apply a PBC shift */
664 bPBC = (dd->ci[dd->dim[d]] == 0);
665 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
668 copy_rvec(box[dd->dim[d]], shift);
671 for (p = 0; p < cd->np; p++)
/* Case 1: no PBC shift needed, plain copy into the send buffer */
678 for (i = 0; i < ind->nsend[nzone]; i++)
680 at0 = cgindex[index[i]];
681 at1 = cgindex[index[i]+1];
682 for (j = at0; j < at1; j++)
684 copy_rvec(x[j], buf[n]);
/* Case 2: apply the PBC shift while packing */
691 for (i = 0; i < ind->nsend[nzone]; i++)
693 at0 = cgindex[index[i]];
694 at1 = cgindex[index[i]+1];
695 for (j = at0; j < at1; j++)
697 /* We need to shift the coordinates */
698 rvec_add(x[j], shift, buf[n]);
/* Case 3: screw PBC, shift x and rotate y/z around the x-axis */
705 for (i = 0; i < ind->nsend[nzone]; i++)
707 at0 = cgindex[index[i]];
708 at1 = cgindex[index[i]+1];
709 for (j = at0; j < at1; j++)
712 buf[n][XX] = x[j][XX] + shift[XX];
714 * This operation requires a special shift force
715 * treatment, which is performed in calc_vir.
717 buf[n][YY] = box[YY][YY] - x[j][YY];
718 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
730 rbuf = comm->vbuf2.v;
732 /* Send and receive the coordinates */
733 dd_sendrecv_rvec(dd, d, dddirBackward,
734 buf, ind->nsend[nzone+1],
735 rbuf, ind->nrecv[nzone+1]);
/* Non-in-place: scatter the received data to the zone atom ranges */
739 for (zone = 0; zone < nzone; zone++)
741 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
743 copy_rvec(rbuf[j], x[i]);
748 nat_tot += ind->nrecv[nzone+1];
/* Communicate the halo forces back (the reverse of dd_move_x): walk the
 * dimensions and pulses in reverse order, send the forces on received
 * atoms back to their home rank and accumulate them there; optionally
 * accumulate the corresponding shift forces in fshift.
 * NOTE(review): interior lines (brace lines, shift-vector index `is`
 * computation, buffer setup) are missing from this extract of the file.
 */
754 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
756 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
757 int *index, *cgindex;
758 gmx_domdec_comm_t *comm;
759 gmx_domdec_comm_dim_t *cd;
760 gmx_domdec_ind_t *ind;
764 gmx_bool bPBC, bScrew;
768 cgindex = dd->cgindex;
773 nzone = comm->zones.n/2;
774 nat_tot = dd->nat_tot;
775 for (d = dd->ndim-1; d >= 0; d--)
777 bPBC = (dd->ci[dd->dim[d]] == 0);
778 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
779 if (fshift == NULL && !bScrew)
783 /* Determine which shift vector we need */
789 for (p = cd->np-1; p >= 0; p--)
792 nat_tot -= ind->nrecv[nzone+1];
799 sbuf = comm->vbuf2.v;
/* Non-in-place: gather the forces of the received atom ranges */
801 for (zone = 0; zone < nzone; zone++)
803 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
805 copy_rvec(f[i], sbuf[j]);
810 /* Communicate the forces */
811 dd_sendrecv_rvec(dd, d, dddirForward,
812 sbuf, ind->nrecv[nzone+1],
813 buf, ind->nsend[nzone+1]);
815 /* Add the received forces */
/* Case 1: no shift-force bookkeeping required */
819 for (i = 0; i < ind->nsend[nzone]; i++)
821 at0 = cgindex[index[i]];
822 at1 = cgindex[index[i]+1];
823 for (j = at0; j < at1; j++)
825 rvec_inc(f[j], buf[n]);
/* Case 2: also accumulate the shift force for the virial */
832 for (i = 0; i < ind->nsend[nzone]; i++)
834 at0 = cgindex[index[i]];
835 at1 = cgindex[index[i]+1];
836 for (j = at0; j < at1; j++)
838 rvec_inc(f[j], buf[n]);
839 /* Add this force to the shift force */
840 rvec_inc(fshift[is], buf[n]);
/* Case 3: screw PBC, rotate the force back (y/z components flip sign) */
847 for (i = 0; i < ind->nsend[nzone]; i++)
849 at0 = cgindex[index[i]];
850 at1 = cgindex[index[i]+1];
851 for (j = at0; j < at1; j++)
853 /* Rotate the force */
854 f[j][XX] += buf[n][XX];
855 f[j][YY] -= buf[n][YY];
856 f[j][ZZ] -= buf[n][ZZ];
859 /* Add this force to the shift force */
860 rvec_inc(fshift[is], buf[n]);
/* Spread a per-atom real array v to the halo atoms, following the same
 * forward communication pattern as dd_move_x but for scalars.
 * NOTE(review): interior lines (packing/unpacking of v, brace lines) are
 * missing from this extract of the file.
 */
871 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
873 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
874 int *index, *cgindex;
875 gmx_domdec_comm_t *comm;
876 gmx_domdec_comm_dim_t *cd;
877 gmx_domdec_ind_t *ind;
882 cgindex = dd->cgindex;
/* Reuse the rvec communication buffer as a flat real buffer */
884 buf = &comm->vbuf.v[0][0];
887 nat_tot = dd->nat_home;
888 for (d = 0; d < dd->ndim; d++)
891 for (p = 0; p < cd->np; p++)
896 for (i = 0; i < ind->nsend[nzone]; i++)
898 at0 = cgindex[index[i]];
899 at1 = cgindex[index[i]+1];
900 for (j = at0; j < at1; j++)
913 rbuf = &comm->vbuf2.v[0][0];
915 /* Send and receive the coordinates */
916 dd_sendrecv_real(dd, d, dddirBackward,
917 buf, ind->nsend[nzone+1],
918 rbuf, ind->nrecv[nzone+1]);
/* Non-in-place: scatter the received values to the zone atom ranges */
922 for (zone = 0; zone < nzone; zone++)
924 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
931 nat_tot += ind->nrecv[nzone+1];
/* Sum a per-atom real array v over the halo, following the same reverse
 * communication pattern as dd_move_f but for scalars.
 * NOTE(review): interior lines (accumulation into v, brace lines) are
 * missing from this extract of the file.
 */
937 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
939 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
940 int *index, *cgindex;
941 gmx_domdec_comm_t *comm;
942 gmx_domdec_comm_dim_t *cd;
943 gmx_domdec_ind_t *ind;
948 cgindex = dd->cgindex;
/* Reuse the rvec communication buffer as a flat real buffer */
950 buf = &comm->vbuf.v[0][0];
953 nzone = comm->zones.n/2;
954 nat_tot = dd->nat_tot;
955 for (d = dd->ndim-1; d >= 0; d--)
958 for (p = cd->np-1; p >= 0; p--)
961 nat_tot -= ind->nrecv[nzone+1];
968 sbuf = &comm->vbuf2.v[0][0];
970 for (zone = 0; zone < nzone; zone++)
972 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
979 /* Communicate the forces */
980 dd_sendrecv_real(dd, d, dddirForward,
981 sbuf, ind->nrecv[nzone+1],
982 buf, ind->nsend[nzone+1]);
984 /* Add the received forces */
986 for (i = 0; i < ind->nsend[nzone]; i++)
988 at0 = cgindex[index[i]];
989 at1 = cgindex[index[i]+1];
990 for (j = at0; j < at1; j++)
1001 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
1003 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1005 zone->min0, zone->max1,
1006 zone->mch0, zone->mch0,
1007 zone->p1_0, zone->p1_1);
1011 #define DDZONECOMM_MAXZONE 5
1012 #define DDZONECOMM_BUFSIZE 3
/* Exchange gmx_ddzone_t structs with the neighbors along DD dimension
 * ddimind by packing each zone into DDZONECOMM_BUFSIZE rvecs (3 reals
 * each), reusing the existing rvec send/receive routine.
 * NOTE(review): the loop counter declaration, the sendrecv size
 * arguments and the closing #undef are missing from this extract.
 */
1014 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1015 int ddimind, int direction,
1016 gmx_ddzone_t *buf_s, int n_s,
1017 gmx_ddzone_t *buf_r, int n_r)
1019 #define ZBS DDZONECOMM_BUFSIZE
1020 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1021 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
/* Pack: 7 reals per zone spread over 3 rvecs, unused slots zeroed */
1024 for (i = 0; i < n_s; i++)
1026 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1027 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1028 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1029 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1030 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1031 vbuf_s[i*ZBS+1][2] = 0;
1032 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1033 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1034 vbuf_s[i*ZBS+2][2] = 0;
1037 dd_sendrecv_rvec(dd, ddimind, direction,
/* Unpack the received rvecs back into zone structs */
1041 for (i = 0; i < n_r; i++)
1043 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1044 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1045 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1046 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1047 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1048 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1049 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
/* With dynamic load balancing, communicate the cell boundary extremes of
 * neighboring cells along DD dimensions 1 and 2 (stored in zone_d1 and
 * zone_d2) and update cell_ns_x0/cell_ns_x1, the bounding box used for
 * neighbor searching.
 * NOTE(review): many interior lines (brace lines, pos/buf_size updates,
 * several argument lines) are missing from this extract of the file;
 * comments describe only what is visible.
 */
1055 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1056 rvec cell_ns_x0, rvec cell_ns_x1)
1058 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1060 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1061 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1062 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1063 rvec extr_s[2], extr_r[2];
1065 real dist_d, c = 0, det;
1066 gmx_domdec_comm_t *comm;
1067 gmx_bool bPBC, bUse;
/* Initialize the home-cell zone data from the local NS bounding box */
1071 for (d = 1; d < dd->ndim; d++)
1074 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1075 zp->min0 = cell_ns_x0[dim];
1076 zp->max1 = cell_ns_x1[dim];
1077 zp->min1 = cell_ns_x1[dim];
1078 zp->mch0 = cell_ns_x0[dim];
1079 zp->mch1 = cell_ns_x1[dim];
1080 zp->p1_0 = cell_ns_x0[dim];
1081 zp->p1_1 = cell_ns_x1[dim];
1084 for (d = dd->ndim-2; d >= 0; d--)
1087 bPBC = (dim < ddbox->npbcdim);
1089 /* Use an rvec to store two reals */
1090 extr_s[d][0] = comm->cell_f0[d+1];
1091 extr_s[d][1] = comm->cell_f1[d+1];
1092 extr_s[d][2] = comm->cell_f1[d+1];
1095 /* Store the extremes in the backward sending buffer,
1096 * so they get updated separately from the forward communication.
1098 for (d1 = d; d1 < dd->ndim-1; d1++)
1100 /* We invert the order to be able to use the same loop for buf_e */
1101 buf_s[pos].min0 = extr_s[d1][1];
1102 buf_s[pos].max1 = extr_s[d1][0];
1103 buf_s[pos].min1 = extr_s[d1][2];
1104 buf_s[pos].mch0 = 0;
1105 buf_s[pos].mch1 = 0;
1106 /* Store the cell corner of the dimension we communicate along */
1107 buf_s[pos].p1_0 = comm->cell_x0[dim];
1108 buf_s[pos].p1_1 = 0;
1112 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1115 if (dd->ndim == 3 && d == 0)
1117 buf_s[pos] = comm->zone_d2[0][1];
1119 buf_s[pos] = comm->zone_d1[0];
1123 /* We only need to communicate the extremes
1124 * in the forward direction
1126 npulse = comm->cd[d].np;
1129 /* Take the minimum to avoid double communication */
1130 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1134 /* Without PBC we should really not communicate over
1135 * the boundaries, but implementing that complicates
1136 * the communication setup and therefore we simply
1137 * do all communication, but ignore some data.
1139 npulse_min = npulse;
1141 for (p = 0; p < npulse_min; p++)
1143 /* Communicate the extremes forward */
1144 bUse = (bPBC || dd->ci[dim] > 0);
1146 dd_sendrecv_rvec(dd, d, dddirForward,
1147 extr_s+d, dd->ndim-d-1,
1148 extr_r+d, dd->ndim-d-1);
1152 for (d1 = d; d1 < dd->ndim-1; d1++)
1154 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1155 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1156 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1162 for (p = 0; p < npulse; p++)
1164 /* Communicate all the zone information backward */
1165 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1167 dd_sendrecv_ddzone(dd, d, dddirBackward,
1174 for (d1 = d+1; d1 < dd->ndim; d1++)
1176 /* Determine the decrease of maximum required
1177 * communication height along d1 due to the distance along d,
1178 * this avoids a lot of useless atom communication.
1180 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1182 if (ddbox->tric_dir[dim])
1184 /* c is the off-diagonal coupling between the cell planes
1185 * along directions d and d1.
1187 c = ddbox->v[dim][dd->dim[d1]][dim];
1193 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1196 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1200 /* A negative value signals out of range */
1206 /* Accumulate the extremes over all pulses */
1207 for (i = 0; i < buf_size; i++)
1211 buf_e[i] = buf_r[i];
1217 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1218 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1219 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1222 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1230 if (bUse && dh[d1] >= 0)
1232 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1233 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1236 /* Copy the received buffer to the send buffer,
1237 * to pass the data through with the next pulse.
1239 buf_s[i] = buf_r[i];
1241 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1242 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1244 /* Store the extremes */
1247 for (d1 = d; d1 < dd->ndim-1; d1++)
1249 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1250 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1251 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1255 if (d == 1 || (d == 0 && dd->ndim == 3))
1257 for (i = d; i < 2; i++)
1259 comm->zone_d2[1-d][i] = buf_e[pos];
1265 comm->zone_d1[1] = buf_e[pos];
/* Widen the NS bounding box with the zone extremes of dimension 1 */
1275 for (i = 0; i < 2; i++)
1279 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1281 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1282 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
/* And with the zone extremes of dimension 2 */
1288 for (i = 0; i < 2; i++)
1290 for (j = 0; j < 2; j++)
1294 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1296 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1297 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1301 for (d = 1; d < dd->ndim; d++)
1303 comm->cell_f_max0[d] = extr_s[d-1][0];
1304 comm->cell_f_min1[d] = extr_s[d-1][1];
1307 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1308 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
/* Collect the charge-group distribution of state_local on the master
 * rank (counts, per-node indices and global cg indices), unless the
 * master already holds the distribution for this ddp_count.
 * NOTE(review): interior lines (brace lines, ma/ibuf setup, the
 * dd_gatherv call line) are missing from this extract of the file.
 */
1313 static void dd_collect_cg(gmx_domdec_t *dd,
1314 t_state *state_local)
1316 gmx_domdec_master_t *ma = NULL;
1317 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1320 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1322 /* The master has the correct distribution */
/* Determine the local cg/atom counts for this state generation */
1326 if (state_local->ddp_count == dd->ddp_count)
1328 ncg_home = dd->ncg_home;
1330 nat_home = dd->nat_home;
1332 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1334 cgs_gl = &dd->comm->cgs_gl;
1336 ncg_home = state_local->ncg_gl;
1337 cg = state_local->cg_gl;
1339 for (i = 0; i < ncg_home; i++)
1341 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1346 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1349 buf2[0] = dd->ncg_home;
1350 buf2[1] = dd->nat_home;
1360 /* Collect the charge group and atom counts on the master */
1361 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1366 for (i = 0; i < dd->nnodes; i++)
1368 ma->ncg[i] = ma->ibuf[2*i];
1369 ma->nat[i] = ma->ibuf[2*i+1];
1370 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1373 /* Make byte counts and indices */
1374 for (i = 0; i < dd->nnodes; i++)
1376 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1377 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1381 fprintf(debug, "Initial charge group distribution: ");
1382 for (i = 0; i < dd->nnodes; i++)
1384 fprintf(debug, " %d", ma->ncg[i]);
1386 fprintf(debug, "\n");
1390 /* Collect the charge group indices on the master */
1392 dd->ncg_home*sizeof(int), dd->index_gl,
1393 DDMASTER(dd) ? ma->ibuf : NULL,
1394 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1395 DDMASTER(dd) ? ma->cg : NULL);
1397 dd->comm->master_cg_ddp_count = state_local->ddp_count;
/* Collect the distributed rvec array lv into the global array v on the
 * master via point-to-point MPI_Send/MPI_Recv; used for small node
 * counts (<= GMX_DD_NNODES_SENDRECV).
 * NOTE(review): the parameter list tail, brace lines and the master/
 * non-master branch structure are missing from this extract.
 */
1400 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1403 gmx_domdec_master_t *ma;
1404 int n, i, c, a, nalloc = 0;
/* Non-master: send the home atoms' values to the master */
1413 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1414 dd->rank, dd->mpi_comm_all);
1419 /* Copy the master coordinates to the global array */
1420 cgs_gl = &dd->comm->cgs_gl;
1422 n = DDMASTERRANK(dd);
1424 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1426 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1428 copy_rvec(lv[a++], v[c]);
/* Receive each other node's block and scatter it by global cg index */
1432 for (n = 0; n < dd->nnodes; n++)
1436 if (ma->nat[n] > nalloc)
1438 nalloc = over_alloc_dd(ma->nat[n]);
1439 srenew(buf, nalloc);
1442 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1443 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1446 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1448 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1450 copy_rvec(buf[a++], v[c]);
/* Fill the byte counts and displacements (stored in ma->ibuf) needed for
 * gatherv/scatterv of per-node rvec blocks; only meaningful on master.
 * NOTE(review): the ma assignment and the *counts assignment lines are
 * missing from this extract of the file.
 */
1459 static void get_commbuffer_counts(gmx_domdec_t *dd,
1460 int **counts, int **disps)
1462 gmx_domdec_master_t *ma;
1467 /* Make the rvec count and displacement arrays */
1469 *disps = ma->ibuf + dd->nnodes;
1470 for (n = 0; n < dd->nnodes; n++)
1472 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1473 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
/* Collect the distributed rvec array lv into the global array v on the
 * master using a collective gatherv; used for high node counts.
 * NOTE(review): the parameter list tail, brace lines and buffer setup
 * are missing from this extract of the file.
 */
1477 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1480 gmx_domdec_master_t *ma;
1481 int *rcounts = NULL, *disps = NULL;
1490 get_commbuffer_counts(dd, &rcounts, &disps);
1495 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
/* Master: scatter the gathered per-node blocks by global cg index */
1499 cgs_gl = &dd->comm->cgs_gl;
1502 for (n = 0; n < dd->nnodes; n++)
1504 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1506 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1508 copy_rvec(buf[a++], v[c]);
1515 void dd_collect_vec(gmx_domdec_t *dd,
1516 t_state *state_local, rvec *lv, rvec *v)
1518 gmx_domdec_master_t *ma;
1519 int n, i, c, a, nalloc = 0;
1522 dd_collect_cg(dd, state_local);
1524 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1526 dd_collect_vec_sendrecv(dd, lv, v);
1530 dd_collect_vec_gatherv(dd, lv, v);
/* Collect the full MD state on the master: copy the replicated scalar/
 * matrix parts from state_local and gather every distributed per-atom
 * array via dd_collect_vec.
 * NOTE(review): brace lines, the DDMASTER guard and several switch-case
 * labels are missing from this extract of the file.
 */
1535 void dd_collect_state(gmx_domdec_t *dd,
1536 t_state *state_local, t_state *state)
1540 nh = state->nhchainlength;
/* Replicated (non-distributed) state: copied directly on the master */
1544 for (i = 0; i < efptNR; i++)
1546 state->lambda[i] = state_local->lambda[i];
1548 state->fep_state = state_local->fep_state;
1549 state->veta = state_local->veta;
1550 state->vol0 = state_local->vol0;
1551 copy_mat(state_local->box, state->box);
1552 copy_mat(state_local->boxv, state->boxv);
1553 copy_mat(state_local->svir_prev, state->svir_prev);
1554 copy_mat(state_local->fvir_prev, state->fvir_prev);
1555 copy_mat(state_local->pres_prev, state->pres_prev);
/* Nose-Hoover chains: nh entries per temperature-coupling group */
1557 for (i = 0; i < state_local->ngtc; i++)
1559 for (j = 0; j < nh; j++)
1561 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1562 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1564 state->therm_integral[i] = state_local->therm_integral[i];
1566 for (i = 0; i < state_local->nnhpres; i++)
1568 for (j = 0; j < nh; j++)
1570 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1571 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
/* Distributed per-atom arrays: gather each flagged entry */
1575 for (est = 0; est < estNR; est++)
1577 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1582 dd_collect_vec(dd, state_local, state_local->x, state->x);
1585 dd_collect_vec(dd, state_local, state_local->v, state->v);
1588 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1591 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1593 case estDISRE_INITF:
1594 case estDISRE_RM3TAV:
1595 case estORIRE_INITF:
1599 gmx_incons("Unknown state entry encountered in dd_collect_state");
/* Grow all distributed per-atom arrays in state (and the force array *f)
 * to hold at least nalloc atoms, using DD over-allocation.
 * NOTE(review): brace lines, the debug guard and the switch-case labels
 * are missing from this extract of the file.
 */
1605 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1611 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1614 state->nalloc = over_alloc_dd(nalloc);
1616 for (est = 0; est < estNR; est++)
1618 if (EST_DISTR(est) && (state->flags & (1<<est)))
1623 srenew(state->x, state->nalloc);
1626 srenew(state->v, state->nalloc);
1629 srenew(state->sd_X, state->nalloc);
1632 srenew(state->cg_p, state->nalloc);
1634 case estDISRE_INITF:
1635 case estDISRE_RM3TAV:
1636 case estORIRE_INITF:
1638 /* No reallocation required */
1641 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1648 srenew(*f, state->nalloc);
/* Ensure the forcerec charge-group arrays (cginfo, and cg_cm for the group
 * cut-off scheme) can hold 'nalloc' charge groups; with the Verlet scheme
 * the state coordinate arrays are grown instead, since x in state is used
 * to set up the atom communication. */
1652 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1655 if (nalloc > fr->cg_nalloc)
1659 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1661 fr->cg_nalloc = over_alloc_dd(nalloc);
1662 srenew(fr->cginfo, fr->cg_nalloc);
/* cg_cm (charge-group centers of mass) only exists for the group scheme */
1663 if (fr->cutoff_scheme == ecutsGROUP)
1665 srenew(fr->cg_cm, fr->cg_nalloc);
1668 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1670 /* We don't use charge groups, we use x in state to set up
1671 * the atom communication.
1673 dd_realloc_state(state, f, nalloc);
/* Distribute a global rvec array 'v' over the ranks using point-to-point
 * MPI_Send/MPI_Recv: the master packs each rank's atoms in charge-group
 * order into a buffer and sends it; the master's own slice is copied
 * directly into 'lv'; non-master ranks receive their home atoms into 'lv'.
 */
1677 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1680 gmx_domdec_master_t *ma;
1681 int n, i, c, a, nalloc = 0;
/* Master side: loop over all ranks, pack and send each one's atoms */
1688 for (n = 0; n < dd->nnodes; n++)
/* Grow the pack buffer on demand to the largest per-rank atom count */
1692 if (ma->nat[n] > nalloc)
1694 nalloc = over_alloc_dd(ma->nat[n]);
1695 srenew(buf, nalloc);
1697 /* Use lv as a temporary buffer */
/* Pack atoms of this rank's charge groups contiguously */
1699 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1701 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1703 copy_rvec(v[c], buf[a++]);
/* Sanity check: packed atom count must match the bookkeeping */
1706 if (a != ma->nat[n])
1708 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1713 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1714 DDRANK(dd, n), n, dd->mpi_comm_all);
/* Master copies its own home atoms straight into lv (no self-send) */
1719 n = DDMASTERRANK(dd);
1721 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1723 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1725 copy_rvec(v[c], lv[a++]);
/* Non-master side: receive this rank's home atoms from the master */
1732 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1733 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
/* Distribute a global rvec array 'v' over the ranks with one collective
 * scatterv: the master packs all ranks' atoms (charge-group order) into a
 * single buffer with per-rank counts/displacements, then dd_scatterv()
 * delivers each rank its dd->nat_home atoms into 'lv'. */
1738 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1741 gmx_domdec_master_t *ma;
1742 int *scounts = NULL, *disps = NULL;
1743 int n, i, c, a, nalloc = 0;
/* Byte counts and displacements per rank, master only */
1750 get_commbuffer_counts(dd, &scounts, &disps);
/* Pack every rank's atoms back-to-back in rank order */
1754 for (n = 0; n < dd->nnodes; n++)
1756 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1758 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1760 copy_rvec(v[c], buf[a++]);
1766 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1769 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1771 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1773 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1777 dd_distribute_vec_scatterv(dd, cgs, v, lv);
/* Broadcast the expanded-ensemble/free-energy weight history (df_history_t)
 * from the master to all ranks: scalar fields first, then the per-lambda
 * arrays and the nlambda x nlambda accumulators/transition matrices. */
1781 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1784 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1785 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1786 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
/* Array fields only exist when there are lambda states */
1788 if (dfhist->nlambda > 0)
1790 int nlam = dfhist->nlambda;
1791 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1792 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1793 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1794 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1795 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1796 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
/* Row-by-row broadcast of the nlam x nlam matrices */
1798 for (i = 0; i < nlam; i++)
1800 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1801 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1802 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1803 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1804 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1805 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
/* Distribute the global t_state from the master to the per-rank local
 * states: the master first copies all global (non-per-atom) fields into
 * its own state_local, these are then broadcast to every rank, and
 * finally the distributed per-atom arrays (x, v, sd_X, cg_p) are
 * scattered according to the charge-group assignment in 'cgs'.
 * NOTE(review): extraction dropped brace/switch lines; the
 * dd_distribute_vec calls below belong to a switch over state entries.
 */
1810 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1811 t_state *state, t_state *state_local,
1816 nh = state->nhchainlength;
/* Master only: copy global scalar/matrix fields into the local state */
1820 for (i = 0; i < efptNR; i++)
1822 state_local->lambda[i] = state->lambda[i];
1824 state_local->fep_state = state->fep_state;
1825 state_local->veta = state->veta;
1826 state_local->vol0 = state->vol0;
1827 copy_mat(state->box, state_local->box);
1828 copy_mat(state->box_rel, state_local->box_rel);
1829 copy_mat(state->boxv, state_local->boxv);
1830 copy_mat(state->svir_prev, state_local->svir_prev);
1831 copy_mat(state->fvir_prev, state_local->fvir_prev);
1832 copy_df_history(&state_local->dfhist, &state->dfhist);
/* Nose-Hoover chains: ngtc coupling groups x nh chain entries */
1833 for (i = 0; i < state_local->ngtc; i++)
1835 for (j = 0; j < nh; j++)
1837 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1838 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1840 state_local->therm_integral[i] = state->therm_integral[i];
1842 for (i = 0; i < state_local->nnhpres; i++)
1844 for (j = 0; j < nh; j++)
1846 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1847 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
/* Broadcast the global fields from the master to all ranks */
1851 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1852 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1853 dd_bcast(dd, sizeof(real), &state_local->veta);
1854 dd_bcast(dd, sizeof(real), &state_local->vol0);
1855 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1856 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1857 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1858 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1859 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1860 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1861 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1862 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1863 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1864 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1866 /* communicate df_history -- required for restarting from checkpoint */
1867 dd_distribute_dfhist(dd, &state_local->dfhist);
/* Make sure the local per-atom arrays can hold all home atoms */
1869 if (dd->nat_home > state_local->nalloc)
1871 dd_realloc_state(state_local, f, dd->nat_home);
/* Scatter the distributed per-atom arrays */
1873 for (i = 0; i < estNR; i++)
1875 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1880 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1883 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1886 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1889 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1891 case estDISRE_INITF:
1892 case estDISRE_RM3TAV:
1893 case estORIRE_INITF:
1895 /* Not implemented yet */
1898 gmx_incons("Unknown state entry encountered in dd_distribute_state");
/* Map a dimension index (XX/YY/ZZ) to its character 'X'/'Y'/'Z' for
 * messages; any other value is a fatal internal error. */
1904 static char dim2char(int dim)
1910 case XX: c = 'X'; break;
1911 case YY: c = 'Y'; break;
1912 case ZZ: c = 'Z'; break;
1913 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
/* Debug output: write the DD cell grid for this step as a PDB file.
 * Each rank's cell boundaries (cell_x0/cell_x1) are gathered on the
 * master, which then writes the 8 corners of every cell as CA/GLY
 * pseudo-atoms (B-factor column carries the relative cell volume) and
 * CONECT records drawing the 12 cell edges. */
1919 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1920 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1922 rvec grid_s[2], *grid_r = NULL, cx, r;
1923 char fname[STRLEN], format[STRLEN], buf[22];
1925 int a, i, d, z, y, x;
1929 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1930 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1934 snew(grid_r, 2*dd->nnodes);
/* Collect every rank's lower/upper cell corners on the master */
1937 dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd) ? grid_r[0] : NULL);
/* Build the triclinic skew matrix from the box for corner transformation */
1941 for (d = 0; d < DIM; d++)
1943 for (i = 0; i < DIM; i++)
1951 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1953 tric[d][i] = box[i][d]/box[i][i];
1962 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1963 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
1964 out = gmx_fio_fopen(fname, "w");
1965 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1967 for (i = 0; i < dd->nnodes; i++)
/* Cell volume relative to the average cell volume (goes in B-factor) */
1969 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1970 for (d = 0; d < DIM; d++)
1972 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
/* Emit the 8 corners of cell i; 'r' is 'cx' mapped through the
 * triclinic transform (the mapping line was elided in extraction) */
1974 for (z = 0; z < 2; z++)
1976 for (y = 0; y < 2; y++)
1978 for (x = 0; x < 2; x++)
1980 cx[XX] = grid_r[i*2+x][XX];
1981 cx[YY] = grid_r[i*2+y][YY];
1982 cx[ZZ] = grid_r[i*2+z][ZZ];
1984 fprintf(out, format, "ATOM", a++, "CA", "GLY", ' ', 1+i,
1985 ' ', 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol);
/* Connect the corners into cell edges, 4 per dimension */
1989 for (d = 0; d < DIM; d++)
1991 for (x = 0; x < 4; x++)
1995 case 0: y = 1 + i*8 + 2*x; break;
1996 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1997 case 2: y = 1 + i*8 + x; break;
1999 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2003 gmx_fio_fclose(out);
2008 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
2009 gmx_mtop_t *mtop, t_commrec *cr,
2010 int natoms, rvec x[], matrix box)
2012 char fname[STRLEN], format[STRLEN], format4[STRLEN], buf[22];
2014 int i, ii, resnr, c;
2015 char *atomname, *resname;
2022 natoms = dd->comm->nat[ddnatVSITE];
2025 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2027 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
2028 sprintf(format4, "%s%s\n", get_pdbformat4(), "%6.2f%6.2f");
2030 out = gmx_fio_fopen(fname, "w");
2032 fprintf(out, "TITLE %s\n", title);
2033 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2034 for (i = 0; i < natoms; i++)
2036 ii = dd->gatindex[i];
2037 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
2038 if (i < dd->comm->nat[ddnatZONE])
2041 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2047 else if (i < dd->comm->nat[ddnatVSITE])
2049 b = dd->comm->zones.n;
2053 b = dd->comm->zones.n + 1;
2055 fprintf(out, strlen(atomname) < 4 ? format : format4,
2056 "ATOM", (ii+1)%100000,
2057 atomname, resname, ' ', resnr%10000, ' ',
2058 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b);
2060 fprintf(out, "TER\n");
2062 gmx_fio_fclose(out);
/* Return the effective multi-body (bonded) interaction cut-off for this
 * domain decomposition: the explicit cutoff_mbody when set, otherwise
 * derived from the smallest cell size over all decomposed dimensions,
 * capped at the non-bonded communication cut-off. */
2065 real dd_cutoff_mbody(gmx_domdec_t *dd)
2067 gmx_domdec_comm_t *comm;
2074 if (comm->bInterCGBondeds)
2076 if (comm->cutoff_mbody > 0)
2078 r = comm->cutoff_mbody;
2082 /* cutoff_mbody=0 means we do not have DLB */
2083 r = comm->cellsize_min[dd->dim[0]];
2084 for (di = 1; di < dd->ndim; di++)
2086 r = min(r, comm->cellsize_min[dd->dim[di]]);
/* With bonded communication the explicit bonded cut-off raises the bound */
2088 if (comm->bBondComm)
2090 r = max(r, comm->cutoff_mbody);
2094 r = min(r, comm->cutoff);
2102 real dd_cutoff_twobody(gmx_domdec_t *dd)
2106 r_mb = dd_cutoff_mbody(dd);
2108 return max(dd->comm->cutoff, r_mb);
2112 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2116 nc = dd->nc[dd->comm->cartpmedim];
2117 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2118 copy_ivec(coord, coord_pme);
2119 coord_pme[dd->comm->cartpmedim] =
2120 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
/* Map a DD rank index onto the index of the PME rank it communicates
 * with, assuming the major ordering index of both is x. The npme/2
 * offset rounds to the nearest slab instead of truncating, which gives
 * an even distribution of DD ranks over PME ranks. */
static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
{
    int scaled = ddindex*npme + npme/2;

    return scaled/ndd;
}
2132 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2134 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2137 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2139 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
/* Build the list of simulation rank ids that are PME-only ranks, by
 * walking the DD ranks and inserting a PME rank id wherever the
 * DD-to-PME mapping advances. Caller owns the returned array. */
2142 static int *dd_pmenodes(t_commrec *cr)
2147 snew(pmenodes, cr->npmenodes);
2149 for (i = 0; i < cr->dd->nnodes; i++)
2151 p0 = cr_ddindex2pmeindex(cr, i);
2152 p1 = cr_ddindex2pmeindex(cr, i+1);
/* A PME rank is interleaved after DD rank i when the mapping steps up,
 * or after the last DD rank */
2153 if (i+1 == cr->dd->nnodes || p1 > p0)
2157 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2159 pmenodes[n] = i + 1 + n;
/* Return the PME slab/rank index that the DD cell at grid coordinates
 * (x,y,z) communicates with.
 * NOTE(review): the braces and preprocessor structure around the
 * bCartesian branch were lost in extraction; presumably the Cartesian
 * path computes the slab from Cartesian PME coordinates and the final
 * line is the non-Cartesian fallback — verify against the full source. */
2167 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2170 ivec coords, coords_pme, nc;
2175 if (dd->comm->bCartesian) {
2176 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2177 dd_coords2pmecoords(dd,coords,coords_pme);
2178 copy_ivec(dd->ntot,nc);
/* Restrict the grid to its PME part and shift the PME coordinate into it */
2179 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2180 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2182 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2184 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2190 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
/* Translate DD grid coordinates (x,y,z) into a simulation rank id,
 * honoring whichever rank layout is in use: full Cartesian PP+PME
 * communicator, Cartesian PP with an explicit lookup table, or the
 * interleaved layout where PME ranks are mixed in between DD ranks. */
2195 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2197 gmx_domdec_comm_t *comm;
2199 int ddindex, nodeid = -1;
2201 comm = cr->dd->comm;
/* Cartesian PP+PME: MPI knows the coordinate-to-rank mapping */
2206 if (comm->bCartesianPP_PME)
2209 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2214 ddindex = dd_index(cr->dd->nc, coords);
2215 if (comm->bCartesianPP)
2217 nodeid = comm->ddindex2simnodeid[ddindex];
/* Interleaved: skip the PME ranks that precede this DD rank */
2223 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
/* Return the simulation rank id of the PME rank that serves the given
 * PP rank, or -1 when sim_nodeid is itself a PME-only rank (set by the
 * elided default of 'pmenode'). Handles the three rank layouts:
 * Cartesian PP+PME, Cartesian PP, and the interleaved PME-node list. */
2235 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2238 gmx_domdec_comm_t *comm;
2239 ivec coord, coord_pme;
2246 /* This assumes a uniform x domain decomposition grid cell size */
2247 if (comm->bCartesianPP_PME)
2250 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2251 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2253 /* This is a PP node */
2254 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2255 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2259 else if (comm->bCartesianPP)
2261 if (sim_nodeid < dd->nnodes)
2263 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2268 /* This assumes DD cells with identical x coordinates
2269 * are numbered sequentially.
2271 if (dd->comm->pmenodes == NULL)
2273 if (sim_nodeid < dd->nnodes)
2275 /* The DD index equals the nodeid */
2276 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid)
/* Report the PME grid decomposition in x and y.
 * NOTE(review): the else branch (no DD, presumably returning 1,1) was
 * lost in extraction — confirm against the full source. */
2296 void get_pme_nnodes(const gmx_domdec_t *dd,
2297 int *npmenodes_x, int *npmenodes_y)
2301 *npmenodes_x = dd->comm->npmenodes_x;
2302 *npmenodes_y = dd->comm->npmenodes_y;
2311 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2313 gmx_bool bPMEOnlyNode;
2315 if (DOMAINDECOMP(cr))
2317 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2321 bPMEOnlyNode = FALSE;
2324 return bPMEOnlyNode;
/* For PME rank 'pmenodeid', collect the simulation rank ids of all PP
 * ranks that send it coordinates (into *my_ddnodes, count *nmy_ddnodes),
 * by scanning the whole DD grid. The last PP rank found becomes the
 * peer rank (*node_peer). Caller owns the *my_ddnodes array. */
2327 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2328 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2332 ivec coord, coord_pme;
/* Upper bound: each PME rank serves at most ceil(nnodes/npmenodes) PP ranks */
2336 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2339 for (x = 0; x < dd->nc[XX]; x++)
2341 for (y = 0; y < dd->nc[YY]; y++)
2343 for (z = 0; z < dd->nc[ZZ]; z++)
/* Cartesian layout: compare the cell's mapped PME coordinate to ours */
2345 if (dd->comm->bCartesianPP_PME)
2350 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2351 if (dd->ci[XX] == coord_pme[XX] &&
2352 dd->ci[YY] == coord_pme[YY] &&
2353 dd->ci[ZZ] == coord_pme[ZZ])
2355 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2360 /* The slab corresponds to the nodeid in the PME group */
2361 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2363 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2370 /* The last PP-only node is the peer node */
2371 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2375 fprintf(debug, "Receive coordinates from PP nodes:");
2376 for (x = 0; x < *nmy_ddnodes; x++)
2378 fprintf(debug, " %d", (*my_ddnodes)[x]);
2380 fprintf(debug, "\n");
/* Decide whether this PP rank's PME partner will send it virial and
 * energy contributions: only the LAST PP rank attached to a given PME
 * rank receives them. The visible branches clear the flag when a later
 * PP rank maps to the same PME rank (the default/return lines that set
 * the flag were elided in extraction). */
2384 static gmx_bool receive_vir_ener(t_commrec *cr)
2386 gmx_domdec_comm_t *comm;
2387 int pmenode, coords[DIM], rank;
/* With at least one PP rank per PME rank the question is non-trivial */
2391 if (cr->npmenodes < cr->dd->nnodes)
2393 comm = cr->dd->comm;
2394 if (comm->bCartesianPP_PME)
2396 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
/* Check whether the next cell along the PME dimension shares our PME rank */
2398 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2399 coords[comm->cartpmedim]++;
2400 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2402 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2403 if (dd_simnode2pmenode(cr, rank) == pmenode)
2405 /* This is not the last PP node for pmenode */
/* Non-Cartesian layout: simply check the next simulation rank id */
2413 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2414 if (cr->sim_nodeid+1 < cr->nnodes &&
2415 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2417 /* This is not the last PP node for pmenode */
2426 static void set_zones_ncg_home(gmx_domdec_t *dd)
2428 gmx_domdec_zones_t *zones;
2431 zones = &dd->comm->zones;
2433 zones->cg_range[0] = 0;
2434 for (i = 1; i < zones->n+1; i++)
2436 zones->cg_range[i] = dd->ncg_home;
2438 /* zone_ncg1[0] should always be equal to ncg_home */
2439 dd->comm->zone_ncg1[0] = dd->ncg_home;
/* Rebuild the local charge-group bookkeeping (index_gl, cgindex,
 * ncg_home, nat_home) from the global charge-group list stored in the
 * state (state->cg_gl), e.g. after reading a checkpoint.
 * NOTE(review): the lines filling dd->cgindex were elided in extraction.
 */
2442 static void rebuild_cgindex(gmx_domdec_t *dd,
2443 const int *gcgs_index, t_state *state)
2445 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2448 dd_cg_gl = dd->index_gl;
2449 cgindex = dd->cgindex;
/* Accumulate atoms per charge group using the global cg index table */
2452 for (i = 0; i < state->ncg_gl; i++)
2456 dd_cg_gl[i] = cg_gl;
2457 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2461 dd->ncg_home = state->ncg_gl;
2464 set_zones_ncg_home(dd);
/* Look up the cginfo entry for global charge group 'cg': advance through
 * the per-molecule-block cginfo_mb list to the block containing cg (the
 * loop-body increment was elided in extraction), then index into that
 * block modulo its repeat unit cg_mod. */
2467 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2469 while (cg >= cginfo_mb->cg_end)
2474 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
/* Fill fr->cginfo for the local charge groups [cg0,cg1) from the global
 * per-molecule-block info, and (when tracked) mark those global charge
 * groups as local in bLocalCG. */
2477 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2478 t_forcerec *fr, char *bLocalCG)
2480 cginfo_mb_t *cginfo_mb;
2486 cginfo_mb = fr->cginfo_mb;
2487 cginfo = fr->cginfo;
2489 for (cg = cg0; cg < cg1; cg++)
2491 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
/* bLocalCG is only maintained in some configurations */
2495 if (bLocalCG != NULL)
2497 for (cg = cg0; cg < cg1; cg++)
2499 bLocalCG[index_gl[cg]] = TRUE;
/* Build the local-to-global atom index (gatindex) and the global-to-local
 * hash (ga2la) for all zones, starting from charge group 'cg_start'.
 * With charge groups (bCGs) each cg expands to its atom range from
 * gcgs_index; with the Verlet scheme a "cg" is a single atom. */
2504 static void make_dd_indices(gmx_domdec_t *dd,
2505 const int *gcgs_index, int cg_start)
2507 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2508 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2513 bLocalCG = dd->comm->bLocalCG;
/* Grow the local-to-global table to cover all local atoms */
2515 if (dd->nat_tot > dd->gatindex_nalloc)
2517 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2518 srenew(dd->gatindex, dd->gatindex_nalloc);
2521 nzone = dd->comm->zones.n;
2522 zone2cg = dd->comm->zones.cg_range;
2523 zone_ncg1 = dd->comm->zone_ncg1;
2524 index_gl = dd->index_gl;
2525 gatindex = dd->gatindex;
2526 bCGs = dd->comm->bCGs;
2528 if (zone2cg[1] != dd->ncg_home)
2530 gmx_incons("dd->ncg_zone is not up to date");
2533 /* Make the local to global and global to local atom index */
2534 a = dd->cgindex[cg_start];
2535 for (zone = 0; zone < nzone; zone++)
2543 cg0 = zone2cg[zone];
2545 cg1 = zone2cg[zone+1];
/* cg1_p1 separates cgs received with the first pulse from later pulses */
2546 cg1_p1 = cg0 + zone_ncg1[zone];
2548 for (cg = cg0; cg < cg1; cg++)
2553 /* Signal that this cg is from more than one pulse away */
2556 cg_gl = index_gl[cg];
/* Charge-group path: register every atom of the group */
2559 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2562 ga2la_set(dd->ga2la, a_gl, a, zone1);
/* Atom path (Verlet): the cg index is itself the global atom index */
2568 gatindex[a] = cg_gl;
2569 ga2la_set(dd->ga2la, cg_gl, a, zone1);
/* Consistency check on the bLocalCG marker array: every local charge
 * group must be marked, and the total number of marked entries must
 * equal dd->ncg_tot. Returns the number of errors found (return line
 * elided in extraction). */
2576 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2579 int ncg, i, ngl, nerr;
/* Nothing to check when the marker array is not maintained */
2582 if (bLocalCG == NULL)
2586 for (i = 0; i < dd->ncg_tot; i++)
2588 if (!bLocalCG[dd->index_gl[i]])
2591 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
/* Count how many of all system cgs are marked local */
2596 for (i = 0; i < ncg_sys; i++)
2603 if (ngl != dd->ncg_tot)
2605 fprintf(stderr, "DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
/* Debug-only exhaustive check that the local<->global atom indices are
 * mutually consistent: no global atom appears twice locally, ga2la and
 * gatindex agree in both directions, and the counts match. Any
 * inconsistency is fatal at the end. */
2612 static void check_index_consistency(gmx_domdec_t *dd,
2613 int natoms_sys, int ncg_sys,
2616 int nerr, ngl, i, a, cell;
/* At the highest debug level, check for duplicated global atoms */
2621 if (dd->comm->DD_debug > 1)
2623 snew(have, natoms_sys);
2624 for (a = 0; a < dd->nat_tot; a++)
2626 if (have[dd->gatindex[a]] > 0)
2628 fprintf(stderr, "DD node %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
/* Store a+1, so 0 can mean "not seen yet" */
2632 have[dd->gatindex[a]] = a + 1;
2638 snew(have, dd->nat_tot);
/* Forward check: every ga2la entry must round-trip through gatindex */
2641 for (i = 0; i < natoms_sys; i++)
2643 if (ga2la_get(dd->ga2la, i, &a, &cell))
2645 if (a >= dd->nat_tot)
2647 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2653 if (dd->gatindex[a] != i)
2655 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2662 if (ngl != dd->nat_tot)
2665 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2666 dd->rank, where, ngl, dd->nat_tot);
/* Reverse check: every local atom must be present in ga2la */
2668 for (a = 0; a < dd->nat_tot; a++)
2673 "DD node %d, %s: local atom %d, global %d has no global index\n",
2674 dd->rank, where, a+1, dd->gatindex[a]+1);
2679 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2683 gmx_fatal(FARGS, "DD node %d, %s: %d atom/cg index inconsistencies",
2684 dd->rank, where, nerr);
/* Invalidate the local atom/cg index structures from 'a_start'/'cg_start'
 * onward before repartitioning: clear the global-to-local lookup (fully,
 * or entry by entry), unmark the affected cgs in bLocalCG, and reset the
 * vsite and constraint communication indices. */
2688 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2695 /* Clear the whole list without searching */
2696 ga2la_clear(dd->ga2la);
/* Alternative path: delete only the entries from a_start on */
2700 for (i = a_start; i < dd->nat_tot; i++)
2702 ga2la_del(dd->ga2la, dd->gatindex[i]);
2706 bLocalCG = dd->comm->bLocalCG;
2709 for (i = cg_start; i < dd->ncg_tot; i++)
2711 bLocalCG[dd->index_gl[i]] = FALSE;
2715 dd_clear_local_vsite_indices(dd);
/* Constraint indices only exist when constraints are in use */
2717 if (dd->constraints)
2719 dd_clear_local_constraint_indices(dd);
2723 /* This function should be used for moving the domain boundaries during DLB,
2724 * for obtaining the minimum cell size. It checks the initially set limit
2725 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2726 * and, possibly, a longer cut-off limit set for PME load balancing.
2728 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2732 cellsize_min = comm->cellsize_min[dim];
2734 if (!comm->bVacDLBNoLimit)
2736 /* The cut-off might have changed, e.g. by PME load balancing,
2737 * from the value used to set comm->cellsize_min, so check it.
2739 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2741 if (comm->bPMELoadBalDLBLimits)
2743 /* Check for the cut-off limit set by the PME load balancing */
2744 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2748 return cellsize_min;
/* Return the minimum allowed distance between staggered cell boundaries
 * in neighboring rows along decomposition index dim_ind, derived from
 * the cell size limit and the (possibly PME-load-balancing-enlarged)
 * cut-off divided by the number of communication pulses. */
2751 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2754 real grid_jump_limit;
2756 /* The distance between the boundaries of cells at distance
2757 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2758 * and by the fact that cells should not be shifted by more than
2759 * half their size, such that cg's only shift by one cell
2760 * at redecomposition.
2762 grid_jump_limit = comm->cellsize_limit;
2763 if (!comm->bVacDLBNoLimit)
2765 if (comm->bPMELoadBalDLBLimits)
2767 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2769 grid_jump_limit = max(grid_jump_limit,
2770 cutoff/comm->cd[dim_ind].np);
2773 return grid_jump_limit;
/* Verify that the staggered DLB cell boundaries have not shifted past
 * the jump limit relative to the neighboring rows; depending on the
 * (elided) bFatal flag this either reports the violation or aborts with
 * a fatal error naming the offending dimension and cell. */
2776 static gmx_bool check_grid_jump(gmx_int64_t step,
2782 gmx_domdec_comm_t *comm;
2791 for (d = 1; d < dd->ndim; d++)
2794 limit = grid_jump_limit(comm, cutoff, d);
/* Boundary fractions are scaled by the (skewed) box size in this dim */
2795 bfac = ddbox->box_size[dim];
2796 if (ddbox->tric_dir[dim])
2798 bfac *= ddbox->skew_fac[dim];
/* Compare our boundaries against the extremes of the neighbor rows */
2800 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2801 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2809 /* This error should never be triggered under normal
2810 * circumstances, but you never know ...
2812 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
2813 gmx_step_str(step, buf),
2814 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2822 static int dd_load_count(gmx_domdec_comm_t *comm)
2824 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
/* Return this rank's force-computation load measure for dynamic load
 * balancing: either a flop count (optionally randomly perturbed for
 * testing) or the force cycle count with outliers subtracted, corrected
 * by the averaged GPU wait time when a GPU is shared between ranks. */
2827 static float dd_force_load(gmx_domdec_comm_t *comm)
/* eFlop > 1: inject up to +-5% random noise into the flop load,
 * scaled by (eFlop - 1), to exercise the load balancer */
2834 if (comm->eFlop > 1)
2836 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2841 load = comm->cycl[ddCyclF];
2842 if (comm->cycl_n[ddCyclF] > 1)
2844 /* Subtract the maximum of the last n cycle counts
2845 * to get rid of possible high counts due to other sources,
2846 * for instance system activity, that would otherwise
2847 * affect the dynamic load balancing.
2849 load -= comm->cycl_max[ddCyclF];
2853 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2855 float gpu_wait, gpu_wait_sum;
2857 gpu_wait = comm->cycl[ddCyclWaitGPU];
2858 if (comm->cycl_n[ddCyclF] > 1)
2860 /* We should remove the WaitGPU time of the same MD step
2861 * as the one with the maximum F time, since the F time
2862 * and the wait time are not independent.
2863 * Furthermore, the step for the max F time should be chosen
2864 * the same on all ranks that share the same GPU.
2865 * But to keep the code simple, we remove the average instead.
2866 * The main reason for artificially long times at some steps
2867 * is spurious CPU activity or MPI time, so we don't expect
2868 * that changes in the GPU wait time matter a lot here.
2870 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2872 /* Sum the wait times over the ranks that share the same GPU */
2873 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2874 comm->mpi_comm_gpu_shared);
2875 /* Replace the wait time by the average over the ranks */
2876 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
/* Allocate and fill *dim_f with the cumulative cell boundary fractions
 * along 'dim' for static (non-DLB) load balancing: user-supplied
 * slb_frac values when present, a uniform grid otherwise. The array has
 * dd->nc[dim]+1 entries, running from 0 (implicit) to exactly 1. */
2884 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2886 gmx_domdec_comm_t *comm;
2891 snew(*dim_f, dd->nc[dim]+1);
2893 for (i = 1; i < dd->nc[dim]; i++)
2895 if (comm->slb_frac[dim])
2897 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
/* Uniform spacing when no per-cell fractions were provided */
2901 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2904 (*dim_f)[dd->nc[dim]] = 1;
/* Initialize the PME slab bookkeeping for PME decomposition dimension
 * 'dimind': determine the number of slabs, and for each slab the range
 * of PP cell coordinates (pp_min/pp_max) whose atoms it may receive,
 * plus the static boundary fractions used to compute the PME maxshift. */
2907 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2909 int pmeindex, slab, nso, i;
/* Special case: y-major DD with a single PME rank row in x */
2912 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2918 ddpme->dim = dimind;
2920 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2922 ddpme->nslab = (ddpme->dim == 0 ?
2923 dd->comm->npmenodes_x :
2924 dd->comm->npmenodes_y);
/* With a single slab there is nothing to set up */
2926 if (ddpme->nslab <= 1)
2931 nso = dd->comm->npmenodes/ddpme->nslab;
2932 /* Determine for each PME slab the PP location range for dimension dim */
2933 snew(ddpme->pp_min, ddpme->nslab);
2934 snew(ddpme->pp_max, ddpme->nslab);
/* Start with an empty (inverted) range and widen it below */
2935 for (slab = 0; slab < ddpme->nslab; slab++)
2937 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2938 ddpme->pp_max[slab] = 0;
2940 for (i = 0; i < dd->nnodes; i++)
2942 ddindex2xyz(dd->nc, i, xyz);
2943 /* For y only use our y/z slab.
2944 * This assumes that the PME x grid size matches the DD grid size.
2946 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2948 pmeindex = ddindex2pmeindex(dd, i);
2951 slab = pmeindex/nso;
2955 slab = pmeindex % ddpme->nslab;
2957 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
2958 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
2962 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
/* Maximum PME slab shift along x; the fallback return for a non-x PME
 * dimension (presumably 0) was elided in extraction. */
2965 int dd_pme_maxshift_x(gmx_domdec_t *dd)
2967 if (dd->comm->ddpme[0].dim == XX)
2969 return dd->comm->ddpme[0].maxshift;
/* Maximum PME slab shift along y, whether y is the first or the second
 * PME decomposition dimension; the fallback return (presumably 0) was
 * elided in extraction. */
2977 int dd_pme_maxshift_y(gmx_domdec_t *dd)
2979 if (dd->comm->ddpme[0].dim == YY)
2981 return dd->comm->ddpme[0].maxshift;
2983 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2985 return dd->comm->ddpme[1].maxshift;
/* Determine ddpme->maxshift: how many PME slabs away from its own a PP
 * cell's atoms may need to be communicated, given the current cell
 * boundary fractions. With matching uniform decompositions the shift is
 * minimal; otherwise each slab boundary is checked against the cell
 * boundaries extended by 2/3 of the cut-off. */
2993 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
2994 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
2996 gmx_domdec_comm_t *comm;
2999 real range, pme_boundary;
3003 nc = dd->nc[ddpme->dim];
3006 if (!ddpme->dim_match)
3008 /* PP decomposition is not along dim: the worst situation */
3011 else if (ns <= 3 || (bUniform && ns == nc))
3013 /* The optimal situation */
3018 /* We need to check for all pme nodes which nodes they
3019 * could possibly need to communicate with.
3021 xmin = ddpme->pp_min;
3022 xmax = ddpme->pp_max;
3023 /* Allow for atoms to be maximally 2/3 times the cut-off
3024 * out of their DD cell. This is a reasonable balance between
3025 * between performance and support for most charge-group/cut-off
3028 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3029 /* Avoid extra communication when we are exactly at a boundary */
/* Increase sh until no slab boundary is violated on either side;
 * the +-ns variants handle the periodic wrap-around */
3033 for (s = 0; s < ns; s++)
3035 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3036 pme_boundary = (real)s/ns;
3039 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3041 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3045 pme_boundary = (real)(s+1)/ns;
3048 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3050 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3057 ddpme->maxshift = sh;
3061 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3062 ddpme->dim, ddpme->maxshift);
/* Fatal-error check that, along every decomposed bounded dimension, the
 * (skew-corrected) box is large enough to hold dd->nc[dim] cells of at
 * least the minimum allowed cell size (with the DD_CELL_MARGIN safety
 * factor). */
3066 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3070 for (d = 0; d < dd->ndim; d++)
3073 if (dim < ddbox->nboundeddim &&
3074 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3075 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3077 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3078 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3079 dd->nc[dim], dd->comm->cellsize_limit);
/* Compute the static (non-DLB) cell boundaries along every dimension,
 * either uniformly or from user-supplied slb_frac fractions. On the
 * master (bMaster) all boundaries are stored in dd->ma->cell_x; on other
 * ranks only this rank's own cell_x0/cell_x1. Also determines npulse,
 * the number of communication pulses needed to reach the cut-off, and
 * records the per-dimension minimum cell size. */
3084 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3085 gmx_bool bMaster, ivec npulse)
3087 gmx_domdec_comm_t *comm;
3090 real *cell_x, cell_dx, cellsize;
3094 for (d = 0; d < DIM; d++)
3096 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
/* Uniform case: one cell, or no per-cell fractions given */
3098 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3101 cell_dx = ddbox->box_size[d]/dd->nc[d];
3104 for (j = 0; j < dd->nc[d]+1; j++)
3106 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3111 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3112 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
/* npulse[d] = number of cells needed to span the cut-off */
3114 cellsize = cell_dx*ddbox->skew_fac[d];
3115 while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
3119 cellsize_min[d] = cellsize;
3123 /* Statically load balanced grid */
3124 /* Also when we are not doing a master distribution we determine
3125 * all cell borders in a loop to obtain identical values
3126 * to the master distribution case and to determine npulse.
3130 cell_x = dd->ma->cell_x[d];
3134 snew(cell_x, dd->nc[d]+1);
3136 cell_x[0] = ddbox->box0[d];
3137 for (j = 0; j < dd->nc[d]; j++)
3139 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3140 cell_x[j+1] = cell_x[j] + cell_dx;
3141 cellsize = cell_dx*ddbox->skew_fac[d];
3142 while (cellsize*npulse[d] < comm->cutoff &&
3143 npulse[d] < dd->nc[d]-1)
3147 cellsize_min[d] = min(cellsize_min[d], cellsize);
3151 comm->cell_x0[d] = cell_x[dd->ci[d]];
3152 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3156 /* The following limitation is to avoid that a cell would receive
3157 * some of its own home charge groups back over the periodic boundary.
3158 * Double charge groups cause trouble with the global indices.
3160 if (d < ddbox->npbcdim &&
3161 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3163 gmx_fatal_collective(FARGS, NULL, dd,
3164 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3165 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3167 dd->nc[d], dd->nc[d],
3168 dd->nnodes > dd->nc[d] ? "cells" : "processors");
/* Without DLB these are the final minimum cell sizes */
3172 if (!comm->bDynLoadBal)
3174 copy_rvec(cellsize_min, comm->cellsize_min);
3177 for (d = 0; d < comm->npmedecompdim; d++)
3179 set_pme_maxshift(dd, &comm->ddpme[d],
3180 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3181 comm->ddpme[d].slb_dim_f);
/* Enforce the minimum cell size and the staggering bounds on the DLB
 * master ("root") cell boundaries root->cell_f for decomposition index d
 * (Cartesian dimension dim), restricted to the half-open cell interval
 * [range[0], range[1]).
 *
 * Cells smaller than cellsize_limit_f are clamped to that limit and the
 * remaining cells are rescaled iteratively until no new cell drops below
 * the limit.  Boundaries that violate the staggering limits
 * (root->bound_min/bound_max) are fixed by recursing on sub-ranges.
 * Aborts the run with gmx_fatal when no valid balance exists.
 */
3186 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3187 int d, int dim, gmx_domdec_root_t *root,
3189 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3191 gmx_domdec_comm_t *comm;
3192 int ncd, i, j, nmin, nmin_old;
3193 gmx_bool bLimLo, bLimHi;
3195 real fac, halfway, cellsize_limit_f_i, region_size;
3196 gmx_bool bPBC, bLastHi = FALSE;
/* nrange is the working sub-range for the recursive calls below */
3197 int nrange[] = {range[0], range[1]};
3199 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
/* Only dimensions with PBC impose a lower size limit on the outer cells */
3205 bPBC = (dim < ddbox->npbcdim);
3207 cell_size = root->buf_ncd;
3211 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3214 /* First we need to check if the scaling does not make cells
3215 * smaller than the smallest allowed size.
3216 * We need to do this iteratively, since if a cell is too small,
3217 * it needs to be enlarged, which makes all the other cells smaller,
3218 * which could in turn make another cell smaller than allowed.
3220 for (i = range[0]; i < range[1]; i++)
3222 root->bCellMin[i] = FALSE;
3228 /* We need the total for normalization */
3230 for (i = range[0]; i < range[1]; i++)
3232 if (root->bCellMin[i] == FALSE)
3234 fac += cell_size[i];
3237 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* subtracting cells already set to cellsize_limit_f */
3238 /* Determine the cell boundaries */
3239 for (i = range[0]; i < range[1]; i++)
3241 if (root->bCellMin[i] == FALSE)
3243 cell_size[i] *= fac;
/* Without PBC the first and last cell are unbounded on the outside */
3244 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3246 cellsize_limit_f_i = 0;
3250 cellsize_limit_f_i = cellsize_limit_f;
3252 if (cell_size[i] < cellsize_limit_f_i)
3254 root->bCellMin[i] = TRUE;
3255 cell_size[i] = cellsize_limit_f_i;
3259 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
/* Repeat until the number of clamped cells no longer grows */
3262 while (nmin > nmin_old);
3265 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3266 /* For this check we should not use DD_CELL_MARGIN,
3267 * but a slightly smaller factor,
3268 * since rounding could get us below the limit.
3270 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3273 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3274 gmx_step_str(step, buf),
3275 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3276 ncd, comm->cellsize_min[dim]);
3279 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3283 /* Check if the boundary did not displace more than halfway
3284 * each of the cells it bounds, as this could cause problems,
3285 * especially when the differences between cell sizes are large.
3286 * If changes are applied, they will not make cells smaller
3287 * than the cut-off, as we check all the boundaries which
3288 * might be affected by a change and if the old state was ok,
3289 * the cells will at most be shrunk back to their old size.
3291 for (i = range[0]+1; i < range[1]; i++)
3293 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3294 if (root->cell_f[i] < halfway)
3296 root->cell_f[i] = halfway;
3297 /* Check if the change also causes shifts of the next boundaries */
3298 for (j = i+1; j < range[1]; j++)
3300 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3302 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3306 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3307 if (root->cell_f[i] > halfway)
3309 root->cell_f[i] = halfway;
3310 /* Check if the change also causes shifts of the next boundaries */
3311 for (j = i-1; j >= range[0]+1; j--)
3313 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3315 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3322 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3323 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3324 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3325 * for a and b nrange is used */
3328 /* Take care of the staggering of the cell boundaries */
3331 for (i = range[0]; i < range[1]; i++)
3333 root->cell_f_max0[i] = root->cell_f[i];
3334 root->cell_f_min1[i] = root->cell_f[i+1];
3339 for (i = range[0]+1; i < range[1]; i++)
3341 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3342 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3343 if (bLimLo && bLimHi)
3345 /* Both limits violated, try the best we can */
3346 /* For this case we split the original range (range) in two parts and care about the other limitations in the next iteration. */
3347 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3348 nrange[0] = range[0];
3350 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3353 nrange[1] = range[1];
3354 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3360 /* root->cell_f[i] = root->bound_min[i]; */
3361 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3364 else if (bLimHi && !bLastHi)
3367 if (nrange[1] < range[1]) /* found a LimLo before */
3369 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3370 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3371 nrange[0] = nrange[1];
3373 root->cell_f[i] = root->bound_max[i];
3375 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3377 nrange[1] = range[1];
/* Handle any violations left over after the scan */
3380 if (nrange[1] < range[1]) /* found last a LimLo */
3382 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3383 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3384 nrange[0] = nrange[1];
3385 nrange[1] = range[1];
3386 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3388 else if (nrange[0] > range[0]) /* found at least one LimHi */
3390 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
/* Master-rank computation of the dynamically load-balanced cell sizes
 * along decomposition index d (Cartesian dimension dim).
 *
 * New relative cell sizes are derived from the measured per-cell load
 * (under-relaxed towards balance and capped by comm->dlb_scale_lim),
 * then the hard limits (minimum cell size, grid-jump/staggering bounds)
 * are enforced via dd_cell_sizes_dlb_root_enforce_limits.  The resulting
 * boundary fractions, the lower-dimension fractions and the PME maxshift
 * values are packed at the end of root->cell_f for broadcasting.
 */
3397 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3398 int d, int dim, gmx_domdec_root_t *root,
3399 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3400 gmx_bool bUniform, gmx_int64_t step)
3402 gmx_domdec_comm_t *comm;
3403 int ncd, d1, i, j, pos;
3405 real load_aver, load_i, imbalance, change, change_max, sc;
3406 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3410 int range[] = { 0, 0 };
3414 /* Convert the maximum change from the input percentage to a fraction */
3415 change_limit = comm->dlb_scale_lim*0.01;
3419 bPBC = (dim < ddbox->npbcdim);
3421 cell_size = root->buf_ncd;
3423 /* Store the original boundaries */
3424 for (i = 0; i < ncd+1; i++)
3426 root->old_cell_f[i] = root->cell_f[i];
/* Uniform grid: every cell gets the same relative size */
3430 for (i = 0; i < ncd; i++)
3432 cell_size[i] = 1.0/ncd;
3435 else if (dd_load_count(comm))
3437 load_aver = comm->load[d].sum_m/ncd;
3439 for (i = 0; i < ncd; i++)
3441 /* Determine the relative imbalance of cell i */
3442 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3443 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3444 /* Determine the change of the cell size using underrelaxation */
3445 change = -relax*imbalance;
3446 change_max = max(change_max, max(change, -change));
3448 /* Limit the amount of scaling.
3449 * We need to use the same rescaling for all cells in one row,
3450 * otherwise the load balancing might not converge.
3453 if (change_max > change_limit)
3455 sc *= change_limit/change_max;
3457 for (i = 0; i < ncd; i++)
3459 /* Determine the relative imbalance of cell i */
3460 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3461 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3462 /* Determine the change of the cell size using underrelaxation */
3463 change = -sc*imbalance;
3464 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
/* All limits below are expressed as fractions of the box length */
3468 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3469 cellsize_limit_f *= DD_CELL_MARGIN;
3470 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3471 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3472 if (ddbox->tric_dir[dim])
3474 cellsize_limit_f /= ddbox->skew_fac[dim];
3475 dist_min_f /= ddbox->skew_fac[dim];
/* With pressure coupling a shrinking box needs an extra safety margin */
3477 if (bDynamicBox && d > 0)
3479 dist_min_f *= DD_PRES_SCALE_MARGIN;
3481 if (d > 0 && !bUniform)
3483 /* Make sure that the grid is not shifted too much */
3484 for (i = 1; i < ncd; i++)
3486 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3488 gmx_incons("Inconsistent DD boundary staggering limits!");
3490 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3491 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3494 root->bound_min[i] += 0.5*space;
3496 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3497 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3500 root->bound_max[i] += 0.5*space;
3505 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3507 root->cell_f_max0[i-1] + dist_min_f,
3508 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3509 root->cell_f_min1[i] - dist_min_f);
/* The row spans the full box in relative coordinates: [0, 1] */
3514 root->cell_f[0] = 0;
3515 root->cell_f[ncd] = 1;
3516 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3519 /* After the checks above, the cells should obey the cut-off
3520 * restrictions, but it does not hurt to check.
3522 for (i = 0; i < ncd; i++)
3526 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3527 dim, i, root->cell_f[i], root->cell_f[i+1]);
3530 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3531 root->cell_f[i+1] - root->cell_f[i] <
3532 cellsize_limit_f/DD_CELL_MARGIN)
3536 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3537 gmx_step_str(step, buf), dim2char(dim), i,
3538 (root->cell_f[i+1] - root->cell_f[i])
3539 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3544 /* Store the cell boundaries of the lower dimensions at the end */
3545 for (d1 = 0; d1 < d; d1++)
3547 root->cell_f[pos++] = comm->cell_f0[d1];
3548 root->cell_f[pos++] = comm->cell_f1[d1];
3551 if (d < comm->npmedecompdim)
3553 /* The master determines the maximum shift for
3554 * the coordinate communication between separate PME nodes.
3556 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
/* maxshift is stored as a real so it can ride along in the bcast buffer */
3558 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3561 root->cell_f[pos++] = comm->ddpme[1].maxshift;
/* Convert this rank's relative cell boundary fractions (cell_f0/cell_f1)
 * for decomposition index dimind into absolute coordinates
 * (cell_x0/cell_x1) by scaling with the box size; dimensions that are
 * not bounded by the box are additionally offset by the box origin box0.
 */
3565 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3566 gmx_ddbox_t *ddbox, int dimind)
3568 gmx_domdec_comm_t *comm;
3573 /* Set the cell dimensions */
3574 dim = dd->dim[dimind];
3575 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3576 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3577 if (dim >= ddbox->nboundeddim)
3579 comm->cell_x0[dim] += ddbox->box0[dim];
3580 comm->cell_x1[dim] += ddbox->box0[dim];
/* Broadcast the row of cell boundary fractions computed by the row master
 * (cell_f_row) to all ranks in the row communicator, then unpack: this
 * rank's own fractions for dimension d, the fractions of all lower
 * decomposition dimensions (converted to absolute bounds), and the PME
 * maxshift values appended at the end of the buffer.
 */
3584 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3585 int d, int dim, real *cell_f_row,
3588 gmx_domdec_comm_t *comm;
3594 /* Each node would only need to know two fractions,
3595 * but it is probably cheaper to broadcast the whole array.
3597 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3598 0, comm->mpi_comm_load[d]);
3600 /* Copy the fractions for this dimension from the buffer */
3601 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3602 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3603 /* The whole array was communicated, so set the buffer position */
3604 pos = dd->nc[dim] + 1;
3605 for (d1 = 0; d1 <= d; d1++)
3609 /* Copy the cell fractions of the lower dimensions */
3610 comm->cell_f0[d1] = cell_f_row[pos++];
3611 comm->cell_f1[d1] = cell_f_row[pos++];
3613 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3615 /* Convert the communicated shift from float to int */
3616 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3619 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
/* Recompute all DLB cell sizes when the load (or box) changed: for each
 * decomposition dimension the row master computes the new boundaries
 * (set_dd_cell_sizes_dlb_root) and they are distributed to the other
 * ranks in the row (distribute_dd_cell_sizes_dlb).
 */
3623 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3624 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3625 gmx_bool bUniform, gmx_int64_t step)
3627 gmx_domdec_comm_t *comm;
3629 gmx_bool bRowMember, bRowRoot;
3634 for (d = 0; d < dd->ndim; d++)
/* A rank is the row root when it is first (index 0) in all higher dims */
3639 for (d1 = d; d1 < dd->ndim; d1++)
3641 if (dd->ci[dd->dim[d1]] > 0)
3654 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3655 ddbox, bDynamicBox, bUniform, step);
3656 cell_f_row = comm->root[d]->cell_f;
3660 cell_f_row = comm->cell_f_row;
3662 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
/* Refresh the absolute cell bounds from the unchanged relative fractions;
 * used when the cell fractions are kept but the box may have been updated.
 */
3667 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3671 /* This function assumes the box is static and should therefore
3672 * not be called when the box has changed since the last
3673 * call to dd_partition_system.
3675 for (d = 0; d < dd->ndim; d++)
3677 relative_to_absolute_cell_bounds(dd, ddbox, d);
/* Top-level DLB cell size update: either recompute the balanced sizes
 * (bDoDLB, timed in the DD communication-bound cycle counter) or just
 * rescale the existing fractions for a changed box.  Dimensions without
 * domain decomposition (nc == 1) always span the whole box.
 */
3683 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3684 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3685 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3686 gmx_wallcycle_t wcycle)
3688 gmx_domdec_comm_t *comm;
3695 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3696 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3697 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3699 else if (bDynamicBox)
3701 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3704 /* Set the dimensions for which no DD is used */
3705 for (dim = 0; dim < DIM; dim++)
3707 if (dd->nc[dim] == 1)
3709 comm->cell_x0[dim] = 0;
3710 comm->cell_x1[dim] = ddbox->box_size[dim];
3711 if (dim >= ddbox->nboundeddim)
3713 comm->cell_x0[dim] += ddbox->box0[dim];
3714 comm->cell_x1[dim] += ddbox->box0[dim];
/* Grow the per-dimension communication index arrays (comm->cd[d].ind) so
 * each holds at least npulse[dim] pulses; newly added pulse entries are
 * zero-initialized.  Never shrinks existing allocations.
 */
3720 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3723 gmx_domdec_comm_dim_t *cd;
3725 for (d = 0; d < dd->ndim; d++)
3727 cd = &dd->comm->cd[d];
3728 np = npulse[dd->dim[d]];
3729 if (np > cd->np_nalloc)
3733 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3734 dim2char(dd->dim[d]), np);
/* Warn the user once per increase; only the master prints */
3736 if (DDMASTER(dd) && cd->np_nalloc > 0)
3738 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3740 srenew(cd->ind, np);
3741 for (i = cd->np_nalloc; i < np; i++)
3743 cd->ind[i].index = NULL;
3744 cd->ind[i].nalloc = 0;
/* Update this rank's domain cell boundaries for the current step.
 * Saves the previous bounds (needed for the charge-group displacement
 * check), then uses either the dynamic load balancing path or the static
 * (possibly load-uneven) slab path, reallocating communication buffers
 * for the static pulse counts.
 */
3753 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3754 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3755 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3756 gmx_wallcycle_t wcycle)
3758 gmx_domdec_comm_t *comm;
3764 /* Copy the old cell boundaries for the cg displacement check */
3765 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3766 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3768 if (comm->bDynLoadBal)
3772 check_box_size(dd, ddbox);
3774 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3778 set_dd_cell_sizes_slb(dd, ddbox, FALSE, npulse);
3779 realloc_comm_ind(dd, npulse);
3784 for (d = 0; d < DIM; d++)
3786 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3787 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
/* Validate the cell sizes for neighbor searching and, when needed,
 * communicate the zone boundaries (cell_ns_x0/cell_ns_x1) between
 * neighboring domains.  Aborts if DLB produced a cell thinner than the
 * minimum allowed size, and checks staggered-grid ("grid jump") limits.
 */
3792 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3794 rvec cell_ns_x0, rvec cell_ns_x1,
3797 gmx_domdec_comm_t *comm;
3802 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3804 dim = dd->dim[dim_ind];
3806 /* Without PBC we don't have restrictions on the outer cells */
3807 if (!(dim >= ddbox->npbcdim &&
3808 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3809 comm->bDynLoadBal &&
3810 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3811 comm->cellsize_min[dim])
3814 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3815 gmx_step_str(step, buf), dim2char(dim),
3816 comm->cell_x1[dim] - comm->cell_x0[dim],
3817 ddbox->skew_fac[dim],
3818 dd->comm->cellsize_min[dim],
3819 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Staggered grids or unbounded dimensions need the neighbor bounds */
3823 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3825 /* Communicate the boundaries and update cell_ns_x0/1 */
3826 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3827 if (dd->bGridJump && dd->ndim > 1)
3829 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
/* Build the triclinic correction matrix tcm used to map Cartesian
 * coordinates to box-relative (lattice) coordinates: the off-diagonal
 * entries remove the contribution of the skewed box vectors.
 */
3834 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3838 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3846 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3847 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
/* Verify that the box shape is compatible with screw PBC; aborts with a
 * fatal error for unsupported off-diagonal box components.
 */
3856 static void check_screw_box(matrix box)
3858 /* Mathematical limitation */
3859 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3861 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3864 /* Limitation due to the asymmetry of the eighth shell method */
3865 if (box[ZZ][YY] != 0)
3867 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
/* Master-only: assign every charge group to a domain decomposition cell.
 * Computes each group's center of geometry, puts it (and its atoms) into
 * the unit cell honoring triclinic and screw PBC, locates the owning cell
 * from the master cell boundaries (ma->cell_x), and fills the master
 * distribution arrays (ma->ncg, ma->nat, ma->cg, ma->index).
 * Note: pos[] is modified in place by the PBC wrapping.
 */
3871 static void distribute_cg(FILE *fplog, gmx_int64_t step,
3872 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3875 gmx_domdec_master_t *ma;
3876 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3877 int i, icg, j, k, k0, k1, d, npbcdim;
3879 rvec box_size, cg_cm;
3881 real nrcg, inv_ncg, pos_d;
3883 gmx_bool bUnbounded, bScrew;
3887 if (tmp_ind == NULL)
/* Per-node scratch lists; initial size assumes roughly even spread */
3889 snew(tmp_nalloc, dd->nnodes);
3890 snew(tmp_ind, dd->nnodes);
3891 for (i = 0; i < dd->nnodes; i++)
3893 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3894 snew(tmp_ind[i], tmp_nalloc[i]);
3898 /* Clear the count */
3899 for (i = 0; i < dd->nnodes; i++)
3905 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3907 cgindex = cgs->index;
3909 /* Compute the center of geometry for all charge groups */
3910 for (icg = 0; icg < cgs->nr; icg++)
3913 k1 = cgindex[icg+1];
/* Single-atom group: center is the atom position itself */
3917 copy_rvec(pos[k0], cg_cm);
3924 for (k = k0; (k < k1); k++)
3926 rvec_inc(cg_cm, pos[k]);
3928 for (d = 0; (d < DIM); d++)
3930 cg_cm[d] *= inv_ncg;
3933 /* Put the charge group in the box and determine the cell index */
3934 for (d = DIM-1; d >= 0; d--)
3937 if (d < dd->npbcdim)
3939 bScrew = (dd->bScrewPBC && d == XX);
3940 if (tric_dir[d] && dd->nc[d] > 1)
3942 /* Use triclinic coordinates for this dimension */
3943 for (j = d+1; j < DIM; j++)
3945 pos_d += cg_cm[j]*tcm[j][d];
3948 while (pos_d >= box[d][d])
3951 rvec_dec(cg_cm, box[d]);
/* Screw PBC: shifting over x mirrors y and z */
3954 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3955 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3957 for (k = k0; (k < k1); k++)
3959 rvec_dec(pos[k], box[d]);
3962 pos[k][YY] = box[YY][YY] - pos[k][YY];
3963 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3970 rvec_inc(cg_cm, box[d]);
3973 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3974 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3976 for (k = k0; (k < k1); k++)
3978 rvec_inc(pos[k], box[d]);
3981 pos[k][YY] = box[YY][YY] - pos[k][YY];
3982 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3987 /* This could be done more efficiently */
3989 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
3994 i = dd_index(dd->nc, ind);
3995 if (ma->ncg[i] == tmp_nalloc[i])
3997 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
3998 srenew(tmp_ind[i], tmp_nalloc[i]);
4000 tmp_ind[i][ma->ncg[i]] = icg;
4002 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
/* Flatten the per-node lists into the contiguous ma->cg array */
4006 for (i = 0; i < dd->nnodes; i++)
4009 for (k = 0; k < ma->ncg[i]; k++)
4011 ma->cg[k1++] = tmp_ind[i][k];
4014 ma->index[dd->nnodes] = k1;
4016 for (i = 0; i < dd->nnodes; i++)
4026 fprintf(fplog, "Charge group distribution at step %s:",
4027 gmx_step_str(step, buf));
4028 for (i = 0; i < dd->nnodes; i++)
4030 fprintf(fplog, " %d", ma->ncg[i]);
4032 fprintf(fplog, "\n");
/* Distribute all charge groups from the master to the home domains:
 * the master computes the assignment (distribute_cg), then scatters the
 * per-node cg/atom counts and the global cg indices; each rank builds its
 * local cgindex from the global charge group sizes.
 */
4036 static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4037 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4040 gmx_domdec_master_t *ma = NULL;
4043 int *ibuf, buf2[2] = { 0, 0 };
4044 gmx_bool bMaster = DDMASTER(dd);
4051 check_screw_box(box);
4054 set_dd_cell_sizes_slb(dd, ddbox, TRUE, npulse);
4056 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
/* Pack cg and atom counts pairwise for the scatter below */
4057 for (i = 0; i < dd->nnodes; i++)
4059 ma->ibuf[2*i] = ma->ncg[i];
4060 ma->ibuf[2*i+1] = ma->nat[i];
4068 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4070 dd->ncg_home = buf2[0];
4071 dd->nat_home = buf2[1];
4072 dd->ncg_tot = dd->ncg_home;
4073 dd->nat_tot = dd->nat_home;
4074 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4076 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4077 srenew(dd->index_gl, dd->cg_nalloc);
4078 srenew(dd->cgindex, dd->cg_nalloc+1);
/* Byte counts and displacements for the variable-size scatterv */
4082 for (i = 0; i < dd->nnodes; i++)
4084 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4085 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4090 DDMASTER(dd) ? ma->ibuf : NULL,
4091 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4092 DDMASTER(dd) ? ma->cg : NULL,
4093 dd->ncg_home*sizeof(int), dd->index_gl);
4095 /* Determine the home charge group sizes */
4097 for (i = 0; i < dd->ncg_home; i++)
4099 cg_gl = dd->index_gl[i];
4101 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4106 fprintf(debug, "Home charge groups:\n");
4107 for (i = 0; i < dd->ncg_home; i++)
4109 fprintf(debug, " %d", dd->index_gl[i]);
4112 fprintf(debug, "\n");
4115 fprintf(debug, "\n");
/* Per-atom variant: compact one state vector (src) in place for charge
 * groups that stay home and copy the atoms of moving groups into the
 * per-direction communication buffers (comm->cgcm_state), interleaved by
 * vector index vec out of nvec vectors.  Returns the new home position.
 */
4119 static int compact_and_copy_vec_at(int ncg, int *move,
4122 rvec *src, gmx_domdec_comm_t *comm,
4125 int m, icg, i, i0, i1, nrcg;
/* pos_vec tracks the write position per communication direction */
4131 for (m = 0; m < DIM*2; m++)
4137 for (icg = 0; icg < ncg; icg++)
4139 i1 = cgindex[icg+1];
4145 /* Compact the home array in place */
4146 for (i = i0; i < i1; i++)
4148 copy_rvec(src[i], src[home_pos++]);
4154 /* Copy to the communication buffer */
/* Skip the cg_cm entry and the earlier vectors of this group */
4156 pos_vec[m] += 1 + vec*nrcg;
4157 for (i = i0; i < i1; i++)
4159 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4161 pos_vec[m] += (nvec - vec - 1)*nrcg;
4165 home_pos += i1 - i0;
/* Per-charge-group variant: compact the cg center array (src) in place
 * for groups that stay and copy moving groups' centers into the head of
 * each group's slot in the communication buffers.  Returns the new home
 * position.
 */
4173 static int compact_and_copy_vec_cg(int ncg, int *move,
4175 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4178 int m, icg, i0, i1, nrcg;
4184 for (m = 0; m < DIM*2; m++)
4190 for (icg = 0; icg < ncg; icg++)
4192 i1 = cgindex[icg+1];
4198 /* Compact the home array in place */
4199 copy_rvec(src[icg], src[home_pos++]);
4205 /* Copy to the communication buffer */
4206 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
/* Reserve space for the nvec per-atom vectors that follow */
4207 pos_vec[m] += 1 + nrcg*nvec;
/* Compact the home index arrays after charge groups have moved away:
 * staying groups are shifted down in index_gl/cgindex/gatindex/cginfo and
 * the global-to-local atom lookup is updated; leaving groups have their
 * global indices removed from ga2la and are unmarked in bLocalCG.
 * Returns the new number of home charge groups.
 */
4219 static int compact_ind(int ncg, int *move,
4220 int *index_gl, int *cgindex,
4222 gmx_ga2la_t ga2la, char *bLocalCG,
4225 int cg, nat, a0, a1, a, a_gl;
4230 for (cg = 0; cg < ncg; cg++)
4236 /* Compact the home arrays in place.
4237 * Anything that can be done here avoids access to global arrays.
4239 cgindex[home_pos] = nat;
4240 for (a = a0; a < a1; a++)
4243 gatindex[nat] = a_gl;
4244 /* The cell number stays 0, so we don't need to set it */
4245 ga2la_change_la(ga2la, a_gl, nat);
4248 index_gl[home_pos] = index_gl[cg];
4249 cginfo[home_pos] = cginfo[cg];
4250 /* The charge group remains local, so bLocalCG does not change */
4255 /* Clear the global indices */
4256 for (a = a0; a < a1; a++)
4258 ga2la_del(ga2la, gatindex[a]);
4262 bLocalCG[index_gl[cg]] = FALSE;
/* Terminate the compacted cg index with the total atom count */
4266 cgindex[home_pos] = nat;
/* Non-compacting counterpart of compact_ind: for charge groups that moved
 * away, remove their atoms from the global-to-local lookup, unmark them
 * in bLocalCG, and flag them in the ns grid cell index so neighbor
 * searching knows they left.
 */
4271 static void clear_and_mark_ind(int ncg, int *move,
4272 int *index_gl, int *cgindex, int *gatindex,
4273 gmx_ga2la_t ga2la, char *bLocalCG,
4278 for (cg = 0; cg < ncg; cg++)
4284 /* Clear the global indices */
4285 for (a = a0; a < a1; a++)
4287 ga2la_del(ga2la, gatindex[a]);
4291 bLocalCG[index_gl[cg]] = FALSE;
4293 /* Signal that this cg has moved using the ns cell index.
4294 * Here we set it to -1. fill_grid will change it
4295 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4297 cell_index[cg] = -1;
4302 static void print_cg_move(FILE *fplog,
4304 gmx_int64_t step, int cg, int dim, int dir,
4305 gmx_bool bHaveLimitdAndCMOld, real limitd,
4306 rvec cm_old, rvec cm_new, real pos_d)
4308 gmx_domdec_comm_t *comm;
4313 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4314 if (bHaveLimitdAndCMOld)
4316 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4317 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4321 fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4322 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4324 fprintf(fplog, "distance out of cell %f\n",
4325 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4326 if (bHaveLimitdAndCMOld)
4328 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4329 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4331 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4332 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4333 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4335 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4336 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4338 comm->cell_x0[dim], comm->cell_x1[dim]);
/* Report a charge group that moved too far (to the log file when
 * available and to stderr) and abort the run with a fatal error.
 */
4341 static void cg_move_error(FILE *fplog,
4343 gmx_int64_t step, int cg, int dim, int dir,
4344 gmx_bool bHaveLimitdAndCMOld, real limitd,
4345 rvec cm_old, rvec cm_new, real pos_d)
4349 print_cg_move(fplog, dd, step, cg, dim, dir,
4350 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4352 print_cg_move(stderr, dd, step, cg, dim, dir,
4353 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4355 "A charge group moved too far between two domain decomposition steps\n"
4356 "This usually means that your system is not well equilibrated");
/* Apply the screw-PBC rotation to all distributed state vectors of atom a:
 * positions are mirrored in y and z within the box, while velocity-like
 * quantities are negated in y and z.  Only valid for rectangular boxes.
 */
4359 static void rotate_state_atom(t_state *state, int a)
4363 for (est = 0; est < estNR; est++)
4365 if (EST_DISTR(est) && (state->flags & (1<<est)))
4370 /* Rotate the complete state; for a rectangular box only */
4371 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4372 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4375 state->v[a][YY] = -state->v[a][YY];
4376 state->v[a][ZZ] = -state->v[a][ZZ];
4379 state->sd_X[a][YY] = -state->sd_X[a][YY];
4380 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4383 state->cg_p[a][YY] = -state->cg_p[a][YY];
4384 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4386 case estDISRE_INITF:
4387 case estDISRE_RM3TAV:
4388 case estORIRE_INITF:
4390 /* These are distances, so not affected by rotation */
4393 gmx_incons("Unknown state entry encountered in rotate_state_atom");
/* Return the "moved" flag array, reallocated (preserving contents) so it
 * can hold at least natoms entries.
 */
4399 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4401 if (natoms > comm->moved_nalloc)
4403 /* Contents should be preserved here */
4404 comm->moved_nalloc = over_alloc_dd(natoms);
4405 srenew(comm->moved, comm->moved_nalloc);
/* For the home charge groups in [cg_start, cg_end): compute the new
 * center of geometry, apply PBC (triclinic, screw, or rectangular) to the
 * center and the atoms, detect crossings of this domain's cell boundaries
 * (aborting via cg_move_error if a group moved further than allowed), and
 * encode the target communication direction plus flags into move[cg].
 * Designed to be called from multiple OpenMP threads on disjoint ranges.
 */
4411 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4414 ivec tric_dir, matrix tcm,
4415 rvec cell_x0, rvec cell_x1,
4416 rvec limitd, rvec limit0, rvec limit1,
4418 int cg_start, int cg_end,
4423 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4424 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4428 real inv_ncg, pos_d;
4431 npbcdim = dd->npbcdim;
4433 for (cg = cg_start; cg < cg_end; cg++)
/* Single-atom group: its center is the atom position */
4440 copy_rvec(state->x[k0], cm_new);
4447 for (k = k0; (k < k1); k++)
4449 rvec_inc(cm_new, state->x[k]);
4451 for (d = 0; (d < DIM); d++)
4453 cm_new[d] = inv_ncg*cm_new[d];
4458 /* Do pbc and check DD cell boundary crossings */
4459 for (d = DIM-1; d >= 0; d--)
4463 bScrew = (dd->bScrewPBC && d == XX);
4464 /* Determine the location of this cg in lattice coordinates */
4468 for (d2 = d+1; d2 < DIM; d2++)
4470 pos_d += cm_new[d2]*tcm[d2][d];
4473 /* Put the charge group in the triclinic unit-cell */
4474 if (pos_d >= cell_x1[d])
4476 if (pos_d >= limit1[d])
4478 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4479 cg_cm[cg], cm_new, pos_d);
/* Wrap around the box when leaving through the last cell */
4482 if (dd->ci[d] == dd->nc[d] - 1)
4484 rvec_dec(cm_new, state->box[d]);
4487 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4488 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4490 for (k = k0; (k < k1); k++)
4492 rvec_dec(state->x[k], state->box[d]);
4495 rotate_state_atom(state, k);
4500 else if (pos_d < cell_x0[d])
4502 if (pos_d < limit0[d])
4504 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4505 cg_cm[cg], cm_new, pos_d);
4510 rvec_inc(cm_new, state->box[d]);
4513 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4514 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4516 for (k = k0; (k < k1); k++)
4518 rvec_inc(state->x[k], state->box[d]);
4521 rotate_state_atom(state, k);
4527 else if (d < npbcdim)
4529 /* Put the charge group in the rectangular unit-cell */
4530 while (cm_new[d] >= state->box[d][d])
4532 rvec_dec(cm_new, state->box[d])
4533 for (k = k0; (k < k1); k++)
4535 rvec_dec(state->x[k], state->box[d]);
4538 while (cm_new[d] < 0)
4540 rvec_inc(cm_new, state->box[d]);
4541 for (k = k0; (k < k1); k++)
4543 rvec_inc(state->x[k], state->box[d]);
4549 copy_rvec(cm_new, cg_cm[cg]);
4551 /* Determine where this cg should go */
4554 for (d = 0; d < dd->ndim; d++)
4559 flag |= DD_FLAG_FW(d);
4565 else if (dev[dim] == -1)
4567 flag |= DD_FLAG_BW(d);
4570 if (dd->nc[dim] > 2)
4581 /* Temporarily store the flag in move */
4582 move[cg] = mc + flag;
4586 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4587 gmx_domdec_t *dd, ivec tric_dir,
4588 t_state *state, rvec **f,
4597 int ncg[DIM*2], nat[DIM*2];
4598 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4599 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4600 int sbuf[2], rbuf[2];
4601 int home_pos_cg, home_pos_at, buf_pos;
4603 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4606 real inv_ncg, pos_d;
4608 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4610 cginfo_mb_t *cginfo_mb;
4611 gmx_domdec_comm_t *comm;
4613 int nthread, thread;
4617 check_screw_box(state->box);
4621 if (fr->cutoff_scheme == ecutsGROUP)
4626 for (i = 0; i < estNR; i++)
4632 case estX: /* Always present */ break;
4633 case estV: bV = (state->flags & (1<<i)); break;
4634 case estSDX: bSDX = (state->flags & (1<<i)); break;
4635 case estCGP: bCGP = (state->flags & (1<<i)); break;
4638 case estDISRE_INITF:
4639 case estDISRE_RM3TAV:
4640 case estORIRE_INITF:
4642 /* No processing required */
4645 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4650 if (dd->ncg_tot > comm->nalloc_int)
4652 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4653 srenew(comm->buf_int, comm->nalloc_int);
4655 move = comm->buf_int;
4657 /* Clear the count */
4658 for (c = 0; c < dd->ndim*2; c++)
4664 npbcdim = dd->npbcdim;
4666 for (d = 0; (d < DIM); d++)
4668 limitd[d] = dd->comm->cellsize_min[d];
4669 if (d >= npbcdim && dd->ci[d] == 0)
4671 cell_x0[d] = -GMX_FLOAT_MAX;
4675 cell_x0[d] = comm->cell_x0[d];
4677 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4679 cell_x1[d] = GMX_FLOAT_MAX;
4683 cell_x1[d] = comm->cell_x1[d];
4687 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4688 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4692 /* We check after communication if a charge group moved
4693 * more than one cell. Set the pre-comm check limit to float_max.
4695 limit0[d] = -GMX_FLOAT_MAX;
4696 limit1[d] = GMX_FLOAT_MAX;
4700 make_tric_corr_matrix(npbcdim, state->box, tcm);
4702 cgindex = dd->cgindex;
4704 nthread = gmx_omp_nthreads_get(emntDomdec);
4706 /* Compute the center of geometry for all home charge groups
4707 * and put them in the box and determine where they should go.
4709 #pragma omp parallel for num_threads(nthread) schedule(static)
4710 for (thread = 0; thread < nthread; thread++)
4712 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4713 cell_x0, cell_x1, limitd, limit0, limit1,
4715 ( thread *dd->ncg_home)/nthread,
4716 ((thread+1)*dd->ncg_home)/nthread,
4717 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4721 for (cg = 0; cg < dd->ncg_home; cg++)
4726 flag = mc & ~DD_FLAG_NRCG;
4727 mc = mc & DD_FLAG_NRCG;
4730 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4732 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4733 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4735 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4736 /* We store the cg size in the lower 16 bits
4737 * and the place where the charge group should go
4738 * in the next 6 bits. This saves some communication volume.
4740 nrcg = cgindex[cg+1] - cgindex[cg];
4741 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4747 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4748 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4751 for (i = 0; i < dd->ndim*2; i++)
4753 *ncg_moved += ncg[i];
4770 /* Make sure the communication buffers are large enough */
4771 for (mc = 0; mc < dd->ndim*2; mc++)
4773 nvr = ncg[mc] + nat[mc]*nvec;
4774 if (nvr > comm->cgcm_state_nalloc[mc])
4776 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4777 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4781 switch (fr->cutoff_scheme)
4784 /* Recalculating cg_cm might be cheaper than communicating,
4785 * but that could give rise to rounding issues.
4788 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4789 nvec, cg_cm, comm, bCompact);
4792 /* Without charge groups we send the moved atom coordinates
4793 * over twice. This is so the code below can be used without
4794 * many conditionals for both for with and without charge groups.
4797 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4798 nvec, state->x, comm, FALSE);
4801 home_pos_cg -= *ncg_moved;
4805 gmx_incons("unimplemented");
4811 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4812 nvec, vec++, state->x, comm, bCompact);
4815 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4816 nvec, vec++, state->v, comm, bCompact);
4820 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4821 nvec, vec++, state->sd_X, comm, bCompact);
4825 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4826 nvec, vec++, state->cg_p, comm, bCompact);
4831 compact_ind(dd->ncg_home, move,
4832 dd->index_gl, dd->cgindex, dd->gatindex,
4833 dd->ga2la, comm->bLocalCG,
4838 if (fr->cutoff_scheme == ecutsVERLET)
4840 moved = get_moved(comm, dd->ncg_home);
4842 for (k = 0; k < dd->ncg_home; k++)
4849 moved = fr->ns.grid->cell_index;
4852 clear_and_mark_ind(dd->ncg_home, move,
4853 dd->index_gl, dd->cgindex, dd->gatindex,
4854 dd->ga2la, comm->bLocalCG,
4858 cginfo_mb = fr->cginfo_mb;
4860 *ncg_stay_home = home_pos_cg;
4861 for (d = 0; d < dd->ndim; d++)
4867 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4870 /* Communicate the cg and atom counts */
4875 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4876 d, dir, sbuf[0], sbuf[1]);
4878 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4880 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4882 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4883 srenew(comm->buf_int, comm->nalloc_int);
4886 /* Communicate the charge group indices, sizes and flags */
4887 dd_sendrecv_int(dd, d, dir,
4888 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4889 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4891 nvs = ncg[cdd] + nat[cdd]*nvec;
4892 i = rbuf[0] + rbuf[1] *nvec;
4893 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4895 /* Communicate cgcm and state */
4896 dd_sendrecv_rvec(dd, d, dir,
4897 comm->cgcm_state[cdd], nvs,
4898 comm->vbuf.v+nvr, i);
4899 ncg_recv += rbuf[0];
4900 nat_recv += rbuf[1];
4904 /* Process the received charge groups */
4906 for (cg = 0; cg < ncg_recv; cg++)
4908 flag = comm->buf_int[cg*DD_CGIBS+1];
4910 if (dim >= npbcdim && dd->nc[dim] > 2)
4912 /* No pbc in this dim and more than one domain boundary.
4913 * We do a separate check if a charge group didn't move too far.
4915 if (((flag & DD_FLAG_FW(d)) &&
4916 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4917 ((flag & DD_FLAG_BW(d)) &&
4918 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4920 cg_move_error(fplog, dd, step, cg, dim,
4921 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4923 comm->vbuf.v[buf_pos],
4924 comm->vbuf.v[buf_pos],
4925 comm->vbuf.v[buf_pos][dim]);
4932 /* Check which direction this cg should go */
4933 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4937 /* The cell boundaries for dimension d2 are not equal
4938 * for each cell row of the lower dimension(s),
4939 * therefore we might need to redetermine where
4940 * this cg should go.
4943 /* If this cg crosses the box boundary in dimension d2
4944 * we can use the communicated flag, so we do not
4945 * have to worry about pbc.
4947 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4948 (flag & DD_FLAG_FW(d2))) ||
4949 (dd->ci[dim2] == 0 &&
4950 (flag & DD_FLAG_BW(d2)))))
4952 /* Clear the two flags for this dimension */
4953 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4954 /* Determine the location of this cg
4955 * in lattice coordinates
4957 pos_d = comm->vbuf.v[buf_pos][dim2];
4960 for (d3 = dim2+1; d3 < DIM; d3++)
4963 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4966 /* Check of we are not at the box edge.
4967 * pbc is only handled in the first step above,
4968 * but this check could move over pbc while
4969 * the first step did not due to different rounding.
4971 if (pos_d >= cell_x1[dim2] &&
4972 dd->ci[dim2] != dd->nc[dim2]-1)
4974 flag |= DD_FLAG_FW(d2);
4976 else if (pos_d < cell_x0[dim2] &&
4979 flag |= DD_FLAG_BW(d2);
4981 comm->buf_int[cg*DD_CGIBS+1] = flag;
4984 /* Set to which neighboring cell this cg should go */
4985 if (flag & DD_FLAG_FW(d2))
4989 else if (flag & DD_FLAG_BW(d2))
4991 if (dd->nc[dd->dim[d2]] > 2)
5003 nrcg = flag & DD_FLAG_NRCG;
5006 if (home_pos_cg+1 > dd->cg_nalloc)
5008 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5009 srenew(dd->index_gl, dd->cg_nalloc);
5010 srenew(dd->cgindex, dd->cg_nalloc+1);
5012 /* Set the global charge group index and size */
5013 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5014 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5015 /* Copy the state from the buffer */
5016 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5017 if (fr->cutoff_scheme == ecutsGROUP)
5020 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5024 /* Set the cginfo */
5025 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5026 dd->index_gl[home_pos_cg]);
5029 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5032 if (home_pos_at+nrcg > state->nalloc)
5034 dd_realloc_state(state, f, home_pos_at+nrcg);
5036 for (i = 0; i < nrcg; i++)
5038 copy_rvec(comm->vbuf.v[buf_pos++],
5039 state->x[home_pos_at+i]);
5043 for (i = 0; i < nrcg; i++)
5045 copy_rvec(comm->vbuf.v[buf_pos++],
5046 state->v[home_pos_at+i]);
5051 for (i = 0; i < nrcg; i++)
5053 copy_rvec(comm->vbuf.v[buf_pos++],
5054 state->sd_X[home_pos_at+i]);
5059 for (i = 0; i < nrcg; i++)
5061 copy_rvec(comm->vbuf.v[buf_pos++],
5062 state->cg_p[home_pos_at+i]);
5066 home_pos_at += nrcg;
5070 /* Reallocate the buffers if necessary */
5071 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5073 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5074 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5076 nvr = ncg[mc] + nat[mc]*nvec;
5077 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5079 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5080 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5082 /* Copy from the receive to the send buffers */
5083 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5084 comm->buf_int + cg*DD_CGIBS,
5085 DD_CGIBS*sizeof(int));
5086 memcpy(comm->cgcm_state[mc][nvr],
5087 comm->vbuf.v[buf_pos],
5088 (1+nrcg*nvec)*sizeof(rvec));
5089 buf_pos += 1 + nrcg*nvec;
5096 /* With sorting (!bCompact) the indices are now only partially up to date
5097 * and ncg_home and nat_home are not the real count, since there are
5098 * "holes" in the arrays for the charge groups that moved to neighbors.
5100 if (fr->cutoff_scheme == ecutsVERLET)
5102 moved = get_moved(comm, home_pos_cg);
5104 for (i = dd->ncg_home; i < home_pos_cg; i++)
5109 dd->ncg_home = home_pos_cg;
5110 dd->nat_home = home_pos_at;
5115 "Finished repartitioning: cgs moved out %d, new home %d\n",
5116 *ncg_moved, dd->ncg_home-*ncg_moved);
5121 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5123 dd->comm->cycl[ddCycl] += cycles;
5124 dd->comm->cycl_n[ddCycl]++;
5125 if (cycles > dd->comm->cycl_max[ddCycl])
5127 dd->comm->cycl_max[ddCycl] = cycles;
/* Estimate the flop cost of the force calculation from the nrnb
 * (non-bonded-run-number) counters, used for flop-based load balancing.
 * Non-bonded kernel counts are scaled down to approximate real timings
 * (see comment below); free-energy/1-4 and bonded terms count at full
 * cost.  NOTE(review): this excerpt has elided lines (declarations of
 * i/sum/name, braces, return); only comments were added.
 */
5131 static double force_flop_count(t_nrnb *nrnb)
5138 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5140 /* To get closer to the real timings, we half the count
5141 * for the normal loops and again half it for water loops.
/* "W3"/"W4" in the kernel name marks water-optimized loops: quarter weight */
5144 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5146 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5150 sum += nrnb->n[i]*0.50*cost_nrnb(i);
/* Free-energy kernels up to 1-4 interactions: full cost */
5153 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5156 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5158 sum += nrnb->n[i]*cost_nrnb(i);
/* Bonded interactions (bonds .. walls): full cost */
5161 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5163 sum += nrnb->n[i]*cost_nrnb(i);
5169 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5171 if (dd->comm->eFlop)
5173 dd->comm->flop -= force_flop_count(nrnb);
5176 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5178 if (dd->comm->eFlop)
5180 dd->comm->flop += force_flop_count(nrnb);
5185 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5189 for (i = 0; i < ddCyclNr; i++)
5191 dd->comm->cycl[i] = 0;
5192 dd->comm->cycl_n[i] = 0;
5193 dd->comm->cycl_max[i] = 0;
5196 dd->comm->flop_n = 0;
/* Collect the per-rank load measurements along all DD dimensions.
 *
 * Works from the innermost DD dimension backwards.  In each dimension,
 * the ranks that participate pack their load data (force cycles, cell
 * fraction, DLB limit info, PME timings) into sbuf and gather it to the
 * row root, which reduces it into comm->load[d].  After the loop the
 * fully reduced totals sit in comm->load[0] on the DD master and are
 * accumulated into the run totals (load_sum, load_max, load_lim, ...).
 * NOTE(review): this excerpt has elided lines (braces, declarations,
 * some else-branches); only comments were added.
 */
5199 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5201 gmx_domdec_comm_t *comm;
5202 gmx_domdec_load_t *load;
5203 gmx_domdec_root_t *root = NULL;
5204 int d, dim, cid, i, pos;
5205 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5210 fprintf(debug, "get_load_distribution start\n");
5213 wallcycle_start(wcycle, ewcDDCOMMLOAD);
/* Separate PME ranks exist when this PP rank has a PME peer assigned */
5217 bSepPME = (dd->pme_nodeid >= 0);
/* Reduce loads from the innermost to the outermost DD dimension */
5219 for (d = dd->ndim-1; d >= 0; d--)
5222 /* Check if we participate in the communication in this dimension */
5223 if (d == dd->ndim-1 ||
5224 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5226 load = &comm->load[d];
5229 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
5232 if (d == dd->ndim-1)
/* Innermost dimension: start from the locally measured force load */
5234 sbuf[pos++] = dd_force_load(comm);
5235 sbuf[pos++] = sbuf[0];
5238 sbuf[pos++] = sbuf[0];
5239 sbuf[pos++] = cell_frac;
5242 sbuf[pos++] = comm->cell_f_max0[d];
5243 sbuf[pos++] = comm->cell_f_min1[d];
5248 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5249 sbuf[pos++] = comm->cycl[ddCyclPME];
/* Outer dimensions: forward the already-reduced load of dimension d+1 */
5254 sbuf[pos++] = comm->load[d+1].sum;
5255 sbuf[pos++] = comm->load[d+1].max;
5258 sbuf[pos++] = comm->load[d+1].sum_m;
5259 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5260 sbuf[pos++] = comm->load[d+1].flags;
5263 sbuf[pos++] = comm->cell_f_max0[d];
5264 sbuf[pos++] = comm->cell_f_min1[d];
5269 sbuf[pos++] = comm->load[d+1].mdf;
5270 sbuf[pos++] = comm->load[d+1].pme;
5274 /* Communicate a row in DD direction d.
5275 * The communicators are setup such that the root always has rank 0.
/* Floats are gathered as raw bytes; every rank contributes nload floats */
5278 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5279 load->load, load->nload*sizeof(float), MPI_BYTE,
5280 0, comm->mpi_comm_load[d]);
5282 if (dd->ci[dim] == dd->master_ci[dim])
5284 /* We are the root, process this row */
5285 if (comm->bDynLoadBal)
5287 root = comm->root[d];
/* Reduce the gathered per-cell entries into load->sum/max/... */
5297 for (i = 0; i < dd->nc[dim]; i++)
5299 load->sum += load->load[pos++];
5300 load->max = max(load->max, load->load[pos]);
5306 /* This direction could not be load balanced properly,
5307 * therefore we need to use the maximum iso the average load.
5309 load->sum_m = max(load->sum_m, load->load[pos]);
5313 load->sum_m += load->load[pos];
5316 load->cvol_min = min(load->cvol_min, load->load[pos]);
/* flags arrive as a float; +0.5 rounds to the nearest int on conversion */
5320 load->flags = (int)(load->load[pos++] + 0.5);
5324 root->cell_f_max0[i] = load->load[pos++];
5325 root->cell_f_min1[i] = load->load[pos++];
5330 load->mdf = max(load->mdf, load->load[pos]);
5332 load->pme = max(load->pme, load->load[pos]);
/* Record when DLB hit a cell-size limit in this row */
5336 if (comm->bDynLoadBal && root->bLimited)
5338 load->sum_m *= dd->nc[dim];
5339 load->flags |= (1<<d);
/* Accumulate run totals (only meaningful on the DD master) */
5347 comm->nload += dd_load_count(comm);
5348 comm->load_step += comm->cycl[ddCyclStep];
5349 comm->load_sum += comm->load[0].sum;
5350 comm->load_max += comm->load[0].max;
5351 if (comm->bDynLoadBal)
5353 for (d = 0; d < dd->ndim; d++)
5355 if (comm->load[0].flags & (1<<d))
/* Count how often dimension d limited the load balancing */
5357 comm->load_lim[d]++;
5363 comm->load_mdf += comm->load[0].mdf;
5364 comm->load_pme += comm->load[0].pme;
5368 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5372 fprintf(debug, "get_load_distribution finished\n");
/* Compute the fraction of total run time lost to force-calculation
 * load imbalance: (max_load*nnodes - sum_load) / (step_time*nnodes).
 * NOTE(review): the 'return' keyword and the branch for nload == 0 are
 * elided in this excerpt; only comments were added.
 */
5376 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5378 /* Return the relative performance loss on the total run time
5379 * due to the force calculation load imbalance.
5381 if (dd->comm->nload > 0)
5384 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5385 (dd->comm->load_step*dd->nnodes);
/* Print the end-of-run DD load-balance summary (average imbalance,
 * time lost waiting, DLB limitation statistics, PME/PP balance) to both
 * the log file and stderr, with advisory NOTEs when losses exceed
 * DD_PERF_LOSS.  Runs only on the DD master with load data present.
 * NOTE(review): this excerpt has elided lines (braces, buf declaration,
 * some sprintf openings); only comments were added.
 */
5393 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5396 int npp, npme, nnodes, d, limp;
5397 float imbal, pme_f_ratio, lossf, lossp = 0;
5399 gmx_domdec_comm_t *comm;
5402 if (DDMASTER(dd) && comm->nload > 0)
5405 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5406 nnodes = npp + npme;
/* Average imbalance: ratio of max to mean PP load, minus 1 */
5407 imbal = comm->load_max*npp/comm->load_sum - 1;
5408 lossf = dd_force_imb_perf_loss(dd);
5409 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5410 fprintf(fplog, "%s", buf);
5411 fprintf(stderr, "\n");
5412 fprintf(stderr, "%s", buf);
5413 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5414 fprintf(fplog, "%s", buf);
5415 fprintf(stderr, "%s", buf);
5417 if (comm->bDynLoadBal)
5419 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5420 for (d = 0; d < dd->ndim; d++)
/* Percentage of load-recording steps where dimension d was limited;
 * the +1/(2*nload) term implements round-to-nearest */
5422 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5423 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5429 sprintf(buf+strlen(buf), "\n");
5430 fprintf(fplog, "%s", buf);
5431 fprintf(stderr, "%s", buf);
/* PME/PP balance: positive lossp means PME ranks had more work */
5435 pme_f_ratio = comm->load_pme/comm->load_mdf;
5436 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5439 lossp *= (float)npme/(float)nnodes;
5443 lossp *= (float)npp/(float)nnodes;
5445 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5446 fprintf(fplog, "%s", buf);
5447 fprintf(stderr, "%s", buf);
5448 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5449 fprintf(fplog, "%s", buf);
5450 fprintf(stderr, "%s", buf);
5452 fprintf(fplog, "\n");
5453 fprintf(stderr, "\n");
/* Advisory notes when the losses are large enough to matter */
5455 if (lossf >= DD_PERF_LOSS)
5458 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5459 " in the domain decomposition.\n", lossf*100);
5460 if (!comm->bDynLoadBal)
5462 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5466 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5468 fprintf(fplog, "%s\n", buf);
5469 fprintf(stderr, "%s\n", buf);
5471 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
5474 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5475 " had %s work to do than the PP nodes.\n"
5476 " You might want to %s the number of PME nodes\n"
5477 " or %s the cut-off and the grid spacing.\n",
5479 (lossp < 0) ? "less" : "more",
5480 (lossp < 0) ? "decrease" : "increase",
5481 (lossp < 0) ? "decrease" : "increase");
5482 fprintf(fplog, "%s\n", buf);
5483 fprintf(stderr, "%s\n", buf);
5488 static float dd_vol_min(gmx_domdec_t *dd)
5490 return dd->comm->load[0].cvol_min*dd->nnodes;
5493 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5495 return dd->comm->load[0].flags;
5498 static float dd_f_imbal(gmx_domdec_t *dd)
5500 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5503 float dd_pme_f_ratio(gmx_domdec_t *dd)
5505 if (dd->comm->cycl_n[ddCyclPME] > 0)
5507 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
/* Write a one-line DD load report for this step to the log file:
 * optional DLB-limitation warning, min/average cell volume (with '!'
 * when limited), force imbalance percentage and PME/force ratio.
 * NOTE(review): this excerpt has elided lines (declarations, braces,
 * the fprintf opening for the limitation message); comments only added.
 */
5515 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5520 flags = dd_load_flags(dd);
/* List the dimensions in which DLB hit the minimum cell size */
5524 "DD load balancing is limited by minimum cell size in dimension");
5525 for (d = 0; d < dd->ndim; d++)
5529 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5532 fprintf(fplog, "\n");
5534 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5535 if (dd->comm->bDynLoadBal)
5537 fprintf(fplog, " vol min/aver %5.3f%c",
5538 dd_vol_min(dd), flags ? '!' : ' ');
5540 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5541 if (dd->comm->cycl_n[ddCyclPME])
5543 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5545 fprintf(fplog, "\n\n");
5548 static void dd_print_load_verbose(gmx_domdec_t *dd)
5550 if (dd->comm->bDynLoadBal)
5552 fprintf(stderr, "vol %4.2f%c ",
5553 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5555 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5556 if (dd->comm->cycl_n[ddCyclPME])
5558 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
/* Create the MPI communicator for one row of DD cells along dimension
 * dim_ind (the row through 'loc'), used for gathering load data.
 * Ranks not in the row pass MPI_UNDEFINED so they get no communicator.
 * With DLB enabled, the row root also allocates the cell-boundary
 * bookkeeping arrays; non-roots only need a cell_f receive buffer.
 * NOTE(review): this excerpt has elided lines (declarations, braces,
 * the loop over loc_c[dim], the MPI_Comm_split output argument);
 * only comments were added.
 */
5563 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5568 gmx_domdec_root_t *root;
5569 gmx_bool bPartOfGroup = FALSE;
5571 dim = dd->dim[dim_ind];
5572 copy_ivec(loc, loc_c);
/* Walk the row of cells along 'dim' to see if our rank is in it */
5573 for (i = 0; i < dd->nc[dim]; i++)
5576 rank = dd_index(dd->nc, loc_c);
5577 if (rank == dd->rank)
5579 /* This process is part of the group */
5580 bPartOfGroup = TRUE;
/* Color 0 for row members, MPI_UNDEFINED for everybody else */
5583 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5587 dd->comm->mpi_comm_load[dim_ind] = c_row;
5588 if (dd->comm->eDLB != edlbNO)
5590 if (dd->ci[dim] == dd->master_ci[dim])
5592 /* This is the root process of this row */
5593 snew(dd->comm->root[dim_ind], 1);
5594 root = dd->comm->root[dim_ind];
5595 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5596 snew(root->old_cell_f, dd->nc[dim]+1);
5597 snew(root->bCellMin, dd->nc[dim]);
5600 snew(root->cell_f_max0, dd->nc[dim]);
5601 snew(root->cell_f_min1, dd->nc[dim]);
5602 snew(root->bound_min, dd->nc[dim]);
5603 snew(root->bound_max, dd->nc[dim]);
5605 snew(root->buf_ncd, dd->nc[dim]);
5609 /* This is not a root process, we only need to receive cell_f */
5610 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
/* Row roots also need a buffer for the gathered load data */
5613 if (dd->ci[dim] == dd->master_ci[dim])
5615 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
/* Set up the communicator of PP ranks that share a GPU on the same
 * physical node (used to coordinate DLB with shared GPU resources).
 * Splits the PP communicator first by physical-node hash, then by
 * GPU id; frees the communicator again when nothing is shared.
 * Returns early when this rank is not a PP rank or no GPUs are in use.
 * NOTE(review): this excerpt has elided lines (declarations, braces,
 * #ifdef GMX_MPI guards presumably); only comments were added.
 */
5621 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5622 const gmx_hw_info_t gmx_unused *hwinfo,
5623 const gmx_hw_opt_t gmx_unused *hw_opt)
5626 int physicalnode_id_hash;
5629 MPI_Comm mpi_comm_pp_physicalnode;
5631 if (!(cr->duty & DUTY_PP) ||
5632 hw_opt->gpu_opt.ncuda_dev_use == 0)
5634 /* Only PP nodes (currently) use GPUs.
5635 * If we don't have GPUs, there are no resources to share.
5640 physicalnode_id_hash = gmx_physicalnode_id_hash();
5642 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5648 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5649 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5650 dd->rank, physicalnode_id_hash, gpu_id);
5652 /* Split the PP communicator over the physical nodes */
5653 /* TODO: See if we should store this (before), as it's also used for
5654 * for the nodecomm summution.
5656 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5657 &mpi_comm_pp_physicalnode);
/* Within a physical node, group the ranks using the same GPU */
5658 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5659 &dd->comm->mpi_comm_gpu_shared);
5660 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5661 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5665 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5668 /* Note that some ranks could share a GPU, while others don't */
/* No sharing for this rank: drop the communicator again */
5670 if (dd->comm->nrank_gpu_shared == 1)
5672 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
/* Create the load-gathering communicators for every DD dimension:
 * one row communicator for dimension 0, then - iterating over the
 * cells of the outer dimension(s) - rows for dimensions 1 and 2.
 * NOTE(review): this excerpt has elided lines (declarations of loc,
 * braces, loc assignments inside the loops); only comments were added.
 */
5677 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5680 int dim0, dim1, i, j;
5685 fprintf(debug, "Making load communicators\n");
5688 snew(dd->comm->load, dd->ndim);
5689 snew(dd->comm->mpi_comm_load, dd->ndim);
/* Row communicator along the first DD dimension */
5692 make_load_communicator(dd, 0, loc);
/* For >= 2 dimensions: one row per cell index of dimension 0 */
5696 for (i = 0; i < dd->nc[dim0]; i++)
5699 make_load_communicator(dd, 1, loc);
/* For 3 dimensions: one row per (dim0, dim1) cell pair */
5705 for (i = 0; i < dd->nc[dim0]; i++)
5709 for (j = 0; j < dd->nc[dim1]; j++)
5712 make_load_communicator(dd, 2, loc);
5719 fprintf(debug, "Finished making load communicators\n");
/* Set up the DD zone grid: determine the forward/backward neighbor
 * ranks in each DD dimension, select the zone template (dd_zp1/2/3)
 * matching the number of decomposed dimensions, fill in the zone
 * shifts and the i-zone j-ranges/shift limits used by neighbor
 * searching, and finally create the DLB root and load communicators.
 * NOTE(review): this excerpt has elided lines (declarations, braces,
 * nzone/nzonep assignments per ndim case); only comments were added.
 */
5724 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5727 int d, dim, i, j, m;
5730 ivec dd_zp[DD_MAXIZONE];
5731 gmx_domdec_zones_t *zones;
5732 gmx_domdec_ns_ranges_t *izone;
/* Neighbor ranks: +1 and -1 along each DD dimension, with pbc wrap */
5734 for (d = 0; d < dd->ndim; d++)
5737 copy_ivec(dd->ci, tmp);
5738 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5739 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5740 copy_ivec(dd->ci, tmp);
5741 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5742 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5745 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5748 dd->neighbor[d][1]);
5754 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5756 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5757 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Pick the i-zone template for 3D, 2D or 1D decomposition */
5764 for (i = 0; i < nzonep; i++)
5766 copy_ivec(dd_zp3[i], dd_zp[i]);
5772 for (i = 0; i < nzonep; i++)
5774 copy_ivec(dd_zp2[i], dd_zp[i]);
5780 for (i = 0; i < nzonep; i++)
5782 copy_ivec(dd_zp1[i], dd_zp[i]);
5786 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5791 zones = &dd->comm->zones;
/* Zone shifts: one entry per decomposed dimension from dd_zo */
5793 for (i = 0; i < nzone; i++)
5796 clear_ivec(zones->shift[i]);
5797 for (d = 0; d < dd->ndim; d++)
5799 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
/* s[] appears to be the shifted cell index, wrapped into the grid;
 * the wrap assignments are elided in this excerpt */
5804 for (i = 0; i < nzone; i++)
5806 for (d = 0; d < DIM; d++)
5808 s[d] = dd->ci[d] - zones->shift[i][d];
5813 else if (s[d] >= dd->nc[d])
/* i-zones: which j-zones each i-zone interacts with, plus the
 * allowed shift range per dimension */
5819 zones->nizone = nzonep;
5820 for (i = 0; i < zones->nizone; i++)
5822 if (dd_zp[i][0] != i)
5824 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5826 izone = &zones->izone[i];
5827 izone->j0 = dd_zp[i][1];
5828 izone->j1 = dd_zp[i][2];
5829 for (dim = 0; dim < DIM; dim++)
5831 if (dd->nc[dim] == 1)
5833 /* All shifts should be allowed */
5834 izone->shift0[dim] = -1;
5835 izone->shift1[dim] = 1;
/* NOTE(review): the block below (orig lines 5840-5846) indexes with
 * 'd' instead of 'dim' and uses a different brace style; it looks like
 * dead/duplicated code superseded by the shift_diff loop that follows -
 * verify against the full source before touching it. */
5840 izone->shift0[d] = 0;
5841 izone->shift1[d] = 0;
5842 for(j=izone->j0; j<izone->j1; j++) {
5843 if (dd->shift[j][d] > dd->shift[i][d])
5844 izone->shift0[d] = -1;
5845 if (dd->shift[j][d] < dd->shift[i][d])
5846 izone->shift1[d] = 1;
5852 /* Assume the shift are not more than 1 cell */
5853 izone->shift0[dim] = 1;
5854 izone->shift1[dim] = -1;
/* Narrow the shift range to what the j-zones actually need */
5855 for (j = izone->j0; j < izone->j1; j++)
5857 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5858 if (shift_diff < izone->shift0[dim])
5860 izone->shift0[dim] = shift_diff;
5862 if (shift_diff > izone->shift1[dim])
5864 izone->shift1[dim] = shift_diff;
/* DLB roots and load communicators are only needed with DLB/recording */
5871 if (dd->comm->eDLB != edlbNO)
5873 snew(dd->comm->root, dd->ndim);
5876 if (dd->comm->bRecordLoad)
5878 make_load_communicators(dd);
/* Create (or adopt) the particle-particle MPI communicator and fill in
 * this rank's DD coordinates, the master rank, and - depending on the
 * Cartesian setup - index tables mapping DD indices to node ids.
 * NOTE(review): this excerpt has elided lines (declarations, braces,
 * #ifdef GMX_MPI guards presumably, the rank computation before the
 * masterrank Allreduce); only comments were added.
 */
5882 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
5885 gmx_domdec_comm_t *comm;
5896 if (comm->bCartesianPP)
5898 /* Set up cartesian communication for the particle-particle part */
5901 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5902 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5905 for (i = 0; i < DIM; i++)
5909 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5911 /* We overwrite the old communicator with the new cartesian one */
5912 cr->mpi_comm_mygroup = comm_cart;
5915 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5916 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5918 if (comm->bCartesianPP_PME)
5920 /* Since we want to use the original cartesian setup for sim,
5921 * and not the one after split, we need to make an index.
5923 snew(comm->ddindex2ddnodeid, dd->nnodes);
/* Each rank fills its own slot; the sum-reduce merges the table */
5924 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5925 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5926 /* Get the rank of the DD master,
5927 * above we made sure that the master node is a PP node.
5937 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5939 else if (comm->bCartesianPP)
5941 if (cr->npmenodes == 0)
5943 /* The PP communicator is also
5944 * the communicator for this simulation
5946 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5948 cr->nodeid = dd->rank;
5950 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5952 /* We need to make an index to go from the coordinates
5953 * to the nodeid of this simulation.
5955 snew(comm->ddindex2simnodeid, dd->nnodes);
5956 snew(buf, dd->nnodes);
5957 if (cr->duty & DUTY_PP)
5959 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5961 /* Communicate the ddindex to simulation nodeid index */
5962 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5963 cr->mpi_comm_mysim);
5966 /* Determine the master coordinates and rank.
5967 * The DD master should be the same node as the master of this sim.
5969 for (i = 0; i < dd->nnodes; i++)
5971 if (comm->ddindex2simnodeid[i] == 0)
5973 ddindex2xyz(dd->nc, i, dd->master_ci);
5974 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
5979 fprintf(debug, "The master rank is %d\n", dd->masterrank);
5984 /* No Cartesian communicators */
5985 /* We use the rank in dd->comm->all as DD index */
5986 ddindex2xyz(dd->nc, dd->rank, dd->ci);
5987 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
5989 clear_ivec(dd->master_ci);
5996 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5997 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6002 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6003 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* On ranks that did not build it in make_pp_communicator (plain
 * Cartesian PP without PP/PME Cartesian split), construct the
 * ddindex -> simulation-nodeid table via an Allreduce over the sim
 * communicator: PP ranks contribute their own slot, PME ranks zeros.
 * NOTE(review): this excerpt has elided lines (declarations, braces,
 * the sfree(buf) presumably at the end); only comments were added.
 */
6007 static void receive_ddindex2simnodeid(t_commrec *cr)
6011 gmx_domdec_comm_t *comm;
6018 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6020 snew(comm->ddindex2simnodeid, dd->nnodes);
6021 snew(buf, dd->nnodes);
6022 if (cr->duty & DUTY_PP)
6024 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6027 /* Communicate the ddindex to simulation nodeid index */
6028 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6029 cr->mpi_comm_mysim);
/* Allocate the DD-master bookkeeping struct: per-node charge-group
 * counts/offsets, communication index buffers, global cell boundaries,
 * and - for small node counts using sendrecv distribution - a whole-
 * system rvec buffer of 'natoms' entries.
 * NOTE(review): this excerpt has elided lines (snew(ma,1), braces,
 * cg-related allocations around orig 6046, the return); comments only.
 */
6036 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6037 int ncg, int natoms)
6039 gmx_domdec_master_t *ma;
6044 snew(ma->ncg, dd->nnodes);
6045 snew(ma->index, dd->nnodes+1);
6047 snew(ma->nat, dd->nnodes);
6048 snew(ma->ibuf, dd->nnodes*2);
/* cell_x: per-dimension arrays of cell boundary coordinates */
6049 snew(ma->cell_x, DIM);
6050 for (i = 0; i < DIM; i++)
6052 snew(ma->cell_x[i], dd->nc[i]+1);
/* Small runs distribute state with send/recv and need a full buffer */
6055 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6061 snew(ma->vbuf, natoms);
/* Split the simulation communicator into PP and PME-only groups.
 * With a Cartesian node order and a PME count that divides evenly into
 * an extra slab, a combined PP+PME Cartesian grid is built (PME ranks
 * stacked along cartpmedim); otherwise ranks are assigned PP/PME duty
 * by node order (interleaved or PP-first) and the communicator is
 * split by duty.  Sets cr->duty and cr->mpi_comm_mygroup.
 * NOTE(review): this excerpt has elided lines (declarations, braces,
 * several case labels and duty assignments); only comments were added.
 */
6067 static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
6068 int gmx_unused reorder)
6071 gmx_domdec_comm_t *comm;
6082 if (comm->bCartesianPP)
/* Check whether npmenodes can form a whole slab in y or z */
6084 for (i = 1; i < DIM; i++)
6086 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6088 if (bDiv[YY] || bDiv[ZZ])
6090 comm->bCartesianPP_PME = TRUE;
6091 /* If we have 2D PME decomposition, which is always in x+y,
6092 * we stack the PME only nodes in z.
6093 * Otherwise we choose the direction that provides the thinnest slab
6094 * of PME only nodes as this will have the least effect
6095 * on the PP communication.
6096 * But for the PME communication the opposite might be better.
6098 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6100 dd->nc[YY] > dd->nc[ZZ]))
6102 comm->cartpmedim = ZZ;
6106 comm->cartpmedim = YY;
/* Extend the grid along cartpmedim to make room for the PME ranks */
6108 comm->ntot[comm->cartpmedim]
6109 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6113 fprintf(fplog, "#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6115 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6120 if (comm->bCartesianPP_PME)
6124 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6127 for (i = 0; i < DIM; i++)
6131 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6134 MPI_Comm_rank(comm_cart, &rank);
/* Rank 0 must stay master: reordering it away is not supported */
6135 if (MASTERNODE(cr) && rank != 0)
6137 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6140 /* With this assigment we loose the link to the original communicator
6141 * which will usually be MPI_COMM_WORLD, unless have multisim.
6143 cr->mpi_comm_mysim = comm_cart;
6144 cr->sim_nodeid = rank;
6146 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6150 fprintf(fplog, "Cartesian nodeid %d, coordinates %d %d %d\n\n",
6151 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Ranks beyond the PP part of the grid along cartpmedim do PME */
6154 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6158 if (cr->npmenodes == 0 ||
6159 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6161 cr->duty = DUTY_PME;
6164 /* Split the sim communicator into PP and PME only nodes */
6165 MPI_Comm_split(cr->mpi_comm_mysim,
6167 dd_index(comm->ntot, dd->ci),
6168 &cr->mpi_comm_mygroup);
/* Non-Cartesian PP/PME split: assign duty by node order */
6172 switch (dd_node_order)
6177 fprintf(fplog, "Order of the nodes: PP first, PME last\n");
6180 case ddnoINTERLEAVE:
6181 /* Interleave the PP-only and PME-only nodes,
6182 * as on clusters with dual-core machines this will double
6183 * the communication bandwidth of the PME processes
6184 * and thus speed up the PP <-> PME and inter PME communication.
6188 fprintf(fplog, "Interleaving PP and PME nodes\n");
6190 comm->pmenodes = dd_pmenodes(cr);
6195 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
/* A rank with no PME peer assigned is itself a PME rank */
6198 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6200 cr->duty = DUTY_PME;
6207 /* Split the sim communicator into PP and PME only nodes */
6208 MPI_Comm_split(cr->mpi_comm_mysim,
6211 &cr->mpi_comm_mygroup);
6212 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6218 fprintf(fplog, "This is a %s only node\n\n",
6219 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
/* Top-level setup of all DD-related communicators: optionally split
 * the simulation into PP/PME groups, build the PP communicator, set up
 * the PP->PME mapping for PP ranks, and allocate the master struct on
 * the DD master rank.
 * NOTE(review): this excerpt has elided lines (declarations, braces,
 * the comm->cgs_gl.nr argument on the master init call); comments only.
 */
6223 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6226 gmx_domdec_comm_t *comm;
6232 copy_ivec(dd->nc, comm->ntot);
6234 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6235 comm->bCartesianPP_PME = FALSE;
6237 /* Reorder the nodes by default. This might change the MPI ranks.
6238 * Real reordering is only supported on very few architectures,
6239 * Blue Gene is one of them.
6241 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6243 if (cr->npmenodes > 0)
6245 /* Split the communicator into a PP and PME part */
6246 split_communicator(fplog, cr, dd_node_order, CartReorder);
6247 if (comm->bCartesianPP_PME)
6249 /* We (possibly) reordered the nodes in split_communicator,
6250 * so it is no longer required in make_pp_communicator.
6252 CartReorder = FALSE;
6257 /* All nodes do PP and PME */
6259 /* We do not require separate communicators */
6260 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6264 if (cr->duty & DUTY_PP)
6266 /* Copy or make a new PP communicator */
6267 make_pp_communicator(fplog, cr, CartReorder);
6271 receive_ddindex2simnodeid(cr);
/* PP ranks that are not also doing PME need their PME peer */
6274 if (!(cr->duty & DUTY_PME))
6276 /* Set up the commnuication to our PME node */
6277 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6278 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6281 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6282 dd->pme_nodeid, dd->pme_receive_vir_ener);
6287 dd->pme_nodeid = -1;
/* The DD master allocates the global bookkeeping struct */
6292 dd->ma = init_gmx_domdec_master_t(dd,
6294 comm->cgs_gl.index[comm->cgs_gl.nr]);
/* Parse the static load-balancing cell-size string (-dds x sizes) for
 * one direction into an array of nc relative fractions, normalized to
 * sum to 1.  Returns NULL (presumably) when nc <= 1 or no string is
 * given.  NOTE(review): this excerpt has elided lines (snew, the
 * normalization division, return); only comments were added.
 */
6298 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6300 real *slb_frac, tot;
6305 if (nc > 1 && size_string != NULL)
6309 fprintf(fplog, "Using static load balancing for the %s direction\n",
6314 for (i = 0; i < nc; i++)
/* %n records how many characters were consumed so we can advance */
6317 sscanf(size_string, "%lf%n", &dbl, &n);
6320 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6329 fprintf(fplog, "Relative cell sizes:");
6331 for (i = 0; i < nc; i++)
6336 fprintf(fplog, " %5.3f", slb_frac[i]);
6341 fprintf(fplog, "\n");
/* Count the multi-body bonded interactions (bonded interactions with
 * more than two atoms) in the whole topology; used to decide whether
 * DD needs multi-body communication.  Iterates over all molecule-type
 * interaction lists, scaling by the number of molecules.
 * NOTE(review): elided lines include the NRAL(ftype) > 2 part of the
 * condition (orig 6361-6362) and the return; only comments were added.
 */
6348 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6351 gmx_mtop_ilistloop_t iloop;
6355 iloop = gmx_mtop_ilistloop_init(mtop);
6356 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6358 for (ftype = 0; ftype < F_NRE; ftype++)
6360 if ((interaction_function[ftype].flags & IF_BOND) &&
/* nr entries per interaction = 1 (type index) + NRAL atoms */
6363 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
/* Read an integer from environment variable env_var, falling back to
 * 'def' (presumably) when unset or unparsable; logs the value found.
 * NOTE(review): this excerpt has elided lines (declarations, the
 * nst = def fallback, return); only comments were added.
 */
6371 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6377 val = getenv(env_var);
6380 if (sscanf(val, "%d", &nst) <= 0)
6386 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
/* Emit a DD warning string to stderr and the log file.
 * NOTE(review): the guard conditions around the two fprintf calls are
 * elided in this excerpt (presumably a master-rank check for stderr
 * and a NULL check for fplog - confirm against the full source);
 * only comments were added.
 */
6394 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6398 fprintf(stderr, "\n%s\n", warn_string);
6402 fprintf(fplog, "\n%s\n", warn_string);
/* Validate that the input record is compatible with domain
 * decomposition: screw pbc only decomposes in x, simple neighbor
 * search and nstlist=0 are unsupported (fatal), and angular comm-mode
 * with pbc gets a warning.
 * NOTE(review): braces are elided in this excerpt; comments only added.
 */
6406 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6407 t_inputrec *ir, FILE *fplog)
6409 if (ir->ePBC == epbcSCREW &&
6410 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6412 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6415 if (ir->ns_type == ensSIMPLE)
6417 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6420 if (ir->nstlist == 0)
6422 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6425 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6427 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
/* Return the smallest average cell size over the decomposed
 * dimensions, based on the initial box: min over d of
 * box_size[d]*skew_fac[d]/nc[d], seeded with the x box length.
 * NOTE(review): declarations, braces, the d = dd->dim[di] assignment
 * and the return are elided in this excerpt; comments only added.
 */
6431 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6436 r = ddbox->box_size[XX];
6437 for (di = 0; di < dd->ndim; di++)
6440 /* Check using the initial average cell size */
6441 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
/* Determine the dynamic-load-balancing mode (edlbAUTO/NO/YES) from the
 * -dlb option character, downgrading when DLB is unsupported: rerun
 * and non-dynamic integrators disable it; no cycle counting disables
 * it; -reprod disables auto mode and warns when it was forced on.
 * NOTE(review): this excerpt has elided lines (braces, the eDLB
 * downgrade assignments, return eDLB); only comments were added.
 */
6447 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6448 const char *dlb_opt, gmx_bool bRecordLoad,
6449 unsigned long Flags, t_inputrec *ir)
6457 case 'a': eDLB = edlbAUTO; break;
6458 case 'n': eDLB = edlbNO; break;
6459 case 'y': eDLB = edlbYES; break;
6460 default: gmx_incons("Unknown dlb_opt");
/* Rerun: no dynamics, so no load balancing */
6463 if (Flags & MD_RERUN)
6468 if (!EI_DYNAMICS(ir->eI))
6470 if (eDLB == edlbYES)
6472 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6473 dd_warning(cr, fplog, buf);
6481 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
/* Reproducibility: DLB changes cell sizes, breaking bitwise repro */
6486 if (Flags & MD_REPRODUCIBLE)
6493 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6497 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6500 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
/* Fill dd->dim[] with the decomposed dimensions (those with nc > 1) and
 * set dd->ndim.  Order is x,y,z by default, or z,y,x when the
 * GMX_DD_ORDER_ZYX environment variable is set. */
6508 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6513 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6515 /* Decomposition order z,y,x */
6518 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6520 for (dim = DIM-1; dim >= 0; dim--)
6522 if (dd->nc[dim] > 1)
6524 dd->dim[dd->ndim++] = dim;
6530 /* Decomposition order x,y,z */
6531 for (dim = 0; dim < DIM; dim++)
6533 if (dd->nc[dim] > 1)
6535 dd->dim[dd->ndim++] = dim;
/* Allocate and zero-initialize a gmx_domdec_comm_t: per-direction
 * charge-group flag/state buffers (2 per dimension), the integer and
 * rvec communication buffers, and the load-balancing counters.
 * Ownership of the returned struct passes to the caller (stored in
 * dd->comm by init_domain_decomposition). */
6541 static gmx_domdec_comm_t *init_dd_comm()
6543 gmx_domdec_comm_t *comm;
/* One send/receive buffer pair per dimension and direction. */
6547 snew(comm->cggl_flag, DIM*2);
6548 snew(comm->cgcm_state, DIM*2);
6549 for (i = 0; i < DIM*2; i++)
6551 comm->cggl_flag_nalloc[i] = 0;
6552 comm->cgcm_state_nalloc[i] = 0;
6555 comm->nalloc_int = 0;
6556 comm->buf_int = NULL;
6558 vec_rvec_init(&comm->vbuf);
6560 comm->n_load_have = 0;
6561 comm->n_load_collect = 0;
6563 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6565 comm->sum_nat[i] = 0;
6569 comm->load_step = 0;
6572 clear_ivec(comm->load_lim);
/* Set up the domain decomposition master structure: read the DD-related
 * environment variables, decide DLB support, determine the cell size
 * limits from bonded interactions and P-LINCS constraints, choose (or
 * validate) the DD grid and the PME node split, and perform the
 * remaining one-time initialization.  Returns the new gmx_domdec_t;
 * *npme_x / *npme_y receive the PME decomposition for the caller.
 * NOTE(review): this chunk of the file is line-sampled, so braces,
 * else-clauses and some declarations of this body are elided. */
6579 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6580 unsigned long Flags,
6582 real comm_distance_min, real rconstr,
6583 const char *dlb_opt, real dlb_scale,
6584 const char *sizex, const char *sizey, const char *sizez,
6585 gmx_mtop_t *mtop, t_inputrec *ir,
6586 matrix box, rvec *x,
6588 int *npme_x, int *npme_y)
6591 gmx_domdec_comm_t *comm;
6594 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6601 "\nInitializing Domain Decomposition on %d nodes\n", cr->nnodes);
6606 dd->comm = init_dd_comm();
/* init_dd_comm() already allocates comm->cggl_flag and comm->cgcm_state
 * (see its snew calls); the duplicate snew calls that used to follow
 * here leaked those arrays and have been removed. */
6611 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6612 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
/* Debug/tuning knobs via environment variables. */
6614 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
6615 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
6616 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
6617 recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
6618 comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
6619 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
6620 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
6621 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
6623 dd->pme_recv_f_alloc = 0;
6624 dd->pme_recv_f_buf = NULL;
6626 if (dd->bSendRecv2 && fplog)
6628 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6634 fprintf(fplog, "Will load balance based on FLOP count\n");
6636 if (comm->eFlop > 1)
/* Seed per-rank so artificial FLOP noise differs between ranks. */
6638 srand(1+cr->nodeid);
6640 comm->bRecordLoad = TRUE;
6644 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6648 /* Initialize to GPU share count to 0, might change later */
6649 comm->nrank_gpu_shared = 0;
6651 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6653 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6656 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6658 dd->bGridJump = comm->bDynLoadBal;
6659 comm->bPMELoadBalDLBLimits = FALSE;
6661 if (comm->nstSortCG)
6665 if (comm->nstSortCG == 1)
6667 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6671 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6675 snew(comm->sort, 1);
6681 fprintf(fplog, "Will not sort the charge groups\n");
/* Charge groups with >1 atom exist iff #cg < #atoms. */
6685 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6687 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6688 if (comm->bInterCGBondeds)
6690 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6694 comm->bInterCGMultiBody = FALSE;
6697 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6698 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6700 if (ir->rlistlong == 0)
6702 /* Set the cut-off to some very large value,
6703 * so we don't need if statements everywhere in the code.
6704 * We use sqrt, since the cut-off is squared in some places.
6706 comm->cutoff = GMX_CUTOFF_INF;
6710 comm->cutoff = ir->rlistlong;
6712 comm->cutoff_mbody = 0;
6714 comm->cellsize_limit = 0;
6715 comm->bBondComm = FALSE;
/* Determine the minimum cell size required for bonded interactions. */
6717 if (comm->bInterCGBondeds)
6719 if (comm_distance_min > 0)
/* User supplied -rdd: trust it. */
6721 comm->cutoff_mbody = comm_distance_min;
6722 if (Flags & MD_DDBONDCOMM)
6724 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6728 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6730 r_bonded_limit = comm->cutoff_mbody;
6732 else if (ir->bPeriodicMols)
6734 /* Can not easily determine the required cut-off */
6735 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6736 comm->cutoff_mbody = comm->cutoff/2;
6737 r_bonded_limit = comm->cutoff_mbody;
/* Otherwise measure the actual bonded distances from the start coords. */
6743 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6744 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6746 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6747 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6749 /* We use an initial margin of 10% for the minimum cell size,
6750 * except when we are just below the non-bonded cut-off.
6752 if (Flags & MD_DDBONDCOMM)
6754 if (max(r_2b, r_mb) > comm->cutoff)
6756 r_bonded = max(r_2b, r_mb);
6757 r_bonded_limit = 1.1*r_bonded;
6758 comm->bBondComm = TRUE;
6763 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6765 /* We determine cutoff_mbody later */
6769 /* No special bonded communication,
6770 * simply increase the DD cut-off.
6772 r_bonded_limit = 1.1*max(r_2b, r_mb);
6773 comm->cutoff_mbody = r_bonded_limit;
6774 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6777 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6781 "Minimum cell size due to bonded interactions: %.3f nm\n",
6782 comm->cellsize_limit);
/* Cell size limit from P-LINCS constraints (estimated or user -rcon). */
6786 if (dd->bInterCGcons && rconstr <= 0)
6788 /* There is a cell size limit due to the constraints (P-LINCS) */
6789 rconstr = constr_r_max(fplog, mtop, ir);
6793 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6795 if (rconstr > comm->cellsize_limit)
6797 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6801 else if (rconstr > 0 && fplog)
6803 /* Here we do not check for dd->bInterCGcons,
6804 * because one can also set a cell size limit for virtual sites only
6805 * and at this point we don't know yet if there are intercg v-sites.
6808 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6811 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6813 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
/* If the user specified a grid (nc), validate it; otherwise choose one. */
6817 copy_ivec(nc, dd->nc);
6818 set_dd_dim(fplog, dd);
6819 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6821 if (cr->npmenodes == -1)
6825 acs = average_cellsize_min(dd, ddbox);
6826 if (acs < comm->cellsize_limit)
6830 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6832 gmx_fatal_collective(FARGS, cr, NULL,
6833 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6834 acs, comm->cellsize_limit);
6839 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6841 /* We need to choose the optimal DD grid and possibly PME nodes */
6842 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6843 comm->eDLB != edlbNO, dlb_scale,
6844 comm->cellsize_limit, comm->cutoff,
6845 comm->bInterCGBondeds);
6847 if (dd->nc[XX] == 0)
/* No valid grid found: tell the user which knob to turn. */
6849 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6850 sprintf(buf, "Change the number of nodes or mdrun option %s%s%s",
6851 !bC ? "-rdd" : "-rcon",
6852 comm->eDLB != edlbNO ? " or -dds" : "",
6853 bC ? " or your LINCS settings" : "");
6855 gmx_fatal_collective(FARGS, cr, NULL,
6856 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6858 "Look in the log file for details on the domain decomposition",
6859 cr->nnodes-cr->npmenodes, limit, buf);
6861 set_dd_dim(fplog, dd);
6867 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6868 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6871 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6872 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6874 gmx_fatal_collective(FARGS, cr, NULL,
6875 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6876 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6878 if (cr->npmenodes > dd->nnodes)
6880 gmx_fatal_collective(FARGS, cr, NULL,
6881 "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6883 if (cr->npmenodes > 0)
6885 comm->npmenodes = cr->npmenodes;
6889 comm->npmenodes = dd->nnodes;
/* Decide 1D vs 2D PME decomposition. */
6892 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6894 /* The following choices should match those
6895 * in comm_cost_est in domdec_setup.c.
6896 * Note that here the checks have to take into account
6897 * that the decomposition might occur in a different order than xyz
6898 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6899 * in which case they will not match those in comm_cost_est,
6900 * but since that is mainly for testing purposes that's fine.
6902 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6903 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6904 getenv("GMX_PMEONEDD") == NULL)
6906 comm->npmedecompdim = 2;
6907 comm->npmenodes_x = dd->nc[XX];
6908 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6912 /* In case nc is 1 in both x and y we could still choose to
6913 * decompose pme in y instead of x, but we use x for simplicity.
6915 comm->npmedecompdim = 1;
6916 if (dd->dim[0] == YY)
6918 comm->npmenodes_x = 1;
6919 comm->npmenodes_y = comm->npmenodes;
6923 comm->npmenodes_x = comm->npmenodes;
6924 comm->npmenodes_y = 1;
6929 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6930 comm->npmenodes_x, comm->npmenodes_y, 1);
6935 comm->npmedecompdim = 0;
6936 comm->npmenodes_x = 0;
6937 comm->npmenodes_y = 0;
6940 /* Technically we don't need both of these,
6941 * but it simplifies code not having to recalculate it.
6943 *npme_x = comm->npmenodes_x;
6944 *npme_y = comm->npmenodes_y;
6946 snew(comm->slb_frac, DIM);
6947 if (comm->eDLB == edlbNO)
6949 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6950 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6951 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
6954 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6956 if (comm->bBondComm || comm->eDLB != edlbNO)
6958 /* Set the bonded communication distance to halfway
6959 * the minimum and the maximum,
6960 * since the extra communication cost is nearly zero.
6962 acs = average_cellsize_min(dd, ddbox);
6963 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6964 if (comm->eDLB != edlbNO)
6966 /* Check if this does not limit the scaling */
6967 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
6969 if (!comm->bBondComm)
6971 /* Without bBondComm do not go beyond the n.b. cut-off */
6972 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
6973 if (comm->cellsize_limit >= comm->cutoff)
6975 /* We don't loose a lot of efficieny
6976 * when increasing it to the n.b. cut-off.
6977 * It can even be slightly faster, because we need
6978 * less checks for the communication setup.
6980 comm->cutoff_mbody = comm->cutoff;
6983 /* Check if we did not end up below our original limit */
6984 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
6986 if (comm->cutoff_mbody > comm->cellsize_limit)
6988 comm->cellsize_limit = comm->cutoff_mbody;
6991 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
6996 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
6997 "cellsize limit %f\n",
6998 comm->bBondComm, comm->cellsize_limit);
7003 check_dd_restrictions(cr, dd, ir, fplog);
/* INT_MIN marks "never partitioned yet". */
7006 comm->partition_step = INT_MIN;
7009 clear_dd_cycle_counts(dd);
/* Activate the DLB-specific limits: copy the DLB pulse counts and
 * minimum cell sizes into the fields used by the active decomposition. */
7014 static void set_dlb_limits(gmx_domdec_t *dd)
7019 for (d = 0; d < dd->ndim; d++)
7021 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7022 dd->comm->cellsize_min[dd->dim[d]] =
7023 dd->comm->cellsize_min_dlb[dd->dim[d]];
/* Switch dynamic load balancing on at run time (eDLB == auto), unless
 * the current minimum cell size is already within 5% of the limit, in
 * which case DLB is permanently disabled instead.  When enabled, the
 * initial (uniform) cell boundaries are filled in locally so no extra
 * communication is needed. */
7028 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7031 gmx_domdec_comm_t *comm;
7041 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
/* Find the smallest current cell size over all decomposed dimensions. */
7044 cellsize_min = comm->cellsize_min[dd->dim[0]];
7045 for (d = 1; d < dd->ndim; d++)
7047 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7050 if (cellsize_min < comm->cellsize_limit*1.05)
7052 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7054 /* Change DLB from "auto" to "no". */
7055 comm->eDLB = edlbNO;
7060 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7061 comm->bDynLoadBal = TRUE;
7062 dd->bGridJump = TRUE;
7066 /* We can set the required cell size info here,
7067 * so we do not need to communicate this.
7068 * The grid is completely uniform.
7070 for (d = 0; d < dd->ndim; d++)
7074 comm->load[d].sum_m = comm->load[d].sum;
7076 nc = dd->nc[dd->dim[d]];
/* Uniform cell fractions: boundary i sits at i/nc. */
7077 for (i = 0; i < nc; i++)
7079 comm->root[d]->cell_f[i] = i/(real)nc;
7082 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7083 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7086 comm->root[d]->cell_f[nc] = 1.0;
/* Allocate a per-charge-group "is local" flag array, initialized to
 * FALSE for all global charge groups.  Caller owns the returned array.
 * NOTE(review): the return statement is elided from this sampled view. */
7091 static char *init_bLocalCG(gmx_mtop_t *mtop)
7096 ncg = ncg_mtop(mtop);
7097 snew(bLocalCG, ncg);
7098 for (cg = 0; cg < ncg; cg++)
7100 bLocalCG[cg] = FALSE;
/* Build the reverse topology and, when bonded communication beyond the
 * cut-off is enabled (bBondComm), the charge-group link table and the
 * local-CG flag array used to find missing bonded partners. */
7106 void dd_init_bondeds(FILE *fplog,
7107 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7109 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7111 gmx_domdec_comm_t *comm;
7115 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7119 if (comm->bBondComm)
7121 /* Communicate atoms beyond the cut-off for bonded interactions */
7124 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7126 comm->bLocalCG = init_bLocalCG(mtop);
7130 /* Only communicate atoms based on cut-off */
7131 comm->cglink = NULL;
7132 comm->bLocalCG = NULL;
/* Write a human-readable summary of the DD setup to the log: pulse
 * counts, cell size limits, allowed shrink per dimension (DLB) or
 * initial cell sizes (static), and the maximum interaction distances
 * for non-bonded, bonded, vsite and constraint communication. */
7136 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7138 gmx_bool bDynLoadBal, real dlb_scale,
7141 gmx_domdec_comm_t *comm;
7156 fprintf(fplog, "The maximum number of communication pulses is:");
7157 for (d = 0; d < dd->ndim; d++)
7159 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7161 fprintf(fplog, "\n");
7162 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7163 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7164 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7165 for (d = 0; d < DIM; d++)
/* A 2-cell non-PBC dimension cannot shrink (special-cased here). */
7169 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7176 comm->cellsize_min_dlb[d]/
7177 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7179 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7182 fprintf(fplog, "\n");
/* Static (no-DLB) branch: report initial pulses and cell sizes. */
7186 set_dd_cell_sizes_slb(dd, ddbox, FALSE, np);
7187 fprintf(fplog, "The initial number of communication pulses is:");
7188 for (d = 0; d < dd->ndim; d++)
7190 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7192 fprintf(fplog, "\n");
7193 fprintf(fplog, "The initial domain decomposition cell size is:");
7194 for (d = 0; d < DIM; d++)
7198 fprintf(fplog, " %c %.2f nm",
7199 dim2char(d), dd->comm->cellsize_min[d]);
7202 fprintf(fplog, "\n\n");
7205 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7207 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7208 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7209 "non-bonded interactions", "", comm->cutoff);
7213 limit = dd->comm->cellsize_limit;
7217 if (dynamic_dd_box(ddbox, ir))
7219 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7221 limit = dd->comm->cellsize_min[XX];
7222 for (d = 1; d < DIM; d++)
7224 limit = min(limit, dd->comm->cellsize_min[d]);
7228 if (comm->bInterCGBondeds)
7230 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7231 "two-body bonded interactions", "(-rdd)",
7232 max(comm->cutoff, comm->cutoff_mbody));
7233 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7234 "multi-body bonded interactions", "(-rdd)",
7235 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7239 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7240 "virtual site constructions", "(-rcon)", limit);
7242 if (dd->constraint_comm)
7244 sprintf(buf, "atoms separated by up to %d constraints",
7246 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7247 buf, "(-rcon)", limit);
7249 fprintf(fplog, "\n");
/* Compute the DLB cell size limits: determine the maximum number of
 * communication pulses per dimension (np_dlb), allocate the per-pulse
 * index structures, and derive cellsize_limit / cellsize_min_dlb so
 * that the cut-off can always be reached with the chosen pulse count. */
7255 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7257 const t_inputrec *ir,
7258 const gmx_ddbox_t *ddbox)
7260 gmx_domdec_comm_t *comm;
7261 int d, dim, npulse, npulse_d_max, npulse_d;
/* rvdw or rcoulomb of 0 means an effectively infinite cut-off. */
7266 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7268 /* Determine the maximum number of comm. pulses in one dimension */
7270 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7272 /* Determine the maximum required number of grid pulses */
7273 if (comm->cellsize_limit >= comm->cutoff)
7275 /* Only a single pulse is required */
7278 else if (!bNoCutOff && comm->cellsize_limit > 0)
7280 /* We round down slightly here to avoid overhead due to the latency
7281 * of extra communication calls when the cut-off
7282 * would be only slightly longer than the cell size.
7283 * Later cellsize_limit is redetermined,
7284 * so we can not miss interactions due to this rounding.
7286 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7290 /* There is no cell size limit */
7291 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7294 if (!bNoCutOff && npulse > 1)
7296 /* See if we can do with less pulses, based on dlb_scale */
7298 for (d = 0; d < dd->ndim; d++)
7301 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7302 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7303 npulse_d_max = max(npulse_d_max, npulse_d);
7305 npulse = min(npulse, npulse_d_max);
7308 /* This env var can override npulse */
7309 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
/* Without PBC there is no hard DLB limit unless a pulse count caps it. */
7316 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7317 for (d = 0; d < dd->ndim; d++)
7319 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7320 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7321 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7322 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7323 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7325 comm->bVacDLBNoLimit = FALSE;
7329 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7330 if (!comm->bVacDLBNoLimit)
7332 comm->cellsize_limit = max(comm->cellsize_limit,
7333 comm->cutoff/comm->maxpulse);
7335 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7336 /* Set the minimum cell size for each DD dimension */
7337 for (d = 0; d < dd->ndim; d++)
7339 if (comm->bVacDLBNoLimit ||
7340 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7342 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7346 comm->cellsize_min_dlb[dd->dim[d]] =
7347 comm->cutoff/comm->cd[d].np_dlb;
7350 if (comm->cutoff_mbody <= 0)
7352 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7354 if (comm->bDynLoadBal)
/* Return whether bonded interactions must apply PBC to molecules:
 * only needed with PBC, inter-CG bondeds, and at least one periodic
 * dimension not covered by the decomposition. */
7360 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7362 /* If each molecule is a single charge group
7363 * or we use domain decomposition for each periodic dimension,
7364 * we do not need to take pbc into account for the bonded interactions.
7366 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7369 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
/* Late DD initialization that must wait until thread counts and PME
 * setup are known: allocate per-thread data, initialize the PME
 * communication dimensions, apply DLB cell limits, print the settings
 * summary, and size the global-to-local atom hash from the expected
 * zone volume fraction. */
7372 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7373 t_inputrec *ir, gmx_ddbox_t *ddbox)
7375 gmx_domdec_comm_t *comm;
7381 /* Initialize the thread data.
7382 * This can not be done in init_domain_decomposition,
7383 * as the numbers of threads is determined later.
7385 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7388 snew(comm->dth, comm->nth);
7391 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7393 init_ddpme(dd, &comm->ddpme[0], 0);
7394 if (comm->npmedecompdim >= 2)
7396 init_ddpme(dd, &comm->ddpme[1], 1);
7401 comm->npmenodes = 0;
7402 if (dd->pme_nodeid >= 0)
7404 gmx_fatal_collective(FARGS, NULL, dd,
7405 "Can not have separate PME nodes without PME electrostatics");
7411 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7413 if (comm->eDLB != edlbNO)
7415 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7418 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
/* With auto-DLB, also show what the limits become once DLB turns on. */
7419 if (comm->eDLB == edlbAUTO)
7423 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7425 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
7428 if (ir->ePBC == epbcNONE)
7430 vol_frac = 1 - 1/(double)dd->nnodes;
7435 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7439 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7441 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7443 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
/* Test whether the current DD grid can support a requested cut-off
 * cutoff_req: compute the pulse count each dimension would need and
 * compare it against the DLB pulse allocation and the current local
 * cell sizes; a local limitation is summed over all ranks so every
 * rank returns the same verdict. */
7446 static gmx_bool test_dd_cutoff(t_commrec *cr,
7447 t_state *state, t_inputrec *ir,
7458 set_ddbox(dd, FALSE, cr, ir, state->box,
7459 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7463 for (d = 0; d < dd->ndim; d++)
7467 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7468 if (dynamic_dd_box(&ddbox, ir))
/* Extra margin for pressure-coupled (shrinking) boxes. */
7470 inv_cell_size *= DD_PRES_SCALE_MARGIN;
7473 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7475 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7476 dd->comm->cd[d].np_dlb > 0)
7478 if (np > dd->comm->cd[d].np_dlb)
7483 /* If a current local cell size is smaller than the requested
7484 * cut-off, we could still fix it, but this gets very complicated.
7485 * Without fixing here, we might actually need more checks.
7487 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7494 if (dd->comm->eDLB != edlbNO)
7496 /* If DLB is not active yet, we don't need to check the grid jumps.
7497 * Actually we shouldn't, because then the grid jump data is not set.
7499 if (dd->comm->bDynLoadBal &&
7500 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
/* Make the decision collective: any locally limited rank vetoes. */
7505 gmx_sumi(1, &LocallyLimited, cr);
7507 if (LocallyLimited > 0)
/* Try to change the DD communication cut-off to cutoff_req.
 * Returns TRUE and applies the new cut-off when the current grid
 * supports it (see test_dd_cutoff), FALSE otherwise. */
7516 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7519 gmx_bool bCutoffAllowed;
7521 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7525 cr->dd->comm->cutoff = cutoff_req;
7528 return bCutoffAllowed;
/* Enable the PME load balancing limit on DLB and record the current
 * cut-off as the maximum that PME tuning may request. */
7531 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7533 gmx_domdec_comm_t *comm;
7535 comm = cr->dd->comm;
7537 /* Turn on the DLB limiting (might have been on already) */
7538 comm->bPMELoadBalDLBLimits = TRUE;
7540 /* Change the cut-off limit */
7541 comm->PMELoadBal_max_cutoff = comm->cutoff;
/* Merge a newly received pulse of charge groups into the per-zone
 * arrays: first shift the charge groups from previous pulses (and the
 * send indices referring to them) to make room, then append the
 * received global indices, positions and cg info, extending cgindex
 * with each group's atom count. */
7544 static void merge_cg_buffers(int ncell,
7545 gmx_domdec_comm_dim_t *cd, int pulse,
7547 int *index_gl, int *recv_i,
7548 rvec *cg_cm, rvec *recv_vr,
7550 cginfo_mb_t *cginfo_mb, int *cginfo)
7552 gmx_domdec_ind_t *ind, *ind_p;
7553 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7554 int shift, shift_at;
7556 ind = &cd->ind[pulse];
7558 /* First correct the already stored data */
7559 shift = ind->nrecv[ncell];
/* Walk cells backwards so shifted data is not overwritten. */
7560 for (cell = ncell-1; cell >= 0; cell--)
7562 shift -= ind->nrecv[cell];
7565 /* Move the cg's present from previous grid pulses */
7566 cg0 = ncg_cell[ncell+cell];
7567 cg1 = ncg_cell[ncell+cell+1];
7568 cgindex[cg1+shift] = cgindex[cg1];
7569 for (cg = cg1-1; cg >= cg0; cg--)
7571 index_gl[cg+shift] = index_gl[cg];
7572 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7573 cgindex[cg+shift] = cgindex[cg];
7574 cginfo[cg+shift] = cginfo[cg];
7576 /* Correct the already stored send indices for the shift */
7577 for (p = 1; p <= pulse; p++)
7579 ind_p = &cd->ind[p];
7581 for (c = 0; c < cell; c++)
7583 cg0 += ind_p->nsend[c];
7585 cg1 = cg0 + ind_p->nsend[cell];
7586 for (cg = cg0; cg < cg1; cg++)
7588 ind_p->index[cg] += shift;
7594 /* Merge in the communicated buffers */
7598 for (cell = 0; cell < ncell; cell++)
7600 cg1 = ncg_cell[ncell+cell+1] + shift;
7603 /* Correct the old cg indices */
7604 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7606 cgindex[cg+1] += shift_at;
7609 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7611 /* Copy this charge group from the buffer */
7612 index_gl[cg1] = recv_i[cg0];
7613 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7614 /* Add it to the cgindex */
7615 cg_gl = index_gl[cg1];
7616 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7617 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7618 cgindex[cg1+1] = cgindex[cg1] + nat;
7623 shift += ind->nrecv[cell];
7624 ncg_cell[ncell+cell+1] = cg1;
/* Record, per zone and per pulse, the atom index range received
 * (cell2at0/cell2at1) by walking cgindex over the receive counts —
 * used for fast copying of communication buffers. */
7628 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7629 int nzone, int cg0, const int *cgindex)
7633 /* Store the atom block boundaries for easy copying of communication buffers
7636 for (zone = 0; zone < nzone; zone++)
7638 for (p = 0; p < cd->np; p++)
7640 cd->ind[p].cell2at0[zone] = cgindex[cg];
7641 cg += cd->ind[p].nrecv[zone];
7642 cd->ind[p].cell2at1[zone] = cgindex[cg];
/* Return TRUE when global charge group cg_gl has at least one bonded
 * link partner that is not marked local in bLocalCG. */
7647 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7653 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7655 if (!bLocalCG[link->a[i]])
7664 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
/* NOTE(review): the struct/typedef header line is elided from this
 * sampled view; these are the members of the corners struct used by
 * set_dd_corners and get_zone_pulse_cgs below. */
7666 real c[DIM][4]; /* the corners for the non-bonded communication */
7667 real cr0; /* corner for rounding */
7668 real cr1[4]; /* corners for rounding */
7669 real bc[DIM]; /* corners for bounded communication */
7670 real bcr1; /* corner for rounding for bonded communication */
7673 /* Determine the corners of the domain(s) we are communicating with */
7675 set_dd_corners(const gmx_domdec_t *dd,
7676 int dim0, int dim1, int dim2,
/* Fills c with the lower corners per communication dimension (c->c),
 * the multi-body corners (c->bc), and the upper-right rounding corners
 * (cr0, cr1, bcr1), taking DLB staggering (zone_d1/zone_d2) into
 * account when active. */
7680 const gmx_domdec_comm_t *comm;
7681 const gmx_domdec_zones_t *zones;
7686 zones = &comm->zones;
7688 /* Keep the compiler happy */
7692 /* The first dimension is equal for all cells */
7693 c->c[0][0] = comm->cell_x0[dim0];
7696 c->bc[0] = c->c[0][0];
7701 /* This cell row is only seen from the first row */
7702 c->c[1][0] = comm->cell_x0[dim1];
7703 /* All rows can see this row */
7704 c->c[1][1] = comm->cell_x0[dim1];
/* With staggered (DLB) grids use the neighbor's max lower bound. */
7707 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7710 /* For the multi-body distance we need the maximum */
7711 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7714 /* Set the upper-right corner for rounding */
7715 c->cr0 = comm->cell_x1[dim0];
7720 for (j = 0; j < 4; j++)
7722 c->c[2][j] = comm->cell_x0[dim2];
7726 /* Use the maximum of the i-cells that see a j-cell */
7727 for (i = 0; i < zones->nizone; i++)
7729 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7735 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7741 /* For the multi-body distance we need the maximum */
7742 c->bc[2] = comm->cell_x0[dim2];
7743 for (i = 0; i < 2; i++)
7745 for (j = 0; j < 2; j++)
7747 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7753 /* Set the upper-right corner for rounding */
7754 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7755 * Only cell (0,0,0) can see cell 7 (1,1,1)
7757 c->cr1[0] = comm->cell_x1[dim1];
7758 c->cr1[3] = comm->cell_x1[dim1];
7761 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7764 /* For the multi-body distance we need the maximum */
7765 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7772 /* Determine which cg's we need to send in this pulse from this zone */
7774 get_zone_pulse_cgs(gmx_domdec_t *dd,
7775 int zonei, int zone,
7777 const int *index_gl,
7779 int dim, int dim_ind,
7780 int dim0, int dim1, int dim2,
7781 real r_comm2, real r_bcomm2,
7785 real skew_fac2_d, real skew_fac_01,
7786 rvec *v_d, rvec *v_0, rvec *v_1,
7787 const dd_corners_t *c,
7789 gmx_bool bDistBonded,
7795 gmx_domdec_ind_t *ind,
7796 int **ibuf, int *ibuf_nalloc,
/* Scans charge groups [cg0,cg1) of this zone and selects those whose
 * (squared) distance to the receiving domain's corner planes is within
 * r_comm2 (non-bonded) or r_bcomm2 (bonded with a missing link); the
 * selected local indices go into ind->index, global indices into *ibuf
 * and (possibly PBC/screw-corrected) positions into vbuf. */
7802 gmx_domdec_comm_t *comm;
7804 gmx_bool bDistMB_pulse;
7806 real r2, rb2, r, tric_sh;
7809 int nsend_z, nsend, nat;
7813 bScrew = (dd->bScrewPBC && dim == XX);
7815 bDistMB_pulse = (bDistMB && bDistBonded);
7821 for (cg = cg0; cg < cg1; cg++)
7825 if (tric_dist[dim_ind] == 0)
7827 /* Rectangular direction, easy */
7828 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7835 r = cg_cm[cg][dim] - c->bc[dim_ind];
7841 /* Rounding gives at most a 16% reduction
7842 * in communicated atoms
7844 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7846 r = cg_cm[cg][dim0] - c->cr0;
7847 /* This is the first dimension, so always r >= 0 */
7854 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7856 r = cg_cm[cg][dim1] - c->cr1[zone];
7863 r = cg_cm[cg][dim1] - c->bcr1;
7873 /* Triclinic direction, more complicated */
7876 /* Rounding, conservative as the skew_fac multiplication
7877 * will slightly underestimate the distance.
7879 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7881 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7882 for (i = dim0+1; i < DIM; i++)
7884 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7886 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7889 rb[dim0] = rn[dim0];
7892 /* Take care that the cell planes along dim0 might not
7893 * be orthogonal to those along dim1 and dim2.
7895 for (i = 1; i <= dim_ind; i++)
7898 if (normal[dim0][dimd] > 0)
7900 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7903 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7908 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7910 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7912 for (i = dim1+1; i < DIM; i++)
7914 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7916 rn[dim1] += tric_sh;
7919 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7920 /* Take care of coupling of the distances
7921 * to the planes along dim0 and dim1 through dim2.
7923 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7924 /* Take care that the cell planes along dim1
7925 * might not be orthogonal to that along dim2.
7927 if (normal[dim1][dim2] > 0)
7929 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7935 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7938 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7939 /* Take care of coupling of the distances
7940 * to the planes along dim0 and dim1 through dim2.
7942 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7943 /* Take care that the cell planes along dim1
7944 * might not be orthogonal to that along dim2.
7946 if (normal[dim1][dim2] > 0)
7948 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7953 /* The distance along the communication direction */
7954 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7956 for (i = dim+1; i < DIM; i++)
7958 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7963 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7964 /* Take care of coupling of the distances
7965 * to the planes along dim0 and dim1 through dim2.
7967 if (dim_ind == 1 && zonei == 1)
7969 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
7975 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
7978 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
7979 /* Take care of coupling of the distances
7980 * to the planes along dim0 and dim1 through dim2.
7982 if (dim_ind == 1 && zonei == 1)
7984 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
/* Selection: within non-bonded range, or within bonded range with a
 * bonded link to a charge group that is not yet local. */
7992 ((bDistMB && rb2 < r_bcomm2) ||
7993 (bDist2B && r2 < r_bcomm2)) &&
7995 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
7996 missing_link(comm->cglink, index_gl[cg],
7999 /* Make an index to the local charge groups */
8000 if (nsend+1 > ind->nalloc)
8002 ind->nalloc = over_alloc_large(nsend+1);
8003 srenew(ind->index, ind->nalloc);
8005 if (nsend+1 > *ibuf_nalloc)
8007 *ibuf_nalloc = over_alloc_large(nsend+1);
8008 srenew(*ibuf, *ibuf_nalloc);
8010 ind->index[nsend] = cg;
8011 (*ibuf)[nsend] = index_gl[cg];
8013 vec_rvec_check_alloc(vbuf, nsend+1);
8015 if (dd->ci[dim] == 0)
8017 /* Correct cg_cm for pbc */
8018 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
/* Screw PBC: the periodic image is also rotated in y and z. */
8021 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8022 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8027 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8030 nat += cgindex[cg+1] - cgindex[cg];
8036 *nsend_z_ptr = nsend_z;
/* Set up the halo (non-home zone) communication for one DD partitioning step.
 *
 * For every decomposed dimension and every pulse along it, this selects the
 * charge groups to send backward (via get_zone_pulse_cgs, threaded with
 * OpenMP), exchanges counts/indices/positions with the neighbor ranks, and
 * rebuilds the zone charge-group ranges, the global index array and cgindex.
 * On exit dd->ncg_tot / dd->nat_tot and comm->nat[] reflect home + halo.
 *
 * NOTE(review): distance criteria use comm->cutoff (non-bonded) and
 * comm->cutoff_mbody (multi-body bondeds); bDistMB/bDist2B select which
 * extra bonded distances must be covered — TODO confirm against callers.
 */
8039 static void setup_dd_communication(gmx_domdec_t *dd,
8040 matrix box, gmx_ddbox_t *ddbox,
8041 t_forcerec *fr, t_state *state, rvec **f)
8043 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8044 int nzone, nzone_send, zone, zonei, cg0, cg1;
8045 int c, i, j, cg, cg_gl, nrcg;
8046 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8047 gmx_domdec_comm_t *comm;
8048 gmx_domdec_zones_t *zones;
8049 gmx_domdec_comm_dim_t *cd;
8050 gmx_domdec_ind_t *ind;
8051 cginfo_mb_t *cginfo_mb;
8052 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8053 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8054 dd_corners_t corners;
8056 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8057 real skew_fac2_d, skew_fac_01;
8064 fprintf(debug, "Setting up DD communication\n")
8069 switch (fr->cutoff_scheme)
8078 gmx_incons("unimplemented");
/* Mark, per decomposed dimension, whether any dimension up to it is
 * triclinic; if so, distances must be measured against tilted planes. */
8082 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8084 dim = dd->dim[dim_ind];
8086 /* Check if we need to use triclinic distances */
8087 tric_dist[dim_ind] = 0;
8088 for (i = 0; i <= dim_ind; i++)
8090 if (ddbox->tric_dir[dd->dim[i]])
8092 tric_dist[dim_ind] = 1;
8097 bBondComm = comm->bBondComm;
8099 /* Do we need to determine extra distances for multi-body bondeds? */
8100 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8102 /* Do we need to determine extra distances for only two-body bondeds? */
8103 bDist2B = (bBondComm && !bDistMB);
/* Squared cutoffs: comparing r2 against these avoids sqrt per charge group */
8105 r_comm2 = sqr(comm->cutoff);
8106 r_bcomm2 = sqr(comm->cutoff_mbody);
8110 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8113 zones = &comm->zones;
8116 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8117 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8119 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8121 /* Triclinic stuff */
8122 normal = ddbox->normal;
8126 v_0 = ddbox->v[dim0];
8127 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8129 /* Determine the coupling coefficient for the distances
8130 * to the cell planes along dim0 and dim1 through dim2.
8131 * This is required for correct rounding.
8134 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8137 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8143 v_1 = ddbox->v[dim1];
8146 zone_cg_range = zones->cg_range;
8147 index_gl = dd->index_gl;
8148 cgindex = dd->cgindex;
8149 cginfo_mb = fr->cginfo_mb;
/* Zone 0 is the home zone; halo zones are appended starting at pos_cg */
8151 zone_cg_range[0] = 0;
8152 zone_cg_range[1] = dd->ncg_home;
8153 comm->zone_ncg1[0] = dd->ncg_home;
8154 pos_cg = dd->ncg_home;
8156 nat_tot = dd->nat_home;
/* Main loop: one iteration per decomposed dimension, with cd->np
 * communication pulses along each dimension. */
8158 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8160 dim = dd->dim[dim_ind];
8161 cd = &comm->cd[dim_ind];
8163 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8165 /* No pbc in this dimension, the first node should not comm. */
8173 v_d = ddbox->v[dim];
8174 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8176 cd->bInPlace = TRUE;
8177 for (p = 0; p < cd->np; p++)
8179 /* Only atoms communicated in the first pulse are used
8180 * for multi-body bonded interactions or for bBondComm.
8182 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8187 for (zone = 0; zone < nzone_send; zone++)
8189 if (tric_dist[dim_ind] && dim_ind > 0)
8191 /* Determine slightly more optimized skew_fac's
8193 * This reduces the number of communicated atoms
8194 * by about 10% for 3D DD of rhombic dodecahedra.
8196 for (dimd = 0; dimd < dim; dimd++)
8198 sf2_round[dimd] = 1;
8199 if (ddbox->tric_dir[dimd])
8201 for (i = dd->dim[dimd]+1; i < DIM; i++)
8203 /* If we are shifted in dimension i
8204 * and the cell plane is tilted forward
8205 * in dimension i, skip this coupling.
8207 if (!(zones->shift[nzone+zone][i] &&
8208 ddbox->v[dimd][i][dimd] >= 0))
8211 sqr(ddbox->v[dimd][i][dimd]);
8214 sf2_round[dimd] = 1/sf2_round[dimd];
8219 zonei = zone_perm[dim_ind][zone];
8222 /* Here we permutate the zones to obtain a convenient order
8223 * for neighbor searching
8225 cg0 = zone_cg_range[zonei];
8226 cg1 = zone_cg_range[zonei+1];
8230 /* Look only at the cg's received in the previous grid pulse
8232 cg1 = zone_cg_range[nzone+zone+1];
8233 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
/* Parallel selection: each thread scans its slice of [cg0,cg1);
 * thread 0 writes directly into the comm buffers, other threads
 * use per-thread scratch merged sequentially below. */
8236 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8237 for (th = 0; th < comm->nth; th++)
8239 gmx_domdec_ind_t *ind_p;
8240 int **ibuf_p, *ibuf_nalloc_p;
8242 int *nsend_p, *nat_p;
8248 /* Thread 0 writes in the comm buffers */
8250 ibuf_p = &comm->buf_int;
8251 ibuf_nalloc_p = &comm->nalloc_int;
8252 vbuf_p = &comm->vbuf;
8255 nsend_zone_p = &ind->nsend[zone];
8259 /* Other threads write into temp buffers */
8260 ind_p = &comm->dth[th].ind;
8261 ibuf_p = &comm->dth[th].ibuf;
8262 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8263 vbuf_p = &comm->dth[th].vbuf;
8264 nsend_p = &comm->dth[th].nsend;
8265 nat_p = &comm->dth[th].nat;
8266 nsend_zone_p = &comm->dth[th].nsend_zone;
8268 comm->dth[th].nsend = 0;
8269 comm->dth[th].nat = 0;
8270 comm->dth[th].nsend_zone = 0;
8280 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8281 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8284 /* Get the cg's for this pulse in this zone */
8285 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8287 dim, dim_ind, dim0, dim1, dim2,
8290 normal, skew_fac2_d, skew_fac_01,
8291 v_d, v_0, v_1, &corners, sf2_round,
8292 bDistBonded, bBondComm,
8296 ibuf_p, ibuf_nalloc_p,
8302 /* Append data of threads>=1 to the communication buffers */
8303 for (th = 1; th < comm->nth; th++)
8305 dd_comm_setup_work_t *dth;
8308 dth = &comm->dth[th];
8310 ns1 = nsend + dth->nsend_zone;
8311 if (ns1 > ind->nalloc)
8313 ind->nalloc = over_alloc_dd(ns1);
8314 srenew(ind->index, ind->nalloc);
8316 if (ns1 > comm->nalloc_int)
8318 comm->nalloc_int = over_alloc_dd(ns1);
8319 srenew(comm->buf_int, comm->nalloc_int);
8321 if (ns1 > comm->vbuf.nalloc)
8323 comm->vbuf.nalloc = over_alloc_dd(ns1);
8324 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8327 for (i = 0; i < dth->nsend_zone; i++)
8329 ind->index[nsend] = dth->ind.index[i];
8330 comm->buf_int[nsend] = dth->ibuf[i];
8331 copy_rvec(dth->vbuf.v[i],
8332 comm->vbuf.v[nsend]);
8336 ind->nsend[zone] += dth->nsend_zone;
8339 /* Clear the counts in case we do not have pbc */
8340 for (zone = nzone_send; zone < nzone; zone++)
8342 ind->nsend[zone] = 0;
/* Slots nzone and nzone+1 carry the cg and atom totals respectively */
8344 ind->nsend[nzone] = nsend;
8345 ind->nsend[nzone+1] = nat;
8346 /* Communicate the number of cg's and atoms to receive */
8347 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8348 ind->nsend, nzone+2,
8349 ind->nrecv, nzone+2);
8351 /* The rvec buffer is also required for atom buffers of size nsend
8352 * in dd_move_x and dd_move_f.
8354 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8358 /* We can receive in place if only the last zone is not empty */
8359 for (zone = 0; zone < nzone-1; zone++)
8361 if (ind->nrecv[zone] > 0)
8363 cd->bInPlace = FALSE;
8368 /* The int buffer is only required here for the cg indices */
8369 if (ind->nrecv[nzone] > comm->nalloc_int2)
8371 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8372 srenew(comm->buf_int2, comm->nalloc_int2);
8374 /* The rvec buffer is also required for atom buffers
8375 * of size nrecv in dd_move_x and dd_move_f.
8377 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8378 vec_rvec_check_alloc(&comm->vbuf2, i);
8382 /* Make space for the global cg indices */
8383 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8384 || dd->cg_nalloc == 0)
8386 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8387 srenew(index_gl, dd->cg_nalloc);
8388 srenew(cgindex, dd->cg_nalloc+1);
8390 /* Communicate the global cg indices */
8393 recv_i = index_gl + pos_cg;
8397 recv_i = comm->buf_int2;
8399 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8400 comm->buf_int, nsend,
8401 recv_i, ind->nrecv[nzone]);
8403 /* Make space for cg_cm */
8404 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8405 if (fr->cutoff_scheme == ecutsGROUP)
8413 /* Communicate cg_cm */
8416 recv_vr = cg_cm + pos_cg;
8420 recv_vr = comm->vbuf2.v;
8422 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8423 comm->vbuf.v, nsend,
8424 recv_vr, ind->nrecv[nzone]);
8426 /* Make the charge group index */
8429 zone = (p == 0 ? 0 : nzone - 1);
8430 while (zone < nzone)
8432 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8434 cg_gl = index_gl[pos_cg];
8435 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8436 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8437 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8440 /* Update the charge group presence,
8441 * so we can use it in the next pass of the loop.
8443 comm->bLocalCG[cg_gl] = TRUE;
8449 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8452 zone_cg_range[nzone+zone] = pos_cg;
8457 /* This part of the code is never executed with bBondComm. */
8458 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8459 index_gl, recv_i, cg_cm, recv_vr,
8460 cgindex, fr->cginfo_mb, fr->cginfo);
8461 pos_cg += ind->nrecv[nzone];
8463 nat_tot += ind->nrecv[nzone+1];
8467 /* Store the atom block for easy copying of communication buffers */
8468 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
/* srenew above may have moved the arrays; publish the new pointers */
8472 dd->index_gl = index_gl;
8473 dd->cgindex = cgindex;
8475 dd->ncg_tot = zone_cg_range[zones->n];
8476 dd->nat_tot = nat_tot;
8477 comm->nat[ddnatHOME] = dd->nat_home;
8478 for (i = ddnatZONE; i < ddnatNR; i++)
8480 comm->nat[i] = dd->nat_tot;
8485 /* We don't need to update cginfo, since that was already done above.
8486 * So we pass NULL for the forcerec.
8488 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8489 NULL, comm->bLocalCG);
8494 fprintf(debug, "Finished setting up DD communication, zones:");
8495 for (c = 0; c < zones->n; c++)
8497 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8499 fprintf(debug, "\n");
8503 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8507 for (c = 0; c < zones->nizone; c++)
8509 zones->izone[c].cg1 = zones->cg_range[c+1];
8510 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8511 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
/* Determine the spatial extents of the DD zones [zone_start, zone_end).
 *
 * Starts from the home cell limits, corrects shifted/staggered dimensions
 * using the neighbor limits gathered in comm->zone_d1/zone_d2, widens zones
 * by the cutoff (and the multi-body cutoff when bDistMB), and finally
 * computes a triclinic-corrected bounding box (bb_x0/bb_x1) per zone.
 * Used by the Verlet scheme for nbnxn grid setup.
 */
8515 static void set_zones_size(gmx_domdec_t *dd,
8516 matrix box, const gmx_ddbox_t *ddbox,
8517 int zone_start, int zone_end)
8519 gmx_domdec_comm_t *comm;
8520 gmx_domdec_zones_t *zones;
8522 int z, zi, zj0, zj1, d, dim;
8525 real size_j, add_tric;
8530 zones = &comm->zones;
8532 /* Do we need to determine extra distances for multi-body bondeds? */
8533 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8535 for (z = zone_start; z < zone_end; z++)
8537 /* Copy cell limits to zone limits.
8538 * Valid for non-DD dims and non-shifted dims.
8540 copy_rvec(comm->cell_x0, zones->size[z].x0);
8541 copy_rvec(comm->cell_x1, zones->size[z].x1);
8544 for (d = 0; d < dd->ndim; d++)
8548 for (z = 0; z < zones->n; z++)
8550 /* With a staggered grid we have different sizes
8551 * for non-shifted dimensions.
8553 if (dd->bGridJump && zones->shift[z][dim] == 0)
/* d==1 uses the 1D stagger table, d==2 the 2D table */
8557 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8558 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8562 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8563 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
8569 rcmbs = comm->cutoff_mbody;
/* In triclinic directions cutoffs are distances to tilted planes,
 * so rescale by the skew factor. */
8570 if (ddbox->tric_dir[dim])
8572 rcs /= ddbox->skew_fac[dim];
8573 rcmbs /= ddbox->skew_fac[dim];
8576 /* Set the lower limit for the shifted zone dimensions */
8577 for (z = zone_start; z < zone_end; z++)
8579 if (zones->shift[z][dim] > 0)
8582 if (!dd->bGridJump || d == 0)
8584 zones->size[z].x0[dim] = comm->cell_x1[dim];
8585 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8589 /* Here we take the lower limit of the zone from
8590 * the lowest domain of the zone below.
8594 zones->size[z].x0[dim] =
8595 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8601 zones->size[z].x0[dim] =
8602 zones->size[zone_perm[2][z-4]].x0[dim];
8606 zones->size[z].x0[dim] =
8607 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8610 /* A temporary limit, is updated below */
8611 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8615 for (zi = 0; zi < zones->nizone; zi++)
8617 if (zones->shift[zi][dim] == 0)
8619 /* This takes the whole zone into account.
8620 * With multiple pulses this will lead
8621 * to a larger zone than strictly necessary.
8623 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8624 zones->size[zi].x1[dim]+rcmbs);
8632 /* Loop over the i-zones to set the upper limit of each
8635 for (zi = 0; zi < zones->nizone; zi++)
8637 if (zones->shift[zi][dim] == 0)
8639 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8641 if (zones->shift[z][dim] > 0)
8643 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8644 zones->size[zi].x1[dim]+rcs);
/* Second pass: build the triclinic-corrected bounding box of each zone */
8651 for (z = zone_start; z < zone_end; z++)
8653 /* Initialization only required to keep the compiler happy */
8654 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8657 /* To determine the bounding box for a zone we need to find
8658 * the extreme corners of 4, 2 or 1 corners.
8660 nc = 1 << (ddbox->npbcdim - 1);
8662 for (c = 0; c < nc; c++)
8664 /* Set up a zone corner at x=0, ignoring triclinic couplings */
8668 corner[YY] = zones->size[z].x0[YY];
8672 corner[YY] = zones->size[z].x1[YY];
8676 corner[ZZ] = zones->size[z].x0[ZZ];
8680 corner[ZZ] = zones->size[z].x1[ZZ];
8682 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8684 /* With 1D domain decomposition the cg's are not in
8685 * the triclinic box, but triclinic x-y and rectangular y-z.
8686 * Shift y back, so it will later end up at 0.
8688 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8690 /* Apply the triclinic couplings */
8691 assert(ddbox->npbcdim <= DIM);
8692 for (i = YY; i < ddbox->npbcdim; i++)
8694 for (j = XX; j < i; j++)
8696 corner[j] += corner[i]*box[i][j]/box[i][i];
8701 copy_rvec(corner, corner_min);
8702 copy_rvec(corner, corner_max);
8706 for (i = 0; i < DIM; i++)
8708 corner_min[i] = min(corner_min[i], corner[i]);
8709 corner_max[i] = max(corner_max[i], corner[i]);
8713 /* Copy the extreme corners without offset along x */
8714 for (i = 0; i < DIM; i++)
8716 zones->size[z].bb_x0[i] = corner_min[i];
8717 zones->size[z].bb_x1[i] = corner_max[i];
8719 /* Add the offset along x */
8720 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8721 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
/* Home-zone density, used for nbnxn grid sizing */
8724 if (zone_start == 0)
8727 for (dim = 0; dim < DIM; dim++)
8729 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8731 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8736 for (z = zone_start; z < zone_end; z++)
8738 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8740 zones->size[z].x0[XX], zones->size[z].x1[XX],
8741 zones->size[z].x0[YY], zones->size[z].x1[YY],
8742 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8743 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8745 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8746 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8747 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
8752 static int comp_cgsort(const void *a, const void *b)
8756 gmx_cgsort_t *cga, *cgb;
8757 cga = (gmx_cgsort_t *)a;
8758 cgb = (gmx_cgsort_t *)b;
8760 comp = cga->nsc - cgb->nsc;
8763 comp = cga->ind_gl - cgb->ind_gl;
8769 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8774 /* Order the data */
8775 for (i = 0; i < n; i++)
8777 buf[i] = a[sort[i].ind];
8780 /* Copy back to the original array */
8781 for (i = 0; i < n; i++)
8787 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8792 /* Order the data */
8793 for (i = 0; i < n; i++)
8795 copy_rvec(v[sort[i].ind], buf[i]);
8798 /* Copy back to the original array */
8799 for (i = 0; i < n; i++)
8801 copy_rvec(buf[i], v[i]);
8805 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8808 int a, atot, cg, cg0, cg1, i;
8810 if (cgindex == NULL)
8812 /* Avoid the useless loop of the atoms within a cg */
8813 order_vec_cg(ncg, sort, v, buf);
8818 /* Order the data */
8820 for (cg = 0; cg < ncg; cg++)
8822 cg0 = cgindex[sort[cg].ind];
8823 cg1 = cgindex[sort[cg].ind+1];
8824 for (i = cg0; i < cg1; i++)
8826 copy_rvec(v[i], buf[a]);
8832 /* Copy back to the original array */
8833 for (a = 0; a < atot; a++)
8835 copy_rvec(buf[a], v[a]);
8839 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8840 int nsort_new, gmx_cgsort_t *sort_new,
8841 gmx_cgsort_t *sort1)
8845 /* The new indices are not very ordered, so we qsort them */
8846 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8848 /* sort2 is already ordered, so now we can merge the two arrays */
8852 while (i2 < nsort2 || i_new < nsort_new)
8856 sort1[i1++] = sort_new[i_new++];
8858 else if (i_new == nsort_new)
8860 sort1[i1++] = sort2[i2++];
8862 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8863 (sort2[i2].nsc == sort_new[i_new].nsc &&
8864 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8866 sort1[i1++] = sort2[i2++];
8870 sort1[i1++] = sort_new[i_new++];
/* Build the charge-group sort order for the group cutoff scheme.
 *
 * Sorts the home charge groups by ns grid cell index (ties broken by
 * global index). When ncg_home_old >= 0, cgs that stayed in their grid
 * cell are already ordered, so only the moved/new ones are qsorted and
 * then merged (ordered_sort); otherwise the whole list is qsorted.
 * Returns the number of cgs that remain home (nsc < the "moved" signal
 * value written by the redistribution code).
 */
8875 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8877 gmx_domdec_sort_t *sort;
8878 gmx_cgsort_t *cgsort, *sort_i;
8879 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8880 int sort_last, sort_skip;
8882 sort = dd->comm->sort;
8884 a = fr->ns.grid->cell_index;
/* Cell indices >= moved flag cgs that left this node (signal value) */
8886 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8888 if (ncg_home_old >= 0)
8890 /* The charge groups that remained in the same ns grid cell
8891 * are completely ordered. So we can sort efficiently by sorting
8892 * the charge groups that did move into the stationary list.
8897 for (i = 0; i < dd->ncg_home; i++)
8899 /* Check if this cg did not move to another node */
8902 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8904 /* This cg is new on this node or moved ns grid cell */
8905 if (nsort_new >= sort->sort_new_nalloc)
8907 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8908 srenew(sort->sort_new, sort->sort_new_nalloc);
8910 sort_i = &(sort->sort_new[nsort_new++]);
8914 /* This cg did not move */
8915 sort_i = &(sort->sort2[nsort2++]);
8917 /* Sort on the ns grid cell indices
8918 * and the global topology index.
8919 * index_gl is irrelevant with cell ns,
8920 * but we set it here anyhow to avoid a conditional.
8923 sort_i->ind_gl = dd->index_gl[i];
8930 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8933 /* Sort efficiently */
8934 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8939 cgsort = sort->sort;
/* Full qsort path: fill the sort array from scratch */
8941 for (i = 0; i < dd->ncg_home; i++)
8943 /* Sort on the ns grid cell indices
8944 * and the global topology index
8946 cgsort[i].nsc = a[i];
8947 cgsort[i].ind_gl = dd->index_gl[i];
8949 if (cgsort[i].nsc < moved)
8956 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8958 /* Determine the order of the charge groups using qsort */
8959 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
8965 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
8968 int ncg_new, i, *a, na;
8970 sort = dd->comm->sort->sort;
8972 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
8975 for (i = 0; i < na; i++)
8979 sort[ncg_new].ind = a[i];
/* Reorder the local state into charge-group sort order.
 *
 * Determines the order (dd_sort_order / dd_sort_order_nbnxn depending on
 * the cutoff scheme), then permutes every distributed state array
 * (x, v, sd_X, cg_p), cg_cm (group scheme), the global cg index, cginfo,
 * and rebuilds dd->cgindex and dd->nat_home to the new, possibly smaller,
 * home set. Enables exact restarts and improves cache locality.
 */
8987 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
8990 gmx_domdec_sort_t *sort;
8991 gmx_cgsort_t *cgsort, *sort_i;
8993 int ncg_new, i, *ibuf, cgsize;
8996 sort = dd->comm->sort;
8998 if (dd->ncg_home > sort->sort_nalloc)
9000 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9001 srenew(sort->sort, sort->sort_nalloc);
9002 srenew(sort->sort2, sort->sort_nalloc);
9004 cgsort = sort->sort;
9006 switch (fr->cutoff_scheme)
9009 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9012 ncg_new = dd_sort_order_nbnxn(dd, fr);
9015 gmx_incons("unimplemented");
9019 /* We alloc with the old size, since cgindex is still old */
9020 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9021 vbuf = dd->comm->vbuf.v;
9025 cgindex = dd->cgindex;
9032 /* Remove the charge groups which are no longer at home here */
9033 dd->ncg_home = ncg_new;
9036 fprintf(debug, "Set the new home charge group count to %d\n",
9040 /* Reorder the state */
9041 for (i = 0; i < estNR; i++)
9043 if (EST_DISTR(i) && (state->flags & (1<<i)))
9048 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9051 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9054 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9057 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9061 case estDISRE_INITF:
9062 case estDISRE_RM3TAV:
9063 case estORIRE_INITF:
9065 /* No ordering required */
9068 gmx_incons("Unknown state entry encountered in dd_sort_state");
9073 if (fr->cutoff_scheme == ecutsGROUP)
9076 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9079 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9081 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9082 srenew(sort->ibuf, sort->ibuf_nalloc);
9085 /* Reorder the global cg index */
9086 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9087 /* Reorder the cginfo */
9088 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9089 /* Rebuild the local cg index */
/* Prefix-sum of the (old) per-cg sizes in the new order */
9093 for (i = 0; i < dd->ncg_home; i++)
9095 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9096 ibuf[i+1] = ibuf[i] + cgsize;
9098 for (i = 0; i < dd->ncg_home+1; i++)
9100 dd->cgindex[i] = ibuf[i];
9105 for (i = 0; i < dd->ncg_home+1; i++)
9110 /* Set the home atom number */
9111 dd->nat_home = dd->cgindex[dd->ncg_home];
9113 if (fr->cutoff_scheme == ecutsVERLET)
9115 /* The atoms are now exactly in grid order, update the grid order */
9116 nbnxn_set_atomorder(fr->nbv->nbs);
9120 /* Copy the sorted ns cell indices back to the ns grid struct */
9121 for (i = 0; i < dd->ncg_home; i++)
9123 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9125 fr->ns.grid->nr = dd->ncg_home;
9129 static void add_dd_statistics(gmx_domdec_t *dd)
9131 gmx_domdec_comm_t *comm;
9136 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9138 comm->sum_nat[ddnat-ddnatZONE] +=
9139 comm->nat[ddnat] - comm->nat[ddnat-1];
9144 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9146 gmx_domdec_comm_t *comm;
9151 /* Reset all the statistics and counters for total run counting */
9152 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9154 comm->sum_nat[ddnat-ddnatZONE] = 0;
9158 comm->load_step = 0;
9161 clear_ivec(comm->load_lim);
/* Print end-of-run DD communication statistics to the log file.
 *
 * Sums the per-rank atom counts over all ranks (gmx_sumd), then reports
 * the average number of atoms communicated per step for forces, vsites
 * and LINCS. The multiplier printed before the 'x' is the number of
 * communication phases per step for that category (e.g. 3 or 2 for
 * vsites depending on whether long-range electrostatics needs forces
 * back-communicated). Finally prints the load-balance summary when
 * load was recorded for a dynamics run.
 */
9166 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9168 gmx_domdec_comm_t *comm;
9172 comm = cr->dd->comm;
9174 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9181 fprintf(fplog, "\n    D O M A I N   D E C O M P O S I T I O N   S T A T I S T I C S\n\n");
9183 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9185 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9190 " av. #atoms communicated per step for force:  %d x %.1f\n",
9194 if (cr->dd->vsite_comm)
9197 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9198 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9203 if (cr->dd->constraint_comm)
9206 " av. #atoms communicated per step for LINCS:  %d x %.1f\n",
9207 1 + ir->nLincsIter, av);
9211 gmx_incons(" Unknown type for DD statistics");
9214 fprintf(fplog, "\n");
9216 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9218 print_dd_load_av(fplog, cr->dd);
9222 void dd_partition_system(FILE *fplog,
9225 gmx_bool bMasterState,
9227 t_state *state_global,
9228 gmx_mtop_t *top_global,
9230 t_state *state_local,
9233 gmx_localtop_t *top_local,
9236 gmx_shellfc_t shellfc,
9237 gmx_constr_t constr,
9239 gmx_wallcycle_t wcycle,
9243 gmx_domdec_comm_t *comm;
9244 gmx_ddbox_t ddbox = {0};
9246 gmx_int64_t step_pcoupl;
9247 rvec cell_ns_x0, cell_ns_x1;
9248 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9249 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9250 gmx_bool bRedist, bSortCG, bResortAll;
9251 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9258 bBoxChanged = (bMasterState || DEFORM(*ir));
9259 if (ir->epc != epcNO)
9261 /* With nstpcouple > 1 pressure coupling happens.
9262 * one step after calculating the pressure.
9263 * Box scaling happens at the end of the MD step,
9264 * after the DD partitioning.
9265 * We therefore have to do DLB in the first partitioning
9266 * after an MD step where P-coupling occured.
9267 * We need to determine the last step in which p-coupling occurred.
9268 * MRS -- need to validate this for vv?
9273 step_pcoupl = step - 1;
9277 step_pcoupl = ((step - 1)/n)*n + 1;
9279 if (step_pcoupl >= comm->partition_step)
9285 bNStGlobalComm = (step % nstglobalcomm == 0);
9287 if (!comm->bDynLoadBal)
9293 /* Should we do dynamic load balacing this step?
9294 * Since it requires (possibly expensive) global communication,
9295 * we might want to do DLB less frequently.
9297 if (bBoxChanged || ir->epc != epcNO)
9299 bDoDLB = bBoxChanged;
9303 bDoDLB = bNStGlobalComm;
9307 /* Check if we have recorded loads on the nodes */
9308 if (comm->bRecordLoad && dd_load_count(comm))
9310 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9312 /* Check if we should use DLB at the second partitioning
9313 * and every 100 partitionings,
9314 * so the extra communication cost is negligible.
9316 n = max(100, nstglobalcomm);
9317 bCheckDLB = (comm->n_load_collect == 0 ||
9318 comm->n_load_have % n == n-1);
9325 /* Print load every nstlog, first and last step to the log file */
9326 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9327 comm->n_load_collect == 0 ||
9329 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9331 /* Avoid extra communication due to verbose screen output
9332 * when nstglobalcomm is set.
9334 if (bDoDLB || bLogLoad || bCheckDLB ||
9335 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9337 get_load_distribution(dd, wcycle);
9342 dd_print_load(fplog, dd, step-1);
9346 dd_print_load_verbose(dd);
9349 comm->n_load_collect++;
9353 /* Since the timings are node dependent, the master decides */
9357 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
9360 fprintf(debug, "step %s, imb loss %f\n",
9361 gmx_step_str(step, sbuf),
9362 dd_force_imb_perf_loss(dd));
9365 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9368 turn_on_dlb(fplog, cr, step);
9373 comm->n_load_have++;
9376 cgs_gl = &comm->cgs_gl;
9381 /* Clear the old state */
9382 clear_dd_indices(dd, 0, 0);
9385 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9386 TRUE, cgs_gl, state_global->x, &ddbox);
9388 get_cg_distribution(fplog, step, dd, cgs_gl,
9389 state_global->box, &ddbox, state_global->x);
9391 dd_distribute_state(dd, cgs_gl,
9392 state_global, state_local, f);
9394 dd_make_local_cgs(dd, &top_local->cgs);
9396 /* Ensure that we have space for the new distribution */
9397 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9399 if (fr->cutoff_scheme == ecutsGROUP)
9401 calc_cgcm(fplog, 0, dd->ncg_home,
9402 &top_local->cgs, state_local->x, fr->cg_cm);
9405 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9407 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9409 else if (state_local->ddp_count != dd->ddp_count)
9411 if (state_local->ddp_count > dd->ddp_count)
9413 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9416 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9418 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9421 /* Clear the old state */
9422 clear_dd_indices(dd, 0, 0);
9424 /* Build the new indices */
9425 rebuild_cgindex(dd, cgs_gl->index, state_local);
9426 make_dd_indices(dd, cgs_gl->index, 0);
9427 ncgindex_set = dd->ncg_home;
9429 if (fr->cutoff_scheme == ecutsGROUP)
9431 /* Redetermine the cg COMs */
9432 calc_cgcm(fplog, 0, dd->ncg_home,
9433 &top_local->cgs, state_local->x, fr->cg_cm);
9436 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9438 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9440 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9441 TRUE, &top_local->cgs, state_local->x, &ddbox);
9443 bRedist = comm->bDynLoadBal;
9447 /* We have the full state, only redistribute the cgs */
9449 /* Clear the non-home indices */
9450 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9453 /* Avoid global communication for dim's without pbc and -gcom */
9454 if (!bNStGlobalComm)
9456 copy_rvec(comm->box0, ddbox.box0 );
9457 copy_rvec(comm->box_size, ddbox.box_size);
9459 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9460 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9465 /* For dim's without pbc and -gcom */
9466 copy_rvec(ddbox.box0, comm->box0 );
9467 copy_rvec(ddbox.box_size, comm->box_size);
9469 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9472 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9474 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9477 /* Check if we should sort the charge groups */
9478 if (comm->nstSortCG > 0)
9480 bSortCG = (bMasterState ||
9481 (bRedist && (step % comm->nstSortCG == 0)));
9488 ncg_home_old = dd->ncg_home;
9493 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9495 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9497 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9499 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9502 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9504 &comm->cell_x0, &comm->cell_x1,
9505 dd->ncg_home, fr->cg_cm,
9506 cell_ns_x0, cell_ns_x1, &grid_density);
9510 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9513 switch (fr->cutoff_scheme)
9516 copy_ivec(fr->ns.grid->n, ncells_old);
9517 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9518 state_local->box, cell_ns_x0, cell_ns_x1,
9519 fr->rlistlong, grid_density);
9522 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9525 gmx_incons("unimplemented");
9527 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9528 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9532 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9534 /* Sort the state on charge group position.
9535 * This enables exact restarts from this step.
9536 * It also improves performance by about 15% with larger numbers
9537 * of atoms per node.
9540 /* Fill the ns grid with the home cell,
9541 * so we can sort with the indices.
9543 set_zones_ncg_home(dd);
9545 switch (fr->cutoff_scheme)
9548 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9550 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9552 comm->zones.size[0].bb_x0,
9553 comm->zones.size[0].bb_x1,
9555 comm->zones.dens_zone0,
9558 ncg_moved, bRedist ? comm->moved : NULL,
9559 fr->nbv->grp[eintLocal].kernel_type,
9560 fr->nbv->grp[eintLocal].nbat);
9562 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9565 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9566 0, dd->ncg_home, fr->cg_cm);
9568 copy_ivec(fr->ns.grid->n, ncells_new);
9571 gmx_incons("unimplemented");
9574 bResortAll = bMasterState;
9576 /* Check if we can use the old order and ns grid cell indices
9577 * of the charge groups to sort the charge groups efficiently.
9579 if (ncells_new[XX] != ncells_old[XX] ||
9580 ncells_new[YY] != ncells_old[YY] ||
9581 ncells_new[ZZ] != ncells_old[ZZ])
9588 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9589 gmx_step_str(step, sbuf), dd->ncg_home);
9591 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9592 bResortAll ? -1 : ncg_home_old);
9593 /* Rebuild all the indices */
9594 ga2la_clear(dd->ga2la);
9597 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9600 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9602 /* Set up the communication and communicate the coordinates */
9603 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9605 /* Set the indices */
9606 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9608 /* Set the charge group boundaries for neighbor searching */
9609 set_cg_boundaries(&comm->zones);
9611 if (fr->cutoff_scheme == ecutsVERLET)
9613 set_zones_size(dd, state_local->box, &ddbox,
9614 bSortCG ? 1 : 0, comm->zones.n);
9617 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9620 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9621 -1,state_local->x,state_local->box);
9624 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9626 /* Extract a local topology from the global topology */
9627 for (i = 0; i < dd->ndim; i++)
9629 np[dd->dim[i]] = comm->cd[i].np;
9631 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9632 comm->cellsize_min, np,
9634 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9635 vsite, top_global, top_local);
9637 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9639 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9641 /* Set up the special atom communication */
9642 n = comm->nat[ddnatZONE];
9643 for (i = ddnatZONE+1; i < ddnatNR; i++)
9648 if (vsite && vsite->n_intercg_vsite)
9650 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9654 if (dd->bInterCGcons || dd->bInterCGsettles)
9656 /* Only for inter-cg constraints we need special code */
9657 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9658 constr, ir->nProjOrder,
9659 top_local->idef.il);
9663 gmx_incons("Unknown special atom type setup");
9668 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9670 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9672 /* Make space for the extra coordinates for virtual site
9673 * or constraint communication.
9675 state_local->natoms = comm->nat[ddnatNR-1];
9676 if (state_local->natoms > state_local->nalloc)
9678 dd_realloc_state(state_local, f, state_local->natoms);
9681 if (fr->bF_NoVirSum)
9683 if (vsite && vsite->n_intercg_vsite)
9685 nat_f_novirsum = comm->nat[ddnatVSITE];
9689 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9691 nat_f_novirsum = dd->nat_tot;
9695 nat_f_novirsum = dd->nat_home;
9704 /* Set the number of atoms required for the force calculation.
9705 * Forces need to be constrained when using a twin-range setup
9706 * or with energy minimization. For simple simulations we could
9707 * avoid some allocation, zeroing and copying, but this is
9708 * probably not worth the complications and checking.
9710 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9711 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9713 /* We make all the mdatoms up to nat_tot_con.
9714 * We could save some work by only setting invmass
9715 * between nat_tot and nat_tot_con.
9717 /* This call also sets the new number of home particles to dd->nat_home */
9718 atoms2md(top_global, ir,
9719 comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
9721 /* Now we have the charges we can sort the FE interactions */
9722 dd_sort_local_top(dd, mdatoms, top_local);
9726 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9727 split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
9732 /* Make the local shell stuff, currently no communication is done */
9733 make_local_shells(cr, mdatoms, shellfc);
9736 if (ir->implicit_solvent)
9738 make_local_gb(cr, fr->born, ir->gb_algorithm);
9741 setup_bonded_threading(fr, &top_local->idef);
9743 if (!(cr->duty & DUTY_PME))
9745 /* Send the charges and/or c6/sigmas to our PME only node */
9746 gmx_pme_send_parameters(cr, mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9747 mdatoms->chargeA, mdatoms->chargeB,
9748 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9749 mdatoms->sigmaA, mdatoms->sigmaB,
9750 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9755 set_constraints(constr, top_local, ir, mdatoms, cr);
9758 if (ir->ePull != epullNO)
9760 /* Update the local pull groups */
9761 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9766 /* Update the local rotation groups */
9767 dd_make_local_rotation_groups(dd, ir->rot);
9770 if (ir->eSwapCoords != eswapNO)
9772 /* Update the local groups needed for ion swapping */
9773 dd_make_local_swap_groups(dd, ir->swap);
9776 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9777 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9779 add_dd_statistics(dd);
9781 /* Make sure we only count the cycles for this DD partitioning */
9782 clear_dd_cycle_counts(dd);
9784 /* Because the order of the atoms might have changed since
9785 * the last vsite construction, we need to communicate the constructing
9786 * atom coordinates again (for spreading the forces this MD step).
9788 dd_move_x_vsites(dd, state_local->box, state_local->x);
9790 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9792 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9794 dd_move_x(dd, state_local->box, state_local->x);
9795 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9796 -1, state_local->x, state_local->box);
9799 /* Store the partitioning step */
9800 comm->partition_step = step;
9802 /* Increase the DD partitioning counter */
9804 /* The state currently matches this DD partitioning count, store it */
9805 state_local->ddp_count = dd->ddp_count;
9808 /* The DD master node knows the complete cg distribution,
9809 * store the count so we can possibly skip the cg info communication.
9811 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9814 if (comm->DD_debug > 0)
9816 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9817 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9818 "after partitioning");