1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
30 #include "gmx_fatal.h"
31 #include "gmx_fatal_collective.h"
34 #include "domdec_network.h"
37 #include "chargegroup.h"
44 #include "pull_rotation.h"
48 #include "mtop_util.h"
49 #include "gmx_ga2la.h"
52 #include "nbnxn_search.h"
54 #include "gmx_omp_nthreads.h"
55 #include "gpu_utils.h"
57 #include "gromacs/fileio/futil.h"
58 #include "gromacs/fileio/gmxfio.h"
59 #include "gromacs/fileio/pdbio.h"
60 #include "gromacs/timing/wallcycle.h"
61 #include "gromacs/utility/gmxmpi.h"
63 #define DDRANK(dd, rank) (rank)
64 #define DDMASTERRANK(dd) (dd->masterrank)
66 typedef struct gmx_domdec_master
68 /* The cell boundaries */
70 /* The global charge group division */
71 int *ncg; /* Number of home charge groups for each node */
72 int *index; /* Index of nnodes+1 into cg */
73 int *cg; /* Global charge group index */
74 int *nat; /* Number of home atoms for each node. */
75 int *ibuf; /* Buffer for communication */
76 rvec *vbuf; /* Buffer for state scattering and gathering */
77 } gmx_domdec_master_t;
81 /* The numbers of charge groups to send and receive for each cell
82 * that requires communication, the last entry contains the total
83 * number of atoms that needs to be communicated.
85 int nsend[DD_MAXIZONE+2];
86 int nrecv[DD_MAXIZONE+2];
87 /* The charge groups to send */
90 /* The atom range for non-in-place communication */
91 int cell2at0[DD_MAXIZONE];
92 int cell2at1[DD_MAXIZONE];
97 int np; /* Number of grid pulses in this dimension */
98 int np_dlb; /* For dlb, for use with edlbAUTO */
99 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
101 gmx_bool bInPlace; /* Can we communicate in place? */
102 } gmx_domdec_comm_dim_t;
106 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
107 real *cell_f; /* State var.: cell boundaries, box relative */
108 real *old_cell_f; /* Temp. var.: old cell size */
109 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
110 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
111 real *bound_min; /* Temp. var.: lower limit for cell boundary */
112 real *bound_max; /* Temp. var.: upper limit for cell boundary */
113 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
114 real *buf_ncd; /* Temp. var. */
117 #define DD_NLOAD_MAX 9
119 /* Here floats are accurate enough, since these variables
120 * only influence the load balancing, not the actual MD results.
147 gmx_cgsort_t *sort_new;
159 /* This enum determines the order of the coordinates.
160 * ddnatHOME and ddnatZONE should be first and second,
161 * the others can be ordered as wanted.
164 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
168 edlbAUTO, edlbNO, edlbYES, edlbNR
170 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
174 int dim; /* The dimension */
175 gmx_bool dim_match; /* Tells if DD and PME dims match */
176 int nslab; /* The number of PME slabs in this dimension */
177 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
178 int *pp_min; /* The minimum pp node location, size nslab */
179 int *pp_max; /* The maximum pp node location,size nslab */
180 int maxshift; /* The maximum shift for coordinate redistribution in PME */
185 real min0; /* The minimum bottom of this zone */
186 real max1; /* The maximum top of this zone */
187 real min1; /* The minimum top of this zone */
188 real mch0; /* The maximum bottom communicaton height for this zone */
189 real mch1; /* The maximum top communicaton height for this zone */
190 real p1_0; /* The bottom value of the first cell in this zone */
191 real p1_1; /* The top value of the first cell in this zone */
196 gmx_domdec_ind_t ind;
203 } dd_comm_setup_work_t;
205 typedef struct gmx_domdec_comm
207 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
208 * unless stated otherwise.
211 /* The number of decomposition dimensions for PME, 0: no PME */
213 /* The number of nodes doing PME (PP/PME or only PME) */
217 /* The communication setup including the PME only nodes */
218 gmx_bool bCartesianPP_PME;
221 int *pmenodes; /* size npmenodes */
222 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
223 * but with bCartesianPP_PME */
224 gmx_ddpme_t ddpme[2];
226 /* The DD particle-particle nodes only */
227 gmx_bool bCartesianPP;
228 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
230 /* The global charge groups */
233 /* Should we sort the cgs */
235 gmx_domdec_sort_t *sort;
237 /* Are there charge groups? */
240 /* Are there bonded and multi-body interactions between charge groups? */
241 gmx_bool bInterCGBondeds;
242 gmx_bool bInterCGMultiBody;
244 /* Data for the optional bonded interaction atom communication range */
251 /* Are we actually using DLB? */
252 gmx_bool bDynLoadBal;
254 /* Cell sizes for static load balancing, first index cartesian */
257 /* The width of the communicated boundaries */
260 /* The minimum cell size (including triclinic correction) */
262 /* For dlb, for use with edlbAUTO */
263 rvec cellsize_min_dlb;
264 /* The lower limit for the DD cell size with DLB */
266 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
267 gmx_bool bVacDLBNoLimit;
269 /* With PME load balancing we set limits on DLB */
270 gmx_bool bPMELoadBalDLBLimits;
271 /* DLB needs to take into account that we want to allow this maximum
272 * cut-off (for PME load balancing), this could limit cell boundaries.
274 real PMELoadBal_max_cutoff;
276 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
278 /* box0 and box_size are required with dim's without pbc and -gcom */
282 /* The cell boundaries */
286 /* The old location of the cell boundaries, to check cg displacements */
290 /* The communication setup and charge group boundaries for the zones */
291 gmx_domdec_zones_t zones;
293 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
294 * cell boundaries of neighboring cells for dynamic load balancing.
296 gmx_ddzone_t zone_d1[2];
297 gmx_ddzone_t zone_d2[2][2];
299 /* The coordinate/force communication setup and indices */
300 gmx_domdec_comm_dim_t cd[DIM];
301 /* The maximum number of cells to communicate with in one dimension */
304 /* Which cg distribution is stored on the master node */
305 int master_cg_ddp_count;
307 /* The number of cg's received from the direct neighbors */
308 int zone_ncg1[DD_MAXZONE];
310 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
313 /* Array for signalling if atoms have moved to another domain */
317 /* Communication buffer for general use */
321 /* Communication buffer for general use */
324 /* Temporary storage for thread parallel communication setup */
326 dd_comm_setup_work_t *dth;
328 /* Communication buffers only used with multiple grid pulses */
333 /* Communication buffers for local redistribution */
335 int cggl_flag_nalloc[DIM*2];
337 int cgcm_state_nalloc[DIM*2];
339 /* Cell sizes for dynamic load balancing */
340 gmx_domdec_root_t **root;
344 real cell_f_max0[DIM];
345 real cell_f_min1[DIM];
347 /* Stuff for load communication */
348 gmx_bool bRecordLoad;
349 gmx_domdec_load_t *load;
350 int nrank_gpu_shared;
352 MPI_Comm *mpi_comm_load;
353 MPI_Comm mpi_comm_gpu_shared;
356 /* Maximum DLB scaling per load balancing step in percent */
360 float cycl[ddCyclNr];
361 int cycl_n[ddCyclNr];
362 float cycl_max[ddCyclNr];
363 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
367 /* Have often have did we have load measurements */
369 /* Have often have we collected the load measurements */
373 double sum_nat[ddnatNR-ddnatZONE];
383 /* The last partition step */
384 gmx_large_int_t partition_step;
392 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
395 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
396 #define DD_FLAG_NRCG 65535
397 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
398 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
400 /* Zone permutation required to obtain consecutive charge groups
401 * for neighbor searching.
403 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
405 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
406 * components see only j zones with that component 0.
409 /* The DD zone order */
410 static const ivec dd_zo[DD_MAXZONE] =
411 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
416 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
421 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
426 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
428 /* Factors used to avoid problems due to rounding issues */
429 #define DD_CELL_MARGIN 1.0001
430 #define DD_CELL_MARGIN2 1.00005
431 /* Factor to account for pressure scaling during nstlist steps */
432 #define DD_PRES_SCALE_MARGIN 1.02
434 /* Allowed performance loss before we DLB or warn */
435 #define DD_PERF_LOSS 0.05
437 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
439 /* Use separate MPI send and receive commands
440 * when nnodes <= GMX_DD_NNODES_SENDRECV.
441 * This saves memory (and some copying for small nnodes).
442 * For high parallelization scatter and gather calls are used.
444 #define GMX_DD_NNODES_SENDRECV 4
448 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
450 static void index2xyz(ivec nc,int ind,ivec xyz)
452 xyz[XX] = ind % nc[XX];
453 xyz[YY] = (ind / nc[XX]) % nc[YY];
454 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
458 /* This order is required to minimize the coordinate communication in PME
459 * which uses decomposition in the x direction.
461 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
463 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
465 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
466 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
467 xyz[ZZ] = ind % nc[ZZ];
470 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
475 ddindex = dd_index(dd->nc, c);
476 if (dd->comm->bCartesianPP_PME)
478 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
480 else if (dd->comm->bCartesianPP)
483 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
494 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
496 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
499 int ddglatnr(gmx_domdec_t *dd, int i)
509 if (i >= dd->comm->nat[ddnatNR-1])
511 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
513 atnr = dd->gatindex[i] + 1;
519 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
521 return &dd->comm->cgs_gl;
524 static void vec_rvec_init(vec_rvec_t *v)
530 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
534 v->nalloc = over_alloc_dd(n);
535 srenew(v->v, v->nalloc);
539 void dd_store_state(gmx_domdec_t *dd, t_state *state)
543 if (state->ddp_count != dd->ddp_count)
545 gmx_incons("The state does not the domain decomposition state");
548 state->ncg_gl = dd->ncg_home;
549 if (state->ncg_gl > state->cg_gl_nalloc)
551 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
552 srenew(state->cg_gl, state->cg_gl_nalloc);
554 for (i = 0; i < state->ncg_gl; i++)
556 state->cg_gl[i] = dd->index_gl[i];
559 state->ddp_count_cg_gl = dd->ddp_count;
562 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
564 return &dd->comm->zones;
567 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
568 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
570 gmx_domdec_zones_t *zones;
573 zones = &dd->comm->zones;
576 while (icg >= zones->izone[izone].cg1)
585 else if (izone < zones->nizone)
587 *jcg0 = zones->izone[izone].jcg0;
591 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
592 icg, izone, zones->nizone);
595 *jcg1 = zones->izone[izone].jcg1;
597 for (d = 0; d < dd->ndim; d++)
600 shift0[dim] = zones->izone[izone].shift0[dim];
601 shift1[dim] = zones->izone[izone].shift1[dim];
602 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
604 /* A conservative approach, this can be optimized */
611 int dd_natoms_vsite(gmx_domdec_t *dd)
613 return dd->comm->nat[ddnatVSITE];
616 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
618 *at_start = dd->comm->nat[ddnatCON-1];
619 *at_end = dd->comm->nat[ddnatCON];
622 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
624 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
625 int *index, *cgindex;
626 gmx_domdec_comm_t *comm;
627 gmx_domdec_comm_dim_t *cd;
628 gmx_domdec_ind_t *ind;
629 rvec shift = {0, 0, 0}, *buf, *rbuf;
630 gmx_bool bPBC, bScrew;
634 cgindex = dd->cgindex;
639 nat_tot = dd->nat_home;
640 for (d = 0; d < dd->ndim; d++)
642 bPBC = (dd->ci[dd->dim[d]] == 0);
643 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
646 copy_rvec(box[dd->dim[d]], shift);
649 for (p = 0; p < cd->np; p++)
656 for (i = 0; i < ind->nsend[nzone]; i++)
658 at0 = cgindex[index[i]];
659 at1 = cgindex[index[i]+1];
660 for (j = at0; j < at1; j++)
662 copy_rvec(x[j], buf[n]);
669 for (i = 0; i < ind->nsend[nzone]; i++)
671 at0 = cgindex[index[i]];
672 at1 = cgindex[index[i]+1];
673 for (j = at0; j < at1; j++)
675 /* We need to shift the coordinates */
676 rvec_add(x[j], shift, buf[n]);
683 for (i = 0; i < ind->nsend[nzone]; i++)
685 at0 = cgindex[index[i]];
686 at1 = cgindex[index[i]+1];
687 for (j = at0; j < at1; j++)
690 buf[n][XX] = x[j][XX] + shift[XX];
692 * This operation requires a special shift force
693 * treatment, which is performed in calc_vir.
695 buf[n][YY] = box[YY][YY] - x[j][YY];
696 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
708 rbuf = comm->vbuf2.v;
710 /* Send and receive the coordinates */
711 dd_sendrecv_rvec(dd, d, dddirBackward,
712 buf, ind->nsend[nzone+1],
713 rbuf, ind->nrecv[nzone+1]);
717 for (zone = 0; zone < nzone; zone++)
719 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
721 copy_rvec(rbuf[j], x[i]);
726 nat_tot += ind->nrecv[nzone+1];
732 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
734 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
735 int *index, *cgindex;
736 gmx_domdec_comm_t *comm;
737 gmx_domdec_comm_dim_t *cd;
738 gmx_domdec_ind_t *ind;
742 gmx_bool bPBC, bScrew;
746 cgindex = dd->cgindex;
751 nzone = comm->zones.n/2;
752 nat_tot = dd->nat_tot;
753 for (d = dd->ndim-1; d >= 0; d--)
755 bPBC = (dd->ci[dd->dim[d]] == 0);
756 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
757 if (fshift == NULL && !bScrew)
761 /* Determine which shift vector we need */
767 for (p = cd->np-1; p >= 0; p--)
770 nat_tot -= ind->nrecv[nzone+1];
777 sbuf = comm->vbuf2.v;
779 for (zone = 0; zone < nzone; zone++)
781 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
783 copy_rvec(f[i], sbuf[j]);
788 /* Communicate the forces */
789 dd_sendrecv_rvec(dd, d, dddirForward,
790 sbuf, ind->nrecv[nzone+1],
791 buf, ind->nsend[nzone+1]);
793 /* Add the received forces */
797 for (i = 0; i < ind->nsend[nzone]; i++)
799 at0 = cgindex[index[i]];
800 at1 = cgindex[index[i]+1];
801 for (j = at0; j < at1; j++)
803 rvec_inc(f[j], buf[n]);
810 for (i = 0; i < ind->nsend[nzone]; i++)
812 at0 = cgindex[index[i]];
813 at1 = cgindex[index[i]+1];
814 for (j = at0; j < at1; j++)
816 rvec_inc(f[j], buf[n]);
817 /* Add this force to the shift force */
818 rvec_inc(fshift[is], buf[n]);
825 for (i = 0; i < ind->nsend[nzone]; i++)
827 at0 = cgindex[index[i]];
828 at1 = cgindex[index[i]+1];
829 for (j = at0; j < at1; j++)
831 /* Rotate the force */
832 f[j][XX] += buf[n][XX];
833 f[j][YY] -= buf[n][YY];
834 f[j][ZZ] -= buf[n][ZZ];
837 /* Add this force to the shift force */
838 rvec_inc(fshift[is], buf[n]);
849 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
851 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
852 int *index, *cgindex;
853 gmx_domdec_comm_t *comm;
854 gmx_domdec_comm_dim_t *cd;
855 gmx_domdec_ind_t *ind;
860 cgindex = dd->cgindex;
862 buf = &comm->vbuf.v[0][0];
865 nat_tot = dd->nat_home;
866 for (d = 0; d < dd->ndim; d++)
869 for (p = 0; p < cd->np; p++)
874 for (i = 0; i < ind->nsend[nzone]; i++)
876 at0 = cgindex[index[i]];
877 at1 = cgindex[index[i]+1];
878 for (j = at0; j < at1; j++)
891 rbuf = &comm->vbuf2.v[0][0];
893 /* Send and receive the coordinates */
894 dd_sendrecv_real(dd, d, dddirBackward,
895 buf, ind->nsend[nzone+1],
896 rbuf, ind->nrecv[nzone+1]);
900 for (zone = 0; zone < nzone; zone++)
902 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
909 nat_tot += ind->nrecv[nzone+1];
915 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
917 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
918 int *index, *cgindex;
919 gmx_domdec_comm_t *comm;
920 gmx_domdec_comm_dim_t *cd;
921 gmx_domdec_ind_t *ind;
926 cgindex = dd->cgindex;
928 buf = &comm->vbuf.v[0][0];
931 nzone = comm->zones.n/2;
932 nat_tot = dd->nat_tot;
933 for (d = dd->ndim-1; d >= 0; d--)
936 for (p = cd->np-1; p >= 0; p--)
939 nat_tot -= ind->nrecv[nzone+1];
946 sbuf = &comm->vbuf2.v[0][0];
948 for (zone = 0; zone < nzone; zone++)
950 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
957 /* Communicate the forces */
958 dd_sendrecv_real(dd, d, dddirForward,
959 sbuf, ind->nrecv[nzone+1],
960 buf, ind->nsend[nzone+1]);
962 /* Add the received forces */
964 for (i = 0; i < ind->nsend[nzone]; i++)
966 at0 = cgindex[index[i]];
967 at1 = cgindex[index[i]+1];
968 for (j = at0; j < at1; j++)
979 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
981 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
983 zone->min0, zone->max1,
984 zone->mch0, zone->mch0,
985 zone->p1_0, zone->p1_1);
989 #define DDZONECOMM_MAXZONE 5
990 #define DDZONECOMM_BUFSIZE 3
992 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
993 int ddimind, int direction,
994 gmx_ddzone_t *buf_s, int n_s,
995 gmx_ddzone_t *buf_r, int n_r)
997 #define ZBS DDZONECOMM_BUFSIZE
998 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
999 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
1002 for (i = 0; i < n_s; i++)
1004 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1005 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1006 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1007 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1008 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1009 vbuf_s[i*ZBS+1][2] = 0;
1010 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1011 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1012 vbuf_s[i*ZBS+2][2] = 0;
1015 dd_sendrecv_rvec(dd, ddimind, direction,
1019 for (i = 0; i < n_r; i++)
1021 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1022 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1023 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1024 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1025 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1026 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1027 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
1033 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1034 rvec cell_ns_x0, rvec cell_ns_x1)
1036 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1038 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1039 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1040 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1041 rvec extr_s[2], extr_r[2];
1043 real dist_d, c = 0, det;
1044 gmx_domdec_comm_t *comm;
1045 gmx_bool bPBC, bUse;
1049 for (d = 1; d < dd->ndim; d++)
1052 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1053 zp->min0 = cell_ns_x0[dim];
1054 zp->max1 = cell_ns_x1[dim];
1055 zp->min1 = cell_ns_x1[dim];
1056 zp->mch0 = cell_ns_x0[dim];
1057 zp->mch1 = cell_ns_x1[dim];
1058 zp->p1_0 = cell_ns_x0[dim];
1059 zp->p1_1 = cell_ns_x1[dim];
1062 for (d = dd->ndim-2; d >= 0; d--)
1065 bPBC = (dim < ddbox->npbcdim);
1067 /* Use an rvec to store two reals */
1068 extr_s[d][0] = comm->cell_f0[d+1];
1069 extr_s[d][1] = comm->cell_f1[d+1];
1070 extr_s[d][2] = comm->cell_f1[d+1];
1073 /* Store the extremes in the backward sending buffer,
1074 * so the get updated separately from the forward communication.
1076 for (d1 = d; d1 < dd->ndim-1; d1++)
1078 /* We invert the order to be able to use the same loop for buf_e */
1079 buf_s[pos].min0 = extr_s[d1][1];
1080 buf_s[pos].max1 = extr_s[d1][0];
1081 buf_s[pos].min1 = extr_s[d1][2];
1082 buf_s[pos].mch0 = 0;
1083 buf_s[pos].mch1 = 0;
1084 /* Store the cell corner of the dimension we communicate along */
1085 buf_s[pos].p1_0 = comm->cell_x0[dim];
1086 buf_s[pos].p1_1 = 0;
1090 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1093 if (dd->ndim == 3 && d == 0)
1095 buf_s[pos] = comm->zone_d2[0][1];
1097 buf_s[pos] = comm->zone_d1[0];
1101 /* We only need to communicate the extremes
1102 * in the forward direction
1104 npulse = comm->cd[d].np;
1107 /* Take the minimum to avoid double communication */
1108 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1112 /* Without PBC we should really not communicate over
1113 * the boundaries, but implementing that complicates
1114 * the communication setup and therefore we simply
1115 * do all communication, but ignore some data.
1117 npulse_min = npulse;
1119 for (p = 0; p < npulse_min; p++)
1121 /* Communicate the extremes forward */
1122 bUse = (bPBC || dd->ci[dim] > 0);
1124 dd_sendrecv_rvec(dd, d, dddirForward,
1125 extr_s+d, dd->ndim-d-1,
1126 extr_r+d, dd->ndim-d-1);
1130 for (d1 = d; d1 < dd->ndim-1; d1++)
1132 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1133 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1134 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1140 for (p = 0; p < npulse; p++)
1142 /* Communicate all the zone information backward */
1143 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1145 dd_sendrecv_ddzone(dd, d, dddirBackward,
1152 for (d1 = d+1; d1 < dd->ndim; d1++)
1154 /* Determine the decrease of maximum required
1155 * communication height along d1 due to the distance along d,
1156 * this avoids a lot of useless atom communication.
1158 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1160 if (ddbox->tric_dir[dim])
1162 /* c is the off-diagonal coupling between the cell planes
1163 * along directions d and d1.
1165 c = ddbox->v[dim][dd->dim[d1]][dim];
1171 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1174 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1178 /* A negative value signals out of range */
1184 /* Accumulate the extremes over all pulses */
1185 for (i = 0; i < buf_size; i++)
1189 buf_e[i] = buf_r[i];
1195 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1196 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1197 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1200 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1208 if (bUse && dh[d1] >= 0)
1210 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1211 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1214 /* Copy the received buffer to the send buffer,
1215 * to pass the data through with the next pulse.
1217 buf_s[i] = buf_r[i];
1219 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1220 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1222 /* Store the extremes */
1225 for (d1 = d; d1 < dd->ndim-1; d1++)
1227 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1228 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1229 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1233 if (d == 1 || (d == 0 && dd->ndim == 3))
1235 for (i = d; i < 2; i++)
1237 comm->zone_d2[1-d][i] = buf_e[pos];
1243 comm->zone_d1[1] = buf_e[pos];
1253 for (i = 0; i < 2; i++)
1257 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1259 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1260 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1266 for (i = 0; i < 2; i++)
1268 for (j = 0; j < 2; j++)
1272 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1274 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1275 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1279 for (d = 1; d < dd->ndim; d++)
1281 comm->cell_f_max0[d] = extr_s[d-1][0];
1282 comm->cell_f_min1[d] = extr_s[d-1][1];
1285 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1286 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1291 static void dd_collect_cg(gmx_domdec_t *dd,
1292 t_state *state_local)
1294 gmx_domdec_master_t *ma = NULL;
1295 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1298 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1300 /* The master has the correct distribution */
1304 if (state_local->ddp_count == dd->ddp_count)
1306 ncg_home = dd->ncg_home;
1308 nat_home = dd->nat_home;
1310 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1312 cgs_gl = &dd->comm->cgs_gl;
1314 ncg_home = state_local->ncg_gl;
1315 cg = state_local->cg_gl;
1317 for (i = 0; i < ncg_home; i++)
1319 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1324 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1327 buf2[0] = dd->ncg_home;
1328 buf2[1] = dd->nat_home;
1338 /* Collect the charge group and atom counts on the master */
1339 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1344 for (i = 0; i < dd->nnodes; i++)
1346 ma->ncg[i] = ma->ibuf[2*i];
1347 ma->nat[i] = ma->ibuf[2*i+1];
1348 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1351 /* Make byte counts and indices */
1352 for (i = 0; i < dd->nnodes; i++)
1354 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1355 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1359 fprintf(debug, "Initial charge group distribution: ");
1360 for (i = 0; i < dd->nnodes; i++)
1362 fprintf(debug, " %d", ma->ncg[i]);
1364 fprintf(debug, "\n");
1368 /* Collect the charge group indices on the master */
1370 dd->ncg_home*sizeof(int), dd->index_gl,
1371 DDMASTER(dd) ? ma->ibuf : NULL,
1372 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1373 DDMASTER(dd) ? ma->cg : NULL);
1375 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1378 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1381 gmx_domdec_master_t *ma;
1382 int n, i, c, a, nalloc = 0;
1391 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1392 dd->rank, dd->mpi_comm_all);
1397 /* Copy the master coordinates to the global array */
1398 cgs_gl = &dd->comm->cgs_gl;
1400 n = DDMASTERRANK(dd);
1402 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1404 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1406 copy_rvec(lv[a++], v[c]);
1410 for (n = 0; n < dd->nnodes; n++)
1414 if (ma->nat[n] > nalloc)
1416 nalloc = over_alloc_dd(ma->nat[n]);
1417 srenew(buf, nalloc);
1420 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1421 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1424 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1426 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1428 copy_rvec(buf[a++], v[c]);
1437 static void get_commbuffer_counts(gmx_domdec_t *dd,
1438 int **counts, int **disps)
1440 gmx_domdec_master_t *ma;
1445 /* Make the rvec count and displacment arrays */
1447 *disps = ma->ibuf + dd->nnodes;
1448 for (n = 0; n < dd->nnodes; n++)
1450 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1451 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1455 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1458 gmx_domdec_master_t *ma;
1459 int *rcounts = NULL, *disps = NULL;
1468 get_commbuffer_counts(dd, &rcounts, &disps);
1473 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1477 cgs_gl = &dd->comm->cgs_gl;
1480 for (n = 0; n < dd->nnodes; n++)
1482 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1484 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1486 copy_rvec(buf[a++], v[c]);
1493 void dd_collect_vec(gmx_domdec_t *dd,
1494 t_state *state_local, rvec *lv, rvec *v)
1496 gmx_domdec_master_t *ma;
1497 int n, i, c, a, nalloc = 0;
1500 dd_collect_cg(dd, state_local);
1502 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1504 dd_collect_vec_sendrecv(dd, lv, v);
1508 dd_collect_vec_gatherv(dd, lv, v);
1513 void dd_collect_state(gmx_domdec_t *dd,
1514 t_state *state_local, t_state *state)
1518 nh = state->nhchainlength;
1522 for (i = 0; i < efptNR; i++)
1524 state->lambda[i] = state_local->lambda[i];
1526 state->fep_state = state_local->fep_state;
1527 state->veta = state_local->veta;
1528 state->vol0 = state_local->vol0;
1529 copy_mat(state_local->box, state->box);
1530 copy_mat(state_local->boxv, state->boxv);
1531 copy_mat(state_local->svir_prev, state->svir_prev);
1532 copy_mat(state_local->fvir_prev, state->fvir_prev);
1533 copy_mat(state_local->pres_prev, state->pres_prev);
1535 for (i = 0; i < state_local->ngtc; i++)
1537 for (j = 0; j < nh; j++)
1539 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1540 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1542 state->therm_integral[i] = state_local->therm_integral[i];
1544 for (i = 0; i < state_local->nnhpres; i++)
1546 for (j = 0; j < nh; j++)
1548 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1549 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1553 for (est = 0; est < estNR; est++)
1555 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1560 dd_collect_vec(dd, state_local, state_local->x, state->x);
1563 dd_collect_vec(dd, state_local, state_local->v, state->v);
1566 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1569 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1572 if (state->nrngi == 1)
1576 for (i = 0; i < state_local->nrng; i++)
1578 state->ld_rng[i] = state_local->ld_rng[i];
1584 dd_gather(dd, state_local->nrng*sizeof(state->ld_rng[0]),
1585 state_local->ld_rng, state->ld_rng);
1589 if (state->nrngi == 1)
1593 state->ld_rngi[0] = state_local->ld_rngi[0];
1598 dd_gather(dd, sizeof(state->ld_rngi[0]),
1599 state_local->ld_rngi, state->ld_rngi);
1602 case estDISRE_INITF:
1603 case estDISRE_RM3TAV:
1604 case estORIRE_INITF:
1608 gmx_incons("Unknown state entry encountered in dd_collect_state");
1614 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1620 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1623 state->nalloc = over_alloc_dd(nalloc);
1625 for (est = 0; est < estNR; est++)
1627 if (EST_DISTR(est) && (state->flags & (1<<est)))
1632 srenew(state->x, state->nalloc);
1635 srenew(state->v, state->nalloc);
1638 srenew(state->sd_X, state->nalloc);
1641 srenew(state->cg_p, state->nalloc);
1645 case estDISRE_INITF:
1646 case estDISRE_RM3TAV:
1647 case estORIRE_INITF:
1649 /* No reallocation required */
1652 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1659 srenew(*f, state->nalloc);
1663 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1666 if (nalloc > fr->cg_nalloc)
1670 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1672 fr->cg_nalloc = over_alloc_dd(nalloc);
1673 srenew(fr->cginfo, fr->cg_nalloc);
1674 if (fr->cutoff_scheme == ecutsGROUP)
1676 srenew(fr->cg_cm, fr->cg_nalloc);
1679 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1681 /* We don't use charge groups, we use x in state to set up
1682 * the atom communication.
1684 dd_realloc_state(state, f, nalloc);
1688 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1691 gmx_domdec_master_t *ma;
1692 int n, i, c, a, nalloc = 0;
1699 for (n = 0; n < dd->nnodes; n++)
1703 if (ma->nat[n] > nalloc)
1705 nalloc = over_alloc_dd(ma->nat[n]);
1706 srenew(buf, nalloc);
1708 /* Use lv as a temporary buffer */
1710 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1712 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1714 copy_rvec(v[c], buf[a++]);
1717 if (a != ma->nat[n])
1719 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1724 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1725 DDRANK(dd, n), n, dd->mpi_comm_all);
1730 n = DDMASTERRANK(dd);
1732 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1734 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1736 copy_rvec(v[c], lv[a++]);
1743 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1744 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1749 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1752 gmx_domdec_master_t *ma;
1753 int *scounts = NULL, *disps = NULL;
1754 int n, i, c, a, nalloc = 0;
1761 get_commbuffer_counts(dd, &scounts, &disps);
1765 for (n = 0; n < dd->nnodes; n++)
1767 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1769 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1771 copy_rvec(v[c], buf[a++]);
1777 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1780 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1782 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1784 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1788 dd_distribute_vec_scatterv(dd, cgs, v, lv);
1792 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1795 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1796 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1797 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1799 if (dfhist->nlambda > 0)
1801 int nlam = dfhist->nlambda;
1802 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1803 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1804 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1805 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1806 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1807 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
1809 for (i = 0; i < nlam; i++)
1811 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1812 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1813 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1814 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1815 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1816 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
1821 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1822 t_state *state, t_state *state_local,
1827 nh = state->nhchainlength;
1831 for (i = 0; i < efptNR; i++)
1833 state_local->lambda[i] = state->lambda[i];
1835 state_local->fep_state = state->fep_state;
1836 state_local->veta = state->veta;
1837 state_local->vol0 = state->vol0;
1838 copy_mat(state->box, state_local->box);
1839 copy_mat(state->box_rel, state_local->box_rel);
1840 copy_mat(state->boxv, state_local->boxv);
1841 copy_mat(state->svir_prev, state_local->svir_prev);
1842 copy_mat(state->fvir_prev, state_local->fvir_prev);
1843 copy_df_history(&state_local->dfhist, &state->dfhist);
1844 for (i = 0; i < state_local->ngtc; i++)
1846 for (j = 0; j < nh; j++)
1848 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1849 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1851 state_local->therm_integral[i] = state->therm_integral[i];
1853 for (i = 0; i < state_local->nnhpres; i++)
1855 for (j = 0; j < nh; j++)
1857 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1858 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1862 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1863 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1864 dd_bcast(dd, sizeof(real), &state_local->veta);
1865 dd_bcast(dd, sizeof(real), &state_local->vol0);
1866 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1867 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1868 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1869 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1870 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1871 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1872 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1873 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1874 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1875 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1877 /* communicate df_history -- required for restarting from checkpoint */
1878 dd_distribute_dfhist(dd, &state_local->dfhist);
1880 if (dd->nat_home > state_local->nalloc)
1882 dd_realloc_state(state_local, f, dd->nat_home);
1884 for (i = 0; i < estNR; i++)
1886 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1891 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1894 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1897 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1900 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1903 if (state->nrngi == 1)
1906 state_local->nrng*sizeof(state_local->ld_rng[0]),
1907 state->ld_rng, state_local->ld_rng);
1912 state_local->nrng*sizeof(state_local->ld_rng[0]),
1913 state->ld_rng, state_local->ld_rng);
1917 if (state->nrngi == 1)
1919 dd_bcastc(dd, sizeof(state_local->ld_rngi[0]),
1920 state->ld_rngi, state_local->ld_rngi);
1924 dd_scatter(dd, sizeof(state_local->ld_rngi[0]),
1925 state->ld_rngi, state_local->ld_rngi);
1928 case estDISRE_INITF:
1929 case estDISRE_RM3TAV:
1930 case estORIRE_INITF:
1932 /* Not implemented yet */
1935 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1941 static char dim2char(int dim)
1947 case XX: c = 'X'; break;
1948 case YY: c = 'Y'; break;
1949 case ZZ: c = 'Z'; break;
1950 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
1956 static void write_dd_grid_pdb(const char *fn, gmx_large_int_t step,
1957 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1959 rvec grid_s[2], *grid_r = NULL, cx, r;
1960 char fname[STRLEN], format[STRLEN], buf[22];
1962 int a, i, d, z, y, x;
1966 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1967 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1971 snew(grid_r, 2*dd->nnodes);
1974 dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd) ? grid_r[0] : NULL);
1978 for (d = 0; d < DIM; d++)
1980 for (i = 0; i < DIM; i++)
1988 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1990 tric[d][i] = box[i][d]/box[i][i];
1999 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
2000 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
2001 out = gmx_fio_fopen(fname, "w");
2002 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2004 for (i = 0; i < dd->nnodes; i++)
2006 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
2007 for (d = 0; d < DIM; d++)
2009 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
2011 for (z = 0; z < 2; z++)
2013 for (y = 0; y < 2; y++)
2015 for (x = 0; x < 2; x++)
2017 cx[XX] = grid_r[i*2+x][XX];
2018 cx[YY] = grid_r[i*2+y][YY];
2019 cx[ZZ] = grid_r[i*2+z][ZZ];
2021 fprintf(out, format, "ATOM", a++, "CA", "GLY", ' ', 1+i,
2022 ' ', 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol);
2026 for (d = 0; d < DIM; d++)
2028 for (x = 0; x < 4; x++)
2032 case 0: y = 1 + i*8 + 2*x; break;
2033 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
2034 case 2: y = 1 + i*8 + x; break;
2036 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2040 gmx_fio_fclose(out);
2045 void write_dd_pdb(const char *fn, gmx_large_int_t step, const char *title,
2046 gmx_mtop_t *mtop, t_commrec *cr,
2047 int natoms, rvec x[], matrix box)
2049 char fname[STRLEN], format[STRLEN], format4[STRLEN], buf[22];
2051 int i, ii, resnr, c;
2052 char *atomname, *resname;
2059 natoms = dd->comm->nat[ddnatVSITE];
2062 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2064 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
2065 sprintf(format4, "%s%s\n", get_pdbformat4(), "%6.2f%6.2f");
2067 out = gmx_fio_fopen(fname, "w");
2069 fprintf(out, "TITLE %s\n", title);
2070 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2071 for (i = 0; i < natoms; i++)
2073 ii = dd->gatindex[i];
2074 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
2075 if (i < dd->comm->nat[ddnatZONE])
2078 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2084 else if (i < dd->comm->nat[ddnatVSITE])
2086 b = dd->comm->zones.n;
2090 b = dd->comm->zones.n + 1;
2092 fprintf(out, strlen(atomname) < 4 ? format : format4,
2093 "ATOM", (ii+1)%100000,
2094 atomname, resname, ' ', resnr%10000, ' ',
2095 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b);
2097 fprintf(out, "TER\n");
2099 gmx_fio_fclose(out);
2102 real dd_cutoff_mbody(gmx_domdec_t *dd)
2104 gmx_domdec_comm_t *comm;
2111 if (comm->bInterCGBondeds)
2113 if (comm->cutoff_mbody > 0)
2115 r = comm->cutoff_mbody;
2119 /* cutoff_mbody=0 means we do not have DLB */
2120 r = comm->cellsize_min[dd->dim[0]];
2121 for (di = 1; di < dd->ndim; di++)
2123 r = min(r, comm->cellsize_min[dd->dim[di]]);
2125 if (comm->bBondComm)
2127 r = max(r, comm->cutoff_mbody);
2131 r = min(r, comm->cutoff);
2139 real dd_cutoff_twobody(gmx_domdec_t *dd)
2143 r_mb = dd_cutoff_mbody(dd);
2145 return max(dd->comm->cutoff, r_mb);
2149 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2153 nc = dd->nc[dd->comm->cartpmedim];
2154 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2155 copy_ivec(coord, coord_pme);
2156 coord_pme[dd->comm->cartpmedim] =
2157 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
2160 static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
2162 /* Here we assign a PME node to communicate with this DD node
2163 * by assuming that the major index of both is x.
2164 * We add cr->npmenodes/2 to obtain an even distribution.
2166 return (ddindex*npme + npme/2)/ndd;
2169 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2171 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2174 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2176 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
2179 static int *dd_pmenodes(t_commrec *cr)
2184 snew(pmenodes, cr->npmenodes);
2186 for (i = 0; i < cr->dd->nnodes; i++)
2188 p0 = cr_ddindex2pmeindex(cr, i);
2189 p1 = cr_ddindex2pmeindex(cr, i+1);
2190 if (i+1 == cr->dd->nnodes || p1 > p0)
2194 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2196 pmenodes[n] = i + 1 + n;
2204 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2207 ivec coords, coords_pme, nc;
2212 if (dd->comm->bCartesian) {
2213 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2214 dd_coords2pmecoords(dd,coords,coords_pme);
2215 copy_ivec(dd->ntot,nc);
2216 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2217 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2219 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2221 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2227 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
2232 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2234 gmx_domdec_comm_t *comm;
2236 int ddindex, nodeid = -1;
2238 comm = cr->dd->comm;
2243 if (comm->bCartesianPP_PME)
2246 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2251 ddindex = dd_index(cr->dd->nc, coords);
2252 if (comm->bCartesianPP)
2254 nodeid = comm->ddindex2simnodeid[ddindex];
2260 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
2272 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2275 gmx_domdec_comm_t *comm;
2276 ivec coord, coord_pme;
2283 /* This assumes a uniform x domain decomposition grid cell size */
2284 if (comm->bCartesianPP_PME)
2287 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2288 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2290 /* This is a PP node */
2291 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2292 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2296 else if (comm->bCartesianPP)
2298 if (sim_nodeid < dd->nnodes)
2300 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2305 /* This assumes DD cells with identical x coordinates
2306 * are numbered sequentially.
2308 if (dd->comm->pmenodes == NULL)
2310 if (sim_nodeid < dd->nnodes)
2312 /* The DD index equals the nodeid */
2313 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2319 while (sim_nodeid > dd->comm->pmenodes[i])
2323 if (sim_nodeid < dd->comm->pmenodes[i])
2325 pmenode = dd->comm->pmenodes[i];
2333 void get_pme_nnodes(const gmx_domdec_t *dd,
2334 int *npmenodes_x, int *npmenodes_y)
2338 *npmenodes_x = dd->comm->npmenodes_x;
2339 *npmenodes_y = dd->comm->npmenodes_y;
2348 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2350 gmx_bool bPMEOnlyNode;
2352 if (DOMAINDECOMP(cr))
2354 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2358 bPMEOnlyNode = FALSE;
2361 return bPMEOnlyNode;
2364 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2365 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2369 ivec coord, coord_pme;
2373 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2376 for (x = 0; x < dd->nc[XX]; x++)
2378 for (y = 0; y < dd->nc[YY]; y++)
2380 for (z = 0; z < dd->nc[ZZ]; z++)
2382 if (dd->comm->bCartesianPP_PME)
2387 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2388 if (dd->ci[XX] == coord_pme[XX] &&
2389 dd->ci[YY] == coord_pme[YY] &&
2390 dd->ci[ZZ] == coord_pme[ZZ])
2392 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2397 /* The slab corresponds to the nodeid in the PME group */
2398 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2400 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2407 /* The last PP-only node is the peer node */
2408 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2412 fprintf(debug, "Receive coordinates from PP nodes:");
2413 for (x = 0; x < *nmy_ddnodes; x++)
2415 fprintf(debug, " %d", (*my_ddnodes)[x]);
2417 fprintf(debug, "\n");
2421 static gmx_bool receive_vir_ener(t_commrec *cr)
2423 gmx_domdec_comm_t *comm;
2424 int pmenode, coords[DIM], rank;
2428 if (cr->npmenodes < cr->dd->nnodes)
2430 comm = cr->dd->comm;
2431 if (comm->bCartesianPP_PME)
2433 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2435 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2436 coords[comm->cartpmedim]++;
2437 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2439 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2440 if (dd_simnode2pmenode(cr, rank) == pmenode)
2442 /* This is not the last PP node for pmenode */
2450 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2451 if (cr->sim_nodeid+1 < cr->nnodes &&
2452 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2454 /* This is not the last PP node for pmenode */
2463 static void set_zones_ncg_home(gmx_domdec_t *dd)
2465 gmx_domdec_zones_t *zones;
2468 zones = &dd->comm->zones;
2470 zones->cg_range[0] = 0;
2471 for (i = 1; i < zones->n+1; i++)
2473 zones->cg_range[i] = dd->ncg_home;
2475 /* zone_ncg1[0] should always be equal to ncg_home */
2476 dd->comm->zone_ncg1[0] = dd->ncg_home;
2479 static void rebuild_cgindex(gmx_domdec_t *dd,
2480 const int *gcgs_index, t_state *state)
2482 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2485 dd_cg_gl = dd->index_gl;
2486 cgindex = dd->cgindex;
2489 for (i = 0; i < state->ncg_gl; i++)
2493 dd_cg_gl[i] = cg_gl;
2494 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2498 dd->ncg_home = state->ncg_gl;
2501 set_zones_ncg_home(dd);
2504 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2506 while (cg >= cginfo_mb->cg_end)
2511 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2514 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2515 t_forcerec *fr, char *bLocalCG)
2517 cginfo_mb_t *cginfo_mb;
2523 cginfo_mb = fr->cginfo_mb;
2524 cginfo = fr->cginfo;
2526 for (cg = cg0; cg < cg1; cg++)
2528 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2532 if (bLocalCG != NULL)
2534 for (cg = cg0; cg < cg1; cg++)
2536 bLocalCG[index_gl[cg]] = TRUE;
2541 static void make_dd_indices(gmx_domdec_t *dd,
2542 const int *gcgs_index, int cg_start)
2544 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2545 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2550 bLocalCG = dd->comm->bLocalCG;
2552 if (dd->nat_tot > dd->gatindex_nalloc)
2554 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2555 srenew(dd->gatindex, dd->gatindex_nalloc);
2558 nzone = dd->comm->zones.n;
2559 zone2cg = dd->comm->zones.cg_range;
2560 zone_ncg1 = dd->comm->zone_ncg1;
2561 index_gl = dd->index_gl;
2562 gatindex = dd->gatindex;
2563 bCGs = dd->comm->bCGs;
2565 if (zone2cg[1] != dd->ncg_home)
2567 gmx_incons("dd->ncg_zone is not up to date");
2570 /* Make the local to global and global to local atom index */
2571 a = dd->cgindex[cg_start];
2572 for (zone = 0; zone < nzone; zone++)
2580 cg0 = zone2cg[zone];
2582 cg1 = zone2cg[zone+1];
2583 cg1_p1 = cg0 + zone_ncg1[zone];
2585 for (cg = cg0; cg < cg1; cg++)
2590 /* Signal that this cg is from more than one pulse away */
2593 cg_gl = index_gl[cg];
2596 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2599 ga2la_set(dd->ga2la, a_gl, a, zone1);
2605 gatindex[a] = cg_gl;
2606 ga2la_set(dd->ga2la, cg_gl, a, zone1);
2613 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2616 int ncg, i, ngl, nerr;
2619 if (bLocalCG == NULL)
2623 for (i = 0; i < dd->ncg_tot; i++)
2625 if (!bLocalCG[dd->index_gl[i]])
2628 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
2633 for (i = 0; i < ncg_sys; i++)
2640 if (ngl != dd->ncg_tot)
2642 fprintf(stderr, "DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
2649 static void check_index_consistency(gmx_domdec_t *dd,
2650 int natoms_sys, int ncg_sys,
2653 int nerr, ngl, i, a, cell;
2658 if (dd->comm->DD_debug > 1)
2660 snew(have, natoms_sys);
2661 for (a = 0; a < dd->nat_tot; a++)
2663 if (have[dd->gatindex[a]] > 0)
2665 fprintf(stderr, "DD node %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2669 have[dd->gatindex[a]] = a + 1;
2675 snew(have, dd->nat_tot);
2678 for (i = 0; i < natoms_sys; i++)
2680 if (ga2la_get(dd->ga2la, i, &a, &cell))
2682 if (a >= dd->nat_tot)
2684 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2690 if (dd->gatindex[a] != i)
2692 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2699 if (ngl != dd->nat_tot)
2702 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2703 dd->rank, where, ngl, dd->nat_tot);
2705 for (a = 0; a < dd->nat_tot; a++)
2710 "DD node %d, %s: local atom %d, global %d has no global index\n",
2711 dd->rank, where, a+1, dd->gatindex[a]+1);
2716 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2720 gmx_fatal(FARGS, "DD node %d, %s: %d atom/cg index inconsistencies",
2721 dd->rank, where, nerr);
2725 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2732 /* Clear the whole list without searching */
2733 ga2la_clear(dd->ga2la);
2737 for (i = a_start; i < dd->nat_tot; i++)
2739 ga2la_del(dd->ga2la, dd->gatindex[i]);
2743 bLocalCG = dd->comm->bLocalCG;
2746 for (i = cg_start; i < dd->ncg_tot; i++)
2748 bLocalCG[dd->index_gl[i]] = FALSE;
2752 dd_clear_local_vsite_indices(dd);
2754 if (dd->constraints)
2756 dd_clear_local_constraint_indices(dd);
2760 /* This function should be used for moving the domain boudaries during DLB,
2761 * for obtaining the minimum cell size. It checks the initially set limit
2762 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2763 * and, possibly, a longer cut-off limit set for PME load balancing.
2765 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2769 cellsize_min = comm->cellsize_min[dim];
2771 if (!comm->bVacDLBNoLimit)
2773 /* The cut-off might have changed, e.g. by PME load balacning,
2774 * from the value used to set comm->cellsize_min, so check it.
2776 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2778 if (comm->bPMELoadBalDLBLimits)
2780 /* Check for the cut-off limit set by the PME load balancing */
2781 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2785 return cellsize_min;
2788 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2791 real grid_jump_limit;
2793 /* The distance between the boundaries of cells at distance
2794 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2795 * and by the fact that cells should not be shifted by more than
2796 * half their size, such that cg's only shift by one cell
2797 * at redecomposition.
2799 grid_jump_limit = comm->cellsize_limit;
2800 if (!comm->bVacDLBNoLimit)
2802 if (comm->bPMELoadBalDLBLimits)
2804 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2806 grid_jump_limit = max(grid_jump_limit,
2807 cutoff/comm->cd[dim_ind].np);
2810 return grid_jump_limit;
2813 static gmx_bool check_grid_jump(gmx_large_int_t step,
2819 gmx_domdec_comm_t *comm;
2828 for (d = 1; d < dd->ndim; d++)
2831 limit = grid_jump_limit(comm, cutoff, d);
2832 bfac = ddbox->box_size[dim];
2833 if (ddbox->tric_dir[dim])
2835 bfac *= ddbox->skew_fac[dim];
2837 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2838 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2846 /* This error should never be triggered under normal
2847 * circumstances, but you never know ...
2849 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
2850 gmx_step_str(step, buf),
2851 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2859 static int dd_load_count(gmx_domdec_comm_t *comm)
2861 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2864 static float dd_force_load(gmx_domdec_comm_t *comm)
2871 if (comm->eFlop > 1)
2873 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2878 load = comm->cycl[ddCyclF];
2879 if (comm->cycl_n[ddCyclF] > 1)
2881 /* Subtract the maximum of the last n cycle counts
2882 * to get rid of possible high counts due to other sources,
2883 * for instance system activity, that would otherwise
2884 * affect the dynamic load balancing.
2886 load -= comm->cycl_max[ddCyclF];
2890 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2892 float gpu_wait, gpu_wait_sum;
2894 gpu_wait = comm->cycl[ddCyclWaitGPU];
2895 if (comm->cycl_n[ddCyclF] > 1)
2897 /* We should remove the WaitGPU time of the same MD step
2898 * as the one with the maximum F time, since the F time
2899 * and the wait time are not independent.
2900 * Furthermore, the step for the max F time should be chosen
2901 * the same on all ranks that share the same GPU.
2902 * But to keep the code simple, we remove the average instead.
2903 * The main reason for artificially long times at some steps
2904 * is spurious CPU activity or MPI time, so we don't expect
2905 * that changes in the GPU wait time matter a lot here.
2907 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2909 /* Sum the wait times over the ranks that share the same GPU */
2910 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2911 comm->mpi_comm_gpu_shared);
2912 /* Replace the wait time by the average over the ranks */
2913 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
2921 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2923 gmx_domdec_comm_t *comm;
2928 snew(*dim_f, dd->nc[dim]+1);
2930 for (i = 1; i < dd->nc[dim]; i++)
2932 if (comm->slb_frac[dim])
2934 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2938 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2941 (*dim_f)[dd->nc[dim]] = 1;
2944 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2946 int pmeindex, slab, nso, i;
2949 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2955 ddpme->dim = dimind;
2957 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2959 ddpme->nslab = (ddpme->dim == 0 ?
2960 dd->comm->npmenodes_x :
2961 dd->comm->npmenodes_y);
2963 if (ddpme->nslab <= 1)
2968 nso = dd->comm->npmenodes/ddpme->nslab;
2969 /* Determine for each PME slab the PP location range for dimension dim */
2970 snew(ddpme->pp_min, ddpme->nslab);
2971 snew(ddpme->pp_max, ddpme->nslab);
2972 for (slab = 0; slab < ddpme->nslab; slab++)
2974 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2975 ddpme->pp_max[slab] = 0;
2977 for (i = 0; i < dd->nnodes; i++)
2979 ddindex2xyz(dd->nc, i, xyz);
2980 /* For y only use our y/z slab.
2981 * This assumes that the PME x grid size matches the DD grid size.
2983 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2985 pmeindex = ddindex2pmeindex(dd, i);
2988 slab = pmeindex/nso;
2992 slab = pmeindex % ddpme->nslab;
2994 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
2995 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
2999 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
3002 int dd_pme_maxshift_x(gmx_domdec_t *dd)
3004 if (dd->comm->ddpme[0].dim == XX)
3006 return dd->comm->ddpme[0].maxshift;
3014 int dd_pme_maxshift_y(gmx_domdec_t *dd)
3016 if (dd->comm->ddpme[0].dim == YY)
3018 return dd->comm->ddpme[0].maxshift;
3020 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
3022 return dd->comm->ddpme[1].maxshift;
3030 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
3031 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
3033 gmx_domdec_comm_t *comm;
3036 real range, pme_boundary;
3040 nc = dd->nc[ddpme->dim];
3043 if (!ddpme->dim_match)
3045 /* PP decomposition is not along dim: the worst situation */
3048 else if (ns <= 3 || (bUniform && ns == nc))
3050 /* The optimal situation */
3055 /* We need to check for all pme nodes which nodes they
3056 * could possibly need to communicate with.
3058 xmin = ddpme->pp_min;
3059 xmax = ddpme->pp_max;
3060 /* Allow for atoms to be maximally 2/3 times the cut-off
3061 * out of their DD cell. This is a reasonable balance between
3062 * between performance and support for most charge-group/cut-off
3065 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3066 /* Avoid extra communication when we are exactly at a boundary */
3070 for (s = 0; s < ns; s++)
3072 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3073 pme_boundary = (real)s/ns;
3076 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3078 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3082 pme_boundary = (real)(s+1)/ns;
3085 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3087 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3094 ddpme->maxshift = sh;
3098 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3099 ddpme->dim, ddpme->maxshift);
3103 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3107 for (d = 0; d < dd->ndim; d++)
3110 if (dim < ddbox->nboundeddim &&
3111 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3112 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3114 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3115 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3116 dd->nc[dim], dd->comm->cellsize_limit);
3121 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3122 gmx_bool bMaster, ivec npulse)
3124 gmx_domdec_comm_t *comm;
3127 real *cell_x, cell_dx, cellsize;
3131 for (d = 0; d < DIM; d++)
3133 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
3135 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3138 cell_dx = ddbox->box_size[d]/dd->nc[d];
3141 for (j = 0; j < dd->nc[d]+1; j++)
3143 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3148 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3149 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
3151 cellsize = cell_dx*ddbox->skew_fac[d];
3152 while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
3156 cellsize_min[d] = cellsize;
3160 /* Statically load balanced grid */
3161 /* Also when we are not doing a master distribution we determine
3162 * all cell borders in a loop to obtain identical values
3163 * to the master distribution case and to determine npulse.
3167 cell_x = dd->ma->cell_x[d];
3171 snew(cell_x, dd->nc[d]+1);
3173 cell_x[0] = ddbox->box0[d];
3174 for (j = 0; j < dd->nc[d]; j++)
3176 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3177 cell_x[j+1] = cell_x[j] + cell_dx;
3178 cellsize = cell_dx*ddbox->skew_fac[d];
3179 while (cellsize*npulse[d] < comm->cutoff &&
3180 npulse[d] < dd->nc[d]-1)
3184 cellsize_min[d] = min(cellsize_min[d], cellsize);
3188 comm->cell_x0[d] = cell_x[dd->ci[d]];
3189 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3193 /* The following limitation is to avoid that a cell would receive
3194 * some of its own home charge groups back over the periodic boundary.
3195 * Double charge groups cause trouble with the global indices.
3197 if (d < ddbox->npbcdim &&
3198 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3200 gmx_fatal_collective(FARGS, NULL, dd,
3201 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3202 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3204 dd->nc[d], dd->nc[d],
3205 dd->nnodes > dd->nc[d] ? "cells" : "processors");
3209 if (!comm->bDynLoadBal)
3211 copy_rvec(cellsize_min, comm->cellsize_min);
3214 for (d = 0; d < comm->npmedecompdim; d++)
3216 set_pme_maxshift(dd, &comm->ddpme[d],
3217 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3218 comm->ddpme[d].slb_dim_f);
3223 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3224 int d, int dim, gmx_domdec_root_t *root,
3226 gmx_bool bUniform, gmx_large_int_t step, real cellsize_limit_f, int range[])
3228 gmx_domdec_comm_t *comm;
3229 int ncd, i, j, nmin, nmin_old;
3230 gmx_bool bLimLo, bLimHi;
3232 real fac, halfway, cellsize_limit_f_i, region_size;
3233 gmx_bool bPBC, bLastHi = FALSE;
3234 int nrange[] = {range[0], range[1]};
3236 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
3242 bPBC = (dim < ddbox->npbcdim);
3244 cell_size = root->buf_ncd;
3248 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3251 /* First we need to check if the scaling does not make cells
3252 * smaller than the smallest allowed size.
3253 * We need to do this iteratively, since if a cell is too small,
3254 * it needs to be enlarged, which makes all the other cells smaller,
3255 * which could in turn make another cell smaller than allowed.
3257 for (i = range[0]; i < range[1]; i++)
3259 root->bCellMin[i] = FALSE;
3265 /* We need the total for normalization */
3267 for (i = range[0]; i < range[1]; i++)
3269 if (root->bCellMin[i] == FALSE)
3271 fac += cell_size[i];
3274 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3275 /* Determine the cell boundaries */
3276 for (i = range[0]; i < range[1]; i++)
3278 if (root->bCellMin[i] == FALSE)
3280 cell_size[i] *= fac;
3281 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3283 cellsize_limit_f_i = 0;
3287 cellsize_limit_f_i = cellsize_limit_f;
3289 if (cell_size[i] < cellsize_limit_f_i)
3291 root->bCellMin[i] = TRUE;
3292 cell_size[i] = cellsize_limit_f_i;
3296 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
3299 while (nmin > nmin_old);
3302 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3303 /* For this check we should not use DD_CELL_MARGIN,
3304 * but a slightly smaller factor,
3305 * since rounding could get use below the limit.
3307 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3310 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3311 gmx_step_str(step, buf),
3312 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3313 ncd, comm->cellsize_min[dim]);
3316 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3320 /* Check if the boundary did not displace more than halfway
3321 * each of the cells it bounds, as this could cause problems,
3322 * especially when the differences between cell sizes are large.
3323 * If changes are applied, they will not make cells smaller
3324 * than the cut-off, as we check all the boundaries which
3325 * might be affected by a change and if the old state was ok,
3326 * the cells will at most be shrunk back to their old size.
3328 for (i = range[0]+1; i < range[1]; i++)
3330 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3331 if (root->cell_f[i] < halfway)
3333 root->cell_f[i] = halfway;
3334 /* Check if the change also causes shifts of the next boundaries */
3335 for (j = i+1; j < range[1]; j++)
3337 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3339 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3343 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3344 if (root->cell_f[i] > halfway)
3346 root->cell_f[i] = halfway;
3347 /* Check if the change also causes shifts of the next boundaries */
3348 for (j = i-1; j >= range[0]+1; j--)
3350 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3352 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3359 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3360 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3361 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3362 * for a and b nrange is used */
3365 /* Take care of the staggering of the cell boundaries */
3368 for (i = range[0]; i < range[1]; i++)
3370 root->cell_f_max0[i] = root->cell_f[i];
3371 root->cell_f_min1[i] = root->cell_f[i+1];
3376 for (i = range[0]+1; i < range[1]; i++)
3378 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3379 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3380 if (bLimLo && bLimHi)
3382 /* Both limits violated, try the best we can */
3383 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3384 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3385 nrange[0] = range[0];
3387 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3390 nrange[1] = range[1];
3391 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3397 /* root->cell_f[i] = root->bound_min[i]; */
3398 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3401 else if (bLimHi && !bLastHi)
3404 if (nrange[1] < range[1]) /* found a LimLo before */
3406 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3407 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3408 nrange[0] = nrange[1];
3410 root->cell_f[i] = root->bound_max[i];
3412 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3414 nrange[1] = range[1];
3417 if (nrange[1] < range[1]) /* found last a LimLo */
3419 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3420 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3421 nrange[0] = nrange[1];
3422 nrange[1] = range[1];
3423 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3425 else if (nrange[0] > range[0]) /* found at least one LimHi */
3427 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3434 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3435 int d, int dim, gmx_domdec_root_t *root,
3436 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3437 gmx_bool bUniform, gmx_large_int_t step)
3439 gmx_domdec_comm_t *comm;
3440 int ncd, d1, i, j, pos;
3442 real load_aver, load_i, imbalance, change, change_max, sc;
3443 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3447 int range[] = { 0, 0 };
3451 /* Convert the maximum change from the input percentage to a fraction */
3452 change_limit = comm->dlb_scale_lim*0.01;
3456 bPBC = (dim < ddbox->npbcdim);
3458 cell_size = root->buf_ncd;
3460 /* Store the original boundaries */
3461 for (i = 0; i < ncd+1; i++)
3463 root->old_cell_f[i] = root->cell_f[i];
3467 for (i = 0; i < ncd; i++)
3469 cell_size[i] = 1.0/ncd;
3472 else if (dd_load_count(comm))
3474 load_aver = comm->load[d].sum_m/ncd;
3476 for (i = 0; i < ncd; i++)
3478 /* Determine the relative imbalance of cell i */
3479 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3480 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3481 /* Determine the change of the cell size using underrelaxation */
3482 change = -relax*imbalance;
3483 change_max = max(change_max, max(change, -change));
3485 /* Limit the amount of scaling.
3486 * We need to use the same rescaling for all cells in one row,
3487 * otherwise the load balancing might not converge.
3490 if (change_max > change_limit)
3492 sc *= change_limit/change_max;
3494 for (i = 0; i < ncd; i++)
3496 /* Determine the relative imbalance of cell i */
3497 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3498 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3499 /* Determine the change of the cell size using underrelaxation */
3500 change = -sc*imbalance;
3501 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3505 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3506 cellsize_limit_f *= DD_CELL_MARGIN;
3507 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3508 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3509 if (ddbox->tric_dir[dim])
3511 cellsize_limit_f /= ddbox->skew_fac[dim];
3512 dist_min_f /= ddbox->skew_fac[dim];
3514 if (bDynamicBox && d > 0)
3516 dist_min_f *= DD_PRES_SCALE_MARGIN;
3518 if (d > 0 && !bUniform)
3520 /* Make sure that the grid is not shifted too much */
3521 for (i = 1; i < ncd; i++)
3523 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3525 gmx_incons("Inconsistent DD boundary staggering limits!");
3527 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3528 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3531 root->bound_min[i] += 0.5*space;
3533 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3534 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3537 root->bound_max[i] += 0.5*space;
3542 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3544 root->cell_f_max0[i-1] + dist_min_f,
3545 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3546 root->cell_f_min1[i] - dist_min_f);
3551 root->cell_f[0] = 0;
3552 root->cell_f[ncd] = 1;
3553 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3556 /* After the checks above, the cells should obey the cut-off
3557 * restrictions, but it does not hurt to check.
3559 for (i = 0; i < ncd; i++)
3563 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3564 dim, i, root->cell_f[i], root->cell_f[i+1]);
3567 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3568 root->cell_f[i+1] - root->cell_f[i] <
3569 cellsize_limit_f/DD_CELL_MARGIN)
3573 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3574 gmx_step_str(step, buf), dim2char(dim), i,
3575 (root->cell_f[i+1] - root->cell_f[i])
3576 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3581 /* Store the cell boundaries of the lower dimensions at the end */
3582 for (d1 = 0; d1 < d; d1++)
3584 root->cell_f[pos++] = comm->cell_f0[d1];
3585 root->cell_f[pos++] = comm->cell_f1[d1];
3588 if (d < comm->npmedecompdim)
3590 /* The master determines the maximum shift for
3591 * the coordinate communication between separate PME nodes.
3593 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
3595 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3598 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3602 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3603 gmx_ddbox_t *ddbox, int dimind)
3605 gmx_domdec_comm_t *comm;
3610 /* Set the cell dimensions */
3611 dim = dd->dim[dimind];
3612 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3613 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3614 if (dim >= ddbox->nboundeddim)
3616 comm->cell_x0[dim] += ddbox->box0[dim];
3617 comm->cell_x1[dim] += ddbox->box0[dim];
3621 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3622 int d, int dim, real *cell_f_row,
3625 gmx_domdec_comm_t *comm;
3631 /* Each node would only need to know two fractions,
3632 * but it is probably cheaper to broadcast the whole array.
3634 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3635 0, comm->mpi_comm_load[d]);
3637 /* Copy the fractions for this dimension from the buffer */
3638 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3639 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3640 /* The whole array was communicated, so set the buffer position */
3641 pos = dd->nc[dim] + 1;
3642 for (d1 = 0; d1 <= d; d1++)
3646 /* Copy the cell fractions of the lower dimensions */
3647 comm->cell_f0[d1] = cell_f_row[pos++];
3648 comm->cell_f1[d1] = cell_f_row[pos++];
3650 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3652 /* Convert the communicated shift from float to int */
3653 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3656 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3660 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3661 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3662 gmx_bool bUniform, gmx_large_int_t step)
3664 gmx_domdec_comm_t *comm;
3666 gmx_bool bRowMember, bRowRoot;
3671 for (d = 0; d < dd->ndim; d++)
3676 for (d1 = d; d1 < dd->ndim; d1++)
3678 if (dd->ci[dd->dim[d1]] > 0)
3691 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3692 ddbox, bDynamicBox, bUniform, step);
3693 cell_f_row = comm->root[d]->cell_f;
3697 cell_f_row = comm->cell_f_row;
3699 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
3704 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3708 /* This function assumes the box is static and should therefore
3709 * not be called when the box has changed since the last
3710 * call to dd_partition_system.
3712 for (d = 0; d < dd->ndim; d++)
3714 relative_to_absolute_cell_bounds(dd, ddbox, d);
3720 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3721 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3722 gmx_bool bUniform, gmx_bool bDoDLB, gmx_large_int_t step,
3723 gmx_wallcycle_t wcycle)
3725 gmx_domdec_comm_t *comm;
3732 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3733 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3734 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3736 else if (bDynamicBox)
3738 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3741 /* Set the dimensions for which no DD is used */
3742 for (dim = 0; dim < DIM; dim++)
3744 if (dd->nc[dim] == 1)
3746 comm->cell_x0[dim] = 0;
3747 comm->cell_x1[dim] = ddbox->box_size[dim];
3748 if (dim >= ddbox->nboundeddim)
3750 comm->cell_x0[dim] += ddbox->box0[dim];
3751 comm->cell_x1[dim] += ddbox->box0[dim];
3757 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3760 gmx_domdec_comm_dim_t *cd;
3762 for (d = 0; d < dd->ndim; d++)
3764 cd = &dd->comm->cd[d];
3765 np = npulse[dd->dim[d]];
3766 if (np > cd->np_nalloc)
3770 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3771 dim2char(dd->dim[d]), np);
3773 if (DDMASTER(dd) && cd->np_nalloc > 0)
3775 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3777 srenew(cd->ind, np);
3778 for (i = cd->np_nalloc; i < np; i++)
3780 cd->ind[i].index = NULL;
3781 cd->ind[i].nalloc = 0;
3790 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3791 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3792 gmx_bool bUniform, gmx_bool bDoDLB, gmx_large_int_t step,
3793 gmx_wallcycle_t wcycle)
3795 gmx_domdec_comm_t *comm;
3801 /* Copy the old cell boundaries for the cg displacement check */
3802 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3803 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3805 if (comm->bDynLoadBal)
3809 check_box_size(dd, ddbox);
3811 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3815 set_dd_cell_sizes_slb(dd, ddbox, FALSE, npulse);
3816 realloc_comm_ind(dd, npulse);
3821 for (d = 0; d < DIM; d++)
3823 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3824 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
3829 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3831 rvec cell_ns_x0, rvec cell_ns_x1,
3832 gmx_large_int_t step)
3834 gmx_domdec_comm_t *comm;
3839 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3841 dim = dd->dim[dim_ind];
3843 /* Without PBC we don't have restrictions on the outer cells */
3844 if (!(dim >= ddbox->npbcdim &&
3845 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3846 comm->bDynLoadBal &&
3847 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3848 comm->cellsize_min[dim])
3851 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3852 gmx_step_str(step, buf), dim2char(dim),
3853 comm->cell_x1[dim] - comm->cell_x0[dim],
3854 ddbox->skew_fac[dim],
3855 dd->comm->cellsize_min[dim],
3856 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3860 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3862 /* Communicate the boundaries and update cell_ns_x0/1 */
3863 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3864 if (dd->bGridJump && dd->ndim > 1)
3866 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
3871 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3875 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3883 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3884 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3893 static void check_screw_box(matrix box)
3895 /* Mathematical limitation */
3896 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3898 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3901 /* Limitation due to the asymmetry of the eighth shell method */
3902 if (box[ZZ][YY] != 0)
3904 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
3908 static void distribute_cg(FILE *fplog, gmx_large_int_t step,
3909 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3912 gmx_domdec_master_t *ma;
3913 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3914 int i, icg, j, k, k0, k1, d, npbcdim;
3916 rvec box_size, cg_cm;
3918 real nrcg, inv_ncg, pos_d;
3920 gmx_bool bUnbounded, bScrew;
3924 if (tmp_ind == NULL)
3926 snew(tmp_nalloc, dd->nnodes);
3927 snew(tmp_ind, dd->nnodes);
3928 for (i = 0; i < dd->nnodes; i++)
3930 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3931 snew(tmp_ind[i], tmp_nalloc[i]);
3935 /* Clear the count */
3936 for (i = 0; i < dd->nnodes; i++)
3942 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3944 cgindex = cgs->index;
3946 /* Compute the center of geometry for all charge groups */
3947 for (icg = 0; icg < cgs->nr; icg++)
3950 k1 = cgindex[icg+1];
3954 copy_rvec(pos[k0], cg_cm);
3961 for (k = k0; (k < k1); k++)
3963 rvec_inc(cg_cm, pos[k]);
3965 for (d = 0; (d < DIM); d++)
3967 cg_cm[d] *= inv_ncg;
3970 /* Put the charge group in the box and determine the cell index */
3971 for (d = DIM-1; d >= 0; d--)
3974 if (d < dd->npbcdim)
3976 bScrew = (dd->bScrewPBC && d == XX);
3977 if (tric_dir[d] && dd->nc[d] > 1)
3979 /* Use triclinic coordintates for this dimension */
3980 for (j = d+1; j < DIM; j++)
3982 pos_d += cg_cm[j]*tcm[j][d];
3985 while (pos_d >= box[d][d])
3988 rvec_dec(cg_cm, box[d]);
3991 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3992 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3994 for (k = k0; (k < k1); k++)
3996 rvec_dec(pos[k], box[d]);
3999 pos[k][YY] = box[YY][YY] - pos[k][YY];
4000 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4007 rvec_inc(cg_cm, box[d]);
4010 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
4011 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
4013 for (k = k0; (k < k1); k++)
4015 rvec_inc(pos[k], box[d]);
4018 pos[k][YY] = box[YY][YY] - pos[k][YY];
4019 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4024 /* This could be done more efficiently */
4026 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
4031 i = dd_index(dd->nc, ind);
4032 if (ma->ncg[i] == tmp_nalloc[i])
4034 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
4035 srenew(tmp_ind[i], tmp_nalloc[i]);
4037 tmp_ind[i][ma->ncg[i]] = icg;
4039 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
4043 for (i = 0; i < dd->nnodes; i++)
4046 for (k = 0; k < ma->ncg[i]; k++)
4048 ma->cg[k1++] = tmp_ind[i][k];
4051 ma->index[dd->nnodes] = k1;
4053 for (i = 0; i < dd->nnodes; i++)
4063 fprintf(fplog, "Charge group distribution at step %s:",
4064 gmx_step_str(step, buf));
4065 for (i = 0; i < dd->nnodes; i++)
4067 fprintf(fplog, " %d", ma->ncg[i]);
4069 fprintf(fplog, "\n");
4073 static void get_cg_distribution(FILE *fplog, gmx_large_int_t step, gmx_domdec_t *dd,
4074 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4077 gmx_domdec_master_t *ma = NULL;
4080 int *ibuf, buf2[2] = { 0, 0 };
4081 gmx_bool bMaster = DDMASTER(dd);
4088 check_screw_box(box);
4091 set_dd_cell_sizes_slb(dd, ddbox, TRUE, npulse);
4093 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
4094 for (i = 0; i < dd->nnodes; i++)
4096 ma->ibuf[2*i] = ma->ncg[i];
4097 ma->ibuf[2*i+1] = ma->nat[i];
4105 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4107 dd->ncg_home = buf2[0];
4108 dd->nat_home = buf2[1];
4109 dd->ncg_tot = dd->ncg_home;
4110 dd->nat_tot = dd->nat_home;
4111 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4113 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4114 srenew(dd->index_gl, dd->cg_nalloc);
4115 srenew(dd->cgindex, dd->cg_nalloc+1);
4119 for (i = 0; i < dd->nnodes; i++)
4121 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4122 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4127 DDMASTER(dd) ? ma->ibuf : NULL,
4128 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4129 DDMASTER(dd) ? ma->cg : NULL,
4130 dd->ncg_home*sizeof(int), dd->index_gl);
4132 /* Determine the home charge group sizes */
4134 for (i = 0; i < dd->ncg_home; i++)
4136 cg_gl = dd->index_gl[i];
4138 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4143 fprintf(debug, "Home charge groups:\n");
4144 for (i = 0; i < dd->ncg_home; i++)
4146 fprintf(debug, " %d", dd->index_gl[i]);
4149 fprintf(debug, "\n");
4152 fprintf(debug, "\n");
4156 static int compact_and_copy_vec_at(int ncg, int *move,
4159 rvec *src, gmx_domdec_comm_t *comm,
4162 int m, icg, i, i0, i1, nrcg;
4168 for (m = 0; m < DIM*2; m++)
4174 for (icg = 0; icg < ncg; icg++)
4176 i1 = cgindex[icg+1];
4182 /* Compact the home array in place */
4183 for (i = i0; i < i1; i++)
4185 copy_rvec(src[i], src[home_pos++]);
4191 /* Copy to the communication buffer */
4193 pos_vec[m] += 1 + vec*nrcg;
4194 for (i = i0; i < i1; i++)
4196 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4198 pos_vec[m] += (nvec - vec - 1)*nrcg;
4202 home_pos += i1 - i0;
4210 static int compact_and_copy_vec_cg(int ncg, int *move,
4212 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4215 int m, icg, i0, i1, nrcg;
4221 for (m = 0; m < DIM*2; m++)
4227 for (icg = 0; icg < ncg; icg++)
4229 i1 = cgindex[icg+1];
4235 /* Compact the home array in place */
4236 copy_rvec(src[icg], src[home_pos++]);
4242 /* Copy to the communication buffer */
4243 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
4244 pos_vec[m] += 1 + nrcg*nvec;
4256 static int compact_ind(int ncg, int *move,
4257 int *index_gl, int *cgindex,
4259 gmx_ga2la_t ga2la, char *bLocalCG,
4262 int cg, nat, a0, a1, a, a_gl;
4267 for (cg = 0; cg < ncg; cg++)
4273 /* Compact the home arrays in place.
4274 * Anything that can be done here avoids access to global arrays.
4276 cgindex[home_pos] = nat;
4277 for (a = a0; a < a1; a++)
4280 gatindex[nat] = a_gl;
4281 /* The cell number stays 0, so we don't need to set it */
4282 ga2la_change_la(ga2la, a_gl, nat);
4285 index_gl[home_pos] = index_gl[cg];
4286 cginfo[home_pos] = cginfo[cg];
4287 /* The charge group remains local, so bLocalCG does not change */
4292 /* Clear the global indices */
4293 for (a = a0; a < a1; a++)
4295 ga2la_del(ga2la, gatindex[a]);
4299 bLocalCG[index_gl[cg]] = FALSE;
4303 cgindex[home_pos] = nat;
4308 static void clear_and_mark_ind(int ncg, int *move,
4309 int *index_gl, int *cgindex, int *gatindex,
4310 gmx_ga2la_t ga2la, char *bLocalCG,
4315 for (cg = 0; cg < ncg; cg++)
4321 /* Clear the global indices */
4322 for (a = a0; a < a1; a++)
4324 ga2la_del(ga2la, gatindex[a]);
4328 bLocalCG[index_gl[cg]] = FALSE;
4330 /* Signal that this cg has moved using the ns cell index.
4331 * Here we set it to -1. fill_grid will change it
4332 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4334 cell_index[cg] = -1;
4339 static void print_cg_move(FILE *fplog,
4341 gmx_large_int_t step, int cg, int dim, int dir,
4342 gmx_bool bHaveLimitdAndCMOld, real limitd,
4343 rvec cm_old, rvec cm_new, real pos_d)
4345 gmx_domdec_comm_t *comm;
4350 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4351 if (bHaveLimitdAndCMOld)
4353 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4354 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4358 fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4359 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4361 fprintf(fplog, "distance out of cell %f\n",
4362 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4363 if (bHaveLimitdAndCMOld)
4365 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4366 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4368 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4369 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4370 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4372 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4373 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4375 comm->cell_x0[dim], comm->cell_x1[dim]);
4378 static void cg_move_error(FILE *fplog,
4380 gmx_large_int_t step, int cg, int dim, int dir,
4381 gmx_bool bHaveLimitdAndCMOld, real limitd,
4382 rvec cm_old, rvec cm_new, real pos_d)
4386 print_cg_move(fplog, dd, step, cg, dim, dir,
4387 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4389 print_cg_move(stderr, dd, step, cg, dim, dir,
4390 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4392 "A charge group moved too far between two domain decomposition steps\n"
4393 "This usually means that your system is not well equilibrated");
4396 static void rotate_state_atom(t_state *state, int a)
4400 for (est = 0; est < estNR; est++)
4402 if (EST_DISTR(est) && (state->flags & (1<<est)))
4407 /* Rotate the complete state; for a rectangular box only */
4408 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4409 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4412 state->v[a][YY] = -state->v[a][YY];
4413 state->v[a][ZZ] = -state->v[a][ZZ];
4416 state->sd_X[a][YY] = -state->sd_X[a][YY];
4417 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4420 state->cg_p[a][YY] = -state->cg_p[a][YY];
4421 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4423 case estDISRE_INITF:
4424 case estDISRE_RM3TAV:
4425 case estORIRE_INITF:
4427 /* These are distances, so not affected by rotation */
4430 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4436 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4438 if (natoms > comm->moved_nalloc)
4440 /* Contents should be preserved here */
4441 comm->moved_nalloc = over_alloc_dd(natoms);
4442 srenew(comm->moved, comm->moved_nalloc);
4448 static void calc_cg_move(FILE *fplog, gmx_large_int_t step,
4451 ivec tric_dir, matrix tcm,
4452 rvec cell_x0, rvec cell_x1,
4453 rvec limitd, rvec limit0, rvec limit1,
4455 int cg_start, int cg_end,
4460 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4461 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4465 real inv_ncg, pos_d;
4468 npbcdim = dd->npbcdim;
4470 for (cg = cg_start; cg < cg_end; cg++)
4477 copy_rvec(state->x[k0], cm_new);
4484 for (k = k0; (k < k1); k++)
4486 rvec_inc(cm_new, state->x[k]);
4488 for (d = 0; (d < DIM); d++)
4490 cm_new[d] = inv_ncg*cm_new[d];
4495 /* Do pbc and check DD cell boundary crossings */
4496 for (d = DIM-1; d >= 0; d--)
4500 bScrew = (dd->bScrewPBC && d == XX);
4501 /* Determine the location of this cg in lattice coordinates */
4505 for (d2 = d+1; d2 < DIM; d2++)
4507 pos_d += cm_new[d2]*tcm[d2][d];
4510 /* Put the charge group in the triclinic unit-cell */
4511 if (pos_d >= cell_x1[d])
4513 if (pos_d >= limit1[d])
4515 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4516 cg_cm[cg], cm_new, pos_d);
4519 if (dd->ci[d] == dd->nc[d] - 1)
4521 rvec_dec(cm_new, state->box[d]);
4524 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4525 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4527 for (k = k0; (k < k1); k++)
4529 rvec_dec(state->x[k], state->box[d]);
4532 rotate_state_atom(state, k);
4537 else if (pos_d < cell_x0[d])
4539 if (pos_d < limit0[d])
4541 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4542 cg_cm[cg], cm_new, pos_d);
4547 rvec_inc(cm_new, state->box[d]);
4550 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4551 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4553 for (k = k0; (k < k1); k++)
4555 rvec_inc(state->x[k], state->box[d]);
4558 rotate_state_atom(state, k);
4564 else if (d < npbcdim)
4566 /* Put the charge group in the rectangular unit-cell */
4567 while (cm_new[d] >= state->box[d][d])
4569 rvec_dec(cm_new, state->box[d]);
4570 for (k = k0; (k < k1); k++)
4572 rvec_dec(state->x[k], state->box[d]);
4575 while (cm_new[d] < 0)
4577 rvec_inc(cm_new, state->box[d]);
4578 for (k = k0; (k < k1); k++)
4580 rvec_inc(state->x[k], state->box[d]);
4586 copy_rvec(cm_new, cg_cm[cg]);
4588 /* Determine where this cg should go */
4591 for (d = 0; d < dd->ndim; d++)
4596 flag |= DD_FLAG_FW(d);
4602 else if (dev[dim] == -1)
4604 flag |= DD_FLAG_BW(d);
4607 if (dd->nc[dim] > 2)
4618 /* Temporarily store the flag in move */
4619 move[cg] = mc + flag;
4623 static void dd_redistribute_cg(FILE *fplog, gmx_large_int_t step,
4624 gmx_domdec_t *dd, ivec tric_dir,
4625 t_state *state, rvec **f,
4634 int ncg[DIM*2], nat[DIM*2];
4635 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4636 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4637 int sbuf[2], rbuf[2];
4638 int home_pos_cg, home_pos_at, buf_pos;
4640 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4643 real inv_ncg, pos_d;
4645 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4647 cginfo_mb_t *cginfo_mb;
4648 gmx_domdec_comm_t *comm;
4650 int nthread, thread;
4654 check_screw_box(state->box);
4658 if (fr->cutoff_scheme == ecutsGROUP)
4663 for (i = 0; i < estNR; i++)
4669 case estX: /* Always present */ break;
4670 case estV: bV = (state->flags & (1<<i)); break;
4671 case estSDX: bSDX = (state->flags & (1<<i)); break;
4672 case estCGP: bCGP = (state->flags & (1<<i)); break;
4675 case estDISRE_INITF:
4676 case estDISRE_RM3TAV:
4677 case estORIRE_INITF:
4679 /* No processing required */
4682 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4687 if (dd->ncg_tot > comm->nalloc_int)
4689 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4690 srenew(comm->buf_int, comm->nalloc_int);
4692 move = comm->buf_int;
4694 /* Clear the count */
4695 for (c = 0; c < dd->ndim*2; c++)
4701 npbcdim = dd->npbcdim;
4703 for (d = 0; (d < DIM); d++)
4705 limitd[d] = dd->comm->cellsize_min[d];
4706 if (d >= npbcdim && dd->ci[d] == 0)
4708 cell_x0[d] = -GMX_FLOAT_MAX;
4712 cell_x0[d] = comm->cell_x0[d];
4714 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4716 cell_x1[d] = GMX_FLOAT_MAX;
4720 cell_x1[d] = comm->cell_x1[d];
4724 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4725 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4729 /* We check after communication if a charge group moved
4730 * more than one cell. Set the pre-comm check limit to float_max.
4732 limit0[d] = -GMX_FLOAT_MAX;
4733 limit1[d] = GMX_FLOAT_MAX;
4737 make_tric_corr_matrix(npbcdim, state->box, tcm);
4739 cgindex = dd->cgindex;
4741 nthread = gmx_omp_nthreads_get(emntDomdec);
4743 /* Compute the center of geometry for all home charge groups
4744 * and put them in the box and determine where they should go.
4746 #pragma omp parallel for num_threads(nthread) schedule(static)
4747 for (thread = 0; thread < nthread; thread++)
4749 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4750 cell_x0, cell_x1, limitd, limit0, limit1,
4752 ( thread *dd->ncg_home)/nthread,
4753 ((thread+1)*dd->ncg_home)/nthread,
4754 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4758 for (cg = 0; cg < dd->ncg_home; cg++)
4763 flag = mc & ~DD_FLAG_NRCG;
4764 mc = mc & DD_FLAG_NRCG;
4767 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4769 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4770 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4772 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4773 /* We store the cg size in the lower 16 bits
4774 * and the place where the charge group should go
4775 * in the next 6 bits. This saves some communication volume.
4777 nrcg = cgindex[cg+1] - cgindex[cg];
4778 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4784 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4785 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4788 for (i = 0; i < dd->ndim*2; i++)
4790 *ncg_moved += ncg[i];
4807 /* Make sure the communication buffers are large enough */
4808 for (mc = 0; mc < dd->ndim*2; mc++)
4810 nvr = ncg[mc] + nat[mc]*nvec;
4811 if (nvr > comm->cgcm_state_nalloc[mc])
4813 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4814 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4818 switch (fr->cutoff_scheme)
4821 /* Recalculating cg_cm might be cheaper than communicating,
4822 * but that could give rise to rounding issues.
4825 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4826 nvec, cg_cm, comm, bCompact);
4829 /* Without charge groups we send the moved atom coordinates
4830 * over twice. This is so the code below can be used without
4831 * many conditionals for both for with and without charge groups.
4834 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4835 nvec, state->x, comm, FALSE);
4838 home_pos_cg -= *ncg_moved;
4842 gmx_incons("unimplemented");
4848 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4849 nvec, vec++, state->x, comm, bCompact);
4852 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4853 nvec, vec++, state->v, comm, bCompact);
4857 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4858 nvec, vec++, state->sd_X, comm, bCompact);
4862 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4863 nvec, vec++, state->cg_p, comm, bCompact);
4868 compact_ind(dd->ncg_home, move,
4869 dd->index_gl, dd->cgindex, dd->gatindex,
4870 dd->ga2la, comm->bLocalCG,
4875 if (fr->cutoff_scheme == ecutsVERLET)
4877 moved = get_moved(comm, dd->ncg_home);
4879 for (k = 0; k < dd->ncg_home; k++)
4886 moved = fr->ns.grid->cell_index;
4889 clear_and_mark_ind(dd->ncg_home, move,
4890 dd->index_gl, dd->cgindex, dd->gatindex,
4891 dd->ga2la, comm->bLocalCG,
4895 cginfo_mb = fr->cginfo_mb;
4897 *ncg_stay_home = home_pos_cg;
4898 for (d = 0; d < dd->ndim; d++)
4904 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4907 /* Communicate the cg and atom counts */
4912 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4913 d, dir, sbuf[0], sbuf[1]);
4915 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4917 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4919 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4920 srenew(comm->buf_int, comm->nalloc_int);
4923 /* Communicate the charge group indices, sizes and flags */
4924 dd_sendrecv_int(dd, d, dir,
4925 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4926 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4928 nvs = ncg[cdd] + nat[cdd]*nvec;
4929 i = rbuf[0] + rbuf[1] *nvec;
4930 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4932 /* Communicate cgcm and state */
4933 dd_sendrecv_rvec(dd, d, dir,
4934 comm->cgcm_state[cdd], nvs,
4935 comm->vbuf.v+nvr, i);
4936 ncg_recv += rbuf[0];
4937 nat_recv += rbuf[1];
4941 /* Process the received charge groups */
4943 for (cg = 0; cg < ncg_recv; cg++)
4945 flag = comm->buf_int[cg*DD_CGIBS+1];
4947 if (dim >= npbcdim && dd->nc[dim] > 2)
4949 /* No pbc in this dim and more than one domain boundary.
4950 * We do a separate check if a charge group didn't move too far.
4952 if (((flag & DD_FLAG_FW(d)) &&
4953 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4954 ((flag & DD_FLAG_BW(d)) &&
4955 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4957 cg_move_error(fplog, dd, step, cg, dim,
4958 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4960 comm->vbuf.v[buf_pos],
4961 comm->vbuf.v[buf_pos],
4962 comm->vbuf.v[buf_pos][dim]);
4969 /* Check which direction this cg should go */
4970 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4974 /* The cell boundaries for dimension d2 are not equal
4975 * for each cell row of the lower dimension(s),
4976 * therefore we might need to redetermine where
4977 * this cg should go.
4980 /* If this cg crosses the box boundary in dimension d2
4981 * we can use the communicated flag, so we do not
4982 * have to worry about pbc.
4984 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4985 (flag & DD_FLAG_FW(d2))) ||
4986 (dd->ci[dim2] == 0 &&
4987 (flag & DD_FLAG_BW(d2)))))
4989 /* Clear the two flags for this dimension */
4990 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4991 /* Determine the location of this cg
4992 * in lattice coordinates
4994 pos_d = comm->vbuf.v[buf_pos][dim2];
4997 for (d3 = dim2+1; d3 < DIM; d3++)
5000 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
5003 /* Check of we are not at the box edge.
5004 * pbc is only handled in the first step above,
5005 * but this check could move over pbc while
5006 * the first step did not due to different rounding.
5008 if (pos_d >= cell_x1[dim2] &&
5009 dd->ci[dim2] != dd->nc[dim2]-1)
5011 flag |= DD_FLAG_FW(d2);
5013 else if (pos_d < cell_x0[dim2] &&
5016 flag |= DD_FLAG_BW(d2);
5018 comm->buf_int[cg*DD_CGIBS+1] = flag;
5021 /* Set to which neighboring cell this cg should go */
5022 if (flag & DD_FLAG_FW(d2))
5026 else if (flag & DD_FLAG_BW(d2))
5028 if (dd->nc[dd->dim[d2]] > 2)
5040 nrcg = flag & DD_FLAG_NRCG;
5043 if (home_pos_cg+1 > dd->cg_nalloc)
5045 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5046 srenew(dd->index_gl, dd->cg_nalloc);
5047 srenew(dd->cgindex, dd->cg_nalloc+1);
5049 /* Set the global charge group index and size */
5050 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5051 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5052 /* Copy the state from the buffer */
5053 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5054 if (fr->cutoff_scheme == ecutsGROUP)
5057 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5061 /* Set the cginfo */
5062 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5063 dd->index_gl[home_pos_cg]);
5066 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5069 if (home_pos_at+nrcg > state->nalloc)
5071 dd_realloc_state(state, f, home_pos_at+nrcg);
5073 for (i = 0; i < nrcg; i++)
5075 copy_rvec(comm->vbuf.v[buf_pos++],
5076 state->x[home_pos_at+i]);
5080 for (i = 0; i < nrcg; i++)
5082 copy_rvec(comm->vbuf.v[buf_pos++],
5083 state->v[home_pos_at+i]);
5088 for (i = 0; i < nrcg; i++)
5090 copy_rvec(comm->vbuf.v[buf_pos++],
5091 state->sd_X[home_pos_at+i]);
5096 for (i = 0; i < nrcg; i++)
5098 copy_rvec(comm->vbuf.v[buf_pos++],
5099 state->cg_p[home_pos_at+i]);
5103 home_pos_at += nrcg;
5107 /* Reallocate the buffers if necessary */
5108 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5110 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5111 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5113 nvr = ncg[mc] + nat[mc]*nvec;
5114 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5116 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5117 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5119 /* Copy from the receive to the send buffers */
5120 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5121 comm->buf_int + cg*DD_CGIBS,
5122 DD_CGIBS*sizeof(int));
5123 memcpy(comm->cgcm_state[mc][nvr],
5124 comm->vbuf.v[buf_pos],
5125 (1+nrcg*nvec)*sizeof(rvec));
5126 buf_pos += 1 + nrcg*nvec;
5133 /* With sorting (!bCompact) the indices are now only partially up to date
5134 * and ncg_home and nat_home are not the real count, since there are
5135 * "holes" in the arrays for the charge groups that moved to neighbors.
5137 if (fr->cutoff_scheme == ecutsVERLET)
5139 moved = get_moved(comm, home_pos_cg);
5141 for (i = dd->ncg_home; i < home_pos_cg; i++)
5146 dd->ncg_home = home_pos_cg;
5147 dd->nat_home = home_pos_at;
5152 "Finished repartitioning: cgs moved out %d, new home %d\n",
5153 *ncg_moved, dd->ncg_home-*ncg_moved);
5158 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5160 dd->comm->cycl[ddCycl] += cycles;
5161 dd->comm->cycl_n[ddCycl]++;
5162 if (cycles > dd->comm->cycl_max[ddCycl])
5164 dd->comm->cycl_max[ddCycl] = cycles;
5168 static double force_flop_count(t_nrnb *nrnb)
5175 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5177 /* To get closer to the real timings, we half the count
5178 * for the normal loops and again half it for water loops.
5181 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5183 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5187 sum += nrnb->n[i]*0.50*cost_nrnb(i);
5190 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5193 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5195 sum += nrnb->n[i]*cost_nrnb(i);
5198 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5200 sum += nrnb->n[i]*cost_nrnb(i);
5206 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5208 if (dd->comm->eFlop)
5210 dd->comm->flop -= force_flop_count(nrnb);
5213 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5215 if (dd->comm->eFlop)
5217 dd->comm->flop += force_flop_count(nrnb);
5222 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5226 for (i = 0; i < ddCyclNr; i++)
5228 dd->comm->cycl[i] = 0;
5229 dd->comm->cycl_n[i] = 0;
5230 dd->comm->cycl_max[i] = 0;
5233 dd->comm->flop_n = 0;
5236 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5238 gmx_domdec_comm_t *comm;
5239 gmx_domdec_load_t *load;
5240 gmx_domdec_root_t *root = NULL;
5241 int d, dim, cid, i, pos;
5242 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5247 fprintf(debug, "get_load_distribution start\n");
5250 wallcycle_start(wcycle, ewcDDCOMMLOAD);
5254 bSepPME = (dd->pme_nodeid >= 0);
5256 for (d = dd->ndim-1; d >= 0; d--)
5259 /* Check if we participate in the communication in this dimension */
5260 if (d == dd->ndim-1 ||
5261 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5263 load = &comm->load[d];
5266 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
5269 if (d == dd->ndim-1)
5271 sbuf[pos++] = dd_force_load(comm);
5272 sbuf[pos++] = sbuf[0];
5275 sbuf[pos++] = sbuf[0];
5276 sbuf[pos++] = cell_frac;
5279 sbuf[pos++] = comm->cell_f_max0[d];
5280 sbuf[pos++] = comm->cell_f_min1[d];
5285 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5286 sbuf[pos++] = comm->cycl[ddCyclPME];
5291 sbuf[pos++] = comm->load[d+1].sum;
5292 sbuf[pos++] = comm->load[d+1].max;
5295 sbuf[pos++] = comm->load[d+1].sum_m;
5296 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5297 sbuf[pos++] = comm->load[d+1].flags;
5300 sbuf[pos++] = comm->cell_f_max0[d];
5301 sbuf[pos++] = comm->cell_f_min1[d];
5306 sbuf[pos++] = comm->load[d+1].mdf;
5307 sbuf[pos++] = comm->load[d+1].pme;
5311 /* Communicate a row in DD direction d.
5312 * The communicators are setup such that the root always has rank 0.
5315 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5316 load->load, load->nload*sizeof(float), MPI_BYTE,
5317 0, comm->mpi_comm_load[d]);
5319 if (dd->ci[dim] == dd->master_ci[dim])
5321 /* We are the root, process this row */
5322 if (comm->bDynLoadBal)
5324 root = comm->root[d];
5334 for (i = 0; i < dd->nc[dim]; i++)
5336 load->sum += load->load[pos++];
5337 load->max = max(load->max, load->load[pos]);
5343 /* This direction could not be load balanced properly,
5344 * therefore we need to use the maximum iso the average load.
5346 load->sum_m = max(load->sum_m, load->load[pos]);
5350 load->sum_m += load->load[pos];
5353 load->cvol_min = min(load->cvol_min, load->load[pos]);
5357 load->flags = (int)(load->load[pos++] + 0.5);
5361 root->cell_f_max0[i] = load->load[pos++];
5362 root->cell_f_min1[i] = load->load[pos++];
5367 load->mdf = max(load->mdf, load->load[pos]);
5369 load->pme = max(load->pme, load->load[pos]);
5373 if (comm->bDynLoadBal && root->bLimited)
5375 load->sum_m *= dd->nc[dim];
5376 load->flags |= (1<<d);
5384 comm->nload += dd_load_count(comm);
5385 comm->load_step += comm->cycl[ddCyclStep];
5386 comm->load_sum += comm->load[0].sum;
5387 comm->load_max += comm->load[0].max;
5388 if (comm->bDynLoadBal)
5390 for (d = 0; d < dd->ndim; d++)
5392 if (comm->load[0].flags & (1<<d))
5394 comm->load_lim[d]++;
5400 comm->load_mdf += comm->load[0].mdf;
5401 comm->load_pme += comm->load[0].pme;
5405 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5409 fprintf(debug, "get_load_distribution finished\n");
5413 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5415 /* Return the relative performance loss on the total run time
5416 * due to the force calculation load imbalance.
5418 if (dd->comm->nload > 0)
5421 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5422 (dd->comm->load_step*dd->nnodes);
5430 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5433 int npp, npme, nnodes, d, limp;
5434 float imbal, pme_f_ratio, lossf, lossp = 0;
5436 gmx_domdec_comm_t *comm;
5439 if (DDMASTER(dd) && comm->nload > 0)
5442 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5443 nnodes = npp + npme;
5444 imbal = comm->load_max*npp/comm->load_sum - 1;
5445 lossf = dd_force_imb_perf_loss(dd);
5446 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5447 fprintf(fplog, "%s", buf);
5448 fprintf(stderr, "\n");
5449 fprintf(stderr, "%s", buf);
5450 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5451 fprintf(fplog, "%s", buf);
5452 fprintf(stderr, "%s", buf);
5454 if (comm->bDynLoadBal)
5456 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5457 for (d = 0; d < dd->ndim; d++)
5459 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5460 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5466 sprintf(buf+strlen(buf), "\n");
5467 fprintf(fplog, "%s", buf);
5468 fprintf(stderr, "%s", buf);
5472 pme_f_ratio = comm->load_pme/comm->load_mdf;
5473 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5476 lossp *= (float)npme/(float)nnodes;
5480 lossp *= (float)npp/(float)nnodes;
5482 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5483 fprintf(fplog, "%s", buf);
5484 fprintf(stderr, "%s", buf);
5485 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5486 fprintf(fplog, "%s", buf);
5487 fprintf(stderr, "%s", buf);
5489 fprintf(fplog, "\n");
5490 fprintf(stderr, "\n");
5492 if (lossf >= DD_PERF_LOSS)
5495 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5496 " in the domain decomposition.\n", lossf*100);
5497 if (!comm->bDynLoadBal)
5499 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5503 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5505 fprintf(fplog, "%s\n", buf);
5506 fprintf(stderr, "%s\n", buf);
5508 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
5511 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5512 " had %s work to do than the PP nodes.\n"
5513 " You might want to %s the number of PME nodes\n"
5514 " or %s the cut-off and the grid spacing.\n",
5516 (lossp < 0) ? "less" : "more",
5517 (lossp < 0) ? "decrease" : "increase",
5518 (lossp < 0) ? "decrease" : "increase");
5519 fprintf(fplog, "%s\n", buf);
5520 fprintf(stderr, "%s\n", buf);
5525 static float dd_vol_min(gmx_domdec_t *dd)
5527 return dd->comm->load[0].cvol_min*dd->nnodes;
5530 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5532 return dd->comm->load[0].flags;
5535 static float dd_f_imbal(gmx_domdec_t *dd)
5537 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5540 float dd_pme_f_ratio(gmx_domdec_t *dd)
5542 if (dd->comm->cycl_n[ddCyclPME] > 0)
5544 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5552 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_large_int_t step)
5557 flags = dd_load_flags(dd);
5561 "DD load balancing is limited by minimum cell size in dimension");
5562 for (d = 0; d < dd->ndim; d++)
5566 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5569 fprintf(fplog, "\n");
5571 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5572 if (dd->comm->bDynLoadBal)
5574 fprintf(fplog, " vol min/aver %5.3f%c",
5575 dd_vol_min(dd), flags ? '!' : ' ');
5577 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5578 if (dd->comm->cycl_n[ddCyclPME])
5580 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5582 fprintf(fplog, "\n\n");
5585 static void dd_print_load_verbose(gmx_domdec_t *dd)
5587 if (dd->comm->bDynLoadBal)
5589 fprintf(stderr, "vol %4.2f%c ",
5590 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5592 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5593 if (dd->comm->cycl_n[ddCyclPME])
5595 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
5600 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5605 gmx_domdec_root_t *root;
5606 gmx_bool bPartOfGroup = FALSE;
5608 dim = dd->dim[dim_ind];
5609 copy_ivec(loc, loc_c);
5610 for (i = 0; i < dd->nc[dim]; i++)
5613 rank = dd_index(dd->nc, loc_c);
5614 if (rank == dd->rank)
5616 /* This process is part of the group */
5617 bPartOfGroup = TRUE;
5620 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5624 dd->comm->mpi_comm_load[dim_ind] = c_row;
5625 if (dd->comm->eDLB != edlbNO)
5627 if (dd->ci[dim] == dd->master_ci[dim])
5629 /* This is the root process of this row */
5630 snew(dd->comm->root[dim_ind], 1);
5631 root = dd->comm->root[dim_ind];
5632 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5633 snew(root->old_cell_f, dd->nc[dim]+1);
5634 snew(root->bCellMin, dd->nc[dim]);
5637 snew(root->cell_f_max0, dd->nc[dim]);
5638 snew(root->cell_f_min1, dd->nc[dim]);
5639 snew(root->bound_min, dd->nc[dim]);
5640 snew(root->bound_max, dd->nc[dim]);
5642 snew(root->buf_ncd, dd->nc[dim]);
5646 /* This is not a root process, we only need to receive cell_f */
5647 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5650 if (dd->ci[dim] == dd->master_ci[dim])
5652 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
5658 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5659 const gmx_hw_info_t gmx_unused *hwinfo,
5660 const gmx_hw_opt_t gmx_unused *hw_opt)
5663 int physicalnode_id_hash;
5666 MPI_Comm mpi_comm_pp_physicalnode;
5668 if (!(cr->duty & DUTY_PP) ||
5669 hw_opt->gpu_opt.ncuda_dev_use == 0)
5671 /* Only PP nodes (currently) use GPUs.
5672 * If we don't have GPUs, there are no resources to share.
5677 physicalnode_id_hash = gmx_physicalnode_id_hash();
5679 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5685 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5686 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5687 dd->rank, physicalnode_id_hash, gpu_id);
5689 /* Split the PP communicator over the physical nodes */
5690 /* TODO: See if we should store this (before), as it's also used for
5691 * for the nodecomm summution.
5693 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5694 &mpi_comm_pp_physicalnode);
5695 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5696 &dd->comm->mpi_comm_gpu_shared);
5697 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5698 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5702 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5705 /* Note that some ranks could share a GPU, while others don't */
5707 if (dd->comm->nrank_gpu_shared == 1)
5709 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
5714 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5717 int dim0, dim1, i, j;
5722 fprintf(debug, "Making load communicators\n");
5725 snew(dd->comm->load, dd->ndim);
5726 snew(dd->comm->mpi_comm_load, dd->ndim);
5729 make_load_communicator(dd, 0, loc);
5733 for (i = 0; i < dd->nc[dim0]; i++)
5736 make_load_communicator(dd, 1, loc);
5742 for (i = 0; i < dd->nc[dim0]; i++)
5746 for (j = 0; j < dd->nc[dim1]; j++)
5749 make_load_communicator(dd, 2, loc);
5756 fprintf(debug, "Finished making load communicators\n");
5761 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5764 int d, dim, i, j, m;
5767 ivec dd_zp[DD_MAXIZONE];
5768 gmx_domdec_zones_t *zones;
5769 gmx_domdec_ns_ranges_t *izone;
5771 for (d = 0; d < dd->ndim; d++)
5774 copy_ivec(dd->ci, tmp);
5775 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5776 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5777 copy_ivec(dd->ci, tmp);
5778 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5779 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5782 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5785 dd->neighbor[d][1]);
5791 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5793 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5794 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5801 for (i = 0; i < nzonep; i++)
5803 copy_ivec(dd_zp3[i], dd_zp[i]);
5809 for (i = 0; i < nzonep; i++)
5811 copy_ivec(dd_zp2[i], dd_zp[i]);
5817 for (i = 0; i < nzonep; i++)
5819 copy_ivec(dd_zp1[i], dd_zp[i]);
5823 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5828 zones = &dd->comm->zones;
5830 for (i = 0; i < nzone; i++)
5833 clear_ivec(zones->shift[i]);
5834 for (d = 0; d < dd->ndim; d++)
5836 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5841 for (i = 0; i < nzone; i++)
5843 for (d = 0; d < DIM; d++)
5845 s[d] = dd->ci[d] - zones->shift[i][d];
5850 else if (s[d] >= dd->nc[d])
5856 zones->nizone = nzonep;
5857 for (i = 0; i < zones->nizone; i++)
5859 if (dd_zp[i][0] != i)
5861 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5863 izone = &zones->izone[i];
5864 izone->j0 = dd_zp[i][1];
5865 izone->j1 = dd_zp[i][2];
5866 for (dim = 0; dim < DIM; dim++)
5868 if (dd->nc[dim] == 1)
5870 /* All shifts should be allowed */
5871 izone->shift0[dim] = -1;
5872 izone->shift1[dim] = 1;
5877 izone->shift0[d] = 0;
5878 izone->shift1[d] = 0;
5879 for(j=izone->j0; j<izone->j1; j++) {
5880 if (dd->shift[j][d] > dd->shift[i][d])
5881 izone->shift0[d] = -1;
5882 if (dd->shift[j][d] < dd->shift[i][d])
5883 izone->shift1[d] = 1;
5889 /* Assume the shift are not more than 1 cell */
5890 izone->shift0[dim] = 1;
5891 izone->shift1[dim] = -1;
5892 for (j = izone->j0; j < izone->j1; j++)
5894 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5895 if (shift_diff < izone->shift0[dim])
5897 izone->shift0[dim] = shift_diff;
5899 if (shift_diff > izone->shift1[dim])
5901 izone->shift1[dim] = shift_diff;
5908 if (dd->comm->eDLB != edlbNO)
5910 snew(dd->comm->root, dd->ndim);
5913 if (dd->comm->bRecordLoad)
5915 make_load_communicators(dd);
5919 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
5922 gmx_domdec_comm_t *comm;
5933 if (comm->bCartesianPP)
5935 /* Set up cartesian communication for the particle-particle part */
5938 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5939 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5942 for (i = 0; i < DIM; i++)
5946 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5948 /* We overwrite the old communicator with the new cartesian one */
5949 cr->mpi_comm_mygroup = comm_cart;
5952 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5953 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5955 if (comm->bCartesianPP_PME)
5957 /* Since we want to use the original cartesian setup for sim,
5958 * and not the one after split, we need to make an index.
5960 snew(comm->ddindex2ddnodeid, dd->nnodes);
5961 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5962 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5963 /* Get the rank of the DD master,
5964 * above we made sure that the master node is a PP node.
5974 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5976 else if (comm->bCartesianPP)
5978 if (cr->npmenodes == 0)
5980 /* The PP communicator is also
5981 * the communicator for this simulation
5983 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5985 cr->nodeid = dd->rank;
5987 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5989 /* We need to make an index to go from the coordinates
5990 * to the nodeid of this simulation.
5992 snew(comm->ddindex2simnodeid, dd->nnodes);
5993 snew(buf, dd->nnodes);
5994 if (cr->duty & DUTY_PP)
5996 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5998 /* Communicate the ddindex to simulation nodeid index */
5999 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6000 cr->mpi_comm_mysim);
6003 /* Determine the master coordinates and rank.
6004 * The DD master should be the same node as the master of this sim.
6006 for (i = 0; i < dd->nnodes; i++)
6008 if (comm->ddindex2simnodeid[i] == 0)
6010 ddindex2xyz(dd->nc, i, dd->master_ci);
6011 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
6016 fprintf(debug, "The master rank is %d\n", dd->masterrank);
6021 /* No Cartesian communicators */
6022 /* We use the rank in dd->comm->all as DD index */
6023 ddindex2xyz(dd->nc, dd->rank, dd->ci);
6024 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
6026 clear_ivec(dd->master_ci);
6033 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6034 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6039 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6040 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6044 static void receive_ddindex2simnodeid(t_commrec *cr)
6048 gmx_domdec_comm_t *comm;
6055 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6057 snew(comm->ddindex2simnodeid, dd->nnodes);
6058 snew(buf, dd->nnodes);
6059 if (cr->duty & DUTY_PP)
6061 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6064 /* Communicate the ddindex to simulation nodeid index */
6065 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6066 cr->mpi_comm_mysim);
6073 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6074 int ncg, int natoms)
6076 gmx_domdec_master_t *ma;
6081 snew(ma->ncg, dd->nnodes);
6082 snew(ma->index, dd->nnodes+1);
6084 snew(ma->nat, dd->nnodes);
6085 snew(ma->ibuf, dd->nnodes*2);
6086 snew(ma->cell_x, DIM);
6087 for (i = 0; i < DIM; i++)
6089 snew(ma->cell_x[i], dd->nc[i]+1);
6092 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6098 snew(ma->vbuf, natoms);
6104 static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
6105 int gmx_unused reorder)
6108 gmx_domdec_comm_t *comm;
6119 if (comm->bCartesianPP)
6121 for (i = 1; i < DIM; i++)
6123 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6125 if (bDiv[YY] || bDiv[ZZ])
6127 comm->bCartesianPP_PME = TRUE;
6128 /* If we have 2D PME decomposition, which is always in x+y,
6129 * we stack the PME only nodes in z.
6130 * Otherwise we choose the direction that provides the thinnest slab
6131 * of PME only nodes as this will have the least effect
6132 * on the PP communication.
6133 * But for the PME communication the opposite might be better.
6135 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6137 dd->nc[YY] > dd->nc[ZZ]))
6139 comm->cartpmedim = ZZ;
6143 comm->cartpmedim = YY;
6145 comm->ntot[comm->cartpmedim]
6146 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6150 fprintf(fplog, "#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6152 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6157 if (comm->bCartesianPP_PME)
6161 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6164 for (i = 0; i < DIM; i++)
6168 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6171 MPI_Comm_rank(comm_cart, &rank);
6172 if (MASTERNODE(cr) && rank != 0)
6174 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6177 /* With this assigment we loose the link to the original communicator
6178 * which will usually be MPI_COMM_WORLD, unless have multisim.
6180 cr->mpi_comm_mysim = comm_cart;
6181 cr->sim_nodeid = rank;
6183 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6187 fprintf(fplog, "Cartesian nodeid %d, coordinates %d %d %d\n\n",
6188 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6191 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6195 if (cr->npmenodes == 0 ||
6196 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6198 cr->duty = DUTY_PME;
6201 /* Split the sim communicator into PP and PME only nodes */
6202 MPI_Comm_split(cr->mpi_comm_mysim,
6204 dd_index(comm->ntot, dd->ci),
6205 &cr->mpi_comm_mygroup);
6209 switch (dd_node_order)
6214 fprintf(fplog, "Order of the nodes: PP first, PME last\n");
6217 case ddnoINTERLEAVE:
6218 /* Interleave the PP-only and PME-only nodes,
6219 * as on clusters with dual-core machines this will double
6220 * the communication bandwidth of the PME processes
6221 * and thus speed up the PP <-> PME and inter PME communication.
6225 fprintf(fplog, "Interleaving PP and PME nodes\n");
6227 comm->pmenodes = dd_pmenodes(cr);
6232 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6235 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6237 cr->duty = DUTY_PME;
6244 /* Split the sim communicator into PP and PME only nodes */
6245 MPI_Comm_split(cr->mpi_comm_mysim,
6248 &cr->mpi_comm_mygroup);
6249 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6255 fprintf(fplog, "This is a %s only node\n\n",
6256 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
6260 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6263 gmx_domdec_comm_t *comm;
6269 copy_ivec(dd->nc, comm->ntot);
6271 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6272 comm->bCartesianPP_PME = FALSE;
6274 /* Reorder the nodes by default. This might change the MPI ranks.
6275 * Real reordering is only supported on very few architectures,
6276 * Blue Gene is one of them.
6278 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6280 if (cr->npmenodes > 0)
6282 /* Split the communicator into a PP and PME part */
6283 split_communicator(fplog, cr, dd_node_order, CartReorder);
6284 if (comm->bCartesianPP_PME)
6286 /* We (possibly) reordered the nodes in split_communicator,
6287 * so it is no longer required in make_pp_communicator.
6289 CartReorder = FALSE;
6294 /* All nodes do PP and PME */
6296 /* We do not require separate communicators */
6297 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6301 if (cr->duty & DUTY_PP)
6303 /* Copy or make a new PP communicator */
6304 make_pp_communicator(fplog, cr, CartReorder);
6308 receive_ddindex2simnodeid(cr);
6311 if (!(cr->duty & DUTY_PME))
6313 /* Set up the commnuication to our PME node */
6314 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6315 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6318 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6319 dd->pme_nodeid, dd->pme_receive_vir_ener);
6324 dd->pme_nodeid = -1;
6329 dd->ma = init_gmx_domdec_master_t(dd,
6331 comm->cgs_gl.index[comm->cgs_gl.nr]);
6335 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6337 real *slb_frac, tot;
6342 if (nc > 1 && size_string != NULL)
6346 fprintf(fplog, "Using static load balancing for the %s direction\n",
6351 for (i = 0; i < nc; i++)
6354 sscanf(size_string, "%lf%n", &dbl, &n);
6357 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6366 fprintf(fplog, "Relative cell sizes:");
6368 for (i = 0; i < nc; i++)
6373 fprintf(fplog, " %5.3f", slb_frac[i]);
6378 fprintf(fplog, "\n");
6385 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6388 gmx_mtop_ilistloop_t iloop;
6392 iloop = gmx_mtop_ilistloop_init(mtop);
6393 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6395 for (ftype = 0; ftype < F_NRE; ftype++)
6397 if ((interaction_function[ftype].flags & IF_BOND) &&
6400 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
6408 static int dd_nst_env(FILE *fplog, const char *env_var, int def)
6414 val = getenv(env_var);
6417 if (sscanf(val, "%d", &nst) <= 0)
6423 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6431 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6435 fprintf(stderr, "\n%s\n", warn_string);
6439 fprintf(fplog, "\n%s\n", warn_string);
6443 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6444 t_inputrec *ir, FILE *fplog)
6446 if (ir->ePBC == epbcSCREW &&
6447 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6449 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6452 if (ir->ns_type == ensSIMPLE)
6454 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
6457 if (ir->nstlist == 0)
6459 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6462 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6464 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6468 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6473 r = ddbox->box_size[XX];
6474 for (di = 0; di < dd->ndim; di++)
6477 /* Check using the initial average cell size */
6478 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6484 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6485 const char *dlb_opt, gmx_bool bRecordLoad,
6486 unsigned long Flags, t_inputrec *ir)
6494 case 'a': eDLB = edlbAUTO; break;
6495 case 'n': eDLB = edlbNO; break;
6496 case 'y': eDLB = edlbYES; break;
6497 default: gmx_incons("Unknown dlb_opt");
6500 if (Flags & MD_RERUN)
6505 if (!EI_DYNAMICS(ir->eI))
6507 if (eDLB == edlbYES)
6509 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6510 dd_warning(cr, fplog, buf);
6518 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6523 if (Flags & MD_REPRODUCIBLE)
6530 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6534 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6537 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
6545 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6550 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6552 /* Decomposition order z,y,x */
6555 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6557 for (dim = DIM-1; dim >= 0; dim--)
6559 if (dd->nc[dim] > 1)
6561 dd->dim[dd->ndim++] = dim;
6567 /* Decomposition order x,y,z */
6568 for (dim = 0; dim < DIM; dim++)
6570 if (dd->nc[dim] > 1)
6572 dd->dim[dd->ndim++] = dim;
6578 static gmx_domdec_comm_t *init_dd_comm()
6580 gmx_domdec_comm_t *comm;
6584 snew(comm->cggl_flag, DIM*2);
6585 snew(comm->cgcm_state, DIM*2);
6586 for (i = 0; i < DIM*2; i++)
6588 comm->cggl_flag_nalloc[i] = 0;
6589 comm->cgcm_state_nalloc[i] = 0;
6592 comm->nalloc_int = 0;
6593 comm->buf_int = NULL;
6595 vec_rvec_init(&comm->vbuf);
6597 comm->n_load_have = 0;
6598 comm->n_load_collect = 0;
6600 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6602 comm->sum_nat[i] = 0;
6606 comm->load_step = 0;
6609 clear_ivec(comm->load_lim);
6616 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6617 unsigned long Flags,
6619 real comm_distance_min, real rconstr,
6620 const char *dlb_opt, real dlb_scale,
6621 const char *sizex, const char *sizey, const char *sizez,
6622 gmx_mtop_t *mtop, t_inputrec *ir,
6623 matrix box, rvec *x,
6625 int *npme_x, int *npme_y)
6628 gmx_domdec_comm_t *comm;
6631 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6638 "\nInitializing Domain Decomposition on %d nodes\n", cr->nnodes);
6643 dd->comm = init_dd_comm();
6645 snew(comm->cggl_flag, DIM*2);
6646 snew(comm->cgcm_state, DIM*2);
6648 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6649 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6651 dd->bSendRecv2 = dd_nst_env(fplog, "GMX_DD_SENDRECV2", 0);
6652 comm->dlb_scale_lim = dd_nst_env(fplog, "GMX_DLB_MAX", 10);
6653 comm->eFlop = dd_nst_env(fplog, "GMX_DLB_FLOP", 0);
6654 recload = dd_nst_env(fplog, "GMX_DD_LOAD", 1);
6655 comm->nstSortCG = dd_nst_env(fplog, "GMX_DD_SORT", 1);
6656 comm->nstDDDump = dd_nst_env(fplog, "GMX_DD_DUMP", 0);
6657 comm->nstDDDumpGrid = dd_nst_env(fplog, "GMX_DD_DUMP_GRID", 0);
6658 comm->DD_debug = dd_nst_env(fplog, "GMX_DD_DEBUG", 0);
6660 dd->pme_recv_f_alloc = 0;
6661 dd->pme_recv_f_buf = NULL;
6663 if (dd->bSendRecv2 && fplog)
6665 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6671 fprintf(fplog, "Will load balance based on FLOP count\n");
6673 if (comm->eFlop > 1)
6675 srand(1+cr->nodeid);
6677 comm->bRecordLoad = TRUE;
6681 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6685 /* Initialize to GPU share count to 0, might change later */
6686 comm->nrank_gpu_shared = 0;
6688 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6690 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6693 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6695 dd->bGridJump = comm->bDynLoadBal;
6696 comm->bPMELoadBalDLBLimits = FALSE;
6698 if (comm->nstSortCG)
6702 if (comm->nstSortCG == 1)
6704 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6708 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6712 snew(comm->sort, 1);
6718 fprintf(fplog, "Will not sort the charge groups\n");
6722 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6724 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6725 if (comm->bInterCGBondeds)
6727 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6731 comm->bInterCGMultiBody = FALSE;
6734 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6735 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6737 if (ir->rlistlong == 0)
6739 /* Set the cut-off to some very large value,
6740 * so we don't need if statements everywhere in the code.
6741 * We use sqrt, since the cut-off is squared in some places.
6743 comm->cutoff = GMX_CUTOFF_INF;
6747 comm->cutoff = ir->rlistlong;
6749 comm->cutoff_mbody = 0;
6751 comm->cellsize_limit = 0;
6752 comm->bBondComm = FALSE;
6754 if (comm->bInterCGBondeds)
6756 if (comm_distance_min > 0)
6758 comm->cutoff_mbody = comm_distance_min;
6759 if (Flags & MD_DDBONDCOMM)
6761 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6765 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6767 r_bonded_limit = comm->cutoff_mbody;
6769 else if (ir->bPeriodicMols)
6771 /* Can not easily determine the required cut-off */
6772 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6773 comm->cutoff_mbody = comm->cutoff/2;
6774 r_bonded_limit = comm->cutoff_mbody;
6780 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6781 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6783 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6784 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6786 /* We use an initial margin of 10% for the minimum cell size,
6787 * except when we are just below the non-bonded cut-off.
6789 if (Flags & MD_DDBONDCOMM)
6791 if (max(r_2b, r_mb) > comm->cutoff)
6793 r_bonded = max(r_2b, r_mb);
6794 r_bonded_limit = 1.1*r_bonded;
6795 comm->bBondComm = TRUE;
6800 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6802 /* We determine cutoff_mbody later */
6806 /* No special bonded communication,
6807 * simply increase the DD cut-off.
6809 r_bonded_limit = 1.1*max(r_2b, r_mb);
6810 comm->cutoff_mbody = r_bonded_limit;
6811 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6814 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6818 "Minimum cell size due to bonded interactions: %.3f nm\n",
6819 comm->cellsize_limit);
6823 if (dd->bInterCGcons && rconstr <= 0)
6825 /* There is a cell size limit due to the constraints (P-LINCS) */
6826 rconstr = constr_r_max(fplog, mtop, ir);
6830 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6832 if (rconstr > comm->cellsize_limit)
6834 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6838 else if (rconstr > 0 && fplog)
6840 /* Here we do not check for dd->bInterCGcons,
6841 * because one can also set a cell size limit for virtual sites only
6842 * and at this point we don't know yet if there are intercg v-sites.
6845 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6848 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6850 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6854 copy_ivec(nc, dd->nc);
6855 set_dd_dim(fplog, dd);
6856 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6858 if (cr->npmenodes == -1)
6862 acs = average_cellsize_min(dd, ddbox);
6863 if (acs < comm->cellsize_limit)
6867 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6869 gmx_fatal_collective(FARGS, cr, NULL,
6870 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6871 acs, comm->cellsize_limit);
6876 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6878 /* We need to choose the optimal DD grid and possibly PME nodes */
6879 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6880 comm->eDLB != edlbNO, dlb_scale,
6881 comm->cellsize_limit, comm->cutoff,
6882 comm->bInterCGBondeds);
6884 if (dd->nc[XX] == 0)
6886 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6887 sprintf(buf, "Change the number of nodes or mdrun option %s%s%s",
6888 !bC ? "-rdd" : "-rcon",
6889 comm->eDLB != edlbNO ? " or -dds" : "",
6890 bC ? " or your LINCS settings" : "");
6892 gmx_fatal_collective(FARGS, cr, NULL,
6893 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6895 "Look in the log file for details on the domain decomposition",
6896 cr->nnodes-cr->npmenodes, limit, buf);
6898 set_dd_dim(fplog, dd);
6904 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6905 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6908 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6909 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6911 gmx_fatal_collective(FARGS, cr, NULL,
6912 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6913 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6915 if (cr->npmenodes > dd->nnodes)
6917 gmx_fatal_collective(FARGS, cr, NULL,
6918 "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6920 if (cr->npmenodes > 0)
6922 comm->npmenodes = cr->npmenodes;
6926 comm->npmenodes = dd->nnodes;
6929 if (EEL_PME(ir->coulombtype))
6931 /* The following choices should match those
6932 * in comm_cost_est in domdec_setup.c.
6933 * Note that here the checks have to take into account
6934 * that the decomposition might occur in a different order than xyz
6935 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6936 * in which case they will not match those in comm_cost_est,
6937 * but since that is mainly for testing purposes that's fine.
6939 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6940 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6941 getenv("GMX_PMEONEDD") == NULL)
6943 comm->npmedecompdim = 2;
6944 comm->npmenodes_x = dd->nc[XX];
6945 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6949 /* In case nc is 1 in both x and y we could still choose to
6950 * decompose pme in y instead of x, but we use x for simplicity.
6952 comm->npmedecompdim = 1;
6953 if (dd->dim[0] == YY)
6955 comm->npmenodes_x = 1;
6956 comm->npmenodes_y = comm->npmenodes;
6960 comm->npmenodes_x = comm->npmenodes;
6961 comm->npmenodes_y = 1;
6966 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6967 comm->npmenodes_x, comm->npmenodes_y, 1);
6972 comm->npmedecompdim = 0;
6973 comm->npmenodes_x = 0;
6974 comm->npmenodes_y = 0;
6977 /* Technically we don't need both of these,
6978 * but it simplifies code not having to recalculate it.
6980 *npme_x = comm->npmenodes_x;
6981 *npme_y = comm->npmenodes_y;
6983 snew(comm->slb_frac, DIM);
6984 if (comm->eDLB == edlbNO)
6986 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6987 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6988 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
6991 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6993 if (comm->bBondComm || comm->eDLB != edlbNO)
6995 /* Set the bonded communication distance to halfway
6996 * the minimum and the maximum,
6997 * since the extra communication cost is nearly zero.
6999 acs = average_cellsize_min(dd, ddbox);
7000 comm->cutoff_mbody = 0.5*(r_bonded + acs);
7001 if (comm->eDLB != edlbNO)
7003 /* Check if this does not limit the scaling */
7004 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
7006 if (!comm->bBondComm)
7008 /* Without bBondComm do not go beyond the n.b. cut-off */
7009 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
7010 if (comm->cellsize_limit >= comm->cutoff)
7012 /* We don't loose a lot of efficieny
7013 * when increasing it to the n.b. cut-off.
7014 * It can even be slightly faster, because we need
7015 * less checks for the communication setup.
7017 comm->cutoff_mbody = comm->cutoff;
7020 /* Check if we did not end up below our original limit */
7021 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
7023 if (comm->cutoff_mbody > comm->cellsize_limit)
7025 comm->cellsize_limit = comm->cutoff_mbody;
7028 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
7033 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
7034 "cellsize limit %f\n",
7035 comm->bBondComm, comm->cellsize_limit);
7040 check_dd_restrictions(cr, dd, ir, fplog);
7043 comm->partition_step = INT_MIN;
7046 clear_dd_cycle_counts(dd);
7051 static void set_dlb_limits(gmx_domdec_t *dd)
7056 for (d = 0; d < dd->ndim; d++)
7058 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7059 dd->comm->cellsize_min[dd->dim[d]] =
7060 dd->comm->cellsize_min_dlb[dd->dim[d]];
7065 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_large_int_t step)
7068 gmx_domdec_comm_t *comm;
7078 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
7081 cellsize_min = comm->cellsize_min[dd->dim[0]];
7082 for (d = 1; d < dd->ndim; d++)
7084 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7087 if (cellsize_min < comm->cellsize_limit*1.05)
7089 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7091 /* Change DLB from "auto" to "no". */
7092 comm->eDLB = edlbNO;
7097 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7098 comm->bDynLoadBal = TRUE;
7099 dd->bGridJump = TRUE;
7103 /* We can set the required cell size info here,
7104 * so we do not need to communicate this.
7105 * The grid is completely uniform.
7107 for (d = 0; d < dd->ndim; d++)
7111 comm->load[d].sum_m = comm->load[d].sum;
7113 nc = dd->nc[dd->dim[d]];
7114 for (i = 0; i < nc; i++)
7116 comm->root[d]->cell_f[i] = i/(real)nc;
7119 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7120 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7123 comm->root[d]->cell_f[nc] = 1.0;
7128 static char *init_bLocalCG(gmx_mtop_t *mtop)
7133 ncg = ncg_mtop(mtop);
7134 snew(bLocalCG, ncg);
7135 for (cg = 0; cg < ncg; cg++)
7137 bLocalCG[cg] = FALSE;
7143 void dd_init_bondeds(FILE *fplog,
7144 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7146 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7148 gmx_domdec_comm_t *comm;
7152 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7156 if (comm->bBondComm)
7158 /* Communicate atoms beyond the cut-off for bonded interactions */
7161 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7163 comm->bLocalCG = init_bLocalCG(mtop);
7167 /* Only communicate atoms based on cut-off */
7168 comm->cglink = NULL;
7169 comm->bLocalCG = NULL;
7173 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7175 gmx_bool bDynLoadBal, real dlb_scale,
7178 gmx_domdec_comm_t *comm;
7193 fprintf(fplog, "The maximum number of communication pulses is:");
7194 for (d = 0; d < dd->ndim; d++)
7196 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7198 fprintf(fplog, "\n");
7199 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7200 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7201 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7202 for (d = 0; d < DIM; d++)
7206 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7213 comm->cellsize_min_dlb[d]/
7214 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7216 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7219 fprintf(fplog, "\n");
7223 set_dd_cell_sizes_slb(dd, ddbox, FALSE, np);
7224 fprintf(fplog, "The initial number of communication pulses is:");
7225 for (d = 0; d < dd->ndim; d++)
7227 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7229 fprintf(fplog, "\n");
7230 fprintf(fplog, "The initial domain decomposition cell size is:");
7231 for (d = 0; d < DIM; d++)
7235 fprintf(fplog, " %c %.2f nm",
7236 dim2char(d), dd->comm->cellsize_min[d]);
7239 fprintf(fplog, "\n\n");
7242 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7244 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7245 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7246 "non-bonded interactions", "", comm->cutoff);
7250 limit = dd->comm->cellsize_limit;
7254 if (dynamic_dd_box(ddbox, ir))
7256 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7258 limit = dd->comm->cellsize_min[XX];
7259 for (d = 1; d < DIM; d++)
7261 limit = min(limit, dd->comm->cellsize_min[d]);
7265 if (comm->bInterCGBondeds)
7267 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7268 "two-body bonded interactions", "(-rdd)",
7269 max(comm->cutoff, comm->cutoff_mbody));
7270 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7271 "multi-body bonded interactions", "(-rdd)",
7272 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7276 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7277 "virtual site constructions", "(-rcon)", limit);
7279 if (dd->constraint_comm)
7281 sprintf(buf, "atoms separated by up to %d constraints",
7283 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7284 buf, "(-rcon)", limit);
7286 fprintf(fplog, "\n");
7292 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7294 const t_inputrec *ir,
7295 const gmx_ddbox_t *ddbox)
7297 gmx_domdec_comm_t *comm;
7298 int d, dim, npulse, npulse_d_max, npulse_d;
7303 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7305 /* Determine the maximum number of comm. pulses in one dimension */
7307 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7309 /* Determine the maximum required number of grid pulses */
7310 if (comm->cellsize_limit >= comm->cutoff)
7312 /* Only a single pulse is required */
7315 else if (!bNoCutOff && comm->cellsize_limit > 0)
7317 /* We round down slightly here to avoid overhead due to the latency
7318 * of extra communication calls when the cut-off
7319 * would be only slightly longer than the cell size.
7320 * Later cellsize_limit is redetermined,
7321 * so we can not miss interactions due to this rounding.
7323 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7327 /* There is no cell size limit */
7328 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7331 if (!bNoCutOff && npulse > 1)
7333 /* See if we can do with less pulses, based on dlb_scale */
7335 for (d = 0; d < dd->ndim; d++)
7338 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7339 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7340 npulse_d_max = max(npulse_d_max, npulse_d);
7342 npulse = min(npulse, npulse_d_max);
7345 /* This env var can override npulse */
7346 d = dd_nst_env(debug, "GMX_DD_NPULSE", 0);
7353 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7354 for (d = 0; d < dd->ndim; d++)
7356 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7357 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7358 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7359 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7360 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7362 comm->bVacDLBNoLimit = FALSE;
7366 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7367 if (!comm->bVacDLBNoLimit)
7369 comm->cellsize_limit = max(comm->cellsize_limit,
7370 comm->cutoff/comm->maxpulse);
7372 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7373 /* Set the minimum cell size for each DD dimension */
7374 for (d = 0; d < dd->ndim; d++)
7376 if (comm->bVacDLBNoLimit ||
7377 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7379 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7383 comm->cellsize_min_dlb[dd->dim[d]] =
7384 comm->cutoff/comm->cd[d].np_dlb;
7387 if (comm->cutoff_mbody <= 0)
7389 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7391 if (comm->bDynLoadBal)
7397 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7399 /* If each molecule is a single charge group
7400 * or we use domain decomposition for each periodic dimension,
7401 * we do not need to take pbc into account for the bonded interactions.
7403 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7406 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
7409 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7410 t_inputrec *ir, gmx_ddbox_t *ddbox)
7412 gmx_domdec_comm_t *comm;
7418 /* Initialize the thread data.
7419 * This can not be done in init_domain_decomposition,
7420 * as the numbers of threads is determined later.
7422 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7425 snew(comm->dth, comm->nth);
7428 if (EEL_PME(ir->coulombtype))
7430 init_ddpme(dd, &comm->ddpme[0], 0);
7431 if (comm->npmedecompdim >= 2)
7433 init_ddpme(dd, &comm->ddpme[1], 1);
7438 comm->npmenodes = 0;
7439 if (dd->pme_nodeid >= 0)
7441 gmx_fatal_collective(FARGS, NULL, dd,
7442 "Can not have separate PME nodes without PME electrostatics");
7448 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7450 if (comm->eDLB != edlbNO)
7452 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7455 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7456 if (comm->eDLB == edlbAUTO)
7460 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7462 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
7465 if (ir->ePBC == epbcNONE)
7467 vol_frac = 1 - 1/(double)dd->nnodes;
7472 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7476 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7478 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7480 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
7483 static gmx_bool test_dd_cutoff(t_commrec *cr,
7484 t_state *state, t_inputrec *ir,
7495 set_ddbox(dd, FALSE, cr, ir, state->box,
7496 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7500 for (d = 0; d < dd->ndim; d++)
7504 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7505 if (dynamic_dd_box(&ddbox, ir))
7507 inv_cell_size *= DD_PRES_SCALE_MARGIN;
7510 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7512 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7513 dd->comm->cd[d].np_dlb > 0)
7515 if (np > dd->comm->cd[d].np_dlb)
7520 /* If a current local cell size is smaller than the requested
7521 * cut-off, we could still fix it, but this gets very complicated.
7522 * Without fixing here, we might actually need more checks.
7524 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7531 if (dd->comm->eDLB != edlbNO)
7533 /* If DLB is not active yet, we don't need to check the grid jumps.
7534 * Actually we shouldn't, because then the grid jump data is not set.
7536 if (dd->comm->bDynLoadBal &&
7537 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7542 gmx_sumi(1, &LocallyLimited, cr);
7544 if (LocallyLimited > 0)
7553 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7556 gmx_bool bCutoffAllowed;
7558 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7562 cr->dd->comm->cutoff = cutoff_req;
7565 return bCutoffAllowed;
7568 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7570 gmx_domdec_comm_t *comm;
7572 comm = cr->dd->comm;
7574 /* Turn on the DLB limiting (might have been on already) */
7575 comm->bPMELoadBalDLBLimits = TRUE;
7577 /* Change the cut-off limit */
7578 comm->PMELoadBal_max_cutoff = comm->cutoff;
7581 static void merge_cg_buffers(int ncell,
7582 gmx_domdec_comm_dim_t *cd, int pulse,
7584 int *index_gl, int *recv_i,
7585 rvec *cg_cm, rvec *recv_vr,
7587 cginfo_mb_t *cginfo_mb, int *cginfo)
7589 gmx_domdec_ind_t *ind, *ind_p;
7590 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7591 int shift, shift_at;
7593 ind = &cd->ind[pulse];
7595 /* First correct the already stored data */
7596 shift = ind->nrecv[ncell];
7597 for (cell = ncell-1; cell >= 0; cell--)
7599 shift -= ind->nrecv[cell];
7602 /* Move the cg's present from previous grid pulses */
7603 cg0 = ncg_cell[ncell+cell];
7604 cg1 = ncg_cell[ncell+cell+1];
7605 cgindex[cg1+shift] = cgindex[cg1];
7606 for (cg = cg1-1; cg >= cg0; cg--)
7608 index_gl[cg+shift] = index_gl[cg];
7609 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7610 cgindex[cg+shift] = cgindex[cg];
7611 cginfo[cg+shift] = cginfo[cg];
7613 /* Correct the already stored send indices for the shift */
7614 for (p = 1; p <= pulse; p++)
7616 ind_p = &cd->ind[p];
7618 for (c = 0; c < cell; c++)
7620 cg0 += ind_p->nsend[c];
7622 cg1 = cg0 + ind_p->nsend[cell];
7623 for (cg = cg0; cg < cg1; cg++)
7625 ind_p->index[cg] += shift;
7631 /* Merge in the communicated buffers */
7635 for (cell = 0; cell < ncell; cell++)
7637 cg1 = ncg_cell[ncell+cell+1] + shift;
7640 /* Correct the old cg indices */
7641 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7643 cgindex[cg+1] += shift_at;
7646 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7648 /* Copy this charge group from the buffer */
7649 index_gl[cg1] = recv_i[cg0];
7650 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7651 /* Add it to the cgindex */
7652 cg_gl = index_gl[cg1];
7653 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7654 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7655 cgindex[cg1+1] = cgindex[cg1] + nat;
7660 shift += ind->nrecv[cell];
7661 ncg_cell[ncell+cell+1] = cg1;
7665 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7666 int nzone, int cg0, const int *cgindex)
7670 /* Store the atom block boundaries for easy copying of communication buffers
7673 for (zone = 0; zone < nzone; zone++)
7675 for (p = 0; p < cd->np; p++)
7677 cd->ind[p].cell2at0[zone] = cgindex[cg];
7678 cg += cd->ind[p].nrecv[zone];
7679 cd->ind[p].cell2at1[zone] = cgindex[cg];
7684 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7690 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7692 if (!bLocalCG[link->a[i]])
7701 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7703 real c[DIM][4]; /* the corners for the non-bonded communication */
7704 real cr0; /* corner for rounding */
7705 real cr1[4]; /* corners for rounding */
7706 real bc[DIM]; /* corners for bounded communication */
7707 real bcr1; /* corner for rounding for bonded communication */
7710 /* Determine the corners of the domain(s) we are communicating with */
7712 set_dd_corners(const gmx_domdec_t *dd,
7713 int dim0, int dim1, int dim2,
7717 const gmx_domdec_comm_t *comm;
7718 const gmx_domdec_zones_t *zones;
7723 zones = &comm->zones;
7725 /* Keep the compiler happy */
7729 /* The first dimension is equal for all cells */
7730 c->c[0][0] = comm->cell_x0[dim0];
7733 c->bc[0] = c->c[0][0];
7738 /* This cell row is only seen from the first row */
7739 c->c[1][0] = comm->cell_x0[dim1];
7740 /* All rows can see this row */
7741 c->c[1][1] = comm->cell_x0[dim1];
7744 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7747 /* For the multi-body distance we need the maximum */
7748 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7751 /* Set the upper-right corner for rounding */
7752 c->cr0 = comm->cell_x1[dim0];
7757 for (j = 0; j < 4; j++)
7759 c->c[2][j] = comm->cell_x0[dim2];
7763 /* Use the maximum of the i-cells that see a j-cell */
7764 for (i = 0; i < zones->nizone; i++)
7766 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7772 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7778 /* For the multi-body distance we need the maximum */
7779 c->bc[2] = comm->cell_x0[dim2];
7780 for (i = 0; i < 2; i++)
7782 for (j = 0; j < 2; j++)
7784 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7790 /* Set the upper-right corner for rounding */
7791 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7792 * Only cell (0,0,0) can see cell 7 (1,1,1)
7794 c->cr1[0] = comm->cell_x1[dim1];
7795 c->cr1[3] = comm->cell_x1[dim1];
7798 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7801 /* For the multi-body distance we need the maximum */
7802 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7809 /* Determine which cg's we need to send in this pulse from this zone */
7811 get_zone_pulse_cgs(gmx_domdec_t *dd,
7812 int zonei, int zone,
7814 const int *index_gl,
7816 int dim, int dim_ind,
7817 int dim0, int dim1, int dim2,
7818 real r_comm2, real r_bcomm2,
7822 real skew_fac2_d, real skew_fac_01,
7823 rvec *v_d, rvec *v_0, rvec *v_1,
7824 const dd_corners_t *c,
7826 gmx_bool bDistBonded,
7832 gmx_domdec_ind_t *ind,
7833 int **ibuf, int *ibuf_nalloc,
7839 gmx_domdec_comm_t *comm;
7841 gmx_bool bDistMB_pulse;
7843 real r2, rb2, r, tric_sh;
7846 int nsend_z, nsend, nat;
7850 bScrew = (dd->bScrewPBC && dim == XX);
7852 bDistMB_pulse = (bDistMB && bDistBonded);
7858 for (cg = cg0; cg < cg1; cg++)
7862 if (tric_dist[dim_ind] == 0)
7864 /* Rectangular direction, easy */
7865 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7872 r = cg_cm[cg][dim] - c->bc[dim_ind];
7878 /* Rounding gives at most a 16% reduction
7879 * in communicated atoms
7881 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7883 r = cg_cm[cg][dim0] - c->cr0;
7884 /* This is the first dimension, so always r >= 0 */
7891 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7893 r = cg_cm[cg][dim1] - c->cr1[zone];
7900 r = cg_cm[cg][dim1] - c->bcr1;
7910 /* Triclinic direction, more complicated */
7913 /* Rounding, conservative as the skew_fac multiplication
7914 * will slightly underestimate the distance.
7916 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7918 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7919 for (i = dim0+1; i < DIM; i++)
7921 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7923 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7926 rb[dim0] = rn[dim0];
7929 /* Take care that the cell planes along dim0 might not
7930 * be orthogonal to those along dim1 and dim2.
7932 for (i = 1; i <= dim_ind; i++)
7935 if (normal[dim0][dimd] > 0)
7937 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7940 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7945 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7947 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7949 for (i = dim1+1; i < DIM; i++)
7951 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7953 rn[dim1] += tric_sh;
7956 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7957 /* Take care of coupling of the distances
7958 * to the planes along dim0 and dim1 through dim2.
7960 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7961 /* Take care that the cell planes along dim1
7962 * might not be orthogonal to that along dim2.
7964 if (normal[dim1][dim2] > 0)
7966 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7972 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7975 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7976 /* Take care of coupling of the distances
7977 * to the planes along dim0 and dim1 through dim2.
7979 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7980 /* Take care that the cell planes along dim1
7981 * might not be orthogonal to that along dim2.
7983 if (normal[dim1][dim2] > 0)
7985 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7990 /* The distance along the communication direction */
7991 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7993 for (i = dim+1; i < DIM; i++)
7995 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
8000 r2 += rn[dim]*rn[dim]*skew_fac2_d;
8001 /* Take care of coupling of the distances
8002 * to the planes along dim0 and dim1 through dim2.
8004 if (dim_ind == 1 && zonei == 1)
8006 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
8012 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
8015 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
8016 /* Take care of coupling of the distances
8017 * to the planes along dim0 and dim1 through dim2.
8019 if (dim_ind == 1 && zonei == 1)
8021 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
8029 ((bDistMB && rb2 < r_bcomm2) ||
8030 (bDist2B && r2 < r_bcomm2)) &&
8032 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
8033 missing_link(comm->cglink, index_gl[cg],
8036 /* Make an index to the local charge groups */
8037 if (nsend+1 > ind->nalloc)
8039 ind->nalloc = over_alloc_large(nsend+1);
8040 srenew(ind->index, ind->nalloc);
8042 if (nsend+1 > *ibuf_nalloc)
8044 *ibuf_nalloc = over_alloc_large(nsend+1);
8045 srenew(*ibuf, *ibuf_nalloc);
8047 ind->index[nsend] = cg;
8048 (*ibuf)[nsend] = index_gl[cg];
8050 vec_rvec_check_alloc(vbuf, nsend+1);
8052 if (dd->ci[dim] == 0)
8054 /* Correct cg_cm for pbc */
8055 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
8058 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8059 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8064 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8067 nat += cgindex[cg+1] - cgindex[cg];
8073 *nsend_z_ptr = nsend_z;
8076 static void setup_dd_communication(gmx_domdec_t *dd,
8077 matrix box, gmx_ddbox_t *ddbox,
8078 t_forcerec *fr, t_state *state, rvec **f)
8080 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8081 int nzone, nzone_send, zone, zonei, cg0, cg1;
8082 int c, i, j, cg, cg_gl, nrcg;
8083 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8084 gmx_domdec_comm_t *comm;
8085 gmx_domdec_zones_t *zones;
8086 gmx_domdec_comm_dim_t *cd;
8087 gmx_domdec_ind_t *ind;
8088 cginfo_mb_t *cginfo_mb;
8089 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8090 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8091 dd_corners_t corners;
8093 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8094 real skew_fac2_d, skew_fac_01;
8101 fprintf(debug, "Setting up DD communication\n");
8106 switch (fr->cutoff_scheme)
8115 gmx_incons("unimplemented");
8119 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8121 dim = dd->dim[dim_ind];
8123 /* Check if we need to use triclinic distances */
8124 tric_dist[dim_ind] = 0;
8125 for (i = 0; i <= dim_ind; i++)
8127 if (ddbox->tric_dir[dd->dim[i]])
8129 tric_dist[dim_ind] = 1;
8134 bBondComm = comm->bBondComm;
8136 /* Do we need to determine extra distances for multi-body bondeds? */
8137 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8139 /* Do we need to determine extra distances for only two-body bondeds? */
8140 bDist2B = (bBondComm && !bDistMB);
8142 r_comm2 = sqr(comm->cutoff);
8143 r_bcomm2 = sqr(comm->cutoff_mbody);
8147 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8150 zones = &comm->zones;
8153 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8154 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8156 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8158 /* Triclinic stuff */
8159 normal = ddbox->normal;
8163 v_0 = ddbox->v[dim0];
8164 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8166 /* Determine the coupling coefficient for the distances
8167 * to the cell planes along dim0 and dim1 through dim2.
8168 * This is required for correct rounding.
8171 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8174 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8180 v_1 = ddbox->v[dim1];
8183 zone_cg_range = zones->cg_range;
8184 index_gl = dd->index_gl;
8185 cgindex = dd->cgindex;
8186 cginfo_mb = fr->cginfo_mb;
8188 zone_cg_range[0] = 0;
8189 zone_cg_range[1] = dd->ncg_home;
8190 comm->zone_ncg1[0] = dd->ncg_home;
8191 pos_cg = dd->ncg_home;
8193 nat_tot = dd->nat_home;
8195 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8197 dim = dd->dim[dim_ind];
8198 cd = &comm->cd[dim_ind];
8200 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8202 /* No pbc in this dimension, the first node should not comm. */
8210 v_d = ddbox->v[dim];
8211 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8213 cd->bInPlace = TRUE;
8214 for (p = 0; p < cd->np; p++)
8216 /* Only atoms communicated in the first pulse are used
8217 * for multi-body bonded interactions or for bBondComm.
8219 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8224 for (zone = 0; zone < nzone_send; zone++)
8226 if (tric_dist[dim_ind] && dim_ind > 0)
8228 /* Determine slightly more optimized skew_fac's
8230 * This reduces the number of communicated atoms
8231 * by about 10% for 3D DD of rhombic dodecahedra.
8233 for (dimd = 0; dimd < dim; dimd++)
8235 sf2_round[dimd] = 1;
8236 if (ddbox->tric_dir[dimd])
8238 for (i = dd->dim[dimd]+1; i < DIM; i++)
8240 /* If we are shifted in dimension i
8241 * and the cell plane is tilted forward
8242 * in dimension i, skip this coupling.
8244 if (!(zones->shift[nzone+zone][i] &&
8245 ddbox->v[dimd][i][dimd] >= 0))
8248 sqr(ddbox->v[dimd][i][dimd]);
8251 sf2_round[dimd] = 1/sf2_round[dimd];
8256 zonei = zone_perm[dim_ind][zone];
8259 /* Here we permutate the zones to obtain a convenient order
8260 * for neighbor searching
8262 cg0 = zone_cg_range[zonei];
8263 cg1 = zone_cg_range[zonei+1];
8267 /* Look only at the cg's received in the previous grid pulse
8269 cg1 = zone_cg_range[nzone+zone+1];
8270 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
8273 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8274 for (th = 0; th < comm->nth; th++)
8276 gmx_domdec_ind_t *ind_p;
8277 int **ibuf_p, *ibuf_nalloc_p;
8279 int *nsend_p, *nat_p;
8285 /* Thread 0 writes in the comm buffers */
8287 ibuf_p = &comm->buf_int;
8288 ibuf_nalloc_p = &comm->nalloc_int;
8289 vbuf_p = &comm->vbuf;
8292 nsend_zone_p = &ind->nsend[zone];
8296 /* Other threads write into temp buffers */
8297 ind_p = &comm->dth[th].ind;
8298 ibuf_p = &comm->dth[th].ibuf;
8299 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8300 vbuf_p = &comm->dth[th].vbuf;
8301 nsend_p = &comm->dth[th].nsend;
8302 nat_p = &comm->dth[th].nat;
8303 nsend_zone_p = &comm->dth[th].nsend_zone;
8305 comm->dth[th].nsend = 0;
8306 comm->dth[th].nat = 0;
8307 comm->dth[th].nsend_zone = 0;
8317 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8318 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8321 /* Get the cg's for this pulse in this zone */
8322 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8324 dim, dim_ind, dim0, dim1, dim2,
8327 normal, skew_fac2_d, skew_fac_01,
8328 v_d, v_0, v_1, &corners, sf2_round,
8329 bDistBonded, bBondComm,
8333 ibuf_p, ibuf_nalloc_p,
8339 /* Append data of threads>=1 to the communication buffers */
8340 for (th = 1; th < comm->nth; th++)
8342 dd_comm_setup_work_t *dth;
8345 dth = &comm->dth[th];
8347 ns1 = nsend + dth->nsend_zone;
8348 if (ns1 > ind->nalloc)
8350 ind->nalloc = over_alloc_dd(ns1);
8351 srenew(ind->index, ind->nalloc);
8353 if (ns1 > comm->nalloc_int)
8355 comm->nalloc_int = over_alloc_dd(ns1);
8356 srenew(comm->buf_int, comm->nalloc_int);
8358 if (ns1 > comm->vbuf.nalloc)
8360 comm->vbuf.nalloc = over_alloc_dd(ns1);
8361 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8364 for (i = 0; i < dth->nsend_zone; i++)
8366 ind->index[nsend] = dth->ind.index[i];
8367 comm->buf_int[nsend] = dth->ibuf[i];
8368 copy_rvec(dth->vbuf.v[i],
8369 comm->vbuf.v[nsend]);
8373 ind->nsend[zone] += dth->nsend_zone;
8376 /* Clear the counts in case we do not have pbc */
8377 for (zone = nzone_send; zone < nzone; zone++)
8379 ind->nsend[zone] = 0;
8381 ind->nsend[nzone] = nsend;
8382 ind->nsend[nzone+1] = nat;
8383 /* Communicate the number of cg's and atoms to receive */
8384 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8385 ind->nsend, nzone+2,
8386 ind->nrecv, nzone+2);
8388 /* The rvec buffer is also required for atom buffers of size nsend
8389 * in dd_move_x and dd_move_f.
8391 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8395 /* We can receive in place if only the last zone is not empty */
8396 for (zone = 0; zone < nzone-1; zone++)
8398 if (ind->nrecv[zone] > 0)
8400 cd->bInPlace = FALSE;
8405 /* The int buffer is only required here for the cg indices */
8406 if (ind->nrecv[nzone] > comm->nalloc_int2)
8408 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8409 srenew(comm->buf_int2, comm->nalloc_int2);
8411 /* The rvec buffer is also required for atom buffers
8412 * of size nrecv in dd_move_x and dd_move_f.
8414 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8415 vec_rvec_check_alloc(&comm->vbuf2, i);
8419 /* Make space for the global cg indices */
8420 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8421 || dd->cg_nalloc == 0)
8423 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8424 srenew(index_gl, dd->cg_nalloc);
8425 srenew(cgindex, dd->cg_nalloc+1);
8427 /* Communicate the global cg indices */
8430 recv_i = index_gl + pos_cg;
8434 recv_i = comm->buf_int2;
8436 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8437 comm->buf_int, nsend,
8438 recv_i, ind->nrecv[nzone]);
8440 /* Make space for cg_cm */
8441 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8442 if (fr->cutoff_scheme == ecutsGROUP)
8450 /* Communicate cg_cm */
8453 recv_vr = cg_cm + pos_cg;
8457 recv_vr = comm->vbuf2.v;
8459 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8460 comm->vbuf.v, nsend,
8461 recv_vr, ind->nrecv[nzone]);
8463 /* Make the charge group index */
8466 zone = (p == 0 ? 0 : nzone - 1);
8467 while (zone < nzone)
8469 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8471 cg_gl = index_gl[pos_cg];
8472 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8473 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8474 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8477 /* Update the charge group presence,
8478 * so we can use it in the next pass of the loop.
8480 comm->bLocalCG[cg_gl] = TRUE;
8486 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8489 zone_cg_range[nzone+zone] = pos_cg;
8494 /* This part of the code is never executed with bBondComm. */
8495 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8496 index_gl, recv_i, cg_cm, recv_vr,
8497 cgindex, fr->cginfo_mb, fr->cginfo);
8498 pos_cg += ind->nrecv[nzone];
8500 nat_tot += ind->nrecv[nzone+1];
8504 /* Store the atom block for easy copying of communication buffers */
8505 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
8509 dd->index_gl = index_gl;
8510 dd->cgindex = cgindex;
8512 dd->ncg_tot = zone_cg_range[zones->n];
8513 dd->nat_tot = nat_tot;
8514 comm->nat[ddnatHOME] = dd->nat_home;
8515 for (i = ddnatZONE; i < ddnatNR; i++)
8517 comm->nat[i] = dd->nat_tot;
8522 /* We don't need to update cginfo, since that was alrady done above.
8523 * So we pass NULL for the forcerec.
8525 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8526 NULL, comm->bLocalCG);
8531 fprintf(debug, "Finished setting up DD communication, zones:");
8532 for (c = 0; c < zones->n; c++)
8534 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8536 fprintf(debug, "\n");
8540 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8544 for (c = 0; c < zones->nizone; c++)
8546 zones->izone[c].cg1 = zones->cg_range[c+1];
8547 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8548 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
8552 static void set_zones_size(gmx_domdec_t *dd,
8553 matrix box, const gmx_ddbox_t *ddbox,
8554 int zone_start, int zone_end)
8556 gmx_domdec_comm_t *comm;
8557 gmx_domdec_zones_t *zones;
8559 int z, zi, zj0, zj1, d, dim;
8562 real size_j, add_tric;
8567 zones = &comm->zones;
8569 /* Do we need to determine extra distances for multi-body bondeds? */
8570 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8572 for (z = zone_start; z < zone_end; z++)
8574 /* Copy cell limits to zone limits.
8575 * Valid for non-DD dims and non-shifted dims.
8577 copy_rvec(comm->cell_x0, zones->size[z].x0);
8578 copy_rvec(comm->cell_x1, zones->size[z].x1);
8581 for (d = 0; d < dd->ndim; d++)
8585 for (z = 0; z < zones->n; z++)
8587 /* With a staggered grid we have different sizes
8588 * for non-shifted dimensions.
8590 if (dd->bGridJump && zones->shift[z][dim] == 0)
8594 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8595 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8599 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8600 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
8606 rcmbs = comm->cutoff_mbody;
8607 if (ddbox->tric_dir[dim])
8609 rcs /= ddbox->skew_fac[dim];
8610 rcmbs /= ddbox->skew_fac[dim];
8613 /* Set the lower limit for the shifted zone dimensions */
8614 for (z = zone_start; z < zone_end; z++)
8616 if (zones->shift[z][dim] > 0)
8619 if (!dd->bGridJump || d == 0)
8621 zones->size[z].x0[dim] = comm->cell_x1[dim];
8622 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8626 /* Here we take the lower limit of the zone from
8627 * the lowest domain of the zone below.
8631 zones->size[z].x0[dim] =
8632 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8638 zones->size[z].x0[dim] =
8639 zones->size[zone_perm[2][z-4]].x0[dim];
8643 zones->size[z].x0[dim] =
8644 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8647 /* A temporary limit, is updated below */
8648 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8652 for (zi = 0; zi < zones->nizone; zi++)
8654 if (zones->shift[zi][dim] == 0)
8656 /* This takes the whole zone into account.
8657 * With multiple pulses this will lead
8658 * to a larger zone then strictly necessary.
8660 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8661 zones->size[zi].x1[dim]+rcmbs);
8669 /* Loop over the i-zones to set the upper limit of each
8672 for (zi = 0; zi < zones->nizone; zi++)
8674 if (zones->shift[zi][dim] == 0)
8676 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8678 if (zones->shift[z][dim] > 0)
8680 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8681 zones->size[zi].x1[dim]+rcs);
8688 for (z = zone_start; z < zone_end; z++)
8690 /* Initialization only required to keep the compiler happy */
8691 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8694 /* To determine the bounding box for a zone we need to find
8695 * the extreme corners of 4, 2 or 1 corners.
8697 nc = 1 << (ddbox->npbcdim - 1);
8699 for (c = 0; c < nc; c++)
8701 /* Set up a zone corner at x=0, ignoring trilinic couplings */
8705 corner[YY] = zones->size[z].x0[YY];
8709 corner[YY] = zones->size[z].x1[YY];
8713 corner[ZZ] = zones->size[z].x0[ZZ];
8717 corner[ZZ] = zones->size[z].x1[ZZ];
8719 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8721 /* With 1D domain decomposition the cg's are not in
8722 * the triclinic box, but triclinic x-y and rectangular y-z.
8723 * Shift y back, so it will later end up at 0.
8725 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8727 /* Apply the triclinic couplings */
8728 for (i = YY; i < ddbox->npbcdim; i++)
8730 for (j = XX; j < i; j++)
8732 corner[j] += corner[i]*box[i][j]/box[i][i];
8737 copy_rvec(corner, corner_min);
8738 copy_rvec(corner, corner_max);
8742 for (i = 0; i < DIM; i++)
8744 corner_min[i] = min(corner_min[i], corner[i]);
8745 corner_max[i] = max(corner_max[i], corner[i]);
8749 /* Copy the extreme cornes without offset along x */
8750 for (i = 0; i < DIM; i++)
8752 zones->size[z].bb_x0[i] = corner_min[i];
8753 zones->size[z].bb_x1[i] = corner_max[i];
8755 /* Add the offset along x */
8756 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8757 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
8760 if (zone_start == 0)
8763 for (dim = 0; dim < DIM; dim++)
8765 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8767 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8772 for (z = zone_start; z < zone_end; z++)
8774 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8776 zones->size[z].x0[XX], zones->size[z].x1[XX],
8777 zones->size[z].x0[YY], zones->size[z].x1[YY],
8778 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8779 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8781 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8782 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8783 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
8788 static int comp_cgsort(const void *a, const void *b)
8792 gmx_cgsort_t *cga, *cgb;
8793 cga = (gmx_cgsort_t *)a;
8794 cgb = (gmx_cgsort_t *)b;
8796 comp = cga->nsc - cgb->nsc;
8799 comp = cga->ind_gl - cgb->ind_gl;
8805 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8810 /* Order the data */
8811 for (i = 0; i < n; i++)
8813 buf[i] = a[sort[i].ind];
8816 /* Copy back to the original array */
8817 for (i = 0; i < n; i++)
8823 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8828 /* Order the data */
8829 for (i = 0; i < n; i++)
8831 copy_rvec(v[sort[i].ind], buf[i]);
8834 /* Copy back to the original array */
8835 for (i = 0; i < n; i++)
8837 copy_rvec(buf[i], v[i]);
8841 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8844 int a, atot, cg, cg0, cg1, i;
8846 if (cgindex == NULL)
8848 /* Avoid the useless loop of the atoms within a cg */
8849 order_vec_cg(ncg, sort, v, buf);
8854 /* Order the data */
8856 for (cg = 0; cg < ncg; cg++)
8858 cg0 = cgindex[sort[cg].ind];
8859 cg1 = cgindex[sort[cg].ind+1];
8860 for (i = cg0; i < cg1; i++)
8862 copy_rvec(v[i], buf[a]);
8868 /* Copy back to the original array */
8869 for (a = 0; a < atot; a++)
8871 copy_rvec(buf[a], v[a]);
8875 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8876 int nsort_new, gmx_cgsort_t *sort_new,
8877 gmx_cgsort_t *sort1)
8881 /* The new indices are not very ordered, so we qsort them */
8882 qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8884 /* sort2 is already ordered, so now we can merge the two arrays */
8888 while (i2 < nsort2 || i_new < nsort_new)
8892 sort1[i1++] = sort_new[i_new++];
8894 else if (i_new == nsort_new)
8896 sort1[i1++] = sort2[i2++];
8898 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8899 (sort2[i2].nsc == sort_new[i_new].nsc &&
8900 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8902 sort1[i1++] = sort2[i2++];
8906 sort1[i1++] = sort_new[i_new++];
8911 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8913 gmx_domdec_sort_t *sort;
8914 gmx_cgsort_t *cgsort, *sort_i;
8915 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8916 int sort_last, sort_skip;
8918 sort = dd->comm->sort;
8920 a = fr->ns.grid->cell_index;
8922 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8924 if (ncg_home_old >= 0)
8926 /* The charge groups that remained in the same ns grid cell
8927 * are completely ordered. So we can sort efficiently by sorting
8928 * the charge groups that did move into the stationary list.
8933 for (i = 0; i < dd->ncg_home; i++)
8935 /* Check if this cg did not move to another node */
8938 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8940 /* This cg is new on this node or moved ns grid cell */
8941 if (nsort_new >= sort->sort_new_nalloc)
8943 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8944 srenew(sort->sort_new, sort->sort_new_nalloc);
8946 sort_i = &(sort->sort_new[nsort_new++]);
8950 /* This cg did not move */
8951 sort_i = &(sort->sort2[nsort2++]);
8953 /* Sort on the ns grid cell indices
8954 * and the global topology index.
8955 * index_gl is irrelevant with cell ns,
8956 * but we set it here anyhow to avoid a conditional.
8959 sort_i->ind_gl = dd->index_gl[i];
8966 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8969 /* Sort efficiently */
8970 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8975 cgsort = sort->sort;
8977 for (i = 0; i < dd->ncg_home; i++)
8979 /* Sort on the ns grid cell indices
8980 * and the global topology index
8982 cgsort[i].nsc = a[i];
8983 cgsort[i].ind_gl = dd->index_gl[i];
8985 if (cgsort[i].nsc < moved)
8992 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8994 /* Determine the order of the charge groups using qsort */
8995 qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
9001 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
9004 int ncg_new, i, *a, na;
9006 sort = dd->comm->sort->sort;
9008 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
9011 for (i = 0; i < na; i++)
9015 sort[ncg_new].ind = a[i];
9023 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
9026 gmx_domdec_sort_t *sort;
9027 gmx_cgsort_t *cgsort, *sort_i;
9029 int ncg_new, i, *ibuf, cgsize;
9032 sort = dd->comm->sort;
9034 if (dd->ncg_home > sort->sort_nalloc)
9036 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9037 srenew(sort->sort, sort->sort_nalloc);
9038 srenew(sort->sort2, sort->sort_nalloc);
9040 cgsort = sort->sort;
9042 switch (fr->cutoff_scheme)
9045 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9048 ncg_new = dd_sort_order_nbnxn(dd, fr);
9051 gmx_incons("unimplemented");
9055 /* We alloc with the old size, since cgindex is still old */
9056 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9057 vbuf = dd->comm->vbuf.v;
9061 cgindex = dd->cgindex;
9068 /* Remove the charge groups which are no longer at home here */
9069 dd->ncg_home = ncg_new;
9072 fprintf(debug, "Set the new home charge group count to %d\n",
9076 /* Reorder the state */
9077 for (i = 0; i < estNR; i++)
9079 if (EST_DISTR(i) && (state->flags & (1<<i)))
9084 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9087 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9090 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9093 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9097 case estDISRE_INITF:
9098 case estDISRE_RM3TAV:
9099 case estORIRE_INITF:
9101 /* No ordering required */
9104 gmx_incons("Unknown state entry encountered in dd_sort_state");
9109 if (fr->cutoff_scheme == ecutsGROUP)
9112 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9115 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9117 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9118 srenew(sort->ibuf, sort->ibuf_nalloc);
9121 /* Reorder the global cg index */
9122 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9123 /* Reorder the cginfo */
9124 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9125 /* Rebuild the local cg index */
9129 for (i = 0; i < dd->ncg_home; i++)
9131 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9132 ibuf[i+1] = ibuf[i] + cgsize;
9134 for (i = 0; i < dd->ncg_home+1; i++)
9136 dd->cgindex[i] = ibuf[i];
9141 for (i = 0; i < dd->ncg_home+1; i++)
9146 /* Set the home atom number */
9147 dd->nat_home = dd->cgindex[dd->ncg_home];
9149 if (fr->cutoff_scheme == ecutsVERLET)
9151 /* The atoms are now exactly in grid order, update the grid order */
9152 nbnxn_set_atomorder(fr->nbv->nbs);
9156 /* Copy the sorted ns cell indices back to the ns grid struct */
9157 for (i = 0; i < dd->ncg_home; i++)
9159 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9161 fr->ns.grid->nr = dd->ncg_home;
9165 static void add_dd_statistics(gmx_domdec_t *dd)
9167 gmx_domdec_comm_t *comm;
9172 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9174 comm->sum_nat[ddnat-ddnatZONE] +=
9175 comm->nat[ddnat] - comm->nat[ddnat-1];
9180 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9182 gmx_domdec_comm_t *comm;
9187 /* Reset all the statistics and counters for total run counting */
9188 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9190 comm->sum_nat[ddnat-ddnatZONE] = 0;
9194 comm->load_step = 0;
9197 clear_ivec(comm->load_lim);
9202 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9204 gmx_domdec_comm_t *comm;
9208 comm = cr->dd->comm;
9210 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9217 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9219 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9221 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9226 " av. #atoms communicated per step for force: %d x %.1f\n",
9230 if (cr->dd->vsite_comm)
9233 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9234 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9239 if (cr->dd->constraint_comm)
9242 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9243 1 + ir->nLincsIter, av);
9247 gmx_incons(" Unknown type for DD statistics");
9250 fprintf(fplog, "\n");
9252 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9254 print_dd_load_av(fplog, cr->dd);
9258 void dd_partition_system(FILE *fplog,
9259 gmx_large_int_t step,
9261 gmx_bool bMasterState,
9263 t_state *state_global,
9264 gmx_mtop_t *top_global,
9266 t_state *state_local,
9269 gmx_localtop_t *top_local,
9272 gmx_shellfc_t shellfc,
9273 gmx_constr_t constr,
9275 gmx_wallcycle_t wcycle,
9279 gmx_domdec_comm_t *comm;
9280 gmx_ddbox_t ddbox = {0};
9282 gmx_large_int_t step_pcoupl;
9283 rvec cell_ns_x0, cell_ns_x1;
9284 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9285 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9286 gmx_bool bRedist, bSortCG, bResortAll;
9287 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9294 bBoxChanged = (bMasterState || DEFORM(*ir));
9295 if (ir->epc != epcNO)
9297 /* With nstpcouple > 1 pressure coupling happens.
9298 * one step after calculating the pressure.
9299 * Box scaling happens at the end of the MD step,
9300 * after the DD partitioning.
9301 * We therefore have to do DLB in the first partitioning
9302 * after an MD step where P-coupling occured.
9303 * We need to determine the last step in which p-coupling occurred.
9304 * MRS -- need to validate this for vv?
9309 step_pcoupl = step - 1;
9313 step_pcoupl = ((step - 1)/n)*n + 1;
9315 if (step_pcoupl >= comm->partition_step)
9321 bNStGlobalComm = (step % nstglobalcomm == 0);
9323 if (!comm->bDynLoadBal)
9329 /* Should we do dynamic load balacing this step?
9330 * Since it requires (possibly expensive) global communication,
9331 * we might want to do DLB less frequently.
9333 if (bBoxChanged || ir->epc != epcNO)
9335 bDoDLB = bBoxChanged;
9339 bDoDLB = bNStGlobalComm;
9343 /* Check if we have recorded loads on the nodes */
9344 if (comm->bRecordLoad && dd_load_count(comm))
9346 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9348 /* Check if we should use DLB at the second partitioning
9349 * and every 100 partitionings,
9350 * so the extra communication cost is negligible.
9352 n = max(100, nstglobalcomm);
9353 bCheckDLB = (comm->n_load_collect == 0 ||
9354 comm->n_load_have % n == n-1);
9361 /* Print load every nstlog, first and last step to the log file */
9362 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9363 comm->n_load_collect == 0 ||
9365 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9367 /* Avoid extra communication due to verbose screen output
9368 * when nstglobalcomm is set.
9370 if (bDoDLB || bLogLoad || bCheckDLB ||
9371 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9373 get_load_distribution(dd, wcycle);
9378 dd_print_load(fplog, dd, step-1);
9382 dd_print_load_verbose(dd);
9385 comm->n_load_collect++;
9389 /* Since the timings are node dependent, the master decides */
9393 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
9396 fprintf(debug, "step %s, imb loss %f\n",
9397 gmx_step_str(step, sbuf),
9398 dd_force_imb_perf_loss(dd));
9401 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9404 turn_on_dlb(fplog, cr, step);
9409 comm->n_load_have++;
9412 cgs_gl = &comm->cgs_gl;
9417 /* Clear the old state */
9418 clear_dd_indices(dd, 0, 0);
9421 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9422 TRUE, cgs_gl, state_global->x, &ddbox);
9424 get_cg_distribution(fplog, step, dd, cgs_gl,
9425 state_global->box, &ddbox, state_global->x);
9427 dd_distribute_state(dd, cgs_gl,
9428 state_global, state_local, f);
9430 dd_make_local_cgs(dd, &top_local->cgs);
9432 /* Ensure that we have space for the new distribution */
9433 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9435 if (fr->cutoff_scheme == ecutsGROUP)
9437 calc_cgcm(fplog, 0, dd->ncg_home,
9438 &top_local->cgs, state_local->x, fr->cg_cm);
9441 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9443 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9445 else if (state_local->ddp_count != dd->ddp_count)
9447 if (state_local->ddp_count > dd->ddp_count)
9449 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9452 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9454 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9457 /* Clear the old state */
9458 clear_dd_indices(dd, 0, 0);
9460 /* Build the new indices */
9461 rebuild_cgindex(dd, cgs_gl->index, state_local);
9462 make_dd_indices(dd, cgs_gl->index, 0);
9463 ncgindex_set = dd->ncg_home;
9465 if (fr->cutoff_scheme == ecutsGROUP)
9467 /* Redetermine the cg COMs */
9468 calc_cgcm(fplog, 0, dd->ncg_home,
9469 &top_local->cgs, state_local->x, fr->cg_cm);
9472 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9474 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9476 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9477 TRUE, &top_local->cgs, state_local->x, &ddbox);
9479 bRedist = comm->bDynLoadBal;
9483 /* We have the full state, only redistribute the cgs */
9485 /* Clear the non-home indices */
9486 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9489 /* Avoid global communication for dim's without pbc and -gcom */
9490 if (!bNStGlobalComm)
9492 copy_rvec(comm->box0, ddbox.box0 );
9493 copy_rvec(comm->box_size, ddbox.box_size);
9495 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9496 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9501 /* For dim's without pbc and -gcom */
9502 copy_rvec(ddbox.box0, comm->box0 );
9503 copy_rvec(ddbox.box_size, comm->box_size);
9505 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9508 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9510 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9513 /* Check if we should sort the charge groups */
9514 if (comm->nstSortCG > 0)
9516 bSortCG = (bMasterState ||
9517 (bRedist && (step % comm->nstSortCG == 0)));
9524 ncg_home_old = dd->ncg_home;
9529 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9531 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9533 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9535 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9538 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9540 &comm->cell_x0, &comm->cell_x1,
9541 dd->ncg_home, fr->cg_cm,
9542 cell_ns_x0, cell_ns_x1, &grid_density);
9546 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9549 switch (fr->cutoff_scheme)
9552 copy_ivec(fr->ns.grid->n, ncells_old);
9553 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9554 state_local->box, cell_ns_x0, cell_ns_x1,
9555 fr->rlistlong, grid_density);
9558 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9561 gmx_incons("unimplemented");
9563 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9564 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9568 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9570 /* Sort the state on charge group position.
9571 * This enables exact restarts from this step.
9572 * It also improves performance by about 15% with larger numbers
9573 * of atoms per node.
9576 /* Fill the ns grid with the home cell,
9577 * so we can sort with the indices.
9579 set_zones_ncg_home(dd);
9581 switch (fr->cutoff_scheme)
9584 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9586 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9588 comm->zones.size[0].bb_x0,
9589 comm->zones.size[0].bb_x1,
9591 comm->zones.dens_zone0,
9594 ncg_moved, bRedist ? comm->moved : NULL,
9595 fr->nbv->grp[eintLocal].kernel_type,
9596 fr->nbv->grp[eintLocal].nbat);
9598 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9601 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9602 0, dd->ncg_home, fr->cg_cm);
9604 copy_ivec(fr->ns.grid->n, ncells_new);
9607 gmx_incons("unimplemented");
9610 bResortAll = bMasterState;
9612 /* Check if we can user the old order and ns grid cell indices
9613 * of the charge groups to sort the charge groups efficiently.
9615 if (ncells_new[XX] != ncells_old[XX] ||
9616 ncells_new[YY] != ncells_old[YY] ||
9617 ncells_new[ZZ] != ncells_old[ZZ])
9624 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9625 gmx_step_str(step, sbuf), dd->ncg_home);
9627 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9628 bResortAll ? -1 : ncg_home_old);
9629 /* Rebuild all the indices */
9630 ga2la_clear(dd->ga2la);
9633 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9636 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9638 /* Setup up the communication and communicate the coordinates */
9639 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9641 /* Set the indices */
9642 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9644 /* Set the charge group boundaries for neighbor searching */
9645 set_cg_boundaries(&comm->zones);
9647 if (fr->cutoff_scheme == ecutsVERLET)
9649 set_zones_size(dd, state_local->box, &ddbox,
9650 bSortCG ? 1 : 0, comm->zones.n);
9653 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9656 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9657 -1,state_local->x,state_local->box);
9660 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9662 /* Extract a local topology from the global topology */
9663 for (i = 0; i < dd->ndim; i++)
9665 np[dd->dim[i]] = comm->cd[i].np;
9667 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9668 comm->cellsize_min, np,
9670 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9671 vsite, top_global, top_local);
9673 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9675 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9677 /* Set up the special atom communication */
9678 n = comm->nat[ddnatZONE];
9679 for (i = ddnatZONE+1; i < ddnatNR; i++)
9684 if (vsite && vsite->n_intercg_vsite)
9686 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9690 if (dd->bInterCGcons || dd->bInterCGsettles)
9692 /* Only for inter-cg constraints we need special code */
9693 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9694 constr, ir->nProjOrder,
9695 top_local->idef.il);
9699 gmx_incons("Unknown special atom type setup");
9704 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9706 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9708 /* Make space for the extra coordinates for virtual site
9709 * or constraint communication.
9711 state_local->natoms = comm->nat[ddnatNR-1];
9712 if (state_local->natoms > state_local->nalloc)
9714 dd_realloc_state(state_local, f, state_local->natoms);
9717 if (fr->bF_NoVirSum)
9719 if (vsite && vsite->n_intercg_vsite)
9721 nat_f_novirsum = comm->nat[ddnatVSITE];
9725 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9727 nat_f_novirsum = dd->nat_tot;
9731 nat_f_novirsum = dd->nat_home;
9740 /* Set the number of atoms required for the force calculation.
9741 * Forces need to be constrained when using a twin-range setup
9742 * or with energy minimization. For simple simulations we could
9743 * avoid some allocation, zeroing and copying, but this is
9744 * probably not worth the complications ande checking.
9746 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9747 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9749 /* We make the all mdatoms up to nat_tot_con.
9750 * We could save some work by only setting invmass
9751 * between nat_tot and nat_tot_con.
9753 /* This call also sets the new number of home particles to dd->nat_home */
9754 atoms2md(top_global, ir,
9755 comm->nat[ddnatCON], dd->gatindex, 0, dd->nat_home, mdatoms);
9757 /* Now we have the charges we can sort the FE interactions */
9758 dd_sort_local_top(dd, mdatoms, top_local);
9762 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9763 split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
9768 /* Make the local shell stuff, currently no communication is done */
9769 make_local_shells(cr, mdatoms, shellfc);
9772 if (ir->implicit_solvent)
9774 make_local_gb(cr, fr->born, ir->gb_algorithm);
9777 setup_bonded_threading(fr, &top_local->idef);
9779 if (!(cr->duty & DUTY_PME))
9781 /* Send the charges to our PME only node */
9782 gmx_pme_send_q(cr, mdatoms->nChargePerturbed,
9783 mdatoms->chargeA, mdatoms->chargeB,
9784 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9789 set_constraints(constr, top_local, ir, mdatoms, cr);
9792 if (ir->ePull != epullNO)
9794 /* Update the local pull groups */
9795 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9800 /* Update the local rotation groups */
9801 dd_make_local_rotation_groups(dd, ir->rot);
9805 add_dd_statistics(dd);
9807 /* Make sure we only count the cycles for this DD partitioning */
9808 clear_dd_cycle_counts(dd);
9810 /* Because the order of the atoms might have changed since
9811 * the last vsite construction, we need to communicate the constructing
9812 * atom coordinates again (for spreading the forces this MD step).
9814 dd_move_x_vsites(dd, state_local->box, state_local->x);
9816 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9818 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9820 dd_move_x(dd, state_local->box, state_local->x);
9821 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9822 -1, state_local->x, state_local->box);
9825 /* Store the partitioning step */
9826 comm->partition_step = step;
9828 /* Increase the DD partitioning counter */
9830 /* The state currently matches this DD partitioning count, store it */
9831 state_local->ddp_count = dd->ddp_count;
9834 /* The DD master node knows the complete cg distribution,
9835 * store the count so we can possibly skip the cg info communication.
9837 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9840 if (comm->DD_debug > 0)
9842 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9843 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9844 "after partitioning");