1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
30 #include "gmx_fatal.h"
31 #include "gmx_fatal_collective.h"
34 #include "domdec_network.h"
37 #include "chargegroup.h"
44 #include "pull_rotation.h"
48 #include "mtop_util.h"
49 #include "gmx_ga2la.h"
52 #include "nbnxn_search.h"
54 #include "gmx_omp_nthreads.h"
55 #include "gpu_utils.h"
57 #include "gromacs/fileio/futil.h"
58 #include "gromacs/fileio/gmxfio.h"
59 #include "gromacs/fileio/pdbio.h"
60 #include "gromacs/timing/wallcycle.h"
61 #include "gromacs/utility/gmxmpi.h"
63 #define DDRANK(dd, rank) (rank)
64 #define DDMASTERRANK(dd) (dd->masterrank)
/* Master-rank-only bookkeeping used to gather/scatter the global state:
 * per-rank charge-group and atom counts plus communication buffers.
 */
66 typedef struct gmx_domdec_master
68 /* The cell boundaries */
70 /* The global charge group division */
71 int *ncg; /* Number of home charge groups for each node */
72 int *index; /* Index of nnodes+1 into cg */
73 int *cg; /* Global charge group index */
74 int *nat; /* Number of home atoms for each node. */
75 int *ibuf; /* Buffer for communication */
76 rvec *vbuf; /* Buffer for state scattering and gathering */
77 } gmx_domdec_master_t;
/* Per-pulse send/receive index bookkeeping (gmx_domdec_ind_t fields) followed
 * by the per-dimension communication setup (gmx_domdec_comm_dim_t).
 */
81 /* The numbers of charge groups to send and receive for each cell
82 * that requires communication, the last entry contains the total
83 * number of atoms that needs to be communicated.
85 int nsend[DD_MAXIZONE+2];
86 int nrecv[DD_MAXIZONE+2];
87 /* The charge groups to send */
90 /* The atom range for non-in-place communication */
91 int cell2at0[DD_MAXIZONE];
92 int cell2at1[DD_MAXIZONE];
97 int np; /* Number of grid pulses in this dimension */
98 int np_dlb; /* For dlb, for use with edlbAUTO */
99 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
101 gmx_bool bInPlace; /* Can we communicate in place? */
102 } gmx_domdec_comm_dim_t;
/* Per-row dynamic load balancing state (root of a DD cell row). */
106 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
107 real *cell_f; /* State var.: cell boundaries, box relative */
108 real *old_cell_f; /* Temp. var.: old cell size */
109 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
110 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
111 real *bound_min; /* Temp. var.: lower limit for cell boundary */
112 real *bound_max; /* Temp. var.: upper limit for cell boundary */
113 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
114 real *buf_ncd; /* Temp. var. */
117 #define DD_NLOAD_MAX 9
119 /* Here floats are accurate enough, since these variables
120 * only influence the load balancing, not the actual MD results.
147 gmx_cgsort_t *sort_new;
159 /* This enum determines the order of the coordinates.
160 * ddnatHOME and ddnatZONE should be first and second,
161 * the others can be ordered as wanted.
164 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
168 edlbAUTO, edlbNO, edlbYES, edlbNR
170 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
/* Per-dimension PME communication setup (gmx_ddpme_t fields). */
174 int dim; /* The dimension */
175 gmx_bool dim_match; /* Tells if DD and PME dims match */
176 int nslab; /* The number of PME slabs in this dimension */
177 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
178 int *pp_min; /* The minimum pp node location, size nslab */
179 int *pp_max; /* The maximum pp node location, size nslab */
180 int maxshift; /* The maximum shift for coordinate redistribution in PME */
/* Zone boundary extremes exchanged between neighbors for DLB
 * (gmx_ddzone_t fields).
 */
185 real min0; /* The minimum bottom of this zone */
186 real max1; /* The maximum top of this zone */
187 real min1; /* The minimum top of this zone */
188 real mch0; /* The maximum bottom communication height for this zone */
189 real mch1; /* The maximum top communication height for this zone */
190 real p1_0; /* The bottom value of the first cell in this zone */
191 real p1_1; /* The top value of the first cell in this zone */
196 gmx_domdec_ind_t ind;
203 } dd_comm_setup_work_t;
/* The main domain decomposition communication struct: all per-rank DD state
 * (PP/PME setup, charge groups, DLB state, communication buffers, timing).
 */
205 typedef struct gmx_domdec_comm
207 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
208 * unless stated otherwise.
211 /* The number of decomposition dimensions for PME, 0: no PME */
213 /* The number of nodes doing PME (PP/PME or only PME) */
217 /* The communication setup including the PME only nodes */
218 gmx_bool bCartesianPP_PME;
221 int *pmenodes; /* size npmenodes */
222 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
223 * but with bCartesianPP_PME */
224 gmx_ddpme_t ddpme[2];
226 /* The DD particle-particle nodes only */
227 gmx_bool bCartesianPP;
228 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
230 /* The global charge groups */
233 /* Should we sort the cgs */
235 gmx_domdec_sort_t *sort;
237 /* Are there charge groups? */
240 /* Are there bonded and multi-body interactions between charge groups? */
241 gmx_bool bInterCGBondeds;
242 gmx_bool bInterCGMultiBody;
244 /* Data for the optional bonded interaction atom communication range */
251 /* Are we actually using DLB? */
252 gmx_bool bDynLoadBal;
254 /* Cell sizes for static load balancing, first index cartesian */
257 /* The width of the communicated boundaries */
260 /* The minimum cell size (including triclinic correction) */
262 /* For dlb, for use with edlbAUTO */
263 rvec cellsize_min_dlb;
264 /* The lower limit for the DD cell size with DLB */
266 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
267 gmx_bool bVacDLBNoLimit;
269 /* With PME load balancing we set limits on DLB */
270 gmx_bool bPMELoadBalDLBLimits;
271 /* DLB needs to take into account that we want to allow this maximum
272 * cut-off (for PME load balancing), this could limit cell boundaries.
274 real PMELoadBal_max_cutoff;
276 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
278 /* box0 and box_size are required with dim's without pbc and -gcom */
282 /* The cell boundaries */
286 /* The old location of the cell boundaries, to check cg displacements */
290 /* The communication setup and charge group boundaries for the zones */
291 gmx_domdec_zones_t zones;
293 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
294 * cell boundaries of neighboring cells for dynamic load balancing.
296 gmx_ddzone_t zone_d1[2];
297 gmx_ddzone_t zone_d2[2][2];
299 /* The coordinate/force communication setup and indices */
300 gmx_domdec_comm_dim_t cd[DIM];
301 /* The maximum number of cells to communicate with in one dimension */
304 /* Which cg distribution is stored on the master node */
305 int master_cg_ddp_count;
307 /* The number of cg's received from the direct neighbors */
308 int zone_ncg1[DD_MAXZONE];
310 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
313 /* Array for signalling if atoms have moved to another domain */
317 /* Communication buffer for general use */
321 /* Communication buffer for general use */
324 /* Temporary storage for thread parallel communication setup */
326 dd_comm_setup_work_t *dth;
328 /* Communication buffers only used with multiple grid pulses */
333 /* Communication buffers for local redistribution */
335 int cggl_flag_nalloc[DIM*2];
337 int cgcm_state_nalloc[DIM*2];
339 /* Cell sizes for dynamic load balancing */
340 gmx_domdec_root_t **root;
344 real cell_f_max0[DIM];
345 real cell_f_min1[DIM];
347 /* Stuff for load communication */
348 gmx_bool bRecordLoad;
349 gmx_domdec_load_t *load;
350 int nrank_gpu_shared;
352 MPI_Comm *mpi_comm_load;
353 MPI_Comm mpi_comm_gpu_shared;
356 /* Maximum DLB scaling per load balancing step in percent */
360 float cycl[ddCyclNr];
361 int cycl_n[ddCyclNr];
362 float cycl_max[ddCyclNr];
363 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
367 /* How often did we have load measurements */
369 /* How often have we collected the load measurements */
373 double sum_nat[ddnatNR-ddnatZONE];
383 /* The last partition step */
384 gmx_large_int_t partition_step;
392 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
395 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
396 #define DD_FLAG_NRCG 65535
397 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
398 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
400 /* Zone permutation required to obtain consecutive charge groups
401 * for neighbor searching.
403 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
405 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
406 * components see only j zones with that component 0.
409 /* The DD zone order */
410 static const ivec dd_zo[DD_MAXZONE] =
411 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
/* i-zone tables for 3, 2 and 1 decomposition dimensions:
 * each entry is {i-zone, first j-zone, last j-zone+1}.
 */
416 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
421 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
426 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
428 /* Factors used to avoid problems due to rounding issues */
429 #define DD_CELL_MARGIN 1.0001
430 #define DD_CELL_MARGIN2 1.00005
431 /* Factor to account for pressure scaling during nstlist steps */
432 #define DD_PRES_SCALE_MARGIN 1.02
434 /* Allowed performance loss before we DLB or warn */
435 #define DD_PERF_LOSS 0.05
437 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
439 /* Use separate MPI send and receive commands
440 * when nnodes <= GMX_DD_NNODES_SENDRECV.
441 * This saves memory (and some copying for small nnodes).
442 * For high parallelization scatter and gather calls are used.
444 #define GMX_DD_NNODES_SENDRECV 4
/* Obsolete rank-index ordering (x running fastest). It conflicts with the
 * dd_index macro defined just below (redefining a macro with a different
 * expansion is a constraint violation), so it is compiled out and kept
 * only for reference.
 */
#if 0
#define dd_index(n, i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])

/* Convert a rank index to Cartesian DD cell coordinates, x fastest. */
static void index2xyz(ivec nc, int ind, ivec xyz)
{
    xyz[XX] = ind % nc[XX];
    xyz[YY] = (ind / nc[XX]) % nc[YY];
    xyz[ZZ] = ind / (nc[YY]*nc[XX]);
}
#endif
458 /* This order is required to minimize the coordinate communication in PME
459 * which uses decomposition in the x direction.
461 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
463 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
465 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
466 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
467 xyz[ZZ] = ind % nc[ZZ];
/* Map Cartesian DD cell coordinates c to the DD node id, using either the
 * stored lookup table or the MPI Cartesian communicator.
 */
470 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
475 ddindex = dd_index(dd->nc, c);
476 if (dd->comm->bCartesianPP_PME)
/* Lookup table set up for the PP+PME Cartesian case */
478 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
480 else if (dd->comm->bCartesianPP)
/* Ask MPI for the rank in the Cartesian communicator */
483 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
494 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
496 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
/* Returns the 1-based global atom number for local atom index i;
 * fails hard when i is outside the locally known atom range.
 */
499 int ddglatnr(gmx_domdec_t *dd, int i)
509 if (i >= dd->comm->nat[ddnatNR-1])
511 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
/* gatindex maps local to global; +1 converts to 1-based numbering */
513 atnr = dd->gatindex[i] + 1;
519 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
521 return &dd->comm->cgs_gl;
/* Initialize an rvec buffer struct (body not visible in this view). */
524 static void vec_rvec_init(vec_rvec_t *v)
530 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
534 v->nalloc = over_alloc_dd(n);
535 srenew(v->v, v->nalloc);
539 void dd_store_state(gmx_domdec_t *dd, t_state *state)
543 if (state->ddp_count != dd->ddp_count)
545 gmx_incons("The state does not the domain decomposition state");
548 state->ncg_gl = dd->ncg_home;
549 if (state->ncg_gl > state->cg_gl_nalloc)
551 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
552 srenew(state->cg_gl, state->cg_gl_nalloc);
554 for (i = 0; i < state->ncg_gl; i++)
556 state->cg_gl[i] = dd->index_gl[i];
559 state->ddp_count_cg_gl = dd->ddp_count;
562 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
564 return &dd->comm->zones;
/* For charge group icg, return the j-charge-group range [jcg0, jcg1) to be
 * searched during neighbor searching, plus the allowed shift range per
 * dimension (shift0/shift1).
 */
567 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
568 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
570 gmx_domdec_zones_t *zones;
573 zones = &dd->comm->zones;
/* Find the i-zone that contains icg */
576 while (icg >= zones->izone[izone].cg1)
585 else if (izone < zones->nizone)
587 *jcg0 = zones->izone[izone].jcg0;
591 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
592 icg, izone, zones->nizone);
595 *jcg1 = zones->izone[izone].jcg1;
597 for (d = 0; d < dd->ndim; d++)
600 shift0[dim] = zones->izone[izone].shift0[dim];
601 shift1[dim] = zones->izone[izone].shift1[dim];
602 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
604 /* A conservative approach, this can be optimized */
611 int dd_natoms_vsite(gmx_domdec_t *dd)
613 return dd->comm->nat[ddnatVSITE];
616 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
618 *at_start = dd->comm->nat[ddnatCON-1];
619 *at_end = dd->comm->nat[ddnatCON];
/* Communicate the coordinates x of home atoms to the neighboring domains,
 * dimension by dimension and pulse by pulse, applying PBC shifts (and the
 * screw-PBC rotation) where the communication crosses a periodic boundary.
 */
622 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
624 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
625 int *index, *cgindex;
626 gmx_domdec_comm_t *comm;
627 gmx_domdec_comm_dim_t *cd;
628 gmx_domdec_ind_t *ind;
629 rvec shift = {0, 0, 0}, *buf, *rbuf;
630 gmx_bool bPBC, bScrew;
634 cgindex = dd->cgindex;
639 nat_tot = dd->nat_home;
640 for (d = 0; d < dd->ndim; d++)
/* Only the cells at the periodic edge (ci == 0) need the PBC shift */
642 bPBC = (dd->ci[dd->dim[d]] == 0);
643 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
646 copy_rvec(box[dd->dim[d]], shift);
649 for (p = 0; p < cd->np; p++)
/* Case 1: no PBC shift needed, plain copy into the send buffer */
656 for (i = 0; i < ind->nsend[nzone]; i++)
658 at0 = cgindex[index[i]];
659 at1 = cgindex[index[i]+1];
660 for (j = at0; j < at1; j++)
662 copy_rvec(x[j], buf[n]);
/* Case 2: shift the coordinates across the periodic boundary */
669 for (i = 0; i < ind->nsend[nzone]; i++)
671 at0 = cgindex[index[i]];
672 at1 = cgindex[index[i]+1];
673 for (j = at0; j < at1; j++)
675 /* We need to shift the coordinates */
676 rvec_add(x[j], shift, buf[n]);
/* Case 3: screw PBC, shift in x and rotate y/z */
683 for (i = 0; i < ind->nsend[nzone]; i++)
685 at0 = cgindex[index[i]];
686 at1 = cgindex[index[i]+1];
687 for (j = at0; j < at1; j++)
690 buf[n][XX] = x[j][XX] + shift[XX];
692 * This operation requires a special shift force
693 * treatment, which is performed in calc_vir.
695 buf[n][YY] = box[YY][YY] - x[j][YY];
696 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
708 rbuf = comm->vbuf2.v;
710 /* Send and receive the coordinates */
711 dd_sendrecv_rvec(dd, d, dddirBackward,
712 buf, ind->nsend[nzone+1],
713 rbuf, ind->nrecv[nzone+1]);
/* Non-in-place: copy the received coordinates to their zone ranges */
717 for (zone = 0; zone < nzone; zone++)
719 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
721 copy_rvec(rbuf[j], x[i]);
726 nat_tot += ind->nrecv[nzone+1];
/* Communicate and sum the forces on communicated atoms back to their home
 * ranks; the reverse of dd_move_x. Optionally accumulates shift forces in
 * fshift and applies the screw-PBC force rotation.
 */
732 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
734 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
735 int *index, *cgindex;
736 gmx_domdec_comm_t *comm;
737 gmx_domdec_comm_dim_t *cd;
738 gmx_domdec_ind_t *ind;
742 gmx_bool bPBC, bScrew;
746 cgindex = dd->cgindex;
751 nzone = comm->zones.n/2;
752 nat_tot = dd->nat_tot;
/* Walk the dimensions in reverse order w.r.t. dd_move_x */
753 for (d = dd->ndim-1; d >= 0; d--)
755 bPBC = (dd->ci[dd->dim[d]] == 0);
756 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
757 if (fshift == NULL && !bScrew)
761 /* Determine which shift vector we need */
767 for (p = cd->np-1; p >= 0; p--)
770 nat_tot -= ind->nrecv[nzone+1];
777 sbuf = comm->vbuf2.v;
/* Non-in-place: gather the forces to send into sbuf */
779 for (zone = 0; zone < nzone; zone++)
781 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
783 copy_rvec(f[i], sbuf[j]);
788 /* Communicate the forces */
789 dd_sendrecv_rvec(dd, d, dddirForward,
790 sbuf, ind->nrecv[nzone+1],
791 buf, ind->nsend[nzone+1]);
793 /* Add the received forces */
/* Case 1: plain accumulation, no shift force bookkeeping */
797 for (i = 0; i < ind->nsend[nzone]; i++)
799 at0 = cgindex[index[i]];
800 at1 = cgindex[index[i]+1];
801 for (j = at0; j < at1; j++)
803 rvec_inc(f[j], buf[n]);
/* Case 2: accumulate and also record the shift force */
810 for (i = 0; i < ind->nsend[nzone]; i++)
812 at0 = cgindex[index[i]];
813 at1 = cgindex[index[i]+1];
814 for (j = at0; j < at1; j++)
816 rvec_inc(f[j], buf[n]);
817 /* Add this force to the shift force */
818 rvec_inc(fshift[is], buf[n]);
/* Case 3: screw PBC, y/z force components change sign under rotation */
825 for (i = 0; i < ind->nsend[nzone]; i++)
827 at0 = cgindex[index[i]];
828 at1 = cgindex[index[i]+1];
829 for (j = at0; j < at1; j++)
831 /* Rotate the force */
832 f[j][XX] += buf[n][XX];
833 f[j][YY] -= buf[n][YY];
834 f[j][ZZ] -= buf[n][ZZ];
837 /* Add this force to the shift force */
838 rvec_inc(fshift[is], buf[n]);
/* Spread a per-atom real array v from home atoms to the communicated
 * (halo) atoms; scalar analogue of dd_move_x, no PBC handling needed.
 */
849 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
851 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
852 int *index, *cgindex;
853 gmx_domdec_comm_t *comm;
854 gmx_domdec_comm_dim_t *cd;
855 gmx_domdec_ind_t *ind;
860 cgindex = dd->cgindex;
/* Reuse the rvec communication buffer as a flat real buffer */
862 buf = &comm->vbuf.v[0][0];
865 nat_tot = dd->nat_home;
866 for (d = 0; d < dd->ndim; d++)
869 for (p = 0; p < cd->np; p++)
874 for (i = 0; i < ind->nsend[nzone]; i++)
876 at0 = cgindex[index[i]];
877 at1 = cgindex[index[i]+1];
878 for (j = at0; j < at1; j++)
891 rbuf = &comm->vbuf2.v[0][0];
893 /* Send and receive the coordinates */
894 dd_sendrecv_real(dd, d, dddirBackward,
895 buf, ind->nsend[nzone+1],
896 rbuf, ind->nrecv[nzone+1]);
/* Non-in-place: copy received values into their zone ranges */
900 for (zone = 0; zone < nzone; zone++)
902 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
909 nat_tot += ind->nrecv[nzone+1];
/* Sum a per-atom real array v from the communicated (halo) atoms back onto
 * the home atoms; scalar analogue of dd_move_f.
 */
915 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
917 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
918 int *index, *cgindex;
919 gmx_domdec_comm_t *comm;
920 gmx_domdec_comm_dim_t *cd;
921 gmx_domdec_ind_t *ind;
926 cgindex = dd->cgindex;
/* Reuse the rvec communication buffer as a flat real buffer */
928 buf = &comm->vbuf.v[0][0];
931 nzone = comm->zones.n/2;
932 nat_tot = dd->nat_tot;
/* Reverse dimension order w.r.t. dd_atom_spread_real */
933 for (d = dd->ndim-1; d >= 0; d--)
936 for (p = cd->np-1; p >= 0; p--)
939 nat_tot -= ind->nrecv[nzone+1];
946 sbuf = &comm->vbuf2.v[0][0];
948 for (zone = 0; zone < nzone; zone++)
950 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
957 /* Communicate the forces */
958 dd_sendrecv_real(dd, d, dddirForward,
959 sbuf, ind->nrecv[nzone+1],
960 buf, ind->nsend[nzone+1]);
962 /* Add the received forces */
964 for (i = 0; i < ind->nsend[nzone]; i++)
966 at0 = cgindex[index[i]];
967 at1 = cgindex[index[i]+1];
968 for (j = at0; j < at1; j++)
979 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
981 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
983 zone->min0, zone->max1,
984 zone->mch0, zone->mch0,
985 zone->p1_0, zone->p1_1);
989 #define DDZONECOMM_MAXZONE 5
990 #define DDZONECOMM_BUFSIZE 3
992 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
993 int ddimind, int direction,
994 gmx_ddzone_t *buf_s, int n_s,
995 gmx_ddzone_t *buf_r, int n_r)
997 #define ZBS DDZONECOMM_BUFSIZE
998 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
999 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
1002 for (i = 0; i < n_s; i++)
1004 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1005 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1006 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1007 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1008 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1009 vbuf_s[i*ZBS+1][2] = 0;
1010 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1011 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1012 vbuf_s[i*ZBS+2][2] = 0;
1015 dd_sendrecv_rvec(dd, ddimind, direction,
1019 for (i = 0; i < n_r; i++)
1021 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1022 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1023 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1024 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1025 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1026 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1027 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
/* Communicate the zone boundary extremes between neighboring DD cells for
 * dynamic load balancing, and update the neighbor-search bounding box
 * cell_ns_x0/cell_ns_x1 with the received zone limits.
 */
1033 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1034 rvec cell_ns_x0, rvec cell_ns_x1)
1036 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1038 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1039 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1040 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1041 rvec extr_s[2], extr_r[2];
1043 real dist_d, c = 0, det;
1044 gmx_domdec_comm_t *comm;
1045 gmx_bool bPBC, bUse;
/* Initialize the local zone limits from the current NS bounding box */
1049 for (d = 1; d < dd->ndim; d++)
1052 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1053 zp->min0 = cell_ns_x0[dim];
1054 zp->max1 = cell_ns_x1[dim];
1055 zp->min1 = cell_ns_x1[dim];
1056 zp->mch0 = cell_ns_x0[dim];
1057 zp->mch1 = cell_ns_x1[dim];
1058 zp->p1_0 = cell_ns_x0[dim];
1059 zp->p1_1 = cell_ns_x1[dim];
1062 for (d = dd->ndim-2; d >= 0; d--)
1065 bPBC = (dim < ddbox->npbcdim);
1067 /* Use an rvec to store two reals */
1068 extr_s[d][0] = comm->cell_f0[d+1];
1069 extr_s[d][1] = comm->cell_f1[d+1];
1070 extr_s[d][2] = comm->cell_f1[d+1];
1073 /* Store the extremes in the backward sending buffer,
1074 * so they get updated separately from the forward communication.
1076 for (d1 = d; d1 < dd->ndim-1; d1++)
1078 /* We invert the order to be able to use the same loop for buf_e */
1079 buf_s[pos].min0 = extr_s[d1][1];
1080 buf_s[pos].max1 = extr_s[d1][0];
1081 buf_s[pos].min1 = extr_s[d1][2];
1082 buf_s[pos].mch0 = 0;
1083 buf_s[pos].mch1 = 0;
1084 /* Store the cell corner of the dimension we communicate along */
1085 buf_s[pos].p1_0 = comm->cell_x0[dim];
1086 buf_s[pos].p1_1 = 0;
1090 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1093 if (dd->ndim == 3 && d == 0)
1095 buf_s[pos] = comm->zone_d2[0][1];
1097 buf_s[pos] = comm->zone_d1[0];
1101 /* We only need to communicate the extremes
1102 * in the forward direction
1104 npulse = comm->cd[d].np;
1107 /* Take the minimum to avoid double communication */
1108 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1112 /* Without PBC we should really not communicate over
1113 * the boundaries, but implementing that complicates
1114 * the communication setup and therefore we simply
1115 * do all communication, but ignore some data.
1117 npulse_min = npulse;
1119 for (p = 0; p < npulse_min; p++)
1121 /* Communicate the extremes forward */
1122 bUse = (bPBC || dd->ci[dim] > 0);
1124 dd_sendrecv_rvec(dd, d, dddirForward,
1125 extr_s+d, dd->ndim-d-1,
1126 extr_r+d, dd->ndim-d-1);
1130 for (d1 = d; d1 < dd->ndim-1; d1++)
1132 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1133 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1134 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1140 for (p = 0; p < npulse; p++)
1142 /* Communicate all the zone information backward */
1143 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1145 dd_sendrecv_ddzone(dd, d, dddirBackward,
1152 for (d1 = d+1; d1 < dd->ndim; d1++)
1154 /* Determine the decrease of maximum required
1155 * communication height along d1 due to the distance along d,
1156 * this avoids a lot of useless atom communication.
1158 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1160 if (ddbox->tric_dir[dim])
1162 /* c is the off-diagonal coupling between the cell planes
1163 * along directions d and d1.
1165 c = ddbox->v[dim][dd->dim[d1]][dim];
1171 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1174 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1178 /* A negative value signals out of range */
1184 /* Accumulate the extremes over all pulses */
1185 for (i = 0; i < buf_size; i++)
1189 buf_e[i] = buf_r[i];
1195 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1196 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1197 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1200 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1208 if (bUse && dh[d1] >= 0)
1210 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1211 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1214 /* Copy the received buffer to the send buffer,
1215 * to pass the data through with the next pulse.
1217 buf_s[i] = buf_r[i];
1219 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1220 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1222 /* Store the extremes */
1225 for (d1 = d; d1 < dd->ndim-1; d1++)
1227 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1228 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1229 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1233 if (d == 1 || (d == 0 && dd->ndim == 3))
1235 for (i = d; i < 2; i++)
1237 comm->zone_d2[1-d][i] = buf_e[pos];
1243 comm->zone_d1[1] = buf_e[pos];
/* Fold the accumulated zone limits into the NS bounding box */
1253 for (i = 0; i < 2; i++)
1257 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1259 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1260 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1266 for (i = 0; i < 2; i++)
1268 for (j = 0; j < 2; j++)
1272 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1274 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1275 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1279 for (d = 1; d < dd->ndim; d++)
1281 comm->cell_f_max0[d] = extr_s[d-1][0];
1282 comm->cell_f_min1[d] = extr_s[d-1][1];
1285 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1286 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
/* Collect the charge group distribution (counts and global indices) of all
 * ranks on the master, so vectors can subsequently be gathered; a no-op when
 * the master already holds the distribution for this ddp_count.
 */
1291 static void dd_collect_cg(gmx_domdec_t *dd,
1292 t_state *state_local)
1294 gmx_domdec_master_t *ma = NULL;
1295 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1298 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1300 /* The master has the correct distribution */
1304 if (state_local->ddp_count == dd->ddp_count)
1306 ncg_home = dd->ncg_home;
1308 nat_home = dd->nat_home;
1310 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
/* The distribution stored in the state is current: count atoms from it */
1312 cgs_gl = &dd->comm->cgs_gl;
1314 ncg_home = state_local->ncg_gl;
1315 cg = state_local->cg_gl;
1317 for (i = 0; i < ncg_home; i++)
1319 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1324 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1327 buf2[0] = dd->ncg_home;
1328 buf2[1] = dd->nat_home;
1338 /* Collect the charge group and atom counts on the master */
1339 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1344 for (i = 0; i < dd->nnodes; i++)
1346 ma->ncg[i] = ma->ibuf[2*i];
1347 ma->nat[i] = ma->ibuf[2*i+1];
1348 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1351 /* Make byte counts and indices */
1352 for (i = 0; i < dd->nnodes; i++)
1354 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1355 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1359 fprintf(debug, "Initial charge group distribution: ");
1360 for (i = 0; i < dd->nnodes; i++)
1362 fprintf(debug, " %d", ma->ncg[i]);
1364 fprintf(debug, "\n");
1368 /* Collect the charge group indices on the master */
1370 dd->ncg_home*sizeof(int), dd->index_gl,
1371 DDMASTER(dd) ? ma->ibuf : NULL,
1372 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1373 DDMASTER(dd) ? ma->cg : NULL);
1375 dd->comm->master_cg_ddp_count = state_local->ddp_count;
/* Collect a distributed rvec vector on the master with explicit MPI
 * send/receive; used when the number of ranks is small
 * (<= GMX_DD_NNODES_SENDRECV).
 */
1378 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1381 gmx_domdec_master_t *ma;
1382 int n, i, c, a, nalloc = 0;
/* Non-master ranks just ship their local data to the master */
1391 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1392 dd->rank, dd->mpi_comm_all);
1397 /* Copy the master coordinates to the global array */
1398 cgs_gl = &dd->comm->cgs_gl;
1400 n = DDMASTERRANK(dd);
1402 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1404 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1406 copy_rvec(lv[a++], v[c]);
/* Receive and scatter each other rank's data into the global array */
1410 for (n = 0; n < dd->nnodes; n++)
1414 if (ma->nat[n] > nalloc)
1416 nalloc = over_alloc_dd(ma->nat[n]);
1417 srenew(buf, nalloc);
1420 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1421 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1424 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1426 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1428 copy_rvec(buf[a++], v[c]);
1437 static void get_commbuffer_counts(gmx_domdec_t *dd,
1438 int **counts, int **disps)
1440 gmx_domdec_master_t *ma;
1445 /* Make the rvec count and displacment arrays */
1447 *disps = ma->ibuf + dd->nnodes;
1448 for (n = 0; n < dd->nnodes; n++)
1450 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1451 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
/* Collect a distributed rvec vector on the master with a single gatherv;
 * used for larger rank counts.
 */
1455 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1458 gmx_domdec_master_t *ma;
1459 int *rcounts = NULL, *disps = NULL;
1468 get_commbuffer_counts(dd, &rcounts, &disps);
1473 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
/* Scatter the gathered per-rank data into the global array order */
1477 cgs_gl = &dd->comm->cgs_gl;
1480 for (n = 0; n < dd->nnodes; n++)
1482 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1484 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1486 copy_rvec(buf[a++], v[c]);
1493 void dd_collect_vec(gmx_domdec_t *dd,
1494 t_state *state_local, rvec *lv, rvec *v)
1496 gmx_domdec_master_t *ma;
1497 int n, i, c, a, nalloc = 0;
1500 dd_collect_cg(dd, state_local);
1502 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1504 dd_collect_vec_sendrecv(dd, lv, v);
1508 dd_collect_vec_gatherv(dd, lv, v);
/* Collect the full distributed t_state on the master: scalar/matrix state
 * is copied directly, distributed vectors are gathered with dd_collect_vec,
 * RNG state is copied or gathered depending on nrngi.
 */
1513 void dd_collect_state(gmx_domdec_t *dd,
1514 t_state *state_local, t_state *state)
1518 nh = state->nhchainlength;
1522 for (i = 0; i < efptNR; i++)
1524 state->lambda[i] = state_local->lambda[i];
1526 state->fep_state = state_local->fep_state;
1527 state->veta = state_local->veta;
1528 state->vol0 = state_local->vol0;
1529 copy_mat(state_local->box, state->box);
1530 copy_mat(state_local->boxv, state->boxv);
1531 copy_mat(state_local->svir_prev, state->svir_prev);
1532 copy_mat(state_local->fvir_prev, state->fvir_prev);
1533 copy_mat(state_local->pres_prev, state->pres_prev);
/* Nose-Hoover thermostat chains, per group times chain length */
1535 for (i = 0; i < state_local->ngtc; i++)
1537 for (j = 0; j < nh; j++)
1539 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1540 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1542 state->therm_integral[i] = state_local->therm_integral[i];
1544 for (i = 0; i < state_local->nnhpres; i++)
1546 for (j = 0; j < nh; j++)
1548 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1549 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
/* Gather each distributed per-atom array present in this state */
1553 for (est = 0; est < estNR; est++)
1555 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1560 dd_collect_vec(dd, state_local, state_local->x, state->x);
1563 dd_collect_vec(dd, state_local, state_local->v, state->v);
1566 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1569 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1572 if (state->nrngi == 1)
1576 for (i = 0; i < state_local->nrng; i++)
1578 state->ld_rng[i] = state_local->ld_rng[i];
1584 dd_gather(dd, state_local->nrng*sizeof(state->ld_rng[0]),
1585 state_local->ld_rng, state->ld_rng);
1589 if (state->nrngi == 1)
1593 state->ld_rngi[0] = state_local->ld_rngi[0];
1598 dd_gather(dd, sizeof(state->ld_rngi[0]),
1599 state_local->ld_rngi, state->ld_rngi);
1602 case estDISRE_INITF:
1603 case estDISRE_RM3TAV:
1604 case estORIRE_INITF:
1608 gmx_incons("Unknown state entry encountered in dd_collect_state");
/* Grow (with over-allocation) all distributed per-atom arrays in state,
 * and the force array f, to hold at least nalloc atoms.
 */
1614 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1620 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1623 state->nalloc = over_alloc_dd(nalloc);
1625 for (est = 0; est < estNR; est++)
1627 if (EST_DISTR(est) && (state->flags & (1<<est)))
1632 srenew(state->x, state->nalloc);
1635 srenew(state->v, state->nalloc);
1638 srenew(state->sd_X, state->nalloc);
1641 srenew(state->cg_p, state->nalloc);
1645 case estDISRE_INITF:
1646 case estDISRE_RM3TAV:
1647 case estORIRE_INITF:
1649 /* No reallocation required */
1652 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1659 srenew(*f, state->nalloc);
/* Make sure the forcerec charge-group arrays (and, with the Verlet scheme,
 * the state arrays) can hold at least nalloc charge groups/atoms.
 */
1663 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1666 if (nalloc > fr->cg_nalloc)
1670 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1672 fr->cg_nalloc = over_alloc_dd(nalloc);
1673 srenew(fr->cginfo, fr->cg_nalloc);
1674 if (fr->cutoff_scheme == ecutsGROUP)
1676 srenew(fr->cg_cm, fr->cg_nalloc);
1679 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1681 /* We don't use charge groups, we use x in state to set up
1682 * the atom communication.
1684 dd_realloc_state(state, f, nalloc);
/* Distribute the global rvec vector v over the ranks into lv with explicit
 * MPI send/receive; the reverse of dd_collect_vec_sendrecv.
 */
1688 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1691 gmx_domdec_master_t *ma;
1692 int n, i, c, a, nalloc = 0;
/* Master packs and sends each non-master rank its portion */
1699 for (n = 0; n < dd->nnodes; n++)
1703 if (ma->nat[n] > nalloc)
1705 nalloc = over_alloc_dd(ma->nat[n]);
1706 srenew(buf, nalloc);
1708 /* Use lv as a temporary buffer */
1710 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1712 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1714 copy_rvec(v[c], buf[a++]);
/* Sanity check: packed atom count must match the recorded count */
1717 if (a != ma->nat[n])
1719 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1724 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1725 DDRANK(dd, n), n, dd->mpi_comm_all);
/* The master copies its own portion directly into lv */
1730 n = DDMASTERRANK(dd);
1732 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1734 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1736 copy_rvec(v[c], lv[a++]);
/* Non-master ranks receive their portion from the master */
1743 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1744 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
/* Collective variant of the vector distribution: the master packs all
 * nodes' slices contiguously and a single MPI_Scatterv (via dd_scatterv)
 * delivers each node its nat_home rvecs into lv.
 */
1749 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1752 gmx_domdec_master_t *ma;
1753 int *scounts = NULL, *disps = NULL;
1754 int n, i, c, a, nalloc = 0;
/* Byte counts and displacements per destination node for scatterv */
1761 get_commbuffer_counts(dd, &scounts, &disps);
1765 for (n = 0; n < dd->nnodes; n++)
1767 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1769 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1771 copy_rvec(v[c], buf[a++]);
1777 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
/* Distribute global vector v over the DD nodes into local lv, choosing
 * point-to-point send/recv for few nodes and scatterv for many.
 */
1780 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1782 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1784 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1788 dd_distribute_vec_scatterv(dd, cgs, v, lv);
/* Broadcast the expanded-ensemble/free-energy weight history from the
 * master to all DD ranks. NOTE(review): the per-lambda arrays on the
 * receiving ranks are presumably already allocated to nlambda before
 * this is called — confirm against the caller.
 */
1792 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1795 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1796 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1797 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1799 if (dfhist->nlambda > 0)
1801 int nlam = dfhist->nlambda;
1802 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1803 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1804 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1805 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1806 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1807 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
/* The nlam x nlam matrices are broadcast one row at a time */
1809 for (i = 0; i<nlam; i++) {
1810 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1811 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1812 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1813 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1814 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1815 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
/* Distribute the global t_state from the master over all DD nodes:
 * scalar/global entries are copied locally on the master and then
 * broadcast; the distributed per-atom arrays (x, v, sd_X, cg_p) are
 * scattered per home charge group via dd_distribute_vec.
 */
1820 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1821 t_state *state, t_state *state_local,
1826 nh = state->nhchainlength;
/* Master first copies all non-distributed entries into state_local */
1830 for (i = 0; i < efptNR; i++)
1832 state_local->lambda[i] = state->lambda[i];
1834 state_local->fep_state = state->fep_state;
1835 state_local->veta = state->veta;
1836 state_local->vol0 = state->vol0;
1837 copy_mat(state->box, state_local->box);
1838 copy_mat(state->box_rel, state_local->box_rel);
1839 copy_mat(state->boxv, state_local->boxv);
1840 copy_mat(state->svir_prev, state_local->svir_prev);
1841 copy_mat(state->fvir_prev, state_local->fvir_prev);
1842 copy_df_history(&state_local->dfhist,&state->dfhist);
/* Nose-Hoover chains: ngtc coupling groups x nh chain entries */
1843 for (i = 0; i < state_local->ngtc; i++)
1845 for (j = 0; j < nh; j++)
1847 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1848 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1850 state_local->therm_integral[i] = state->therm_integral[i];
1852 for (i = 0; i < state_local->nnhpres; i++)
1854 for (j = 0; j < nh; j++)
1856 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1857 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
/* Broadcast the global entries from the master to all ranks */
1861 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1862 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1863 dd_bcast(dd, sizeof(real), &state_local->veta);
1864 dd_bcast(dd, sizeof(real), &state_local->vol0);
1865 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1866 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1867 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1868 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1869 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1870 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1871 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1872 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1873 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1874 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1876 /* communicate df_history -- required for restarting from checkpoint */
1877 dd_distribute_dfhist(dd,&state_local->dfhist);
/* Make sure the local arrays can hold our home atoms */
1879 if (dd->nat_home > state_local->nalloc)
1881 dd_realloc_state(state_local, f, dd->nat_home);
/* Scatter each distributed state entry that is in use */
1883 for (i = 0; i < estNR; i++)
1885 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1890 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1893 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1896 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1899 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
/* RNG state: a single global generator is broadcast, per-node
 * generators are scattered (mirrors dd_collect_state) */
1902 if (state->nrngi == 1)
1905 state_local->nrng*sizeof(state_local->ld_rng[0]),
1906 state->ld_rng, state_local->ld_rng);
1911 state_local->nrng*sizeof(state_local->ld_rng[0]),
1912 state->ld_rng, state_local->ld_rng);
1916 if (state->nrngi == 1)
1918 dd_bcastc(dd, sizeof(state_local->ld_rngi[0]),
1919 state->ld_rngi, state_local->ld_rngi);
1923 dd_scatter(dd, sizeof(state_local->ld_rngi[0]),
1924 state->ld_rngi, state_local->ld_rngi);
1927 case estDISRE_INITF:
1928 case estDISRE_RM3TAV:
1929 case estORIRE_INITF:
1931 /* Not implemented yet */
1934 gmx_incons("Unknown state entry encountered in dd_distribute_state");
/* Return the axis letter ('X', 'Y' or 'Z') for a Cartesian dimension
 * index; fatal error on any other value.
 */
1940 static char dim2char(int dim)
1946 case XX: c = 'X'; break;
1947 case YY: c = 'Y'; break;
1948 case ZZ: c = 'Z'; break;
1949 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
/* Debug output: gather all DD cell boundaries on the master and write
 * them as a PDB file, one 8-corner "GLY" box per DD cell, with CONECT
 * records drawing the box edges. The B-factor column carries the
 * relative cell volume.
 */
1955 static void write_dd_grid_pdb(const char *fn, gmx_large_int_t step,
1956 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1958 rvec grid_s[2], *grid_r = NULL, cx, r;
1959 char fname[STRLEN], format[STRLEN], buf[22];
1961 int a, i, d, z, y, x;
1965 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1966 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1970 snew(grid_r, 2*dd->nnodes);
/* Each node contributes its lower and upper cell corner */
1973 dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd) ? grid_r[0] : NULL);
1977 for (d = 0; d < DIM; d++)
1979 for (i = 0; i < DIM; i++)
/* Build the triclinic skew matrix for periodic, decomposed dims */
1987 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1989 tric[d][i] = box[i][d]/box[i][i];
1998 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1999 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
2000 out = gmx_fio_fopen(fname, "w");
2001 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2003 for (i = 0; i < dd->nnodes; i++)
/* Cell volume relative to the average cell volume */
2005 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
2006 for (d = 0; d < DIM; d++)
2008 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
/* Emit the 8 corners of this cell (x/y/z in {lower, upper}) */
2010 for (z = 0; z < 2; z++)
2012 for (y = 0; y < 2; y++)
2014 for (x = 0; x < 2; x++)
2016 cx[XX] = grid_r[i*2+x][XX];
2017 cx[YY] = grid_r[i*2+y][YY];
2018 cx[ZZ] = grid_r[i*2+z][ZZ];
2020 fprintf(out, format, "ATOM", a++, "CA", "GLY", ' ', 1+i,
2021 ' ', 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol);
/* Connect the corners into the 12 edges of the cell box */
2025 for (d = 0; d < DIM; d++)
2027 for (x = 0; x < 4; x++)
2031 case 0: y = 1 + i*8 + 2*x; break;
2032 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
2033 case 2: y = 1 + i*8 + x; break;
2035 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2039 gmx_fio_fclose(out);
/* Debug output: write this node's local atoms to a per-node PDB file.
 * The B-factor column encodes the zone each atom belongs to (zones.n
 * for vsites, zones.n+1 for constraint-only atoms).
 */
2044 void write_dd_pdb(const char *fn, gmx_large_int_t step, const char *title,
2045 gmx_mtop_t *mtop, t_commrec *cr,
2046 int natoms, rvec x[], matrix box)
2048 char fname[STRLEN], format[STRLEN], format4[STRLEN], buf[22];
2050 int i, ii, resnr, c;
2051 char *atomname, *resname;
2058 natoms = dd->comm->nat[ddnatVSITE];
2061 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2063 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
2064 sprintf(format4, "%s%s\n", get_pdbformat4(), "%6.2f%6.2f");
2066 out = gmx_fio_fopen(fname, "w");
2068 fprintf(out, "TITLE %s\n", title);
2069 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2070 for (i = 0; i < natoms; i++)
/* Map local atom index to global for name/residue lookup */
2072 ii = dd->gatindex[i];
2073 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
2074 if (i < dd->comm->nat[ddnatZONE])
/* Advance to the zone whose cg range contains this atom */
2077 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2083 else if (i < dd->comm->nat[ddnatVSITE])
2085 b = dd->comm->zones.n;
2089 b = dd->comm->zones.n + 1;
/* PDB atom names of 4 chars need the alternative format */
2091 fprintf(out, strlen(atomname) < 4 ? format : format4,
2092 "ATOM", (ii+1)%100000,
2093 atomname, resname, ' ', resnr%10000, ' ',
2094 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b);
2096 fprintf(out, "TER\n");
2098 gmx_fio_fclose(out);
/* Return the effective multi-body (bonded) communication cut-off:
 * either the explicit cutoff_mbody, or — without DLB — the smallest
 * decomposed cell size, never exceeding the non-bonded cut-off.
 */
2101 real dd_cutoff_mbody(gmx_domdec_t *dd)
2103 gmx_domdec_comm_t *comm;
2110 if (comm->bInterCGBondeds)
2112 if (comm->cutoff_mbody > 0)
2114 r = comm->cutoff_mbody;
2118 /* cutoff_mbody=0 means we do not have DLB */
2119 r = comm->cellsize_min[dd->dim[0]];
2120 for (di = 1; di < dd->ndim; di++)
2122 r = min(r, comm->cellsize_min[dd->dim[di]]);
2124 if (comm->bBondComm)
2126 r = max(r, comm->cutoff_mbody);
2130 r = min(r, comm->cutoff);
/* Two-body communication cut-off: the larger of the non-bonded cut-off
 * and the multi-body cut-off. */
2138 real dd_cutoff_twobody(gmx_domdec_t *dd)
2142 r_mb = dd_cutoff_mbody(dd);
2144 return max(dd->comm->cutoff, r_mb);
/* Convert a PP Cartesian grid coordinate to the coordinate of the PME
 * node it maps to: the PME dimension index is rescaled from the nc PP
 * slots onto the (ntot - nc) PME slots appended after them.
 */
2148 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2152 nc = dd->nc[dd->comm->cartpmedim];
2153 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2154 copy_ivec(coord, coord_pme);
2155 coord_pme[dd->comm->cartpmedim] =
2156 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
/* Map a DD node index onto a PME node index.
 * Here we assign a PME node to communicate with this DD node
 * by assuming that the major index of both is x.
 * We add cr->npmenodes/2 to obtain an even distribution.
 */
static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
{
    /* Offset by half a PME slot before the integer division so the
     * ndd -> npme mapping is spread as evenly as possible. */
    int scaled = ddindex*npme + npme/2;

    return scaled/ndd;
}
/* PME node index for a DD node index, using this dd's node counts. */
2168 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2170 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
/* Same mapping as ddindex2pmeindex, but taking the counts from cr. */
2173 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2175 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
/* Build the list of simulation node ids that are PME-only nodes,
 * assuming PME nodes are interleaved after the PP nodes they serve.
 * Returns a newly allocated array of length cr->npmenodes.
 */
2178 static int *dd_pmenodes(t_commrec *cr)
2183 snew(pmenodes, cr->npmenodes);
2185 for (i = 0; i < cr->dd->nnodes; i++)
2187 p0 = cr_ddindex2pmeindex(cr, i);
2188 p1 = cr_ddindex2pmeindex(cr, i+1);
/* A PME node is inserted after the last DD node mapping to it */
2189 if (i+1 == cr->dd->nnodes || p1 > p0)
2193 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2195 pmenodes[n] = i + 1 + n;
/* Return the PME slab index for the DD cell at grid coordinate (x,y,z),
 * handling both Cartesian and non-Cartesian node orderings.
 */
2203 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2206 ivec coords, coords_pme, nc;
2211 if (dd->comm->bCartesian) {
2212 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2213 dd_coords2pmecoords(dd,coords,coords_pme);
2214 copy_ivec(dd->ntot,nc);
/* Strip the PP part so nc/coords_pme index the PME grid only */
2215 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2216 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2218 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2220 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2226 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
/* Return the simulation node id for the DD cell at (x,y,z), using the
 * Cartesian communicator, the stored index table, or the interleaved
 * PP/PME numbering, depending on the setup.
 */
2231 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2233 gmx_domdec_comm_t *comm;
2235 int ddindex, nodeid = -1;
2237 comm = cr->dd->comm;
2242 if (comm->bCartesianPP_PME)
2245 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2250 ddindex = dd_index(cr->dd->nc, coords);
2251 if (comm->bCartesianPP)
2253 nodeid = comm->ddindex2simnodeid[ddindex];
/* Interleaved ordering: shift by the number of preceding PME nodes */
2259 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
/* Return the PME node id serving the given PP simulation node, or -1
 * if sim_nodeid is itself a PME-only node (implied when no branch
 * assigns a PP mapping).
 */
2271 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2274 gmx_domdec_comm_t *comm;
2275 ivec coord, coord_pme;
2282 /* This assumes a uniform x domain decomposition grid cell size */
2283 if (comm->bCartesianPP_PME)
2286 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2287 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2289 /* This is a PP node */
2290 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2291 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2295 else if (comm->bCartesianPP)
2297 if (sim_nodeid < dd->nnodes)
2299 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2304 /* This assumes DD cells with identical x coordinates
2305 * are numbered sequentially.
2307 if (dd->comm->pmenodes == NULL)
2309 if (sim_nodeid < dd->nnodes)
2311 /* The DD index equals the nodeid */
2312 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
/* Interleaved case: scan the pmenodes list for the next PME node */
2318 while (sim_nodeid > dd->comm->pmenodes[i])
2322 if (sim_nodeid < dd->comm->pmenodes[i])
2324 pmenode = dd->comm->pmenodes[i];
/* Report the PME grid decomposition (number of PME nodes in x and y). */
2332 void get_pme_nnodes(const gmx_domdec_t *dd,
2333 int *npmenodes_x, int *npmenodes_y)
2337 *npmenodes_x = dd->comm->npmenodes_x;
2338 *npmenodes_y = dd->comm->npmenodes_y;
/* TRUE if sim_nodeid is a PME-only node (has no PME peer of its own);
 * always FALSE without domain decomposition.
 */
2347 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2349 gmx_bool bPMEOnlyNode;
2351 if (DOMAINDECOMP(cr))
2353 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2357 bPMEOnlyNode = FALSE;
2360 return bPMEOnlyNode;
/* For PME node pmenodeid, collect the list of PP node ids that send it
 * coordinates (*my_ddnodes, allocated here; count in *nmy_ddnodes) and
 * the peer PP node (*node_peer, the last sender).
 */
2363 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2364 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2368 ivec coord, coord_pme;
2372 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
/* Scan all DD cells and keep those that map to this PME node */
2375 for (x = 0; x < dd->nc[XX]; x++)
2377 for (y = 0; y < dd->nc[YY]; y++)
2379 for (z = 0; z < dd->nc[ZZ]; z++)
2381 if (dd->comm->bCartesianPP_PME)
2386 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2387 if (dd->ci[XX] == coord_pme[XX] &&
2388 dd->ci[YY] == coord_pme[YY] &&
2389 dd->ci[ZZ] == coord_pme[ZZ])
2391 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2396 /* The slab corresponds to the nodeid in the PME group */
2397 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2399 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2406 /* The last PP-only node is the peer node */
2407 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2411 fprintf(debug, "Receive coordinates from PP nodes:");
2412 for (x = 0; x < *nmy_ddnodes; x++)
2414 fprintf(debug, " %d", (*my_ddnodes)[x]);
2416 fprintf(debug, "\n");
/* Decide whether this PP node is the one that receives virial/energy
 * from its PME node: only the last PP node mapped to a given PME node
 * does, so the PME results are counted once.
 */
2420 static gmx_bool receive_vir_ener(t_commrec *cr)
2422 gmx_domdec_comm_t *comm;
2423 int pmenode, coords[DIM], rank;
2427 if (cr->npmenodes < cr->dd->nnodes)
2429 comm = cr->dd->comm;
2430 if (comm->bCartesianPP_PME)
2432 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2434 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
/* Check if our neighbor in the PME dimension shares our PME node */
2435 coords[comm->cartpmedim]++;
2436 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2438 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2439 if (dd_simnode2pmenode(cr, rank) == pmenode)
2441 /* This is not the last PP node for pmenode */
2449 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2450 if (cr->sim_nodeid+1 < cr->nnodes &&
2451 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2453 /* This is not the last PP node for pmenode */
/* Reset the zone charge-group ranges so that zone 0 holds all home
 * charge groups and all non-home zones are empty.
 */
2462 static void set_zones_ncg_home(gmx_domdec_t *dd)
2464 gmx_domdec_zones_t *zones;
2467 zones = &dd->comm->zones;
2469 zones->cg_range[0] = 0;
/* All upper bounds collapse to ncg_home: zones 1.. are empty */
2470 for (i = 1; i < zones->n+1; i++)
2472 zones->cg_range[i] = dd->ncg_home;
2474 /* zone_ncg1[0] should always be equal to ncg_home */
2475 dd->comm->zone_ncg1[0] = dd->ncg_home;
/* Rebuild the local charge-group index arrays (index_gl, cgindex) from
 * the global cg list stored in state (e.g. after reading a checkpoint),
 * counting the home atoms along the way.
 */
2478 static void rebuild_cgindex(gmx_domdec_t *dd,
2479 const int *gcgs_index, t_state *state)
2481 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2484 dd_cg_gl = dd->index_gl;
2485 cgindex = dd->cgindex;
2488 for (i = 0; i < state->ncg_gl; i++)
2492 dd_cg_gl[i] = cg_gl;
/* Accumulate atoms: global cg sizes come from gcgs_index */
2493 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2497 dd->ncg_home = state->ncg_gl;
2500 set_zones_ncg_home(dd);
/* Look up the cginfo entry for global charge group cg by walking the
 * per-moleculetype cginfo blocks; cg_mod folds repeated molecules onto
 * one stored copy.
 */
2503 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2505 while (cg >= cginfo_mb->cg_end)
2510 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
/* Fill fr->cginfo for local charge groups [cg0,cg1) from the global
 * tables, and mark them as local in bLocalCG when that array is kept.
 */
2513 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2514 t_forcerec *fr, char *bLocalCG)
2516 cginfo_mb_t *cginfo_mb;
2522 cginfo_mb = fr->cginfo_mb;
2523 cginfo = fr->cginfo;
2525 for (cg = cg0; cg < cg1; cg++)
2527 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2531 if (bLocalCG != NULL)
2533 for (cg = cg0; cg < cg1; cg++)
2535 bLocalCG[index_gl[cg]] = TRUE;
/* Build the local-to-global (gatindex) and global-to-local (ga2la)
 * atom index tables for all zones, starting from charge group cg_start.
 * With charge groups each cg expands to its atom range; without, one
 * entry per cg/atom suffices.
 */
2540 static void make_dd_indices(gmx_domdec_t *dd,
2541 const int *gcgs_index, int cg_start)
2543 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2544 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2549 bLocalCG = dd->comm->bLocalCG;
2551 if (dd->nat_tot > dd->gatindex_nalloc)
2553 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2554 srenew(dd->gatindex, dd->gatindex_nalloc);
2557 nzone = dd->comm->zones.n;
2558 zone2cg = dd->comm->zones.cg_range;
2559 zone_ncg1 = dd->comm->zone_ncg1;
2560 index_gl = dd->index_gl;
2561 gatindex = dd->gatindex;
2562 bCGs = dd->comm->bCGs;
2564 if (zone2cg[1] != dd->ncg_home)
2566 gmx_incons("dd->ncg_zone is not up to date");
2569 /* Make the local to global and global to local atom index */
2570 a = dd->cgindex[cg_start];
2571 for (zone = 0; zone < nzone; zone++)
2579 cg0 = zone2cg[zone];
2581 cg1 = zone2cg[zone+1];
/* cg1_p1 bounds the cgs reachable within one communication pulse */
2582 cg1_p1 = cg0 + zone_ncg1[zone];
2584 for (cg = cg0; cg < cg1; cg++)
2589 /* Signal that this cg is from more than one pulse away */
2592 cg_gl = index_gl[cg];
2595 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2598 ga2la_set(dd->ga2la, a_gl, a, zone1);
/* No charge groups: the cg index is the atom index */
2604 gatindex[a] = cg_gl;
2605 ga2la_set(dd->ga2la, cg_gl, a, zone1);
/* Debug consistency check of the bLocalCG flags: every local cg must be
 * marked, and the number of marked cgs must equal ncg_tot. Returns the
 * error count.
 */
2612 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2615 int ncg, i, ngl, nerr;
2618 if (bLocalCG == NULL)
2622 for (i = 0; i < dd->ncg_tot; i++)
2624 if (!bLocalCG[dd->index_gl[i]])
2627 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
/* Count how many cgs in the whole system are flagged local */
2632 for (i = 0; i < ncg_sys; i++)
2639 if (ngl != dd->ncg_tot)
2641 fprintf(stderr, "DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
/* Debug check that gatindex and ga2la are mutually consistent: no
 * duplicate global atoms, every ga2la entry round-trips through
 * gatindex, and the counts match. Fatal error when inconsistencies
 * are found.
 */
2648 static void check_index_consistency(gmx_domdec_t *dd,
2649 int natoms_sys, int ncg_sys,
2652 int nerr, ngl, i, a, cell;
2657 if (dd->comm->DD_debug > 1)
2659 snew(have, natoms_sys);
2660 for (a = 0; a < dd->nat_tot; a++)
/* A global atom listed twice locally means a corrupt index */
2662 if (have[dd->gatindex[a]] > 0)
2664 fprintf(stderr, "DD node %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2668 have[dd->gatindex[a]] = a + 1;
2674 snew(have, dd->nat_tot);
/* Cross-check every ga2la entry against gatindex */
2677 for (i = 0; i < natoms_sys; i++)
2679 if (ga2la_get(dd->ga2la, i, &a, &cell))
2681 if (a >= dd->nat_tot)
2683 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2689 if (dd->gatindex[a] != i)
2691 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2698 if (ngl != dd->nat_tot)
2701 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2702 dd->rank, where, ngl, dd->nat_tot);
2704 for (a = 0; a < dd->nat_tot; a++)
2709 "DD node %d, %s: local atom %d, global %d has no global index\n",
2710 dd->rank, where, a+1, dd->gatindex[a]+1);
2715 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2719 gmx_fatal(FARGS, "DD node %d, %s: %d atom/cg index inconsistencies",
2720 dd->rank, where, nerr);
/* Invalidate the local index tables from cg_start/a_start onwards:
 * clear ga2la (fully, or entry by entry), unset bLocalCG flags and
 * drop local vsite/constraint indices.
 */
2724 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2731 /* Clear the whole list without searching */
2732 ga2la_clear(dd->ga2la);
2736 for (i = a_start; i < dd->nat_tot; i++)
2738 ga2la_del(dd->ga2la, dd->gatindex[i]);
2742 bLocalCG = dd->comm->bLocalCG;
2745 for (i = cg_start; i < dd->ncg_tot; i++)
2747 bLocalCG[dd->index_gl[i]] = FALSE;
2751 dd_clear_local_vsite_indices(dd);
2753 if (dd->constraints)
2755 dd_clear_local_constraint_indices(dd);
2759 /* This function should be used for moving the domain boudaries during DLB,
2760 * for obtaining the minimum cell size. It checks the initially set limit
2761 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2762 * and, possibly, a longer cut-off limit set for PME load balancing.
2764 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2768 cellsize_min = comm->cellsize_min[dim];
2770 if (!comm->bVacDLBNoLimit)
2772 /* The cut-off might have changed, e.g. by PME load balacning,
2773 * from the value used to set comm->cellsize_min, so check it.
2775 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2777 if (comm->bPMELoadBalDLBLimits)
2779 /* Check for the cut-off limit set by the PME load balancing */
2780 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2784 return cellsize_min;
/* Minimum distance allowed between staggered cell boundaries of
 * diagonal neighbor cells during DLB, derived from the cut-off and the
 * number of communication pulses.
 */
2787 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2790 real grid_jump_limit;
2792 /* The distance between the boundaries of cells at distance
2793 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2794 * and by the fact that cells should not be shifted by more than
2795 * half their size, such that cg's only shift by one cell
2796 * at redecomposition.
2798 grid_jump_limit = comm->cellsize_limit;
2799 if (!comm->bVacDLBNoLimit)
2801 if (comm->bPMELoadBalDLBLimits)
2803 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2805 grid_jump_limit = max(grid_jump_limit,
2806 cutoff/comm->cd[dim_ind].np);
2809 return grid_jump_limit;
/* Verify that the DLB cell boundaries have not shifted beyond the
 * jump limit relative to the neighbor extremes; fatal error (or a
 * returned flag — the control flow around the error is partly outside
 * this view) when violated.
 */
2812 static gmx_bool check_grid_jump(gmx_large_int_t step,
2818 gmx_domdec_comm_t *comm;
2827 for (d = 1; d < dd->ndim; d++)
2830 limit = grid_jump_limit(comm, cutoff, d);
2831 bfac = ddbox->box_size[dim];
/* Scale by the skew factor for triclinic directions */
2832 if (ddbox->tric_dir[dim])
2834 bfac *= ddbox->skew_fac[dim];
2836 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2837 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2845 /* This error should never be triggered under normal
2846 * circumstances, but you never know ...
2848 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
2849 gmx_step_str(step, buf),
2850 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Number of load measurements taken since the last balancing: flop
 * counts when flop counting is enabled, force cycle counts otherwise. */
2858 static int dd_load_count(gmx_domdec_comm_t *comm)
2860 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
/* Return this rank's force-calculation load measure for DLB: either
 * flops (optionally with artificial noise for testing) or force cycle
 * counts with the per-interval maximum removed; when a GPU is shared,
 * the GPU wait time is replaced by the average over the sharing ranks.
 */
2863 static float dd_force_load(gmx_domdec_comm_t *comm)
2870 if (comm->eFlop > 1)
/* eFlop > 1 adds +-5% random noise to stress-test the DLB */
2872 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2877 load = comm->cycl[ddCyclF];
2878 if (comm->cycl_n[ddCyclF] > 1)
2880 /* Subtract the maximum of the last n cycle counts
2881 * to get rid of possible high counts due to other sources,
2882 * for instance system activity, that would otherwise
2883 * affect the dynamic load balancing.
2885 load -= comm->cycl_max[ddCyclF];
2889 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2891 float gpu_wait, gpu_wait_sum;
2893 gpu_wait = comm->cycl[ddCyclWaitGPU];
2894 if (comm->cycl_n[ddCyclF] > 1)
2896 /* We should remove the WaitGPU time of the same MD step
2897 * as the one with the maximum F time, since the F time
2898 * and the wait time are not independent.
2899 * Furthermore, the step for the max F time should be chosen
2900 * the same on all ranks that share the same GPU.
2901 * But to keep the code simple, we remove the average instead.
2902 * The main reason for artificially long times at some steps
2903 * is spurious CPU activity or MPI time, so we don't expect
2904 * that changes in the GPU wait time matter a lot here.
2906 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2908 /* Sum the wait times over the ranks that share the same GPU */
2909 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2910 comm->mpi_comm_gpu_shared);
2911 /* Replace the wait time by the average over the ranks */
2912 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
/* Build the cumulative cell-boundary fractions along dim for static
 * load balancing: uses the user-supplied slb_frac when present,
 * otherwise a uniform grid. Allocates and returns *dim_f of length
 * nc[dim]+1 with (*dim_f)[0]=0 (set outside this view) and
 * (*dim_f)[nc[dim]]=1.
 */
2920 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2922 gmx_domdec_comm_t *comm;
2927 snew(*dim_f, dd->nc[dim]+1);
2929 for (i = 1; i < dd->nc[dim]; i++)
2931 if (comm->slb_frac[dim])
2933 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2937 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2940 (*dim_f)[dd->nc[dim]] = 1;
/* Initialize the PME slab communication setup for PME decomposition
 * dimension dimind: determine the number of slabs and, per slab, the
 * range of PP cell coordinates (pp_min/pp_max) that map to it.
 */
2943 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2945 int pmeindex, slab, nso, i;
2948 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2954 ddpme->dim = dimind;
2956 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2958 ddpme->nslab = (ddpme->dim == 0 ?
2959 dd->comm->npmenodes_x :
2960 dd->comm->npmenodes_y);
2962 if (ddpme->nslab <= 1)
2967 nso = dd->comm->npmenodes/ddpme->nslab;
2968 /* Determine for each PME slab the PP location range for dimension dim */
2969 snew(ddpme->pp_min, ddpme->nslab);
2970 snew(ddpme->pp_max, ddpme->nslab);
/* Start with an empty (inverted) range, then widen with min/max below */
2971 for (slab = 0; slab < ddpme->nslab; slab++)
2973 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2974 ddpme->pp_max[slab] = 0;
2976 for (i = 0; i < dd->nnodes; i++)
2978 ddindex2xyz(dd->nc, i, xyz);
2979 /* For y only use our y/z slab.
2980 * This assumes that the PME x grid size matches the DD grid size.
2982 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2984 pmeindex = ddindex2pmeindex(dd, i);
2987 slab = pmeindex/nso;
2991 slab = pmeindex % ddpme->nslab;
2993 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
2994 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
2998 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
/* Maximum PME slab shift in x, or 0 (returned outside this view) when
 * PME is not decomposed along x. */
3001 int dd_pme_maxshift_x(gmx_domdec_t *dd)
3003 if (dd->comm->ddpme[0].dim == XX)
3005 return dd->comm->ddpme[0].maxshift;
/* Maximum PME slab shift in y, taken from whichever PME decomposition
 * dimension (0 or 1) is along y. */
3013 int dd_pme_maxshift_y(gmx_domdec_t *dd)
3015 if (dd->comm->ddpme[0].dim == YY)
3017 return dd->comm->ddpme[0].maxshift;
3019 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
3021 return dd->comm->ddpme[1].maxshift;
/* Determine ddpme->maxshift: how many PME slabs beyond its own a PP
 * cell may need to communicate with, given the current cell boundary
 * fractions cell_f and the cut-off.
 */
3029 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
3030 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
3032 gmx_domdec_comm_t *comm;
3035 real range, pme_boundary;
3039 nc = dd->nc[ddpme->dim];
3042 if (!ddpme->dim_match)
3044 /* PP decomposition is not along dim: the worst situation */
3047 else if (ns <= 3 || (bUniform && ns == nc))
3049 /* The optimal situation */
3054 /* We need to check for all pme nodes which nodes they
3055 * could possibly need to communicate with.
3057 xmin = ddpme->pp_min;
3058 xmax = ddpme->pp_max;
3059 /* Allow for atoms to be maximally 2/3 times the cut-off
3060 * out of their DD cell. This is a reasonable balance between
3061 * between performance and support for most charge-group/cut-off
3064 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3065 /* Avoid extra communication when we are exactly at a boundary */
/* Increase the shift until no slab boundary is within range of the
 * PP cells sh+1 slabs away (with periodic wrapping) */
3069 for (s = 0; s < ns; s++)
3071 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3072 pme_boundary = (real)s/ns;
3075 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3077 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3081 pme_boundary = (real)(s+1)/ns;
3084 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3086 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3093 ddpme->maxshift = sh;
3097 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3098 ddpme->dim, ddpme->maxshift);
/* Fatal error if, in any decomposed bounded dimension, the (skewed)
 * box is too small to hold nc cells of the minimum allowed size.
 */
3102 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3106 for (d = 0; d < dd->ndim; d++)
3109 if (dim < ddbox->nboundeddim &&
3110 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3111 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3113 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3114 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3115 dd->nc[dim], dd->comm->cellsize_limit);
/* Compute the static (non-DLB) cell boundaries for every dimension:
 * uniform division, or user slb_frac fractions when given. Also
 * determines npulse (communication pulses needed to reach the cut-off)
 * and the minimum cell size per dimension. With bMaster the full
 * boundary arrays are stored in dd->ma for the master distribution.
 */
3120 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3121 gmx_bool bMaster, ivec npulse)
3123 gmx_domdec_comm_t *comm;
3126 real *cell_x, cell_dx, cellsize;
3130 for (d = 0; d < DIM; d++)
3132 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
3134 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
/* Uniform grid */
3137 cell_dx = ddbox->box_size[d]/dd->nc[d];
3140 for (j = 0; j < dd->nc[d]+1; j++)
3142 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3147 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3148 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
3150 cellsize = cell_dx*ddbox->skew_fac[d];
/* More pulses are needed when a cell is thinner than the cut-off */
3151 while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
3155 cellsize_min[d] = cellsize;
3159 /* Statically load balanced grid */
3160 /* Also when we are not doing a master distribution we determine
3161 * all cell borders in a loop to obtain identical values
3162 * to the master distribution case and to determine npulse.
3166 cell_x = dd->ma->cell_x[d];
3170 snew(cell_x, dd->nc[d]+1);
3172 cell_x[0] = ddbox->box0[d];
3173 for (j = 0; j < dd->nc[d]; j++)
3175 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3176 cell_x[j+1] = cell_x[j] + cell_dx;
3177 cellsize = cell_dx*ddbox->skew_fac[d];
3178 while (cellsize*npulse[d] < comm->cutoff &&
3179 npulse[d] < dd->nc[d]-1)
3183 cellsize_min[d] = min(cellsize_min[d], cellsize);
3187 comm->cell_x0[d] = cell_x[dd->ci[d]];
3188 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3192 /* The following limitation is to avoid that a cell would receive
3193 * some of its own home charge groups back over the periodic boundary.
3194 * Double charge groups cause trouble with the global indices.
3196 if (d < ddbox->npbcdim &&
3197 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3199 gmx_fatal_collective(FARGS, NULL, dd,
3200 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3201 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3203 dd->nc[d], dd->nc[d],
3204 dd->nnodes > dd->nc[d] ? "cells" : "processors");
3208 if (!comm->bDynLoadBal)
3210 copy_rvec(cellsize_min, comm->cellsize_min);
3213 for (d = 0; d < comm->npmedecompdim; d++)
3215 set_pme_maxshift(dd, &comm->ddpme[d],
3216 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3217 comm->ddpme[d].slb_dim_f);
/* Enforce cell-size limits on the relative DLB cell boundaries
 * root->cell_f within the half-open index range [range[0], range[1])
 * along decomposition dimension 'dim'.
 * First iteratively pins cells that would fall below cellsize_limit_f
 * to that limit (rescaling the remaining cells), then checks the
 * half-way displacement rule and the staggering bounds, recursing on
 * sub-ranges where bound violations are found.
 * NOTE(review): this listing is elided; intermediate source lines are
 * missing between the numbered lines below.
 */
3222 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3223                                                   int d, int dim, gmx_domdec_root_t *root,
3225                                                   gmx_bool bUniform, gmx_large_int_t step, real cellsize_limit_f, int range[])
3227     gmx_domdec_comm_t *comm;
3228     int                ncd, i, j, nmin, nmin_old;
3229     gmx_bool           bLimLo, bLimHi;
3231     real               fac, halfway, cellsize_limit_f_i, region_size;
3232     gmx_bool           bPBC, bLastHi = FALSE;
3233     int                nrange[] = {range[0], range[1]};
3235     region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
     /* Outermost cells along non-PBC dimensions are exempt from the limit */
3241     bPBC = (dim < ddbox->npbcdim);
3243     cell_size = root->buf_ncd;
3247         fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3250     /* First we need to check if the scaling does not make cells
3251      * smaller than the smallest allowed size.
3252      * We need to do this iteratively, since if a cell is too small,
3253      * it needs to be enlarged, which makes all the other cells smaller,
3254      * which could in turn make another cell smaller than allowed.
3256     for (i = range[0]; i < range[1]; i++)
3258         root->bCellMin[i] = FALSE;
3264         /* We need the total for normalization */
3266         for (i = range[0]; i < range[1]; i++)
3268             if (root->bCellMin[i] == FALSE)
3270                 fac += cell_size[i];
3273         fac = ( region_size - nmin*cellsize_limit_f)/fac; /* subtracting cells already set to cellsize_limit_f */
3274         /* Determine the cell boundaries */
3275         for (i = range[0]; i < range[1]; i++)
3277             if (root->bCellMin[i] == FALSE)
3279                 cell_size[i] *= fac;
3280                 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3282                     cellsize_limit_f_i = 0;
3286                     cellsize_limit_f_i = cellsize_limit_f;
3288                 if (cell_size[i] < cellsize_limit_f_i)
3290                     root->bCellMin[i] = TRUE;
3291                     cell_size[i]      = cellsize_limit_f_i;
3295             root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
     /* Repeat until no new cells get pinned to the minimum size */
3298     while (nmin > nmin_old);
3301     cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3302     /* For this check we should not use DD_CELL_MARGIN,
3303      * but a slightly smaller factor,
3304      * since rounding could get us below the limit.
3306     if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3309         gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3310                   gmx_step_str(step, buf),
3311                   dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3312                   ncd, comm->cellsize_min[dim]);
3315     root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3319         /* Check if the boundary did not displace more than halfway
3320          * each of the cells it bounds, as this could cause problems,
3321          * especially when the differences between cell sizes are large.
3322          * If changes are applied, they will not make cells smaller
3323          * than the cut-off, as we check all the boundaries which
3324          * might be affected by a change and if the old state was ok,
3325          * the cells will at most be shrunk back to their old size.
3327         for (i = range[0]+1; i < range[1]; i++)
3329             halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3330             if (root->cell_f[i] < halfway)
3332                 root->cell_f[i] = halfway;
3333                 /* Check if the change also causes shifts of the next boundaries */
3334                 for (j = i+1; j < range[1]; j++)
3336                     if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3338                         root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3342             halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3343             if (root->cell_f[i] > halfway)
3345                 root->cell_f[i] = halfway;
3346                 /* Check if the change also causes shifts of the next boundaries */
3347                 for (j = i-1; j >= range[0]+1; j--)
3349                     if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3351                         root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3358     /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3359     /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3360      * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3361      * for a and b nrange is used */
3364         /* Take care of the staggering of the cell boundaries */
3367             for (i = range[0]; i < range[1]; i++)
3369                 root->cell_f_max0[i] = root->cell_f[i];
3370                 root->cell_f_min1[i] = root->cell_f[i+1];
3375             for (i = range[0]+1; i < range[1]; i++)
3377                 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3378                 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3379                 if (bLimLo && bLimHi)
3381                     /* Both limits violated, try the best we can */
3382                     /* For this case we split the original range (range) in two parts and care about the other limitations in the next iteration. */
3383                     root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3384                     nrange[0]       = range[0];
3386                     dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3389                     nrange[1] = range[1];
3390                     dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3396                     /* root->cell_f[i] = root->bound_min[i]; */
3397                     nrange[1] = i;  /* only store violation location. There could be a LimLo violation following with a higher index */
3400                 else if (bLimHi && !bLastHi)
3403                     if (nrange[1] < range[1])   /* found a LimLo before */
3405                         root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3406                         dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3407                         nrange[0] = nrange[1];
3409                     root->cell_f[i] = root->bound_max[i];
3411                     dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3413                     nrange[1] = range[1];
3416             if (nrange[1] < range[1])   /* found last a LimLo */
3418                 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3419                 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3420                 nrange[0] = nrange[1];
3421                 nrange[1] = range[1];
3422                 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3424             else if (nrange[0] > range[0]) /* found at least one LimHi */
3426                 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
/* Master (row-root) side of dynamic load balancing for one DD dimension:
 * computes new relative cell boundaries root->cell_f from the measured
 * loads (underrelaxed, change-limited), enforces minimum cell sizes and
 * staggering bounds via dd_cell_sizes_dlb_root_enforce_limits(), and
 * appends lower-dimension fractions plus PME maxshift values to the
 * cell_f buffer for broadcasting.
 * NOTE(review): elided listing; intermediate source lines are missing.
 */
3433 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3434                                        int d, int dim, gmx_domdec_root_t *root,
3435                                        gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3436                                        gmx_bool bUniform, gmx_large_int_t step)
3438     gmx_domdec_comm_t *comm;
3439     int                ncd, d1, i, j, pos;
3441     real               load_aver, load_i, imbalance, change, change_max, sc;
3442     real               cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3446     int                range[] = { 0, 0 };
3450     /* Convert the maximum change from the input percentage to a fraction */
3451     change_limit = comm->dlb_scale_lim*0.01;
3455     bPBC = (dim < ddbox->npbcdim);
3457     cell_size = root->buf_ncd;
3459     /* Store the original boundaries */
3460     for (i = 0; i < ncd+1; i++)
3462         root->old_cell_f[i] = root->cell_f[i];
     /* Uniform grid requested: equal-size cells */
3466         for (i = 0; i < ncd; i++)
3468             cell_size[i] = 1.0/ncd;
3471     else if (dd_load_count(comm))
3473         load_aver  = comm->load[d].sum_m/ncd;
3475         for (i = 0; i < ncd; i++)
3477             /* Determine the relative imbalance of cell i */
3478             load_i    = comm->load[d].load[i*comm->load[d].nload+2];
3479             imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3480             /* Determine the change of the cell size using underrelaxation */
3481             change     = -relax*imbalance;
3482             change_max = max(change_max, max(change, -change));
3484         /* Limit the amount of scaling.
3485          * We need to use the same rescaling for all cells in one row,
3486          * otherwise the load balancing might not converge.
3489         if (change_max > change_limit)
3491             sc *= change_limit/change_max;
3493         for (i = 0; i < ncd; i++)
3495             /* Determine the relative imbalance of cell i */
3496             load_i    = comm->load[d].load[i*comm->load[d].nload+2];
3497             imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3498             /* Determine the change of the cell size using underrelaxation */
3499             change       = -sc*imbalance;
3500             cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
     /* Convert absolute minimum cell size and grid-jump distance to
      * box fractions, including safety margins and triclinic skew */
3504     cellsize_limit_f  = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3505     cellsize_limit_f *= DD_CELL_MARGIN;
3506     dist_min_f_hard   = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3507     dist_min_f        = dist_min_f_hard * DD_CELL_MARGIN;
3508     if (ddbox->tric_dir[dim])
3510         cellsize_limit_f /= ddbox->skew_fac[dim];
3511         dist_min_f       /= ddbox->skew_fac[dim];
3513     if (bDynamicBox && d > 0)
3515         dist_min_f *= DD_PRES_SCALE_MARGIN;
3517     if (d > 0 && !bUniform)
3519         /* Make sure that the grid is not shifted too much */
3520         for (i = 1; i < ncd; i++)
3522             if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3524                 gmx_incons("Inconsistent DD boundary staggering limits!");
3526             root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3527             space              = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3530                 root->bound_min[i] += 0.5*space;
3532             root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3533             space              = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3536                 root->bound_max[i] += 0.5*space;
3541                         "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3543                         root->cell_f_max0[i-1] + dist_min_f,
3544                         root->bound_min[i], root->cell_f[i], root->bound_max[i],
3545                         root->cell_f_min1[i] - dist_min_f);
3550     root->cell_f[0]   = 0;
3551     root->cell_f[ncd] = 1;
3552     dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3555     /* After the checks above, the cells should obey the cut-off
3556      * restrictions, but it does not hurt to check.
3558     for (i = 0; i < ncd; i++)
3562             fprintf(debug, "Relative bounds dim %d  cell %d: %f %f\n",
3563                     dim, i, root->cell_f[i], root->cell_f[i+1]);
3566         if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3567             root->cell_f[i+1] - root->cell_f[i] <
3568             cellsize_limit_f/DD_CELL_MARGIN)
3572                     "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3573                     gmx_step_str(step, buf), dim2char(dim), i,
3574                     (root->cell_f[i+1] - root->cell_f[i])
3575                     *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3580     /* Store the cell boundaries of the lower dimensions at the end */
3581     for (d1 = 0; d1 < d; d1++)
3583         root->cell_f[pos++] = comm->cell_f0[d1];
3584         root->cell_f[pos++] = comm->cell_f1[d1];
3587     if (d < comm->npmedecompdim)
3589         /* The master determines the maximum shift for
3590          * the coordinate communication between separate PME nodes.
3592         set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
     /* The PME maxshift values ride along in the real-valued cell_f buffer */
3594         root->cell_f[pos++] = comm->ddpme[0].maxshift;
3597         root->cell_f[pos++] = comm->ddpme[1].maxshift;
/* Convert this rank's relative cell fractions (comm->cell_f0/f1) for
 * decomposition index 'dimind' into absolute coordinates cell_x0/x1,
 * offsetting by box0 for dimensions that are not bounded by the box.
 */
3601 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3602                                              gmx_ddbox_t *ddbox, int dimind)
3604     gmx_domdec_comm_t *comm;
3609     /* Set the cell dimensions */
3610     dim                = dd->dim[dimind];
3611     comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3612     comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3613     if (dim >= ddbox->nboundeddim)
3615         comm->cell_x0[dim] += ddbox->box0[dim];
3616         comm->cell_x1[dim] += ddbox->box0[dim];
/* Broadcast the DLB cell-fraction row from the row root to all ranks in
 * the load-communicator for dimension d, then unpack this rank's own
 * fractions, the lower-dimension fractions, and the PME maxshift values.
 * NOTE(review): elided listing; intermediate source lines are missing.
 */
3620 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3621                                          int d, int dim, real *cell_f_row,
3624     gmx_domdec_comm_t *comm;
3630     /* Each node would only need to know two fractions,
3631      * but it is probably cheaper to broadcast the whole array.
     /* Broadcast as raw bytes: count is elements * sizeof(real) with MPI_BYTE */
3633     MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3634               0, comm->mpi_comm_load[d]);
3636     /* Copy the fractions for this dimension from the buffer */
3637     comm->cell_f0[d] = cell_f_row[dd->ci[dim]  ];
3638     comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3639     /* The whole array was communicated, so set the buffer position */
3640     pos = dd->nc[dim] + 1;
3641     for (d1 = 0; d1 <= d; d1++)
3645             /* Copy the cell fractions of the lower dimensions */
3646             comm->cell_f0[d1] = cell_f_row[pos++];
3647             comm->cell_f1[d1] = cell_f_row[pos++];
3649         relative_to_absolute_cell_bounds(dd, ddbox, d1);
3651     /* Convert the communicated shift from float to int */
3652     comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3655         comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
/* Recompute DLB cell sizes for all DD dimensions: row roots compute the
 * new boundaries (set_dd_cell_sizes_dlb_root), then every row member
 * receives and applies them (distribute_dd_cell_sizes_dlb).
 * NOTE(review): elided listing; the bRowMember/bRowRoot determination
 * between lines 3670-3690 is not visible here.
 */
3659 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3660                                          gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3661                                          gmx_bool bUniform, gmx_large_int_t step)
3663     gmx_domdec_comm_t *comm;
3665     gmx_bool           bRowMember, bRowRoot;
3670     for (d = 0; d < dd->ndim; d++)
3675         for (d1 = d; d1 < dd->ndim; d1++)
3677             if (dd->ci[dd->dim[d1]] > 0)
3690                 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3691                                            ddbox, bDynamicBox, bUniform, step);
3692                 cell_f_row = comm->root[d]->cell_f;
3696                 cell_f_row = comm->cell_f_row;
3698             distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
/* Refresh the absolute cell bounds from the existing relative fractions
 * without recomputing the DLB boundaries themselves.
 */
3703 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3707     /* This function assumes the box is static and should therefore
3708      * not be called when the box has changed since the last
3709      * call to dd_partition_system.
3711     for (d = 0; d < dd->ndim; d++)
3713         relative_to_absolute_cell_bounds(dd, ddbox, d);
/* Top-level DLB cell-size update: when balancing is requested the new
 * boundaries are computed and communicated (timed under the DD
 * communicate-bounds cycle counter); otherwise only the absolute bounds
 * are refreshed for a dynamic box. Dimensions without decomposition
 * span the whole box.
 * NOTE(review): elided listing; intermediate source lines are missing.
 */
3719 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3720                                   gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3721                                   gmx_bool bUniform, gmx_bool bDoDLB, gmx_large_int_t step,
3722                                   gmx_wallcycle_t wcycle)
3724     gmx_domdec_comm_t *comm;
3731         wallcycle_start(wcycle, ewcDDCOMMBOUND);
3732         set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3733         wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3735     else if (bDynamicBox)
3737         set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3740     /* Set the dimensions for which no DD is used */
3741     for (dim = 0; dim < DIM; dim++)
3743         if (dd->nc[dim] == 1)
3745             comm->cell_x0[dim] = 0;
3746             comm->cell_x1[dim] = ddbox->box_size[dim];
3747             if (dim >= ddbox->nboundeddim)
3749                 comm->cell_x0[dim] += ddbox->box0[dim];
3750                 comm->cell_x1[dim] += ddbox->box0[dim];
/* Grow the per-dimension communication index arrays (cd->ind) so each
 * dimension can hold npulse[] communication pulses; newly added entries
 * are zero-initialized. Prints a one-time notice on the master when the
 * pulse count increases.
 */
3756 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3759     gmx_domdec_comm_dim_t *cd;
3761     for (d = 0; d < dd->ndim; d++)
3763         cd        = &dd->comm->cd[d];
3764         np        = npulse[dd->dim[d]];
3765         if (np > cd->np_nalloc)
3769                 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3770                         dim2char(dd->dim[d]), np);
3772             if (DDMASTER(dd) && cd->np_nalloc > 0)
3774                 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3776             srenew(cd->ind, np);
3777             for (i = cd->np_nalloc; i < np; i++)
3779                 cd->ind[i].index  = NULL;
3780                 cd->ind[i].nalloc = 0;
/* Update the DD cell boundaries for this step. Saves the previous
 * bounds (used by the charge-group displacement check), then dispatches
 * to the DLB path or the static load-balancing path; the static path
 * also resizes the communication pulse arrays.
 * NOTE(review): elided listing; intermediate source lines are missing.
 */
3789 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3790                               gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3791                               gmx_bool bUniform, gmx_bool bDoDLB, gmx_large_int_t step,
3792                               gmx_wallcycle_t wcycle)
3794     gmx_domdec_comm_t *comm;
3800     /* Copy the old cell boundaries for the cg displacement check */
3801     copy_rvec(comm->cell_x0, comm->old_cell_x0);
3802     copy_rvec(comm->cell_x1, comm->old_cell_x1);
3804     if (comm->bDynLoadBal)
3808             check_box_size(dd, ddbox);
3810         set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3814         set_dd_cell_sizes_slb(dd, ddbox, FALSE, npulse);
3815         realloc_comm_ind(dd, npulse);
3820         for (d = 0; d < DIM; d++)
3822             fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3823                     d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
/* Validate the DLB cell sizes against the minimum allowed size (fatal
 * if violated) and, when cells are staggered or the box is not fully
 * bounded, communicate neighbor boundaries to obtain the cell extents
 * used for neighbor searching (cell_ns_x0/x1).
 * NOTE(review): elided listing; intermediate source lines are missing.
 */
3828 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3830                                   rvec cell_ns_x0, rvec cell_ns_x1,
3831                                   gmx_large_int_t step)
3833     gmx_domdec_comm_t *comm;
3838     for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3840         dim = dd->dim[dim_ind];
3842         /* Without PBC we don't have restrictions on the outer cells */
3843         if (!(dim >= ddbox->npbcdim &&
3844               (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3845             comm->bDynLoadBal &&
3846             (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3847             comm->cellsize_min[dim])
3850             gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3851                       gmx_step_str(step, buf), dim2char(dim),
3852                       comm->cell_x1[dim] - comm->cell_x0[dim],
3853                       ddbox->skew_fac[dim],
3854                       dd->comm->cellsize_min[dim],
3855                       dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3859     if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3861         /* Communicate the boundaries and update cell_ns_x0/1 */
3862         dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3863         if (dd->bGridJump && dd->ndim > 1)
3865             check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
/* Build the triclinic correction matrix tcm used to convert Cartesian
 * coordinates to (fractional) lattice coordinates along each box vector.
 * NOTE(review): elided listing; diagonal setup and npbcdim handling are
 * not visible here.
 */
3870 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3874     tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3882         tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3883         tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
/* Verify the unit cell is compatible with screw PBC: no off-diagonal
 * x-components (mathematical limitation) and zero box_zy (limitation of
 * the eighth-shell method). Fatal error otherwise.
 */
3892 static void check_screw_box(matrix box)
3894     /* Mathematical limitation */
3895     if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3897         gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3900     /* Limitation due to the asymmetry of the eighth shell method */
3901     if (box[ZZ][YY] != 0)
3903         gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
/* Master-only initial distribution of charge groups over DD cells:
 * computes each charge group's center of geometry, wraps it (and its
 * atoms) into the box — with screw-PBC mirroring of y/z — locates its
 * cell via the master cell boundaries (ma->cell_x), and fills the
 * master arrays ma->ncg/nat/index/cg. Per-node counts are logged.
 * NOTE(review): elided listing; intermediate source lines are missing.
 */
3907 static void distribute_cg(FILE *fplog, gmx_large_int_t step,
3908                           matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3911     gmx_domdec_master_t *ma;
3912     int                **tmp_ind = NULL, *tmp_nalloc = NULL;
3913     int                  i, icg, j, k, k0, k1, d, npbcdim;
3915     rvec                 box_size, cg_cm;
3917     real                 nrcg, inv_ncg, pos_d;
3919     gmx_bool             bUnbounded, bScrew;
3923     if (tmp_ind == NULL)
3925         snew(tmp_nalloc, dd->nnodes);
3926         snew(tmp_ind, dd->nnodes);
3927         for (i = 0; i < dd->nnodes; i++)
3929             tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3930             snew(tmp_ind[i], tmp_nalloc[i]);
3934     /* Clear the count */
3935     for (i = 0; i < dd->nnodes; i++)
3941     make_tric_corr_matrix(dd->npbcdim, box, tcm);
3943     cgindex = cgs->index;
3945     /* Compute the center of geometry for all charge groups */
3946     for (icg = 0; icg < cgs->nr; icg++)
3949         k1      = cgindex[icg+1];
     /* Single-atom charge group: center is just the atom position */
3953             copy_rvec(pos[k0], cg_cm);
3960             for (k = k0; (k < k1); k++)
3962                 rvec_inc(cg_cm, pos[k]);
3964             for (d = 0; (d < DIM); d++)
3966                 cg_cm[d] *= inv_ncg;
3969         /* Put the charge group in the box and determine the cell index */
3970         for (d = DIM-1; d >= 0; d--)
3973             if (d < dd->npbcdim)
3975                 bScrew = (dd->bScrewPBC && d == XX);
3976                 if (tric_dir[d] && dd->nc[d] > 1)
3978                     /* Use triclinic coordinates for this dimension */
3979                     for (j = d+1; j < DIM; j++)
3981                         pos_d += cg_cm[j]*tcm[j][d];
3984                 while (pos_d >= box[d][d])
3987                     rvec_dec(cg_cm, box[d]);
     /* Screw PBC: crossing x also mirrors y and z */
3990                         cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3991                         cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3993                     for (k = k0; (k < k1); k++)
3995                         rvec_dec(pos[k], box[d]);
3998                             pos[k][YY] = box[YY][YY] - pos[k][YY];
3999                             pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4006                     rvec_inc(cg_cm, box[d]);
4009                         cg_cm[YY] = box[YY][YY] - cg_cm[YY];
4010                         cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
4012                     for (k = k0; (k < k1); k++)
4014                         rvec_inc(pos[k], box[d]);
4017                             pos[k][YY] = box[YY][YY] - pos[k][YY];
4018                             pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4023             /* This could be done more efficiently */
4025             while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
4030         i = dd_index(dd->nc, ind);
4031         if (ma->ncg[i] == tmp_nalloc[i])
4033             tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
4034             srenew(tmp_ind[i], tmp_nalloc[i]);
4036         tmp_ind[i][ma->ncg[i]] = icg;
4038         ma->nat[i] += cgindex[icg+1] - cgindex[icg];
4042     for (i = 0; i < dd->nnodes; i++)
4045         for (k = 0; k < ma->ncg[i]; k++)
4047             ma->cg[k1++] = tmp_ind[i][k];
4050     ma->index[dd->nnodes] = k1;
4052     for (i = 0; i < dd->nnodes; i++)
4062         fprintf(fplog, "Charge group distribution at step %s:",
4063                 gmx_step_str(step, buf));
4064         for (i = 0; i < dd->nnodes; i++)
4066             fprintf(fplog, " %d", ma->ncg[i]);
4068         fprintf(fplog, "\n");
/* Collective initial partitioning: the master distributes charge groups
 * over the grid (distribute_cg), then scatters per-node cg/atom counts
 * and each node's global charge-group indices; every node finally
 * builds its local cgindex array from the global cgs->index.
 * NOTE(review): elided listing; intermediate source lines are missing.
 */
4072 static void get_cg_distribution(FILE *fplog, gmx_large_int_t step, gmx_domdec_t *dd,
4073                                 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4076     gmx_domdec_master_t *ma = NULL;
4079     int                 *ibuf, buf2[2] = { 0, 0 };
4080     gmx_bool             bMaster = DDMASTER(dd);
4087             check_screw_box(box);
4090         set_dd_cell_sizes_slb(dd, ddbox, TRUE, npulse);
4092         distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
4093         for (i = 0; i < dd->nnodes; i++)
4095             ma->ibuf[2*i]   = ma->ncg[i];
4096             ma->ibuf[2*i+1] = ma->nat[i];
     /* Scatter [ncg, nat] pairs; every rank receives its own counts */
4104     dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4106     dd->ncg_home = buf2[0];
4107     dd->nat_home = buf2[1];
4108     dd->ncg_tot  = dd->ncg_home;
4109     dd->nat_tot  = dd->nat_home;
4110     if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4112         dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4113         srenew(dd->index_gl, dd->cg_nalloc);
4114         srenew(dd->cgindex, dd->cg_nalloc+1);
4118         for (i = 0; i < dd->nnodes; i++)
4120             ma->ibuf[i]            = ma->ncg[i]*sizeof(int);
4121             ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4126                  DDMASTER(dd) ? ma->ibuf : NULL,
4127                  DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4128                  DDMASTER(dd) ? ma->cg : NULL,
4129                  dd->ncg_home*sizeof(int), dd->index_gl);
4131     /* Determine the home charge group sizes */
4133     for (i = 0; i < dd->ncg_home; i++)
4135         cg_gl            = dd->index_gl[i];
4137             dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4142         fprintf(debug, "Home charge groups:\n");
4143         for (i = 0; i < dd->ncg_home; i++)
4145             fprintf(debug, " %d", dd->index_gl[i]);
4148                 fprintf(debug, "\n");
4151         fprintf(debug, "\n");
/* Per-atom variant of the redistribute copy: for each charge group,
 * either compact its atom vectors in place (staying home) or copy them
 * into the per-direction communication buffer at vector slot 'vec' of
 * 'nvec'. Returns the new home position count.
 * NOTE(review): elided listing; intermediate source lines are missing.
 */
4155 static int compact_and_copy_vec_at(int ncg, int *move,
4158                                    rvec *src, gmx_domdec_comm_t *comm,
4161     int m, icg, i, i0, i1, nrcg;
4167     for (m = 0; m < DIM*2; m++)
4173     for (icg = 0; icg < ncg; icg++)
4175         i1 = cgindex[icg+1];
4181             /* Compact the home array in place */
4182             for (i = i0; i < i1; i++)
4184                 copy_rvec(src[i], src[home_pos++]);
4190             /* Copy to the communication buffer */
     /* Skip the cg_cm slot plus the preceding 'vec' vectors of this group */
4192             pos_vec[m] += 1 + vec*nrcg;
4193             for (i = i0; i < i1; i++)
4195                 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4197             pos_vec[m] += (nvec - vec - 1)*nrcg;
4201             home_pos += i1 - i0;
/* Per-charge-group variant of the redistribute copy: compacts cg_cm
 * entries for groups staying home and copies departing ones into the
 * communication buffers, advancing past the group's nvec state vectors.
 * Returns the new home position count.
 * NOTE(review): elided listing; intermediate source lines are missing.
 */
4209 static int compact_and_copy_vec_cg(int ncg, int *move,
4211                                    int nvec, rvec *src, gmx_domdec_comm_t *comm,
4214     int m, icg, i0, i1, nrcg;
4220     for (m = 0; m < DIM*2; m++)
4226     for (icg = 0; icg < ncg; icg++)
4228         i1 = cgindex[icg+1];
4234             /* Compact the home array in place */
4235             copy_rvec(src[icg], src[home_pos++]);
4241             /* Copy to the communication buffer */
4242             copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
4243             pos_vec[m] += 1 + nrcg*nvec;
/* Compact the home index arrays after redistribution: charge groups
 * that stay keep their local data shifted down (updating the
 * global-to-local lookup), while departing groups have their global
 * atom indices removed from ga2la and are unmarked in bLocalCG.
 * Returns the compacted home charge-group count.
 * NOTE(review): elided listing; intermediate source lines are missing.
 */
4255 static int compact_ind(int ncg, int *move,
4256                        int *index_gl, int *cgindex,
4258                        gmx_ga2la_t ga2la, char *bLocalCG,
4261     int cg, nat, a0, a1, a, a_gl;
4266     for (cg = 0; cg < ncg; cg++)
4272             /* Compact the home arrays in place.
4273              * Anything that can be done here avoids access to global arrays.
4275             cgindex[home_pos] = nat;
4276             for (a = a0; a < a1; a++)
4279                 gatindex[nat] = a_gl;
4280                 /* The cell number stays 0, so we don't need to set it */
4281                 ga2la_change_la(ga2la, a_gl, nat);
4284             index_gl[home_pos] = index_gl[cg];
4285             cginfo[home_pos]   = cginfo[cg];
4286             /* The charge group remains local, so bLocalCG does not change */
4291             /* Clear the global indices */
4292             for (a = a0; a < a1; a++)
4294                 ga2la_del(ga2la, gatindex[a]);
4298                 bLocalCG[index_gl[cg]] = FALSE;
4302     cgindex[home_pos] = nat;
/* Non-compacting counterpart of compact_ind: for charge groups that
 * moved away, remove their atoms from ga2la, unmark bLocalCG, and tag
 * the group's ns cell index so fill_grid recognizes it as moved.
 * NOTE(review): elided listing; intermediate source lines are missing.
 */
4307 static void clear_and_mark_ind(int ncg, int *move,
4308                                int *index_gl, int *cgindex, int *gatindex,
4309                                gmx_ga2la_t ga2la, char *bLocalCG,
4314     for (cg = 0; cg < ncg; cg++)
4320             /* Clear the global indices */
4321             for (a = a0; a < a1; a++)
4323                 ga2la_del(ga2la, gatindex[a]);
4327                 bLocalCG[index_gl[cg]] = FALSE;
4329             /* Signal that this cg has moved using the ns cell index.
4330              * Here we set it to -1. fill_grid will change it
4331              * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4333             cell_index[cg] = -1;
/* Print diagnostics for a charge group that moved too far between DD
 * steps: the offending atom, direction, distance outside the cell, and
 * (when available) old/new coordinates and cell boundaries.
 * NOTE(review): the message on the line numbered 4357 appears to be
 * missing the word "more" ("moved than the distance") — runtime string,
 * left unchanged here; confirm against upstream before fixing.
 */
4338 static void print_cg_move(FILE *fplog,
4340                           gmx_large_int_t step, int cg, int dim, int dir,
4341                           gmx_bool bHaveLimitdAndCMOld, real limitd,
4342                           rvec cm_old, rvec cm_new, real pos_d)
4344     gmx_domdec_comm_t *comm;
4349     fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4350     if (bHaveLimitdAndCMOld)
4352         fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4353                 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4357         fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4358                 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4360     fprintf(fplog, "distance out of cell %f\n",
4361             dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4362     if (bHaveLimitdAndCMOld)
4364         fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4365                 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4367     fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4368             cm_new[XX], cm_new[YY], cm_new[ZZ]);
4369     fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4371             comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4372     fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4374             comm->cell_x0[dim], comm->cell_x1[dim]);
/* Report a too-large charge-group displacement to the log file (when
 * open) and stderr, then abort with a fatal error suggesting the system
 * is not well equilibrated.
 */
4377 static void cg_move_error(FILE *fplog,
4379                           gmx_large_int_t step, int cg, int dim, int dir,
4380                           gmx_bool bHaveLimitdAndCMOld, real limitd,
4381                           rvec cm_old, rvec cm_new, real pos_d)
4385         print_cg_move(fplog, dd, step, cg, dim, dir,
4386                       bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4388     print_cg_move(stderr, dd, step, cg, dim, dir,
4389                   bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4391               "A charge group moved too far between two domain decomposition steps\n"
4392               "This usually means that your system is not well equilibrated");
/* Apply the screw-PBC rotation to atom 'a' in every distributed state
 * entry that is present: positions are mirrored in y/z about the box,
 * velocity-like quantities (v, sd_X, cg_p) have y/z negated; distance
 * restraint/orientation history is rotation-invariant and skipped.
 * Valid for rectangular boxes only (see comment below).
 * NOTE(review): elided listing; the switch(est) cases between the
 * numbered lines are partially missing.
 */
4395 static void rotate_state_atom(t_state *state, int a)
4399     for (est = 0; est < estNR; est++)
4401         if (EST_DISTR(est) && (state->flags & (1<<est)))
4406                     /* Rotate the complete state; for a rectangular box only */
4407                     state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4408                     state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4411                     state->v[a][YY] = -state->v[a][YY];
4412                     state->v[a][ZZ] = -state->v[a][ZZ];
4415                     state->sd_X[a][YY] = -state->sd_X[a][YY];
4416                     state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4419                     state->cg_p[a][YY] = -state->cg_p[a][YY];
4420                     state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4422                 case estDISRE_INITF:
4423                 case estDISRE_RM3TAV:
4424                 case estORIRE_INITF:
4426                     /* These are distances, so not affected by rotation */
4429                     gmx_incons("Unknown state entry encountered in rotate_state_atom");
/* Return comm->moved, grown (contents preserved via srenew) to hold at
 * least 'natoms' entries.
 */
4435 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4437     if (natoms > comm->moved_nalloc)
4439         /* Contents should be preserved here */
4440         comm->moved_nalloc = over_alloc_dd(natoms);
4441         srenew(comm->moved, comm->moved_nalloc);
/* Thread worker for dd_redistribute_cg: for charge groups in
 * [cg_start, cg_end) compute the center of geometry, wrap it (and the
 * group's atoms) back into the box — with screw-PBC rotation where
 * needed — detect DD cell boundary crossings with a fatal error when a
 * group moved further than allowed, and encode the target cell plus
 * direction flags into move[cg].
 * NOTE(review): elided listing; intermediate source lines are missing.
 */
4447 static void calc_cg_move(FILE *fplog, gmx_large_int_t step,
4450                          ivec tric_dir, matrix tcm,
4451                          rvec cell_x0, rvec cell_x1,
4452                          rvec limitd, rvec limit0, rvec limit1,
4454                          int cg_start, int cg_end,
4459     int      c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4460     int      mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4464     real     inv_ncg, pos_d;
4467     npbcdim = dd->npbcdim;
4469     for (cg = cg_start; cg < cg_end; cg++)
     /* Single-atom group: center of geometry is the atom itself */
4476             copy_rvec(state->x[k0], cm_new);
4483             for (k = k0; (k < k1); k++)
4485                 rvec_inc(cm_new, state->x[k]);
4487             for (d = 0; (d < DIM); d++)
4489                 cm_new[d] = inv_ncg*cm_new[d];
4494         /* Do pbc and check DD cell boundary crossings */
4495         for (d = DIM-1; d >= 0; d--)
4499                 bScrew = (dd->bScrewPBC && d == XX);
4500                 /* Determine the location of this cg in lattice coordinates */
4504                     for (d2 = d+1; d2 < DIM; d2++)
4506                         pos_d += cm_new[d2]*tcm[d2][d];
4509                 /* Put the charge group in the triclinic unit-cell */
4510                 if (pos_d >= cell_x1[d])
4512                     if (pos_d >= limit1[d])
4514                         cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4515                                       cg_cm[cg], cm_new, pos_d);
     /* Last cell along d: crossing wraps around the box */
4518                     if (dd->ci[d] == dd->nc[d] - 1)
4520                         rvec_dec(cm_new, state->box[d]);
4523                             cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4524                             cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4526                         for (k = k0; (k < k1); k++)
4528                             rvec_dec(state->x[k], state->box[d]);
4531                                 rotate_state_atom(state, k);
4536                 else if (pos_d < cell_x0[d])
4538                     if (pos_d < limit0[d])
4540                         cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4541                                       cg_cm[cg], cm_new, pos_d);
4546                         rvec_inc(cm_new, state->box[d]);
4549                             cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4550                             cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4552                         for (k = k0; (k < k1); k++)
4554                             rvec_inc(state->x[k], state->box[d]);
4557                                 rotate_state_atom(state, k);
4563             else if (d < npbcdim)
4565                 /* Put the charge group in the rectangular unit-cell */
4566                 while (cm_new[d] >= state->box[d][d])
4568                     rvec_dec(cm_new, state->box[d]);
4569                     for (k = k0; (k < k1); k++)
4571                         rvec_dec(state->x[k], state->box[d]);
4574                 while (cm_new[d] < 0)
4576                     rvec_inc(cm_new, state->box[d]);
4577                     for (k = k0; (k < k1); k++)
4579                         rvec_inc(state->x[k], state->box[d]);
4585         copy_rvec(cm_new, cg_cm[cg]);
4587         /* Determine where this cg should go */
4590         for (d = 0; d < dd->ndim; d++)
4595                     flag |= DD_FLAG_FW(d);
4601                 else if (dev[dim] == -1)
4603                     flag |= DD_FLAG_BW(d);
4606                     if (dd->nc[dim] > 2)
4617         /* Temporarily store the flag in move */
4618         move[cg] = mc + flag;
4622 static void dd_redistribute_cg(FILE *fplog, gmx_large_int_t step,
4623 gmx_domdec_t *dd, ivec tric_dir,
4624 t_state *state, rvec **f,
4633 int ncg[DIM*2], nat[DIM*2];
4634 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4635 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4636 int sbuf[2], rbuf[2];
4637 int home_pos_cg, home_pos_at, buf_pos;
4639 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4642 real inv_ncg, pos_d;
4644 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4646 cginfo_mb_t *cginfo_mb;
4647 gmx_domdec_comm_t *comm;
4649 int nthread, thread;
4653 check_screw_box(state->box);
4657 if (fr->cutoff_scheme == ecutsGROUP)
4662 for (i = 0; i < estNR; i++)
4668 case estX: /* Always present */ break;
4669 case estV: bV = (state->flags & (1<<i)); break;
4670 case estSDX: bSDX = (state->flags & (1<<i)); break;
4671 case estCGP: bCGP = (state->flags & (1<<i)); break;
4674 case estDISRE_INITF:
4675 case estDISRE_RM3TAV:
4676 case estORIRE_INITF:
4678 /* No processing required */
4681 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4686 if (dd->ncg_tot > comm->nalloc_int)
4688 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4689 srenew(comm->buf_int, comm->nalloc_int);
4691 move = comm->buf_int;
4693 /* Clear the count */
4694 for (c = 0; c < dd->ndim*2; c++)
4700 npbcdim = dd->npbcdim;
4702 for (d = 0; (d < DIM); d++)
4704 limitd[d] = dd->comm->cellsize_min[d];
4705 if (d >= npbcdim && dd->ci[d] == 0)
4707 cell_x0[d] = -GMX_FLOAT_MAX;
4711 cell_x0[d] = comm->cell_x0[d];
4713 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4715 cell_x1[d] = GMX_FLOAT_MAX;
4719 cell_x1[d] = comm->cell_x1[d];
4723 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4724 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4728 /* We check after communication if a charge group moved
4729 * more than one cell. Set the pre-comm check limit to float_max.
4731 limit0[d] = -GMX_FLOAT_MAX;
4732 limit1[d] = GMX_FLOAT_MAX;
4736 make_tric_corr_matrix(npbcdim, state->box, tcm);
4738 cgindex = dd->cgindex;
4740 nthread = gmx_omp_nthreads_get(emntDomdec);
4742 /* Compute the center of geometry for all home charge groups
4743 * and put them in the box and determine where they should go.
4745 #pragma omp parallel for num_threads(nthread) schedule(static)
4746 for (thread = 0; thread < nthread; thread++)
4748 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4749 cell_x0, cell_x1, limitd, limit0, limit1,
4751 ( thread *dd->ncg_home)/nthread,
4752 ((thread+1)*dd->ncg_home)/nthread,
4753 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4757 for (cg = 0; cg < dd->ncg_home; cg++)
4762 flag = mc & ~DD_FLAG_NRCG;
4763 mc = mc & DD_FLAG_NRCG;
4766 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4768 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4769 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4771 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4772 /* We store the cg size in the lower 16 bits
4773 * and the place where the charge group should go
4774 * in the next 6 bits. This saves some communication volume.
4776 nrcg = cgindex[cg+1] - cgindex[cg];
4777 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4783 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4784 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4787 for (i = 0; i < dd->ndim*2; i++)
4789 *ncg_moved += ncg[i];
4806 /* Make sure the communication buffers are large enough */
4807 for (mc = 0; mc < dd->ndim*2; mc++)
4809 nvr = ncg[mc] + nat[mc]*nvec;
4810 if (nvr > comm->cgcm_state_nalloc[mc])
4812 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4813 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4817 switch (fr->cutoff_scheme)
4820 /* Recalculating cg_cm might be cheaper than communicating,
4821 * but that could give rise to rounding issues.
4824 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4825 nvec, cg_cm, comm, bCompact);
4828 /* Without charge groups we send the moved atom coordinates
4829 * over twice. This is so the code below can be used without
4830 * many conditionals for both for with and without charge groups.
4833 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4834 nvec, state->x, comm, FALSE);
4837 home_pos_cg -= *ncg_moved;
4841 gmx_incons("unimplemented");
4847 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4848 nvec, vec++, state->x, comm, bCompact);
4851 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4852 nvec, vec++, state->v, comm, bCompact);
4856 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4857 nvec, vec++, state->sd_X, comm, bCompact);
4861 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4862 nvec, vec++, state->cg_p, comm, bCompact);
4867 compact_ind(dd->ncg_home, move,
4868 dd->index_gl, dd->cgindex, dd->gatindex,
4869 dd->ga2la, comm->bLocalCG,
4874 if (fr->cutoff_scheme == ecutsVERLET)
4876 moved = get_moved(comm, dd->ncg_home);
4878 for (k = 0; k < dd->ncg_home; k++)
4885 moved = fr->ns.grid->cell_index;
4888 clear_and_mark_ind(dd->ncg_home, move,
4889 dd->index_gl, dd->cgindex, dd->gatindex,
4890 dd->ga2la, comm->bLocalCG,
4894 cginfo_mb = fr->cginfo_mb;
4896 *ncg_stay_home = home_pos_cg;
4897 for (d = 0; d < dd->ndim; d++)
4903 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4906 /* Communicate the cg and atom counts */
4911 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4912 d, dir, sbuf[0], sbuf[1]);
4914 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4916 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4918 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4919 srenew(comm->buf_int, comm->nalloc_int);
4922 /* Communicate the charge group indices, sizes and flags */
4923 dd_sendrecv_int(dd, d, dir,
4924 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4925 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4927 nvs = ncg[cdd] + nat[cdd]*nvec;
4928 i = rbuf[0] + rbuf[1] *nvec;
4929 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4931 /* Communicate cgcm and state */
4932 dd_sendrecv_rvec(dd, d, dir,
4933 comm->cgcm_state[cdd], nvs,
4934 comm->vbuf.v+nvr, i);
4935 ncg_recv += rbuf[0];
4936 nat_recv += rbuf[1];
4940 /* Process the received charge groups */
4942 for (cg = 0; cg < ncg_recv; cg++)
4944 flag = comm->buf_int[cg*DD_CGIBS+1];
4946 if (dim >= npbcdim && dd->nc[dim] > 2)
4948 /* No pbc in this dim and more than one domain boundary.
4949 * We do a separate check if a charge group didn't move too far.
4951 if (((flag & DD_FLAG_FW(d)) &&
4952 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4953 ((flag & DD_FLAG_BW(d)) &&
4954 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4956 cg_move_error(fplog, dd, step, cg, dim,
4957 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4959 comm->vbuf.v[buf_pos],
4960 comm->vbuf.v[buf_pos],
4961 comm->vbuf.v[buf_pos][dim]);
4968 /* Check which direction this cg should go */
4969 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4973 /* The cell boundaries for dimension d2 are not equal
4974 * for each cell row of the lower dimension(s),
4975 * therefore we might need to redetermine where
4976 * this cg should go.
4979 /* If this cg crosses the box boundary in dimension d2
4980 * we can use the communicated flag, so we do not
4981 * have to worry about pbc.
4983 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4984 (flag & DD_FLAG_FW(d2))) ||
4985 (dd->ci[dim2] == 0 &&
4986 (flag & DD_FLAG_BW(d2)))))
4988 /* Clear the two flags for this dimension */
4989 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4990 /* Determine the location of this cg
4991 * in lattice coordinates
4993 pos_d = comm->vbuf.v[buf_pos][dim2];
4996 for (d3 = dim2+1; d3 < DIM; d3++)
4999 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
5002 /* Check of we are not at the box edge.
5003 * pbc is only handled in the first step above,
5004 * but this check could move over pbc while
5005 * the first step did not due to different rounding.
5007 if (pos_d >= cell_x1[dim2] &&
5008 dd->ci[dim2] != dd->nc[dim2]-1)
5010 flag |= DD_FLAG_FW(d2);
5012 else if (pos_d < cell_x0[dim2] &&
5015 flag |= DD_FLAG_BW(d2);
5017 comm->buf_int[cg*DD_CGIBS+1] = flag;
5020 /* Set to which neighboring cell this cg should go */
5021 if (flag & DD_FLAG_FW(d2))
5025 else if (flag & DD_FLAG_BW(d2))
5027 if (dd->nc[dd->dim[d2]] > 2)
5039 nrcg = flag & DD_FLAG_NRCG;
5042 if (home_pos_cg+1 > dd->cg_nalloc)
5044 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5045 srenew(dd->index_gl, dd->cg_nalloc);
5046 srenew(dd->cgindex, dd->cg_nalloc+1);
5048 /* Set the global charge group index and size */
5049 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5050 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5051 /* Copy the state from the buffer */
5052 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5053 if (fr->cutoff_scheme == ecutsGROUP)
5056 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5060 /* Set the cginfo */
5061 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5062 dd->index_gl[home_pos_cg]);
5065 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5068 if (home_pos_at+nrcg > state->nalloc)
5070 dd_realloc_state(state, f, home_pos_at+nrcg);
5072 for (i = 0; i < nrcg; i++)
5074 copy_rvec(comm->vbuf.v[buf_pos++],
5075 state->x[home_pos_at+i]);
5079 for (i = 0; i < nrcg; i++)
5081 copy_rvec(comm->vbuf.v[buf_pos++],
5082 state->v[home_pos_at+i]);
5087 for (i = 0; i < nrcg; i++)
5089 copy_rvec(comm->vbuf.v[buf_pos++],
5090 state->sd_X[home_pos_at+i]);
5095 for (i = 0; i < nrcg; i++)
5097 copy_rvec(comm->vbuf.v[buf_pos++],
5098 state->cg_p[home_pos_at+i]);
5102 home_pos_at += nrcg;
5106 /* Reallocate the buffers if necessary */
5107 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5109 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5110 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5112 nvr = ncg[mc] + nat[mc]*nvec;
5113 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5115 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5116 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5118 /* Copy from the receive to the send buffers */
5119 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5120 comm->buf_int + cg*DD_CGIBS,
5121 DD_CGIBS*sizeof(int));
5122 memcpy(comm->cgcm_state[mc][nvr],
5123 comm->vbuf.v[buf_pos],
5124 (1+nrcg*nvec)*sizeof(rvec));
5125 buf_pos += 1 + nrcg*nvec;
5132 /* With sorting (!bCompact) the indices are now only partially up to date
5133 * and ncg_home and nat_home are not the real count, since there are
5134 * "holes" in the arrays for the charge groups that moved to neighbors.
5136 if (fr->cutoff_scheme == ecutsVERLET)
5138 moved = get_moved(comm, home_pos_cg);
5140 for (i = dd->ncg_home; i < home_pos_cg; i++)
5145 dd->ncg_home = home_pos_cg;
5146 dd->nat_home = home_pos_at;
5151 "Finished repartitioning: cgs moved out %d, new home %d\n",
5152 *ncg_moved, dd->ncg_home-*ncg_moved);
5157 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5159 dd->comm->cycl[ddCycl] += cycles;
5160 dd->comm->cycl_n[ddCycl]++;
5161 if (cycles > dd->comm->cycl_max[ddCycl])
5163 dd->comm->cycl_max[ddCycl] = cycles;
/* Estimate the flop cost of the force calculation from the per-kernel
 * operation counts in *nrnb, each weighted by cost_nrnb().
 * NOTE(review): this chunk is an extraction with lines elided — the
 * declarations of 'i', 'sum' and 'name', the initialization of 'sum',
 * the per-iteration assignment of 'name' and the final return are not
 * visible here; confirm against the upstream source.
 */
5167 static double force_flop_count(t_nrnb *nrnb)
/* Nonbonded kernels: water kernels (W3/W4) weighted 0.25, others 0.50 */
5174 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5176 /* To get closer to the real timings, we half the count
5177 * for the normal loops and again half it for water loops.
5180 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5182 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5186 sum += nrnb->n[i]*0.50*cost_nrnb(i);
/* Free-energy kernels through 1-4 interactions: W3/W4 counted at full cost */
5189 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5192 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5194 sum += nrnb->n[i]*cost_nrnb(i);
/* Bonded interactions up to walls: full cost */
5197 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5199 sum += nrnb->n[i]*cost_nrnb(i);
5205 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5207 if (dd->comm->eFlop)
5209 dd->comm->flop -= force_flop_count(nrnb);
5212 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5214 if (dd->comm->eFlop)
5216 dd->comm->flop += force_flop_count(nrnb);
5221 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5225 for (i = 0; i < ddCyclNr; i++)
5227 dd->comm->cycl[i] = 0;
5228 dd->comm->cycl_n[i] = 0;
5229 dd->comm->cycl_max[i] = 0;
5232 dd->comm->flop_n = 0;
/* Gather per-rank load measurements along each DD dimension (highest
 * dimension first) onto the row-root ranks and reduce them into
 * comm->load[]; finally accumulate the global load statistics used by
 * print_dd_load_av()/dd_print_load().
 * NOTE(review): extraction with elided lines — initializations of
 * 'pos', 'dim', 'bSepPME', several braces and some reduction branches
 * are not visible; read together with the upstream source.
 */
5235 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5237 gmx_domdec_comm_t *comm;
5238 gmx_domdec_load_t *load;
5239 gmx_domdec_root_t *root = NULL;
5240 int d, dim, cid, i, pos;
5241 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5246 fprintf(debug, "get_load_distribution start\n");
5249 wallcycle_start(wcycle, ewcDDCOMMLOAD);
/* bSepPME: this rank has a separate PME partner rank */
5253 bSepPME = (dd->pme_nodeid >= 0);
5255 for (d = dd->ndim-1; d >= 0; d--)
5258 /* Check if we participate in the communication in this dimension */
5259 if (d == dd->ndim-1 ||
5260 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5262 load = &comm->load[d];
5265 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
/* Pack the send buffer: own force load for the innermost dimension,
 * otherwise the already-reduced load of dimension d+1.
 */
5268 if (d == dd->ndim-1)
5270 sbuf[pos++] = dd_force_load(comm);
5271 sbuf[pos++] = sbuf[0];
5274 sbuf[pos++] = sbuf[0];
5275 sbuf[pos++] = cell_frac;
5278 sbuf[pos++] = comm->cell_f_max0[d];
5279 sbuf[pos++] = comm->cell_f_min1[d];
5284 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5285 sbuf[pos++] = comm->cycl[ddCyclPME];
5290 sbuf[pos++] = comm->load[d+1].sum;
5291 sbuf[pos++] = comm->load[d+1].max;
5294 sbuf[pos++] = comm->load[d+1].sum_m;
5295 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5296 sbuf[pos++] = comm->load[d+1].flags;
5299 sbuf[pos++] = comm->cell_f_max0[d];
5300 sbuf[pos++] = comm->cell_f_min1[d];
5305 sbuf[pos++] = comm->load[d+1].mdf;
5306 sbuf[pos++] = comm->load[d+1].pme;
5310 /* Communicate a row in DD direction d.
5311 * The communicators are setup such that the root always has rank 0.
5314 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5315 load->load, load->nload*sizeof(float), MPI_BYTE,
5316 0, comm->mpi_comm_load[d]);
5318 if (dd->ci[dim] == dd->master_ci[dim])
5320 /* We are the root, process this row */
5321 if (comm->bDynLoadBal)
5323 root = comm->root[d];
/* Reduce the gathered per-cell entries into this row's load struct */
5333 for (i = 0; i < dd->nc[dim]; i++)
5335 load->sum += load->load[pos++];
5336 load->max = max(load->max, load->load[pos]);
5342 /* This direction could not be load balanced properly,
5343 * therefore we need to use the maximum iso the average load.
5345 load->sum_m = max(load->sum_m, load->load[pos]);
5349 load->sum_m += load->load[pos];
5352 load->cvol_min = min(load->cvol_min, load->load[pos]);
5356 load->flags = (int)(load->load[pos++] + 0.5);
5360 root->cell_f_max0[i] = load->load[pos++];
5361 root->cell_f_min1[i] = load->load[pos++];
5366 load->mdf = max(load->mdf, load->load[pos]);
5368 load->pme = max(load->pme, load->load[pos]);
/* Mark this dimension as limited when DLB hit its bounds */
5372 if (comm->bDynLoadBal && root->bLimited)
5374 load->sum_m *= dd->nc[dim];
5375 load->flags |= (1<<d);
/* Global accumulation on the overall master (load[0] holds the
 * fully reduced values after the loop above).
 */
5383 comm->nload += dd_load_count(comm);
5384 comm->load_step += comm->cycl[ddCyclStep];
5385 comm->load_sum += comm->load[0].sum;
5386 comm->load_max += comm->load[0].max;
5387 if (comm->bDynLoadBal)
5389 for (d = 0; d < dd->ndim; d++)
5391 if (comm->load[0].flags & (1<<d))
5393 comm->load_lim[d]++;
5399 comm->load_mdf += comm->load[0].mdf;
5400 comm->load_pme += comm->load[0].pme;
5404 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5408 fprintf(debug, "get_load_distribution finished\n");
5412 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5414 /* Return the relative performance loss on the total run time
5415 * due to the force calculation load imbalance.
5417 if (dd->comm->nload > 0)
5420 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5421 (dd->comm->load_step*dd->nnodes);
/* Print the end-of-run DD load-balance report (average imbalance, time
 * lost waiting, DLB limitation per dimension, PME/PP balance) to both
 * the log file and stderr; only the DD master with collected samples
 * does anything.
 * NOTE(review): extraction with elided lines — the declaration of
 * 'buf', the assignment of 'npp', several braces/guards and some
 * sprintf openings are not visible here.
 */
5429 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5432 int npp, npme, nnodes, d, limp;
5433 float imbal, pme_f_ratio, lossf, lossp = 0;
5435 gmx_domdec_comm_t *comm;
5438 if (DDMASTER(dd) && comm->nload > 0)
5441 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5442 nnodes = npp + npme;
/* imbal: max load relative to the average load, minus 1 */
5443 imbal = comm->load_max*npp/comm->load_sum - 1;
5444 lossf = dd_force_imb_perf_loss(dd);
5445 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5446 fprintf(fplog, "%s", buf);
5447 fprintf(stderr, "\n");
5448 fprintf(stderr, "%s", buf);
5449 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5450 fprintf(fplog, "%s", buf);
5451 fprintf(stderr, "%s", buf);
5453 if (comm->bDynLoadBal)
5455 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5456 for (d = 0; d < dd->ndim; d++)
/* Percentage of load samples where this dimension hit its DLB limit,
 * rounded to the nearest integer.
 */
5458 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5459 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5465 sprintf(buf+strlen(buf), "\n");
5466 fprintf(fplog, "%s", buf);
5467 fprintf(stderr, "%s", buf);
/* PME/PP balance: ratio of mesh to force load and the resulting loss */
5471 pme_f_ratio = comm->load_pme/comm->load_mdf;
5472 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5475 lossp *= (float)npme/(float)nnodes;
5479 lossp *= (float)npp/(float)nnodes;
5481 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5482 fprintf(fplog, "%s", buf);
5483 fprintf(stderr, "%s", buf);
5484 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5485 fprintf(fplog, "%s", buf);
5486 fprintf(stderr, "%s", buf);
5488 fprintf(fplog, "\n");
5489 fprintf(stderr, "\n");
/* Advise the user when the imbalance exceeds the reporting threshold */
5491 if (lossf >= DD_PERF_LOSS)
5494 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5495 " in the domain decomposition.\n", lossf*100);
5496 if (!comm->bDynLoadBal)
5498 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5502 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5504 fprintf(fplog, "%s\n", buf);
5505 fprintf(stderr, "%s\n", buf);
5507 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
5510 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5511 " had %s work to do than the PP nodes.\n"
5512 " You might want to %s the number of PME nodes\n"
5513 " or %s the cut-off and the grid spacing.\n",
5515 (lossp < 0) ? "less" : "more",
5516 (lossp < 0) ? "decrease" : "increase",
5517 (lossp < 0) ? "decrease" : "increase");
5518 fprintf(fplog, "%s\n", buf);
5519 fprintf(stderr, "%s\n", buf);
5524 static float dd_vol_min(gmx_domdec_t *dd)
5526 return dd->comm->load[0].cvol_min*dd->nnodes;
5529 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5531 return dd->comm->load[0].flags;
5534 static float dd_f_imbal(gmx_domdec_t *dd)
5536 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5539 float dd_pme_f_ratio(gmx_domdec_t *dd)
5541 if (dd->comm->cycl_n[ddCyclPME] > 0)
5543 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
/* Write a per-step DD load report to the log file: limited dimensions,
 * minimum/average cell volume (with DLB), force imbalance and, when
 * PME samples exist, the PME mesh/force ratio.
 * NOTE(review): extraction with elided lines — declarations of 'flags',
 * 'd', 'buf' and several guards/braces are not visible here.
 */
5551 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_large_int_t step)
5556 flags = dd_load_flags(dd);
5560 "DD load balancing is limited by minimum cell size in dimension");
5561 for (d = 0; d < dd->ndim; d++)
5565 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5568 fprintf(fplog, "\n");
5570 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5571 if (dd->comm->bDynLoadBal)
/* '!' marks that DLB was limited in at least one dimension */
5573 fprintf(fplog, " vol min/aver %5.3f%c",
5574 dd_vol_min(dd), flags ? '!' : ' ');
5576 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5577 if (dd->comm->cycl_n[ddCyclPME])
5579 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5581 fprintf(fplog, "\n\n");
5584 static void dd_print_load_verbose(gmx_domdec_t *dd)
5586 if (dd->comm->bDynLoadBal)
5588 fprintf(stderr, "vol %4.2f%c ",
5589 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5591 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5592 if (dd->comm->cycl_n[ddCyclPME])
5594 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
/* Create the MPI communicator for one row of DD cells along dimension
 * dd->dim[dim_ind] (fixed coordinates given by 'loc'), store it in
 * comm->mpi_comm_load[dim_ind], and allocate the row-root / row-member
 * DLB and load-gathering buffers.
 * NOTE(review): extraction with elided lines — declarations of 'i',
 * 'rank', 'dim', 'loc_c', 'c_row', the loop body advancing loc_c and
 * the MPI_Comm_split output argument are not visible here.
 */
5599 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5604 gmx_domdec_root_t *root;
5605 gmx_bool bPartOfGroup = FALSE;
5607 dim = dd->dim[dim_ind];
5608 copy_ivec(loc, loc_c);
/* Walk the row to find out whether this rank belongs to it */
5609 for (i = 0; i < dd->nc[dim]; i++)
5612 rank = dd_index(dd->nc, loc_c);
5613 if (rank == dd->rank)
5615 /* This process is part of the group */
5616 bPartOfGroup = TRUE;
/* Ranks not in the row pass MPI_UNDEFINED and get no communicator */
5619 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5623 dd->comm->mpi_comm_load[dim_ind] = c_row;
5624 if (dd->comm->eDLB != edlbNO)
5626 if (dd->ci[dim] == dd->master_ci[dim])
5628 /* This is the root process of this row */
5629 snew(dd->comm->root[dim_ind], 1);
5630 root = dd->comm->root[dim_ind];
5631 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5632 snew(root->old_cell_f, dd->nc[dim]+1);
5633 snew(root->bCellMin, dd->nc[dim]);
5636 snew(root->cell_f_max0, dd->nc[dim]);
5637 snew(root->cell_f_min1, dd->nc[dim]);
5638 snew(root->bound_min, dd->nc[dim]);
5639 snew(root->bound_max, dd->nc[dim]);
5641 snew(root->buf_ncd, dd->nc[dim]);
5645 /* This is not a root process, we only need to receive cell_f */
5646 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
/* Row roots also need the buffer that MPI_Gather fills with loads */
5649 if (dd->ci[dim] == dd->master_ci[dim])
5651 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
/* Set up a communicator of PP ranks that share the same GPU on the
 * same physical node (comm->mpi_comm_gpu_shared), used to coordinate
 * DLB when a GPU is oversubscribed. No-op without PP duty or GPUs.
 * NOTE(review): extraction with elided lines — declarations of 'dd'
 * and 'gpu_id', the early return and some guards are not visible.
 */
5657 void dd_setup_dlb_resource_sharing(t_commrec *cr,
5658 const gmx_hw_info_t *hwinfo,
5659 const gmx_hw_opt_t *hw_opt)
5662 int physicalnode_id_hash;
5665 MPI_Comm mpi_comm_pp_physicalnode;
5667 if (!(cr->duty & DUTY_PP) ||
5668 hw_opt->gpu_opt.ncuda_dev_use == 0)
5670 /* Only PP nodes (currently) use GPUs.
5671 * If we don't have GPUs, there are no resources to share.
5676 physicalnode_id_hash = gmx_physicalnode_id_hash();
5678 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->nodeid);
5684 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5685 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5686 dd->rank, physicalnode_id_hash, gpu_id);
5688 /* Split the PP communicator over the physical nodes */
5689 /* TODO: See if we should store this (before), as it's also used for
5690 * for the nodecomm summution.
5692 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5693 &mpi_comm_pp_physicalnode);
/* Second split by GPU id: ranks with the same GPU on the same node
 * end up in the same communicator.
 */
5694 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5695 &dd->comm->mpi_comm_gpu_shared);
5696 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5697 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5701 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5704 /* Note that some ranks could share a GPU, while others don't */
/* A size-1 communicator means no sharing for this rank: free it */
5706 if (dd->comm->nrank_gpu_shared == 1)
5708 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
/* Create the row communicators for load gathering in every DD
 * dimension: one call of make_load_communicator per dimension, looping
 * over all fixed coordinates of the lower dimensions.
 * NOTE(review): extraction with elided lines — the declaration and
 * initialization of 'loc', the dim0/dim1 assignments and the loop
 * bodies setting loc[] are not visible here.
 */
5713 static void make_load_communicators(gmx_domdec_t *dd)
5716 int dim0, dim1, i, j;
5721 fprintf(debug, "Making load communicators\n");
5724 snew(dd->comm->load, dd->ndim);
5725 snew(dd->comm->mpi_comm_load, dd->ndim);
/* Dimension 0: a single row over all cells */
5728 make_load_communicator(dd, 0, loc);
/* Dimension 1: one row per cell index along dimension 0 */
5732 for (i = 0; i < dd->nc[dim0]; i++)
5735 make_load_communicator(dd, 1, loc);
/* Dimension 2: one row per (dim0, dim1) cell-index pair */
5741 for (i = 0; i < dd->nc[dim0]; i++)
5745 for (j = 0; j < dd->nc[dim1]; j++)
5748 make_load_communicator(dd, 2, loc);
5755 fprintf(debug, "Finished making load communicators\n");
/* Set up the DD grid topology on this rank: forward/backward neighbor
 * ranks per dimension, the communication zones (1, 2 or 4 or 8 zones
 * for 1D/2D/3D decomposition), the i-zone j-ranges and the allowed
 * shift ranges per dimension; finally allocate DLB roots and create
 * the load communicators when load recording is on.
 * NOTE(review): extraction with elided lines — declarations of 'tmp',
 * 's', 'nzone', 'nzonep', 'shift_diff', the dd_zo/dd_zp1/2/3 tables,
 * the switch on dd->ndim and several braces are not visible here.
 */
5760 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5763 int d, dim, i, j, m;
5766 ivec dd_zp[DD_MAXIZONE];
5767 gmx_domdec_zones_t *zones;
5768 gmx_domdec_ns_ranges_t *izone;
/* Determine the +1/-1 neighbor ranks in each decomposed dimension,
 * with periodic wrap-around of the cell index.
 */
5770 for (d = 0; d < dd->ndim; d++)
5773 copy_ivec(dd->ci, tmp);
5774 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5775 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5776 copy_ivec(dd->ci, tmp);
5777 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5778 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5781 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5784 dd->neighbor[d][1]);
5790 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5792 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5793 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Pick the zone-pair table matching the decomposition dimensionality */
5800 for (i = 0; i < nzonep; i++)
5802 copy_ivec(dd_zp3[i], dd_zp[i]);
5808 for (i = 0; i < nzonep; i++)
5810 copy_ivec(dd_zp2[i], dd_zp[i]);
5816 for (i = 0; i < nzonep; i++)
5818 copy_ivec(dd_zp1[i], dd_zp[i]);
5822 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5827 zones = &dd->comm->zones;
/* Zone shifts: map the generic zone order onto the used dimensions */
5829 for (i = 0; i < nzone; i++)
5832 clear_ivec(zones->shift[i]);
5833 for (d = 0; d < dd->ndim; d++)
5835 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5840 for (i = 0; i < nzone; i++)
5842 for (d = 0; d < DIM; d++)
5844 s[d] = dd->ci[d] - zones->shift[i][d];
5849 else if (s[d] >= dd->nc[d])
/* i-zones: for each, record the j-zone range and allowed pbc shifts */
5855 zones->nizone = nzonep;
5856 for (i = 0; i < zones->nizone; i++)
5858 if (dd_zp[i][0] != i)
5860 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5862 izone = &zones->izone[i];
5863 izone->j0 = dd_zp[i][1];
5864 izone->j1 = dd_zp[i][2];
5865 for (dim = 0; dim < DIM; dim++)
5867 if (dd->nc[dim] == 1)
5869 /* All shifts should be allowed */
5870 izone->shift0[dim] = -1;
5871 izone->shift1[dim] = 1;
/* The following lines appear to be an older, commented-out variant
 * of the shift determination kept in the upstream source.
 */
5876 izone->shift0[d] = 0;
5877 izone->shift1[d] = 0;
5878 for(j=izone->j0; j<izone->j1; j++) {
5879 if (dd->shift[j][d] > dd->shift[i][d])
5880 izone->shift0[d] = -1;
5881 if (dd->shift[j][d] < dd->shift[i][d])
5882 izone->shift1[d] = 1;
5888 /* Assume the shift are not more than 1 cell */
5889 izone->shift0[dim] = 1;
5890 izone->shift1[dim] = -1;
5891 for (j = izone->j0; j < izone->j1; j++)
5893 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5894 if (shift_diff < izone->shift0[dim])
5896 izone->shift0[dim] = shift_diff;
5898 if (shift_diff > izone->shift1[dim])
5900 izone->shift1[dim] = shift_diff;
5907 if (dd->comm->eDLB != edlbNO)
5909 snew(dd->comm->root, dd->ndim);
5912 if (dd->comm->bRecordLoad)
5914 make_load_communicators(dd);
/* Create or adopt the particle-particle (PP) MPI communicator, set
 * dd->rank and the home-cell coordinates dd->ci, and determine the DD
 * master rank/coordinates for the three layouts: Cartesian PP+PME,
 * Cartesian PP only, and non-Cartesian (rank-index) ordering.
 * NOTE(review): extraction with elided lines — declarations of 'dd',
 * 'buf', 'rank', 'periods', 'comm_cart', the periods initialization,
 * several guards and the final fprintf openings are not visible here.
 */
5918 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int reorder)
5921 gmx_domdec_comm_t *comm;
5932 if (comm->bCartesianPP)
5934 /* Set up cartesian communication for the particle-particle part */
5937 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5938 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5941 for (i = 0; i < DIM; i++)
5945 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5947 /* We overwrite the old communicator with the new cartesian one */
5948 cr->mpi_comm_mygroup = comm_cart;
5951 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5952 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5954 if (comm->bCartesianPP_PME)
5956 /* Since we want to use the original cartesian setup for sim,
5957 * and not the one after split, we need to make an index.
5959 snew(comm->ddindex2ddnodeid, dd->nnodes);
5960 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5961 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5962 /* Get the rank of the DD master,
5963 * above we made sure that the master node is a PP node.
/* All ranks contribute 0 except the master, so the sum is its rank */
5973 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5975 else if (comm->bCartesianPP)
5977 if (cr->npmenodes == 0)
5979 /* The PP communicator is also
5980 * the communicator for this simulation
5982 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5984 cr->nodeid = dd->rank;
5986 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5988 /* We need to make an index to go from the coordinates
5989 * to the nodeid of this simulation.
5991 snew(comm->ddindex2simnodeid, dd->nnodes);
5992 snew(buf, dd->nnodes);
5993 if (cr->duty & DUTY_PP)
5995 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5997 /* Communicate the ddindex to simulation nodeid index */
5998 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5999 cr->mpi_comm_mysim);
6002 /* Determine the master coordinates and rank.
6003 * The DD master should be the same node as the master of this sim.
6005 for (i = 0; i < dd->nnodes; i++)
6007 if (comm->ddindex2simnodeid[i] == 0)
6009 ddindex2xyz(dd->nc, i, dd->master_ci);
6010 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
6015 fprintf(debug, "The master rank is %d\n", dd->masterrank);
6020 /* No Cartesian communicators */
6021 /* We use the rank in dd->comm->all as DD index */
6022 ddindex2xyz(dd->nc, dd->rank, dd->ci);
6023 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
6025 clear_ivec(dd->master_ci);
6032 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6033 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6038 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6039 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* On ranks that did not build it in make_pp_communicator (PME-only
 * ranks), obtain the ddindex -> simulation-nodeid mapping via the same
 * MPI_Allreduce pattern: PP ranks contribute their sim_nodeid at their
 * DD index, all other entries stay zero.
 * NOTE(review): extraction with elided lines — declarations of 'dd'
 * and 'buf' and the sfree/braces are not visible here.
 */
6043 static void receive_ddindex2simnodeid(t_commrec *cr)
6047 gmx_domdec_comm_t *comm;
/* Only needed for plain Cartesian PP without a PP+PME Cartesian grid */
6054 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6056 snew(comm->ddindex2simnodeid, dd->nnodes);
6057 snew(buf, dd->nnodes);
6058 if (cr->duty & DUTY_PP)
6060 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6063 /* Communicate the ddindex to simulation nodeid index */
6064 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6065 cr->mpi_comm_mysim);
/* Allocate the master-rank bookkeeping structure used for scattering
 * and gathering the global state: per-node charge-group counts and
 * indices, cell boundaries, and (for large node counts) communication
 * buffers sized for the whole system.
 * NOTE(review): extraction with elided lines — the snew of 'ma'
 * itself, the snew of ma->cg (sized 'ncg'), the ibuf/cg branch for
 * many nodes and the return statement are not visible here.
 */
6072 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6073 int ncg, int natoms)
6075 gmx_domdec_master_t *ma;
6080 snew(ma->ncg, dd->nnodes);
6081 snew(ma->index, dd->nnodes+1);
6083 snew(ma->nat, dd->nnodes);
6084 snew(ma->ibuf, dd->nnodes*2);
6085 snew(ma->cell_x, DIM);
6086 for (i = 0; i < DIM; i++)
6088 snew(ma->cell_x[i], dd->nc[i]+1);
/* With few nodes, sendrecv is used and one natoms-sized rvec buffer
 * suffices for state scattering/gathering.
 */
6091 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6097 snew(ma->vbuf, natoms);
/* Split the simulation communicator into PP and PME groups, either via
 * a Cartesian PP+PME grid (stacking PME ranks along the thinnest
 * divisible dimension) or via rank-order interleaving/appending, and
 * assign each rank its duty (DUTY_PP or DUTY_PME).
 * NOTE(review): extraction with elided lines — declarations of 'dd',
 * 'i', 'rank', 'bDiv', 'periods', 'comm_cart', the duty assignment for
 * PP ranks, the split color arguments and several braces are not
 * visible here.
 */
6103 static void split_communicator(FILE *fplog, t_commrec *cr, int dd_node_order,
6107 gmx_domdec_comm_t *comm;
/* A Cartesian PP+PME grid is possible when the PME count divides
 * evenly into an extra slab along y or z.
 */
6118 if (comm->bCartesianPP)
6120 for (i = 1; i < DIM; i++)
6122 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6124 if (bDiv[YY] || bDiv[ZZ])
6126 comm->bCartesianPP_PME = TRUE;
6127 /* If we have 2D PME decomposition, which is always in x+y,
6128 * we stack the PME only nodes in z.
6129 * Otherwise we choose the direction that provides the thinnest slab
6130 * of PME only nodes as this will have the least effect
6131 * on the PP communication.
6132 * But for the PME communication the opposite might be better.
6134 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6136 dd->nc[YY] > dd->nc[ZZ]))
6138 comm->cartpmedim = ZZ;
6142 comm->cartpmedim = YY;
6144 comm->ntot[comm->cartpmedim]
6145 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6149 fprintf(fplog, "#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6151 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6156 if (comm->bCartesianPP_PME)
6160 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6163 for (i = 0; i < DIM; i++)
6167 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6170 MPI_Comm_rank(comm_cart, &rank);
/* Rank 0 must stay rank 0: other code assumes master == rank 0 */
6171 if (MASTERNODE(cr) && rank != 0)
6173 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6176 /* With this assigment we loose the link to the original communicator
6177 * which will usually be MPI_COMM_WORLD, unless have multisim.
6179 cr->mpi_comm_mysim = comm_cart;
6180 cr->sim_nodeid = rank;
6182 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6186 fprintf(fplog, "Cartesian nodeid %d, coordinates %d %d %d\n\n",
6187 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Ranks beyond the PP slab count along cartpmedim serve PME */
6190 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6194 if (cr->npmenodes == 0 ||
6195 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6197 cr->duty = DUTY_PME;
6200 /* Split the sim communicator into PP and PME only nodes */
6201 MPI_Comm_split(cr->mpi_comm_mysim,
6203 dd_index(comm->ntot, dd->ci),
6204 &cr->mpi_comm_mygroup);
/* Non-Cartesian layouts: choose PME ranks by node order */
6208 switch (dd_node_order)
6213 fprintf(fplog, "Order of the nodes: PP first, PME last\n");
6216 case ddnoINTERLEAVE:
6217 /* Interleave the PP-only and PME-only nodes,
6218 * as on clusters with dual-core machines this will double
6219 * the communication bandwidth of the PME processes
6220 * and thus speed up the PP <-> PME and inter PME communication.
6224 fprintf(fplog, "Interleaving PP and PME nodes\n");
6226 comm->pmenodes = dd_pmenodes(cr);
6231 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
/* A rank without a PME partner mapping is itself a PME rank */
6234 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6236 cr->duty = DUTY_PME;
6243 /* Split the sim communicator into PP and PME only nodes */
6244 MPI_Comm_split(cr->mpi_comm_mysim,
6247 &cr->mpi_comm_mygroup);
6248 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6254 fprintf(fplog, "This is a %s only node\n\n",
6255 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
/* Top-level communicator setup: optionally split into PP/PME groups,
 * build the PP communicator, distribute the ddindex mapping, connect
 * each PP rank to its PME partner, and allocate the master structure
 * on the DD master rank.
 * NOTE(review): extraction with elided lines — declarations of 'dd'
 * and 'CartReorder', the DDMASTER guard before the init call and the
 * ncg argument of init_gmx_domdec_master_t are not visible here.
 */
6259 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6262 gmx_domdec_comm_t *comm;
6268 copy_ivec(dd->nc, comm->ntot);
6270 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6271 comm->bCartesianPP_PME = FALSE;
6273 /* Reorder the nodes by default. This might change the MPI ranks.
6274 * Real reordering is only supported on very few architectures,
6275 * Blue Gene is one of them.
6277 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6279 if (cr->npmenodes > 0)
6281 /* Split the communicator into a PP and PME part */
6282 split_communicator(fplog, cr, dd_node_order, CartReorder);
6283 if (comm->bCartesianPP_PME)
6285 /* We (possibly) reordered the nodes in split_communicator,
6286 * so it is no longer required in make_pp_communicator.
6288 CartReorder = FALSE;
6293 /* All nodes do PP and PME */
6295 /* We do not require separate communicators */
6296 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6300 if (cr->duty & DUTY_PP)
6302 /* Copy or make a new PP communicator */
6303 make_pp_communicator(fplog, cr, CartReorder);
6307 receive_ddindex2simnodeid(cr);
6310 if (!(cr->duty & DUTY_PME))
6312 /* Set up the communication to our PME node */
6313 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6314 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6317 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6318 dd->pme_nodeid, dd->pme_receive_vir_ener);
/* PME-only ranks have no PME partner of their own */
6323 dd->pme_nodeid = -1;
6328 dd->ma = init_gmx_domdec_master_t(dd,
6330 comm->cgs_gl.index[comm->cgs_gl.nr]);
/* Parse the user-supplied static load-balancing fractions for one box
 * direction (mdrun -ddcsx/-ddcsy/-ddcsz): read 'nc' numbers from
 * 'size_string', normalize them to relative cell sizes and return the
 * allocated array; returns without allocating when nc <= 1 or no
 * string was given.
 * NOTE(review): extraction with elided lines — declarations of 'i',
 * 'n', 'dbl', the snew of slb_frac, the normalization by 'tot', the
 * NULL return and the final return are not visible here.
 */
6334 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6336 real *slb_frac, tot;
6341 if (nc > 1 && size_string != NULL)
6345 fprintf(fplog, "Using static load balancing for the %s direction\n",
6350 for (i = 0; i < nc; i++)
/* %n reports how many characters were consumed so the next number
 * can be read from the remainder of the string.
 */
6353 sscanf(size_string, "%lf%n", &dbl, &n);
6356 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6365 fprintf(fplog, "Relative cell sizes:");
6367 for (i = 0; i < nc; i++)
6372 fprintf(fplog, " %5.3f", slb_frac[i]);
6377 fprintf(fplog, "\n");
/* Count the bonded interactions involving more than two atoms in the
 * whole topology (used to decide whether multi-body bondeds constrain
 * the DD cell size).
 * NOTE(review): extraction with elided lines — declarations of 'n',
 * 'nmol', 'ftype', 'il', the initialization of 'n', the second half of
 * the if-condition (presumably NRAL(ftype) > 2) and the return are not
 * visible here.
 */
6384 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6387 gmx_mtop_ilistloop_t iloop;
6391 iloop = gmx_mtop_ilistloop_init(mtop);
6392 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6394 for (ftype = 0; ftype < F_NRE; ftype++)
6396 if ((interaction_function[ftype].flags & IF_BOND) &&
/* il[ftype].nr entries per interaction: 1 type index + NRAL atoms */
6399 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
/* Read an integer override from environment variable 'env_var'; falls
 * back to 'def' when the variable is unset or unparsable, and logs the
 * value that was found.
 * NOTE(review): extraction with elided lines — declarations of 'val'
 * and 'nst', the nst = def fallback and the return are not visible
 * here.
 */
6407 static int dd_nst_env(FILE *fplog, const char *env_var, int def)
6413 val = getenv(env_var);
6416 if (sscanf(val, "%d", &nst) <= 0)
6422 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6430 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6434 fprintf(stderr, "\n%s\n", warn_string);
6438 fprintf(fplog, "\n%s\n", warn_string);
/* Abort with a fatal error for input settings that domain
 * decomposition cannot handle (screw pbc outside x, simple neighbor
 * search, nstlist=0), and warn about angular comm-mode with pbc.
 */
6442 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6443 t_inputrec *ir, FILE *fplog)
/* Screw pbc only works with decomposition along x alone */
6445 if (ir->ePBC == epbcSCREW &&
6446 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6448 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6451 if (ir->ns_type == ensSIMPLE)
6453 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
6456 if (ir->nstlist == 0)
6458 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6461 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6463 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6467 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6472 r = ddbox->box_size[XX];
6473 for (di = 0; di < dd->ndim; di++)
6476 /* Check using the initial average cell size */
6477 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
/* Determine the dynamic-load-balancing mode (edlbAUTO/NO/YES) from the
 * -dlb option, downgrading or erroring when DLB is unsupported: rerun,
 * non-dynamical integrators, missing cycle counting, or reproducible
 * runs.
 * NOTE(review): extraction with elided lines — the declarations of
 * 'eDLB' and 'buf', the switch statement opening, the eDLB downgrades
 * in each branch and the final return are not visible here.
 */
6483 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6484 const char *dlb_opt, gmx_bool bRecordLoad,
6485 unsigned long Flags, t_inputrec *ir)
6493 case 'a': eDLB = edlbAUTO; break;
6494 case 'n': eDLB = edlbNO; break;
6495 case 'y': eDLB = edlbYES; break;
6496 default: gmx_incons("Unknown dlb_opt");
/* Reruns do not load-balance */
6499 if (Flags & MD_RERUN)
6504 if (!EI_DYNAMICS(ir->eI))
6506 if (eDLB == edlbYES)
6508 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6509 dd_warning(cr, fplog, buf);
6517 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
/* Reproducible runs: auto DLB is disabled, explicit DLB only warns */
6522 if (Flags & MD_REPRODUCIBLE)
6529 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6533 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6536 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
6544 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6549 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6551 /* Decomposition order z,y,x */
6554 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6556 for (dim = DIM-1; dim >= 0; dim--)
6558 if (dd->nc[dim] > 1)
6560 dd->dim[dd->ndim++] = dim;
6566 /* Decomposition order x,y,z */
6567 for (dim = 0; dim < DIM; dim++)
6569 if (dd->nc[dim] > 1)
6571 dd->dim[dd->ndim++] = dim;
6577 static gmx_domdec_comm_t *init_dd_comm()
6579 gmx_domdec_comm_t *comm;
6583 snew(comm->cggl_flag, DIM*2);
6584 snew(comm->cgcm_state, DIM*2);
6585 for (i = 0; i < DIM*2; i++)
6587 comm->cggl_flag_nalloc[i] = 0;
6588 comm->cgcm_state_nalloc[i] = 0;
6591 comm->nalloc_int = 0;
6592 comm->buf_int = NULL;
6594 vec_rvec_init(&comm->vbuf);
6596 comm->n_load_have = 0;
6597 comm->n_load_collect = 0;
6599 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6601 comm->sum_nat[i] = 0;
6605 comm->load_step = 0;
6608 clear_ivec(comm->load_lim);
/* Top-level constructor for the domain decomposition machinery.
 * Reads tuning env.vars, determines cell-size limits from bonded
 * interactions and P-LINCS constraints, chooses (or validates) the DD
 * grid and the PME node split, and fills in the communication setup.
 * NOTE(review): this extract is decimated — braces and some guard lines
 * are missing — so code below is preserved byte-for-byte.
 */
6615 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6616 unsigned long Flags,
6618 real comm_distance_min, real rconstr,
6619 const char *dlb_opt, real dlb_scale,
6620 const char *sizex, const char *sizey, const char *sizez,
6621 gmx_mtop_t *mtop, t_inputrec *ir,
6622 matrix box, rvec *x,
6624 int *npme_x, int *npme_y)
6627 gmx_domdec_comm_t *comm;
6630 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6637 "\nInitializing Domain Decomposition on %d nodes\n", cr->nnodes);
6642 dd->comm = init_dd_comm();
/* NOTE(review): init_dd_comm() already snew()s cggl_flag and cgcm_state
 * (see its definition above). If comm aliases dd->comm here (the
 * assignment is not visible in this extract), these two calls
 * re-allocate and leak the arrays from init_dd_comm — confirm and
 * remove the duplicates.
 */
6644 snew(comm->cggl_flag, DIM*2);
6645 snew(comm->cgcm_state, DIM*2);
6647 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6648 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
/* Debug/tuning switches from environment variables */
6650 dd->bSendRecv2 = dd_nst_env(fplog, "GMX_DD_SENDRECV2", 0);
6651 comm->dlb_scale_lim = dd_nst_env(fplog, "GMX_DLB_MAX", 10);
6652 comm->eFlop = dd_nst_env(fplog, "GMX_DLB_FLOP", 0);
6653 recload = dd_nst_env(fplog, "GMX_DD_LOAD", 1);
6654 comm->nstSortCG = dd_nst_env(fplog, "GMX_DD_SORT", 1);
6655 comm->nstDDDump = dd_nst_env(fplog, "GMX_DD_DUMP", 0);
6656 comm->nstDDDumpGrid = dd_nst_env(fplog, "GMX_DD_DUMP_GRID", 0);
6657 comm->DD_debug = dd_nst_env(fplog, "GMX_DD_DEBUG", 0);
6659 dd->pme_recv_f_alloc = 0;
6660 dd->pme_recv_f_buf = NULL;
6662 if (dd->bSendRecv2 && fplog)
6664 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6670 fprintf(fplog, "Will load balance based on FLOP count\n");
6672 if (comm->eFlop > 1)
/* Seed so FLOP-based fake load differs per rank */
6674 srand(1+cr->nodeid);
6676 comm->bRecordLoad = TRUE;
6680 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6684 /* Initialize to GPU share count to 0, might change later */
6685 comm->nrank_gpu_shared = 0;
6687 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6689 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6692 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6694 dd->bGridJump = comm->bDynLoadBal;
6695 comm->bPMELoadBalDLBLimits = FALSE;
6697 if (comm->nstSortCG)
6701 if (comm->nstSortCG == 1)
6703 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6707 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6711 snew(comm->sort, 1);
6717 fprintf(fplog, "Will not sort the charge groups\n");
6721 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6723 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6724 if (comm->bInterCGBondeds)
6726 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6730 comm->bInterCGMultiBody = FALSE;
6733 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6734 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6736 if (ir->rlistlong == 0)
6738 /* Set the cut-off to some very large value,
6739 * so we don't need if statements everywhere in the code.
6740 * We use sqrt, since the cut-off is squared in some places.
6742 comm->cutoff = GMX_CUTOFF_INF;
6746 comm->cutoff = ir->rlistlong;
6748 comm->cutoff_mbody = 0;
6750 comm->cellsize_limit = 0;
6751 comm->bBondComm = FALSE;
/* Determine the minimum cell size required for bonded interactions */
6753 if (comm->bInterCGBondeds)
6755 if (comm_distance_min > 0)
6757 comm->cutoff_mbody = comm_distance_min;
6758 if (Flags & MD_DDBONDCOMM)
6760 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6764 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6766 r_bonded_limit = comm->cutoff_mbody;
6768 else if (ir->bPeriodicMols)
6770 /* Can not easily determine the required cut-off */
6771 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6772 comm->cutoff_mbody = comm->cutoff/2;
6773 r_bonded_limit = comm->cutoff_mbody;
6779 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6780 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
/* Only the master has coordinates; broadcast the measured distances */
6782 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6783 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6785 /* We use an initial margin of 10% for the minimum cell size,
6786 * except when we are just below the non-bonded cut-off.
6788 if (Flags & MD_DDBONDCOMM)
6790 if (max(r_2b, r_mb) > comm->cutoff)
6792 r_bonded = max(r_2b, r_mb);
6793 r_bonded_limit = 1.1*r_bonded;
6794 comm->bBondComm = TRUE;
6799 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6801 /* We determine cutoff_mbody later */
6805 /* No special bonded communication,
6806 * simply increase the DD cut-off.
6808 r_bonded_limit = 1.1*max(r_2b, r_mb);
6809 comm->cutoff_mbody = r_bonded_limit;
6810 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6813 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6817 "Minimum cell size due to bonded interactions: %.3f nm\n",
6818 comm->cellsize_limit);
/* Cell-size limit from P-LINCS constraints (option -rcon) */
6822 if (dd->bInterCGcons && rconstr <= 0)
6824 /* There is a cell size limit due to the constraints (P-LINCS) */
6825 rconstr = constr_r_max(fplog, mtop, ir);
6829 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6831 if (rconstr > comm->cellsize_limit)
6833 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6837 else if (rconstr > 0 && fplog)
6839 /* Here we do not check for dd->bInterCGcons,
6840 * because one can also set a cell size limit for virtual sites only
6841 * and at this point we don't know yet if there are intercg v-sites.
6844 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6847 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6849 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
/* Either use the user-specified grid or search for one */
6853 copy_ivec(nc, dd->nc);
6854 set_dd_dim(fplog, dd);
6855 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6857 if (cr->npmenodes == -1)
6861 acs = average_cellsize_min(dd, ddbox);
6862 if (acs < comm->cellsize_limit)
6866 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6868 gmx_fatal_collective(FARGS, cr, NULL,
6869 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6870 acs, comm->cellsize_limit);
6875 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6877 /* We need to choose the optimal DD grid and possibly PME nodes */
6878 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6879 comm->eDLB != edlbNO, dlb_scale,
6880 comm->cellsize_limit, comm->cutoff,
6881 comm->bInterCGBondeds);
6883 if (dd->nc[XX] == 0)
6885 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6886 sprintf(buf, "Change the number of nodes or mdrun option %s%s%s",
6887 !bC ? "-rdd" : "-rcon",
6888 comm->eDLB != edlbNO ? " or -dds" : "",
6889 bC ? " or your LINCS settings" : "");
6891 gmx_fatal_collective(FARGS, cr, NULL,
6892 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6894 "Look in the log file for details on the domain decomposition",
6895 cr->nnodes-cr->npmenodes, limit, buf);
6897 set_dd_dim(fplog, dd);
6903 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6904 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
/* Sanity checks on the PP/PME node split */
6907 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6908 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6910 gmx_fatal_collective(FARGS, cr, NULL,
6911 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6912 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6914 if (cr->npmenodes > dd->nnodes)
6916 gmx_fatal_collective(FARGS, cr, NULL,
6917 "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6919 if (cr->npmenodes > 0)
6921 comm->npmenodes = cr->npmenodes;
6925 comm->npmenodes = dd->nnodes;
6928 if (EEL_PME(ir->coulombtype))
6930 /* The following choices should match those
6931 * in comm_cost_est in domdec_setup.c.
6932 * Note that here the checks have to take into account
6933 * that the decomposition might occur in a different order than xyz
6934 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6935 * in which case they will not match those in comm_cost_est,
6936 * but since that is mainly for testing purposes that's fine.
6938 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6939 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6940 getenv("GMX_PMEONEDD") == NULL)
6942 comm->npmedecompdim = 2;
6943 comm->npmenodes_x = dd->nc[XX];
6944 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6948 /* In case nc is 1 in both x and y we could still choose to
6949 * decompose pme in y instead of x, but we use x for simplicity.
6951 comm->npmedecompdim = 1;
6952 if (dd->dim[0] == YY)
6954 comm->npmenodes_x = 1;
6955 comm->npmenodes_y = comm->npmenodes;
6959 comm->npmenodes_x = comm->npmenodes;
6960 comm->npmenodes_y = 1;
6965 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6966 comm->npmenodes_x, comm->npmenodes_y, 1);
6971 comm->npmedecompdim = 0;
6972 comm->npmenodes_x = 0;
6973 comm->npmenodes_y = 0;
6976 /* Technically we don't need both of these,
6977 * but it simplifies code not having to recalculate it.
6979 *npme_x = comm->npmenodes_x;
6980 *npme_y = comm->npmenodes_y;
6982 snew(comm->slb_frac, DIM);
6983 if (comm->eDLB == edlbNO)
6985 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6986 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6987 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
6990 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6992 if (comm->bBondComm || comm->eDLB != edlbNO)
6994 /* Set the bonded communication distance to halfway
6995 * the minimum and the maximum,
6996 * since the extra communication cost is nearly zero.
6998 acs = average_cellsize_min(dd, ddbox);
6999 comm->cutoff_mbody = 0.5*(r_bonded + acs);
7000 if (comm->eDLB != edlbNO)
7002 /* Check if this does not limit the scaling */
7003 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
7005 if (!comm->bBondComm)
7007 /* Without bBondComm do not go beyond the n.b. cut-off */
7008 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
7009 if (comm->cellsize_limit >= comm->cutoff)
7011 /* We don't loose a lot of efficieny
7012 * when increasing it to the n.b. cut-off.
7013 * It can even be slightly faster, because we need
7014 * less checks for the communication setup.
7016 comm->cutoff_mbody = comm->cutoff;
7019 /* Check if we did not end up below our original limit */
7020 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
7022 if (comm->cutoff_mbody > comm->cellsize_limit)
7024 comm->cellsize_limit = comm->cutoff_mbody;
7027 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
7032 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
7033 "cellsize limit %f\n",
7034 comm->bBondComm, comm->cellsize_limit);
7039 check_dd_restrictions(cr, dd, ir, fplog);
/* Force a repartition at the first call of dd_partition_system */
7042 comm->partition_step = INT_MIN;
7045 clear_dd_cycle_counts(dd);
7050 static void set_dlb_limits(gmx_domdec_t *dd)
7055 for (d = 0; d < dd->ndim; d++)
7057 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7058 dd->comm->cellsize_min[dd->dim[d]] =
7059 dd->comm->cellsize_min_dlb[dd->dim[d]];
7064 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_large_int_t step)
7067 gmx_domdec_comm_t *comm;
7077 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
7080 cellsize_min = comm->cellsize_min[dd->dim[0]];
7081 for (d = 1; d < dd->ndim; d++)
7083 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7086 if (cellsize_min < comm->cellsize_limit*1.05)
7088 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7090 /* Change DLB from "auto" to "no". */
7091 comm->eDLB = edlbNO;
7096 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7097 comm->bDynLoadBal = TRUE;
7098 dd->bGridJump = TRUE;
7102 /* We can set the required cell size info here,
7103 * so we do not need to communicate this.
7104 * The grid is completely uniform.
7106 for (d = 0; d < dd->ndim; d++)
7110 comm->load[d].sum_m = comm->load[d].sum;
7112 nc = dd->nc[dd->dim[d]];
7113 for (i = 0; i < nc; i++)
7115 comm->root[d]->cell_f[i] = i/(real)nc;
7118 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7119 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7122 comm->root[d]->cell_f[nc] = 1.0;
7127 static char *init_bLocalCG(gmx_mtop_t *mtop)
7132 ncg = ncg_mtop(mtop);
7133 snew(bLocalCG, ncg);
7134 for (cg = 0; cg < ncg; cg++)
7136 bLocalCG[cg] = FALSE;
/* Initialize bonded-interaction communication: build the reverse
 * topology and, when bBondComm is set (atoms beyond the non-bonded
 * cut-off must be communicated for bondeds), the charge-group link
 * structure plus the local-CG flag array; otherwise both stay NULL.
 * NOTE(review): one signature line is missing from this extract —
 * presumably the vsite pointer passed on to dd_make_reverse_top;
 * confirm against the full file.
 */
7142 void dd_init_bondeds(FILE *fplog,
7143 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7145 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7147 gmx_domdec_comm_t *comm;
7151 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7155 if (comm->bBondComm)
7157 /* Communicate atoms beyond the cut-off for bonded interactions */
7160 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7162 comm->bLocalCG = init_bLocalCG(mtop);
7166 /* Only communicate atoms based on cut-off */
7167 comm->cglink = NULL;
7168 comm->bLocalCG = NULL;
/* Write a summary of the DD setup to the log file: pulse counts, cell
 * size limits, allowed DLB shrink per dimension, and the maximum
 * allowed distances for the various interaction classes (-rdd/-rcon).
 * Called once for the static setup and again when DLB may turn on.
 * NOTE(review): decimated extract — braces/guards missing; code kept
 * byte-for-byte.
 */
7172 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7174 gmx_bool bDynLoadBal, real dlb_scale,
7177 gmx_domdec_comm_t *comm;
7192 fprintf(fplog, "The maximum number of communication pulses is:");
7193 for (d = 0; d < dd->ndim; d++)
7195 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7197 fprintf(fplog, "\n");
7198 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7199 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7200 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7201 for (d = 0; d < DIM; d++)
7205 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
/* shrink = min DLB cell size relative to the uniform cell size */
7212 comm->cellsize_min_dlb[d]/
7213 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7215 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7218 fprintf(fplog, "\n");
7222 set_dd_cell_sizes_slb(dd, ddbox, FALSE, np);
7223 fprintf(fplog, "The initial number of communication pulses is:");
7224 for (d = 0; d < dd->ndim; d++)
7226 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7228 fprintf(fplog, "\n");
7229 fprintf(fplog, "The initial domain decomposition cell size is:");
7230 for (d = 0; d < DIM; d++)
7234 fprintf(fplog, " %c %.2f nm",
7235 dim2char(d), dd->comm->cellsize_min[d]);
7238 fprintf(fplog, "\n\n");
7241 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7243 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7244 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7245 "non-bonded interactions", "", comm->cutoff);
7249 limit = dd->comm->cellsize_limit;
7253 if (dynamic_dd_box(ddbox, ir))
7255 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7257 limit = dd->comm->cellsize_min[XX];
7258 for (d = 1; d < DIM; d++)
7260 limit = min(limit, dd->comm->cellsize_min[d]);
7264 if (comm->bInterCGBondeds)
7266 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7267 "two-body bonded interactions", "(-rdd)",
7268 max(comm->cutoff, comm->cutoff_mbody));
7269 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7270 "multi-body bonded interactions", "(-rdd)",
7271 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7275 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7276 "virtual site constructions", "(-rcon)", limit);
7278 if (dd->constraint_comm)
7280 sprintf(buf, "atoms separated by up to %d constraints",
7282 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7283 buf, "(-rcon)", limit);
7285 fprintf(fplog, "\n");
/* Determine the DLB cell-size limits and the number of communication
 * pulses per dimension. The pulse count is the number of neighbor
 * cells the cut-off can reach into; more pulses allow smaller cells
 * but cost extra communication rounds. Results go into
 * comm->cd[d].np_dlb, comm->maxpulse, comm->cellsize_limit and
 * comm->cellsize_min_dlb[].
 * NOTE(review): decimated extract — braces/guards missing; code kept
 * byte-for-byte.
 */
7291 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7293 const t_inputrec *ir,
7294 const gmx_ddbox_t *ddbox)
7296 gmx_domdec_comm_t *comm;
7297 int d, dim, npulse, npulse_d_max, npulse_d;
7302 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7304 /* Determine the maximum number of comm. pulses in one dimension */
7306 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7308 /* Determine the maximum required number of grid pulses */
7309 if (comm->cellsize_limit >= comm->cutoff)
7311 /* Only a single pulse is required */
7314 else if (!bNoCutOff && comm->cellsize_limit > 0)
7316 /* We round down slightly here to avoid overhead due to the latency
7317 * of extra communication calls when the cut-off
7318 * would be only slightly longer than the cell size.
7319 * Later cellsize_limit is redetermined,
7320 * so we can not miss interactions due to this rounding.
7322 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7326 /* There is no cell size limit */
7327 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7330 if (!bNoCutOff && npulse > 1)
7332 /* See if we can do with less pulses, based on dlb_scale */
7334 for (d = 0; d < dd->ndim; d++)
7337 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7338 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7339 npulse_d_max = max(npulse_d_max, npulse_d);
7341 npulse = min(npulse, npulse_d_max);
7344 /* This env var can override npulse */
7345 d = dd_nst_env(debug, "GMX_DD_NPULSE", 0);
7352 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7353 for (d = 0; d < dd->ndim; d++)
7355 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7356 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7357 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7358 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7359 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7361 comm->bVacDLBNoLimit = FALSE;
7365 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7366 if (!comm->bVacDLBNoLimit)
7368 comm->cellsize_limit = max(comm->cellsize_limit,
7369 comm->cutoff/comm->maxpulse);
7371 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7372 /* Set the minimum cell size for each DD dimension */
7373 for (d = 0; d < dd->ndim; d++)
7375 if (comm->bVacDLBNoLimit ||
7376 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7378 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7382 comm->cellsize_min_dlb[dd->dim[d]] =
7383 comm->cutoff/comm->cd[d].np_dlb;
7386 if (comm->cutoff_mbody <= 0)
7388 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7390 if (comm->bDynLoadBal)
7396 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7398 /* If each molecule is a single charge group
7399 * or we use domain decomposition for each periodic dimension,
7400 * we do not need to take pbc into account for the bonded interactions.
7402 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7405 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
/* Late DD initialization, run after the thread count is known:
 * allocates per-thread data, sets up the PME x/y communicators,
 * applies the DLB cell limits, prints the settings summary and
 * initializes the global-to-local atom lookup sized by the expected
 * home+zone atom fraction.
 * NOTE(review): decimated extract — braces/guards missing; code kept
 * byte-for-byte.
 */
7408 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7409 t_inputrec *ir, gmx_ddbox_t *ddbox)
7411 gmx_domdec_comm_t *comm;
7417 /* Initialize the thread data.
7418 * This can not be done in init_domain_decomposition,
7419 * as the numbers of threads is determined later.
7421 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7424 snew(comm->dth, comm->nth);
7427 if (EEL_PME(ir->coulombtype))
7429 init_ddpme(dd, &comm->ddpme[0], 0);
7430 if (comm->npmedecompdim >= 2)
7432 init_ddpme(dd, &comm->ddpme[1], 1);
7437 comm->npmenodes = 0;
7438 if (dd->pme_nodeid >= 0)
7440 gmx_fatal_collective(FARGS, NULL, dd,
7441 "Can not have separate PME nodes without PME electrostatics");
7447 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7449 if (comm->eDLB != edlbNO)
7451 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7454 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7455 if (comm->eDLB == edlbAUTO)
7459 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7461 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
7464 if (ir->ePBC == epbcNONE)
7466 vol_frac = 1 - 1/(double)dd->nnodes;
/* With PBC: home fraction plus the communicated zone volume */
7471 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7475 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7477 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7479 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
/* Test whether the requested cut-off fits the current DD grid: for
 * each decomposed dimension the required pulse count may not exceed
 * the DLB pulse limit, and with DLB the current local cells (and grid
 * jumps) must still cover the cut-off. The per-rank verdict is summed
 * over all ranks so every rank returns the same answer.
 * NOTE(review): decimated extract — braces and the final return paths
 * are missing from view; code kept byte-for-byte.
 */
7482 static gmx_bool test_dd_cutoff(t_commrec *cr,
7483 t_state *state, t_inputrec *ir,
7494 set_ddbox(dd, FALSE, cr, ir, state->box,
7495 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7499 for (d = 0; d < dd->ndim; d++)
7503 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7504 if (dynamic_dd_box(&ddbox, ir))
7506 inv_cell_size *= DD_PRES_SCALE_MARGIN;
/* Number of pulses needed to cover cutoff_req in this dimension */
7509 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7511 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7512 dd->comm->cd[d].np_dlb > 0)
7514 if (np > dd->comm->cd[d].np_dlb)
7519 /* If a current local cell size is smaller than the requested
7520 * cut-off, we could still fix it, but this gets very complicated.
7521 * Without fixing here, we might actually need more checks.
7523 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7530 if (dd->comm->eDLB != edlbNO)
7532 /* If DLB is not active yet, we don't need to check the grid jumps.
7533 * Actually we shouldn't, because then the grid jump data is not set.
7535 if (dd->comm->bDynLoadBal &&
7536 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7541 gmx_sumi(1, &LocallyLimited, cr);
7543 if (LocallyLimited > 0)
7552 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7555 gmx_bool bCutoffAllowed;
7557 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7561 cr->dd->comm->cutoff = cutoff_req;
7564 return bCutoffAllowed;
7567 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7569 gmx_domdec_comm_t *comm;
7571 comm = cr->dd->comm;
7573 /* Turn on the DLB limiting (might have been on already) */
7574 comm->bPMELoadBalDLBLimits = TRUE;
7576 /* Change the cut-off limit */
7577 comm->PMELoadBal_max_cutoff = comm->cutoff;
/* Merge newly received charge groups (from one communication pulse)
 * into the per-zone charge-group arrays. First shifts the CGs of
 * previous pulses to make room (updating the stored send indices
 * accordingly), then copies the received global indices and positions
 * in and extends the cg/atom index arrays.
 * NOTE(review): decimated extract — braces and some bookkeeping lines
 * are missing from view; code kept byte-for-byte.
 */
7580 static void merge_cg_buffers(int ncell,
7581 gmx_domdec_comm_dim_t *cd, int pulse,
7583 int *index_gl, int *recv_i,
7584 rvec *cg_cm, rvec *recv_vr,
7586 cginfo_mb_t *cginfo_mb, int *cginfo)
7588 gmx_domdec_ind_t *ind, *ind_p;
7589 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7590 int shift, shift_at;
7592 ind = &cd->ind[pulse];
7594 /* First correct the already stored data */
7595 shift = ind->nrecv[ncell];
7596 for (cell = ncell-1; cell >= 0; cell--)
7598 shift -= ind->nrecv[cell];
7601 /* Move the cg's present from previous grid pulses */
7602 cg0 = ncg_cell[ncell+cell];
7603 cg1 = ncg_cell[ncell+cell+1];
7604 cgindex[cg1+shift] = cgindex[cg1];
/* Iterate backwards so the in-place shift does not overwrite */
7605 for (cg = cg1-1; cg >= cg0; cg--)
7607 index_gl[cg+shift] = index_gl[cg];
7608 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7609 cgindex[cg+shift] = cgindex[cg];
7610 cginfo[cg+shift] = cginfo[cg];
7612 /* Correct the already stored send indices for the shift */
7613 for (p = 1; p <= pulse; p++)
7615 ind_p = &cd->ind[p];
7617 for (c = 0; c < cell; c++)
7619 cg0 += ind_p->nsend[c];
7621 cg1 = cg0 + ind_p->nsend[cell];
7622 for (cg = cg0; cg < cg1; cg++)
7624 ind_p->index[cg] += shift;
7630 /* Merge in the communicated buffers */
7634 for (cell = 0; cell < ncell; cell++)
7636 cg1 = ncg_cell[ncell+cell+1] + shift;
7639 /* Correct the old cg indices */
7640 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7642 cgindex[cg+1] += shift_at;
7645 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7647 /* Copy this charge group from the buffer */
7648 index_gl[cg1] = recv_i[cg0];
7649 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7650 /* Add it to the cgindex */
7651 cg_gl = index_gl[cg1];
7652 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7653 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7654 cgindex[cg1+1] = cgindex[cg1] + nat;
7659 shift += ind->nrecv[cell];
7660 ncg_cell[ncell+cell+1] = cg1;
7664 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7665 int nzone, int cg0, const int *cgindex)
7669 /* Store the atom block boundaries for easy copying of communication buffers
7672 for (zone = 0; zone < nzone; zone++)
7674 for (p = 0; p < cd->np; p++)
7676 cd->ind[p].cell2at0[zone] = cgindex[cg];
7677 cg += cd->ind[p].nrecv[zone];
7678 cd->ind[p].cell2at1[zone] = cgindex[cg];
7683 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7689 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7691 if (!bLocalCG[link->a[i]])
7700 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7702 real c[DIM][4]; /* the corners for the non-bonded communication */
7703 real cr0; /* corner for rounding */
7704 real cr1[4]; /* corners for rounding */
7705 real bc[DIM]; /* corners for bounded communication */
7706 real bcr1; /* corner for rounding for bonded communication */
/* Determine the corners of the domain(s) we are communicating with */
/* Fills a dd_corners_t with the lower zone boundaries (c), the
 * rounding corners (cr0/cr1) and, when multi-body bonded distances are
 * needed (bDistMB), the bonded-communication corners (bc/bcr1), using
 * the maxima over the i-zones that see each j-zone when DLB has made
 * the grid non-uniform.
 * NOTE(review): decimated extract — braces/guards missing; code kept
 * byte-for-byte.
 */
7711 set_dd_corners(const gmx_domdec_t *dd,
7712 int dim0, int dim1, int dim2,
7716 const gmx_domdec_comm_t *comm;
7717 const gmx_domdec_zones_t *zones;
7722 zones = &comm->zones;
7724 /* Keep the compiler happy */
7728 /* The first dimension is equal for all cells */
7729 c->c[0][0] = comm->cell_x0[dim0];
7732 c->bc[0] = c->c[0][0];
7737 /* This cell row is only seen from the first row */
7738 c->c[1][0] = comm->cell_x0[dim1];
7739 /* All rows can see this row */
7740 c->c[1][1] = comm->cell_x0[dim1];
7743 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7746 /* For the multi-body distance we need the maximum */
7747 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7750 /* Set the upper-right corner for rounding */
7751 c->cr0 = comm->cell_x1[dim0];
7756 for (j = 0; j < 4; j++)
7758 c->c[2][j] = comm->cell_x0[dim2];
7762 /* Use the maximum of the i-cells that see a j-cell */
7763 for (i = 0; i < zones->nizone; i++)
7765 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7771 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7777 /* For the multi-body distance we need the maximum */
7778 c->bc[2] = comm->cell_x0[dim2];
7779 for (i = 0; i < 2; i++)
7781 for (j = 0; j < 2; j++)
7783 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7789 /* Set the upper-right corner for rounding */
7790 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7791 * Only cell (0,0,0) can see cell 7 (1,1,1)
7793 c->cr1[0] = comm->cell_x1[dim1];
7794 c->cr1[3] = comm->cell_x1[dim1];
7797 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7800 /* For the multi-body distance we need the maximum */
7801 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
/* Determine which cg's we need to send in this pulse from this zone */
/* Scans charge groups [cg0,cg1) of a zone and selects those within the
 * non-bonded (r_comm2) or bonded (r_bcomm2) communication distance of
 * the receiving domain's corners, handling rectangular and triclinic
 * distance measures (including corner rounding). Selected cg indices
 * go into ind->index and *ibuf, their (pbc-corrected) positions into
 * vbuf; the cg and atom counts are returned through the tail pointers.
 * NOTE(review): decimated extract — braces and several bookkeeping
 * lines are missing from view; code kept byte-for-byte.
 */
7810 get_zone_pulse_cgs(gmx_domdec_t *dd,
7811 int zonei, int zone,
7813 const int *index_gl,
7815 int dim, int dim_ind,
7816 int dim0, int dim1, int dim2,
7817 real r_comm2, real r_bcomm2,
7821 real skew_fac2_d, real skew_fac_01,
7822 rvec *v_d, rvec *v_0, rvec *v_1,
7823 const dd_corners_t *c,
7825 gmx_bool bDistBonded,
7831 gmx_domdec_ind_t *ind,
7832 int **ibuf, int *ibuf_nalloc,
7838 gmx_domdec_comm_t *comm;
7840 gmx_bool bDistMB_pulse;
7842 real r2, rb2, r, tric_sh;
7845 int nsend_z, nsend, nat;
7849 bScrew = (dd->bScrewPBC && dim == XX);
7851 bDistMB_pulse = (bDistMB && bDistBonded);
7857 for (cg = cg0; cg < cg1; cg++)
7861 if (tric_dist[dim_ind] == 0)
7863 /* Rectangular direction, easy */
7864 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7871 r = cg_cm[cg][dim] - c->bc[dim_ind];
7877 /* Rounding gives at most a 16% reduction
7878 * in communicated atoms
7880 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7882 r = cg_cm[cg][dim0] - c->cr0;
7883 /* This is the first dimension, so always r >= 0 */
7890 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7892 r = cg_cm[cg][dim1] - c->cr1[zone];
7899 r = cg_cm[cg][dim1] - c->bcr1;
7909 /* Triclinic direction, more complicated */
7912 /* Rounding, conservative as the skew_fac multiplication
7913 * will slightly underestimate the distance.
7915 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7917 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7918 for (i = dim0+1; i < DIM; i++)
7920 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7922 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7925 rb[dim0] = rn[dim0];
7928 /* Take care that the cell planes along dim0 might not
7929 * be orthogonal to those along dim1 and dim2.
7931 for (i = 1; i <= dim_ind; i++)
7934 if (normal[dim0][dimd] > 0)
7936 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7939 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7944 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7946 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7948 for (i = dim1+1; i < DIM; i++)
7950 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7952 rn[dim1] += tric_sh;
7955 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7956 /* Take care of coupling of the distances
7957 * to the planes along dim0 and dim1 through dim2.
7959 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7960 /* Take care that the cell planes along dim1
7961 * might not be orthogonal to that along dim2.
7963 if (normal[dim1][dim2] > 0)
7965 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7971 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7974 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7975 /* Take care of coupling of the distances
7976 * to the planes along dim0 and dim1 through dim2.
7978 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7979 /* Take care that the cell planes along dim1
7980 * might not be orthogonal to that along dim2.
7982 if (normal[dim1][dim2] > 0)
7984 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7989 /* The distance along the communication direction */
7990 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7992 for (i = dim+1; i < DIM; i++)
7994 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7999 r2 += rn[dim]*rn[dim]*skew_fac2_d;
8000 /* Take care of coupling of the distances
8001 * to the planes along dim0 and dim1 through dim2.
8003 if (dim_ind == 1 && zonei == 1)
8005 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
8011 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
8014 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
8015 /* Take care of coupling of the distances
8016 * to the planes along dim0 and dim1 through dim2.
8018 if (dim_ind == 1 && zonei == 1)
8020 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
/* Selection: within non-bonded range, or within bonded range and
 * flagged as having (possibly missing) bonded partners.
 */
8028 ((bDistMB && rb2 < r_bcomm2) ||
8029 (bDist2B && r2 < r_bcomm2)) &&
8031 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
8032 missing_link(comm->cglink, index_gl[cg],
8035 /* Make an index to the local charge groups */
8036 if (nsend+1 > ind->nalloc)
8038 ind->nalloc = over_alloc_large(nsend+1);
8039 srenew(ind->index, ind->nalloc);
8041 if (nsend+1 > *ibuf_nalloc)
8043 *ibuf_nalloc = over_alloc_large(nsend+1);
8044 srenew(*ibuf, *ibuf_nalloc);
8046 ind->index[nsend] = cg;
8047 (*ibuf)[nsend] = index_gl[cg];
8049 vec_rvec_check_alloc(vbuf, nsend+1);
8051 if (dd->ci[dim] == 0)
8053 /* Correct cg_cm for pbc */
8054 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
/* Screw PBC: rotate y and z when shifting over the x boundary */
8057 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8058 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8063 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8066 nat += cgindex[cg+1] - cgindex[cg];
8072 *nsend_z_ptr = nsend_z;
/* Sets up the halo communication for one domain-decomposition step:
 * for every decomposition dimension and every communication pulse it
 * selects the charge groups to send (possibly multi-threaded), exchanges
 * the send/receive counts and the global cg indices and cg centers of
 * mass with the neighboring domains, and rebuilds the zone cg ranges,
 * cgindex and the total cg/atom counts (dd->ncg_tot, dd->nat_tot).
 * NOTE(review): this listing is line-sampled; several braces/branches
 * between the visible statements are not shown here.
 */
8075 static void setup_dd_communication(gmx_domdec_t *dd,
8076 matrix box, gmx_ddbox_t *ddbox,
8077 t_forcerec *fr, t_state *state, rvec **f)
8079 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8080 int nzone, nzone_send, zone, zonei, cg0, cg1;
8081 int c, i, j, cg, cg_gl, nrcg;
8082 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8083 gmx_domdec_comm_t *comm;
8084 gmx_domdec_zones_t *zones;
8085 gmx_domdec_comm_dim_t *cd;
8086 gmx_domdec_ind_t *ind;
8087 cginfo_mb_t *cginfo_mb;
8088 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8089 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8090 dd_corners_t corners;
8092 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8093 real skew_fac2_d, skew_fac_01;
8100 fprintf(debug, "Setting up DD communication\n");
/* cg_cm source depends on the cutoff scheme (group vs Verlet);
 * unknown schemes are a fatal internal inconsistency. */
8105 switch (fr->cutoff_scheme)
8114 gmx_incons("unimplemented");
8118 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8120 dim = dd->dim[dim_ind];
8122 /* Check if we need to use triclinic distances */
8123 tric_dist[dim_ind] = 0;
8124 for (i = 0; i <= dim_ind; i++)
8126 if (ddbox->tric_dir[dd->dim[i]])
8128 tric_dist[dim_ind] = 1;
8133 bBondComm = comm->bBondComm;
8135 /* Do we need to determine extra distances for multi-body bondeds? */
8136 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8138 /* Do we need to determine extra distances for only two-body bondeds? */
8139 bDist2B = (bBondComm && !bDistMB);
/* Squared cutoffs: non-bonded communication and multi-body bonded range */
8141 r_comm2 = sqr(comm->cutoff);
8142 r_bcomm2 = sqr(comm->cutoff_mbody);
8146 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8149 zones = &comm->zones;
/* dim1/dim2 are -1 when fewer than 2/3 DD dimensions are used */
8152 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8153 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8155 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8157 /* Triclinic stuff */
8158 normal = ddbox->normal;
8162 v_0 = ddbox->v[dim0];
8163 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8165 /* Determine the coupling coefficient for the distances
8166 * to the cell planes along dim0 and dim1 through dim2.
8167 * This is required for correct rounding.
8170 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8173 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8179 v_1 = ddbox->v[dim1];
8182 zone_cg_range = zones->cg_range;
8183 index_gl = dd->index_gl;
8184 cgindex = dd->cgindex;
8185 cginfo_mb = fr->cginfo_mb;
/* Zone 0 is the home zone; received cgs are appended starting at pos_cg */
8187 zone_cg_range[0] = 0;
8188 zone_cg_range[1] = dd->ncg_home;
8189 comm->zone_ncg1[0] = dd->ncg_home;
8190 pos_cg = dd->ncg_home;
8192 nat_tot = dd->nat_home;
/* Main loop: one backward communication setup per DD dimension */
8194 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8196 dim = dd->dim[dim_ind];
8197 cd = &comm->cd[dim_ind];
8199 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8201 /* No pbc in this dimension, the first node should not comm. */
8209 v_d = ddbox->v[dim];
8210 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8212 cd->bInPlace = TRUE;
8213 for (p = 0; p < cd->np; p++)
8215 /* Only atoms communicated in the first pulse are used
8216 * for multi-body bonded interactions or for bBondComm.
8218 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8223 for (zone = 0; zone < nzone_send; zone++)
8225 if (tric_dist[dim_ind] && dim_ind > 0)
8227 /* Determine slightly more optimized skew_fac's
8229 * This reduces the number of communicated atoms
8230 * by about 10% for 3D DD of rhombic dodecahedra.
8232 for (dimd = 0; dimd < dim; dimd++)
8234 sf2_round[dimd] = 1;
8235 if (ddbox->tric_dir[dimd])
8237 for (i = dd->dim[dimd]+1; i < DIM; i++)
8239 /* If we are shifted in dimension i
8240 * and the cell plane is tilted forward
8241 * in dimension i, skip this coupling.
8243 if (!(zones->shift[nzone+zone][i] &&
8244 ddbox->v[dimd][i][dimd] >= 0))
8247 sqr(ddbox->v[dimd][i][dimd]);
8250 sf2_round[dimd] = 1/sf2_round[dimd];
8255 zonei = zone_perm[dim_ind][zone];
8258 /* Here we permute the zones to obtain a convenient order
8259 * for neighbor searching
8261 cg0 = zone_cg_range[zonei];
8262 cg1 = zone_cg_range[zonei+1];
8266 /* Look only at the cg's received in the previous grid pulse
8268 cg1 = zone_cg_range[nzone+zone+1];
8269 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
/* Thread-parallel selection of the cgs to send: the cg range
 * [cg0,cg1) is split in equal chunks over comm->nth threads. */
8272 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8273 for (th = 0; th < comm->nth; th++)
8275 gmx_domdec_ind_t *ind_p;
8276 int **ibuf_p, *ibuf_nalloc_p;
8278 int *nsend_p, *nat_p;
8284 /* Thread 0 writes in the comm buffers */
8286 ibuf_p = &comm->buf_int;
8287 ibuf_nalloc_p = &comm->nalloc_int;
8288 vbuf_p = &comm->vbuf;
8291 nsend_zone_p = &ind->nsend[zone];
8295 /* Other threads write into temp buffers */
8296 ind_p = &comm->dth[th].ind;
8297 ibuf_p = &comm->dth[th].ibuf;
8298 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8299 vbuf_p = &comm->dth[th].vbuf;
8300 nsend_p = &comm->dth[th].nsend;
8301 nat_p = &comm->dth[th].nat;
8302 nsend_zone_p = &comm->dth[th].nsend_zone;
8304 comm->dth[th].nsend = 0;
8305 comm->dth[th].nat = 0;
8306 comm->dth[th].nsend_zone = 0;
8316 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8317 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8320 /* Get the cg's for this pulse in this zone */
8321 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8323 dim, dim_ind, dim0, dim1, dim2,
8326 normal, skew_fac2_d, skew_fac_01,
8327 v_d, v_0, v_1, &corners, sf2_round,
8328 bDistBonded, bBondComm,
8332 ibuf_p, ibuf_nalloc_p,
8338 /* Append data of threads>=1 to the communication buffers */
8339 for (th = 1; th < comm->nth; th++)
8341 dd_comm_setup_work_t *dth;
8344 dth = &comm->dth[th];
8346 ns1 = nsend + dth->nsend_zone;
8347 if (ns1 > ind->nalloc)
8349 ind->nalloc = over_alloc_dd(ns1);
8350 srenew(ind->index, ind->nalloc);
8352 if (ns1 > comm->nalloc_int)
8354 comm->nalloc_int = over_alloc_dd(ns1);
8355 srenew(comm->buf_int, comm->nalloc_int);
8357 if (ns1 > comm->vbuf.nalloc)
8359 comm->vbuf.nalloc = over_alloc_dd(ns1);
8360 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8363 for (i = 0; i < dth->nsend_zone; i++)
8365 ind->index[nsend] = dth->ind.index[i];
8366 comm->buf_int[nsend] = dth->ibuf[i];
8367 copy_rvec(dth->vbuf.v[i],
8368 comm->vbuf.v[nsend]);
8372 ind->nsend[zone] += dth->nsend_zone;
8375 /* Clear the counts in case we do not have pbc */
8376 for (zone = nzone_send; zone < nzone; zone++)
8378 ind->nsend[zone] = 0;
/* The last two entries carry the totals: #cgs and #atoms to send */
8380 ind->nsend[nzone] = nsend;
8381 ind->nsend[nzone+1] = nat;
8382 /* Communicate the number of cg's and atoms to receive */
8383 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8384 ind->nsend, nzone+2,
8385 ind->nrecv, nzone+2);
8387 /* The rvec buffer is also required for atom buffers of size nsend
8388 * in dd_move_x and dd_move_f.
8390 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8394 /* We can receive in place if only the last zone is not empty */
8395 for (zone = 0; zone < nzone-1; zone++)
8397 if (ind->nrecv[zone] > 0)
8399 cd->bInPlace = FALSE;
8404 /* The int buffer is only required here for the cg indices */
8405 if (ind->nrecv[nzone] > comm->nalloc_int2)
8407 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8408 srenew(comm->buf_int2, comm->nalloc_int2);
8410 /* The rvec buffer is also required for atom buffers
8411 * of size nrecv in dd_move_x and dd_move_f.
8413 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8414 vec_rvec_check_alloc(&comm->vbuf2, i);
8418 /* Make space for the global cg indices */
8419 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8420 || dd->cg_nalloc == 0)
8422 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8423 srenew(index_gl, dd->cg_nalloc);
8424 srenew(cgindex, dd->cg_nalloc+1);
8426 /* Communicate the global cg indices */
8429 recv_i = index_gl + pos_cg;
8433 recv_i = comm->buf_int2;
8435 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8436 comm->buf_int, nsend,
8437 recv_i, ind->nrecv[nzone]);
8439 /* Make space for cg_cm */
8440 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8441 if (fr->cutoff_scheme == ecutsGROUP)
8449 /* Communicate cg_cm */
8452 recv_vr = cg_cm + pos_cg;
8456 recv_vr = comm->vbuf2.v;
8458 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8459 comm->vbuf.v, nsend,
8460 recv_vr, ind->nrecv[nzone]);
8462 /* Make the charge group index */
/* First pulse fills all zones, later pulses only extend the last one */
8465 zone = (p == 0 ? 0 : nzone - 1);
8466 while (zone < nzone)
8468 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8470 cg_gl = index_gl[pos_cg];
8471 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8472 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8473 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8476 /* Update the charge group presence,
8477 * so we can use it in the next pass of the loop.
8479 comm->bLocalCG[cg_gl] = TRUE;
8485 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8488 zone_cg_range[nzone+zone] = pos_cg;
8493 /* This part of the code is never executed with bBondComm. */
8494 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8495 index_gl, recv_i, cg_cm, recv_vr,
8496 cgindex, fr->cginfo_mb, fr->cginfo);
8497 pos_cg += ind->nrecv[nzone];
8499 nat_tot += ind->nrecv[nzone+1];
8503 /* Store the atom block for easy copying of communication buffers */
8504 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
/* srenew above may have moved the arrays; store the new pointers */
8508 dd->index_gl = index_gl;
8509 dd->cgindex = cgindex;
8511 dd->ncg_tot = zone_cg_range[zones->n];
8512 dd->nat_tot = nat_tot;
8513 comm->nat[ddnatHOME] = dd->nat_home;
8514 for (i = ddnatZONE; i < ddnatNR; i++)
8516 comm->nat[i] = dd->nat_tot;
8521 /* We don't need to update cginfo, since that was already done above.
8522 * So we pass NULL for the forcerec.
8524 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8525 NULL, comm->bLocalCG);
8530 fprintf(debug, "Finished setting up DD communication, zones:");
8531 for (c = 0; c < zones->n; c++)
8533 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8535 fprintf(debug, "\n");
/* Sets the charge-group index boundaries of each i-zone for neighbor
 * searching: cg1 is the end of the i-zone's own cg range, jcg0/jcg1
 * delimit the cg range of the j-zones [j0,j1) it interacts with.
 */
8539 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8543 for (c = 0; c < zones->nizone; c++)
8545 zones->izone[c].cg1 = zones->cg_range[c+1];
8546 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8547 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
/* Determines the spatial extents (x0/x1 limits and bounding boxes
 * bb_x0/bb_x1) of the DD zones [zone_start,zone_end), starting from the
 * local cell limits and extending shifted dimensions by the cutoffs,
 * with corrections for staggered grids (bGridJump) and triclinic boxes.
 * Also computes the home-zone cg density when zone_start == 0.
 * NOTE(review): this listing is line-sampled; some braces/branches
 * between the visible statements are not shown here.
 */
8551 static void set_zones_size(gmx_domdec_t *dd,
8552 matrix box, const gmx_ddbox_t *ddbox,
8553 int zone_start, int zone_end)
8555 gmx_domdec_comm_t *comm;
8556 gmx_domdec_zones_t *zones;
8558 int z, zi, zj0, zj1, d, dim;
8561 real size_j, add_tric;
8566 zones = &comm->zones;
8568 /* Do we need to determine extra distances for multi-body bondeds? */
8569 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8571 for (z = zone_start; z < zone_end; z++)
8573 /* Copy cell limits to zone limits.
8574 * Valid for non-DD dims and non-shifted dims.
8576 copy_rvec(comm->cell_x0, zones->size[z].x0);
8577 copy_rvec(comm->cell_x1, zones->size[z].x1);
8580 for (d = 0; d < dd->ndim; d++)
8584 for (z = 0; z < zones->n; z++)
8586 /* With a staggered grid we have different sizes
8587 * for non-shifted dimensions.
8589 if (dd->bGridJump && zones->shift[z][dim] == 0)
/* zone_d1/zone_d2 hold the staggered limits communicated
 * earlier; indexed by the shifts along the preceding dims */
8593 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8594 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8598 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8599 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
/* Scale the cutoffs to plane-normal distances for triclinic dims */
8605 rcmbs = comm->cutoff_mbody;
8606 if (ddbox->tric_dir[dim])
8608 rcs /= ddbox->skew_fac[dim];
8609 rcmbs /= ddbox->skew_fac[dim];
8612 /* Set the lower limit for the shifted zone dimensions */
8613 for (z = zone_start; z < zone_end; z++)
8615 if (zones->shift[z][dim] > 0)
8618 if (!dd->bGridJump || d == 0)
8620 zones->size[z].x0[dim] = comm->cell_x1[dim];
8621 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8625 /* Here we take the lower limit of the zone from
8626 * the lowest domain of the zone below.
8630 zones->size[z].x0[dim] =
8631 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8637 zones->size[z].x0[dim] =
8638 zones->size[zone_perm[2][z-4]].x0[dim];
8642 zones->size[z].x0[dim] =
8643 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8646 /* A temporary limit, is updated below */
8647 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8651 for (zi = 0; zi < zones->nizone; zi++)
8653 if (zones->shift[zi][dim] == 0)
8655 /* This takes the whole zone into account.
8656 * With multiple pulses this will lead
8657 * to a larger zone than strictly necessary.
8659 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8660 zones->size[zi].x1[dim]+rcmbs);
8668 /* Loop over the i-zones to set the upper limit of each
8671 for (zi = 0; zi < zones->nizone; zi++)
8673 if (zones->shift[zi][dim] == 0)
8675 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8677 if (zones->shift[z][dim] > 0)
8679 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8680 zones->size[zi].x1[dim]+rcs);
/* Compute the Cartesian bounding box of each zone from its
 * extreme corners, applying the triclinic couplings */
8687 for (z = zone_start; z < zone_end; z++)
8689 /* Initialization only required to keep the compiler happy */
8690 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8693 /* To determine the bounding box for a zone we need to find
8694 * the extreme corners of 4, 2 or 1 corners.
8696 nc = 1 << (ddbox->npbcdim - 1);
8698 for (c = 0; c < nc; c++)
8700 /* Set up a zone corner at x=0, ignoring trilinic couplings */
8704 corner[YY] = zones->size[z].x0[YY];
8708 corner[YY] = zones->size[z].x1[YY];
8712 corner[ZZ] = zones->size[z].x0[ZZ];
8716 corner[ZZ] = zones->size[z].x1[ZZ];
8718 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8720 /* With 1D domain decomposition the cg's are not in
8721 * the triclinic box, but triclinic x-y and rectangular y-z.
8722 * Shift y back, so it will later end up at 0.
8724 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8726 /* Apply the triclinic couplings */
8727 for (i = YY; i < ddbox->npbcdim; i++)
8729 for (j = XX; j < i; j++)
8731 corner[j] += corner[i]*box[i][j]/box[i][i];
8736 copy_rvec(corner, corner_min);
8737 copy_rvec(corner, corner_max);
8741 for (i = 0; i < DIM; i++)
8743 corner_min[i] = min(corner_min[i], corner[i]);
8744 corner_max[i] = max(corner_max[i], corner[i]);
8748 /* Copy the extreme cornes without offset along x */
8749 for (i = 0; i < DIM; i++)
8751 zones->size[z].bb_x0[i] = corner_min[i];
8752 zones->size[z].bb_x1[i] = corner_max[i];
8754 /* Add the offset along x */
8755 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8756 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
8759 if (zone_start == 0)
/* Home-zone charge-group density = #cgs in zone 0 / zone-0 volume */
8762 for (dim = 0; dim < DIM; dim++)
8764 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8766 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8771 for (z = zone_start; z < zone_end; z++)
8773 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8775 zones->size[z].x0[XX], zones->size[z].x1[XX],
8776 zones->size[z].x0[YY], zones->size[z].x1[YY],
8777 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8778 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8780 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8781 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8782 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
/* qsort()-style comparator for gmx_cgsort_t: orders primarily by the
 * ns grid cell index (nsc) and — on the lines elided from this listing,
 * presumably when nsc ties — by the global cg index (ind_gl).
 */
8787 static int comp_cgsort(const void *a, const void *b)
8791 gmx_cgsort_t *cga, *cgb;
8792 cga = (gmx_cgsort_t *)a;
8793 cgb = (gmx_cgsort_t *)b;
8795 comp = cga->nsc - cgb->nsc;
8798 comp = cga->ind_gl - cgb->ind_gl;
/* Permutes the n entries of an int array according to sort[i].ind,
 * using buf as scratch: gather into buf in sorted order, then copy
 * back (copy-back assignment is on a line elided from this listing).
 */
8804 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8809 /* Order the data */
8810 for (i = 0; i < n; i++)
8812 buf[i] = a[sort[i].ind];
8815 /* Copy back to the original array */
8816 for (i = 0; i < n; i++)
/* Permutes the n rvec entries of v according to sort[i].ind, using
 * buf as scratch (gather in sorted order, then copy back in place).
 */
8822 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8827 /* Order the data */
8828 for (i = 0; i < n; i++)
8830 copy_rvec(v[sort[i].ind], buf[i]);
8833 /* Copy back to the original array */
8834 for (i = 0; i < n; i++)
8836 copy_rvec(buf[i], v[i]);
/* Permutes per-atom rvec data v according to the charge-group sort
 * order: for each sorted cg, copies its atom range [cgindex[ind],
 * cgindex[ind+1]) into buf, then copies buf back over v.
 * With cgindex == NULL every cg is a single atom, so the per-cg
 * variant order_vec_cg is used directly.
 */
8840 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8843 int a, atot, cg, cg0, cg1, i;
8845 if (cgindex == NULL)
8847 /* Avoid the useless loop of the atoms within a cg */
8848 order_vec_cg(ncg, sort, v, buf);
8853 /* Order the data */
8855 for (cg = 0; cg < ncg; cg++)
8857 cg0 = cgindex[sort[cg].ind];
8858 cg1 = cgindex[sort[cg].ind+1];
8859 for (i = cg0; i < cg1; i++)
8861 copy_rvec(v[i], buf[a]);
8867 /* Copy back to the original array */
8868 for (a = 0; a < atot; a++)
8870 copy_rvec(buf[a], v[a]);
/* Produces a fully sorted array sort1 from an already-sorted list
 * sort2 (stationary cgs) and an unsorted list sort_new (moved/new cgs):
 * qsorts sort_new, then merges the two sorted lists, comparing by
 * (nsc, ind_gl) — the same key order as comp_cgsort.
 */
8874 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8875 int nsort_new, gmx_cgsort_t *sort_new,
8876 gmx_cgsort_t *sort1)
8880 /* The new indices are not very ordered, so we qsort them */
8881 qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8883 /* sort2 is already ordered, so now we can merge the two arrays */
8887 while (i2 < nsort2 || i_new < nsort_new)
/* One input exhausted: drain the other */
8891 sort1[i1++] = sort_new[i_new++];
8893 else if (i_new == nsort_new)
8895 sort1[i1++] = sort2[i2++];
8897 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8898 (sort2[i2].nsc == sort_new[i_new].nsc &&
8899 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8901 sort1[i1++] = sort2[i2++];
8905 sort1[i1++] = sort_new[i_new++];
/* Determines the sort order of the home charge groups for the group
 * cutoff scheme, keyed on the ns grid cell index (and ind_gl as a
 * tie-break). With a valid previous count (ncg_home_old >= 0) it splits
 * cgs into a stationary (already ordered) list and a moved/new list and
 * merges them via ordered_sort; otherwise it qsorts everything.
 * Returns (on a line elided from this listing) the new home cg count;
 * cells with index >= moved signal cgs that left this node.
 */
8910 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8912 gmx_domdec_sort_t *sort;
8913 gmx_cgsort_t *cgsort, *sort_i;
8914 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8915 int sort_last, sort_skip;
8917 sort = dd->comm->sort;
8919 a = fr->ns.grid->cell_index;
/* Cell indices >= moved mark cgs that moved to another node */
8921 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8923 if (ncg_home_old >= 0)
8925 /* The charge groups that remained in the same ns grid cell
8926 * are completely ordered. So we can sort efficiently by sorting
8927 * the charge groups that did move into the stationary list.
8932 for (i = 0; i < dd->ncg_home; i++)
8934 /* Check if this cg did not move to another node */
8937 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8939 /* This cg is new on this node or moved ns grid cell */
8940 if (nsort_new >= sort->sort_new_nalloc)
8942 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8943 srenew(sort->sort_new, sort->sort_new_nalloc);
8945 sort_i = &(sort->sort_new[nsort_new++]);
8949 /* This cg did not move */
8950 sort_i = &(sort->sort2[nsort2++]);
8952 /* Sort on the ns grid cell indices
8953 * and the global topology index.
8954 * index_gl is irrelevant with cell ns,
8955 * but we set it here anyhow to avoid a conditional.
8958 sort_i->ind_gl = dd->index_gl[i];
8965 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8968 /* Sort efficiently */
8969 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8974 cgsort = sort->sort;
8976 for (i = 0; i < dd->ncg_home; i++)
8978 /* Sort on the ns grid cell indices
8979 * and the global topology index
8981 cgsort[i].nsc = a[i];
8982 cgsort[i].ind_gl = dd->index_gl[i];
8984 if (cgsort[i].nsc < moved)
8991 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8993 /* Determine the order of the charge groups using qsort */
8994 qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
/* Determines the sort order for the Verlet (nbnxn) cutoff scheme by
 * copying the atom order straight from the nbnxn search grid; returns
 * (on a line elided from this listing) the resulting count ncg_new.
 */
9000 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
9003 int ncg_new, i, *a, na;
9005 sort = dd->comm->sort->sort;
9007 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
9010 for (i = 0; i < na; i++)
9014 sort[ncg_new].ind = a[i];
/* Sorts the local state on charge-group (grid-cell) order: obtains the
 * sort order for the active cutoff scheme, drops cgs that left this
 * node (dd->ncg_home = ncg_new), reorders all distributed state vectors
 * (x, v, sd_X, cg_p), cg_cm (group scheme only), the global cg index,
 * cginfo and cgindex, and finally resets dd->nat_home and syncs the
 * grid bookkeeping for the chosen scheme.
 * NOTE(review): this listing is line-sampled; some braces/branches
 * between the visible statements are not shown here.
 */
9022 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
9025 gmx_domdec_sort_t *sort;
9026 gmx_cgsort_t *cgsort, *sort_i;
9028 int ncg_new, i, *ibuf, cgsize;
9031 sort = dd->comm->sort;
9033 if (dd->ncg_home > sort->sort_nalloc)
9035 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9036 srenew(sort->sort, sort->sort_nalloc);
9037 srenew(sort->sort2, sort->sort_nalloc);
9039 cgsort = sort->sort;
9041 switch (fr->cutoff_scheme)
9044 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9047 ncg_new = dd_sort_order_nbnxn(dd, fr);
9050 gmx_incons("unimplemented");
9054 /* We alloc with the old size, since cgindex is still old */
9055 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9056 vbuf = dd->comm->vbuf.v;
9060 cgindex = dd->cgindex;
9067 /* Remove the charge groups which are no longer at home here */
9068 dd->ncg_home = ncg_new;
9071 fprintf(debug, "Set the new home charge group count to %d\n",
9075 /* Reorder the state */
9076 for (i = 0; i < estNR; i++)
9078 if (EST_DISTR(i) && (state->flags & (1<<i)))
9083 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9086 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9089 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9092 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9096 case estDISRE_INITF:
9097 case estDISRE_RM3TAV:
9098 case estORIRE_INITF:
9100 /* No ordering required */
9103 gmx_incons("Unknown state entry encountered in dd_sort_state");
9108 if (fr->cutoff_scheme == ecutsGROUP)
9111 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9114 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9116 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9117 srenew(sort->ibuf, sort->ibuf_nalloc);
9120 /* Reorder the global cg index */
9121 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9122 /* Reorder the cginfo */
9123 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9124 /* Rebuild the local cg index */
/* Prefix-sum of the (reordered) cg sizes into ibuf, then copy over */
9128 for (i = 0; i < dd->ncg_home; i++)
9130 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9131 ibuf[i+1] = ibuf[i] + cgsize;
9133 for (i = 0; i < dd->ncg_home+1; i++)
9135 dd->cgindex[i] = ibuf[i];
9140 for (i = 0; i < dd->ncg_home+1; i++)
9145 /* Set the home atom number */
9146 dd->nat_home = dd->cgindex[dd->ncg_home];
9148 if (fr->cutoff_scheme == ecutsVERLET)
9150 /* The atoms are now exactly in grid order, update the grid order */
9151 nbnxn_set_atomorder(fr->nbv->nbs);
9155 /* Copy the sorted ns cell indices back to the ns grid struct */
9156 for (i = 0; i < dd->ncg_home; i++)
9158 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9160 fr->ns.grid->nr = dd->ncg_home;
/* Accumulates per-step DD communication statistics: for each special
 * atom category adds the number of atoms in that category (the
 * difference of consecutive comm->nat cumulative counts) to sum_nat.
 */
9164 static void add_dd_statistics(gmx_domdec_t *dd)
9166 gmx_domdec_comm_t *comm;
9171 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9173 comm->sum_nat[ddnat-ddnatZONE] +=
9174 comm->nat[ddnat] - comm->nat[ddnat-1];
/* Public API: resets the accumulated DD statistics (sum_nat per
 * category) and the load-balancing counters/limits, so subsequent
 * accounting starts fresh for total-run reporting.
 */
9179 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9181 gmx_domdec_comm_t *comm;
9186 /* Reset all the statistics and counters for total run counting */
9187 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9189 comm->sum_nat[ddnat-ddnatZONE] = 0;
9193 comm->load_step = 0;
9196 clear_ivec(comm->load_lim);
/* Public API: reduces the DD statistics over all ranks (gmx_sumd) and
 * prints the average number of atoms communicated per step to the log:
 * per-category lines for forces, vsites and LINCS constraints, plus
 * the average DD load imbalance when load was recorded for dynamics.
 */
9201 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9203 gmx_domdec_comm_t *comm;
9207 comm = cr->dd->comm;
9209 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9216 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9218 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
/* av = total atoms communicated in this category / #decompositions */
9220 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9225 " av. #atoms communicated per step for force: %d x %.1f\n",
9229 if (cr->dd->vsite_comm)
/* Vsites are communicated 3x with PME/Ewald electrostatics, else 2x */
9232 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9233 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9238 if (cr->dd->constraint_comm)
9241 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9242 1 + ir->nLincsIter, av);
9246 gmx_incons(" Unknown type for DD statistics");
9249 fprintf(fplog, "\n");
9251 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9253 print_dd_load_av(fplog, cr->dd);
9257 void dd_partition_system(FILE *fplog,
9258 gmx_large_int_t step,
9260 gmx_bool bMasterState,
9262 t_state *state_global,
9263 gmx_mtop_t *top_global,
9265 t_state *state_local,
9268 gmx_localtop_t *top_local,
9271 gmx_shellfc_t shellfc,
9272 gmx_constr_t constr,
9274 gmx_wallcycle_t wcycle,
9278 gmx_domdec_comm_t *comm;
9279 gmx_ddbox_t ddbox = {0};
9281 gmx_large_int_t step_pcoupl;
9282 rvec cell_ns_x0, cell_ns_x1;
9283 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9284 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9285 gmx_bool bRedist, bSortCG, bResortAll;
9286 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9293 bBoxChanged = (bMasterState || DEFORM(*ir));
9294 if (ir->epc != epcNO)
9296 /* With nstpcouple > 1 pressure coupling happens.
9297 * one step after calculating the pressure.
9298 * Box scaling happens at the end of the MD step,
9299 * after the DD partitioning.
9300 * We therefore have to do DLB in the first partitioning
9301 * after an MD step where P-coupling occured.
9302 * We need to determine the last step in which p-coupling occurred.
9303 * MRS -- need to validate this for vv?
9308 step_pcoupl = step - 1;
9312 step_pcoupl = ((step - 1)/n)*n + 1;
9314 if (step_pcoupl >= comm->partition_step)
9320 bNStGlobalComm = (step % nstglobalcomm == 0);
9322 if (!comm->bDynLoadBal)
9328 /* Should we do dynamic load balancing this step?
9329 * Since it requires (possibly expensive) global communication,
9330 * we might want to do DLB less frequently.
9332 if (bBoxChanged || ir->epc != epcNO)
9334 bDoDLB = bBoxChanged;
9338 bDoDLB = bNStGlobalComm;
9342 /* Check if we have recorded loads on the nodes */
9343 if (comm->bRecordLoad && dd_load_count(comm))
9345 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9347 /* Check if we should use DLB at the second partitioning
9348 * and every 100 partitionings,
9349 * so the extra communication cost is negligible.
9351 n = max(100, nstglobalcomm);
9352 bCheckDLB = (comm->n_load_collect == 0 ||
9353 comm->n_load_have % n == n-1);
9360 /* Print load every nstlog, first and last step to the log file */
9361 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9362 comm->n_load_collect == 0 ||
9364 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9366 /* Avoid extra communication due to verbose screen output
9367 * when nstglobalcomm is set.
9369 if (bDoDLB || bLogLoad || bCheckDLB ||
9370 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9372 get_load_distribution(dd, wcycle);
9377 dd_print_load(fplog, dd, step-1);
9381 dd_print_load_verbose(dd);
9384 comm->n_load_collect++;
9388 /* Since the timings are node dependent, the master decides */
9392 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
9395 fprintf(debug, "step %s, imb loss %f\n",
9396 gmx_step_str(step, sbuf),
9397 dd_force_imb_perf_loss(dd));
9400 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9403 turn_on_dlb(fplog, cr, step);
9408 comm->n_load_have++;
9411 cgs_gl = &comm->cgs_gl;
9416 /* Clear the old state */
9417 clear_dd_indices(dd, 0, 0);
9420 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9421 TRUE, cgs_gl, state_global->x, &ddbox);
9423 get_cg_distribution(fplog, step, dd, cgs_gl,
9424 state_global->box, &ddbox, state_global->x);
9426 dd_distribute_state(dd, cgs_gl,
9427 state_global, state_local, f);
9429 dd_make_local_cgs(dd, &top_local->cgs);
9431 /* Ensure that we have space for the new distribution */
9432 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9434 if (fr->cutoff_scheme == ecutsGROUP)
9436 calc_cgcm(fplog, 0, dd->ncg_home,
9437 &top_local->cgs, state_local->x, fr->cg_cm);
9440 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9442 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9444 else if (state_local->ddp_count != dd->ddp_count)
9446 if (state_local->ddp_count > dd->ddp_count)
9448 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9451 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9453 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9456 /* Clear the old state */
9457 clear_dd_indices(dd, 0, 0);
9459 /* Build the new indices */
9460 rebuild_cgindex(dd, cgs_gl->index, state_local);
9461 make_dd_indices(dd, cgs_gl->index, 0);
9462 ncgindex_set = dd->ncg_home;
9464 if (fr->cutoff_scheme == ecutsGROUP)
9466 /* Redetermine the cg COMs */
9467 calc_cgcm(fplog, 0, dd->ncg_home,
9468 &top_local->cgs, state_local->x, fr->cg_cm);
9471 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9473 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9475 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9476 TRUE, &top_local->cgs, state_local->x, &ddbox);
9478 bRedist = comm->bDynLoadBal;
9482 /* We have the full state, only redistribute the cgs */
9484 /* Clear the non-home indices */
9485 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9488 /* Avoid global communication for dim's without pbc and -gcom */
9489 if (!bNStGlobalComm)
9491 copy_rvec(comm->box0, ddbox.box0 );
9492 copy_rvec(comm->box_size, ddbox.box_size);
9494 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9495 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9500 /* For dim's without pbc and -gcom */
9501 copy_rvec(ddbox.box0, comm->box0 );
9502 copy_rvec(ddbox.box_size, comm->box_size);
9504 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9507 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9509 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9512 /* Check if we should sort the charge groups */
9513 if (comm->nstSortCG > 0)
9515 bSortCG = (bMasterState ||
9516 (bRedist && (step % comm->nstSortCG == 0)));
9523 ncg_home_old = dd->ncg_home;
9528 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9530 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9532 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9534 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9537 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9539 &comm->cell_x0, &comm->cell_x1,
9540 dd->ncg_home, fr->cg_cm,
9541 cell_ns_x0, cell_ns_x1, &grid_density);
9545 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9548 switch (fr->cutoff_scheme)
9551 copy_ivec(fr->ns.grid->n, ncells_old);
9552 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9553 state_local->box, cell_ns_x0, cell_ns_x1,
9554 fr->rlistlong, grid_density);
9557 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9560 gmx_incons("unimplemented");
9562 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9563 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9567 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9569 /* Sort the state on charge group position.
9570 * This enables exact restarts from this step.
9571 * It also improves performance by about 15% with larger numbers
9572 * of atoms per node.
9575 /* Fill the ns grid with the home cell,
9576 * so we can sort with the indices.
9578 set_zones_ncg_home(dd);
9580 switch (fr->cutoff_scheme)
9583 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9585 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9587 comm->zones.size[0].bb_x0,
9588 comm->zones.size[0].bb_x1,
9590 comm->zones.dens_zone0,
9593 ncg_moved, bRedist ? comm->moved : NULL,
9594 fr->nbv->grp[eintLocal].kernel_type,
9595 fr->nbv->grp[eintLocal].nbat);
9597 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9600 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9601 0, dd->ncg_home, fr->cg_cm);
9603 copy_ivec(fr->ns.grid->n, ncells_new);
9606 gmx_incons("unimplemented");
9609 bResortAll = bMasterState;
9611 /* Check if we can use the old order and ns grid cell indices
9612 * of the charge groups to sort the charge groups efficiently.
9614 if (ncells_new[XX] != ncells_old[XX] ||
9615 ncells_new[YY] != ncells_old[YY] ||
9616 ncells_new[ZZ] != ncells_old[ZZ])
9623 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9624 gmx_step_str(step, sbuf), dd->ncg_home);
9626 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9627 bResortAll ? -1 : ncg_home_old);
9628 /* Rebuild all the indices */
9629 ga2la_clear(dd->ga2la);
9632 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9635 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9637 /* Set up the communication and communicate the coordinates */
9638 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9640 /* Set the indices */
9641 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9643 /* Set the charge group boundaries for neighbor searching */
9644 set_cg_boundaries(&comm->zones);
9646 if (fr->cutoff_scheme == ecutsVERLET)
9648 set_zones_size(dd, state_local->box, &ddbox,
9649 bSortCG ? 1 : 0, comm->zones.n);
9652 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9655 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9656 -1,state_local->x,state_local->box);
9659 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9661 /* Extract a local topology from the global topology */
9662 for (i = 0; i < dd->ndim; i++)
9664 np[dd->dim[i]] = comm->cd[i].np;
9666 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9667 comm->cellsize_min, np,
9669 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9670 vsite, top_global, top_local);
9672 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9674 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9676 /* Set up the special atom communication */
9677 n = comm->nat[ddnatZONE];
9678 for (i = ddnatZONE+1; i < ddnatNR; i++)
9683 if (vsite && vsite->n_intercg_vsite)
9685 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9689 if (dd->bInterCGcons || dd->bInterCGsettles)
9691 /* Only for inter-cg constraints we need special code */
9692 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9693 constr, ir->nProjOrder,
9694 top_local->idef.il);
9698 gmx_incons("Unknown special atom type setup");
9703 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9705 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9707 /* Make space for the extra coordinates for virtual site
9708 * or constraint communication.
9710 state_local->natoms = comm->nat[ddnatNR-1];
9711 if (state_local->natoms > state_local->nalloc)
9713 dd_realloc_state(state_local, f, state_local->natoms);
9716 if (fr->bF_NoVirSum)
9718 if (vsite && vsite->n_intercg_vsite)
9720 nat_f_novirsum = comm->nat[ddnatVSITE];
9724 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9726 nat_f_novirsum = dd->nat_tot;
9730 nat_f_novirsum = dd->nat_home;
9739 /* Set the number of atoms required for the force calculation.
9740 * Forces need to be constrained when using a twin-range setup
9741 * or with energy minimization. For simple simulations we could
9742 * avoid some allocation, zeroing and copying, but this is
9743 * probably not worth the complications and checking.
9745 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9746 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9748 /* We make all the mdatoms up to nat_tot_con.
9749 * We could save some work by only setting invmass
9750 * between nat_tot and nat_tot_con.
9752 /* This call also sets the new number of home particles to dd->nat_home */
9753 atoms2md(top_global, ir,
9754 comm->nat[ddnatCON], dd->gatindex, 0, dd->nat_home, mdatoms);
9756 /* Now we have the charges we can sort the FE interactions */
9757 dd_sort_local_top(dd, mdatoms, top_local);
9761 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9762 split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
9767 /* Make the local shell stuff, currently no communication is done */
9768 make_local_shells(cr, mdatoms, shellfc);
9771 if (ir->implicit_solvent)
9773 make_local_gb(cr, fr->born, ir->gb_algorithm);
9776 setup_bonded_threading(fr, &top_local->idef);
9778 if (!(cr->duty & DUTY_PME))
9780 /* Send the charges to our PME only node */
9781 gmx_pme_send_q(cr, mdatoms->nChargePerturbed,
9782 mdatoms->chargeA, mdatoms->chargeB,
9783 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9788 set_constraints(constr, top_local, ir, mdatoms, cr);
9791 if (ir->ePull != epullNO)
9793 /* Update the local pull groups */
9794 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9799 /* Update the local rotation groups */
9800 dd_make_local_rotation_groups(dd, ir->rot);
9804 add_dd_statistics(dd);
9806 /* Make sure we only count the cycles for this DD partitioning */
9807 clear_dd_cycle_counts(dd);
9809 /* Because the order of the atoms might have changed since
9810 * the last vsite construction, we need to communicate the constructing
9811 * atom coordinates again (for spreading the forces this MD step).
9813 dd_move_x_vsites(dd, state_local->box, state_local->x);
9815 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9817 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9819 dd_move_x(dd, state_local->box, state_local->x);
9820 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9821 -1, state_local->x, state_local->box);
9824 /* Store the partitioning step */
9825 comm->partition_step = step;
9827 /* Increase the DD partitioning counter */
9829 /* The state currently matches this DD partitioning count, store it */
9830 state_local->ddp_count = dd->ddp_count;
9833 /* The DD master node knows the complete cg distribution,
9834 * store the count so we can possibly skip the cg info communication.
9836 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9839 if (comm->DD_debug > 0)
9841 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9842 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9843 "after partitioning");