2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "gmx_fatal.h"
48 #include "gmx_fatal_collective.h"
51 #include "domdec_network.h"
54 #include "chargegroup.h"
61 #include "pull_rotation.h"
65 #include "mtop_util.h"
66 #include "gmx_ga2la.h"
68 #include "nbnxn_search.h"
70 #include "gmx_omp_nthreads.h"
71 #include "gpu_utils.h"
73 #include "gromacs/fileio/futil.h"
74 #include "gromacs/fileio/gmxfio.h"
75 #include "gromacs/fileio/pdbio.h"
76 #include "gromacs/timing/wallcycle.h"
77 #include "gromacs/utility/gmxmpi.h"
78 #include "gromacs/utility/qsort_threadsafe.h"
80 #define DDRANK(dd, rank) (rank)
81 #define DDMASTERRANK(dd) (dd->masterrank)
83 typedef struct gmx_domdec_master
85 /* The cell boundaries */
87 /* The global charge group division */
88 int *ncg; /* Number of home charge groups for each node */
89 int *index; /* Index of nnodes+1 into cg */
90 int *cg; /* Global charge group index */
91 int *nat; /* Number of home atoms for each node. */
92 int *ibuf; /* Buffer for communication */
93 rvec *vbuf; /* Buffer for state scattering and gathering */
94 } gmx_domdec_master_t;
98 /* The numbers of charge groups to send and receive for each cell
99 * that requires communication, the last entry contains the total
100 * number of atoms that needs to be communicated.
102 int nsend[DD_MAXIZONE+2];
103 int nrecv[DD_MAXIZONE+2];
104 /* The charge groups to send */
107 /* The atom range for non-in-place communication */
108 int cell2at0[DD_MAXIZONE];
109 int cell2at1[DD_MAXIZONE];
114 int np; /* Number of grid pulses in this dimension */
115 int np_dlb; /* For dlb, for use with edlbAUTO */
116 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
118 gmx_bool bInPlace; /* Can we communicate in place? */
119 } gmx_domdec_comm_dim_t;
123 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
124 real *cell_f; /* State var.: cell boundaries, box relative */
125 real *old_cell_f; /* Temp. var.: old cell size */
126 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
127 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
128 real *bound_min; /* Temp. var.: lower limit for cell boundary */
129 real *bound_max; /* Temp. var.: upper limit for cell boundary */
130 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
131 real *buf_ncd; /* Temp. var. */
134 #define DD_NLOAD_MAX 9
136 /* Here floats are accurate enough, since these variables
137 * only influence the load balancing, not the actual MD results.
164 gmx_cgsort_t *sort_new;
176 /* This enum determines the order of the coordinates.
177 * ddnatHOME and ddnatZONE should be first and second,
178 * the others can be ordered as wanted.
181 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
185 edlbAUTO, edlbNO, edlbYES, edlbNR
187 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
191 int dim; /* The dimension */
192 gmx_bool dim_match; /* Tells if DD and PME dims match */
193 int nslab; /* The number of PME slabs in this dimension */
194 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
195 int *pp_min; /* The minimum pp node location, size nslab */
196 int *pp_max; /* The maximum pp node location,size nslab */
197 int maxshift; /* The maximum shift for coordinate redistribution in PME */
202 real min0; /* The minimum bottom of this zone */
203 real max1; /* The maximum top of this zone */
204 real min1; /* The minimum top of this zone */
205 real mch0; /* The maximum bottom communicaton height for this zone */
206 real mch1; /* The maximum top communicaton height for this zone */
207 real p1_0; /* The bottom value of the first cell in this zone */
208 real p1_1; /* The top value of the first cell in this zone */
213 gmx_domdec_ind_t ind;
220 } dd_comm_setup_work_t;
222 typedef struct gmx_domdec_comm
224 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
225 * unless stated otherwise.
228 /* The number of decomposition dimensions for PME, 0: no PME */
230 /* The number of nodes doing PME (PP/PME or only PME) */
234 /* The communication setup including the PME only nodes */
235 gmx_bool bCartesianPP_PME;
238 int *pmenodes; /* size npmenodes */
239 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
240 * but with bCartesianPP_PME */
241 gmx_ddpme_t ddpme[2];
243 /* The DD particle-particle nodes only */
244 gmx_bool bCartesianPP;
245 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
247 /* The global charge groups */
250 /* Should we sort the cgs */
252 gmx_domdec_sort_t *sort;
254 /* Are there charge groups? */
257 /* Are there bonded and multi-body interactions between charge groups? */
258 gmx_bool bInterCGBondeds;
259 gmx_bool bInterCGMultiBody;
261 /* Data for the optional bonded interaction atom communication range */
268 /* Are we actually using DLB? */
269 gmx_bool bDynLoadBal;
271 /* Cell sizes for static load balancing, first index cartesian */
274 /* The width of the communicated boundaries */
277 /* The minimum cell size (including triclinic correction) */
279 /* For dlb, for use with edlbAUTO */
280 rvec cellsize_min_dlb;
281 /* The lower limit for the DD cell size with DLB */
283 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
284 gmx_bool bVacDLBNoLimit;
286 /* With PME load balancing we set limits on DLB */
287 gmx_bool bPMELoadBalDLBLimits;
288 /* DLB needs to take into account that we want to allow this maximum
289 * cut-off (for PME load balancing), this could limit cell boundaries.
291 real PMELoadBal_max_cutoff;
293 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
295 /* box0 and box_size are required with dim's without pbc and -gcom */
299 /* The cell boundaries */
303 /* The old location of the cell boundaries, to check cg displacements */
307 /* The communication setup and charge group boundaries for the zones */
308 gmx_domdec_zones_t zones;
310 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
311 * cell boundaries of neighboring cells for dynamic load balancing.
313 gmx_ddzone_t zone_d1[2];
314 gmx_ddzone_t zone_d2[2][2];
316 /* The coordinate/force communication setup and indices */
317 gmx_domdec_comm_dim_t cd[DIM];
318 /* The maximum number of cells to communicate with in one dimension */
321 /* Which cg distribution is stored on the master node */
322 int master_cg_ddp_count;
324 /* The number of cg's received from the direct neighbors */
325 int zone_ncg1[DD_MAXZONE];
327 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
330 /* Array for signalling if atoms have moved to another domain */
334 /* Communication buffer for general use */
338 /* Communication buffer for general use */
341 /* Temporary storage for thread parallel communication setup */
343 dd_comm_setup_work_t *dth;
345 /* Communication buffers only used with multiple grid pulses */
350 /* Communication buffers for local redistribution */
352 int cggl_flag_nalloc[DIM*2];
354 int cgcm_state_nalloc[DIM*2];
356 /* Cell sizes for dynamic load balancing */
357 gmx_domdec_root_t **root;
361 real cell_f_max0[DIM];
362 real cell_f_min1[DIM];
364 /* Stuff for load communication */
365 gmx_bool bRecordLoad;
366 gmx_domdec_load_t *load;
367 int nrank_gpu_shared;
369 MPI_Comm *mpi_comm_load;
370 MPI_Comm mpi_comm_gpu_shared;
373 /* Maximum DLB scaling per load balancing step in percent */
377 float cycl[ddCyclNr];
378 int cycl_n[ddCyclNr];
379 float cycl_max[ddCyclNr];
380 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
384 /* Have often have did we have load measurements */
386 /* Have often have we collected the load measurements */
390 double sum_nat[ddnatNR-ddnatZONE];
400 /* The last partition step */
401 gmx_int64_t partition_step;
409 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
412 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
413 #define DD_FLAG_NRCG 65535
414 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
415 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
417 /* Zone permutation required to obtain consecutive charge groups
418 * for neighbor searching.
420 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
422 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
423 * components see only j zones with that component 0.
426 /* The DD zone order */
427 static const ivec dd_zo[DD_MAXZONE] =
428 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
433 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
438 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
443 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
445 /* Factors used to avoid problems due to rounding issues */
446 #define DD_CELL_MARGIN 1.0001
447 #define DD_CELL_MARGIN2 1.00005
448 /* Factor to account for pressure scaling during nstlist steps */
449 #define DD_PRES_SCALE_MARGIN 1.02
451 /* Allowed performance loss before we DLB or warn */
452 #define DD_PERF_LOSS 0.05
454 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
456 /* Use separate MPI send and receive commands
457 * when nnodes <= GMX_DD_NNODES_SENDRECV.
458 * This saves memory (and some copying for small nnodes).
459 * For high parallelization scatter and gather calls are used.
461 #define GMX_DD_NNODES_SENDRECV 4
465 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
467 static void index2xyz(ivec nc,int ind,ivec xyz)
469 xyz[XX] = ind % nc[XX];
470 xyz[YY] = (ind / nc[XX]) % nc[YY];
471 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
475 /* This order is required to minimize the coordinate communication in PME
476 * which uses decomposition in the x direction.
478 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
480 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
482 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
483 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
484 xyz[ZZ] = ind % nc[ZZ];
487 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
492 ddindex = dd_index(dd->nc, c);
493 if (dd->comm->bCartesianPP_PME)
495 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
497 else if (dd->comm->bCartesianPP)
500 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
511 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
513 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
516 int ddglatnr(gmx_domdec_t *dd, int i)
526 if (i >= dd->comm->nat[ddnatNR-1])
528 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
530 atnr = dd->gatindex[i] + 1;
536 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
538 return &dd->comm->cgs_gl;
541 static void vec_rvec_init(vec_rvec_t *v)
547 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
551 v->nalloc = over_alloc_dd(n);
552 srenew(v->v, v->nalloc);
556 void dd_store_state(gmx_domdec_t *dd, t_state *state)
560 if (state->ddp_count != dd->ddp_count)
562 gmx_incons("The state does not the domain decomposition state");
565 state->ncg_gl = dd->ncg_home;
566 if (state->ncg_gl > state->cg_gl_nalloc)
568 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
569 srenew(state->cg_gl, state->cg_gl_nalloc);
571 for (i = 0; i < state->ncg_gl; i++)
573 state->cg_gl[i] = dd->index_gl[i];
576 state->ddp_count_cg_gl = dd->ddp_count;
579 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
581 return &dd->comm->zones;
584 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
585 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
587 gmx_domdec_zones_t *zones;
590 zones = &dd->comm->zones;
593 while (icg >= zones->izone[izone].cg1)
602 else if (izone < zones->nizone)
604 *jcg0 = zones->izone[izone].jcg0;
608 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
609 icg, izone, zones->nizone);
612 *jcg1 = zones->izone[izone].jcg1;
614 for (d = 0; d < dd->ndim; d++)
617 shift0[dim] = zones->izone[izone].shift0[dim];
618 shift1[dim] = zones->izone[izone].shift1[dim];
619 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
621 /* A conservative approach, this can be optimized */
628 int dd_natoms_vsite(gmx_domdec_t *dd)
630 return dd->comm->nat[ddnatVSITE];
633 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
635 *at_start = dd->comm->nat[ddnatCON-1];
636 *at_end = dd->comm->nat[ddnatCON];
639 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
641 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
642 int *index, *cgindex;
643 gmx_domdec_comm_t *comm;
644 gmx_domdec_comm_dim_t *cd;
645 gmx_domdec_ind_t *ind;
646 rvec shift = {0, 0, 0}, *buf, *rbuf;
647 gmx_bool bPBC, bScrew;
651 cgindex = dd->cgindex;
656 nat_tot = dd->nat_home;
657 for (d = 0; d < dd->ndim; d++)
659 bPBC = (dd->ci[dd->dim[d]] == 0);
660 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
663 copy_rvec(box[dd->dim[d]], shift);
666 for (p = 0; p < cd->np; p++)
673 for (i = 0; i < ind->nsend[nzone]; i++)
675 at0 = cgindex[index[i]];
676 at1 = cgindex[index[i]+1];
677 for (j = at0; j < at1; j++)
679 copy_rvec(x[j], buf[n]);
686 for (i = 0; i < ind->nsend[nzone]; i++)
688 at0 = cgindex[index[i]];
689 at1 = cgindex[index[i]+1];
690 for (j = at0; j < at1; j++)
692 /* We need to shift the coordinates */
693 rvec_add(x[j], shift, buf[n]);
700 for (i = 0; i < ind->nsend[nzone]; i++)
702 at0 = cgindex[index[i]];
703 at1 = cgindex[index[i]+1];
704 for (j = at0; j < at1; j++)
707 buf[n][XX] = x[j][XX] + shift[XX];
709 * This operation requires a special shift force
710 * treatment, which is performed in calc_vir.
712 buf[n][YY] = box[YY][YY] - x[j][YY];
713 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
725 rbuf = comm->vbuf2.v;
727 /* Send and receive the coordinates */
728 dd_sendrecv_rvec(dd, d, dddirBackward,
729 buf, ind->nsend[nzone+1],
730 rbuf, ind->nrecv[nzone+1]);
734 for (zone = 0; zone < nzone; zone++)
736 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
738 copy_rvec(rbuf[j], x[i]);
743 nat_tot += ind->nrecv[nzone+1];
749 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
751 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
752 int *index, *cgindex;
753 gmx_domdec_comm_t *comm;
754 gmx_domdec_comm_dim_t *cd;
755 gmx_domdec_ind_t *ind;
759 gmx_bool bPBC, bScrew;
763 cgindex = dd->cgindex;
768 nzone = comm->zones.n/2;
769 nat_tot = dd->nat_tot;
770 for (d = dd->ndim-1; d >= 0; d--)
772 bPBC = (dd->ci[dd->dim[d]] == 0);
773 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
774 if (fshift == NULL && !bScrew)
778 /* Determine which shift vector we need */
784 for (p = cd->np-1; p >= 0; p--)
787 nat_tot -= ind->nrecv[nzone+1];
794 sbuf = comm->vbuf2.v;
796 for (zone = 0; zone < nzone; zone++)
798 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
800 copy_rvec(f[i], sbuf[j]);
805 /* Communicate the forces */
806 dd_sendrecv_rvec(dd, d, dddirForward,
807 sbuf, ind->nrecv[nzone+1],
808 buf, ind->nsend[nzone+1]);
810 /* Add the received forces */
814 for (i = 0; i < ind->nsend[nzone]; i++)
816 at0 = cgindex[index[i]];
817 at1 = cgindex[index[i]+1];
818 for (j = at0; j < at1; j++)
820 rvec_inc(f[j], buf[n]);
827 for (i = 0; i < ind->nsend[nzone]; i++)
829 at0 = cgindex[index[i]];
830 at1 = cgindex[index[i]+1];
831 for (j = at0; j < at1; j++)
833 rvec_inc(f[j], buf[n]);
834 /* Add this force to the shift force */
835 rvec_inc(fshift[is], buf[n]);
842 for (i = 0; i < ind->nsend[nzone]; i++)
844 at0 = cgindex[index[i]];
845 at1 = cgindex[index[i]+1];
846 for (j = at0; j < at1; j++)
848 /* Rotate the force */
849 f[j][XX] += buf[n][XX];
850 f[j][YY] -= buf[n][YY];
851 f[j][ZZ] -= buf[n][ZZ];
854 /* Add this force to the shift force */
855 rvec_inc(fshift[is], buf[n]);
866 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
868 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
869 int *index, *cgindex;
870 gmx_domdec_comm_t *comm;
871 gmx_domdec_comm_dim_t *cd;
872 gmx_domdec_ind_t *ind;
877 cgindex = dd->cgindex;
879 buf = &comm->vbuf.v[0][0];
882 nat_tot = dd->nat_home;
883 for (d = 0; d < dd->ndim; d++)
886 for (p = 0; p < cd->np; p++)
891 for (i = 0; i < ind->nsend[nzone]; i++)
893 at0 = cgindex[index[i]];
894 at1 = cgindex[index[i]+1];
895 for (j = at0; j < at1; j++)
908 rbuf = &comm->vbuf2.v[0][0];
910 /* Send and receive the coordinates */
911 dd_sendrecv_real(dd, d, dddirBackward,
912 buf, ind->nsend[nzone+1],
913 rbuf, ind->nrecv[nzone+1]);
917 for (zone = 0; zone < nzone; zone++)
919 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
926 nat_tot += ind->nrecv[nzone+1];
932 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
934 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
935 int *index, *cgindex;
936 gmx_domdec_comm_t *comm;
937 gmx_domdec_comm_dim_t *cd;
938 gmx_domdec_ind_t *ind;
943 cgindex = dd->cgindex;
945 buf = &comm->vbuf.v[0][0];
948 nzone = comm->zones.n/2;
949 nat_tot = dd->nat_tot;
950 for (d = dd->ndim-1; d >= 0; d--)
953 for (p = cd->np-1; p >= 0; p--)
956 nat_tot -= ind->nrecv[nzone+1];
963 sbuf = &comm->vbuf2.v[0][0];
965 for (zone = 0; zone < nzone; zone++)
967 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
974 /* Communicate the forces */
975 dd_sendrecv_real(dd, d, dddirForward,
976 sbuf, ind->nrecv[nzone+1],
977 buf, ind->nsend[nzone+1]);
979 /* Add the received forces */
981 for (i = 0; i < ind->nsend[nzone]; i++)
983 at0 = cgindex[index[i]];
984 at1 = cgindex[index[i]+1];
985 for (j = at0; j < at1; j++)
996 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
998 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1000 zone->min0, zone->max1,
1001 zone->mch0, zone->mch0,
1002 zone->p1_0, zone->p1_1);
1006 #define DDZONECOMM_MAXZONE 5
1007 #define DDZONECOMM_BUFSIZE 3
1009 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1010 int ddimind, int direction,
1011 gmx_ddzone_t *buf_s, int n_s,
1012 gmx_ddzone_t *buf_r, int n_r)
1014 #define ZBS DDZONECOMM_BUFSIZE
1015 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1016 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
1019 for (i = 0; i < n_s; i++)
1021 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1022 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1023 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1024 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1025 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1026 vbuf_s[i*ZBS+1][2] = 0;
1027 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1028 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1029 vbuf_s[i*ZBS+2][2] = 0;
1032 dd_sendrecv_rvec(dd, ddimind, direction,
1036 for (i = 0; i < n_r; i++)
1038 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1039 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1040 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1041 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1042 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1043 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1044 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
1050 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1051 rvec cell_ns_x0, rvec cell_ns_x1)
1053 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1055 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1056 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1057 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1058 rvec extr_s[2], extr_r[2];
1060 real dist_d, c = 0, det;
1061 gmx_domdec_comm_t *comm;
1062 gmx_bool bPBC, bUse;
1066 for (d = 1; d < dd->ndim; d++)
1069 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1070 zp->min0 = cell_ns_x0[dim];
1071 zp->max1 = cell_ns_x1[dim];
1072 zp->min1 = cell_ns_x1[dim];
1073 zp->mch0 = cell_ns_x0[dim];
1074 zp->mch1 = cell_ns_x1[dim];
1075 zp->p1_0 = cell_ns_x0[dim];
1076 zp->p1_1 = cell_ns_x1[dim];
1079 for (d = dd->ndim-2; d >= 0; d--)
1082 bPBC = (dim < ddbox->npbcdim);
1084 /* Use an rvec to store two reals */
1085 extr_s[d][0] = comm->cell_f0[d+1];
1086 extr_s[d][1] = comm->cell_f1[d+1];
1087 extr_s[d][2] = comm->cell_f1[d+1];
1090 /* Store the extremes in the backward sending buffer,
1091 * so the get updated separately from the forward communication.
1093 for (d1 = d; d1 < dd->ndim-1; d1++)
1095 /* We invert the order to be able to use the same loop for buf_e */
1096 buf_s[pos].min0 = extr_s[d1][1];
1097 buf_s[pos].max1 = extr_s[d1][0];
1098 buf_s[pos].min1 = extr_s[d1][2];
1099 buf_s[pos].mch0 = 0;
1100 buf_s[pos].mch1 = 0;
1101 /* Store the cell corner of the dimension we communicate along */
1102 buf_s[pos].p1_0 = comm->cell_x0[dim];
1103 buf_s[pos].p1_1 = 0;
1107 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1110 if (dd->ndim == 3 && d == 0)
1112 buf_s[pos] = comm->zone_d2[0][1];
1114 buf_s[pos] = comm->zone_d1[0];
1118 /* We only need to communicate the extremes
1119 * in the forward direction
1121 npulse = comm->cd[d].np;
1124 /* Take the minimum to avoid double communication */
1125 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1129 /* Without PBC we should really not communicate over
1130 * the boundaries, but implementing that complicates
1131 * the communication setup and therefore we simply
1132 * do all communication, but ignore some data.
1134 npulse_min = npulse;
1136 for (p = 0; p < npulse_min; p++)
1138 /* Communicate the extremes forward */
1139 bUse = (bPBC || dd->ci[dim] > 0);
1141 dd_sendrecv_rvec(dd, d, dddirForward,
1142 extr_s+d, dd->ndim-d-1,
1143 extr_r+d, dd->ndim-d-1);
1147 for (d1 = d; d1 < dd->ndim-1; d1++)
1149 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1150 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1151 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1157 for (p = 0; p < npulse; p++)
1159 /* Communicate all the zone information backward */
1160 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1162 dd_sendrecv_ddzone(dd, d, dddirBackward,
1169 for (d1 = d+1; d1 < dd->ndim; d1++)
1171 /* Determine the decrease of maximum required
1172 * communication height along d1 due to the distance along d,
1173 * this avoids a lot of useless atom communication.
1175 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1177 if (ddbox->tric_dir[dim])
1179 /* c is the off-diagonal coupling between the cell planes
1180 * along directions d and d1.
1182 c = ddbox->v[dim][dd->dim[d1]][dim];
1188 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1191 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1195 /* A negative value signals out of range */
1201 /* Accumulate the extremes over all pulses */
1202 for (i = 0; i < buf_size; i++)
1206 buf_e[i] = buf_r[i];
1212 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1213 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1214 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1217 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1225 if (bUse && dh[d1] >= 0)
1227 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1228 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1231 /* Copy the received buffer to the send buffer,
1232 * to pass the data through with the next pulse.
1234 buf_s[i] = buf_r[i];
1236 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1237 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1239 /* Store the extremes */
1242 for (d1 = d; d1 < dd->ndim-1; d1++)
1244 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1245 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1246 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1250 if (d == 1 || (d == 0 && dd->ndim == 3))
1252 for (i = d; i < 2; i++)
1254 comm->zone_d2[1-d][i] = buf_e[pos];
1260 comm->zone_d1[1] = buf_e[pos];
1270 for (i = 0; i < 2; i++)
1274 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1276 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1277 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1283 for (i = 0; i < 2; i++)
1285 for (j = 0; j < 2; j++)
1289 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1291 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1292 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1296 for (d = 1; d < dd->ndim; d++)
1298 comm->cell_f_max0[d] = extr_s[d-1][0];
1299 comm->cell_f_min1[d] = extr_s[d-1][1];
1302 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1303 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1308 static void dd_collect_cg(gmx_domdec_t *dd,
1309 t_state *state_local)
1311 gmx_domdec_master_t *ma = NULL;
1312 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1315 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1317 /* The master has the correct distribution */
1321 if (state_local->ddp_count == dd->ddp_count)
1323 ncg_home = dd->ncg_home;
1325 nat_home = dd->nat_home;
1327 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1329 cgs_gl = &dd->comm->cgs_gl;
1331 ncg_home = state_local->ncg_gl;
1332 cg = state_local->cg_gl;
1334 for (i = 0; i < ncg_home; i++)
1336 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1341 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1344 buf2[0] = dd->ncg_home;
1345 buf2[1] = dd->nat_home;
1355 /* Collect the charge group and atom counts on the master */
1356 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1361 for (i = 0; i < dd->nnodes; i++)
1363 ma->ncg[i] = ma->ibuf[2*i];
1364 ma->nat[i] = ma->ibuf[2*i+1];
1365 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1368 /* Make byte counts and indices */
1369 for (i = 0; i < dd->nnodes; i++)
1371 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1372 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1376 fprintf(debug, "Initial charge group distribution: ");
1377 for (i = 0; i < dd->nnodes; i++)
1379 fprintf(debug, " %d", ma->ncg[i]);
1381 fprintf(debug, "\n");
1385 /* Collect the charge group indices on the master */
1387 dd->ncg_home*sizeof(int), dd->index_gl,
1388 DDMASTER(dd) ? ma->ibuf : NULL,
1389 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1390 DDMASTER(dd) ? ma->cg : NULL);
1392 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1395 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1398 gmx_domdec_master_t *ma;
1399 int n, i, c, a, nalloc = 0;
1408 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1409 dd->rank, dd->mpi_comm_all);
1414 /* Copy the master coordinates to the global array */
1415 cgs_gl = &dd->comm->cgs_gl;
1417 n = DDMASTERRANK(dd);
1419 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1421 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1423 copy_rvec(lv[a++], v[c]);
1427 for (n = 0; n < dd->nnodes; n++)
1431 if (ma->nat[n] > nalloc)
1433 nalloc = over_alloc_dd(ma->nat[n]);
1434 srenew(buf, nalloc);
1437 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1438 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1441 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1443 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1445 copy_rvec(buf[a++], v[c]);
1454 static void get_commbuffer_counts(gmx_domdec_t *dd,
1455 int **counts, int **disps)
1457 gmx_domdec_master_t *ma;
1462 /* Make the rvec count and displacment arrays */
1464 *disps = ma->ibuf + dd->nnodes;
1465 for (n = 0; n < dd->nnodes; n++)
1467 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1468 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1472 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1475 gmx_domdec_master_t *ma;
1476 int *rcounts = NULL, *disps = NULL;
1485 get_commbuffer_counts(dd, &rcounts, &disps);
1490 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1494 cgs_gl = &dd->comm->cgs_gl;
1497 for (n = 0; n < dd->nnodes; n++)
1499 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1501 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1503 copy_rvec(buf[a++], v[c]);
1510 void dd_collect_vec(gmx_domdec_t *dd,
1511 t_state *state_local, rvec *lv, rvec *v)
1513 gmx_domdec_master_t *ma;
1514 int n, i, c, a, nalloc = 0;
1517 dd_collect_cg(dd, state_local);
1519 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1521 dd_collect_vec_sendrecv(dd, lv, v);
1525 dd_collect_vec_gatherv(dd, lv, v);
1530 void dd_collect_state(gmx_domdec_t *dd,
1531 t_state *state_local, t_state *state)
1535 nh = state->nhchainlength;
1539 for (i = 0; i < efptNR; i++)
1541 state->lambda[i] = state_local->lambda[i];
1543 state->fep_state = state_local->fep_state;
1544 state->veta = state_local->veta;
1545 state->vol0 = state_local->vol0;
1546 copy_mat(state_local->box, state->box);
1547 copy_mat(state_local->boxv, state->boxv);
1548 copy_mat(state_local->svir_prev, state->svir_prev);
1549 copy_mat(state_local->fvir_prev, state->fvir_prev);
1550 copy_mat(state_local->pres_prev, state->pres_prev);
1552 for (i = 0; i < state_local->ngtc; i++)
1554 for (j = 0; j < nh; j++)
1556 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1557 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1559 state->therm_integral[i] = state_local->therm_integral[i];
1561 for (i = 0; i < state_local->nnhpres; i++)
1563 for (j = 0; j < nh; j++)
1565 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1566 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1570 for (est = 0; est < estNR; est++)
1572 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1577 dd_collect_vec(dd, state_local, state_local->x, state->x);
1580 dd_collect_vec(dd, state_local, state_local->v, state->v);
1583 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1586 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1589 if (state->nrngi == 1)
1593 for (i = 0; i < state_local->nrng; i++)
1595 state->ld_rng[i] = state_local->ld_rng[i];
1601 dd_gather(dd, state_local->nrng*sizeof(state->ld_rng[0]),
1602 state_local->ld_rng, state->ld_rng);
1606 if (state->nrngi == 1)
1610 state->ld_rngi[0] = state_local->ld_rngi[0];
1615 dd_gather(dd, sizeof(state->ld_rngi[0]),
1616 state_local->ld_rngi, state->ld_rngi);
1619 case estDISRE_INITF:
1620 case estDISRE_RM3TAV:
1621 case estORIRE_INITF:
1625 gmx_incons("Unknown state entry encountered in dd_collect_state");
1631 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1637 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1640 state->nalloc = over_alloc_dd(nalloc);
1642 for (est = 0; est < estNR; est++)
1644 if (EST_DISTR(est) && (state->flags & (1<<est)))
1649 srenew(state->x, state->nalloc);
1652 srenew(state->v, state->nalloc);
1655 srenew(state->sd_X, state->nalloc);
1658 srenew(state->cg_p, state->nalloc);
1662 case estDISRE_INITF:
1663 case estDISRE_RM3TAV:
1664 case estORIRE_INITF:
1666 /* No reallocation required */
1669 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1676 srenew(*f, state->nalloc);
1680 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1683 if (nalloc > fr->cg_nalloc)
1687 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1689 fr->cg_nalloc = over_alloc_dd(nalloc);
1690 srenew(fr->cginfo, fr->cg_nalloc);
1691 if (fr->cutoff_scheme == ecutsGROUP)
1693 srenew(fr->cg_cm, fr->cg_nalloc);
1696 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1698 /* We don't use charge groups, we use x in state to set up
1699 * the atom communication.
1701 dd_realloc_state(state, f, nalloc);
1705 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1708 gmx_domdec_master_t *ma;
1709 int n, i, c, a, nalloc = 0;
1716 for (n = 0; n < dd->nnodes; n++)
1720 if (ma->nat[n] > nalloc)
1722 nalloc = over_alloc_dd(ma->nat[n]);
1723 srenew(buf, nalloc);
1725 /* Use lv as a temporary buffer */
1727 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1729 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1731 copy_rvec(v[c], buf[a++]);
1734 if (a != ma->nat[n])
1736 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1741 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1742 DDRANK(dd, n), n, dd->mpi_comm_all);
1747 n = DDMASTERRANK(dd);
1749 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1751 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1753 copy_rvec(v[c], lv[a++]);
1760 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1761 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1766 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1769 gmx_domdec_master_t *ma;
1770 int *scounts = NULL, *disps = NULL;
1771 int n, i, c, a, nalloc = 0;
1778 get_commbuffer_counts(dd, &scounts, &disps);
1782 for (n = 0; n < dd->nnodes; n++)
1784 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1786 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1788 copy_rvec(v[c], buf[a++]);
1794 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1797 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1799 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1801 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1805 dd_distribute_vec_scatterv(dd, cgs, v, lv);
1809 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1812 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1813 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1814 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1816 if (dfhist->nlambda > 0)
1818 int nlam = dfhist->nlambda;
1819 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1820 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1821 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1822 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1823 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1824 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
1826 for (i = 0; i < nlam; i++)
1828 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1829 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1830 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1831 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1832 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1833 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
1838 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1839 t_state *state, t_state *state_local,
1844 nh = state->nhchainlength;
1848 for (i = 0; i < efptNR; i++)
1850 state_local->lambda[i] = state->lambda[i];
1852 state_local->fep_state = state->fep_state;
1853 state_local->veta = state->veta;
1854 state_local->vol0 = state->vol0;
1855 copy_mat(state->box, state_local->box);
1856 copy_mat(state->box_rel, state_local->box_rel);
1857 copy_mat(state->boxv, state_local->boxv);
1858 copy_mat(state->svir_prev, state_local->svir_prev);
1859 copy_mat(state->fvir_prev, state_local->fvir_prev);
1860 copy_df_history(&state_local->dfhist, &state->dfhist);
1861 for (i = 0; i < state_local->ngtc; i++)
1863 for (j = 0; j < nh; j++)
1865 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1866 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1868 state_local->therm_integral[i] = state->therm_integral[i];
1870 for (i = 0; i < state_local->nnhpres; i++)
1872 for (j = 0; j < nh; j++)
1874 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1875 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1879 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1880 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1881 dd_bcast(dd, sizeof(real), &state_local->veta);
1882 dd_bcast(dd, sizeof(real), &state_local->vol0);
1883 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1884 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1885 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1886 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1887 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1888 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1889 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1890 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1891 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1892 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1894 /* communicate df_history -- required for restarting from checkpoint */
1895 dd_distribute_dfhist(dd, &state_local->dfhist);
1897 if (dd->nat_home > state_local->nalloc)
1899 dd_realloc_state(state_local, f, dd->nat_home);
1901 for (i = 0; i < estNR; i++)
1903 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1908 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1911 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1914 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1917 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1920 if (state->nrngi == 1)
1923 state_local->nrng*sizeof(state_local->ld_rng[0]),
1924 state->ld_rng, state_local->ld_rng);
1929 state_local->nrng*sizeof(state_local->ld_rng[0]),
1930 state->ld_rng, state_local->ld_rng);
1934 if (state->nrngi == 1)
1936 dd_bcastc(dd, sizeof(state_local->ld_rngi[0]),
1937 state->ld_rngi, state_local->ld_rngi);
1941 dd_scatter(dd, sizeof(state_local->ld_rngi[0]),
1942 state->ld_rngi, state_local->ld_rngi);
1945 case estDISRE_INITF:
1946 case estDISRE_RM3TAV:
1947 case estORIRE_INITF:
1949 /* Not implemented yet */
1952 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1958 static char dim2char(int dim)
1964 case XX: c = 'X'; break;
1965 case YY: c = 'Y'; break;
1966 case ZZ: c = 'Z'; break;
1967 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
1973 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1974 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1976 rvec grid_s[2], *grid_r = NULL, cx, r;
1977 char fname[STRLEN], format[STRLEN], buf[22];
1979 int a, i, d, z, y, x;
1983 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1984 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1988 snew(grid_r, 2*dd->nnodes);
1991 dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd) ? grid_r[0] : NULL);
1995 for (d = 0; d < DIM; d++)
1997 for (i = 0; i < DIM; i++)
2005 if (d < ddbox->npbcdim && dd->nc[d] > 1)
2007 tric[d][i] = box[i][d]/box[i][i];
2016 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
2017 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
2018 out = gmx_fio_fopen(fname, "w");
2019 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2021 for (i = 0; i < dd->nnodes; i++)
2023 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
2024 for (d = 0; d < DIM; d++)
2026 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
2028 for (z = 0; z < 2; z++)
2030 for (y = 0; y < 2; y++)
2032 for (x = 0; x < 2; x++)
2034 cx[XX] = grid_r[i*2+x][XX];
2035 cx[YY] = grid_r[i*2+y][YY];
2036 cx[ZZ] = grid_r[i*2+z][ZZ];
2038 fprintf(out, format, "ATOM", a++, "CA", "GLY", ' ', 1+i,
2039 ' ', 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol);
2043 for (d = 0; d < DIM; d++)
2045 for (x = 0; x < 4; x++)
2049 case 0: y = 1 + i*8 + 2*x; break;
2050 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
2051 case 2: y = 1 + i*8 + x; break;
2053 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2057 gmx_fio_fclose(out);
2062 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
2063 gmx_mtop_t *mtop, t_commrec *cr,
2064 int natoms, rvec x[], matrix box)
2066 char fname[STRLEN], format[STRLEN], format4[STRLEN], buf[22];
2068 int i, ii, resnr, c;
2069 char *atomname, *resname;
2076 natoms = dd->comm->nat[ddnatVSITE];
2079 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2081 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
2082 sprintf(format4, "%s%s\n", get_pdbformat4(), "%6.2f%6.2f");
2084 out = gmx_fio_fopen(fname, "w");
2086 fprintf(out, "TITLE %s\n", title);
2087 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2088 for (i = 0; i < natoms; i++)
2090 ii = dd->gatindex[i];
2091 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
2092 if (i < dd->comm->nat[ddnatZONE])
2095 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2101 else if (i < dd->comm->nat[ddnatVSITE])
2103 b = dd->comm->zones.n;
2107 b = dd->comm->zones.n + 1;
2109 fprintf(out, strlen(atomname) < 4 ? format : format4,
2110 "ATOM", (ii+1)%100000,
2111 atomname, resname, ' ', resnr%10000, ' ',
2112 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b);
2114 fprintf(out, "TER\n");
2116 gmx_fio_fclose(out);
2119 real dd_cutoff_mbody(gmx_domdec_t *dd)
2121 gmx_domdec_comm_t *comm;
2128 if (comm->bInterCGBondeds)
2130 if (comm->cutoff_mbody > 0)
2132 r = comm->cutoff_mbody;
2136 /* cutoff_mbody=0 means we do not have DLB */
2137 r = comm->cellsize_min[dd->dim[0]];
2138 for (di = 1; di < dd->ndim; di++)
2140 r = min(r, comm->cellsize_min[dd->dim[di]]);
2142 if (comm->bBondComm)
2144 r = max(r, comm->cutoff_mbody);
2148 r = min(r, comm->cutoff);
2156 real dd_cutoff_twobody(gmx_domdec_t *dd)
2160 r_mb = dd_cutoff_mbody(dd);
2162 return max(dd->comm->cutoff, r_mb);
2166 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2170 nc = dd->nc[dd->comm->cartpmedim];
2171 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2172 copy_ivec(coord, coord_pme);
2173 coord_pme[dd->comm->cartpmedim] =
2174 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
2177 static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
2179 /* Here we assign a PME node to communicate with this DD node
2180 * by assuming that the major index of both is x.
2181 * We add cr->npmenodes/2 to obtain an even distribution.
2183 return (ddindex*npme + npme/2)/ndd;
2186 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2188 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2191 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2193 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
2196 static int *dd_pmenodes(t_commrec *cr)
2201 snew(pmenodes, cr->npmenodes);
2203 for (i = 0; i < cr->dd->nnodes; i++)
2205 p0 = cr_ddindex2pmeindex(cr, i);
2206 p1 = cr_ddindex2pmeindex(cr, i+1);
2207 if (i+1 == cr->dd->nnodes || p1 > p0)
2211 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2213 pmenodes[n] = i + 1 + n;
2221 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2224 ivec coords, coords_pme, nc;
2229 if (dd->comm->bCartesian) {
2230 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2231 dd_coords2pmecoords(dd,coords,coords_pme);
2232 copy_ivec(dd->ntot,nc);
2233 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2234 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2236 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2238 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2244 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
2249 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2251 gmx_domdec_comm_t *comm;
2253 int ddindex, nodeid = -1;
2255 comm = cr->dd->comm;
2260 if (comm->bCartesianPP_PME)
2263 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2268 ddindex = dd_index(cr->dd->nc, coords);
2269 if (comm->bCartesianPP)
2271 nodeid = comm->ddindex2simnodeid[ddindex];
2277 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
2289 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2292 gmx_domdec_comm_t *comm;
2293 ivec coord, coord_pme;
2300 /* This assumes a uniform x domain decomposition grid cell size */
2301 if (comm->bCartesianPP_PME)
2304 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2305 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2307 /* This is a PP node */
2308 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2309 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2313 else if (comm->bCartesianPP)
2315 if (sim_nodeid < dd->nnodes)
2317 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2322 /* This assumes DD cells with identical x coordinates
2323 * are numbered sequentially.
2325 if (dd->comm->pmenodes == NULL)
2327 if (sim_nodeid < dd->nnodes)
2329 /* The DD index equals the nodeid */
2330 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2336 while (sim_nodeid > dd->comm->pmenodes[i])
2340 if (sim_nodeid < dd->comm->pmenodes[i])
2342 pmenode = dd->comm->pmenodes[i];
2350 void get_pme_nnodes(const gmx_domdec_t *dd,
2351 int *npmenodes_x, int *npmenodes_y)
2355 *npmenodes_x = dd->comm->npmenodes_x;
2356 *npmenodes_y = dd->comm->npmenodes_y;
2365 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2367 gmx_bool bPMEOnlyNode;
2369 if (DOMAINDECOMP(cr))
2371 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2375 bPMEOnlyNode = FALSE;
2378 return bPMEOnlyNode;
2381 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2382 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2386 ivec coord, coord_pme;
2390 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2393 for (x = 0; x < dd->nc[XX]; x++)
2395 for (y = 0; y < dd->nc[YY]; y++)
2397 for (z = 0; z < dd->nc[ZZ]; z++)
2399 if (dd->comm->bCartesianPP_PME)
2404 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2405 if (dd->ci[XX] == coord_pme[XX] &&
2406 dd->ci[YY] == coord_pme[YY] &&
2407 dd->ci[ZZ] == coord_pme[ZZ])
2409 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2414 /* The slab corresponds to the nodeid in the PME group */
2415 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2417 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2424 /* The last PP-only node is the peer node */
2425 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2429 fprintf(debug, "Receive coordinates from PP nodes:");
2430 for (x = 0; x < *nmy_ddnodes; x++)
2432 fprintf(debug, " %d", (*my_ddnodes)[x]);
2434 fprintf(debug, "\n");
2438 static gmx_bool receive_vir_ener(t_commrec *cr)
2440 gmx_domdec_comm_t *comm;
2441 int pmenode, coords[DIM], rank;
2445 if (cr->npmenodes < cr->dd->nnodes)
2447 comm = cr->dd->comm;
2448 if (comm->bCartesianPP_PME)
2450 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2452 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2453 coords[comm->cartpmedim]++;
2454 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2456 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2457 if (dd_simnode2pmenode(cr, rank) == pmenode)
2459 /* This is not the last PP node for pmenode */
2467 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2468 if (cr->sim_nodeid+1 < cr->nnodes &&
2469 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2471 /* This is not the last PP node for pmenode */
2480 static void set_zones_ncg_home(gmx_domdec_t *dd)
2482 gmx_domdec_zones_t *zones;
2485 zones = &dd->comm->zones;
2487 zones->cg_range[0] = 0;
2488 for (i = 1; i < zones->n+1; i++)
2490 zones->cg_range[i] = dd->ncg_home;
2492 /* zone_ncg1[0] should always be equal to ncg_home */
2493 dd->comm->zone_ncg1[0] = dd->ncg_home;
2496 static void rebuild_cgindex(gmx_domdec_t *dd,
2497 const int *gcgs_index, t_state *state)
2499 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2502 dd_cg_gl = dd->index_gl;
2503 cgindex = dd->cgindex;
2506 for (i = 0; i < state->ncg_gl; i++)
2510 dd_cg_gl[i] = cg_gl;
2511 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2515 dd->ncg_home = state->ncg_gl;
2518 set_zones_ncg_home(dd);
2521 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2523 while (cg >= cginfo_mb->cg_end)
2528 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2531 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2532 t_forcerec *fr, char *bLocalCG)
2534 cginfo_mb_t *cginfo_mb;
2540 cginfo_mb = fr->cginfo_mb;
2541 cginfo = fr->cginfo;
2543 for (cg = cg0; cg < cg1; cg++)
2545 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2549 if (bLocalCG != NULL)
2551 for (cg = cg0; cg < cg1; cg++)
2553 bLocalCG[index_gl[cg]] = TRUE;
2558 static void make_dd_indices(gmx_domdec_t *dd,
2559 const int *gcgs_index, int cg_start)
2561 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2562 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2567 bLocalCG = dd->comm->bLocalCG;
2569 if (dd->nat_tot > dd->gatindex_nalloc)
2571 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2572 srenew(dd->gatindex, dd->gatindex_nalloc);
2575 nzone = dd->comm->zones.n;
2576 zone2cg = dd->comm->zones.cg_range;
2577 zone_ncg1 = dd->comm->zone_ncg1;
2578 index_gl = dd->index_gl;
2579 gatindex = dd->gatindex;
2580 bCGs = dd->comm->bCGs;
2582 if (zone2cg[1] != dd->ncg_home)
2584 gmx_incons("dd->ncg_zone is not up to date");
2587 /* Make the local to global and global to local atom index */
2588 a = dd->cgindex[cg_start];
2589 for (zone = 0; zone < nzone; zone++)
2597 cg0 = zone2cg[zone];
2599 cg1 = zone2cg[zone+1];
2600 cg1_p1 = cg0 + zone_ncg1[zone];
2602 for (cg = cg0; cg < cg1; cg++)
2607 /* Signal that this cg is from more than one pulse away */
2610 cg_gl = index_gl[cg];
2613 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2616 ga2la_set(dd->ga2la, a_gl, a, zone1);
2622 gatindex[a] = cg_gl;
2623 ga2la_set(dd->ga2la, cg_gl, a, zone1);
2630 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2633 int ncg, i, ngl, nerr;
2636 if (bLocalCG == NULL)
2640 for (i = 0; i < dd->ncg_tot; i++)
2642 if (!bLocalCG[dd->index_gl[i]])
2645 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
2650 for (i = 0; i < ncg_sys; i++)
2657 if (ngl != dd->ncg_tot)
2659 fprintf(stderr, "DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
2666 static void check_index_consistency(gmx_domdec_t *dd,
2667 int natoms_sys, int ncg_sys,
2670 int nerr, ngl, i, a, cell;
2675 if (dd->comm->DD_debug > 1)
2677 snew(have, natoms_sys);
2678 for (a = 0; a < dd->nat_tot; a++)
2680 if (have[dd->gatindex[a]] > 0)
2682 fprintf(stderr, "DD node %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2686 have[dd->gatindex[a]] = a + 1;
2692 snew(have, dd->nat_tot);
2695 for (i = 0; i < natoms_sys; i++)
2697 if (ga2la_get(dd->ga2la, i, &a, &cell))
2699 if (a >= dd->nat_tot)
2701 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2707 if (dd->gatindex[a] != i)
2709 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2716 if (ngl != dd->nat_tot)
2719 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2720 dd->rank, where, ngl, dd->nat_tot);
2722 for (a = 0; a < dd->nat_tot; a++)
2727 "DD node %d, %s: local atom %d, global %d has no global index\n",
2728 dd->rank, where, a+1, dd->gatindex[a]+1);
2733 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2737 gmx_fatal(FARGS, "DD node %d, %s: %d atom/cg index inconsistencies",
2738 dd->rank, where, nerr);
2742 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2749 /* Clear the whole list without searching */
2750 ga2la_clear(dd->ga2la);
2754 for (i = a_start; i < dd->nat_tot; i++)
2756 ga2la_del(dd->ga2la, dd->gatindex[i]);
2760 bLocalCG = dd->comm->bLocalCG;
2763 for (i = cg_start; i < dd->ncg_tot; i++)
2765 bLocalCG[dd->index_gl[i]] = FALSE;
2769 dd_clear_local_vsite_indices(dd);
2771 if (dd->constraints)
2773 dd_clear_local_constraint_indices(dd);
2777 /* This function should be used for moving the domain boudaries during DLB,
2778 * for obtaining the minimum cell size. It checks the initially set limit
2779 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2780 * and, possibly, a longer cut-off limit set for PME load balancing.
2782 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2786 cellsize_min = comm->cellsize_min[dim];
2788 if (!comm->bVacDLBNoLimit)
2790 /* The cut-off might have changed, e.g. by PME load balacning,
2791 * from the value used to set comm->cellsize_min, so check it.
2793 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2795 if (comm->bPMELoadBalDLBLimits)
2797 /* Check for the cut-off limit set by the PME load balancing */
2798 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2802 return cellsize_min;
2805 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2808 real grid_jump_limit;
2810 /* The distance between the boundaries of cells at distance
2811 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2812 * and by the fact that cells should not be shifted by more than
2813 * half their size, such that cg's only shift by one cell
2814 * at redecomposition.
2816 grid_jump_limit = comm->cellsize_limit;
2817 if (!comm->bVacDLBNoLimit)
2819 if (comm->bPMELoadBalDLBLimits)
2821 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2823 grid_jump_limit = max(grid_jump_limit,
2824 cutoff/comm->cd[dim_ind].np);
2827 return grid_jump_limit;
2830 static gmx_bool check_grid_jump(gmx_int64_t step,
2836 gmx_domdec_comm_t *comm;
2845 for (d = 1; d < dd->ndim; d++)
2848 limit = grid_jump_limit(comm, cutoff, d);
2849 bfac = ddbox->box_size[dim];
2850 if (ddbox->tric_dir[dim])
2852 bfac *= ddbox->skew_fac[dim];
2854 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2855 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2863 /* This error should never be triggered under normal
2864 * circumstances, but you never know ...
2866 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
2867 gmx_step_str(step, buf),
2868 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2876 static int dd_load_count(gmx_domdec_comm_t *comm)
2878 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2881 static float dd_force_load(gmx_domdec_comm_t *comm)
2888 if (comm->eFlop > 1)
2890 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2895 load = comm->cycl[ddCyclF];
2896 if (comm->cycl_n[ddCyclF] > 1)
2898 /* Subtract the maximum of the last n cycle counts
2899 * to get rid of possible high counts due to other sources,
2900 * for instance system activity, that would otherwise
2901 * affect the dynamic load balancing.
2903 load -= comm->cycl_max[ddCyclF];
2907 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2909 float gpu_wait, gpu_wait_sum;
2911 gpu_wait = comm->cycl[ddCyclWaitGPU];
2912 if (comm->cycl_n[ddCyclF] > 1)
2914 /* We should remove the WaitGPU time of the same MD step
2915 * as the one with the maximum F time, since the F time
2916 * and the wait time are not independent.
2917 * Furthermore, the step for the max F time should be chosen
2918 * the same on all ranks that share the same GPU.
2919 * But to keep the code simple, we remove the average instead.
2920 * The main reason for artificially long times at some steps
2921 * is spurious CPU activity or MPI time, so we don't expect
2922 * that changes in the GPU wait time matter a lot here.
2924 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2926 /* Sum the wait times over the ranks that share the same GPU */
2927 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2928 comm->mpi_comm_gpu_shared);
2929 /* Replace the wait time by the average over the ranks */
2930 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
2938 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2940 gmx_domdec_comm_t *comm;
2945 snew(*dim_f, dd->nc[dim]+1);
2947 for (i = 1; i < dd->nc[dim]; i++)
2949 if (comm->slb_frac[dim])
2951 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2955 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2958 (*dim_f)[dd->nc[dim]] = 1;
2961 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2963 int pmeindex, slab, nso, i;
2966 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2972 ddpme->dim = dimind;
2974 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2976 ddpme->nslab = (ddpme->dim == 0 ?
2977 dd->comm->npmenodes_x :
2978 dd->comm->npmenodes_y);
2980 if (ddpme->nslab <= 1)
2985 nso = dd->comm->npmenodes/ddpme->nslab;
2986 /* Determine for each PME slab the PP location range for dimension dim */
2987 snew(ddpme->pp_min, ddpme->nslab);
2988 snew(ddpme->pp_max, ddpme->nslab);
2989 for (slab = 0; slab < ddpme->nslab; slab++)
2991 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2992 ddpme->pp_max[slab] = 0;
2994 for (i = 0; i < dd->nnodes; i++)
2996 ddindex2xyz(dd->nc, i, xyz);
2997 /* For y only use our y/z slab.
2998 * This assumes that the PME x grid size matches the DD grid size.
3000 if (dimind == 0 || xyz[XX] == dd->ci[XX])
3002 pmeindex = ddindex2pmeindex(dd, i);
3005 slab = pmeindex/nso;
3009 slab = pmeindex % ddpme->nslab;
3011 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
3012 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
3016 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
3019 int dd_pme_maxshift_x(gmx_domdec_t *dd)
3021 if (dd->comm->ddpme[0].dim == XX)
3023 return dd->comm->ddpme[0].maxshift;
3031 int dd_pme_maxshift_y(gmx_domdec_t *dd)
3033 if (dd->comm->ddpme[0].dim == YY)
3035 return dd->comm->ddpme[0].maxshift;
3037 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
3039 return dd->comm->ddpme[1].maxshift;
3047 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
3048 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
3050 gmx_domdec_comm_t *comm;
3053 real range, pme_boundary;
3057 nc = dd->nc[ddpme->dim];
3060 if (!ddpme->dim_match)
3062 /* PP decomposition is not along dim: the worst situation */
3065 else if (ns <= 3 || (bUniform && ns == nc))
3067 /* The optimal situation */
3072 /* We need to check for all pme nodes which nodes they
3073 * could possibly need to communicate with.
3075 xmin = ddpme->pp_min;
3076 xmax = ddpme->pp_max;
3077 /* Allow for atoms to be maximally 2/3 times the cut-off
3078 * out of their DD cell. This is a reasonable balance between
3079 * between performance and support for most charge-group/cut-off
3082 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3083 /* Avoid extra communication when we are exactly at a boundary */
3087 for (s = 0; s < ns; s++)
3089 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3090 pme_boundary = (real)s/ns;
3093 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3095 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3099 pme_boundary = (real)(s+1)/ns;
3102 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3104 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3111 ddpme->maxshift = sh;
3115 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3116 ddpme->dim, ddpme->maxshift);
3120 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3124 for (d = 0; d < dd->ndim; d++)
3127 if (dim < ddbox->nboundeddim &&
3128 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3129 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3131 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3132 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3133 dd->nc[dim], dd->comm->cellsize_limit);
3138 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3139 gmx_bool bMaster, ivec npulse)
3141 gmx_domdec_comm_t *comm;
3144 real *cell_x, cell_dx, cellsize;
3148 for (d = 0; d < DIM; d++)
3150 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
3152 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3155 cell_dx = ddbox->box_size[d]/dd->nc[d];
3158 for (j = 0; j < dd->nc[d]+1; j++)
3160 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3165 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3166 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
3168 cellsize = cell_dx*ddbox->skew_fac[d];
3169 while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
3173 cellsize_min[d] = cellsize;
3177 /* Statically load balanced grid */
3178 /* Also when we are not doing a master distribution we determine
3179 * all cell borders in a loop to obtain identical values
3180 * to the master distribution case and to determine npulse.
3184 cell_x = dd->ma->cell_x[d];
3188 snew(cell_x, dd->nc[d]+1);
3190 cell_x[0] = ddbox->box0[d];
3191 for (j = 0; j < dd->nc[d]; j++)
3193 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3194 cell_x[j+1] = cell_x[j] + cell_dx;
3195 cellsize = cell_dx*ddbox->skew_fac[d];
3196 while (cellsize*npulse[d] < comm->cutoff &&
3197 npulse[d] < dd->nc[d]-1)
3201 cellsize_min[d] = min(cellsize_min[d], cellsize);
3205 comm->cell_x0[d] = cell_x[dd->ci[d]];
3206 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3210 /* The following limitation is to avoid that a cell would receive
3211 * some of its own home charge groups back over the periodic boundary.
3212 * Double charge groups cause trouble with the global indices.
3214 if (d < ddbox->npbcdim &&
3215 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3217 gmx_fatal_collective(FARGS, NULL, dd,
3218 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3219 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3221 dd->nc[d], dd->nc[d],
3222 dd->nnodes > dd->nc[d] ? "cells" : "processors");
3226 if (!comm->bDynLoadBal)
3228 copy_rvec(cellsize_min, comm->cellsize_min);
3231 for (d = 0; d < comm->npmedecompdim; d++)
3233 set_pme_maxshift(dd, &comm->ddpme[d],
3234 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3235 comm->ddpme[d].slb_dim_f);
3240 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3241 int d, int dim, gmx_domdec_root_t *root,
3243 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3245 gmx_domdec_comm_t *comm;
3246 int ncd, i, j, nmin, nmin_old;
3247 gmx_bool bLimLo, bLimHi;
3249 real fac, halfway, cellsize_limit_f_i, region_size;
3250 gmx_bool bPBC, bLastHi = FALSE;
3251 int nrange[] = {range[0], range[1]};
3253 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
3259 bPBC = (dim < ddbox->npbcdim);
3261 cell_size = root->buf_ncd;
3265 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3268 /* First we need to check if the scaling does not make cells
3269 * smaller than the smallest allowed size.
3270 * We need to do this iteratively, since if a cell is too small,
3271 * it needs to be enlarged, which makes all the other cells smaller,
3272 * which could in turn make another cell smaller than allowed.
3274 for (i = range[0]; i < range[1]; i++)
3276 root->bCellMin[i] = FALSE;
3282 /* We need the total for normalization */
3284 for (i = range[0]; i < range[1]; i++)
3286 if (root->bCellMin[i] == FALSE)
3288 fac += cell_size[i];
3291 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3292 /* Determine the cell boundaries */
3293 for (i = range[0]; i < range[1]; i++)
3295 if (root->bCellMin[i] == FALSE)
3297 cell_size[i] *= fac;
3298 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3300 cellsize_limit_f_i = 0;
3304 cellsize_limit_f_i = cellsize_limit_f;
3306 if (cell_size[i] < cellsize_limit_f_i)
3308 root->bCellMin[i] = TRUE;
3309 cell_size[i] = cellsize_limit_f_i;
3313 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
3316 while (nmin > nmin_old);
3319 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3320 /* For this check we should not use DD_CELL_MARGIN,
3321 * but a slightly smaller factor,
3322 * since rounding could get use below the limit.
3324 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3327 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3328 gmx_step_str(step, buf),
3329 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3330 ncd, comm->cellsize_min[dim]);
3333 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3337 /* Check if the boundary did not displace more than halfway
3338 * each of the cells it bounds, as this could cause problems,
3339 * especially when the differences between cell sizes are large.
3340 * If changes are applied, they will not make cells smaller
3341 * than the cut-off, as we check all the boundaries which
3342 * might be affected by a change and if the old state was ok,
3343 * the cells will at most be shrunk back to their old size.
3345 for (i = range[0]+1; i < range[1]; i++)
3347 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3348 if (root->cell_f[i] < halfway)
3350 root->cell_f[i] = halfway;
3351 /* Check if the change also causes shifts of the next boundaries */
3352 for (j = i+1; j < range[1]; j++)
3354 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3356 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3360 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3361 if (root->cell_f[i] > halfway)
3363 root->cell_f[i] = halfway;
3364 /* Check if the change also causes shifts of the next boundaries */
3365 for (j = i-1; j >= range[0]+1; j--)
3367 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3369 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3376 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3377 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3378 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3379 * for a and b nrange is used */
3382 /* Take care of the staggering of the cell boundaries */
3385 for (i = range[0]; i < range[1]; i++)
3387 root->cell_f_max0[i] = root->cell_f[i];
3388 root->cell_f_min1[i] = root->cell_f[i+1];
3393 for (i = range[0]+1; i < range[1]; i++)
3395 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3396 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3397 if (bLimLo && bLimHi)
3399 /* Both limits violated, try the best we can */
3400 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3401 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3402 nrange[0] = range[0];
3404 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3407 nrange[1] = range[1];
3408 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3414 /* root->cell_f[i] = root->bound_min[i]; */
3415 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3418 else if (bLimHi && !bLastHi)
3421 if (nrange[1] < range[1]) /* found a LimLo before */
3423 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3424 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3425 nrange[0] = nrange[1];
3427 root->cell_f[i] = root->bound_max[i];
3429 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3431 nrange[1] = range[1];
3434 if (nrange[1] < range[1]) /* found last a LimLo */
3436 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3437 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3438 nrange[0] = nrange[1];
3439 nrange[1] = range[1];
3440 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3442 else if (nrange[0] > range[0]) /* found at least one LimHi */
3444 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3451 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3452 int d, int dim, gmx_domdec_root_t *root,
3453 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3454 gmx_bool bUniform, gmx_int64_t step)
3456 gmx_domdec_comm_t *comm;
3457 int ncd, d1, i, j, pos;
3459 real load_aver, load_i, imbalance, change, change_max, sc;
3460 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3464 int range[] = { 0, 0 };
3468 /* Convert the maximum change from the input percentage to a fraction */
3469 change_limit = comm->dlb_scale_lim*0.01;
3473 bPBC = (dim < ddbox->npbcdim);
3475 cell_size = root->buf_ncd;
3477 /* Store the original boundaries */
3478 for (i = 0; i < ncd+1; i++)
3480 root->old_cell_f[i] = root->cell_f[i];
3484 for (i = 0; i < ncd; i++)
3486 cell_size[i] = 1.0/ncd;
3489 else if (dd_load_count(comm))
3491 load_aver = comm->load[d].sum_m/ncd;
3493 for (i = 0; i < ncd; i++)
3495 /* Determine the relative imbalance of cell i */
3496 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3497 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3498 /* Determine the change of the cell size using underrelaxation */
3499 change = -relax*imbalance;
3500 change_max = max(change_max, max(change, -change));
3502 /* Limit the amount of scaling.
3503 * We need to use the same rescaling for all cells in one row,
3504 * otherwise the load balancing might not converge.
3507 if (change_max > change_limit)
3509 sc *= change_limit/change_max;
3511 for (i = 0; i < ncd; i++)
3513 /* Determine the relative imbalance of cell i */
3514 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3515 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3516 /* Determine the change of the cell size using underrelaxation */
3517 change = -sc*imbalance;
3518 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3522 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3523 cellsize_limit_f *= DD_CELL_MARGIN;
3524 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3525 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3526 if (ddbox->tric_dir[dim])
3528 cellsize_limit_f /= ddbox->skew_fac[dim];
3529 dist_min_f /= ddbox->skew_fac[dim];
3531 if (bDynamicBox && d > 0)
3533 dist_min_f *= DD_PRES_SCALE_MARGIN;
3535 if (d > 0 && !bUniform)
3537 /* Make sure that the grid is not shifted too much */
3538 for (i = 1; i < ncd; i++)
3540 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3542 gmx_incons("Inconsistent DD boundary staggering limits!");
3544 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3545 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3548 root->bound_min[i] += 0.5*space;
3550 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3551 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3554 root->bound_max[i] += 0.5*space;
3559 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3561 root->cell_f_max0[i-1] + dist_min_f,
3562 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3563 root->cell_f_min1[i] - dist_min_f);
3568 root->cell_f[0] = 0;
3569 root->cell_f[ncd] = 1;
3570 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3573 /* After the checks above, the cells should obey the cut-off
3574 * restrictions, but it does not hurt to check.
3576 for (i = 0; i < ncd; i++)
3580 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3581 dim, i, root->cell_f[i], root->cell_f[i+1]);
3584 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3585 root->cell_f[i+1] - root->cell_f[i] <
3586 cellsize_limit_f/DD_CELL_MARGIN)
3590 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3591 gmx_step_str(step, buf), dim2char(dim), i,
3592 (root->cell_f[i+1] - root->cell_f[i])
3593 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3598 /* Store the cell boundaries of the lower dimensions at the end */
3599 for (d1 = 0; d1 < d; d1++)
3601 root->cell_f[pos++] = comm->cell_f0[d1];
3602 root->cell_f[pos++] = comm->cell_f1[d1];
3605 if (d < comm->npmedecompdim)
3607 /* The master determines the maximum shift for
3608 * the coordinate communication between separate PME nodes.
3610 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
3612 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3615 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3619 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3620 gmx_ddbox_t *ddbox, int dimind)
3622 gmx_domdec_comm_t *comm;
3627 /* Set the cell dimensions */
3628 dim = dd->dim[dimind];
3629 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3630 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3631 if (dim >= ddbox->nboundeddim)
3633 comm->cell_x0[dim] += ddbox->box0[dim];
3634 comm->cell_x1[dim] += ddbox->box0[dim];
3638 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3639 int d, int dim, real *cell_f_row,
3642 gmx_domdec_comm_t *comm;
3648 /* Each node would only need to know two fractions,
3649 * but it is probably cheaper to broadcast the whole array.
3651 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3652 0, comm->mpi_comm_load[d]);
3654 /* Copy the fractions for this dimension from the buffer */
3655 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3656 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3657 /* The whole array was communicated, so set the buffer position */
3658 pos = dd->nc[dim] + 1;
3659 for (d1 = 0; d1 <= d; d1++)
3663 /* Copy the cell fractions of the lower dimensions */
3664 comm->cell_f0[d1] = cell_f_row[pos++];
3665 comm->cell_f1[d1] = cell_f_row[pos++];
3667 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3669 /* Convert the communicated shift from float to int */
3670 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3673 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3677 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3678 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3679 gmx_bool bUniform, gmx_int64_t step)
3681 gmx_domdec_comm_t *comm;
3683 gmx_bool bRowMember, bRowRoot;
3688 for (d = 0; d < dd->ndim; d++)
3693 for (d1 = d; d1 < dd->ndim; d1++)
3695 if (dd->ci[dd->dim[d1]] > 0)
3708 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3709 ddbox, bDynamicBox, bUniform, step);
3710 cell_f_row = comm->root[d]->cell_f;
3714 cell_f_row = comm->cell_f_row;
3716 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
3721 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3725 /* This function assumes the box is static and should therefore
3726 * not be called when the box has changed since the last
3727 * call to dd_partition_system.
3729 for (d = 0; d < dd->ndim; d++)
3731 relative_to_absolute_cell_bounds(dd, ddbox, d);
3737 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3738 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3739 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3740 gmx_wallcycle_t wcycle)
3742 gmx_domdec_comm_t *comm;
3749 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3750 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3751 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3753 else if (bDynamicBox)
3755 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3758 /* Set the dimensions for which no DD is used */
3759 for (dim = 0; dim < DIM; dim++)
3761 if (dd->nc[dim] == 1)
3763 comm->cell_x0[dim] = 0;
3764 comm->cell_x1[dim] = ddbox->box_size[dim];
3765 if (dim >= ddbox->nboundeddim)
3767 comm->cell_x0[dim] += ddbox->box0[dim];
3768 comm->cell_x1[dim] += ddbox->box0[dim];
3774 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3777 gmx_domdec_comm_dim_t *cd;
3779 for (d = 0; d < dd->ndim; d++)
3781 cd = &dd->comm->cd[d];
3782 np = npulse[dd->dim[d]];
3783 if (np > cd->np_nalloc)
3787 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3788 dim2char(dd->dim[d]), np);
3790 if (DDMASTER(dd) && cd->np_nalloc > 0)
3792 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3794 srenew(cd->ind, np);
3795 for (i = cd->np_nalloc; i < np; i++)
3797 cd->ind[i].index = NULL;
3798 cd->ind[i].nalloc = 0;
3807 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3808 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3809 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3810 gmx_wallcycle_t wcycle)
3812 gmx_domdec_comm_t *comm;
3818 /* Copy the old cell boundaries for the cg displacement check */
3819 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3820 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3822 if (comm->bDynLoadBal)
3826 check_box_size(dd, ddbox);
3828 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3832 set_dd_cell_sizes_slb(dd, ddbox, FALSE, npulse);
3833 realloc_comm_ind(dd, npulse);
3838 for (d = 0; d < DIM; d++)
3840 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3841 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
3846 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3848 rvec cell_ns_x0, rvec cell_ns_x1,
3851 gmx_domdec_comm_t *comm;
3856 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3858 dim = dd->dim[dim_ind];
3860 /* Without PBC we don't have restrictions on the outer cells */
3861 if (!(dim >= ddbox->npbcdim &&
3862 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3863 comm->bDynLoadBal &&
3864 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3865 comm->cellsize_min[dim])
3868 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3869 gmx_step_str(step, buf), dim2char(dim),
3870 comm->cell_x1[dim] - comm->cell_x0[dim],
3871 ddbox->skew_fac[dim],
3872 dd->comm->cellsize_min[dim],
3873 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3877 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3879 /* Communicate the boundaries and update cell_ns_x0/1 */
3880 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3881 if (dd->bGridJump && dd->ndim > 1)
3883 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
3888 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3892 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3900 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3901 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3910 static void check_screw_box(matrix box)
3912 /* Mathematical limitation */
3913 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3915 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3918 /* Limitation due to the asymmetry of the eighth shell method */
3919 if (box[ZZ][YY] != 0)
3921 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
3925 static void distribute_cg(FILE *fplog, gmx_int64_t step,
3926 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3929 gmx_domdec_master_t *ma;
3930 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3931 int i, icg, j, k, k0, k1, d, npbcdim;
3933 rvec box_size, cg_cm;
3935 real nrcg, inv_ncg, pos_d;
3937 gmx_bool bUnbounded, bScrew;
3941 if (tmp_ind == NULL)
3943 snew(tmp_nalloc, dd->nnodes);
3944 snew(tmp_ind, dd->nnodes);
3945 for (i = 0; i < dd->nnodes; i++)
3947 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3948 snew(tmp_ind[i], tmp_nalloc[i]);
3952 /* Clear the count */
3953 for (i = 0; i < dd->nnodes; i++)
3959 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3961 cgindex = cgs->index;
3963 /* Compute the center of geometry for all charge groups */
3964 for (icg = 0; icg < cgs->nr; icg++)
3967 k1 = cgindex[icg+1];
3971 copy_rvec(pos[k0], cg_cm);
3978 for (k = k0; (k < k1); k++)
3980 rvec_inc(cg_cm, pos[k]);
3982 for (d = 0; (d < DIM); d++)
3984 cg_cm[d] *= inv_ncg;
3987 /* Put the charge group in the box and determine the cell index */
3988 for (d = DIM-1; d >= 0; d--)
3991 if (d < dd->npbcdim)
3993 bScrew = (dd->bScrewPBC && d == XX);
3994 if (tric_dir[d] && dd->nc[d] > 1)
3996 /* Use triclinic coordintates for this dimension */
3997 for (j = d+1; j < DIM; j++)
3999 pos_d += cg_cm[j]*tcm[j][d];
4002 while (pos_d >= box[d][d])
4005 rvec_dec(cg_cm, box[d]);
4008 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
4009 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
4011 for (k = k0; (k < k1); k++)
4013 rvec_dec(pos[k], box[d]);
4016 pos[k][YY] = box[YY][YY] - pos[k][YY];
4017 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4024 rvec_inc(cg_cm, box[d]);
4027 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
4028 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
4030 for (k = k0; (k < k1); k++)
4032 rvec_inc(pos[k], box[d]);
4035 pos[k][YY] = box[YY][YY] - pos[k][YY];
4036 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4041 /* This could be done more efficiently */
4043 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
4048 i = dd_index(dd->nc, ind);
4049 if (ma->ncg[i] == tmp_nalloc[i])
4051 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
4052 srenew(tmp_ind[i], tmp_nalloc[i]);
4054 tmp_ind[i][ma->ncg[i]] = icg;
4056 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
4060 for (i = 0; i < dd->nnodes; i++)
4063 for (k = 0; k < ma->ncg[i]; k++)
4065 ma->cg[k1++] = tmp_ind[i][k];
4068 ma->index[dd->nnodes] = k1;
4070 for (i = 0; i < dd->nnodes; i++)
4080 fprintf(fplog, "Charge group distribution at step %s:",
4081 gmx_step_str(step, buf));
4082 for (i = 0; i < dd->nnodes; i++)
4084 fprintf(fplog, " %d", ma->ncg[i]);
4086 fprintf(fplog, "\n");
4090 static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4091 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4094 gmx_domdec_master_t *ma = NULL;
4097 int *ibuf, buf2[2] = { 0, 0 };
4098 gmx_bool bMaster = DDMASTER(dd);
4105 check_screw_box(box);
4108 set_dd_cell_sizes_slb(dd, ddbox, TRUE, npulse);
4110 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
4111 for (i = 0; i < dd->nnodes; i++)
4113 ma->ibuf[2*i] = ma->ncg[i];
4114 ma->ibuf[2*i+1] = ma->nat[i];
4122 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4124 dd->ncg_home = buf2[0];
4125 dd->nat_home = buf2[1];
4126 dd->ncg_tot = dd->ncg_home;
4127 dd->nat_tot = dd->nat_home;
4128 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4130 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4131 srenew(dd->index_gl, dd->cg_nalloc);
4132 srenew(dd->cgindex, dd->cg_nalloc+1);
4136 for (i = 0; i < dd->nnodes; i++)
4138 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4139 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4144 DDMASTER(dd) ? ma->ibuf : NULL,
4145 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4146 DDMASTER(dd) ? ma->cg : NULL,
4147 dd->ncg_home*sizeof(int), dd->index_gl);
4149 /* Determine the home charge group sizes */
4151 for (i = 0; i < dd->ncg_home; i++)
4153 cg_gl = dd->index_gl[i];
4155 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4160 fprintf(debug, "Home charge groups:\n");
4161 for (i = 0; i < dd->ncg_home; i++)
4163 fprintf(debug, " %d", dd->index_gl[i]);
4166 fprintf(debug, "\n");
4169 fprintf(debug, "\n");
4173 static int compact_and_copy_vec_at(int ncg, int *move,
4176 rvec *src, gmx_domdec_comm_t *comm,
4179 int m, icg, i, i0, i1, nrcg;
4185 for (m = 0; m < DIM*2; m++)
4191 for (icg = 0; icg < ncg; icg++)
4193 i1 = cgindex[icg+1];
4199 /* Compact the home array in place */
4200 for (i = i0; i < i1; i++)
4202 copy_rvec(src[i], src[home_pos++]);
4208 /* Copy to the communication buffer */
4210 pos_vec[m] += 1 + vec*nrcg;
4211 for (i = i0; i < i1; i++)
4213 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4215 pos_vec[m] += (nvec - vec - 1)*nrcg;
4219 home_pos += i1 - i0;
4227 static int compact_and_copy_vec_cg(int ncg, int *move,
4229 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4232 int m, icg, i0, i1, nrcg;
4238 for (m = 0; m < DIM*2; m++)
4244 for (icg = 0; icg < ncg; icg++)
4246 i1 = cgindex[icg+1];
4252 /* Compact the home array in place */
4253 copy_rvec(src[icg], src[home_pos++]);
4259 /* Copy to the communication buffer */
4260 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
4261 pos_vec[m] += 1 + nrcg*nvec;
4273 static int compact_ind(int ncg, int *move,
4274 int *index_gl, int *cgindex,
4276 gmx_ga2la_t ga2la, char *bLocalCG,
4279 int cg, nat, a0, a1, a, a_gl;
4284 for (cg = 0; cg < ncg; cg++)
4290 /* Compact the home arrays in place.
4291 * Anything that can be done here avoids access to global arrays.
4293 cgindex[home_pos] = nat;
4294 for (a = a0; a < a1; a++)
4297 gatindex[nat] = a_gl;
4298 /* The cell number stays 0, so we don't need to set it */
4299 ga2la_change_la(ga2la, a_gl, nat);
4302 index_gl[home_pos] = index_gl[cg];
4303 cginfo[home_pos] = cginfo[cg];
4304 /* The charge group remains local, so bLocalCG does not change */
4309 /* Clear the global indices */
4310 for (a = a0; a < a1; a++)
4312 ga2la_del(ga2la, gatindex[a]);
4316 bLocalCG[index_gl[cg]] = FALSE;
4320 cgindex[home_pos] = nat;
4325 static void clear_and_mark_ind(int ncg, int *move,
4326 int *index_gl, int *cgindex, int *gatindex,
4327 gmx_ga2la_t ga2la, char *bLocalCG,
4332 for (cg = 0; cg < ncg; cg++)
4338 /* Clear the global indices */
4339 for (a = a0; a < a1; a++)
4341 ga2la_del(ga2la, gatindex[a]);
4345 bLocalCG[index_gl[cg]] = FALSE;
4347 /* Signal that this cg has moved using the ns cell index.
4348 * Here we set it to -1. fill_grid will change it
4349 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4351 cell_index[cg] = -1;
4356 static void print_cg_move(FILE *fplog,
4358 gmx_int64_t step, int cg, int dim, int dir,
4359 gmx_bool bHaveLimitdAndCMOld, real limitd,
4360 rvec cm_old, rvec cm_new, real pos_d)
4362 gmx_domdec_comm_t *comm;
4367 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4368 if (bHaveLimitdAndCMOld)
4370 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4371 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4375 fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4376 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4378 fprintf(fplog, "distance out of cell %f\n",
4379 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4380 if (bHaveLimitdAndCMOld)
4382 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4383 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4385 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4386 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4387 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4389 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4390 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4392 comm->cell_x0[dim], comm->cell_x1[dim]);
4395 static void cg_move_error(FILE *fplog,
4397 gmx_int64_t step, int cg, int dim, int dir,
4398 gmx_bool bHaveLimitdAndCMOld, real limitd,
4399 rvec cm_old, rvec cm_new, real pos_d)
4403 print_cg_move(fplog, dd, step, cg, dim, dir,
4404 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4406 print_cg_move(stderr, dd, step, cg, dim, dir,
4407 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4409 "A charge group moved too far between two domain decomposition steps\n"
4410 "This usually means that your system is not well equilibrated");
4413 static void rotate_state_atom(t_state *state, int a)
4417 for (est = 0; est < estNR; est++)
4419 if (EST_DISTR(est) && (state->flags & (1<<est)))
4424 /* Rotate the complete state; for a rectangular box only */
4425 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4426 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4429 state->v[a][YY] = -state->v[a][YY];
4430 state->v[a][ZZ] = -state->v[a][ZZ];
4433 state->sd_X[a][YY] = -state->sd_X[a][YY];
4434 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4437 state->cg_p[a][YY] = -state->cg_p[a][YY];
4438 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4440 case estDISRE_INITF:
4441 case estDISRE_RM3TAV:
4442 case estORIRE_INITF:
4444 /* These are distances, so not affected by rotation */
4447 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4453 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4455 if (natoms > comm->moved_nalloc)
4457 /* Contents should be preserved here */
4458 comm->moved_nalloc = over_alloc_dd(natoms);
4459 srenew(comm->moved, comm->moved_nalloc);
4465 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4468 ivec tric_dir, matrix tcm,
4469 rvec cell_x0, rvec cell_x1,
4470 rvec limitd, rvec limit0, rvec limit1,
4472 int cg_start, int cg_end,
4477 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4478 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4482 real inv_ncg, pos_d;
4485 npbcdim = dd->npbcdim;
4487 for (cg = cg_start; cg < cg_end; cg++)
4494 copy_rvec(state->x[k0], cm_new);
4501 for (k = k0; (k < k1); k++)
4503 rvec_inc(cm_new, state->x[k]);
4505 for (d = 0; (d < DIM); d++)
4507 cm_new[d] = inv_ncg*cm_new[d];
4512 /* Do pbc and check DD cell boundary crossings */
4513 for (d = DIM-1; d >= 0; d--)
4517 bScrew = (dd->bScrewPBC && d == XX);
4518 /* Determine the location of this cg in lattice coordinates */
4522 for (d2 = d+1; d2 < DIM; d2++)
4524 pos_d += cm_new[d2]*tcm[d2][d];
4527 /* Put the charge group in the triclinic unit-cell */
4528 if (pos_d >= cell_x1[d])
4530 if (pos_d >= limit1[d])
4532 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4533 cg_cm[cg], cm_new, pos_d);
4536 if (dd->ci[d] == dd->nc[d] - 1)
4538 rvec_dec(cm_new, state->box[d]);
4541 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4542 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4544 for (k = k0; (k < k1); k++)
4546 rvec_dec(state->x[k], state->box[d]);
4549 rotate_state_atom(state, k);
4554 else if (pos_d < cell_x0[d])
4556 if (pos_d < limit0[d])
4558 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4559 cg_cm[cg], cm_new, pos_d);
4564 rvec_inc(cm_new, state->box[d]);
4567 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4568 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4570 for (k = k0; (k < k1); k++)
4572 rvec_inc(state->x[k], state->box[d]);
4575 rotate_state_atom(state, k);
4581 else if (d < npbcdim)
4583 /* Put the charge group in the rectangular unit-cell */
4584 while (cm_new[d] >= state->box[d][d])
4586 rvec_dec(cm_new, state->box[d]);
4587 for (k = k0; (k < k1); k++)
4589 rvec_dec(state->x[k], state->box[d]);
4592 while (cm_new[d] < 0)
4594 rvec_inc(cm_new, state->box[d]);
4595 for (k = k0; (k < k1); k++)
4597 rvec_inc(state->x[k], state->box[d]);
4603 copy_rvec(cm_new, cg_cm[cg]);
4605 /* Determine where this cg should go */
4608 for (d = 0; d < dd->ndim; d++)
4613 flag |= DD_FLAG_FW(d);
4619 else if (dev[dim] == -1)
4621 flag |= DD_FLAG_BW(d);
4624 if (dd->nc[dim] > 2)
4635 /* Temporarily store the flag in move */
4636 move[cg] = mc + flag;
4640 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4641 gmx_domdec_t *dd, ivec tric_dir,
4642 t_state *state, rvec **f,
4651 int ncg[DIM*2], nat[DIM*2];
4652 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4653 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4654 int sbuf[2], rbuf[2];
4655 int home_pos_cg, home_pos_at, buf_pos;
4657 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4660 real inv_ncg, pos_d;
4662 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4664 cginfo_mb_t *cginfo_mb;
4665 gmx_domdec_comm_t *comm;
4667 int nthread, thread;
4671 check_screw_box(state->box);
4675 if (fr->cutoff_scheme == ecutsGROUP)
4680 for (i = 0; i < estNR; i++)
4686 case estX: /* Always present */ break;
4687 case estV: bV = (state->flags & (1<<i)); break;
4688 case estSDX: bSDX = (state->flags & (1<<i)); break;
4689 case estCGP: bCGP = (state->flags & (1<<i)); break;
4692 case estDISRE_INITF:
4693 case estDISRE_RM3TAV:
4694 case estORIRE_INITF:
4696 /* No processing required */
4699 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4704 if (dd->ncg_tot > comm->nalloc_int)
4706 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4707 srenew(comm->buf_int, comm->nalloc_int);
4709 move = comm->buf_int;
4711 /* Clear the count */
4712 for (c = 0; c < dd->ndim*2; c++)
4718 npbcdim = dd->npbcdim;
4720 for (d = 0; (d < DIM); d++)
4722 limitd[d] = dd->comm->cellsize_min[d];
4723 if (d >= npbcdim && dd->ci[d] == 0)
4725 cell_x0[d] = -GMX_FLOAT_MAX;
4729 cell_x0[d] = comm->cell_x0[d];
4731 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4733 cell_x1[d] = GMX_FLOAT_MAX;
4737 cell_x1[d] = comm->cell_x1[d];
4741 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4742 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4746 /* We check after communication if a charge group moved
4747 * more than one cell. Set the pre-comm check limit to float_max.
4749 limit0[d] = -GMX_FLOAT_MAX;
4750 limit1[d] = GMX_FLOAT_MAX;
4754 make_tric_corr_matrix(npbcdim, state->box, tcm);
4756 cgindex = dd->cgindex;
4758 nthread = gmx_omp_nthreads_get(emntDomdec);
4760 /* Compute the center of geometry for all home charge groups
4761 * and put them in the box and determine where they should go.
4763 #pragma omp parallel for num_threads(nthread) schedule(static)
4764 for (thread = 0; thread < nthread; thread++)
4766 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4767 cell_x0, cell_x1, limitd, limit0, limit1,
4769 ( thread *dd->ncg_home)/nthread,
4770 ((thread+1)*dd->ncg_home)/nthread,
4771 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4775 for (cg = 0; cg < dd->ncg_home; cg++)
4780 flag = mc & ~DD_FLAG_NRCG;
4781 mc = mc & DD_FLAG_NRCG;
4784 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4786 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4787 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4789 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4790 /* We store the cg size in the lower 16 bits
4791 * and the place where the charge group should go
4792 * in the next 6 bits. This saves some communication volume.
4794 nrcg = cgindex[cg+1] - cgindex[cg];
4795 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4801 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4802 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4805 for (i = 0; i < dd->ndim*2; i++)
4807 *ncg_moved += ncg[i];
4824 /* Make sure the communication buffers are large enough */
4825 for (mc = 0; mc < dd->ndim*2; mc++)
4827 nvr = ncg[mc] + nat[mc]*nvec;
4828 if (nvr > comm->cgcm_state_nalloc[mc])
4830 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4831 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4835 switch (fr->cutoff_scheme)
4838 /* Recalculating cg_cm might be cheaper than communicating,
4839 * but that could give rise to rounding issues.
4842 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4843 nvec, cg_cm, comm, bCompact);
4846 /* Without charge groups we send the moved atom coordinates
4847 * over twice. This is so the code below can be used without
4848 * many conditionals for both for with and without charge groups.
4851 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4852 nvec, state->x, comm, FALSE);
4855 home_pos_cg -= *ncg_moved;
4859 gmx_incons("unimplemented");
4865 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4866 nvec, vec++, state->x, comm, bCompact);
4869 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4870 nvec, vec++, state->v, comm, bCompact);
4874 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4875 nvec, vec++, state->sd_X, comm, bCompact);
4879 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4880 nvec, vec++, state->cg_p, comm, bCompact);
4885 compact_ind(dd->ncg_home, move,
4886 dd->index_gl, dd->cgindex, dd->gatindex,
4887 dd->ga2la, comm->bLocalCG,
4892 if (fr->cutoff_scheme == ecutsVERLET)
4894 moved = get_moved(comm, dd->ncg_home);
4896 for (k = 0; k < dd->ncg_home; k++)
4903 moved = fr->ns.grid->cell_index;
4906 clear_and_mark_ind(dd->ncg_home, move,
4907 dd->index_gl, dd->cgindex, dd->gatindex,
4908 dd->ga2la, comm->bLocalCG,
4912 cginfo_mb = fr->cginfo_mb;
4914 *ncg_stay_home = home_pos_cg;
4915 for (d = 0; d < dd->ndim; d++)
4921 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4924 /* Communicate the cg and atom counts */
4929 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4930 d, dir, sbuf[0], sbuf[1]);
4932 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4934 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4936 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4937 srenew(comm->buf_int, comm->nalloc_int);
4940 /* Communicate the charge group indices, sizes and flags */
4941 dd_sendrecv_int(dd, d, dir,
4942 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4943 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4945 nvs = ncg[cdd] + nat[cdd]*nvec;
4946 i = rbuf[0] + rbuf[1] *nvec;
4947 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4949 /* Communicate cgcm and state */
4950 dd_sendrecv_rvec(dd, d, dir,
4951 comm->cgcm_state[cdd], nvs,
4952 comm->vbuf.v+nvr, i);
4953 ncg_recv += rbuf[0];
4954 nat_recv += rbuf[1];
4958 /* Process the received charge groups */
4960 for (cg = 0; cg < ncg_recv; cg++)
4962 flag = comm->buf_int[cg*DD_CGIBS+1];
4964 if (dim >= npbcdim && dd->nc[dim] > 2)
4966 /* No pbc in this dim and more than one domain boundary.
4967 * We do a separate check if a charge group didn't move too far.
4969 if (((flag & DD_FLAG_FW(d)) &&
4970 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4971 ((flag & DD_FLAG_BW(d)) &&
4972 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4974 cg_move_error(fplog, dd, step, cg, dim,
4975 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4977 comm->vbuf.v[buf_pos],
4978 comm->vbuf.v[buf_pos],
4979 comm->vbuf.v[buf_pos][dim]);
4986 /* Check which direction this cg should go */
4987 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4991 /* The cell boundaries for dimension d2 are not equal
4992 * for each cell row of the lower dimension(s),
4993 * therefore we might need to redetermine where
4994 * this cg should go.
4997 /* If this cg crosses the box boundary in dimension d2
4998 * we can use the communicated flag, so we do not
4999 * have to worry about pbc.
5001 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
5002 (flag & DD_FLAG_FW(d2))) ||
5003 (dd->ci[dim2] == 0 &&
5004 (flag & DD_FLAG_BW(d2)))))
5006 /* Clear the two flags for this dimension */
5007 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
5008 /* Determine the location of this cg
5009 * in lattice coordinates
5011 pos_d = comm->vbuf.v[buf_pos][dim2];
5014 for (d3 = dim2+1; d3 < DIM; d3++)
5017 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
5020 /* Check of we are not at the box edge.
5021 * pbc is only handled in the first step above,
5022 * but this check could move over pbc while
5023 * the first step did not due to different rounding.
5025 if (pos_d >= cell_x1[dim2] &&
5026 dd->ci[dim2] != dd->nc[dim2]-1)
5028 flag |= DD_FLAG_FW(d2);
5030 else if (pos_d < cell_x0[dim2] &&
5033 flag |= DD_FLAG_BW(d2);
5035 comm->buf_int[cg*DD_CGIBS+1] = flag;
5038 /* Set to which neighboring cell this cg should go */
5039 if (flag & DD_FLAG_FW(d2))
5043 else if (flag & DD_FLAG_BW(d2))
5045 if (dd->nc[dd->dim[d2]] > 2)
5057 nrcg = flag & DD_FLAG_NRCG;
5060 if (home_pos_cg+1 > dd->cg_nalloc)
5062 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5063 srenew(dd->index_gl, dd->cg_nalloc);
5064 srenew(dd->cgindex, dd->cg_nalloc+1);
5066 /* Set the global charge group index and size */
5067 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5068 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5069 /* Copy the state from the buffer */
5070 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5071 if (fr->cutoff_scheme == ecutsGROUP)
5074 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5078 /* Set the cginfo */
5079 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5080 dd->index_gl[home_pos_cg]);
5083 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5086 if (home_pos_at+nrcg > state->nalloc)
5088 dd_realloc_state(state, f, home_pos_at+nrcg);
5090 for (i = 0; i < nrcg; i++)
5092 copy_rvec(comm->vbuf.v[buf_pos++],
5093 state->x[home_pos_at+i]);
5097 for (i = 0; i < nrcg; i++)
5099 copy_rvec(comm->vbuf.v[buf_pos++],
5100 state->v[home_pos_at+i]);
5105 for (i = 0; i < nrcg; i++)
5107 copy_rvec(comm->vbuf.v[buf_pos++],
5108 state->sd_X[home_pos_at+i]);
5113 for (i = 0; i < nrcg; i++)
5115 copy_rvec(comm->vbuf.v[buf_pos++],
5116 state->cg_p[home_pos_at+i]);
5120 home_pos_at += nrcg;
5124 /* Reallocate the buffers if necessary */
5125 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5127 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5128 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5130 nvr = ncg[mc] + nat[mc]*nvec;
5131 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5133 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5134 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5136 /* Copy from the receive to the send buffers */
5137 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5138 comm->buf_int + cg*DD_CGIBS,
5139 DD_CGIBS*sizeof(int));
5140 memcpy(comm->cgcm_state[mc][nvr],
5141 comm->vbuf.v[buf_pos],
5142 (1+nrcg*nvec)*sizeof(rvec));
5143 buf_pos += 1 + nrcg*nvec;
5150 /* With sorting (!bCompact) the indices are now only partially up to date
5151 * and ncg_home and nat_home are not the real count, since there are
5152 * "holes" in the arrays for the charge groups that moved to neighbors.
5154 if (fr->cutoff_scheme == ecutsVERLET)
5156 moved = get_moved(comm, home_pos_cg);
5158 for (i = dd->ncg_home; i < home_pos_cg; i++)
5163 dd->ncg_home = home_pos_cg;
5164 dd->nat_home = home_pos_at;
5169 "Finished repartitioning: cgs moved out %d, new home %d\n",
5170 *ncg_moved, dd->ncg_home-*ncg_moved);
5175 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5177 dd->comm->cycl[ddCycl] += cycles;
5178 dd->comm->cycl_n[ddCycl]++;
5179 if (cycles > dd->comm->cycl_max[ddCycl])
5181 dd->comm->cycl_max[ddCycl] = cycles;
5185 static double force_flop_count(t_nrnb *nrnb)
5192 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5194 /* To get closer to the real timings, we half the count
5195 * for the normal loops and again half it for water loops.
5198 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5200 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5204 sum += nrnb->n[i]*0.50*cost_nrnb(i);
5207 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5210 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5212 sum += nrnb->n[i]*cost_nrnb(i);
5215 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5217 sum += nrnb->n[i]*cost_nrnb(i);
5223 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5225 if (dd->comm->eFlop)
5227 dd->comm->flop -= force_flop_count(nrnb);
5230 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5232 if (dd->comm->eFlop)
5234 dd->comm->flop += force_flop_count(nrnb);
5239 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5243 for (i = 0; i < ddCyclNr; i++)
5245 dd->comm->cycl[i] = 0;
5246 dd->comm->cycl_n[i] = 0;
5247 dd->comm->cycl_max[i] = 0;
5250 dd->comm->flop_n = 0;
5253 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5255 gmx_domdec_comm_t *comm;
5256 gmx_domdec_load_t *load;
5257 gmx_domdec_root_t *root = NULL;
5258 int d, dim, cid, i, pos;
5259 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5264 fprintf(debug, "get_load_distribution start\n");
5267 wallcycle_start(wcycle, ewcDDCOMMLOAD);
5271 bSepPME = (dd->pme_nodeid >= 0);
5273 for (d = dd->ndim-1; d >= 0; d--)
5276 /* Check if we participate in the communication in this dimension */
5277 if (d == dd->ndim-1 ||
5278 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5280 load = &comm->load[d];
5283 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
5286 if (d == dd->ndim-1)
5288 sbuf[pos++] = dd_force_load(comm);
5289 sbuf[pos++] = sbuf[0];
5292 sbuf[pos++] = sbuf[0];
5293 sbuf[pos++] = cell_frac;
5296 sbuf[pos++] = comm->cell_f_max0[d];
5297 sbuf[pos++] = comm->cell_f_min1[d];
5302 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5303 sbuf[pos++] = comm->cycl[ddCyclPME];
5308 sbuf[pos++] = comm->load[d+1].sum;
5309 sbuf[pos++] = comm->load[d+1].max;
5312 sbuf[pos++] = comm->load[d+1].sum_m;
5313 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5314 sbuf[pos++] = comm->load[d+1].flags;
5317 sbuf[pos++] = comm->cell_f_max0[d];
5318 sbuf[pos++] = comm->cell_f_min1[d];
5323 sbuf[pos++] = comm->load[d+1].mdf;
5324 sbuf[pos++] = comm->load[d+1].pme;
5328 /* Communicate a row in DD direction d.
5329 * The communicators are setup such that the root always has rank 0.
5332 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5333 load->load, load->nload*sizeof(float), MPI_BYTE,
5334 0, comm->mpi_comm_load[d]);
5336 if (dd->ci[dim] == dd->master_ci[dim])
5338 /* We are the root, process this row */
5339 if (comm->bDynLoadBal)
5341 root = comm->root[d];
5351 for (i = 0; i < dd->nc[dim]; i++)
5353 load->sum += load->load[pos++];
5354 load->max = max(load->max, load->load[pos]);
5360 /* This direction could not be load balanced properly,
5361 * therefore we need to use the maximum iso the average load.
5363 load->sum_m = max(load->sum_m, load->load[pos]);
5367 load->sum_m += load->load[pos];
5370 load->cvol_min = min(load->cvol_min, load->load[pos]);
5374 load->flags = (int)(load->load[pos++] + 0.5);
5378 root->cell_f_max0[i] = load->load[pos++];
5379 root->cell_f_min1[i] = load->load[pos++];
5384 load->mdf = max(load->mdf, load->load[pos]);
5386 load->pme = max(load->pme, load->load[pos]);
5390 if (comm->bDynLoadBal && root->bLimited)
5392 load->sum_m *= dd->nc[dim];
5393 load->flags |= (1<<d);
5401 comm->nload += dd_load_count(comm);
5402 comm->load_step += comm->cycl[ddCyclStep];
5403 comm->load_sum += comm->load[0].sum;
5404 comm->load_max += comm->load[0].max;
5405 if (comm->bDynLoadBal)
5407 for (d = 0; d < dd->ndim; d++)
5409 if (comm->load[0].flags & (1<<d))
5411 comm->load_lim[d]++;
5417 comm->load_mdf += comm->load[0].mdf;
5418 comm->load_pme += comm->load[0].pme;
5422 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5426 fprintf(debug, "get_load_distribution finished\n");
5430 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5432 /* Return the relative performance loss on the total run time
5433 * due to the force calculation load imbalance.
5435 if (dd->comm->nload > 0)
5438 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5439 (dd->comm->load_step*dd->nnodes);
5447 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5450 int npp, npme, nnodes, d, limp;
5451 float imbal, pme_f_ratio, lossf, lossp = 0;
5453 gmx_domdec_comm_t *comm;
5456 if (DDMASTER(dd) && comm->nload > 0)
5459 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5460 nnodes = npp + npme;
5461 imbal = comm->load_max*npp/comm->load_sum - 1;
5462 lossf = dd_force_imb_perf_loss(dd);
5463 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5464 fprintf(fplog, "%s", buf);
5465 fprintf(stderr, "\n");
5466 fprintf(stderr, "%s", buf);
5467 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5468 fprintf(fplog, "%s", buf);
5469 fprintf(stderr, "%s", buf);
5471 if (comm->bDynLoadBal)
5473 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5474 for (d = 0; d < dd->ndim; d++)
5476 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5477 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5483 sprintf(buf+strlen(buf), "\n");
5484 fprintf(fplog, "%s", buf);
5485 fprintf(stderr, "%s", buf);
5489 pme_f_ratio = comm->load_pme/comm->load_mdf;
5490 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5493 lossp *= (float)npme/(float)nnodes;
5497 lossp *= (float)npp/(float)nnodes;
5499 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5500 fprintf(fplog, "%s", buf);
5501 fprintf(stderr, "%s", buf);
5502 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5503 fprintf(fplog, "%s", buf);
5504 fprintf(stderr, "%s", buf);
5506 fprintf(fplog, "\n");
5507 fprintf(stderr, "\n");
5509 if (lossf >= DD_PERF_LOSS)
5512 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5513 " in the domain decomposition.\n", lossf*100);
5514 if (!comm->bDynLoadBal)
5516 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5520 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5522 fprintf(fplog, "%s\n", buf);
5523 fprintf(stderr, "%s\n", buf);
5525 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
5528 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5529 " had %s work to do than the PP nodes.\n"
5530 " You might want to %s the number of PME nodes\n"
5531 " or %s the cut-off and the grid spacing.\n",
5533 (lossp < 0) ? "less" : "more",
5534 (lossp < 0) ? "decrease" : "increase",
5535 (lossp < 0) ? "decrease" : "increase");
5536 fprintf(fplog, "%s\n", buf);
5537 fprintf(stderr, "%s\n", buf);
5542 static float dd_vol_min(gmx_domdec_t *dd)
5544 return dd->comm->load[0].cvol_min*dd->nnodes;
5547 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5549 return dd->comm->load[0].flags;
5552 static float dd_f_imbal(gmx_domdec_t *dd)
5554 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5557 float dd_pme_f_ratio(gmx_domdec_t *dd)
5559 if (dd->comm->cycl_n[ddCyclPME] > 0)
5561 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5569 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5574 flags = dd_load_flags(dd);
5578 "DD load balancing is limited by minimum cell size in dimension");
5579 for (d = 0; d < dd->ndim; d++)
5583 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5586 fprintf(fplog, "\n");
5588 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5589 if (dd->comm->bDynLoadBal)
5591 fprintf(fplog, " vol min/aver %5.3f%c",
5592 dd_vol_min(dd), flags ? '!' : ' ');
5594 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5595 if (dd->comm->cycl_n[ddCyclPME])
5597 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5599 fprintf(fplog, "\n\n");
5602 static void dd_print_load_verbose(gmx_domdec_t *dd)
5604 if (dd->comm->bDynLoadBal)
5606 fprintf(stderr, "vol %4.2f%c ",
5607 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5609 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5610 if (dd->comm->cycl_n[ddCyclPME])
5612 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
5617 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5622 gmx_domdec_root_t *root;
5623 gmx_bool bPartOfGroup = FALSE;
5625 dim = dd->dim[dim_ind];
5626 copy_ivec(loc, loc_c);
5627 for (i = 0; i < dd->nc[dim]; i++)
5630 rank = dd_index(dd->nc, loc_c);
5631 if (rank == dd->rank)
5633 /* This process is part of the group */
5634 bPartOfGroup = TRUE;
5637 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5641 dd->comm->mpi_comm_load[dim_ind] = c_row;
5642 if (dd->comm->eDLB != edlbNO)
5644 if (dd->ci[dim] == dd->master_ci[dim])
5646 /* This is the root process of this row */
5647 snew(dd->comm->root[dim_ind], 1);
5648 root = dd->comm->root[dim_ind];
5649 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5650 snew(root->old_cell_f, dd->nc[dim]+1);
5651 snew(root->bCellMin, dd->nc[dim]);
5654 snew(root->cell_f_max0, dd->nc[dim]);
5655 snew(root->cell_f_min1, dd->nc[dim]);
5656 snew(root->bound_min, dd->nc[dim]);
5657 snew(root->bound_max, dd->nc[dim]);
5659 snew(root->buf_ncd, dd->nc[dim]);
5663 /* This is not a root process, we only need to receive cell_f */
5664 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5667 if (dd->ci[dim] == dd->master_ci[dim])
5669 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
5675 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5676 const gmx_hw_info_t gmx_unused *hwinfo,
5677 const gmx_hw_opt_t gmx_unused *hw_opt)
5680 int physicalnode_id_hash;
5683 MPI_Comm mpi_comm_pp_physicalnode;
5685 if (!(cr->duty & DUTY_PP) ||
5686 hw_opt->gpu_opt.ncuda_dev_use == 0)
5688 /* Only PP nodes (currently) use GPUs.
5689 * If we don't have GPUs, there are no resources to share.
5694 physicalnode_id_hash = gmx_physicalnode_id_hash();
5696 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5702 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5703 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5704 dd->rank, physicalnode_id_hash, gpu_id);
5706 /* Split the PP communicator over the physical nodes */
5707 /* TODO: See if we should store this (before), as it's also used for
5708 * for the nodecomm summution.
5710 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5711 &mpi_comm_pp_physicalnode);
5712 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5713 &dd->comm->mpi_comm_gpu_shared);
5714 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5715 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5719 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5722 /* Note that some ranks could share a GPU, while others don't */
5724 if (dd->comm->nrank_gpu_shared == 1)
5726 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
5731 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5734 int dim0, dim1, i, j;
5739 fprintf(debug, "Making load communicators\n");
5742 snew(dd->comm->load, dd->ndim);
5743 snew(dd->comm->mpi_comm_load, dd->ndim);
5746 make_load_communicator(dd, 0, loc);
5750 for (i = 0; i < dd->nc[dim0]; i++)
5753 make_load_communicator(dd, 1, loc);
5759 for (i = 0; i < dd->nc[dim0]; i++)
5763 for (j = 0; j < dd->nc[dim1]; j++)
5766 make_load_communicator(dd, 2, loc);
5773 fprintf(debug, "Finished making load communicators\n");
5778 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5781 int d, dim, i, j, m;
5784 ivec dd_zp[DD_MAXIZONE];
5785 gmx_domdec_zones_t *zones;
5786 gmx_domdec_ns_ranges_t *izone;
5788 for (d = 0; d < dd->ndim; d++)
5791 copy_ivec(dd->ci, tmp);
5792 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5793 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5794 copy_ivec(dd->ci, tmp);
5795 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5796 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5799 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5802 dd->neighbor[d][1]);
5808 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5810 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5811 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5818 for (i = 0; i < nzonep; i++)
5820 copy_ivec(dd_zp3[i], dd_zp[i]);
5826 for (i = 0; i < nzonep; i++)
5828 copy_ivec(dd_zp2[i], dd_zp[i]);
5834 for (i = 0; i < nzonep; i++)
5836 copy_ivec(dd_zp1[i], dd_zp[i]);
5840 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5845 zones = &dd->comm->zones;
5847 for (i = 0; i < nzone; i++)
5850 clear_ivec(zones->shift[i]);
5851 for (d = 0; d < dd->ndim; d++)
5853 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5858 for (i = 0; i < nzone; i++)
5860 for (d = 0; d < DIM; d++)
5862 s[d] = dd->ci[d] - zones->shift[i][d];
5867 else if (s[d] >= dd->nc[d])
5873 zones->nizone = nzonep;
5874 for (i = 0; i < zones->nizone; i++)
5876 if (dd_zp[i][0] != i)
5878 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5880 izone = &zones->izone[i];
5881 izone->j0 = dd_zp[i][1];
5882 izone->j1 = dd_zp[i][2];
5883 for (dim = 0; dim < DIM; dim++)
5885 if (dd->nc[dim] == 1)
5887 /* All shifts should be allowed */
5888 izone->shift0[dim] = -1;
5889 izone->shift1[dim] = 1;
5894 izone->shift0[d] = 0;
5895 izone->shift1[d] = 0;
5896 for(j=izone->j0; j<izone->j1; j++) {
5897 if (dd->shift[j][d] > dd->shift[i][d])
5898 izone->shift0[d] = -1;
5899 if (dd->shift[j][d] < dd->shift[i][d])
5900 izone->shift1[d] = 1;
5906 /* Assume the shift are not more than 1 cell */
5907 izone->shift0[dim] = 1;
5908 izone->shift1[dim] = -1;
5909 for (j = izone->j0; j < izone->j1; j++)
5911 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5912 if (shift_diff < izone->shift0[dim])
5914 izone->shift0[dim] = shift_diff;
5916 if (shift_diff > izone->shift1[dim])
5918 izone->shift1[dim] = shift_diff;
5925 if (dd->comm->eDLB != edlbNO)
5927 snew(dd->comm->root, dd->ndim);
5930 if (dd->comm->bRecordLoad)
5932 make_load_communicators(dd);
5936 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
5939 gmx_domdec_comm_t *comm;
5950 if (comm->bCartesianPP)
5952 /* Set up cartesian communication for the particle-particle part */
5955 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5956 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5959 for (i = 0; i < DIM; i++)
5963 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5965 /* We overwrite the old communicator with the new cartesian one */
5966 cr->mpi_comm_mygroup = comm_cart;
5969 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5970 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5972 if (comm->bCartesianPP_PME)
5974 /* Since we want to use the original cartesian setup for sim,
5975 * and not the one after split, we need to make an index.
5977 snew(comm->ddindex2ddnodeid, dd->nnodes);
5978 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5979 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5980 /* Get the rank of the DD master,
5981 * above we made sure that the master node is a PP node.
5991 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5993 else if (comm->bCartesianPP)
5995 if (cr->npmenodes == 0)
5997 /* The PP communicator is also
5998 * the communicator for this simulation
6000 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
6002 cr->nodeid = dd->rank;
6004 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
6006 /* We need to make an index to go from the coordinates
6007 * to the nodeid of this simulation.
6009 snew(comm->ddindex2simnodeid, dd->nnodes);
6010 snew(buf, dd->nnodes);
6011 if (cr->duty & DUTY_PP)
6013 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6015 /* Communicate the ddindex to simulation nodeid index */
6016 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6017 cr->mpi_comm_mysim);
6020 /* Determine the master coordinates and rank.
6021 * The DD master should be the same node as the master of this sim.
6023 for (i = 0; i < dd->nnodes; i++)
6025 if (comm->ddindex2simnodeid[i] == 0)
6027 ddindex2xyz(dd->nc, i, dd->master_ci);
6028 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
6033 fprintf(debug, "The master rank is %d\n", dd->masterrank);
6038 /* No Cartesian communicators */
6039 /* We use the rank in dd->comm->all as DD index */
6040 ddindex2xyz(dd->nc, dd->rank, dd->ci);
6041 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
6043 clear_ivec(dd->master_ci);
6050 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6051 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6056 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6057 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6061 static void receive_ddindex2simnodeid(t_commrec *cr)
6065 gmx_domdec_comm_t *comm;
6072 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6074 snew(comm->ddindex2simnodeid, dd->nnodes);
6075 snew(buf, dd->nnodes);
6076 if (cr->duty & DUTY_PP)
6078 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6081 /* Communicate the ddindex to simulation nodeid index */
6082 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6083 cr->mpi_comm_mysim);
6090 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6091 int ncg, int natoms)
6093 gmx_domdec_master_t *ma;
6098 snew(ma->ncg, dd->nnodes);
6099 snew(ma->index, dd->nnodes+1);
6101 snew(ma->nat, dd->nnodes);
6102 snew(ma->ibuf, dd->nnodes*2);
6103 snew(ma->cell_x, DIM);
6104 for (i = 0; i < DIM; i++)
6106 snew(ma->cell_x[i], dd->nc[i]+1);
6109 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6115 snew(ma->vbuf, natoms);
6121 static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
6122 int gmx_unused reorder)
6125 gmx_domdec_comm_t *comm;
6136 if (comm->bCartesianPP)
6138 for (i = 1; i < DIM; i++)
6140 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6142 if (bDiv[YY] || bDiv[ZZ])
6144 comm->bCartesianPP_PME = TRUE;
6145 /* If we have 2D PME decomposition, which is always in x+y,
6146 * we stack the PME only nodes in z.
6147 * Otherwise we choose the direction that provides the thinnest slab
6148 * of PME only nodes as this will have the least effect
6149 * on the PP communication.
6150 * But for the PME communication the opposite might be better.
6152 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6154 dd->nc[YY] > dd->nc[ZZ]))
6156 comm->cartpmedim = ZZ;
6160 comm->cartpmedim = YY;
6162 comm->ntot[comm->cartpmedim]
6163 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6167 fprintf(fplog, "#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6169 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6174 if (comm->bCartesianPP_PME)
6178 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6181 for (i = 0; i < DIM; i++)
6185 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6188 MPI_Comm_rank(comm_cart, &rank);
6189 if (MASTERNODE(cr) && rank != 0)
6191 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6194 /* With this assigment we loose the link to the original communicator
6195 * which will usually be MPI_COMM_WORLD, unless have multisim.
6197 cr->mpi_comm_mysim = comm_cart;
6198 cr->sim_nodeid = rank;
6200 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6204 fprintf(fplog, "Cartesian nodeid %d, coordinates %d %d %d\n\n",
6205 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6208 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6212 if (cr->npmenodes == 0 ||
6213 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6215 cr->duty = DUTY_PME;
6218 /* Split the sim communicator into PP and PME only nodes */
6219 MPI_Comm_split(cr->mpi_comm_mysim,
6221 dd_index(comm->ntot, dd->ci),
6222 &cr->mpi_comm_mygroup);
6226 switch (dd_node_order)
6231 fprintf(fplog, "Order of the nodes: PP first, PME last\n");
6234 case ddnoINTERLEAVE:
6235 /* Interleave the PP-only and PME-only nodes,
6236 * as on clusters with dual-core machines this will double
6237 * the communication bandwidth of the PME processes
6238 * and thus speed up the PP <-> PME and inter PME communication.
6242 fprintf(fplog, "Interleaving PP and PME nodes\n");
6244 comm->pmenodes = dd_pmenodes(cr);
6249 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6252 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6254 cr->duty = DUTY_PME;
6261 /* Split the sim communicator into PP and PME only nodes */
6262 MPI_Comm_split(cr->mpi_comm_mysim,
6265 &cr->mpi_comm_mygroup);
6266 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6272 fprintf(fplog, "This is a %s only node\n\n",
6273 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
6277 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6280 gmx_domdec_comm_t *comm;
6286 copy_ivec(dd->nc, comm->ntot);
6288 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6289 comm->bCartesianPP_PME = FALSE;
6291 /* Reorder the nodes by default. This might change the MPI ranks.
6292 * Real reordering is only supported on very few architectures,
6293 * Blue Gene is one of them.
6295 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6297 if (cr->npmenodes > 0)
6299 /* Split the communicator into a PP and PME part */
6300 split_communicator(fplog, cr, dd_node_order, CartReorder);
6301 if (comm->bCartesianPP_PME)
6303 /* We (possibly) reordered the nodes in split_communicator,
6304 * so it is no longer required in make_pp_communicator.
6306 CartReorder = FALSE;
6311 /* All nodes do PP and PME */
6313 /* We do not require separate communicators */
6314 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6318 if (cr->duty & DUTY_PP)
6320 /* Copy or make a new PP communicator */
6321 make_pp_communicator(fplog, cr, CartReorder);
6325 receive_ddindex2simnodeid(cr);
6328 if (!(cr->duty & DUTY_PME))
6330 /* Set up the commnuication to our PME node */
6331 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6332 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6335 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6336 dd->pme_nodeid, dd->pme_receive_vir_ener);
6341 dd->pme_nodeid = -1;
6346 dd->ma = init_gmx_domdec_master_t(dd,
6348 comm->cgs_gl.index[comm->cgs_gl.nr]);
6352 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6354 real *slb_frac, tot;
6359 if (nc > 1 && size_string != NULL)
6363 fprintf(fplog, "Using static load balancing for the %s direction\n",
6368 for (i = 0; i < nc; i++)
6371 sscanf(size_string, "%lf%n", &dbl, &n);
6374 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6383 fprintf(fplog, "Relative cell sizes:");
6385 for (i = 0; i < nc; i++)
6390 fprintf(fplog, " %5.3f", slb_frac[i]);
6395 fprintf(fplog, "\n");
6402 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6405 gmx_mtop_ilistloop_t iloop;
6409 iloop = gmx_mtop_ilistloop_init(mtop);
6410 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6412 for (ftype = 0; ftype < F_NRE; ftype++)
6414 if ((interaction_function[ftype].flags & IF_BOND) &&
6417 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
6425 static int dd_nst_env(FILE *fplog, const char *env_var, int def)
6431 val = getenv(env_var);
6434 if (sscanf(val, "%d", &nst) <= 0)
6440 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6448 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6452 fprintf(stderr, "\n%s\n", warn_string);
6456 fprintf(fplog, "\n%s\n", warn_string);
6460 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6461 t_inputrec *ir, FILE *fplog)
6463 if (ir->ePBC == epbcSCREW &&
6464 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6466 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6469 if (ir->ns_type == ensSIMPLE)
6471 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
6474 if (ir->nstlist == 0)
6476 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6479 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6481 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6485 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6490 r = ddbox->box_size[XX];
6491 for (di = 0; di < dd->ndim; di++)
6494 /* Check using the initial average cell size */
6495 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6501 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6502 const char *dlb_opt, gmx_bool bRecordLoad,
6503 unsigned long Flags, t_inputrec *ir)
6511 case 'a': eDLB = edlbAUTO; break;
6512 case 'n': eDLB = edlbNO; break;
6513 case 'y': eDLB = edlbYES; break;
6514 default: gmx_incons("Unknown dlb_opt");
6517 if (Flags & MD_RERUN)
6522 if (!EI_DYNAMICS(ir->eI))
6524 if (eDLB == edlbYES)
6526 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6527 dd_warning(cr, fplog, buf);
6535 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6540 if (Flags & MD_REPRODUCIBLE)
6547 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6551 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6554 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
6562 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6567 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6569 /* Decomposition order z,y,x */
6572 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6574 for (dim = DIM-1; dim >= 0; dim--)
6576 if (dd->nc[dim] > 1)
6578 dd->dim[dd->ndim++] = dim;
6584 /* Decomposition order x,y,z */
6585 for (dim = 0; dim < DIM; dim++)
6587 if (dd->nc[dim] > 1)
6589 dd->dim[dd->ndim++] = dim;
6595 static gmx_domdec_comm_t *init_dd_comm()
6597 gmx_domdec_comm_t *comm;
6601 snew(comm->cggl_flag, DIM*2);
6602 snew(comm->cgcm_state, DIM*2);
6603 for (i = 0; i < DIM*2; i++)
6605 comm->cggl_flag_nalloc[i] = 0;
6606 comm->cgcm_state_nalloc[i] = 0;
6609 comm->nalloc_int = 0;
6610 comm->buf_int = NULL;
6612 vec_rvec_init(&comm->vbuf);
6614 comm->n_load_have = 0;
6615 comm->n_load_collect = 0;
6617 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6619 comm->sum_nat[i] = 0;
6623 comm->load_step = 0;
6626 clear_ivec(comm->load_lim);
6633 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6634 unsigned long Flags,
6636 real comm_distance_min, real rconstr,
6637 const char *dlb_opt, real dlb_scale,
6638 const char *sizex, const char *sizey, const char *sizez,
6639 gmx_mtop_t *mtop, t_inputrec *ir,
6640 matrix box, rvec *x,
6642 int *npme_x, int *npme_y)
6645 gmx_domdec_comm_t *comm;
6648 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6655 "\nInitializing Domain Decomposition on %d nodes\n", cr->nnodes);
6660 dd->comm = init_dd_comm();
6662 snew(comm->cggl_flag, DIM*2);
6663 snew(comm->cgcm_state, DIM*2);
6665 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6666 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6668 dd->bSendRecv2 = dd_nst_env(fplog, "GMX_DD_SENDRECV2", 0);
6669 comm->dlb_scale_lim = dd_nst_env(fplog, "GMX_DLB_MAX", 10);
6670 comm->eFlop = dd_nst_env(fplog, "GMX_DLB_FLOP", 0);
6671 recload = dd_nst_env(fplog, "GMX_DD_LOAD", 1);
6672 comm->nstSortCG = dd_nst_env(fplog, "GMX_DD_SORT", 1);
6673 comm->nstDDDump = dd_nst_env(fplog, "GMX_DD_DUMP", 0);
6674 comm->nstDDDumpGrid = dd_nst_env(fplog, "GMX_DD_DUMP_GRID", 0);
6675 comm->DD_debug = dd_nst_env(fplog, "GMX_DD_DEBUG", 0);
6677 dd->pme_recv_f_alloc = 0;
6678 dd->pme_recv_f_buf = NULL;
6680 if (dd->bSendRecv2 && fplog)
6682 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6688 fprintf(fplog, "Will load balance based on FLOP count\n");
6690 if (comm->eFlop > 1)
6692 srand(1+cr->nodeid);
6694 comm->bRecordLoad = TRUE;
6698 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6702 /* Initialize to GPU share count to 0, might change later */
6703 comm->nrank_gpu_shared = 0;
6705 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6707 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6710 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6712 dd->bGridJump = comm->bDynLoadBal;
6713 comm->bPMELoadBalDLBLimits = FALSE;
6715 if (comm->nstSortCG)
6719 if (comm->nstSortCG == 1)
6721 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6725 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6729 snew(comm->sort, 1);
6735 fprintf(fplog, "Will not sort the charge groups\n");
6739 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6741 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6742 if (comm->bInterCGBondeds)
6744 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6748 comm->bInterCGMultiBody = FALSE;
6751 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6752 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6754 if (ir->rlistlong == 0)
6756 /* Set the cut-off to some very large value,
6757 * so we don't need if statements everywhere in the code.
6758 * We use sqrt, since the cut-off is squared in some places.
6760 comm->cutoff = GMX_CUTOFF_INF;
6764 comm->cutoff = ir->rlistlong;
6766 comm->cutoff_mbody = 0;
6768 comm->cellsize_limit = 0;
6769 comm->bBondComm = FALSE;
6771 if (comm->bInterCGBondeds)
6773 if (comm_distance_min > 0)
6775 comm->cutoff_mbody = comm_distance_min;
6776 if (Flags & MD_DDBONDCOMM)
6778 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6782 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6784 r_bonded_limit = comm->cutoff_mbody;
6786 else if (ir->bPeriodicMols)
6788 /* Can not easily determine the required cut-off */
6789 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6790 comm->cutoff_mbody = comm->cutoff/2;
6791 r_bonded_limit = comm->cutoff_mbody;
6797 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6798 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6800 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6801 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6803 /* We use an initial margin of 10% for the minimum cell size,
6804 * except when we are just below the non-bonded cut-off.
6806 if (Flags & MD_DDBONDCOMM)
6808 if (max(r_2b, r_mb) > comm->cutoff)
6810 r_bonded = max(r_2b, r_mb);
6811 r_bonded_limit = 1.1*r_bonded;
6812 comm->bBondComm = TRUE;
6817 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6819 /* We determine cutoff_mbody later */
6823 /* No special bonded communication,
6824 * simply increase the DD cut-off.
6826 r_bonded_limit = 1.1*max(r_2b, r_mb);
6827 comm->cutoff_mbody = r_bonded_limit;
6828 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6831 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6835 "Minimum cell size due to bonded interactions: %.3f nm\n",
6836 comm->cellsize_limit);
6840 if (dd->bInterCGcons && rconstr <= 0)
6842 /* There is a cell size limit due to the constraints (P-LINCS) */
6843 rconstr = constr_r_max(fplog, mtop, ir);
6847 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6849 if (rconstr > comm->cellsize_limit)
6851 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6855 else if (rconstr > 0 && fplog)
6857 /* Here we do not check for dd->bInterCGcons,
6858 * because one can also set a cell size limit for virtual sites only
6859 * and at this point we don't know yet if there are intercg v-sites.
6862 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6865 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6867 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6871 copy_ivec(nc, dd->nc);
6872 set_dd_dim(fplog, dd);
6873 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6875 if (cr->npmenodes == -1)
6879 acs = average_cellsize_min(dd, ddbox);
6880 if (acs < comm->cellsize_limit)
6884 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6886 gmx_fatal_collective(FARGS, cr, NULL,
6887 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6888 acs, comm->cellsize_limit);
6893 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6895 /* We need to choose the optimal DD grid and possibly PME nodes */
6896 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6897 comm->eDLB != edlbNO, dlb_scale,
6898 comm->cellsize_limit, comm->cutoff,
6899 comm->bInterCGBondeds);
6901 if (dd->nc[XX] == 0)
6903 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6904 sprintf(buf, "Change the number of nodes or mdrun option %s%s%s",
6905 !bC ? "-rdd" : "-rcon",
6906 comm->eDLB != edlbNO ? " or -dds" : "",
6907 bC ? " or your LINCS settings" : "");
6909 gmx_fatal_collective(FARGS, cr, NULL,
6910 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6912 "Look in the log file for details on the domain decomposition",
6913 cr->nnodes-cr->npmenodes, limit, buf);
6915 set_dd_dim(fplog, dd);
6921 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6922 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6925 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6926 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6928 gmx_fatal_collective(FARGS, cr, NULL,
6929 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6930 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6932 if (cr->npmenodes > dd->nnodes)
6934 gmx_fatal_collective(FARGS, cr, NULL,
6935 "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6937 if (cr->npmenodes > 0)
6939 comm->npmenodes = cr->npmenodes;
6943 comm->npmenodes = dd->nnodes;
6946 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6948 /* The following choices should match those
6949 * in comm_cost_est in domdec_setup.c.
6950 * Note that here the checks have to take into account
6951 * that the decomposition might occur in a different order than xyz
6952 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6953 * in which case they will not match those in comm_cost_est,
6954 * but since that is mainly for testing purposes that's fine.
6956 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6957 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6958 getenv("GMX_PMEONEDD") == NULL)
6960 comm->npmedecompdim = 2;
6961 comm->npmenodes_x = dd->nc[XX];
6962 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6966 /* In case nc is 1 in both x and y we could still choose to
6967 * decompose pme in y instead of x, but we use x for simplicity.
6969 comm->npmedecompdim = 1;
6970 if (dd->dim[0] == YY)
6972 comm->npmenodes_x = 1;
6973 comm->npmenodes_y = comm->npmenodes;
6977 comm->npmenodes_x = comm->npmenodes;
6978 comm->npmenodes_y = 1;
6983 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6984 comm->npmenodes_x, comm->npmenodes_y, 1);
6989 comm->npmedecompdim = 0;
6990 comm->npmenodes_x = 0;
6991 comm->npmenodes_y = 0;
6994 /* Technically we don't need both of these,
6995 * but it simplifies code not having to recalculate it.
6997 *npme_x = comm->npmenodes_x;
6998 *npme_y = comm->npmenodes_y;
7000 snew(comm->slb_frac, DIM);
7001 if (comm->eDLB == edlbNO)
7003 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
7004 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
7005 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
7008 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
7010 if (comm->bBondComm || comm->eDLB != edlbNO)
7012 /* Set the bonded communication distance to halfway
7013 * the minimum and the maximum,
7014 * since the extra communication cost is nearly zero.
7016 acs = average_cellsize_min(dd, ddbox);
7017 comm->cutoff_mbody = 0.5*(r_bonded + acs);
7018 if (comm->eDLB != edlbNO)
7020 /* Check if this does not limit the scaling */
7021 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
7023 if (!comm->bBondComm)
7025 /* Without bBondComm do not go beyond the n.b. cut-off */
7026 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
7027 if (comm->cellsize_limit >= comm->cutoff)
7029 /* We don't loose a lot of efficieny
7030 * when increasing it to the n.b. cut-off.
7031 * It can even be slightly faster, because we need
7032 * less checks for the communication setup.
7034 comm->cutoff_mbody = comm->cutoff;
7037 /* Check if we did not end up below our original limit */
7038 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
7040 if (comm->cutoff_mbody > comm->cellsize_limit)
7042 comm->cellsize_limit = comm->cutoff_mbody;
7045 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
7050 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
7051 "cellsize limit %f\n",
7052 comm->bBondComm, comm->cellsize_limit);
7057 check_dd_restrictions(cr, dd, ir, fplog);
7060 comm->partition_step = INT_MIN;
7063 clear_dd_cycle_counts(dd);
7068 static void set_dlb_limits(gmx_domdec_t *dd)
7073 for (d = 0; d < dd->ndim; d++)
7075 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7076 dd->comm->cellsize_min[dd->dim[d]] =
7077 dd->comm->cellsize_min_dlb[dd->dim[d]];
7082 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7085 gmx_domdec_comm_t *comm;
7095 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
7098 cellsize_min = comm->cellsize_min[dd->dim[0]];
7099 for (d = 1; d < dd->ndim; d++)
7101 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7104 if (cellsize_min < comm->cellsize_limit*1.05)
7106 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7108 /* Change DLB from "auto" to "no". */
7109 comm->eDLB = edlbNO;
7114 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7115 comm->bDynLoadBal = TRUE;
7116 dd->bGridJump = TRUE;
7120 /* We can set the required cell size info here,
7121 * so we do not need to communicate this.
7122 * The grid is completely uniform.
7124 for (d = 0; d < dd->ndim; d++)
7128 comm->load[d].sum_m = comm->load[d].sum;
7130 nc = dd->nc[dd->dim[d]];
7131 for (i = 0; i < nc; i++)
7133 comm->root[d]->cell_f[i] = i/(real)nc;
7136 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7137 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7140 comm->root[d]->cell_f[nc] = 1.0;
7145 static char *init_bLocalCG(gmx_mtop_t *mtop)
7150 ncg = ncg_mtop(mtop);
7151 snew(bLocalCG, ncg);
7152 for (cg = 0; cg < ncg; cg++)
7154 bLocalCG[cg] = FALSE;
7160 void dd_init_bondeds(FILE *fplog,
7161 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7163 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7165 gmx_domdec_comm_t *comm;
7169 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7173 if (comm->bBondComm)
7175 /* Communicate atoms beyond the cut-off for bonded interactions */
7178 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7180 comm->bLocalCG = init_bLocalCG(mtop);
7184 /* Only communicate atoms based on cut-off */
7185 comm->cglink = NULL;
7186 comm->bLocalCG = NULL;
7190 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7192 gmx_bool bDynLoadBal, real dlb_scale,
7195 gmx_domdec_comm_t *comm;
7210 fprintf(fplog, "The maximum number of communication pulses is:");
7211 for (d = 0; d < dd->ndim; d++)
7213 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7215 fprintf(fplog, "\n");
7216 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7217 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7218 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7219 for (d = 0; d < DIM; d++)
7223 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7230 comm->cellsize_min_dlb[d]/
7231 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7233 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7236 fprintf(fplog, "\n");
7240 set_dd_cell_sizes_slb(dd, ddbox, FALSE, np);
7241 fprintf(fplog, "The initial number of communication pulses is:");
7242 for (d = 0; d < dd->ndim; d++)
7244 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7246 fprintf(fplog, "\n");
7247 fprintf(fplog, "The initial domain decomposition cell size is:");
7248 for (d = 0; d < DIM; d++)
7252 fprintf(fplog, " %c %.2f nm",
7253 dim2char(d), dd->comm->cellsize_min[d]);
7256 fprintf(fplog, "\n\n");
7259 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7261 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7262 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7263 "non-bonded interactions", "", comm->cutoff);
7267 limit = dd->comm->cellsize_limit;
7271 if (dynamic_dd_box(ddbox, ir))
7273 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7275 limit = dd->comm->cellsize_min[XX];
7276 for (d = 1; d < DIM; d++)
7278 limit = min(limit, dd->comm->cellsize_min[d]);
7282 if (comm->bInterCGBondeds)
7284 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7285 "two-body bonded interactions", "(-rdd)",
7286 max(comm->cutoff, comm->cutoff_mbody));
7287 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7288 "multi-body bonded interactions", "(-rdd)",
7289 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7293 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7294 "virtual site constructions", "(-rcon)", limit);
7296 if (dd->constraint_comm)
7298 sprintf(buf, "atoms separated by up to %d constraints",
7300 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7301 buf, "(-rcon)", limit);
7303 fprintf(fplog, "\n");
7309 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7311 const t_inputrec *ir,
7312 const gmx_ddbox_t *ddbox)
7314 gmx_domdec_comm_t *comm;
7315 int d, dim, npulse, npulse_d_max, npulse_d;
7320 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7322 /* Determine the maximum number of comm. pulses in one dimension */
7324 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7326 /* Determine the maximum required number of grid pulses */
7327 if (comm->cellsize_limit >= comm->cutoff)
7329 /* Only a single pulse is required */
7332 else if (!bNoCutOff && comm->cellsize_limit > 0)
7334 /* We round down slightly here to avoid overhead due to the latency
7335 * of extra communication calls when the cut-off
7336 * would be only slightly longer than the cell size.
7337 * Later cellsize_limit is redetermined,
7338 * so we can not miss interactions due to this rounding.
7340 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7344 /* There is no cell size limit */
7345 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7348 if (!bNoCutOff && npulse > 1)
7350 /* See if we can do with less pulses, based on dlb_scale */
7352 for (d = 0; d < dd->ndim; d++)
7355 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7356 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7357 npulse_d_max = max(npulse_d_max, npulse_d);
7359 npulse = min(npulse, npulse_d_max);
7362 /* This env var can override npulse */
7363 d = dd_nst_env(debug, "GMX_DD_NPULSE", 0);
7370 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7371 for (d = 0; d < dd->ndim; d++)
7373 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7374 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7375 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7376 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7377 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7379 comm->bVacDLBNoLimit = FALSE;
7383 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7384 if (!comm->bVacDLBNoLimit)
7386 comm->cellsize_limit = max(comm->cellsize_limit,
7387 comm->cutoff/comm->maxpulse);
7389 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7390 /* Set the minimum cell size for each DD dimension */
7391 for (d = 0; d < dd->ndim; d++)
7393 if (comm->bVacDLBNoLimit ||
7394 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7396 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7400 comm->cellsize_min_dlb[dd->dim[d]] =
7401 comm->cutoff/comm->cd[d].np_dlb;
7404 if (comm->cutoff_mbody <= 0)
7406 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7408 if (comm->bDynLoadBal)
7414 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7416 /* If each molecule is a single charge group
7417 * or we use domain decomposition for each periodic dimension,
7418 * we do not need to take pbc into account for the bonded interactions.
7420 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7423 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
7426 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7427 t_inputrec *ir, gmx_ddbox_t *ddbox)
7429 gmx_domdec_comm_t *comm;
7435 /* Initialize the thread data.
7436 * This can not be done in init_domain_decomposition,
7437 * as the numbers of threads is determined later.
7439 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7442 snew(comm->dth, comm->nth);
7445 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7447 init_ddpme(dd, &comm->ddpme[0], 0);
7448 if (comm->npmedecompdim >= 2)
7450 init_ddpme(dd, &comm->ddpme[1], 1);
7455 comm->npmenodes = 0;
7456 if (dd->pme_nodeid >= 0)
7458 gmx_fatal_collective(FARGS, NULL, dd,
7459 "Can not have separate PME nodes without PME electrostatics");
7465 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7467 if (comm->eDLB != edlbNO)
7469 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7472 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7473 if (comm->eDLB == edlbAUTO)
7477 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7479 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
7482 if (ir->ePBC == epbcNONE)
7484 vol_frac = 1 - 1/(double)dd->nnodes;
7489 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7493 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7495 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7497 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
7500 static gmx_bool test_dd_cutoff(t_commrec *cr,
7501 t_state *state, t_inputrec *ir,
7512 set_ddbox(dd, FALSE, cr, ir, state->box,
7513 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7517 for (d = 0; d < dd->ndim; d++)
7521 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7522 if (dynamic_dd_box(&ddbox, ir))
7524 inv_cell_size *= DD_PRES_SCALE_MARGIN;
7527 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7529 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7530 dd->comm->cd[d].np_dlb > 0)
7532 if (np > dd->comm->cd[d].np_dlb)
7537 /* If a current local cell size is smaller than the requested
7538 * cut-off, we could still fix it, but this gets very complicated.
7539 * Without fixing here, we might actually need more checks.
7541 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7548 if (dd->comm->eDLB != edlbNO)
7550 /* If DLB is not active yet, we don't need to check the grid jumps.
7551 * Actually we shouldn't, because then the grid jump data is not set.
7553 if (dd->comm->bDynLoadBal &&
7554 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7559 gmx_sumi(1, &LocallyLimited, cr);
7561 if (LocallyLimited > 0)
7570 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7573 gmx_bool bCutoffAllowed;
7575 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7579 cr->dd->comm->cutoff = cutoff_req;
7582 return bCutoffAllowed;
7585 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7587 gmx_domdec_comm_t *comm;
7589 comm = cr->dd->comm;
7591 /* Turn on the DLB limiting (might have been on already) */
7592 comm->bPMELoadBalDLBLimits = TRUE;
7594 /* Change the cut-off limit */
7595 comm->PMELoadBal_max_cutoff = comm->cutoff;
7598 static void merge_cg_buffers(int ncell,
7599 gmx_domdec_comm_dim_t *cd, int pulse,
7601 int *index_gl, int *recv_i,
7602 rvec *cg_cm, rvec *recv_vr,
7604 cginfo_mb_t *cginfo_mb, int *cginfo)
7606 gmx_domdec_ind_t *ind, *ind_p;
7607 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7608 int shift, shift_at;
7610 ind = &cd->ind[pulse];
7612 /* First correct the already stored data */
7613 shift = ind->nrecv[ncell];
7614 for (cell = ncell-1; cell >= 0; cell--)
7616 shift -= ind->nrecv[cell];
7619 /* Move the cg's present from previous grid pulses */
7620 cg0 = ncg_cell[ncell+cell];
7621 cg1 = ncg_cell[ncell+cell+1];
7622 cgindex[cg1+shift] = cgindex[cg1];
7623 for (cg = cg1-1; cg >= cg0; cg--)
7625 index_gl[cg+shift] = index_gl[cg];
7626 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7627 cgindex[cg+shift] = cgindex[cg];
7628 cginfo[cg+shift] = cginfo[cg];
7630 /* Correct the already stored send indices for the shift */
7631 for (p = 1; p <= pulse; p++)
7633 ind_p = &cd->ind[p];
7635 for (c = 0; c < cell; c++)
7637 cg0 += ind_p->nsend[c];
7639 cg1 = cg0 + ind_p->nsend[cell];
7640 for (cg = cg0; cg < cg1; cg++)
7642 ind_p->index[cg] += shift;
7648 /* Merge in the communicated buffers */
7652 for (cell = 0; cell < ncell; cell++)
7654 cg1 = ncg_cell[ncell+cell+1] + shift;
7657 /* Correct the old cg indices */
7658 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7660 cgindex[cg+1] += shift_at;
7663 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7665 /* Copy this charge group from the buffer */
7666 index_gl[cg1] = recv_i[cg0];
7667 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7668 /* Add it to the cgindex */
7669 cg_gl = index_gl[cg1];
7670 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7671 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7672 cgindex[cg1+1] = cgindex[cg1] + nat;
7677 shift += ind->nrecv[cell];
7678 ncg_cell[ncell+cell+1] = cg1;
7682 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7683 int nzone, int cg0, const int *cgindex)
7687 /* Store the atom block boundaries for easy copying of communication buffers
7690 for (zone = 0; zone < nzone; zone++)
7692 for (p = 0; p < cd->np; p++)
7694 cd->ind[p].cell2at0[zone] = cgindex[cg];
7695 cg += cd->ind[p].nrecv[zone];
7696 cd->ind[p].cell2at1[zone] = cgindex[cg];
7701 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7707 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7709 if (!bLocalCG[link->a[i]])
7718 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7720 real c[DIM][4]; /* the corners for the non-bonded communication */
7721 real cr0; /* corner for rounding */
7722 real cr1[4]; /* corners for rounding */
7723 real bc[DIM]; /* corners for bounded communication */
7724 real bcr1; /* corner for rounding for bonded communication */
7727 /* Determine the corners of the domain(s) we are communicating with */
7729 set_dd_corners(const gmx_domdec_t *dd,
7730 int dim0, int dim1, int dim2,
7734 const gmx_domdec_comm_t *comm;
7735 const gmx_domdec_zones_t *zones;
7740 zones = &comm->zones;
7742 /* Keep the compiler happy */
7746 /* The first dimension is equal for all cells */
7747 c->c[0][0] = comm->cell_x0[dim0];
7750 c->bc[0] = c->c[0][0];
7755 /* This cell row is only seen from the first row */
7756 c->c[1][0] = comm->cell_x0[dim1];
7757 /* All rows can see this row */
7758 c->c[1][1] = comm->cell_x0[dim1];
7761 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7764 /* For the multi-body distance we need the maximum */
7765 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7768 /* Set the upper-right corner for rounding */
7769 c->cr0 = comm->cell_x1[dim0];
7774 for (j = 0; j < 4; j++)
7776 c->c[2][j] = comm->cell_x0[dim2];
7780 /* Use the maximum of the i-cells that see a j-cell */
7781 for (i = 0; i < zones->nizone; i++)
7783 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7789 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7795 /* For the multi-body distance we need the maximum */
7796 c->bc[2] = comm->cell_x0[dim2];
7797 for (i = 0; i < 2; i++)
7799 for (j = 0; j < 2; j++)
7801 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7807 /* Set the upper-right corner for rounding */
7808 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7809 * Only cell (0,0,0) can see cell 7 (1,1,1)
7811 c->cr1[0] = comm->cell_x1[dim1];
7812 c->cr1[3] = comm->cell_x1[dim1];
7815 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7818 /* For the multi-body distance we need the maximum */
7819 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7826 /* Determine which cg's we need to send in this pulse from this zone */
7828 get_zone_pulse_cgs(gmx_domdec_t *dd,
7829 int zonei, int zone,
7831 const int *index_gl,
7833 int dim, int dim_ind,
7834 int dim0, int dim1, int dim2,
7835 real r_comm2, real r_bcomm2,
7839 real skew_fac2_d, real skew_fac_01,
7840 rvec *v_d, rvec *v_0, rvec *v_1,
7841 const dd_corners_t *c,
7843 gmx_bool bDistBonded,
7849 gmx_domdec_ind_t *ind,
7850 int **ibuf, int *ibuf_nalloc,
7856 gmx_domdec_comm_t *comm;
7858 gmx_bool bDistMB_pulse;
7860 real r2, rb2, r, tric_sh;
7863 int nsend_z, nsend, nat;
7867 bScrew = (dd->bScrewPBC && dim == XX);
7869 bDistMB_pulse = (bDistMB && bDistBonded);
7875 for (cg = cg0; cg < cg1; cg++)
7879 if (tric_dist[dim_ind] == 0)
7881 /* Rectangular direction, easy */
7882 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7889 r = cg_cm[cg][dim] - c->bc[dim_ind];
7895 /* Rounding gives at most a 16% reduction
7896 * in communicated atoms
7898 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7900 r = cg_cm[cg][dim0] - c->cr0;
7901 /* This is the first dimension, so always r >= 0 */
7908 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7910 r = cg_cm[cg][dim1] - c->cr1[zone];
7917 r = cg_cm[cg][dim1] - c->bcr1;
7927 /* Triclinic direction, more complicated */
7930 /* Rounding, conservative as the skew_fac multiplication
7931 * will slightly underestimate the distance.
7933 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7935 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7936 for (i = dim0+1; i < DIM; i++)
7938 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7940 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7943 rb[dim0] = rn[dim0];
7946 /* Take care that the cell planes along dim0 might not
7947 * be orthogonal to those along dim1 and dim2.
7949 for (i = 1; i <= dim_ind; i++)
7952 if (normal[dim0][dimd] > 0)
7954 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7957 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7962 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7964 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7966 for (i = dim1+1; i < DIM; i++)
7968 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7970 rn[dim1] += tric_sh;
7973 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7974 /* Take care of coupling of the distances
7975 * to the planes along dim0 and dim1 through dim2.
7977 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7978 /* Take care that the cell planes along dim1
7979 * might not be orthogonal to that along dim2.
7981 if (normal[dim1][dim2] > 0)
7983 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7989 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7992 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7993 /* Take care of coupling of the distances
7994 * to the planes along dim0 and dim1 through dim2.
7996 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7997 /* Take care that the cell planes along dim1
7998 * might not be orthogonal to that along dim2.
8000 if (normal[dim1][dim2] > 0)
8002 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
8007 /* The distance along the communication direction */
8008 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
8010 for (i = dim+1; i < DIM; i++)
8012 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
8017 r2 += rn[dim]*rn[dim]*skew_fac2_d;
8018 /* Take care of coupling of the distances
8019 * to the planes along dim0 and dim1 through dim2.
8021 if (dim_ind == 1 && zonei == 1)
8023 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
8029 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
8032 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
8033 /* Take care of coupling of the distances
8034 * to the planes along dim0 and dim1 through dim2.
8036 if (dim_ind == 1 && zonei == 1)
8038 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
8046 ((bDistMB && rb2 < r_bcomm2) ||
8047 (bDist2B && r2 < r_bcomm2)) &&
8049 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
8050 missing_link(comm->cglink, index_gl[cg],
8053 /* Make an index to the local charge groups */
8054 if (nsend+1 > ind->nalloc)
8056 ind->nalloc = over_alloc_large(nsend+1);
8057 srenew(ind->index, ind->nalloc);
8059 if (nsend+1 > *ibuf_nalloc)
8061 *ibuf_nalloc = over_alloc_large(nsend+1);
8062 srenew(*ibuf, *ibuf_nalloc);
8064 ind->index[nsend] = cg;
8065 (*ibuf)[nsend] = index_gl[cg];
8067 vec_rvec_check_alloc(vbuf, nsend+1);
8069 if (dd->ci[dim] == 0)
8071 /* Correct cg_cm for pbc */
8072 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
8075 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8076 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8081 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8084 nat += cgindex[cg+1] - cgindex[cg];
8090 *nsend_z_ptr = nsend_z;
8093 static void setup_dd_communication(gmx_domdec_t *dd,
8094 matrix box, gmx_ddbox_t *ddbox,
8095 t_forcerec *fr, t_state *state, rvec **f)
8097 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8098 int nzone, nzone_send, zone, zonei, cg0, cg1;
8099 int c, i, j, cg, cg_gl, nrcg;
8100 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8101 gmx_domdec_comm_t *comm;
8102 gmx_domdec_zones_t *zones;
8103 gmx_domdec_comm_dim_t *cd;
8104 gmx_domdec_ind_t *ind;
8105 cginfo_mb_t *cginfo_mb;
8106 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8107 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8108 dd_corners_t corners;
8110 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8111 real skew_fac2_d, skew_fac_01;
8118 fprintf(debug, "Setting up DD communication\n");
8123 switch (fr->cutoff_scheme)
8132 gmx_incons("unimplemented");
8136 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8138 dim = dd->dim[dim_ind];
8140 /* Check if we need to use triclinic distances */
8141 tric_dist[dim_ind] = 0;
8142 for (i = 0; i <= dim_ind; i++)
8144 if (ddbox->tric_dir[dd->dim[i]])
8146 tric_dist[dim_ind] = 1;
8151 bBondComm = comm->bBondComm;
8153 /* Do we need to determine extra distances for multi-body bondeds? */
8154 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8156 /* Do we need to determine extra distances for only two-body bondeds? */
8157 bDist2B = (bBondComm && !bDistMB);
8159 r_comm2 = sqr(comm->cutoff);
8160 r_bcomm2 = sqr(comm->cutoff_mbody);
8164 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8167 zones = &comm->zones;
8170 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8171 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8173 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8175 /* Triclinic stuff */
8176 normal = ddbox->normal;
8180 v_0 = ddbox->v[dim0];
8181 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8183 /* Determine the coupling coefficient for the distances
8184 * to the cell planes along dim0 and dim1 through dim2.
8185 * This is required for correct rounding.
8188 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8191 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8197 v_1 = ddbox->v[dim1];
8200 zone_cg_range = zones->cg_range;
8201 index_gl = dd->index_gl;
8202 cgindex = dd->cgindex;
8203 cginfo_mb = fr->cginfo_mb;
8205 zone_cg_range[0] = 0;
8206 zone_cg_range[1] = dd->ncg_home;
8207 comm->zone_ncg1[0] = dd->ncg_home;
8208 pos_cg = dd->ncg_home;
8210 nat_tot = dd->nat_home;
8212 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8214 dim = dd->dim[dim_ind];
8215 cd = &comm->cd[dim_ind];
8217 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8219 /* No pbc in this dimension, the first node should not comm. */
8227 v_d = ddbox->v[dim];
8228 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8230 cd->bInPlace = TRUE;
8231 for (p = 0; p < cd->np; p++)
8233 /* Only atoms communicated in the first pulse are used
8234 * for multi-body bonded interactions or for bBondComm.
8236 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8241 for (zone = 0; zone < nzone_send; zone++)
8243 if (tric_dist[dim_ind] && dim_ind > 0)
8245 /* Determine slightly more optimized skew_fac's
8247 * This reduces the number of communicated atoms
8248 * by about 10% for 3D DD of rhombic dodecahedra.
8250 for (dimd = 0; dimd < dim; dimd++)
8252 sf2_round[dimd] = 1;
8253 if (ddbox->tric_dir[dimd])
8255 for (i = dd->dim[dimd]+1; i < DIM; i++)
8257 /* If we are shifted in dimension i
8258 * and the cell plane is tilted forward
8259 * in dimension i, skip this coupling.
8261 if (!(zones->shift[nzone+zone][i] &&
8262 ddbox->v[dimd][i][dimd] >= 0))
8265 sqr(ddbox->v[dimd][i][dimd]);
8268 sf2_round[dimd] = 1/sf2_round[dimd];
8273 zonei = zone_perm[dim_ind][zone];
8276 /* Here we permutate the zones to obtain a convenient order
8277 * for neighbor searching
8279 cg0 = zone_cg_range[zonei];
8280 cg1 = zone_cg_range[zonei+1];
8284 /* Look only at the cg's received in the previous grid pulse
8286 cg1 = zone_cg_range[nzone+zone+1];
8287 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
8290 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8291 for (th = 0; th < comm->nth; th++)
8293 gmx_domdec_ind_t *ind_p;
8294 int **ibuf_p, *ibuf_nalloc_p;
8296 int *nsend_p, *nat_p;
8302 /* Thread 0 writes in the comm buffers */
8304 ibuf_p = &comm->buf_int;
8305 ibuf_nalloc_p = &comm->nalloc_int;
8306 vbuf_p = &comm->vbuf;
8309 nsend_zone_p = &ind->nsend[zone];
8313 /* Other threads write into temp buffers */
8314 ind_p = &comm->dth[th].ind;
8315 ibuf_p = &comm->dth[th].ibuf;
8316 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8317 vbuf_p = &comm->dth[th].vbuf;
8318 nsend_p = &comm->dth[th].nsend;
8319 nat_p = &comm->dth[th].nat;
8320 nsend_zone_p = &comm->dth[th].nsend_zone;
8322 comm->dth[th].nsend = 0;
8323 comm->dth[th].nat = 0;
8324 comm->dth[th].nsend_zone = 0;
8334 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8335 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8338 /* Get the cg's for this pulse in this zone */
8339 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8341 dim, dim_ind, dim0, dim1, dim2,
8344 normal, skew_fac2_d, skew_fac_01,
8345 v_d, v_0, v_1, &corners, sf2_round,
8346 bDistBonded, bBondComm,
8350 ibuf_p, ibuf_nalloc_p,
8356 /* Append data of threads>=1 to the communication buffers */
8357 for (th = 1; th < comm->nth; th++)
8359 dd_comm_setup_work_t *dth;
8362 dth = &comm->dth[th];
8364 ns1 = nsend + dth->nsend_zone;
8365 if (ns1 > ind->nalloc)
8367 ind->nalloc = over_alloc_dd(ns1);
8368 srenew(ind->index, ind->nalloc);
8370 if (ns1 > comm->nalloc_int)
8372 comm->nalloc_int = over_alloc_dd(ns1);
8373 srenew(comm->buf_int, comm->nalloc_int);
8375 if (ns1 > comm->vbuf.nalloc)
8377 comm->vbuf.nalloc = over_alloc_dd(ns1);
8378 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8381 for (i = 0; i < dth->nsend_zone; i++)
8383 ind->index[nsend] = dth->ind.index[i];
8384 comm->buf_int[nsend] = dth->ibuf[i];
8385 copy_rvec(dth->vbuf.v[i],
8386 comm->vbuf.v[nsend]);
8390 ind->nsend[zone] += dth->nsend_zone;
8393 /* Clear the counts in case we do not have pbc */
8394 for (zone = nzone_send; zone < nzone; zone++)
8396 ind->nsend[zone] = 0;
8398 ind->nsend[nzone] = nsend;
8399 ind->nsend[nzone+1] = nat;
8400 /* Communicate the number of cg's and atoms to receive */
8401 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8402 ind->nsend, nzone+2,
8403 ind->nrecv, nzone+2);
8405 /* The rvec buffer is also required for atom buffers of size nsend
8406 * in dd_move_x and dd_move_f.
8408 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8412 /* We can receive in place if only the last zone is not empty */
8413 for (zone = 0; zone < nzone-1; zone++)
8415 if (ind->nrecv[zone] > 0)
8417 cd->bInPlace = FALSE;
8422 /* The int buffer is only required here for the cg indices */
8423 if (ind->nrecv[nzone] > comm->nalloc_int2)
8425 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8426 srenew(comm->buf_int2, comm->nalloc_int2);
8428 /* The rvec buffer is also required for atom buffers
8429 * of size nrecv in dd_move_x and dd_move_f.
8431 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8432 vec_rvec_check_alloc(&comm->vbuf2, i);
8436 /* Make space for the global cg indices */
8437 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8438 || dd->cg_nalloc == 0)
8440 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8441 srenew(index_gl, dd->cg_nalloc);
8442 srenew(cgindex, dd->cg_nalloc+1);
8444 /* Communicate the global cg indices */
8447 recv_i = index_gl + pos_cg;
8451 recv_i = comm->buf_int2;
8453 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8454 comm->buf_int, nsend,
8455 recv_i, ind->nrecv[nzone]);
8457 /* Make space for cg_cm */
8458 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8459 if (fr->cutoff_scheme == ecutsGROUP)
8467 /* Communicate cg_cm */
8470 recv_vr = cg_cm + pos_cg;
8474 recv_vr = comm->vbuf2.v;
8476 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8477 comm->vbuf.v, nsend,
8478 recv_vr, ind->nrecv[nzone]);
8480 /* Make the charge group index */
8483 zone = (p == 0 ? 0 : nzone - 1);
8484 while (zone < nzone)
8486 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8488 cg_gl = index_gl[pos_cg];
8489 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8490 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8491 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8494 /* Update the charge group presence,
8495 * so we can use it in the next pass of the loop.
8497 comm->bLocalCG[cg_gl] = TRUE;
8503 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8506 zone_cg_range[nzone+zone] = pos_cg;
8511 /* This part of the code is never executed with bBondComm. */
8512 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8513 index_gl, recv_i, cg_cm, recv_vr,
8514 cgindex, fr->cginfo_mb, fr->cginfo);
8515 pos_cg += ind->nrecv[nzone];
8517 nat_tot += ind->nrecv[nzone+1];
8521 /* Store the atom block for easy copying of communication buffers */
8522 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
8526 dd->index_gl = index_gl;
8527 dd->cgindex = cgindex;
8529 dd->ncg_tot = zone_cg_range[zones->n];
8530 dd->nat_tot = nat_tot;
8531 comm->nat[ddnatHOME] = dd->nat_home;
8532 for (i = ddnatZONE; i < ddnatNR; i++)
8534 comm->nat[i] = dd->nat_tot;
8539 /* We don't need to update cginfo, since that was alrady done above.
8540 * So we pass NULL for the forcerec.
8542 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8543 NULL, comm->bLocalCG);
8548 fprintf(debug, "Finished setting up DD communication, zones:");
8549 for (c = 0; c < zones->n; c++)
8551 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8553 fprintf(debug, "\n");
8557 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8561 for (c = 0; c < zones->nizone; c++)
8563 zones->izone[c].cg1 = zones->cg_range[c+1];
8564 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8565 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
8569 static void set_zones_size(gmx_domdec_t *dd,
8570 matrix box, const gmx_ddbox_t *ddbox,
8571 int zone_start, int zone_end)
8573 gmx_domdec_comm_t *comm;
8574 gmx_domdec_zones_t *zones;
8576 int z, zi, zj0, zj1, d, dim;
8579 real size_j, add_tric;
8584 zones = &comm->zones;
8586 /* Do we need to determine extra distances for multi-body bondeds? */
8587 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8589 for (z = zone_start; z < zone_end; z++)
8591 /* Copy cell limits to zone limits.
8592 * Valid for non-DD dims and non-shifted dims.
8594 copy_rvec(comm->cell_x0, zones->size[z].x0);
8595 copy_rvec(comm->cell_x1, zones->size[z].x1);
8598 for (d = 0; d < dd->ndim; d++)
8602 for (z = 0; z < zones->n; z++)
8604 /* With a staggered grid we have different sizes
8605 * for non-shifted dimensions.
8607 if (dd->bGridJump && zones->shift[z][dim] == 0)
8611 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8612 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8616 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8617 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
8623 rcmbs = comm->cutoff_mbody;
8624 if (ddbox->tric_dir[dim])
8626 rcs /= ddbox->skew_fac[dim];
8627 rcmbs /= ddbox->skew_fac[dim];
8630 /* Set the lower limit for the shifted zone dimensions */
8631 for (z = zone_start; z < zone_end; z++)
8633 if (zones->shift[z][dim] > 0)
8636 if (!dd->bGridJump || d == 0)
8638 zones->size[z].x0[dim] = comm->cell_x1[dim];
8639 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8643 /* Here we take the lower limit of the zone from
8644 * the lowest domain of the zone below.
8648 zones->size[z].x0[dim] =
8649 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8655 zones->size[z].x0[dim] =
8656 zones->size[zone_perm[2][z-4]].x0[dim];
8660 zones->size[z].x0[dim] =
8661 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8664 /* A temporary limit, is updated below */
8665 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8669 for (zi = 0; zi < zones->nizone; zi++)
8671 if (zones->shift[zi][dim] == 0)
8673 /* This takes the whole zone into account.
8674 * With multiple pulses this will lead
8675 * to a larger zone then strictly necessary.
8677 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8678 zones->size[zi].x1[dim]+rcmbs);
8686 /* Loop over the i-zones to set the upper limit of each
8689 for (zi = 0; zi < zones->nizone; zi++)
8691 if (zones->shift[zi][dim] == 0)
8693 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8695 if (zones->shift[z][dim] > 0)
8697 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8698 zones->size[zi].x1[dim]+rcs);
8705 for (z = zone_start; z < zone_end; z++)
8707 /* Initialization only required to keep the compiler happy */
8708 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8711 /* To determine the bounding box for a zone we need to find
8712 * the extreme corners of 4, 2 or 1 corners.
8714 nc = 1 << (ddbox->npbcdim - 1);
8716 for (c = 0; c < nc; c++)
8718 /* Set up a zone corner at x=0, ignoring trilinic couplings */
8722 corner[YY] = zones->size[z].x0[YY];
8726 corner[YY] = zones->size[z].x1[YY];
8730 corner[ZZ] = zones->size[z].x0[ZZ];
8734 corner[ZZ] = zones->size[z].x1[ZZ];
8736 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8738 /* With 1D domain decomposition the cg's are not in
8739 * the triclinic box, but triclinic x-y and rectangular y-z.
8740 * Shift y back, so it will later end up at 0.
8742 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8744 /* Apply the triclinic couplings */
8745 for (i = YY; i < ddbox->npbcdim; i++)
8747 for (j = XX; j < i; j++)
8749 corner[j] += corner[i]*box[i][j]/box[i][i];
8754 copy_rvec(corner, corner_min);
8755 copy_rvec(corner, corner_max);
8759 for (i = 0; i < DIM; i++)
8761 corner_min[i] = min(corner_min[i], corner[i]);
8762 corner_max[i] = max(corner_max[i], corner[i]);
8766 /* Copy the extreme cornes without offset along x */
8767 for (i = 0; i < DIM; i++)
8769 zones->size[z].bb_x0[i] = corner_min[i];
8770 zones->size[z].bb_x1[i] = corner_max[i];
8772 /* Add the offset along x */
8773 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8774 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
8777 if (zone_start == 0)
8780 for (dim = 0; dim < DIM; dim++)
8782 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8784 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8789 for (z = zone_start; z < zone_end; z++)
8791 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8793 zones->size[z].x0[XX], zones->size[z].x1[XX],
8794 zones->size[z].x0[YY], zones->size[z].x1[YY],
8795 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8796 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8798 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8799 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8800 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
8805 static int comp_cgsort(const void *a, const void *b)
8809 gmx_cgsort_t *cga, *cgb;
8810 cga = (gmx_cgsort_t *)a;
8811 cgb = (gmx_cgsort_t *)b;
8813 comp = cga->nsc - cgb->nsc;
8816 comp = cga->ind_gl - cgb->ind_gl;
8822 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8827 /* Order the data */
8828 for (i = 0; i < n; i++)
8830 buf[i] = a[sort[i].ind];
8833 /* Copy back to the original array */
8834 for (i = 0; i < n; i++)
8840 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8845 /* Order the data */
8846 for (i = 0; i < n; i++)
8848 copy_rvec(v[sort[i].ind], buf[i]);
8851 /* Copy back to the original array */
8852 for (i = 0; i < n; i++)
8854 copy_rvec(buf[i], v[i]);
8858 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8861 int a, atot, cg, cg0, cg1, i;
8863 if (cgindex == NULL)
8865 /* Avoid the useless loop of the atoms within a cg */
8866 order_vec_cg(ncg, sort, v, buf);
8871 /* Order the data */
8873 for (cg = 0; cg < ncg; cg++)
8875 cg0 = cgindex[sort[cg].ind];
8876 cg1 = cgindex[sort[cg].ind+1];
8877 for (i = cg0; i < cg1; i++)
8879 copy_rvec(v[i], buf[a]);
8885 /* Copy back to the original array */
8886 for (a = 0; a < atot; a++)
8888 copy_rvec(buf[a], v[a]);
8892 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8893 int nsort_new, gmx_cgsort_t *sort_new,
8894 gmx_cgsort_t *sort1)
8898 /* The new indices are not very ordered, so we qsort them */
8899 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8901 /* sort2 is already ordered, so now we can merge the two arrays */
8905 while (i2 < nsort2 || i_new < nsort_new)
8909 sort1[i1++] = sort_new[i_new++];
8911 else if (i_new == nsort_new)
8913 sort1[i1++] = sort2[i2++];
8915 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8916 (sort2[i2].nsc == sort_new[i_new].nsc &&
8917 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8919 sort1[i1++] = sort2[i2++];
8923 sort1[i1++] = sort_new[i_new++];
8928 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8930 gmx_domdec_sort_t *sort;
8931 gmx_cgsort_t *cgsort, *sort_i;
8932 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8933 int sort_last, sort_skip;
8935 sort = dd->comm->sort;
8937 a = fr->ns.grid->cell_index;
8939 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8941 if (ncg_home_old >= 0)
8943 /* The charge groups that remained in the same ns grid cell
8944 * are completely ordered. So we can sort efficiently by sorting
8945 * the charge groups that did move into the stationary list.
8950 for (i = 0; i < dd->ncg_home; i++)
8952 /* Check if this cg did not move to another node */
8955 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8957 /* This cg is new on this node or moved ns grid cell */
8958 if (nsort_new >= sort->sort_new_nalloc)
8960 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8961 srenew(sort->sort_new, sort->sort_new_nalloc);
8963 sort_i = &(sort->sort_new[nsort_new++]);
8967 /* This cg did not move */
8968 sort_i = &(sort->sort2[nsort2++]);
8970 /* Sort on the ns grid cell indices
8971 * and the global topology index.
8972 * index_gl is irrelevant with cell ns,
8973 * but we set it here anyhow to avoid a conditional.
8976 sort_i->ind_gl = dd->index_gl[i];
8983 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8986 /* Sort efficiently */
8987 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8992 cgsort = sort->sort;
8994 for (i = 0; i < dd->ncg_home; i++)
8996 /* Sort on the ns grid cell indices
8997 * and the global topology index
8999 cgsort[i].nsc = a[i];
9000 cgsort[i].ind_gl = dd->index_gl[i];
9002 if (cgsort[i].nsc < moved)
9009 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
9011 /* Determine the order of the charge groups using qsort */
9012 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
9018 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
9021 int ncg_new, i, *a, na;
9023 sort = dd->comm->sort->sort;
9025 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
9028 for (i = 0; i < na; i++)
9032 sort[ncg_new].ind = a[i];
9040 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
9043 gmx_domdec_sort_t *sort;
9044 gmx_cgsort_t *cgsort, *sort_i;
9046 int ncg_new, i, *ibuf, cgsize;
9049 sort = dd->comm->sort;
9051 if (dd->ncg_home > sort->sort_nalloc)
9053 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9054 srenew(sort->sort, sort->sort_nalloc);
9055 srenew(sort->sort2, sort->sort_nalloc);
9057 cgsort = sort->sort;
9059 switch (fr->cutoff_scheme)
9062 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9065 ncg_new = dd_sort_order_nbnxn(dd, fr);
9068 gmx_incons("unimplemented");
9072 /* We alloc with the old size, since cgindex is still old */
9073 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9074 vbuf = dd->comm->vbuf.v;
9078 cgindex = dd->cgindex;
9085 /* Remove the charge groups which are no longer at home here */
9086 dd->ncg_home = ncg_new;
9089 fprintf(debug, "Set the new home charge group count to %d\n",
9093 /* Reorder the state */
9094 for (i = 0; i < estNR; i++)
9096 if (EST_DISTR(i) && (state->flags & (1<<i)))
9101 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9104 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9107 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9110 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9114 case estDISRE_INITF:
9115 case estDISRE_RM3TAV:
9116 case estORIRE_INITF:
9118 /* No ordering required */
9121 gmx_incons("Unknown state entry encountered in dd_sort_state");
9126 if (fr->cutoff_scheme == ecutsGROUP)
9129 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9132 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9134 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9135 srenew(sort->ibuf, sort->ibuf_nalloc);
9138 /* Reorder the global cg index */
9139 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9140 /* Reorder the cginfo */
9141 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9142 /* Rebuild the local cg index */
9146 for (i = 0; i < dd->ncg_home; i++)
9148 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9149 ibuf[i+1] = ibuf[i] + cgsize;
9151 for (i = 0; i < dd->ncg_home+1; i++)
9153 dd->cgindex[i] = ibuf[i];
9158 for (i = 0; i < dd->ncg_home+1; i++)
9163 /* Set the home atom number */
9164 dd->nat_home = dd->cgindex[dd->ncg_home];
9166 if (fr->cutoff_scheme == ecutsVERLET)
9168 /* The atoms are now exactly in grid order, update the grid order */
9169 nbnxn_set_atomorder(fr->nbv->nbs);
9173 /* Copy the sorted ns cell indices back to the ns grid struct */
9174 for (i = 0; i < dd->ncg_home; i++)
9176 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9178 fr->ns.grid->nr = dd->ncg_home;
9182 static void add_dd_statistics(gmx_domdec_t *dd)
9184 gmx_domdec_comm_t *comm;
9189 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9191 comm->sum_nat[ddnat-ddnatZONE] +=
9192 comm->nat[ddnat] - comm->nat[ddnat-1];
9197 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9199 gmx_domdec_comm_t *comm;
9204 /* Reset all the statistics and counters for total run counting */
9205 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9207 comm->sum_nat[ddnat-ddnatZONE] = 0;
9211 comm->load_step = 0;
9214 clear_ivec(comm->load_lim);
9219 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9221 gmx_domdec_comm_t *comm;
9225 comm = cr->dd->comm;
9227 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9234 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9236 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9238 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9243 " av. #atoms communicated per step for force: %d x %.1f\n",
9247 if (cr->dd->vsite_comm)
9250 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9251 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9256 if (cr->dd->constraint_comm)
9259 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9260 1 + ir->nLincsIter, av);
9264 gmx_incons(" Unknown type for DD statistics");
9267 fprintf(fplog, "\n");
9269 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9271 print_dd_load_av(fplog, cr->dd);
9275 void dd_partition_system(FILE *fplog,
9278 gmx_bool bMasterState,
9280 t_state *state_global,
9281 gmx_mtop_t *top_global,
9283 t_state *state_local,
9286 gmx_localtop_t *top_local,
9289 gmx_shellfc_t shellfc,
9290 gmx_constr_t constr,
9292 gmx_wallcycle_t wcycle,
9296 gmx_domdec_comm_t *comm;
9297 gmx_ddbox_t ddbox = {0};
9299 gmx_int64_t step_pcoupl;
9300 rvec cell_ns_x0, cell_ns_x1;
9301 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9302 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9303 gmx_bool bRedist, bSortCG, bResortAll;
9304 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9311 bBoxChanged = (bMasterState || DEFORM(*ir));
9312 if (ir->epc != epcNO)
9314 /* With nstpcouple > 1 pressure coupling happens.
9315 * one step after calculating the pressure.
9316 * Box scaling happens at the end of the MD step,
9317 * after the DD partitioning.
9318 * We therefore have to do DLB in the first partitioning
9319 * after an MD step where P-coupling occured.
9320 * We need to determine the last step in which p-coupling occurred.
9321 * MRS -- need to validate this for vv?
9326 step_pcoupl = step - 1;
9330 step_pcoupl = ((step - 1)/n)*n + 1;
9332 if (step_pcoupl >= comm->partition_step)
9338 bNStGlobalComm = (step % nstglobalcomm == 0);
9340 if (!comm->bDynLoadBal)
9346 /* Should we do dynamic load balacing this step?
9347 * Since it requires (possibly expensive) global communication,
9348 * we might want to do DLB less frequently.
9350 if (bBoxChanged || ir->epc != epcNO)
9352 bDoDLB = bBoxChanged;
9356 bDoDLB = bNStGlobalComm;
9360 /* Check if we have recorded loads on the nodes */
9361 if (comm->bRecordLoad && dd_load_count(comm))
9363 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9365 /* Check if we should use DLB at the second partitioning
9366 * and every 100 partitionings,
9367 * so the extra communication cost is negligible.
9369 n = max(100, nstglobalcomm);
9370 bCheckDLB = (comm->n_load_collect == 0 ||
9371 comm->n_load_have % n == n-1);
9378 /* Print load every nstlog, first and last step to the log file */
9379 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9380 comm->n_load_collect == 0 ||
9382 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9384 /* Avoid extra communication due to verbose screen output
9385 * when nstglobalcomm is set.
9387 if (bDoDLB || bLogLoad || bCheckDLB ||
9388 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9390 get_load_distribution(dd, wcycle);
9395 dd_print_load(fplog, dd, step-1);
9399 dd_print_load_verbose(dd);
9402 comm->n_load_collect++;
9406 /* Since the timings are node dependent, the master decides */
9410 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
9413 fprintf(debug, "step %s, imb loss %f\n",
9414 gmx_step_str(step, sbuf),
9415 dd_force_imb_perf_loss(dd));
9418 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9421 turn_on_dlb(fplog, cr, step);
9426 comm->n_load_have++;
9429 cgs_gl = &comm->cgs_gl;
9434 /* Clear the old state */
9435 clear_dd_indices(dd, 0, 0);
9438 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9439 TRUE, cgs_gl, state_global->x, &ddbox);
9441 get_cg_distribution(fplog, step, dd, cgs_gl,
9442 state_global->box, &ddbox, state_global->x);
9444 dd_distribute_state(dd, cgs_gl,
9445 state_global, state_local, f);
9447 dd_make_local_cgs(dd, &top_local->cgs);
9449 /* Ensure that we have space for the new distribution */
9450 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9452 if (fr->cutoff_scheme == ecutsGROUP)
9454 calc_cgcm(fplog, 0, dd->ncg_home,
9455 &top_local->cgs, state_local->x, fr->cg_cm);
9458 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9460 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9462 else if (state_local->ddp_count != dd->ddp_count)
9464 if (state_local->ddp_count > dd->ddp_count)
9466 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9469 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9471 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9474 /* Clear the old state */
9475 clear_dd_indices(dd, 0, 0);
9477 /* Build the new indices */
9478 rebuild_cgindex(dd, cgs_gl->index, state_local);
9479 make_dd_indices(dd, cgs_gl->index, 0);
9480 ncgindex_set = dd->ncg_home;
9482 if (fr->cutoff_scheme == ecutsGROUP)
9484 /* Redetermine the cg COMs */
9485 calc_cgcm(fplog, 0, dd->ncg_home,
9486 &top_local->cgs, state_local->x, fr->cg_cm);
9489 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9491 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9493 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9494 TRUE, &top_local->cgs, state_local->x, &ddbox);
9496 bRedist = comm->bDynLoadBal;
9500 /* We have the full state, only redistribute the cgs */
9502 /* Clear the non-home indices */
9503 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9506 /* Avoid global communication for dim's without pbc and -gcom */
9507 if (!bNStGlobalComm)
9509 copy_rvec(comm->box0, ddbox.box0 );
9510 copy_rvec(comm->box_size, ddbox.box_size);
9512 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9513 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9518 /* For dim's without pbc and -gcom */
9519 copy_rvec(ddbox.box0, comm->box0 );
9520 copy_rvec(ddbox.box_size, comm->box_size);
9522 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9525 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9527 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9530 /* Check if we should sort the charge groups */
9531 if (comm->nstSortCG > 0)
9533 bSortCG = (bMasterState ||
9534 (bRedist && (step % comm->nstSortCG == 0)));
9541 ncg_home_old = dd->ncg_home;
9546 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9548 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9550 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9552 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9555 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9557 &comm->cell_x0, &comm->cell_x1,
9558 dd->ncg_home, fr->cg_cm,
9559 cell_ns_x0, cell_ns_x1, &grid_density);
9563 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9566 switch (fr->cutoff_scheme)
9569 copy_ivec(fr->ns.grid->n, ncells_old);
9570 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9571 state_local->box, cell_ns_x0, cell_ns_x1,
9572 fr->rlistlong, grid_density);
9575 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9578 gmx_incons("unimplemented");
9580 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9581 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9585 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9587 /* Sort the state on charge group position.
9588 * This enables exact restarts from this step.
9589 * It also improves performance by about 15% with larger numbers
9590 * of atoms per node.
9593 /* Fill the ns grid with the home cell,
9594 * so we can sort with the indices.
9596 set_zones_ncg_home(dd);
9598 switch (fr->cutoff_scheme)
9601 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9603 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9605 comm->zones.size[0].bb_x0,
9606 comm->zones.size[0].bb_x1,
9608 comm->zones.dens_zone0,
9611 ncg_moved, bRedist ? comm->moved : NULL,
9612 fr->nbv->grp[eintLocal].kernel_type,
9613 fr->nbv->grp[eintLocal].nbat);
9615 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9618 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9619 0, dd->ncg_home, fr->cg_cm);
9621 copy_ivec(fr->ns.grid->n, ncells_new);
9624 gmx_incons("unimplemented");
9627 bResortAll = bMasterState;
9629 /* Check if we can user the old order and ns grid cell indices
9630 * of the charge groups to sort the charge groups efficiently.
9632 if (ncells_new[XX] != ncells_old[XX] ||
9633 ncells_new[YY] != ncells_old[YY] ||
9634 ncells_new[ZZ] != ncells_old[ZZ])
9641 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9642 gmx_step_str(step, sbuf), dd->ncg_home);
9644 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9645 bResortAll ? -1 : ncg_home_old);
9646 /* Rebuild all the indices */
9647 ga2la_clear(dd->ga2la);
9650 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9653 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9655 /* Setup up the communication and communicate the coordinates */
9656 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9658 /* Set the indices */
9659 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9661 /* Set the charge group boundaries for neighbor searching */
9662 set_cg_boundaries(&comm->zones);
9664 if (fr->cutoff_scheme == ecutsVERLET)
9666 set_zones_size(dd, state_local->box, &ddbox,
9667 bSortCG ? 1 : 0, comm->zones.n);
9670 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9673 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9674 -1,state_local->x,state_local->box);
9677 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9679 /* Extract a local topology from the global topology */
9680 for (i = 0; i < dd->ndim; i++)
9682 np[dd->dim[i]] = comm->cd[i].np;
9684 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9685 comm->cellsize_min, np,
9687 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9688 vsite, top_global, top_local);
9690 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9692 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9694 /* Set up the special atom communication */
9695 n = comm->nat[ddnatZONE];
9696 for (i = ddnatZONE+1; i < ddnatNR; i++)
9701 if (vsite && vsite->n_intercg_vsite)
9703 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9707 if (dd->bInterCGcons || dd->bInterCGsettles)
9709 /* Only for inter-cg constraints we need special code */
9710 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9711 constr, ir->nProjOrder,
9712 top_local->idef.il);
9716 gmx_incons("Unknown special atom type setup");
9721 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9723 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9725 /* Make space for the extra coordinates for virtual site
9726 * or constraint communication.
9728 state_local->natoms = comm->nat[ddnatNR-1];
9729 if (state_local->natoms > state_local->nalloc)
9731 dd_realloc_state(state_local, f, state_local->natoms);
9734 if (fr->bF_NoVirSum)
9736 if (vsite && vsite->n_intercg_vsite)
9738 nat_f_novirsum = comm->nat[ddnatVSITE];
9742 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9744 nat_f_novirsum = dd->nat_tot;
9748 nat_f_novirsum = dd->nat_home;
9757 /* Set the number of atoms required for the force calculation.
9758 * Forces need to be constrained when using a twin-range setup
9759 * or with energy minimization. For simple simulations we could
9760 * avoid some allocation, zeroing and copying, but this is
9761 * probably not worth the complications ande checking.
9763 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9764 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9766 /* We make the all mdatoms up to nat_tot_con.
9767 * We could save some work by only setting invmass
9768 * between nat_tot and nat_tot_con.
9770 /* This call also sets the new number of home particles to dd->nat_home */
9771 atoms2md(top_global, ir,
9772 comm->nat[ddnatCON], dd->gatindex, 0, dd->nat_home, mdatoms);
9774 /* Now we have the charges we can sort the FE interactions */
9775 dd_sort_local_top(dd, mdatoms, top_local);
9779 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9780 split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
9785 /* Make the local shell stuff, currently no communication is done */
9786 make_local_shells(cr, mdatoms, shellfc);
9789 if (ir->implicit_solvent)
9791 make_local_gb(cr, fr->born, ir->gb_algorithm);
9794 setup_bonded_threading(fr, &top_local->idef);
9796 if (!(cr->duty & DUTY_PME))
9798 /* Send the charges and/or c6/sigmas to our PME only node */
9799 gmx_pme_send_parameters(cr, mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9800 mdatoms->chargeA, mdatoms->chargeB,
9801 mdatoms->c6A, mdatoms->c6B,
9802 mdatoms->sigmaA, mdatoms->sigmaB,
9803 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9808 set_constraints(constr, top_local, ir, mdatoms, cr);
9811 if (ir->ePull != epullNO)
9813 /* Update the local pull groups */
9814 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9819 /* Update the local rotation groups */
9820 dd_make_local_rotation_groups(dd, ir->rot);
9824 add_dd_statistics(dd);
9826 /* Make sure we only count the cycles for this DD partitioning */
9827 clear_dd_cycle_counts(dd);
9829 /* Because the order of the atoms might have changed since
9830 * the last vsite construction, we need to communicate the constructing
9831 * atom coordinates again (for spreading the forces this MD step).
9833 dd_move_x_vsites(dd, state_local->box, state_local->x);
9835 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9837 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9839 dd_move_x(dd, state_local->box, state_local->x);
9840 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9841 -1, state_local->x, state_local->box);
9844 /* Store the partitioning step */
9845 comm->partition_step = step;
9847 /* Increase the DD partitioning counter */
9849 /* The state currently matches this DD partitioning count, store it */
9850 state_local->ddp_count = dd->ddp_count;
9853 /* The DD master node knows the complete cg distribution,
9854 * store the count so we can possibly skip the cg info communication.
9856 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9859 if (comm->DD_debug > 0)
9861 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9862 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9863 "after partitioning");