2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gromacs/utility/smalloc.h"
49 #include "gmx_fatal.h"
50 #include "gmx_fatal_collective.h"
53 #include "domdec_network.h"
56 #include "chargegroup.h"
65 #include "mtop_util.h"
66 #include "gmx_ga2la.h"
68 #include "nbnxn_search.h"
70 #include "gmx_omp_nthreads.h"
71 #include "gpu_utils.h"
73 #include "gromacs/fileio/futil.h"
74 #include "gromacs/fileio/gmxfio.h"
75 #include "gromacs/fileio/pdbio.h"
76 #include "gromacs/timing/wallcycle.h"
77 #include "gromacs/utility/gmxmpi.h"
78 #include "gromacs/swap/swapcoords.h"
79 #include "gromacs/utility/qsort_threadsafe.h"
80 #include "gromacs/pulling/pull.h"
81 #include "gromacs/pulling/pull_rotation.h"
82 #include "gromacs/imd/imd.h"
84 #define DDRANK(dd, rank) (rank)
85 #define DDMASTERRANK(dd) (dd->masterrank)
87 typedef struct gmx_domdec_master
89 /* The cell boundaries */
91 /* The global charge group division */
92 int *ncg; /* Number of home charge groups for each node */
93 int *index; /* Index of nnodes+1 into cg */
94 int *cg; /* Global charge group index */
95 int *nat; /* Number of home atoms for each node. */
96 int *ibuf; /* Buffer for communication */
97 rvec *vbuf; /* Buffer for state scattering and gathering */
98 } gmx_domdec_master_t;
102 /* The numbers of charge groups to send and receive for each cell
103 * that requires communication, the last entry contains the total
104 * number of atoms that needs to be communicated.
106 int nsend[DD_MAXIZONE+2];
107 int nrecv[DD_MAXIZONE+2];
108 /* The charge groups to send */
111 /* The atom range for non-in-place communication */
112 int cell2at0[DD_MAXIZONE];
113 int cell2at1[DD_MAXIZONE];
118 int np; /* Number of grid pulses in this dimension */
119 int np_dlb; /* For dlb, for use with edlbAUTO */
120 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
122 gmx_bool bInPlace; /* Can we communicate in place? */
123 } gmx_domdec_comm_dim_t;
127 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
128 real *cell_f; /* State var.: cell boundaries, box relative */
129 real *old_cell_f; /* Temp. var.: old cell size */
130 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
131 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
132 real *bound_min; /* Temp. var.: lower limit for cell boundary */
133 real *bound_max; /* Temp. var.: upper limit for cell boundary */
134 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
135 real *buf_ncd; /* Temp. var. */
138 #define DD_NLOAD_MAX 9
140 /* Here floats are accurate enough, since these variables
141 * only influence the load balancing, not the actual MD results.
168 gmx_cgsort_t *sort_new;
180 /* This enum determines the order of the coordinates.
181 * ddnatHOME and ddnatZONE should be first and second,
182 * the others can be ordered as wanted.
185 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
189 edlbAUTO, edlbNO, edlbYES, edlbNR
191 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
195 int dim; /* The dimension */
196 gmx_bool dim_match; /* Tells if DD and PME dims match */
197 int nslab; /* The number of PME slabs in this dimension */
198 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
199 int *pp_min; /* The minimum pp node location, size nslab */
200 int *pp_max; /* The maximum pp node location,size nslab */
201 int maxshift; /* The maximum shift for coordinate redistribution in PME */
206 real min0; /* The minimum bottom of this zone */
207 real max1; /* The maximum top of this zone */
208 real min1; /* The minimum top of this zone */
209 real mch0; /* The maximum bottom communicaton height for this zone */
210 real mch1; /* The maximum top communicaton height for this zone */
211 real p1_0; /* The bottom value of the first cell in this zone */
212 real p1_1; /* The top value of the first cell in this zone */
217 gmx_domdec_ind_t ind;
224 } dd_comm_setup_work_t;
226 typedef struct gmx_domdec_comm
228 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
229 * unless stated otherwise.
232 /* The number of decomposition dimensions for PME, 0: no PME */
234 /* The number of nodes doing PME (PP/PME or only PME) */
238 /* The communication setup including the PME only nodes */
239 gmx_bool bCartesianPP_PME;
242 int *pmenodes; /* size npmenodes */
243 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
244 * but with bCartesianPP_PME */
245 gmx_ddpme_t ddpme[2];
247 /* The DD particle-particle nodes only */
248 gmx_bool bCartesianPP;
249 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
251 /* The global charge groups */
254 /* Should we sort the cgs */
256 gmx_domdec_sort_t *sort;
258 /* Are there charge groups? */
261 /* Are there bonded and multi-body interactions between charge groups? */
262 gmx_bool bInterCGBondeds;
263 gmx_bool bInterCGMultiBody;
265 /* Data for the optional bonded interaction atom communication range */
272 /* Are we actually using DLB? */
273 gmx_bool bDynLoadBal;
275 /* Cell sizes for static load balancing, first index cartesian */
278 /* The width of the communicated boundaries */
281 /* The minimum cell size (including triclinic correction) */
283 /* For dlb, for use with edlbAUTO */
284 rvec cellsize_min_dlb;
285 /* The lower limit for the DD cell size with DLB */
287 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
288 gmx_bool bVacDLBNoLimit;
290 /* With PME load balancing we set limits on DLB */
291 gmx_bool bPMELoadBalDLBLimits;
292 /* DLB needs to take into account that we want to allow this maximum
293 * cut-off (for PME load balancing), this could limit cell boundaries.
295 real PMELoadBal_max_cutoff;
297 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
299 /* box0 and box_size are required with dim's without pbc and -gcom */
303 /* The cell boundaries */
307 /* The old location of the cell boundaries, to check cg displacements */
311 /* The communication setup and charge group boundaries for the zones */
312 gmx_domdec_zones_t zones;
314 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
315 * cell boundaries of neighboring cells for dynamic load balancing.
317 gmx_ddzone_t zone_d1[2];
318 gmx_ddzone_t zone_d2[2][2];
320 /* The coordinate/force communication setup and indices */
321 gmx_domdec_comm_dim_t cd[DIM];
322 /* The maximum number of cells to communicate with in one dimension */
325 /* Which cg distribution is stored on the master node */
326 int master_cg_ddp_count;
328 /* The number of cg's received from the direct neighbors */
329 int zone_ncg1[DD_MAXZONE];
331 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
334 /* Array for signalling if atoms have moved to another domain */
338 /* Communication buffer for general use */
342 /* Communication buffer for general use */
345 /* Temporary storage for thread parallel communication setup */
347 dd_comm_setup_work_t *dth;
349 /* Communication buffers only used with multiple grid pulses */
354 /* Communication buffers for local redistribution */
356 int cggl_flag_nalloc[DIM*2];
358 int cgcm_state_nalloc[DIM*2];
360 /* Cell sizes for dynamic load balancing */
361 gmx_domdec_root_t **root;
365 real cell_f_max0[DIM];
366 real cell_f_min1[DIM];
368 /* Stuff for load communication */
369 gmx_bool bRecordLoad;
370 gmx_domdec_load_t *load;
371 int nrank_gpu_shared;
373 MPI_Comm *mpi_comm_load;
374 MPI_Comm mpi_comm_gpu_shared;
377 /* Maximum DLB scaling per load balancing step in percent */
381 float cycl[ddCyclNr];
382 int cycl_n[ddCyclNr];
383 float cycl_max[ddCyclNr];
384 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
388 /* Have often have did we have load measurements */
390 /* Have often have we collected the load measurements */
394 double sum_nat[ddnatNR-ddnatZONE];
404 /* The last partition step */
405 gmx_int64_t partition_step;
413 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
416 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
417 #define DD_FLAG_NRCG 65535
418 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
419 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
421 /* Zone permutation required to obtain consecutive charge groups
422 * for neighbor searching.
424 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
426 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
427 * components see only j zones with that component 0.
430 /* The DD zone order */
431 static const ivec dd_zo[DD_MAXZONE] =
432 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
437 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
442 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
447 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
449 /* Factors used to avoid problems due to rounding issues */
450 #define DD_CELL_MARGIN 1.0001
451 #define DD_CELL_MARGIN2 1.00005
452 /* Factor to account for pressure scaling during nstlist steps */
453 #define DD_PRES_SCALE_MARGIN 1.02
455 /* Allowed performance loss before we DLB or warn */
456 #define DD_PERF_LOSS 0.05
458 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
460 /* Use separate MPI send and receive commands
461 * when nnodes <= GMX_DD_NNODES_SENDRECV.
462 * This saves memory (and some copying for small nnodes).
463 * For high parallelization scatter and gather calls are used.
465 #define GMX_DD_NNODES_SENDRECV 4
469 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
471 static void index2xyz(ivec nc,int ind,ivec xyz)
473 xyz[XX] = ind % nc[XX];
474 xyz[YY] = (ind / nc[XX]) % nc[YY];
475 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
479 /* This order is required to minimize the coordinate communication in PME
480 * which uses decomposition in the x direction.
482 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
484 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
486 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
487 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
488 xyz[ZZ] = ind % nc[ZZ];
491 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
496 ddindex = dd_index(dd->nc, c);
497 if (dd->comm->bCartesianPP_PME)
499 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
501 else if (dd->comm->bCartesianPP)
504 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
515 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
517 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
520 int ddglatnr(gmx_domdec_t *dd, int i)
530 if (i >= dd->comm->nat[ddnatNR-1])
532 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
534 atnr = dd->gatindex[i] + 1;
540 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
542 return &dd->comm->cgs_gl;
545 static void vec_rvec_init(vec_rvec_t *v)
551 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
555 v->nalloc = over_alloc_dd(n);
556 srenew(v->v, v->nalloc);
560 void dd_store_state(gmx_domdec_t *dd, t_state *state)
564 if (state->ddp_count != dd->ddp_count)
566 gmx_incons("The state does not the domain decomposition state");
569 state->ncg_gl = dd->ncg_home;
570 if (state->ncg_gl > state->cg_gl_nalloc)
572 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
573 srenew(state->cg_gl, state->cg_gl_nalloc);
575 for (i = 0; i < state->ncg_gl; i++)
577 state->cg_gl[i] = dd->index_gl[i];
580 state->ddp_count_cg_gl = dd->ddp_count;
583 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
585 return &dd->comm->zones;
588 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
589 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
591 gmx_domdec_zones_t *zones;
594 zones = &dd->comm->zones;
597 while (icg >= zones->izone[izone].cg1)
606 else if (izone < zones->nizone)
608 *jcg0 = zones->izone[izone].jcg0;
612 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
613 icg, izone, zones->nizone);
616 *jcg1 = zones->izone[izone].jcg1;
618 for (d = 0; d < dd->ndim; d++)
621 shift0[dim] = zones->izone[izone].shift0[dim];
622 shift1[dim] = zones->izone[izone].shift1[dim];
623 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
625 /* A conservative approach, this can be optimized */
632 int dd_natoms_vsite(gmx_domdec_t *dd)
634 return dd->comm->nat[ddnatVSITE];
637 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
639 *at_start = dd->comm->nat[ddnatCON-1];
640 *at_end = dd->comm->nat[ddnatCON];
643 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
645 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
646 int *index, *cgindex;
647 gmx_domdec_comm_t *comm;
648 gmx_domdec_comm_dim_t *cd;
649 gmx_domdec_ind_t *ind;
650 rvec shift = {0, 0, 0}, *buf, *rbuf;
651 gmx_bool bPBC, bScrew;
655 cgindex = dd->cgindex;
660 nat_tot = dd->nat_home;
661 for (d = 0; d < dd->ndim; d++)
663 bPBC = (dd->ci[dd->dim[d]] == 0);
664 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
667 copy_rvec(box[dd->dim[d]], shift);
670 for (p = 0; p < cd->np; p++)
677 for (i = 0; i < ind->nsend[nzone]; i++)
679 at0 = cgindex[index[i]];
680 at1 = cgindex[index[i]+1];
681 for (j = at0; j < at1; j++)
683 copy_rvec(x[j], buf[n]);
690 for (i = 0; i < ind->nsend[nzone]; i++)
692 at0 = cgindex[index[i]];
693 at1 = cgindex[index[i]+1];
694 for (j = at0; j < at1; j++)
696 /* We need to shift the coordinates */
697 rvec_add(x[j], shift, buf[n]);
704 for (i = 0; i < ind->nsend[nzone]; i++)
706 at0 = cgindex[index[i]];
707 at1 = cgindex[index[i]+1];
708 for (j = at0; j < at1; j++)
711 buf[n][XX] = x[j][XX] + shift[XX];
713 * This operation requires a special shift force
714 * treatment, which is performed in calc_vir.
716 buf[n][YY] = box[YY][YY] - x[j][YY];
717 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
729 rbuf = comm->vbuf2.v;
731 /* Send and receive the coordinates */
732 dd_sendrecv_rvec(dd, d, dddirBackward,
733 buf, ind->nsend[nzone+1],
734 rbuf, ind->nrecv[nzone+1]);
738 for (zone = 0; zone < nzone; zone++)
740 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
742 copy_rvec(rbuf[j], x[i]);
747 nat_tot += ind->nrecv[nzone+1];
753 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
755 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
756 int *index, *cgindex;
757 gmx_domdec_comm_t *comm;
758 gmx_domdec_comm_dim_t *cd;
759 gmx_domdec_ind_t *ind;
763 gmx_bool bPBC, bScrew;
767 cgindex = dd->cgindex;
772 nzone = comm->zones.n/2;
773 nat_tot = dd->nat_tot;
774 for (d = dd->ndim-1; d >= 0; d--)
776 bPBC = (dd->ci[dd->dim[d]] == 0);
777 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
778 if (fshift == NULL && !bScrew)
782 /* Determine which shift vector we need */
788 for (p = cd->np-1; p >= 0; p--)
791 nat_tot -= ind->nrecv[nzone+1];
798 sbuf = comm->vbuf2.v;
800 for (zone = 0; zone < nzone; zone++)
802 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
804 copy_rvec(f[i], sbuf[j]);
809 /* Communicate the forces */
810 dd_sendrecv_rvec(dd, d, dddirForward,
811 sbuf, ind->nrecv[nzone+1],
812 buf, ind->nsend[nzone+1]);
814 /* Add the received forces */
818 for (i = 0; i < ind->nsend[nzone]; i++)
820 at0 = cgindex[index[i]];
821 at1 = cgindex[index[i]+1];
822 for (j = at0; j < at1; j++)
824 rvec_inc(f[j], buf[n]);
831 for (i = 0; i < ind->nsend[nzone]; i++)
833 at0 = cgindex[index[i]];
834 at1 = cgindex[index[i]+1];
835 for (j = at0; j < at1; j++)
837 rvec_inc(f[j], buf[n]);
838 /* Add this force to the shift force */
839 rvec_inc(fshift[is], buf[n]);
846 for (i = 0; i < ind->nsend[nzone]; i++)
848 at0 = cgindex[index[i]];
849 at1 = cgindex[index[i]+1];
850 for (j = at0; j < at1; j++)
852 /* Rotate the force */
853 f[j][XX] += buf[n][XX];
854 f[j][YY] -= buf[n][YY];
855 f[j][ZZ] -= buf[n][ZZ];
858 /* Add this force to the shift force */
859 rvec_inc(fshift[is], buf[n]);
870 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
872 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
873 int *index, *cgindex;
874 gmx_domdec_comm_t *comm;
875 gmx_domdec_comm_dim_t *cd;
876 gmx_domdec_ind_t *ind;
881 cgindex = dd->cgindex;
883 buf = &comm->vbuf.v[0][0];
886 nat_tot = dd->nat_home;
887 for (d = 0; d < dd->ndim; d++)
890 for (p = 0; p < cd->np; p++)
895 for (i = 0; i < ind->nsend[nzone]; i++)
897 at0 = cgindex[index[i]];
898 at1 = cgindex[index[i]+1];
899 for (j = at0; j < at1; j++)
912 rbuf = &comm->vbuf2.v[0][0];
914 /* Send and receive the coordinates */
915 dd_sendrecv_real(dd, d, dddirBackward,
916 buf, ind->nsend[nzone+1],
917 rbuf, ind->nrecv[nzone+1]);
921 for (zone = 0; zone < nzone; zone++)
923 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
930 nat_tot += ind->nrecv[nzone+1];
936 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
938 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
939 int *index, *cgindex;
940 gmx_domdec_comm_t *comm;
941 gmx_domdec_comm_dim_t *cd;
942 gmx_domdec_ind_t *ind;
947 cgindex = dd->cgindex;
949 buf = &comm->vbuf.v[0][0];
952 nzone = comm->zones.n/2;
953 nat_tot = dd->nat_tot;
954 for (d = dd->ndim-1; d >= 0; d--)
957 for (p = cd->np-1; p >= 0; p--)
960 nat_tot -= ind->nrecv[nzone+1];
967 sbuf = &comm->vbuf2.v[0][0];
969 for (zone = 0; zone < nzone; zone++)
971 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
978 /* Communicate the forces */
979 dd_sendrecv_real(dd, d, dddirForward,
980 sbuf, ind->nrecv[nzone+1],
981 buf, ind->nsend[nzone+1]);
983 /* Add the received forces */
985 for (i = 0; i < ind->nsend[nzone]; i++)
987 at0 = cgindex[index[i]];
988 at1 = cgindex[index[i]+1];
989 for (j = at0; j < at1; j++)
1000 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
1002 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1004 zone->min0, zone->max1,
1005 zone->mch0, zone->mch0,
1006 zone->p1_0, zone->p1_1);
1010 #define DDZONECOMM_MAXZONE 5
1011 #define DDZONECOMM_BUFSIZE 3
1013 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1014 int ddimind, int direction,
1015 gmx_ddzone_t *buf_s, int n_s,
1016 gmx_ddzone_t *buf_r, int n_r)
1018 #define ZBS DDZONECOMM_BUFSIZE
1019 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1020 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
1023 for (i = 0; i < n_s; i++)
1025 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1026 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1027 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1028 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1029 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1030 vbuf_s[i*ZBS+1][2] = 0;
1031 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1032 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1033 vbuf_s[i*ZBS+2][2] = 0;
1036 dd_sendrecv_rvec(dd, ddimind, direction,
1040 for (i = 0; i < n_r; i++)
1042 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1043 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1044 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1045 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1046 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1047 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1048 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
1054 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1055 rvec cell_ns_x0, rvec cell_ns_x1)
1057 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1059 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1060 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1061 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1062 rvec extr_s[2], extr_r[2];
1064 real dist_d, c = 0, det;
1065 gmx_domdec_comm_t *comm;
1066 gmx_bool bPBC, bUse;
1070 for (d = 1; d < dd->ndim; d++)
1073 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1074 zp->min0 = cell_ns_x0[dim];
1075 zp->max1 = cell_ns_x1[dim];
1076 zp->min1 = cell_ns_x1[dim];
1077 zp->mch0 = cell_ns_x0[dim];
1078 zp->mch1 = cell_ns_x1[dim];
1079 zp->p1_0 = cell_ns_x0[dim];
1080 zp->p1_1 = cell_ns_x1[dim];
1083 for (d = dd->ndim-2; d >= 0; d--)
1086 bPBC = (dim < ddbox->npbcdim);
1088 /* Use an rvec to store two reals */
1089 extr_s[d][0] = comm->cell_f0[d+1];
1090 extr_s[d][1] = comm->cell_f1[d+1];
1091 extr_s[d][2] = comm->cell_f1[d+1];
1094 /* Store the extremes in the backward sending buffer,
1095 * so the get updated separately from the forward communication.
1097 for (d1 = d; d1 < dd->ndim-1; d1++)
1099 /* We invert the order to be able to use the same loop for buf_e */
1100 buf_s[pos].min0 = extr_s[d1][1];
1101 buf_s[pos].max1 = extr_s[d1][0];
1102 buf_s[pos].min1 = extr_s[d1][2];
1103 buf_s[pos].mch0 = 0;
1104 buf_s[pos].mch1 = 0;
1105 /* Store the cell corner of the dimension we communicate along */
1106 buf_s[pos].p1_0 = comm->cell_x0[dim];
1107 buf_s[pos].p1_1 = 0;
1111 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1114 if (dd->ndim == 3 && d == 0)
1116 buf_s[pos] = comm->zone_d2[0][1];
1118 buf_s[pos] = comm->zone_d1[0];
1122 /* We only need to communicate the extremes
1123 * in the forward direction
1125 npulse = comm->cd[d].np;
1128 /* Take the minimum to avoid double communication */
1129 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1133 /* Without PBC we should really not communicate over
1134 * the boundaries, but implementing that complicates
1135 * the communication setup and therefore we simply
1136 * do all communication, but ignore some data.
1138 npulse_min = npulse;
1140 for (p = 0; p < npulse_min; p++)
1142 /* Communicate the extremes forward */
1143 bUse = (bPBC || dd->ci[dim] > 0);
1145 dd_sendrecv_rvec(dd, d, dddirForward,
1146 extr_s+d, dd->ndim-d-1,
1147 extr_r+d, dd->ndim-d-1);
1151 for (d1 = d; d1 < dd->ndim-1; d1++)
1153 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1154 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1155 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1161 for (p = 0; p < npulse; p++)
1163 /* Communicate all the zone information backward */
1164 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1166 dd_sendrecv_ddzone(dd, d, dddirBackward,
1173 for (d1 = d+1; d1 < dd->ndim; d1++)
1175 /* Determine the decrease of maximum required
1176 * communication height along d1 due to the distance along d,
1177 * this avoids a lot of useless atom communication.
1179 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1181 if (ddbox->tric_dir[dim])
1183 /* c is the off-diagonal coupling between the cell planes
1184 * along directions d and d1.
1186 c = ddbox->v[dim][dd->dim[d1]][dim];
1192 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1195 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1199 /* A negative value signals out of range */
1205 /* Accumulate the extremes over all pulses */
1206 for (i = 0; i < buf_size; i++)
1210 buf_e[i] = buf_r[i];
1216 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1217 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1218 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1221 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1229 if (bUse && dh[d1] >= 0)
1231 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1232 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1235 /* Copy the received buffer to the send buffer,
1236 * to pass the data through with the next pulse.
1238 buf_s[i] = buf_r[i];
1240 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1241 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1243 /* Store the extremes */
1246 for (d1 = d; d1 < dd->ndim-1; d1++)
1248 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1249 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1250 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1254 if (d == 1 || (d == 0 && dd->ndim == 3))
1256 for (i = d; i < 2; i++)
1258 comm->zone_d2[1-d][i] = buf_e[pos];
1264 comm->zone_d1[1] = buf_e[pos];
1274 for (i = 0; i < 2; i++)
1278 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1280 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1281 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1287 for (i = 0; i < 2; i++)
1289 for (j = 0; j < 2; j++)
1293 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1295 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1296 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1300 for (d = 1; d < dd->ndim; d++)
1302 comm->cell_f_max0[d] = extr_s[d-1][0];
1303 comm->cell_f_min1[d] = extr_s[d-1][1];
1306 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1307 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1312 static void dd_collect_cg(gmx_domdec_t *dd,
1313 t_state *state_local)
1315 gmx_domdec_master_t *ma = NULL;
1316 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1319 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1321 /* The master has the correct distribution */
1325 if (state_local->ddp_count == dd->ddp_count)
1327 ncg_home = dd->ncg_home;
1329 nat_home = dd->nat_home;
1331 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1333 cgs_gl = &dd->comm->cgs_gl;
1335 ncg_home = state_local->ncg_gl;
1336 cg = state_local->cg_gl;
1338 for (i = 0; i < ncg_home; i++)
1340 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1345 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1348 buf2[0] = dd->ncg_home;
1349 buf2[1] = dd->nat_home;
1359 /* Collect the charge group and atom counts on the master */
1360 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1365 for (i = 0; i < dd->nnodes; i++)
1367 ma->ncg[i] = ma->ibuf[2*i];
1368 ma->nat[i] = ma->ibuf[2*i+1];
1369 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1372 /* Make byte counts and indices */
1373 for (i = 0; i < dd->nnodes; i++)
1375 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1376 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1380 fprintf(debug, "Initial charge group distribution: ");
1381 for (i = 0; i < dd->nnodes; i++)
1383 fprintf(debug, " %d", ma->ncg[i]);
1385 fprintf(debug, "\n");
1389 /* Collect the charge group indices on the master */
1391 dd->ncg_home*sizeof(int), dd->index_gl,
1392 DDMASTER(dd) ? ma->ibuf : NULL,
1393 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1394 DDMASTER(dd) ? ma->cg : NULL);
1396 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1399 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1402 gmx_domdec_master_t *ma;
1403 int n, i, c, a, nalloc = 0;
1412 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1413 dd->rank, dd->mpi_comm_all);
1418 /* Copy the master coordinates to the global array */
1419 cgs_gl = &dd->comm->cgs_gl;
1421 n = DDMASTERRANK(dd);
1423 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1425 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1427 copy_rvec(lv[a++], v[c]);
1431 for (n = 0; n < dd->nnodes; n++)
1435 if (ma->nat[n] > nalloc)
1437 nalloc = over_alloc_dd(ma->nat[n]);
1438 srenew(buf, nalloc);
1441 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1442 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1445 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1447 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1449 copy_rvec(buf[a++], v[c]);
1458 static void get_commbuffer_counts(gmx_domdec_t *dd,
1459 int **counts, int **disps)
1461 gmx_domdec_master_t *ma;
1466 /* Make the rvec count and displacment arrays */
1468 *disps = ma->ibuf + dd->nnodes;
1469 for (n = 0; n < dd->nnodes; n++)
1471 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1472 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1476 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1479 gmx_domdec_master_t *ma;
1480 int *rcounts = NULL, *disps = NULL;
1489 get_commbuffer_counts(dd, &rcounts, &disps);
1494 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1498 cgs_gl = &dd->comm->cgs_gl;
1501 for (n = 0; n < dd->nnodes; n++)
1503 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1505 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1507 copy_rvec(buf[a++], v[c]);
1514 void dd_collect_vec(gmx_domdec_t *dd,
1515 t_state *state_local, rvec *lv, rvec *v)
1517 gmx_domdec_master_t *ma;
1518 int n, i, c, a, nalloc = 0;
1521 dd_collect_cg(dd, state_local);
1523 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1525 dd_collect_vec_sendrecv(dd, lv, v);
1529 dd_collect_vec_gatherv(dd, lv, v);
1534 void dd_collect_state(gmx_domdec_t *dd,
1535 t_state *state_local, t_state *state)
1539 nh = state->nhchainlength;
1543 for (i = 0; i < efptNR; i++)
1545 state->lambda[i] = state_local->lambda[i];
1547 state->fep_state = state_local->fep_state;
1548 state->veta = state_local->veta;
1549 state->vol0 = state_local->vol0;
1550 copy_mat(state_local->box, state->box);
1551 copy_mat(state_local->boxv, state->boxv);
1552 copy_mat(state_local->svir_prev, state->svir_prev);
1553 copy_mat(state_local->fvir_prev, state->fvir_prev);
1554 copy_mat(state_local->pres_prev, state->pres_prev);
1556 for (i = 0; i < state_local->ngtc; i++)
1558 for (j = 0; j < nh; j++)
1560 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1561 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1563 state->therm_integral[i] = state_local->therm_integral[i];
1565 for (i = 0; i < state_local->nnhpres; i++)
1567 for (j = 0; j < nh; j++)
1569 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1570 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1574 for (est = 0; est < estNR; est++)
1576 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1581 dd_collect_vec(dd, state_local, state_local->x, state->x);
1584 dd_collect_vec(dd, state_local, state_local->v, state->v);
1587 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1590 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1592 case estDISRE_INITF:
1593 case estDISRE_RM3TAV:
1594 case estORIRE_INITF:
1598 gmx_incons("Unknown state entry encountered in dd_collect_state");
1604 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1610 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1613 state->nalloc = over_alloc_dd(nalloc);
1615 for (est = 0; est < estNR; est++)
1617 if (EST_DISTR(est) && (state->flags & (1<<est)))
1622 srenew(state->x, state->nalloc);
1625 srenew(state->v, state->nalloc);
1628 srenew(state->sd_X, state->nalloc);
1631 srenew(state->cg_p, state->nalloc);
1633 case estDISRE_INITF:
1634 case estDISRE_RM3TAV:
1635 case estORIRE_INITF:
1637 /* No reallocation required */
1640 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1647 srenew(*f, state->nalloc);
1651 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1654 if (nalloc > fr->cg_nalloc)
1658 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1660 fr->cg_nalloc = over_alloc_dd(nalloc);
1661 srenew(fr->cginfo, fr->cg_nalloc);
1662 if (fr->cutoff_scheme == ecutsGROUP)
1664 srenew(fr->cg_cm, fr->cg_nalloc);
1667 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1669 /* We don't use charge groups, we use x in state to set up
1670 * the atom communication.
1672 dd_realloc_state(state, f, nalloc);
1676 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1679 gmx_domdec_master_t *ma;
1680 int n, i, c, a, nalloc = 0;
1687 for (n = 0; n < dd->nnodes; n++)
1691 if (ma->nat[n] > nalloc)
1693 nalloc = over_alloc_dd(ma->nat[n]);
1694 srenew(buf, nalloc);
1696 /* Use lv as a temporary buffer */
1698 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1700 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1702 copy_rvec(v[c], buf[a++]);
1705 if (a != ma->nat[n])
1707 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1712 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1713 DDRANK(dd, n), n, dd->mpi_comm_all);
1718 n = DDMASTERRANK(dd);
1720 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1722 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1724 copy_rvec(v[c], lv[a++]);
1731 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1732 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1737 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1740 gmx_domdec_master_t *ma;
1741 int *scounts = NULL, *disps = NULL;
1742 int n, i, c, a, nalloc = 0;
1749 get_commbuffer_counts(dd, &scounts, &disps);
1753 for (n = 0; n < dd->nnodes; n++)
1755 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1757 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1759 copy_rvec(v[c], buf[a++]);
1765 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1768 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1770 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1772 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1776 dd_distribute_vec_scatterv(dd, cgs, v, lv);
1780 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1783 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1784 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1785 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1787 if (dfhist->nlambda > 0)
1789 int nlam = dfhist->nlambda;
1790 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1791 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1792 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1793 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1794 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1795 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
1797 for (i = 0; i < nlam; i++)
1799 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1800 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1801 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1802 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1803 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1804 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
1809 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1810 t_state *state, t_state *state_local,
1815 nh = state->nhchainlength;
1819 for (i = 0; i < efptNR; i++)
1821 state_local->lambda[i] = state->lambda[i];
1823 state_local->fep_state = state->fep_state;
1824 state_local->veta = state->veta;
1825 state_local->vol0 = state->vol0;
1826 copy_mat(state->box, state_local->box);
1827 copy_mat(state->box_rel, state_local->box_rel);
1828 copy_mat(state->boxv, state_local->boxv);
1829 copy_mat(state->svir_prev, state_local->svir_prev);
1830 copy_mat(state->fvir_prev, state_local->fvir_prev);
1831 copy_df_history(&state_local->dfhist, &state->dfhist);
1832 for (i = 0; i < state_local->ngtc; i++)
1834 for (j = 0; j < nh; j++)
1836 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1837 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1839 state_local->therm_integral[i] = state->therm_integral[i];
1841 for (i = 0; i < state_local->nnhpres; i++)
1843 for (j = 0; j < nh; j++)
1845 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1846 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1850 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1851 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1852 dd_bcast(dd, sizeof(real), &state_local->veta);
1853 dd_bcast(dd, sizeof(real), &state_local->vol0);
1854 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1855 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1856 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1857 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1858 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1859 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1860 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1861 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1862 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1863 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1865 /* communicate df_history -- required for restarting from checkpoint */
1866 dd_distribute_dfhist(dd, &state_local->dfhist);
1868 if (dd->nat_home > state_local->nalloc)
1870 dd_realloc_state(state_local, f, dd->nat_home);
1872 for (i = 0; i < estNR; i++)
1874 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1879 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1882 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1885 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1888 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1890 case estDISRE_INITF:
1891 case estDISRE_RM3TAV:
1892 case estORIRE_INITF:
1894 /* Not implemented yet */
1897 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1903 static char dim2char(int dim)
1909 case XX: c = 'X'; break;
1910 case YY: c = 'Y'; break;
1911 case ZZ: c = 'Z'; break;
1912 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
1918 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1919 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1921 rvec grid_s[2], *grid_r = NULL, cx, r;
1922 char fname[STRLEN], format[STRLEN], buf[22];
1924 int a, i, d, z, y, x;
1928 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1929 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1933 snew(grid_r, 2*dd->nnodes);
1936 dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd) ? grid_r[0] : NULL);
1940 for (d = 0; d < DIM; d++)
1942 for (i = 0; i < DIM; i++)
1950 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1952 tric[d][i] = box[i][d]/box[i][i];
1961 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1962 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
1963 out = gmx_fio_fopen(fname, "w");
1964 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1966 for (i = 0; i < dd->nnodes; i++)
1968 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1969 for (d = 0; d < DIM; d++)
1971 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1973 for (z = 0; z < 2; z++)
1975 for (y = 0; y < 2; y++)
1977 for (x = 0; x < 2; x++)
1979 cx[XX] = grid_r[i*2+x][XX];
1980 cx[YY] = grid_r[i*2+y][YY];
1981 cx[ZZ] = grid_r[i*2+z][ZZ];
1983 fprintf(out, format, "ATOM", a++, "CA", "GLY", ' ', 1+i,
1984 ' ', 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol);
1988 for (d = 0; d < DIM; d++)
1990 for (x = 0; x < 4; x++)
1994 case 0: y = 1 + i*8 + 2*x; break;
1995 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1996 case 2: y = 1 + i*8 + x; break;
1998 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2002 gmx_fio_fclose(out);
2007 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
2008 gmx_mtop_t *mtop, t_commrec *cr,
2009 int natoms, rvec x[], matrix box)
2011 char fname[STRLEN], format[STRLEN], format4[STRLEN], buf[22];
2013 int i, ii, resnr, c;
2014 char *atomname, *resname;
2021 natoms = dd->comm->nat[ddnatVSITE];
2024 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2026 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
2027 sprintf(format4, "%s%s\n", get_pdbformat4(), "%6.2f%6.2f");
2029 out = gmx_fio_fopen(fname, "w");
2031 fprintf(out, "TITLE %s\n", title);
2032 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2033 for (i = 0; i < natoms; i++)
2035 ii = dd->gatindex[i];
2036 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
2037 if (i < dd->comm->nat[ddnatZONE])
2040 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2046 else if (i < dd->comm->nat[ddnatVSITE])
2048 b = dd->comm->zones.n;
2052 b = dd->comm->zones.n + 1;
2054 fprintf(out, strlen(atomname) < 4 ? format : format4,
2055 "ATOM", (ii+1)%100000,
2056 atomname, resname, ' ', resnr%10000, ' ',
2057 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b);
2059 fprintf(out, "TER\n");
2061 gmx_fio_fclose(out);
2064 real dd_cutoff_mbody(gmx_domdec_t *dd)
2066 gmx_domdec_comm_t *comm;
2073 if (comm->bInterCGBondeds)
2075 if (comm->cutoff_mbody > 0)
2077 r = comm->cutoff_mbody;
2081 /* cutoff_mbody=0 means we do not have DLB */
2082 r = comm->cellsize_min[dd->dim[0]];
2083 for (di = 1; di < dd->ndim; di++)
2085 r = min(r, comm->cellsize_min[dd->dim[di]]);
2087 if (comm->bBondComm)
2089 r = max(r, comm->cutoff_mbody);
2093 r = min(r, comm->cutoff);
2101 real dd_cutoff_twobody(gmx_domdec_t *dd)
2105 r_mb = dd_cutoff_mbody(dd);
2107 return max(dd->comm->cutoff, r_mb);
2111 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2115 nc = dd->nc[dd->comm->cartpmedim];
2116 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2117 copy_ivec(coord, coord_pme);
2118 coord_pme[dd->comm->cartpmedim] =
2119 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
2122 static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
2124 /* Here we assign a PME node to communicate with this DD node
2125 * by assuming that the major index of both is x.
2126 * We add cr->npmenodes/2 to obtain an even distribution.
2128 return (ddindex*npme + npme/2)/ndd;
2131 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2133 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2136 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2138 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
2141 static int *dd_pmenodes(t_commrec *cr)
2146 snew(pmenodes, cr->npmenodes);
2148 for (i = 0; i < cr->dd->nnodes; i++)
2150 p0 = cr_ddindex2pmeindex(cr, i);
2151 p1 = cr_ddindex2pmeindex(cr, i+1);
2152 if (i+1 == cr->dd->nnodes || p1 > p0)
2156 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2158 pmenodes[n] = i + 1 + n;
2166 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2169 ivec coords, coords_pme, nc;
2174 if (dd->comm->bCartesian) {
2175 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2176 dd_coords2pmecoords(dd,coords,coords_pme);
2177 copy_ivec(dd->ntot,nc);
2178 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2179 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2181 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2183 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2189 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
2194 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2196 gmx_domdec_comm_t *comm;
2198 int ddindex, nodeid = -1;
2200 comm = cr->dd->comm;
2205 if (comm->bCartesianPP_PME)
2208 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2213 ddindex = dd_index(cr->dd->nc, coords);
2214 if (comm->bCartesianPP)
2216 nodeid = comm->ddindex2simnodeid[ddindex];
2222 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
2234 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2237 gmx_domdec_comm_t *comm;
2238 ivec coord, coord_pme;
2245 /* This assumes a uniform x domain decomposition grid cell size */
2246 if (comm->bCartesianPP_PME)
2249 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2250 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2252 /* This is a PP node */
2253 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2254 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2258 else if (comm->bCartesianPP)
2260 if (sim_nodeid < dd->nnodes)
2262 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2267 /* This assumes DD cells with identical x coordinates
2268 * are numbered sequentially.
2270 if (dd->comm->pmenodes == NULL)
2272 if (sim_nodeid < dd->nnodes)
2274 /* The DD index equals the nodeid */
2275 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2281 while (sim_nodeid > dd->comm->pmenodes[i])
2285 if (sim_nodeid < dd->comm->pmenodes[i])
2287 pmenode = dd->comm->pmenodes[i];
2295 void get_pme_nnodes(const gmx_domdec_t *dd,
2296 int *npmenodes_x, int *npmenodes_y)
2300 *npmenodes_x = dd->comm->npmenodes_x;
2301 *npmenodes_y = dd->comm->npmenodes_y;
2310 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2312 gmx_bool bPMEOnlyNode;
2314 if (DOMAINDECOMP(cr))
2316 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2320 bPMEOnlyNode = FALSE;
2323 return bPMEOnlyNode;
2326 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2327 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2331 ivec coord, coord_pme;
2335 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2338 for (x = 0; x < dd->nc[XX]; x++)
2340 for (y = 0; y < dd->nc[YY]; y++)
2342 for (z = 0; z < dd->nc[ZZ]; z++)
2344 if (dd->comm->bCartesianPP_PME)
2349 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2350 if (dd->ci[XX] == coord_pme[XX] &&
2351 dd->ci[YY] == coord_pme[YY] &&
2352 dd->ci[ZZ] == coord_pme[ZZ])
2354 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2359 /* The slab corresponds to the nodeid in the PME group */
2360 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2362 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2369 /* The last PP-only node is the peer node */
2370 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2374 fprintf(debug, "Receive coordinates from PP nodes:");
2375 for (x = 0; x < *nmy_ddnodes; x++)
2377 fprintf(debug, " %d", (*my_ddnodes)[x]);
2379 fprintf(debug, "\n");
2383 static gmx_bool receive_vir_ener(t_commrec *cr)
2385 gmx_domdec_comm_t *comm;
2386 int pmenode, coords[DIM], rank;
2390 if (cr->npmenodes < cr->dd->nnodes)
2392 comm = cr->dd->comm;
2393 if (comm->bCartesianPP_PME)
2395 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2397 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2398 coords[comm->cartpmedim]++;
2399 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2401 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2402 if (dd_simnode2pmenode(cr, rank) == pmenode)
2404 /* This is not the last PP node for pmenode */
2412 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2413 if (cr->sim_nodeid+1 < cr->nnodes &&
2414 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2416 /* This is not the last PP node for pmenode */
2425 static void set_zones_ncg_home(gmx_domdec_t *dd)
2427 gmx_domdec_zones_t *zones;
2430 zones = &dd->comm->zones;
2432 zones->cg_range[0] = 0;
2433 for (i = 1; i < zones->n+1; i++)
2435 zones->cg_range[i] = dd->ncg_home;
2437 /* zone_ncg1[0] should always be equal to ncg_home */
2438 dd->comm->zone_ncg1[0] = dd->ncg_home;
2441 static void rebuild_cgindex(gmx_domdec_t *dd,
2442 const int *gcgs_index, t_state *state)
2444 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2447 dd_cg_gl = dd->index_gl;
2448 cgindex = dd->cgindex;
2451 for (i = 0; i < state->ncg_gl; i++)
2455 dd_cg_gl[i] = cg_gl;
2456 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2460 dd->ncg_home = state->ncg_gl;
2463 set_zones_ncg_home(dd);
2466 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2468 while (cg >= cginfo_mb->cg_end)
2473 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2476 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2477 t_forcerec *fr, char *bLocalCG)
2479 cginfo_mb_t *cginfo_mb;
2485 cginfo_mb = fr->cginfo_mb;
2486 cginfo = fr->cginfo;
2488 for (cg = cg0; cg < cg1; cg++)
2490 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2494 if (bLocalCG != NULL)
2496 for (cg = cg0; cg < cg1; cg++)
2498 bLocalCG[index_gl[cg]] = TRUE;
2503 static void make_dd_indices(gmx_domdec_t *dd,
2504 const int *gcgs_index, int cg_start)
2506 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2507 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2512 bLocalCG = dd->comm->bLocalCG;
2514 if (dd->nat_tot > dd->gatindex_nalloc)
2516 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2517 srenew(dd->gatindex, dd->gatindex_nalloc);
2520 nzone = dd->comm->zones.n;
2521 zone2cg = dd->comm->zones.cg_range;
2522 zone_ncg1 = dd->comm->zone_ncg1;
2523 index_gl = dd->index_gl;
2524 gatindex = dd->gatindex;
2525 bCGs = dd->comm->bCGs;
2527 if (zone2cg[1] != dd->ncg_home)
2529 gmx_incons("dd->ncg_zone is not up to date");
2532 /* Make the local to global and global to local atom index */
2533 a = dd->cgindex[cg_start];
2534 for (zone = 0; zone < nzone; zone++)
2542 cg0 = zone2cg[zone];
2544 cg1 = zone2cg[zone+1];
2545 cg1_p1 = cg0 + zone_ncg1[zone];
2547 for (cg = cg0; cg < cg1; cg++)
2552 /* Signal that this cg is from more than one pulse away */
2555 cg_gl = index_gl[cg];
2558 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2561 ga2la_set(dd->ga2la, a_gl, a, zone1);
2567 gatindex[a] = cg_gl;
2568 ga2la_set(dd->ga2la, cg_gl, a, zone1);
2575 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2578 int ncg, i, ngl, nerr;
2581 if (bLocalCG == NULL)
2585 for (i = 0; i < dd->ncg_tot; i++)
2587 if (!bLocalCG[dd->index_gl[i]])
2590 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
2595 for (i = 0; i < ncg_sys; i++)
2602 if (ngl != dd->ncg_tot)
2604 fprintf(stderr, "DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
2611 static void check_index_consistency(gmx_domdec_t *dd,
2612 int natoms_sys, int ncg_sys,
2615 int nerr, ngl, i, a, cell;
2620 if (dd->comm->DD_debug > 1)
2622 snew(have, natoms_sys);
2623 for (a = 0; a < dd->nat_tot; a++)
2625 if (have[dd->gatindex[a]] > 0)
2627 fprintf(stderr, "DD node %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2631 have[dd->gatindex[a]] = a + 1;
2637 snew(have, dd->nat_tot);
2640 for (i = 0; i < natoms_sys; i++)
2642 if (ga2la_get(dd->ga2la, i, &a, &cell))
2644 if (a >= dd->nat_tot)
2646 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2652 if (dd->gatindex[a] != i)
2654 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2661 if (ngl != dd->nat_tot)
2664 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2665 dd->rank, where, ngl, dd->nat_tot);
2667 for (a = 0; a < dd->nat_tot; a++)
2672 "DD node %d, %s: local atom %d, global %d has no global index\n",
2673 dd->rank, where, a+1, dd->gatindex[a]+1);
2678 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2682 gmx_fatal(FARGS, "DD node %d, %s: %d atom/cg index inconsistencies",
2683 dd->rank, where, nerr);
2687 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2694 /* Clear the whole list without searching */
2695 ga2la_clear(dd->ga2la);
2699 for (i = a_start; i < dd->nat_tot; i++)
2701 ga2la_del(dd->ga2la, dd->gatindex[i]);
2705 bLocalCG = dd->comm->bLocalCG;
2708 for (i = cg_start; i < dd->ncg_tot; i++)
2710 bLocalCG[dd->index_gl[i]] = FALSE;
2714 dd_clear_local_vsite_indices(dd);
2716 if (dd->constraints)
2718 dd_clear_local_constraint_indices(dd);
2722 /* This function should be used for moving the domain boudaries during DLB,
2723 * for obtaining the minimum cell size. It checks the initially set limit
2724 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2725 * and, possibly, a longer cut-off limit set for PME load balancing.
2727 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2731 cellsize_min = comm->cellsize_min[dim];
2733 if (!comm->bVacDLBNoLimit)
2735 /* The cut-off might have changed, e.g. by PME load balacning,
2736 * from the value used to set comm->cellsize_min, so check it.
2738 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2740 if (comm->bPMELoadBalDLBLimits)
2742 /* Check for the cut-off limit set by the PME load balancing */
2743 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2747 return cellsize_min;
2750 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2753 real grid_jump_limit;
2755 /* The distance between the boundaries of cells at distance
2756 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2757 * and by the fact that cells should not be shifted by more than
2758 * half their size, such that cg's only shift by one cell
2759 * at redecomposition.
2761 grid_jump_limit = comm->cellsize_limit;
2762 if (!comm->bVacDLBNoLimit)
2764 if (comm->bPMELoadBalDLBLimits)
2766 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2768 grid_jump_limit = max(grid_jump_limit,
2769 cutoff/comm->cd[dim_ind].np);
2772 return grid_jump_limit;
2775 static gmx_bool check_grid_jump(gmx_int64_t step,
2781 gmx_domdec_comm_t *comm;
2790 for (d = 1; d < dd->ndim; d++)
2793 limit = grid_jump_limit(comm, cutoff, d);
2794 bfac = ddbox->box_size[dim];
2795 if (ddbox->tric_dir[dim])
2797 bfac *= ddbox->skew_fac[dim];
2799 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2800 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2808 /* This error should never be triggered under normal
2809 * circumstances, but you never know ...
2811 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
2812 gmx_step_str(step, buf),
2813 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2821 static int dd_load_count(gmx_domdec_comm_t *comm)
2823 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2826 static float dd_force_load(gmx_domdec_comm_t *comm)
2833 if (comm->eFlop > 1)
2835 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2840 load = comm->cycl[ddCyclF];
2841 if (comm->cycl_n[ddCyclF] > 1)
2843 /* Subtract the maximum of the last n cycle counts
2844 * to get rid of possible high counts due to other sources,
2845 * for instance system activity, that would otherwise
2846 * affect the dynamic load balancing.
2848 load -= comm->cycl_max[ddCyclF];
2852 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2854 float gpu_wait, gpu_wait_sum;
2856 gpu_wait = comm->cycl[ddCyclWaitGPU];
2857 if (comm->cycl_n[ddCyclF] > 1)
2859 /* We should remove the WaitGPU time of the same MD step
2860 * as the one with the maximum F time, since the F time
2861 * and the wait time are not independent.
2862 * Furthermore, the step for the max F time should be chosen
2863 * the same on all ranks that share the same GPU.
2864 * But to keep the code simple, we remove the average instead.
2865 * The main reason for artificially long times at some steps
2866 * is spurious CPU activity or MPI time, so we don't expect
2867 * that changes in the GPU wait time matter a lot here.
2869 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2871 /* Sum the wait times over the ranks that share the same GPU */
2872 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2873 comm->mpi_comm_gpu_shared);
2874 /* Replace the wait time by the average over the ranks */
2875 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
2883 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2885 gmx_domdec_comm_t *comm;
2890 snew(*dim_f, dd->nc[dim]+1);
2892 for (i = 1; i < dd->nc[dim]; i++)
2894 if (comm->slb_frac[dim])
2896 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2900 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2903 (*dim_f)[dd->nc[dim]] = 1;
2906 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2908 int pmeindex, slab, nso, i;
2911 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2917 ddpme->dim = dimind;
2919 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2921 ddpme->nslab = (ddpme->dim == 0 ?
2922 dd->comm->npmenodes_x :
2923 dd->comm->npmenodes_y);
2925 if (ddpme->nslab <= 1)
2930 nso = dd->comm->npmenodes/ddpme->nslab;
2931 /* Determine for each PME slab the PP location range for dimension dim */
2932 snew(ddpme->pp_min, ddpme->nslab);
2933 snew(ddpme->pp_max, ddpme->nslab);
2934 for (slab = 0; slab < ddpme->nslab; slab++)
2936 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2937 ddpme->pp_max[slab] = 0;
2939 for (i = 0; i < dd->nnodes; i++)
2941 ddindex2xyz(dd->nc, i, xyz);
2942 /* For y only use our y/z slab.
2943 * This assumes that the PME x grid size matches the DD grid size.
2945 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2947 pmeindex = ddindex2pmeindex(dd, i);
2950 slab = pmeindex/nso;
2954 slab = pmeindex % ddpme->nslab;
2956 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
2957 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
2961 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
2964 int dd_pme_maxshift_x(gmx_domdec_t *dd)
2966 if (dd->comm->ddpme[0].dim == XX)
2968 return dd->comm->ddpme[0].maxshift;
2976 int dd_pme_maxshift_y(gmx_domdec_t *dd)
2978 if (dd->comm->ddpme[0].dim == YY)
2980 return dd->comm->ddpme[0].maxshift;
2982 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2984 return dd->comm->ddpme[1].maxshift;
2992 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
2993 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
2995 gmx_domdec_comm_t *comm;
2998 real range, pme_boundary;
3002 nc = dd->nc[ddpme->dim];
3005 if (!ddpme->dim_match)
3007 /* PP decomposition is not along dim: the worst situation */
3010 else if (ns <= 3 || (bUniform && ns == nc))
3012 /* The optimal situation */
3017 /* We need to check for all pme nodes which nodes they
3018 * could possibly need to communicate with.
3020 xmin = ddpme->pp_min;
3021 xmax = ddpme->pp_max;
3022 /* Allow for atoms to be maximally 2/3 times the cut-off
3023 * out of their DD cell. This is a reasonable balance between
3024 * between performance and support for most charge-group/cut-off
3027 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3028 /* Avoid extra communication when we are exactly at a boundary */
3032 for (s = 0; s < ns; s++)
3034 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3035 pme_boundary = (real)s/ns;
3038 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3040 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3044 pme_boundary = (real)(s+1)/ns;
3047 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3049 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3056 ddpme->maxshift = sh;
3060 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3061 ddpme->dim, ddpme->maxshift);
3065 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3069 for (d = 0; d < dd->ndim; d++)
3072 if (dim < ddbox->nboundeddim &&
3073 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3074 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3076 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3077 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3078 dd->nc[dim], dd->comm->cellsize_limit);
3083 enum { setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY };
3085 /* Set the domain boundaries. Use for static (or no) load balancing,
3086 * and also for the starting state for dynamic load balancing.
3087 * setmode determine if and where the boundaries are stored, use enum above.
3088 * Returns the number communication pulses in npulse.
3090 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3091 int setmode, ivec npulse)
3093 gmx_domdec_comm_t *comm;
3096 real *cell_x, cell_dx, cellsize;
3100 for (d = 0; d < DIM; d++)
3102 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
3104 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3107 cell_dx = ddbox->box_size[d]/dd->nc[d];
3110 case setcellsizeslbMASTER:
3111 for (j = 0; j < dd->nc[d]+1; j++)
3113 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3116 case setcellsizeslbLOCAL:
3117 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3118 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
3123 cellsize = cell_dx*ddbox->skew_fac[d];
3124 while (cellsize*npulse[d] < comm->cutoff)
3128 cellsize_min[d] = cellsize;
3132 /* Statically load balanced grid */
3133 /* Also when we are not doing a master distribution we determine
3134 * all cell borders in a loop to obtain identical values
3135 * to the master distribution case and to determine npulse.
3137 if (setmode == setcellsizeslbMASTER)
3139 cell_x = dd->ma->cell_x[d];
3143 snew(cell_x, dd->nc[d]+1);
3145 cell_x[0] = ddbox->box0[d];
3146 for (j = 0; j < dd->nc[d]; j++)
3148 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3149 cell_x[j+1] = cell_x[j] + cell_dx;
3150 cellsize = cell_dx*ddbox->skew_fac[d];
3151 while (cellsize*npulse[d] < comm->cutoff &&
3152 npulse[d] < dd->nc[d]-1)
3156 cellsize_min[d] = min(cellsize_min[d], cellsize);
3158 if (setmode == setcellsizeslbLOCAL)
3160 comm->cell_x0[d] = cell_x[dd->ci[d]];
3161 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3163 if (setmode != setcellsizeslbMASTER)
3168 /* The following limitation is to avoid that a cell would receive
3169 * some of its own home charge groups back over the periodic boundary.
3170 * Double charge groups cause trouble with the global indices.
3172 if (d < ddbox->npbcdim &&
3173 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3175 char error_string[STRLEN];
3177 sprintf(error_string,
3178 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3179 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3181 dd->nc[d], dd->nc[d],
3182 dd->nnodes > dd->nc[d] ? "cells" : "processors");
3184 if (setmode == setcellsizeslbLOCAL)
3186 gmx_fatal_collective(FARGS, NULL, dd, error_string);
3190 gmx_fatal(FARGS, error_string);
3195 if (!comm->bDynLoadBal)
3197 copy_rvec(cellsize_min, comm->cellsize_min);
3200 for (d = 0; d < comm->npmedecompdim; d++)
3202 set_pme_maxshift(dd, &comm->ddpme[d],
3203 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3204 comm->ddpme[d].slb_dim_f);
3209 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3210 int d, int dim, gmx_domdec_root_t *root,
3212 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3214 gmx_domdec_comm_t *comm;
3215 int ncd, i, j, nmin, nmin_old;
3216 gmx_bool bLimLo, bLimHi;
3218 real fac, halfway, cellsize_limit_f_i, region_size;
3219 gmx_bool bPBC, bLastHi = FALSE;
3220 int nrange[] = {range[0], range[1]};
3222 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
3228 bPBC = (dim < ddbox->npbcdim);
3230 cell_size = root->buf_ncd;
3234 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3237 /* First we need to check if the scaling does not make cells
3238 * smaller than the smallest allowed size.
3239 * We need to do this iteratively, since if a cell is too small,
3240 * it needs to be enlarged, which makes all the other cells smaller,
3241 * which could in turn make another cell smaller than allowed.
3243 for (i = range[0]; i < range[1]; i++)
3245 root->bCellMin[i] = FALSE;
3251 /* We need the total for normalization */
3253 for (i = range[0]; i < range[1]; i++)
3255 if (root->bCellMin[i] == FALSE)
3257 fac += cell_size[i];
3260 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3261 /* Determine the cell boundaries */
3262 for (i = range[0]; i < range[1]; i++)
3264 if (root->bCellMin[i] == FALSE)
3266 cell_size[i] *= fac;
3267 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3269 cellsize_limit_f_i = 0;
3273 cellsize_limit_f_i = cellsize_limit_f;
3275 if (cell_size[i] < cellsize_limit_f_i)
3277 root->bCellMin[i] = TRUE;
3278 cell_size[i] = cellsize_limit_f_i;
3282 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
3285 while (nmin > nmin_old);
3288 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3289 /* For this check we should not use DD_CELL_MARGIN,
3290 * but a slightly smaller factor,
3291 * since rounding could get use below the limit.
3293 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3296 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3297 gmx_step_str(step, buf),
3298 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3299 ncd, comm->cellsize_min[dim]);
3302 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3306 /* Check if the boundary did not displace more than halfway
3307 * each of the cells it bounds, as this could cause problems,
3308 * especially when the differences between cell sizes are large.
3309 * If changes are applied, they will not make cells smaller
3310 * than the cut-off, as we check all the boundaries which
3311 * might be affected by a change and if the old state was ok,
3312 * the cells will at most be shrunk back to their old size.
3314 for (i = range[0]+1; i < range[1]; i++)
3316 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3317 if (root->cell_f[i] < halfway)
3319 root->cell_f[i] = halfway;
3320 /* Check if the change also causes shifts of the next boundaries */
3321 for (j = i+1; j < range[1]; j++)
3323 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3325 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3329 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3330 if (root->cell_f[i] > halfway)
3332 root->cell_f[i] = halfway;
3333 /* Check if the change also causes shifts of the next boundaries */
3334 for (j = i-1; j >= range[0]+1; j--)
3336 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3338 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3345 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3346 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3347 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3348 * for a and b nrange is used */
3351 /* Take care of the staggering of the cell boundaries */
3354 for (i = range[0]; i < range[1]; i++)
3356 root->cell_f_max0[i] = root->cell_f[i];
3357 root->cell_f_min1[i] = root->cell_f[i+1];
3362 for (i = range[0]+1; i < range[1]; i++)
3364 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3365 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3366 if (bLimLo && bLimHi)
3368 /* Both limits violated, try the best we can */
3369 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3370 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3371 nrange[0] = range[0];
3373 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3376 nrange[1] = range[1];
3377 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3383 /* root->cell_f[i] = root->bound_min[i]; */
3384 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3387 else if (bLimHi && !bLastHi)
3390 if (nrange[1] < range[1]) /* found a LimLo before */
3392 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3393 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3394 nrange[0] = nrange[1];
3396 root->cell_f[i] = root->bound_max[i];
3398 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3400 nrange[1] = range[1];
3403 if (nrange[1] < range[1]) /* found last a LimLo */
3405 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3406 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3407 nrange[0] = nrange[1];
3408 nrange[1] = range[1];
3409 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3411 else if (nrange[0] > range[0]) /* found at least one LimHi */
3413 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3420 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3421 int d, int dim, gmx_domdec_root_t *root,
3422 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3423 gmx_bool bUniform, gmx_int64_t step)
3425 gmx_domdec_comm_t *comm;
3426 int ncd, d1, i, j, pos;
3428 real load_aver, load_i, imbalance, change, change_max, sc;
3429 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3433 int range[] = { 0, 0 };
3437 /* Convert the maximum change from the input percentage to a fraction */
3438 change_limit = comm->dlb_scale_lim*0.01;
3442 bPBC = (dim < ddbox->npbcdim);
3444 cell_size = root->buf_ncd;
3446 /* Store the original boundaries */
3447 for (i = 0; i < ncd+1; i++)
3449 root->old_cell_f[i] = root->cell_f[i];
3453 for (i = 0; i < ncd; i++)
3455 cell_size[i] = 1.0/ncd;
3458 else if (dd_load_count(comm))
3460 load_aver = comm->load[d].sum_m/ncd;
3462 for (i = 0; i < ncd; i++)
3464 /* Determine the relative imbalance of cell i */
3465 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3466 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3467 /* Determine the change of the cell size using underrelaxation */
3468 change = -relax*imbalance;
3469 change_max = max(change_max, max(change, -change));
3471 /* Limit the amount of scaling.
3472 * We need to use the same rescaling for all cells in one row,
3473 * otherwise the load balancing might not converge.
3476 if (change_max > change_limit)
3478 sc *= change_limit/change_max;
3480 for (i = 0; i < ncd; i++)
3482 /* Determine the relative imbalance of cell i */
3483 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3484 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3485 /* Determine the change of the cell size using underrelaxation */
3486 change = -sc*imbalance;
3487 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3491 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3492 cellsize_limit_f *= DD_CELL_MARGIN;
3493 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3494 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3495 if (ddbox->tric_dir[dim])
3497 cellsize_limit_f /= ddbox->skew_fac[dim];
3498 dist_min_f /= ddbox->skew_fac[dim];
3500 if (bDynamicBox && d > 0)
3502 dist_min_f *= DD_PRES_SCALE_MARGIN;
3504 if (d > 0 && !bUniform)
3506 /* Make sure that the grid is not shifted too much */
3507 for (i = 1; i < ncd; i++)
3509 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3511 gmx_incons("Inconsistent DD boundary staggering limits!");
3513 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3514 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3517 root->bound_min[i] += 0.5*space;
3519 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3520 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3523 root->bound_max[i] += 0.5*space;
3528 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3530 root->cell_f_max0[i-1] + dist_min_f,
3531 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3532 root->cell_f_min1[i] - dist_min_f);
3537 root->cell_f[0] = 0;
3538 root->cell_f[ncd] = 1;
3539 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3542 /* After the checks above, the cells should obey the cut-off
3543 * restrictions, but it does not hurt to check.
3545 for (i = 0; i < ncd; i++)
3549 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3550 dim, i, root->cell_f[i], root->cell_f[i+1]);
3553 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3554 root->cell_f[i+1] - root->cell_f[i] <
3555 cellsize_limit_f/DD_CELL_MARGIN)
3559 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3560 gmx_step_str(step, buf), dim2char(dim), i,
3561 (root->cell_f[i+1] - root->cell_f[i])
3562 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3567 /* Store the cell boundaries of the lower dimensions at the end */
3568 for (d1 = 0; d1 < d; d1++)
3570 root->cell_f[pos++] = comm->cell_f0[d1];
3571 root->cell_f[pos++] = comm->cell_f1[d1];
3574 if (d < comm->npmedecompdim)
3576 /* The master determines the maximum shift for
3577 * the coordinate communication between separate PME nodes.
3579 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
3581 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3584 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3588 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3589 gmx_ddbox_t *ddbox, int dimind)
3591 gmx_domdec_comm_t *comm;
3596 /* Set the cell dimensions */
3597 dim = dd->dim[dimind];
3598 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3599 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3600 if (dim >= ddbox->nboundeddim)
3602 comm->cell_x0[dim] += ddbox->box0[dim];
3603 comm->cell_x1[dim] += ddbox->box0[dim];
3607 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3608 int d, int dim, real *cell_f_row,
3611 gmx_domdec_comm_t *comm;
3617 /* Each node would only need to know two fractions,
3618 * but it is probably cheaper to broadcast the whole array.
3620 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3621 0, comm->mpi_comm_load[d]);
3623 /* Copy the fractions for this dimension from the buffer */
3624 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3625 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3626 /* The whole array was communicated, so set the buffer position */
3627 pos = dd->nc[dim] + 1;
3628 for (d1 = 0; d1 <= d; d1++)
3632 /* Copy the cell fractions of the lower dimensions */
3633 comm->cell_f0[d1] = cell_f_row[pos++];
3634 comm->cell_f1[d1] = cell_f_row[pos++];
3636 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3638 /* Convert the communicated shift from float to int */
3639 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3642 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3646 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3647 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3648 gmx_bool bUniform, gmx_int64_t step)
3650 gmx_domdec_comm_t *comm;
3652 gmx_bool bRowMember, bRowRoot;
3657 for (d = 0; d < dd->ndim; d++)
3662 for (d1 = d; d1 < dd->ndim; d1++)
3664 if (dd->ci[dd->dim[d1]] > 0)
3677 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3678 ddbox, bDynamicBox, bUniform, step);
3679 cell_f_row = comm->root[d]->cell_f;
3683 cell_f_row = comm->cell_f_row;
3685 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
3690 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3694 /* This function assumes the box is static and should therefore
3695 * not be called when the box has changed since the last
3696 * call to dd_partition_system.
3698 for (d = 0; d < dd->ndim; d++)
3700 relative_to_absolute_cell_bounds(dd, ddbox, d);
3706 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3707 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3708 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3709 gmx_wallcycle_t wcycle)
3711 gmx_domdec_comm_t *comm;
3718 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3719 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3720 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3722 else if (bDynamicBox)
3724 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3727 /* Set the dimensions for which no DD is used */
3728 for (dim = 0; dim < DIM; dim++)
3730 if (dd->nc[dim] == 1)
3732 comm->cell_x0[dim] = 0;
3733 comm->cell_x1[dim] = ddbox->box_size[dim];
3734 if (dim >= ddbox->nboundeddim)
3736 comm->cell_x0[dim] += ddbox->box0[dim];
3737 comm->cell_x1[dim] += ddbox->box0[dim];
3743 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3746 gmx_domdec_comm_dim_t *cd;
3748 for (d = 0; d < dd->ndim; d++)
3750 cd = &dd->comm->cd[d];
3751 np = npulse[dd->dim[d]];
3752 if (np > cd->np_nalloc)
3756 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3757 dim2char(dd->dim[d]), np);
3759 if (DDMASTER(dd) && cd->np_nalloc > 0)
3761 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3763 srenew(cd->ind, np);
3764 for (i = cd->np_nalloc; i < np; i++)
3766 cd->ind[i].index = NULL;
3767 cd->ind[i].nalloc = 0;
3776 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3777 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3778 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3779 gmx_wallcycle_t wcycle)
3781 gmx_domdec_comm_t *comm;
3787 /* Copy the old cell boundaries for the cg displacement check */
3788 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3789 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3791 if (comm->bDynLoadBal)
3795 check_box_size(dd, ddbox);
3797 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3801 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
3802 realloc_comm_ind(dd, npulse);
3807 for (d = 0; d < DIM; d++)
3809 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3810 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
3815 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3817 rvec cell_ns_x0, rvec cell_ns_x1,
3820 gmx_domdec_comm_t *comm;
3825 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3827 dim = dd->dim[dim_ind];
3829 /* Without PBC we don't have restrictions on the outer cells */
3830 if (!(dim >= ddbox->npbcdim &&
3831 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3832 comm->bDynLoadBal &&
3833 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3834 comm->cellsize_min[dim])
3837 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3838 gmx_step_str(step, buf), dim2char(dim),
3839 comm->cell_x1[dim] - comm->cell_x0[dim],
3840 ddbox->skew_fac[dim],
3841 dd->comm->cellsize_min[dim],
3842 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3846 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3848 /* Communicate the boundaries and update cell_ns_x0/1 */
3849 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3850 if (dd->bGridJump && dd->ndim > 1)
3852 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
3857 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3861 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3869 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3870 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3879 static void check_screw_box(matrix box)
3881 /* Mathematical limitation */
3882 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3884 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3887 /* Limitation due to the asymmetry of the eighth shell method */
3888 if (box[ZZ][YY] != 0)
3890 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
3894 static void distribute_cg(FILE *fplog, gmx_int64_t step,
3895 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3898 gmx_domdec_master_t *ma;
3899 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3900 int i, icg, j, k, k0, k1, d, npbcdim;
3902 rvec box_size, cg_cm;
3904 real nrcg, inv_ncg, pos_d;
3906 gmx_bool bUnbounded, bScrew;
3910 if (tmp_ind == NULL)
3912 snew(tmp_nalloc, dd->nnodes);
3913 snew(tmp_ind, dd->nnodes);
3914 for (i = 0; i < dd->nnodes; i++)
3916 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3917 snew(tmp_ind[i], tmp_nalloc[i]);
3921 /* Clear the count */
3922 for (i = 0; i < dd->nnodes; i++)
3928 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3930 cgindex = cgs->index;
3932 /* Compute the center of geometry for all charge groups */
3933 for (icg = 0; icg < cgs->nr; icg++)
3936 k1 = cgindex[icg+1];
3940 copy_rvec(pos[k0], cg_cm);
3947 for (k = k0; (k < k1); k++)
3949 rvec_inc(cg_cm, pos[k]);
3951 for (d = 0; (d < DIM); d++)
3953 cg_cm[d] *= inv_ncg;
3956 /* Put the charge group in the box and determine the cell index */
3957 for (d = DIM-1; d >= 0; d--)
3960 if (d < dd->npbcdim)
3962 bScrew = (dd->bScrewPBC && d == XX);
3963 if (tric_dir[d] && dd->nc[d] > 1)
3965 /* Use triclinic coordintates for this dimension */
3966 for (j = d+1; j < DIM; j++)
3968 pos_d += cg_cm[j]*tcm[j][d];
3971 while (pos_d >= box[d][d])
3974 rvec_dec(cg_cm, box[d]);
3977 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3978 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3980 for (k = k0; (k < k1); k++)
3982 rvec_dec(pos[k], box[d]);
3985 pos[k][YY] = box[YY][YY] - pos[k][YY];
3986 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3993 rvec_inc(cg_cm, box[d]);
3996 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3997 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3999 for (k = k0; (k < k1); k++)
4001 rvec_inc(pos[k], box[d]);
4004 pos[k][YY] = box[YY][YY] - pos[k][YY];
4005 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4010 /* This could be done more efficiently */
4012 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
4017 i = dd_index(dd->nc, ind);
4018 if (ma->ncg[i] == tmp_nalloc[i])
4020 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
4021 srenew(tmp_ind[i], tmp_nalloc[i]);
4023 tmp_ind[i][ma->ncg[i]] = icg;
4025 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
4029 for (i = 0; i < dd->nnodes; i++)
4032 for (k = 0; k < ma->ncg[i]; k++)
4034 ma->cg[k1++] = tmp_ind[i][k];
4037 ma->index[dd->nnodes] = k1;
4039 for (i = 0; i < dd->nnodes; i++)
4049 fprintf(fplog, "Charge group distribution at step %s:",
4050 gmx_step_str(step, buf));
4051 for (i = 0; i < dd->nnodes; i++)
4053 fprintf(fplog, " %d", ma->ncg[i]);
4055 fprintf(fplog, "\n");
4059 static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4060 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4063 gmx_domdec_master_t *ma = NULL;
4066 int *ibuf, buf2[2] = { 0, 0 };
4067 gmx_bool bMaster = DDMASTER(dd);
4075 check_screw_box(box);
4078 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
4080 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
4081 for (i = 0; i < dd->nnodes; i++)
4083 ma->ibuf[2*i] = ma->ncg[i];
4084 ma->ibuf[2*i+1] = ma->nat[i];
4092 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4094 dd->ncg_home = buf2[0];
4095 dd->nat_home = buf2[1];
4096 dd->ncg_tot = dd->ncg_home;
4097 dd->nat_tot = dd->nat_home;
4098 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4100 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4101 srenew(dd->index_gl, dd->cg_nalloc);
4102 srenew(dd->cgindex, dd->cg_nalloc+1);
4106 for (i = 0; i < dd->nnodes; i++)
4108 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4109 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4114 DDMASTER(dd) ? ma->ibuf : NULL,
4115 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4116 DDMASTER(dd) ? ma->cg : NULL,
4117 dd->ncg_home*sizeof(int), dd->index_gl);
4119 /* Determine the home charge group sizes */
4121 for (i = 0; i < dd->ncg_home; i++)
4123 cg_gl = dd->index_gl[i];
4125 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4130 fprintf(debug, "Home charge groups:\n");
4131 for (i = 0; i < dd->ncg_home; i++)
4133 fprintf(debug, " %d", dd->index_gl[i]);
4136 fprintf(debug, "\n");
4139 fprintf(debug, "\n");
4143 static int compact_and_copy_vec_at(int ncg, int *move,
4146 rvec *src, gmx_domdec_comm_t *comm,
4149 int m, icg, i, i0, i1, nrcg;
4155 for (m = 0; m < DIM*2; m++)
4161 for (icg = 0; icg < ncg; icg++)
4163 i1 = cgindex[icg+1];
4169 /* Compact the home array in place */
4170 for (i = i0; i < i1; i++)
4172 copy_rvec(src[i], src[home_pos++]);
4178 /* Copy to the communication buffer */
4180 pos_vec[m] += 1 + vec*nrcg;
4181 for (i = i0; i < i1; i++)
4183 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4185 pos_vec[m] += (nvec - vec - 1)*nrcg;
4189 home_pos += i1 - i0;
4197 static int compact_and_copy_vec_cg(int ncg, int *move,
4199 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4202 int m, icg, i0, i1, nrcg;
4208 for (m = 0; m < DIM*2; m++)
4214 for (icg = 0; icg < ncg; icg++)
4216 i1 = cgindex[icg+1];
4222 /* Compact the home array in place */
4223 copy_rvec(src[icg], src[home_pos++]);
4229 /* Copy to the communication buffer */
4230 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
4231 pos_vec[m] += 1 + nrcg*nvec;
4243 static int compact_ind(int ncg, int *move,
4244 int *index_gl, int *cgindex,
4246 gmx_ga2la_t ga2la, char *bLocalCG,
4249 int cg, nat, a0, a1, a, a_gl;
4254 for (cg = 0; cg < ncg; cg++)
4260 /* Compact the home arrays in place.
4261 * Anything that can be done here avoids access to global arrays.
4263 cgindex[home_pos] = nat;
4264 for (a = a0; a < a1; a++)
4267 gatindex[nat] = a_gl;
4268 /* The cell number stays 0, so we don't need to set it */
4269 ga2la_change_la(ga2la, a_gl, nat);
4272 index_gl[home_pos] = index_gl[cg];
4273 cginfo[home_pos] = cginfo[cg];
4274 /* The charge group remains local, so bLocalCG does not change */
4279 /* Clear the global indices */
4280 for (a = a0; a < a1; a++)
4282 ga2la_del(ga2la, gatindex[a]);
4286 bLocalCG[index_gl[cg]] = FALSE;
4290 cgindex[home_pos] = nat;
4295 static void clear_and_mark_ind(int ncg, int *move,
4296 int *index_gl, int *cgindex, int *gatindex,
4297 gmx_ga2la_t ga2la, char *bLocalCG,
4302 for (cg = 0; cg < ncg; cg++)
4308 /* Clear the global indices */
4309 for (a = a0; a < a1; a++)
4311 ga2la_del(ga2la, gatindex[a]);
4315 bLocalCG[index_gl[cg]] = FALSE;
4317 /* Signal that this cg has moved using the ns cell index.
4318 * Here we set it to -1. fill_grid will change it
4319 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4321 cell_index[cg] = -1;
4326 static void print_cg_move(FILE *fplog,
4328 gmx_int64_t step, int cg, int dim, int dir,
4329 gmx_bool bHaveLimitdAndCMOld, real limitd,
4330 rvec cm_old, rvec cm_new, real pos_d)
4332 gmx_domdec_comm_t *comm;
4337 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4338 if (bHaveLimitdAndCMOld)
4340 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4341 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4345 fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4346 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4348 fprintf(fplog, "distance out of cell %f\n",
4349 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4350 if (bHaveLimitdAndCMOld)
4352 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4353 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4355 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4356 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4357 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4359 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4360 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4362 comm->cell_x0[dim], comm->cell_x1[dim]);
4365 static void cg_move_error(FILE *fplog,
4367 gmx_int64_t step, int cg, int dim, int dir,
4368 gmx_bool bHaveLimitdAndCMOld, real limitd,
4369 rvec cm_old, rvec cm_new, real pos_d)
4373 print_cg_move(fplog, dd, step, cg, dim, dir,
4374 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4376 print_cg_move(stderr, dd, step, cg, dim, dir,
4377 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4379 "A charge group moved too far between two domain decomposition steps\n"
4380 "This usually means that your system is not well equilibrated");
4383 static void rotate_state_atom(t_state *state, int a)
4387 for (est = 0; est < estNR; est++)
4389 if (EST_DISTR(est) && (state->flags & (1<<est)))
4394 /* Rotate the complete state; for a rectangular box only */
4395 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4396 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4399 state->v[a][YY] = -state->v[a][YY];
4400 state->v[a][ZZ] = -state->v[a][ZZ];
4403 state->sd_X[a][YY] = -state->sd_X[a][YY];
4404 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4407 state->cg_p[a][YY] = -state->cg_p[a][YY];
4408 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4410 case estDISRE_INITF:
4411 case estDISRE_RM3TAV:
4412 case estORIRE_INITF:
4414 /* These are distances, so not affected by rotation */
4417 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4423 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4425 if (natoms > comm->moved_nalloc)
4427 /* Contents should be preserved here */
4428 comm->moved_nalloc = over_alloc_dd(natoms);
4429 srenew(comm->moved, comm->moved_nalloc);
4435 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4438 ivec tric_dir, matrix tcm,
4439 rvec cell_x0, rvec cell_x1,
4440 rvec limitd, rvec limit0, rvec limit1,
4442 int cg_start, int cg_end,
4447 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4448 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4452 real inv_ncg, pos_d;
4455 npbcdim = dd->npbcdim;
4457 for (cg = cg_start; cg < cg_end; cg++)
4464 copy_rvec(state->x[k0], cm_new);
4471 for (k = k0; (k < k1); k++)
4473 rvec_inc(cm_new, state->x[k]);
4475 for (d = 0; (d < DIM); d++)
4477 cm_new[d] = inv_ncg*cm_new[d];
4482 /* Do pbc and check DD cell boundary crossings */
4483 for (d = DIM-1; d >= 0; d--)
4487 bScrew = (dd->bScrewPBC && d == XX);
4488 /* Determine the location of this cg in lattice coordinates */
4492 for (d2 = d+1; d2 < DIM; d2++)
4494 pos_d += cm_new[d2]*tcm[d2][d];
4497 /* Put the charge group in the triclinic unit-cell */
4498 if (pos_d >= cell_x1[d])
4500 if (pos_d >= limit1[d])
4502 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4503 cg_cm[cg], cm_new, pos_d);
4506 if (dd->ci[d] == dd->nc[d] - 1)
4508 rvec_dec(cm_new, state->box[d]);
4511 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4512 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4514 for (k = k0; (k < k1); k++)
4516 rvec_dec(state->x[k], state->box[d]);
4519 rotate_state_atom(state, k);
4524 else if (pos_d < cell_x0[d])
4526 if (pos_d < limit0[d])
4528 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4529 cg_cm[cg], cm_new, pos_d);
4534 rvec_inc(cm_new, state->box[d]);
4537 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4538 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4540 for (k = k0; (k < k1); k++)
4542 rvec_inc(state->x[k], state->box[d]);
4545 rotate_state_atom(state, k);
4551 else if (d < npbcdim)
4553 /* Put the charge group in the rectangular unit-cell */
4554 while (cm_new[d] >= state->box[d][d])
4556 rvec_dec(cm_new, state->box[d]);
4557 for (k = k0; (k < k1); k++)
4559 rvec_dec(state->x[k], state->box[d]);
4562 while (cm_new[d] < 0)
4564 rvec_inc(cm_new, state->box[d]);
4565 for (k = k0; (k < k1); k++)
4567 rvec_inc(state->x[k], state->box[d]);
4573 copy_rvec(cm_new, cg_cm[cg]);
4575 /* Determine where this cg should go */
4578 for (d = 0; d < dd->ndim; d++)
4583 flag |= DD_FLAG_FW(d);
4589 else if (dev[dim] == -1)
4591 flag |= DD_FLAG_BW(d);
4594 if (dd->nc[dim] > 2)
4605 /* Temporarily store the flag in move */
4606 move[cg] = mc + flag;
4610 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4611 gmx_domdec_t *dd, ivec tric_dir,
4612 t_state *state, rvec **f,
4621 int ncg[DIM*2], nat[DIM*2];
4622 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4623 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4624 int sbuf[2], rbuf[2];
4625 int home_pos_cg, home_pos_at, buf_pos;
4627 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4630 real inv_ncg, pos_d;
4632 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4634 cginfo_mb_t *cginfo_mb;
4635 gmx_domdec_comm_t *comm;
4637 int nthread, thread;
4641 check_screw_box(state->box);
4645 if (fr->cutoff_scheme == ecutsGROUP)
4650 for (i = 0; i < estNR; i++)
4656 case estX: /* Always present */ break;
4657 case estV: bV = (state->flags & (1<<i)); break;
4658 case estSDX: bSDX = (state->flags & (1<<i)); break;
4659 case estCGP: bCGP = (state->flags & (1<<i)); break;
4662 case estDISRE_INITF:
4663 case estDISRE_RM3TAV:
4664 case estORIRE_INITF:
4666 /* No processing required */
4669 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4674 if (dd->ncg_tot > comm->nalloc_int)
4676 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4677 srenew(comm->buf_int, comm->nalloc_int);
4679 move = comm->buf_int;
4681 /* Clear the count */
4682 for (c = 0; c < dd->ndim*2; c++)
4688 npbcdim = dd->npbcdim;
4690 for (d = 0; (d < DIM); d++)
4692 limitd[d] = dd->comm->cellsize_min[d];
4693 if (d >= npbcdim && dd->ci[d] == 0)
4695 cell_x0[d] = -GMX_FLOAT_MAX;
4699 cell_x0[d] = comm->cell_x0[d];
4701 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4703 cell_x1[d] = GMX_FLOAT_MAX;
4707 cell_x1[d] = comm->cell_x1[d];
4711 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4712 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4716 /* We check after communication if a charge group moved
4717 * more than one cell. Set the pre-comm check limit to float_max.
4719 limit0[d] = -GMX_FLOAT_MAX;
4720 limit1[d] = GMX_FLOAT_MAX;
4724 make_tric_corr_matrix(npbcdim, state->box, tcm);
4726 cgindex = dd->cgindex;
4728 nthread = gmx_omp_nthreads_get(emntDomdec);
4730 /* Compute the center of geometry for all home charge groups
4731 * and put them in the box and determine where they should go.
4733 #pragma omp parallel for num_threads(nthread) schedule(static)
4734 for (thread = 0; thread < nthread; thread++)
4736 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4737 cell_x0, cell_x1, limitd, limit0, limit1,
4739 ( thread *dd->ncg_home)/nthread,
4740 ((thread+1)*dd->ncg_home)/nthread,
4741 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4745 for (cg = 0; cg < dd->ncg_home; cg++)
4750 flag = mc & ~DD_FLAG_NRCG;
4751 mc = mc & DD_FLAG_NRCG;
4754 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4756 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4757 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4759 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4760 /* We store the cg size in the lower 16 bits
4761 * and the place where the charge group should go
4762 * in the next 6 bits. This saves some communication volume.
4764 nrcg = cgindex[cg+1] - cgindex[cg];
4765 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4771 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4772 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4775 for (i = 0; i < dd->ndim*2; i++)
4777 *ncg_moved += ncg[i];
4794 /* Make sure the communication buffers are large enough */
4795 for (mc = 0; mc < dd->ndim*2; mc++)
4797 nvr = ncg[mc] + nat[mc]*nvec;
4798 if (nvr > comm->cgcm_state_nalloc[mc])
4800 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4801 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4805 switch (fr->cutoff_scheme)
4808 /* Recalculating cg_cm might be cheaper than communicating,
4809 * but that could give rise to rounding issues.
4812 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4813 nvec, cg_cm, comm, bCompact);
4816 /* Without charge groups we send the moved atom coordinates
4817 * over twice. This is so the code below can be used without
4818 * many conditionals for both for with and without charge groups.
4821 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4822 nvec, state->x, comm, FALSE);
4825 home_pos_cg -= *ncg_moved;
4829 gmx_incons("unimplemented");
4835 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4836 nvec, vec++, state->x, comm, bCompact);
4839 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4840 nvec, vec++, state->v, comm, bCompact);
4844 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4845 nvec, vec++, state->sd_X, comm, bCompact);
4849 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4850 nvec, vec++, state->cg_p, comm, bCompact);
4855 compact_ind(dd->ncg_home, move,
4856 dd->index_gl, dd->cgindex, dd->gatindex,
4857 dd->ga2la, comm->bLocalCG,
4862 if (fr->cutoff_scheme == ecutsVERLET)
4864 moved = get_moved(comm, dd->ncg_home);
4866 for (k = 0; k < dd->ncg_home; k++)
4873 moved = fr->ns.grid->cell_index;
4876 clear_and_mark_ind(dd->ncg_home, move,
4877 dd->index_gl, dd->cgindex, dd->gatindex,
4878 dd->ga2la, comm->bLocalCG,
4882 cginfo_mb = fr->cginfo_mb;
4884 *ncg_stay_home = home_pos_cg;
4885 for (d = 0; d < dd->ndim; d++)
4891 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4894 /* Communicate the cg and atom counts */
4899 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4900 d, dir, sbuf[0], sbuf[1]);
4902 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4904 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4906 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4907 srenew(comm->buf_int, comm->nalloc_int);
4910 /* Communicate the charge group indices, sizes and flags */
4911 dd_sendrecv_int(dd, d, dir,
4912 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4913 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4915 nvs = ncg[cdd] + nat[cdd]*nvec;
4916 i = rbuf[0] + rbuf[1] *nvec;
4917 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4919 /* Communicate cgcm and state */
4920 dd_sendrecv_rvec(dd, d, dir,
4921 comm->cgcm_state[cdd], nvs,
4922 comm->vbuf.v+nvr, i);
4923 ncg_recv += rbuf[0];
4924 nat_recv += rbuf[1];
4928 /* Process the received charge groups */
4930 for (cg = 0; cg < ncg_recv; cg++)
4932 flag = comm->buf_int[cg*DD_CGIBS+1];
4934 if (dim >= npbcdim && dd->nc[dim] > 2)
4936 /* No pbc in this dim and more than one domain boundary.
4937 * We do a separate check if a charge group didn't move too far.
4939 if (((flag & DD_FLAG_FW(d)) &&
4940 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4941 ((flag & DD_FLAG_BW(d)) &&
4942 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4944 cg_move_error(fplog, dd, step, cg, dim,
4945 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4947 comm->vbuf.v[buf_pos],
4948 comm->vbuf.v[buf_pos],
4949 comm->vbuf.v[buf_pos][dim]);
4956 /* Check which direction this cg should go */
4957 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4961 /* The cell boundaries for dimension d2 are not equal
4962 * for each cell row of the lower dimension(s),
4963 * therefore we might need to redetermine where
4964 * this cg should go.
4967 /* If this cg crosses the box boundary in dimension d2
4968 * we can use the communicated flag, so we do not
4969 * have to worry about pbc.
4971 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4972 (flag & DD_FLAG_FW(d2))) ||
4973 (dd->ci[dim2] == 0 &&
4974 (flag & DD_FLAG_BW(d2)))))
4976 /* Clear the two flags for this dimension */
4977 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4978 /* Determine the location of this cg
4979 * in lattice coordinates
4981 pos_d = comm->vbuf.v[buf_pos][dim2];
4984 for (d3 = dim2+1; d3 < DIM; d3++)
4987 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4990 /* Check of we are not at the box edge.
4991 * pbc is only handled in the first step above,
4992 * but this check could move over pbc while
4993 * the first step did not due to different rounding.
4995 if (pos_d >= cell_x1[dim2] &&
4996 dd->ci[dim2] != dd->nc[dim2]-1)
4998 flag |= DD_FLAG_FW(d2);
5000 else if (pos_d < cell_x0[dim2] &&
5003 flag |= DD_FLAG_BW(d2);
5005 comm->buf_int[cg*DD_CGIBS+1] = flag;
5008 /* Set to which neighboring cell this cg should go */
5009 if (flag & DD_FLAG_FW(d2))
5013 else if (flag & DD_FLAG_BW(d2))
5015 if (dd->nc[dd->dim[d2]] > 2)
5027 nrcg = flag & DD_FLAG_NRCG;
5030 if (home_pos_cg+1 > dd->cg_nalloc)
5032 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5033 srenew(dd->index_gl, dd->cg_nalloc);
5034 srenew(dd->cgindex, dd->cg_nalloc+1);
5036 /* Set the global charge group index and size */
5037 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5038 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5039 /* Copy the state from the buffer */
5040 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5041 if (fr->cutoff_scheme == ecutsGROUP)
5044 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5048 /* Set the cginfo */
5049 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5050 dd->index_gl[home_pos_cg]);
5053 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5056 if (home_pos_at+nrcg > state->nalloc)
5058 dd_realloc_state(state, f, home_pos_at+nrcg);
5060 for (i = 0; i < nrcg; i++)
5062 copy_rvec(comm->vbuf.v[buf_pos++],
5063 state->x[home_pos_at+i]);
5067 for (i = 0; i < nrcg; i++)
5069 copy_rvec(comm->vbuf.v[buf_pos++],
5070 state->v[home_pos_at+i]);
5075 for (i = 0; i < nrcg; i++)
5077 copy_rvec(comm->vbuf.v[buf_pos++],
5078 state->sd_X[home_pos_at+i]);
5083 for (i = 0; i < nrcg; i++)
5085 copy_rvec(comm->vbuf.v[buf_pos++],
5086 state->cg_p[home_pos_at+i]);
5090 home_pos_at += nrcg;
5094 /* Reallocate the buffers if necessary */
5095 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5097 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5098 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5100 nvr = ncg[mc] + nat[mc]*nvec;
5101 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5103 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5104 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5106 /* Copy from the receive to the send buffers */
5107 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5108 comm->buf_int + cg*DD_CGIBS,
5109 DD_CGIBS*sizeof(int));
5110 memcpy(comm->cgcm_state[mc][nvr],
5111 comm->vbuf.v[buf_pos],
5112 (1+nrcg*nvec)*sizeof(rvec));
5113 buf_pos += 1 + nrcg*nvec;
5120 /* With sorting (!bCompact) the indices are now only partially up to date
5121 * and ncg_home and nat_home are not the real count, since there are
5122 * "holes" in the arrays for the charge groups that moved to neighbors.
5124 if (fr->cutoff_scheme == ecutsVERLET)
5126 moved = get_moved(comm, home_pos_cg);
5128 for (i = dd->ncg_home; i < home_pos_cg; i++)
5133 dd->ncg_home = home_pos_cg;
5134 dd->nat_home = home_pos_at;
5139 "Finished repartitioning: cgs moved out %d, new home %d\n",
5140 *ncg_moved, dd->ncg_home-*ncg_moved);
5145 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5147 dd->comm->cycl[ddCycl] += cycles;
5148 dd->comm->cycl_n[ddCycl]++;
5149 if (cycles > dd->comm->cycl_max[ddCycl])
5151 dd->comm->cycl_max[ddCycl] = cycles;
5155 static double force_flop_count(t_nrnb *nrnb)
5162 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5164 /* To get closer to the real timings, we half the count
5165 * for the normal loops and again half it for water loops.
5168 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5170 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5174 sum += nrnb->n[i]*0.50*cost_nrnb(i);
5177 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5180 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5182 sum += nrnb->n[i]*cost_nrnb(i);
5185 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5187 sum += nrnb->n[i]*cost_nrnb(i);
5193 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5195 if (dd->comm->eFlop)
5197 dd->comm->flop -= force_flop_count(nrnb);
5200 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5202 if (dd->comm->eFlop)
5204 dd->comm->flop += force_flop_count(nrnb);
5209 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5213 for (i = 0; i < ddCyclNr; i++)
5215 dd->comm->cycl[i] = 0;
5216 dd->comm->cycl_n[i] = 0;
5217 dd->comm->cycl_max[i] = 0;
5220 dd->comm->flop_n = 0;
5223 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5225 gmx_domdec_comm_t *comm;
5226 gmx_domdec_load_t *load;
5227 gmx_domdec_root_t *root = NULL;
5228 int d, dim, cid, i, pos;
5229 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5234 fprintf(debug, "get_load_distribution start\n");
5237 wallcycle_start(wcycle, ewcDDCOMMLOAD);
5241 bSepPME = (dd->pme_nodeid >= 0);
5243 for (d = dd->ndim-1; d >= 0; d--)
5246 /* Check if we participate in the communication in this dimension */
5247 if (d == dd->ndim-1 ||
5248 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5250 load = &comm->load[d];
5253 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
5256 if (d == dd->ndim-1)
5258 sbuf[pos++] = dd_force_load(comm);
5259 sbuf[pos++] = sbuf[0];
5262 sbuf[pos++] = sbuf[0];
5263 sbuf[pos++] = cell_frac;
5266 sbuf[pos++] = comm->cell_f_max0[d];
5267 sbuf[pos++] = comm->cell_f_min1[d];
5272 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5273 sbuf[pos++] = comm->cycl[ddCyclPME];
5278 sbuf[pos++] = comm->load[d+1].sum;
5279 sbuf[pos++] = comm->load[d+1].max;
5282 sbuf[pos++] = comm->load[d+1].sum_m;
5283 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5284 sbuf[pos++] = comm->load[d+1].flags;
5287 sbuf[pos++] = comm->cell_f_max0[d];
5288 sbuf[pos++] = comm->cell_f_min1[d];
5293 sbuf[pos++] = comm->load[d+1].mdf;
5294 sbuf[pos++] = comm->load[d+1].pme;
5298 /* Communicate a row in DD direction d.
5299 * The communicators are setup such that the root always has rank 0.
5302 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5303 load->load, load->nload*sizeof(float), MPI_BYTE,
5304 0, comm->mpi_comm_load[d]);
5306 if (dd->ci[dim] == dd->master_ci[dim])
5308 /* We are the root, process this row */
5309 if (comm->bDynLoadBal)
5311 root = comm->root[d];
5321 for (i = 0; i < dd->nc[dim]; i++)
5323 load->sum += load->load[pos++];
5324 load->max = max(load->max, load->load[pos]);
5330 /* This direction could not be load balanced properly,
5331 * therefore we need to use the maximum iso the average load.
5333 load->sum_m = max(load->sum_m, load->load[pos]);
5337 load->sum_m += load->load[pos];
5340 load->cvol_min = min(load->cvol_min, load->load[pos]);
5344 load->flags = (int)(load->load[pos++] + 0.5);
5348 root->cell_f_max0[i] = load->load[pos++];
5349 root->cell_f_min1[i] = load->load[pos++];
5354 load->mdf = max(load->mdf, load->load[pos]);
5356 load->pme = max(load->pme, load->load[pos]);
5360 if (comm->bDynLoadBal && root->bLimited)
5362 load->sum_m *= dd->nc[dim];
5363 load->flags |= (1<<d);
5371 comm->nload += dd_load_count(comm);
5372 comm->load_step += comm->cycl[ddCyclStep];
5373 comm->load_sum += comm->load[0].sum;
5374 comm->load_max += comm->load[0].max;
5375 if (comm->bDynLoadBal)
5377 for (d = 0; d < dd->ndim; d++)
5379 if (comm->load[0].flags & (1<<d))
5381 comm->load_lim[d]++;
5387 comm->load_mdf += comm->load[0].mdf;
5388 comm->load_pme += comm->load[0].pme;
5392 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5396 fprintf(debug, "get_load_distribution finished\n");
5400 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5402 /* Return the relative performance loss on the total run time
5403 * due to the force calculation load imbalance.
5405 if (dd->comm->nload > 0)
5408 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5409 (dd->comm->load_step*dd->nnodes);
5417 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5420 int npp, npme, nnodes, d, limp;
5421 float imbal, pme_f_ratio, lossf, lossp = 0;
5423 gmx_domdec_comm_t *comm;
5426 if (DDMASTER(dd) && comm->nload > 0)
5429 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5430 nnodes = npp + npme;
5431 imbal = comm->load_max*npp/comm->load_sum - 1;
5432 lossf = dd_force_imb_perf_loss(dd);
5433 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5434 fprintf(fplog, "%s", buf);
5435 fprintf(stderr, "\n");
5436 fprintf(stderr, "%s", buf);
5437 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5438 fprintf(fplog, "%s", buf);
5439 fprintf(stderr, "%s", buf);
5441 if (comm->bDynLoadBal)
5443 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5444 for (d = 0; d < dd->ndim; d++)
5446 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5447 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5453 sprintf(buf+strlen(buf), "\n");
5454 fprintf(fplog, "%s", buf);
5455 fprintf(stderr, "%s", buf);
5459 pme_f_ratio = comm->load_pme/comm->load_mdf;
5460 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5463 lossp *= (float)npme/(float)nnodes;
5467 lossp *= (float)npp/(float)nnodes;
5469 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5470 fprintf(fplog, "%s", buf);
5471 fprintf(stderr, "%s", buf);
5472 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5473 fprintf(fplog, "%s", buf);
5474 fprintf(stderr, "%s", buf);
5476 fprintf(fplog, "\n");
5477 fprintf(stderr, "\n");
5479 if (lossf >= DD_PERF_LOSS)
5482 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5483 " in the domain decomposition.\n", lossf*100);
5484 if (!comm->bDynLoadBal)
5486 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5490 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5492 fprintf(fplog, "%s\n", buf);
5493 fprintf(stderr, "%s\n", buf);
5495 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
5498 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5499 " had %s work to do than the PP nodes.\n"
5500 " You might want to %s the number of PME nodes\n"
5501 " or %s the cut-off and the grid spacing.\n",
5503 (lossp < 0) ? "less" : "more",
5504 (lossp < 0) ? "decrease" : "increase",
5505 (lossp < 0) ? "decrease" : "increase");
5506 fprintf(fplog, "%s\n", buf);
5507 fprintf(stderr, "%s\n", buf);
5512 static float dd_vol_min(gmx_domdec_t *dd)
5514 return dd->comm->load[0].cvol_min*dd->nnodes;
5517 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5519 return dd->comm->load[0].flags;
5522 static float dd_f_imbal(gmx_domdec_t *dd)
5524 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5527 float dd_pme_f_ratio(gmx_domdec_t *dd)
5529 if (dd->comm->cycl_n[ddCyclPME] > 0)
5531 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5539 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5544 flags = dd_load_flags(dd);
5548 "DD load balancing is limited by minimum cell size in dimension");
5549 for (d = 0; d < dd->ndim; d++)
5553 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5556 fprintf(fplog, "\n");
5558 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5559 if (dd->comm->bDynLoadBal)
5561 fprintf(fplog, " vol min/aver %5.3f%c",
5562 dd_vol_min(dd), flags ? '!' : ' ');
5564 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5565 if (dd->comm->cycl_n[ddCyclPME])
5567 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5569 fprintf(fplog, "\n\n");
5572 static void dd_print_load_verbose(gmx_domdec_t *dd)
5574 if (dd->comm->bDynLoadBal)
5576 fprintf(stderr, "vol %4.2f%c ",
5577 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5579 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5580 if (dd->comm->cycl_n[ddCyclPME])
5582 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
5587 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5592 gmx_domdec_root_t *root;
5593 gmx_bool bPartOfGroup = FALSE;
5595 dim = dd->dim[dim_ind];
5596 copy_ivec(loc, loc_c);
5597 for (i = 0; i < dd->nc[dim]; i++)
5600 rank = dd_index(dd->nc, loc_c);
5601 if (rank == dd->rank)
5603 /* This process is part of the group */
5604 bPartOfGroup = TRUE;
5607 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5611 dd->comm->mpi_comm_load[dim_ind] = c_row;
5612 if (dd->comm->eDLB != edlbNO)
5614 if (dd->ci[dim] == dd->master_ci[dim])
5616 /* This is the root process of this row */
5617 snew(dd->comm->root[dim_ind], 1);
5618 root = dd->comm->root[dim_ind];
5619 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5620 snew(root->old_cell_f, dd->nc[dim]+1);
5621 snew(root->bCellMin, dd->nc[dim]);
5624 snew(root->cell_f_max0, dd->nc[dim]);
5625 snew(root->cell_f_min1, dd->nc[dim]);
5626 snew(root->bound_min, dd->nc[dim]);
5627 snew(root->bound_max, dd->nc[dim]);
5629 snew(root->buf_ncd, dd->nc[dim]);
5633 /* This is not a root process, we only need to receive cell_f */
5634 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5637 if (dd->ci[dim] == dd->master_ci[dim])
5639 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
5645 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5646 const gmx_hw_info_t gmx_unused *hwinfo,
5647 const gmx_hw_opt_t gmx_unused *hw_opt)
5650 int physicalnode_id_hash;
5653 MPI_Comm mpi_comm_pp_physicalnode;
5655 if (!(cr->duty & DUTY_PP) ||
5656 hw_opt->gpu_opt.ncuda_dev_use == 0)
5658 /* Only PP nodes (currently) use GPUs.
5659 * If we don't have GPUs, there are no resources to share.
5664 physicalnode_id_hash = gmx_physicalnode_id_hash();
5666 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5672 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5673 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5674 dd->rank, physicalnode_id_hash, gpu_id);
5676 /* Split the PP communicator over the physical nodes */
5677 /* TODO: See if we should store this (before), as it's also used for
5678 * for the nodecomm summution.
5680 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5681 &mpi_comm_pp_physicalnode);
5682 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5683 &dd->comm->mpi_comm_gpu_shared);
5684 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5685 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5689 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5692 /* Note that some ranks could share a GPU, while others don't */
5694 if (dd->comm->nrank_gpu_shared == 1)
5696 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
5701 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5704 int dim0, dim1, i, j;
5709 fprintf(debug, "Making load communicators\n");
5712 snew(dd->comm->load, dd->ndim);
5713 snew(dd->comm->mpi_comm_load, dd->ndim);
5716 make_load_communicator(dd, 0, loc);
5720 for (i = 0; i < dd->nc[dim0]; i++)
5723 make_load_communicator(dd, 1, loc);
5729 for (i = 0; i < dd->nc[dim0]; i++)
5733 for (j = 0; j < dd->nc[dim1]; j++)
5736 make_load_communicator(dd, 2, loc);
5743 fprintf(debug, "Finished making load communicators\n");
5748 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5751 int d, dim, i, j, m;
5754 ivec dd_zp[DD_MAXIZONE];
5755 gmx_domdec_zones_t *zones;
5756 gmx_domdec_ns_ranges_t *izone;
5758 for (d = 0; d < dd->ndim; d++)
5761 copy_ivec(dd->ci, tmp);
5762 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5763 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5764 copy_ivec(dd->ci, tmp);
5765 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5766 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5769 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5772 dd->neighbor[d][1]);
5778 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5780 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5781 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5788 for (i = 0; i < nzonep; i++)
5790 copy_ivec(dd_zp3[i], dd_zp[i]);
5796 for (i = 0; i < nzonep; i++)
5798 copy_ivec(dd_zp2[i], dd_zp[i]);
5804 for (i = 0; i < nzonep; i++)
5806 copy_ivec(dd_zp1[i], dd_zp[i]);
5810 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5815 zones = &dd->comm->zones;
5817 for (i = 0; i < nzone; i++)
5820 clear_ivec(zones->shift[i]);
5821 for (d = 0; d < dd->ndim; d++)
5823 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5828 for (i = 0; i < nzone; i++)
5830 for (d = 0; d < DIM; d++)
5832 s[d] = dd->ci[d] - zones->shift[i][d];
5837 else if (s[d] >= dd->nc[d])
5843 zones->nizone = nzonep;
5844 for (i = 0; i < zones->nizone; i++)
5846 if (dd_zp[i][0] != i)
5848 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5850 izone = &zones->izone[i];
5851 izone->j0 = dd_zp[i][1];
5852 izone->j1 = dd_zp[i][2];
5853 for (dim = 0; dim < DIM; dim++)
5855 if (dd->nc[dim] == 1)
5857 /* All shifts should be allowed */
5858 izone->shift0[dim] = -1;
5859 izone->shift1[dim] = 1;
5864 izone->shift0[d] = 0;
5865 izone->shift1[d] = 0;
5866 for(j=izone->j0; j<izone->j1; j++) {
5867 if (dd->shift[j][d] > dd->shift[i][d])
5868 izone->shift0[d] = -1;
5869 if (dd->shift[j][d] < dd->shift[i][d])
5870 izone->shift1[d] = 1;
5876 /* Assume the shift are not more than 1 cell */
5877 izone->shift0[dim] = 1;
5878 izone->shift1[dim] = -1;
5879 for (j = izone->j0; j < izone->j1; j++)
5881 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5882 if (shift_diff < izone->shift0[dim])
5884 izone->shift0[dim] = shift_diff;
5886 if (shift_diff > izone->shift1[dim])
5888 izone->shift1[dim] = shift_diff;
5895 if (dd->comm->eDLB != edlbNO)
5897 snew(dd->comm->root, dd->ndim);
5900 if (dd->comm->bRecordLoad)
5902 make_load_communicators(dd);
5906 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
5909 gmx_domdec_comm_t *comm;
5920 if (comm->bCartesianPP)
5922 /* Set up cartesian communication for the particle-particle part */
5925 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5926 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5929 for (i = 0; i < DIM; i++)
5933 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5935 /* We overwrite the old communicator with the new cartesian one */
5936 cr->mpi_comm_mygroup = comm_cart;
5939 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5940 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5942 if (comm->bCartesianPP_PME)
5944 /* Since we want to use the original cartesian setup for sim,
5945 * and not the one after split, we need to make an index.
5947 snew(comm->ddindex2ddnodeid, dd->nnodes);
5948 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5949 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5950 /* Get the rank of the DD master,
5951 * above we made sure that the master node is a PP node.
5961 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5963 else if (comm->bCartesianPP)
5965 if (cr->npmenodes == 0)
5967 /* The PP communicator is also
5968 * the communicator for this simulation
5970 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5972 cr->nodeid = dd->rank;
5974 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5976 /* We need to make an index to go from the coordinates
5977 * to the nodeid of this simulation.
5979 snew(comm->ddindex2simnodeid, dd->nnodes);
5980 snew(buf, dd->nnodes);
5981 if (cr->duty & DUTY_PP)
5983 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5985 /* Communicate the ddindex to simulation nodeid index */
5986 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5987 cr->mpi_comm_mysim);
5990 /* Determine the master coordinates and rank.
5991 * The DD master should be the same node as the master of this sim.
5993 for (i = 0; i < dd->nnodes; i++)
5995 if (comm->ddindex2simnodeid[i] == 0)
5997 ddindex2xyz(dd->nc, i, dd->master_ci);
5998 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
6003 fprintf(debug, "The master rank is %d\n", dd->masterrank);
6008 /* No Cartesian communicators */
6009 /* We use the rank in dd->comm->all as DD index */
6010 ddindex2xyz(dd->nc, dd->rank, dd->ci);
6011 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
6013 clear_ivec(dd->master_ci);
6020 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6021 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6026 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6027 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6031 static void receive_ddindex2simnodeid(t_commrec *cr)
6035 gmx_domdec_comm_t *comm;
6042 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6044 snew(comm->ddindex2simnodeid, dd->nnodes);
6045 snew(buf, dd->nnodes);
6046 if (cr->duty & DUTY_PP)
6048 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6051 /* Communicate the ddindex to simulation nodeid index */
6052 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6053 cr->mpi_comm_mysim);
6060 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6061 int ncg, int natoms)
6063 gmx_domdec_master_t *ma;
6068 snew(ma->ncg, dd->nnodes);
6069 snew(ma->index, dd->nnodes+1);
6071 snew(ma->nat, dd->nnodes);
6072 snew(ma->ibuf, dd->nnodes*2);
6073 snew(ma->cell_x, DIM);
6074 for (i = 0; i < DIM; i++)
6076 snew(ma->cell_x[i], dd->nc[i]+1);
6079 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6085 snew(ma->vbuf, natoms);
6091 static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
6092 int gmx_unused reorder)
6095 gmx_domdec_comm_t *comm;
6106 if (comm->bCartesianPP)
6108 for (i = 1; i < DIM; i++)
6110 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6112 if (bDiv[YY] || bDiv[ZZ])
6114 comm->bCartesianPP_PME = TRUE;
6115 /* If we have 2D PME decomposition, which is always in x+y,
6116 * we stack the PME only nodes in z.
6117 * Otherwise we choose the direction that provides the thinnest slab
6118 * of PME only nodes as this will have the least effect
6119 * on the PP communication.
6120 * But for the PME communication the opposite might be better.
6122 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6124 dd->nc[YY] > dd->nc[ZZ]))
6126 comm->cartpmedim = ZZ;
6130 comm->cartpmedim = YY;
6132 comm->ntot[comm->cartpmedim]
6133 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6137 fprintf(fplog, "#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6139 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6144 if (comm->bCartesianPP_PME)
6148 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6151 for (i = 0; i < DIM; i++)
6155 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6158 MPI_Comm_rank(comm_cart, &rank);
6159 if (MASTERNODE(cr) && rank != 0)
6161 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6164 /* With this assigment we loose the link to the original communicator
6165 * which will usually be MPI_COMM_WORLD, unless have multisim.
6167 cr->mpi_comm_mysim = comm_cart;
6168 cr->sim_nodeid = rank;
6170 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6174 fprintf(fplog, "Cartesian nodeid %d, coordinates %d %d %d\n\n",
6175 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6178 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6182 if (cr->npmenodes == 0 ||
6183 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6185 cr->duty = DUTY_PME;
6188 /* Split the sim communicator into PP and PME only nodes */
6189 MPI_Comm_split(cr->mpi_comm_mysim,
6191 dd_index(comm->ntot, dd->ci),
6192 &cr->mpi_comm_mygroup);
6196 switch (dd_node_order)
6201 fprintf(fplog, "Order of the nodes: PP first, PME last\n");
6204 case ddnoINTERLEAVE:
6205 /* Interleave the PP-only and PME-only nodes,
6206 * as on clusters with dual-core machines this will double
6207 * the communication bandwidth of the PME processes
6208 * and thus speed up the PP <-> PME and inter PME communication.
6212 fprintf(fplog, "Interleaving PP and PME nodes\n");
6214 comm->pmenodes = dd_pmenodes(cr);
6219 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6222 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6224 cr->duty = DUTY_PME;
6231 /* Split the sim communicator into PP and PME only nodes */
6232 MPI_Comm_split(cr->mpi_comm_mysim,
6235 &cr->mpi_comm_mygroup);
6236 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6242 fprintf(fplog, "This is a %s only node\n\n",
6243 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
6247 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6250 gmx_domdec_comm_t *comm;
6256 copy_ivec(dd->nc, comm->ntot);
6258 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6259 comm->bCartesianPP_PME = FALSE;
6261 /* Reorder the nodes by default. This might change the MPI ranks.
6262 * Real reordering is only supported on very few architectures,
6263 * Blue Gene is one of them.
6265 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6267 if (cr->npmenodes > 0)
6269 /* Split the communicator into a PP and PME part */
6270 split_communicator(fplog, cr, dd_node_order, CartReorder);
6271 if (comm->bCartesianPP_PME)
6273 /* We (possibly) reordered the nodes in split_communicator,
6274 * so it is no longer required in make_pp_communicator.
6276 CartReorder = FALSE;
6281 /* All nodes do PP and PME */
6283 /* We do not require separate communicators */
6284 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6288 if (cr->duty & DUTY_PP)
6290 /* Copy or make a new PP communicator */
6291 make_pp_communicator(fplog, cr, CartReorder);
6295 receive_ddindex2simnodeid(cr);
6298 if (!(cr->duty & DUTY_PME))
6300 /* Set up the commnuication to our PME node */
6301 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6302 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6305 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6306 dd->pme_nodeid, dd->pme_receive_vir_ener);
6311 dd->pme_nodeid = -1;
6316 dd->ma = init_gmx_domdec_master_t(dd,
6318 comm->cgs_gl.index[comm->cgs_gl.nr]);
6322 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6324 real *slb_frac, tot;
6329 if (nc > 1 && size_string != NULL)
6333 fprintf(fplog, "Using static load balancing for the %s direction\n",
6338 for (i = 0; i < nc; i++)
6341 sscanf(size_string, "%lf%n", &dbl, &n);
6344 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6353 fprintf(fplog, "Relative cell sizes:");
6355 for (i = 0; i < nc; i++)
6360 fprintf(fplog, " %5.3f", slb_frac[i]);
6365 fprintf(fplog, "\n");
6372 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6375 gmx_mtop_ilistloop_t iloop;
6379 iloop = gmx_mtop_ilistloop_init(mtop);
6380 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6382 for (ftype = 0; ftype < F_NRE; ftype++)
6384 if ((interaction_function[ftype].flags & IF_BOND) &&
6387 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
6395 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6401 val = getenv(env_var);
6404 if (sscanf(val, "%d", &nst) <= 0)
6410 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6418 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6422 fprintf(stderr, "\n%s\n", warn_string);
6426 fprintf(fplog, "\n%s\n", warn_string);
6430 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6431 t_inputrec *ir, FILE *fplog)
6433 if (ir->ePBC == epbcSCREW &&
6434 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6436 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6439 if (ir->ns_type == ensSIMPLE)
6441 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6444 if (ir->nstlist == 0)
6446 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6449 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6451 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6455 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6460 r = ddbox->box_size[XX];
6461 for (di = 0; di < dd->ndim; di++)
6464 /* Check using the initial average cell size */
6465 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6471 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6472 const char *dlb_opt, gmx_bool bRecordLoad,
6473 unsigned long Flags, t_inputrec *ir)
6481 case 'a': eDLB = edlbAUTO; break;
6482 case 'n': eDLB = edlbNO; break;
6483 case 'y': eDLB = edlbYES; break;
6484 default: gmx_incons("Unknown dlb_opt");
6487 if (Flags & MD_RERUN)
6492 if (!EI_DYNAMICS(ir->eI))
6494 if (eDLB == edlbYES)
6496 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6497 dd_warning(cr, fplog, buf);
6505 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6510 if (Flags & MD_REPRODUCIBLE)
6517 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6521 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6524 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
6532 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6537 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6539 /* Decomposition order z,y,x */
6542 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6544 for (dim = DIM-1; dim >= 0; dim--)
6546 if (dd->nc[dim] > 1)
6548 dd->dim[dd->ndim++] = dim;
6554 /* Decomposition order x,y,z */
6555 for (dim = 0; dim < DIM; dim++)
6557 if (dd->nc[dim] > 1)
6559 dd->dim[dd->ndim++] = dim;
6565 static gmx_domdec_comm_t *init_dd_comm()
6567 gmx_domdec_comm_t *comm;
6571 snew(comm->cggl_flag, DIM*2);
6572 snew(comm->cgcm_state, DIM*2);
6573 for (i = 0; i < DIM*2; i++)
6575 comm->cggl_flag_nalloc[i] = 0;
6576 comm->cgcm_state_nalloc[i] = 0;
6579 comm->nalloc_int = 0;
6580 comm->buf_int = NULL;
6582 vec_rvec_init(&comm->vbuf);
6584 comm->n_load_have = 0;
6585 comm->n_load_collect = 0;
6587 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6589 comm->sum_nat[i] = 0;
6593 comm->load_step = 0;
6596 clear_ivec(comm->load_lim);
6603 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6604 unsigned long Flags,
6606 real comm_distance_min, real rconstr,
6607 const char *dlb_opt, real dlb_scale,
6608 const char *sizex, const char *sizey, const char *sizez,
6609 gmx_mtop_t *mtop, t_inputrec *ir,
6610 matrix box, rvec *x,
6612 int *npme_x, int *npme_y)
6615 gmx_domdec_comm_t *comm;
6618 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6625 "\nInitializing Domain Decomposition on %d nodes\n", cr->nnodes);
6630 dd->comm = init_dd_comm();
6632 snew(comm->cggl_flag, DIM*2);
6633 snew(comm->cgcm_state, DIM*2);
6635 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6636 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6638 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
6639 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
6640 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
6641 recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
6642 comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
6643 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
6644 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
6645 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
6647 dd->pme_recv_f_alloc = 0;
6648 dd->pme_recv_f_buf = NULL;
6650 if (dd->bSendRecv2 && fplog)
6652 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6658 fprintf(fplog, "Will load balance based on FLOP count\n");
6660 if (comm->eFlop > 1)
6662 srand(1+cr->nodeid);
6664 comm->bRecordLoad = TRUE;
6668 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6672 /* Initialize to GPU share count to 0, might change later */
6673 comm->nrank_gpu_shared = 0;
6675 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6677 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6680 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6682 dd->bGridJump = comm->bDynLoadBal;
6683 comm->bPMELoadBalDLBLimits = FALSE;
6685 if (comm->nstSortCG)
6689 if (comm->nstSortCG == 1)
6691 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6695 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6699 snew(comm->sort, 1);
6705 fprintf(fplog, "Will not sort the charge groups\n");
6709 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6711 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6712 if (comm->bInterCGBondeds)
6714 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6718 comm->bInterCGMultiBody = FALSE;
6721 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6722 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6724 if (ir->rlistlong == 0)
6726 /* Set the cut-off to some very large value,
6727 * so we don't need if statements everywhere in the code.
6728 * We use sqrt, since the cut-off is squared in some places.
6730 comm->cutoff = GMX_CUTOFF_INF;
6734 comm->cutoff = ir->rlistlong;
6736 comm->cutoff_mbody = 0;
6738 comm->cellsize_limit = 0;
6739 comm->bBondComm = FALSE;
6741 if (comm->bInterCGBondeds)
6743 if (comm_distance_min > 0)
6745 comm->cutoff_mbody = comm_distance_min;
6746 if (Flags & MD_DDBONDCOMM)
6748 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6752 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6754 r_bonded_limit = comm->cutoff_mbody;
6756 else if (ir->bPeriodicMols)
6758 /* Can not easily determine the required cut-off */
6759 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6760 comm->cutoff_mbody = comm->cutoff/2;
6761 r_bonded_limit = comm->cutoff_mbody;
6767 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6768 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6770 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6771 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6773 /* We use an initial margin of 10% for the minimum cell size,
6774 * except when we are just below the non-bonded cut-off.
6776 if (Flags & MD_DDBONDCOMM)
6778 if (max(r_2b, r_mb) > comm->cutoff)
6780 r_bonded = max(r_2b, r_mb);
6781 r_bonded_limit = 1.1*r_bonded;
6782 comm->bBondComm = TRUE;
6787 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6789 /* We determine cutoff_mbody later */
6793 /* No special bonded communication,
6794 * simply increase the DD cut-off.
6796 r_bonded_limit = 1.1*max(r_2b, r_mb);
6797 comm->cutoff_mbody = r_bonded_limit;
6798 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6801 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6805 "Minimum cell size due to bonded interactions: %.3f nm\n",
6806 comm->cellsize_limit);
6810 if (dd->bInterCGcons && rconstr <= 0)
6812 /* There is a cell size limit due to the constraints (P-LINCS) */
6813 rconstr = constr_r_max(fplog, mtop, ir);
6817 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6819 if (rconstr > comm->cellsize_limit)
6821 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6825 else if (rconstr > 0 && fplog)
6827 /* Here we do not check for dd->bInterCGcons,
6828 * because one can also set a cell size limit for virtual sites only
6829 * and at this point we don't know yet if there are intercg v-sites.
6832 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6835 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6837 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6841 copy_ivec(nc, dd->nc);
6842 set_dd_dim(fplog, dd);
6843 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6845 if (cr->npmenodes == -1)
6849 acs = average_cellsize_min(dd, ddbox);
6850 if (acs < comm->cellsize_limit)
6854 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6856 gmx_fatal_collective(FARGS, cr, NULL,
6857 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6858 acs, comm->cellsize_limit);
6863 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6865 /* We need to choose the optimal DD grid and possibly PME nodes */
6866 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6867 comm->eDLB != edlbNO, dlb_scale,
6868 comm->cellsize_limit, comm->cutoff,
6869 comm->bInterCGBondeds);
6871 if (dd->nc[XX] == 0)
6873 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6874 sprintf(buf, "Change the number of nodes or mdrun option %s%s%s",
6875 !bC ? "-rdd" : "-rcon",
6876 comm->eDLB != edlbNO ? " or -dds" : "",
6877 bC ? " or your LINCS settings" : "");
6879 gmx_fatal_collective(FARGS, cr, NULL,
6880 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6882 "Look in the log file for details on the domain decomposition",
6883 cr->nnodes-cr->npmenodes, limit, buf);
6885 set_dd_dim(fplog, dd);
6891 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6892 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6895 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6896 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6898 gmx_fatal_collective(FARGS, cr, NULL,
6899 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6900 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6902 if (cr->npmenodes > dd->nnodes)
6904 gmx_fatal_collective(FARGS, cr, NULL,
6905 "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6907 if (cr->npmenodes > 0)
6909 comm->npmenodes = cr->npmenodes;
6913 comm->npmenodes = dd->nnodes;
6916 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6918 /* The following choices should match those
6919 * in comm_cost_est in domdec_setup.c.
6920 * Note that here the checks have to take into account
6921 * that the decomposition might occur in a different order than xyz
6922 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6923 * in which case they will not match those in comm_cost_est,
6924 * but since that is mainly for testing purposes that's fine.
6926 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6927 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6928 getenv("GMX_PMEONEDD") == NULL)
6930 comm->npmedecompdim = 2;
6931 comm->npmenodes_x = dd->nc[XX];
6932 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6936 /* In case nc is 1 in both x and y we could still choose to
6937 * decompose pme in y instead of x, but we use x for simplicity.
6939 comm->npmedecompdim = 1;
6940 if (dd->dim[0] == YY)
6942 comm->npmenodes_x = 1;
6943 comm->npmenodes_y = comm->npmenodes;
6947 comm->npmenodes_x = comm->npmenodes;
6948 comm->npmenodes_y = 1;
6953 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6954 comm->npmenodes_x, comm->npmenodes_y, 1);
6959 comm->npmedecompdim = 0;
6960 comm->npmenodes_x = 0;
6961 comm->npmenodes_y = 0;
6964 /* Technically we don't need both of these,
6965 * but it simplifies code not having to recalculate it.
6967 *npme_x = comm->npmenodes_x;
6968 *npme_y = comm->npmenodes_y;
6970 snew(comm->slb_frac, DIM);
6971 if (comm->eDLB == edlbNO)
6973 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6974 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6975 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
6978 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6980 if (comm->bBondComm || comm->eDLB != edlbNO)
6982 /* Set the bonded communication distance to halfway
6983 * the minimum and the maximum,
6984 * since the extra communication cost is nearly zero.
6986 acs = average_cellsize_min(dd, ddbox);
6987 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6988 if (comm->eDLB != edlbNO)
6990 /* Check if this does not limit the scaling */
6991 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
6993 if (!comm->bBondComm)
6995 /* Without bBondComm do not go beyond the n.b. cut-off */
6996 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
6997 if (comm->cellsize_limit >= comm->cutoff)
6999 /* We don't loose a lot of efficieny
7000 * when increasing it to the n.b. cut-off.
7001 * It can even be slightly faster, because we need
7002 * less checks for the communication setup.
7004 comm->cutoff_mbody = comm->cutoff;
7007 /* Check if we did not end up below our original limit */
7008 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
7010 if (comm->cutoff_mbody > comm->cellsize_limit)
7012 comm->cellsize_limit = comm->cutoff_mbody;
7015 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
7020 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
7021 "cellsize limit %f\n",
7022 comm->bBondComm, comm->cellsize_limit);
7027 check_dd_restrictions(cr, dd, ir, fplog);
7030 comm->partition_step = INT_MIN;
7033 clear_dd_cycle_counts(dd);
7038 static void set_dlb_limits(gmx_domdec_t *dd)
7043 for (d = 0; d < dd->ndim; d++)
7045 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7046 dd->comm->cellsize_min[dd->dim[d]] =
7047 dd->comm->cellsize_min_dlb[dd->dim[d]];
7052 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7055 gmx_domdec_comm_t *comm;
7065 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
7068 cellsize_min = comm->cellsize_min[dd->dim[0]];
7069 for (d = 1; d < dd->ndim; d++)
7071 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7074 if (cellsize_min < comm->cellsize_limit*1.05)
7076 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7078 /* Change DLB from "auto" to "no". */
7079 comm->eDLB = edlbNO;
7084 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7085 comm->bDynLoadBal = TRUE;
7086 dd->bGridJump = TRUE;
7090 /* We can set the required cell size info here,
7091 * so we do not need to communicate this.
7092 * The grid is completely uniform.
7094 for (d = 0; d < dd->ndim; d++)
7098 comm->load[d].sum_m = comm->load[d].sum;
7100 nc = dd->nc[dd->dim[d]];
7101 for (i = 0; i < nc; i++)
7103 comm->root[d]->cell_f[i] = i/(real)nc;
7106 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7107 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7110 comm->root[d]->cell_f[nc] = 1.0;
7115 static char *init_bLocalCG(gmx_mtop_t *mtop)
7120 ncg = ncg_mtop(mtop);
7121 snew(bLocalCG, ncg);
7122 for (cg = 0; cg < ncg; cg++)
7124 bLocalCG[cg] = FALSE;
7130 void dd_init_bondeds(FILE *fplog,
7131 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7133 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7135 gmx_domdec_comm_t *comm;
7139 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7143 if (comm->bBondComm)
7145 /* Communicate atoms beyond the cut-off for bonded interactions */
7148 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7150 comm->bLocalCG = init_bLocalCG(mtop);
7154 /* Only communicate atoms based on cut-off */
7155 comm->cglink = NULL;
7156 comm->bLocalCG = NULL;
7160 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7162 gmx_bool bDynLoadBal, real dlb_scale,
7165 gmx_domdec_comm_t *comm;
7180 fprintf(fplog, "The maximum number of communication pulses is:");
7181 for (d = 0; d < dd->ndim; d++)
7183 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7185 fprintf(fplog, "\n");
7186 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7187 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7188 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7189 for (d = 0; d < DIM; d++)
7193 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7200 comm->cellsize_min_dlb[d]/
7201 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7203 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7206 fprintf(fplog, "\n");
7210 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
7211 fprintf(fplog, "The initial number of communication pulses is:");
7212 for (d = 0; d < dd->ndim; d++)
7214 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7216 fprintf(fplog, "\n");
7217 fprintf(fplog, "The initial domain decomposition cell size is:");
7218 for (d = 0; d < DIM; d++)
7222 fprintf(fplog, " %c %.2f nm",
7223 dim2char(d), dd->comm->cellsize_min[d]);
7226 fprintf(fplog, "\n\n");
7229 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7231 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7232 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7233 "non-bonded interactions", "", comm->cutoff);
7237 limit = dd->comm->cellsize_limit;
7241 if (dynamic_dd_box(ddbox, ir))
7243 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7245 limit = dd->comm->cellsize_min[XX];
7246 for (d = 1; d < DIM; d++)
7248 limit = min(limit, dd->comm->cellsize_min[d]);
7252 if (comm->bInterCGBondeds)
7254 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7255 "two-body bonded interactions", "(-rdd)",
7256 max(comm->cutoff, comm->cutoff_mbody));
7257 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7258 "multi-body bonded interactions", "(-rdd)",
7259 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7263 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7264 "virtual site constructions", "(-rcon)", limit);
7266 if (dd->constraint_comm)
7268 sprintf(buf, "atoms separated by up to %d constraints",
7270 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7271 buf, "(-rcon)", limit);
7273 fprintf(fplog, "\n");
7279 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7281 const t_inputrec *ir,
7282 const gmx_ddbox_t *ddbox)
7284 gmx_domdec_comm_t *comm;
7285 int d, dim, npulse, npulse_d_max, npulse_d;
7290 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7292 /* Determine the maximum number of comm. pulses in one dimension */
7294 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7296 /* Determine the maximum required number of grid pulses */
7297 if (comm->cellsize_limit >= comm->cutoff)
7299 /* Only a single pulse is required */
7302 else if (!bNoCutOff && comm->cellsize_limit > 0)
7304 /* We round down slightly here to avoid overhead due to the latency
7305 * of extra communication calls when the cut-off
7306 * would be only slightly longer than the cell size.
7307 * Later cellsize_limit is redetermined,
7308 * so we can not miss interactions due to this rounding.
7310 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7314 /* There is no cell size limit */
7315 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7318 if (!bNoCutOff && npulse > 1)
7320 /* See if we can do with less pulses, based on dlb_scale */
7322 for (d = 0; d < dd->ndim; d++)
7325 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7326 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7327 npulse_d_max = max(npulse_d_max, npulse_d);
7329 npulse = min(npulse, npulse_d_max);
7332 /* This env var can override npulse */
7333 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
7340 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7341 for (d = 0; d < dd->ndim; d++)
7343 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7344 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7345 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7346 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7347 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7349 comm->bVacDLBNoLimit = FALSE;
7353 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7354 if (!comm->bVacDLBNoLimit)
7356 comm->cellsize_limit = max(comm->cellsize_limit,
7357 comm->cutoff/comm->maxpulse);
7359 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7360 /* Set the minimum cell size for each DD dimension */
7361 for (d = 0; d < dd->ndim; d++)
7363 if (comm->bVacDLBNoLimit ||
7364 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7366 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7370 comm->cellsize_min_dlb[dd->dim[d]] =
7371 comm->cutoff/comm->cd[d].np_dlb;
7374 if (comm->cutoff_mbody <= 0)
7376 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7378 if (comm->bDynLoadBal)
7384 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7386 /* If each molecule is a single charge group
7387 * or we use domain decomposition for each periodic dimension,
7388 * we do not need to take pbc into account for the bonded interactions.
7390 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7393 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
7396 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7397 t_inputrec *ir, gmx_ddbox_t *ddbox)
7399 gmx_domdec_comm_t *comm;
7405 /* Initialize the thread data.
7406 * This can not be done in init_domain_decomposition,
7407 * as the numbers of threads is determined later.
7409 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7412 snew(comm->dth, comm->nth);
7415 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7417 init_ddpme(dd, &comm->ddpme[0], 0);
7418 if (comm->npmedecompdim >= 2)
7420 init_ddpme(dd, &comm->ddpme[1], 1);
7425 comm->npmenodes = 0;
7426 if (dd->pme_nodeid >= 0)
7428 gmx_fatal_collective(FARGS, NULL, dd,
7429 "Can not have separate PME nodes without PME electrostatics");
7435 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7437 if (comm->eDLB != edlbNO)
7439 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7442 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7443 if (comm->eDLB == edlbAUTO)
7447 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7449 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
7452 if (ir->ePBC == epbcNONE)
7454 vol_frac = 1 - 1/(double)dd->nnodes;
7459 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7463 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7465 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7467 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
7470 static gmx_bool test_dd_cutoff(t_commrec *cr,
7471 t_state *state, t_inputrec *ir,
7482 set_ddbox(dd, FALSE, cr, ir, state->box,
7483 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7487 for (d = 0; d < dd->ndim; d++)
7491 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7492 if (dynamic_dd_box(&ddbox, ir))
7494 inv_cell_size *= DD_PRES_SCALE_MARGIN;
7497 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7499 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7500 dd->comm->cd[d].np_dlb > 0)
7502 if (np > dd->comm->cd[d].np_dlb)
7507 /* If a current local cell size is smaller than the requested
7508 * cut-off, we could still fix it, but this gets very complicated.
7509 * Without fixing here, we might actually need more checks.
7511 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7518 if (dd->comm->eDLB != edlbNO)
7520 /* If DLB is not active yet, we don't need to check the grid jumps.
7521 * Actually we shouldn't, because then the grid jump data is not set.
7523 if (dd->comm->bDynLoadBal &&
7524 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7529 gmx_sumi(1, &LocallyLimited, cr);
7531 if (LocallyLimited > 0)
7540 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7543 gmx_bool bCutoffAllowed;
7545 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7549 cr->dd->comm->cutoff = cutoff_req;
7552 return bCutoffAllowed;
7555 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7557 gmx_domdec_comm_t *comm;
7559 comm = cr->dd->comm;
7561 /* Turn on the DLB limiting (might have been on already) */
7562 comm->bPMELoadBalDLBLimits = TRUE;
7564 /* Change the cut-off limit */
7565 comm->PMELoadBal_max_cutoff = comm->cutoff;
7568 static void merge_cg_buffers(int ncell,
7569 gmx_domdec_comm_dim_t *cd, int pulse,
7571 int *index_gl, int *recv_i,
7572 rvec *cg_cm, rvec *recv_vr,
7574 cginfo_mb_t *cginfo_mb, int *cginfo)
7576 gmx_domdec_ind_t *ind, *ind_p;
7577 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7578 int shift, shift_at;
7580 ind = &cd->ind[pulse];
7582 /* First correct the already stored data */
7583 shift = ind->nrecv[ncell];
7584 for (cell = ncell-1; cell >= 0; cell--)
7586 shift -= ind->nrecv[cell];
7589 /* Move the cg's present from previous grid pulses */
7590 cg0 = ncg_cell[ncell+cell];
7591 cg1 = ncg_cell[ncell+cell+1];
7592 cgindex[cg1+shift] = cgindex[cg1];
7593 for (cg = cg1-1; cg >= cg0; cg--)
7595 index_gl[cg+shift] = index_gl[cg];
7596 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7597 cgindex[cg+shift] = cgindex[cg];
7598 cginfo[cg+shift] = cginfo[cg];
7600 /* Correct the already stored send indices for the shift */
7601 for (p = 1; p <= pulse; p++)
7603 ind_p = &cd->ind[p];
7605 for (c = 0; c < cell; c++)
7607 cg0 += ind_p->nsend[c];
7609 cg1 = cg0 + ind_p->nsend[cell];
7610 for (cg = cg0; cg < cg1; cg++)
7612 ind_p->index[cg] += shift;
7618 /* Merge in the communicated buffers */
7622 for (cell = 0; cell < ncell; cell++)
7624 cg1 = ncg_cell[ncell+cell+1] + shift;
7627 /* Correct the old cg indices */
7628 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7630 cgindex[cg+1] += shift_at;
7633 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7635 /* Copy this charge group from the buffer */
7636 index_gl[cg1] = recv_i[cg0];
7637 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7638 /* Add it to the cgindex */
7639 cg_gl = index_gl[cg1];
7640 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7641 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7642 cgindex[cg1+1] = cgindex[cg1] + nat;
7647 shift += ind->nrecv[cell];
7648 ncg_cell[ncell+cell+1] = cg1;
7652 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7653 int nzone, int cg0, const int *cgindex)
7657 /* Store the atom block boundaries for easy copying of communication buffers
7660 for (zone = 0; zone < nzone; zone++)
7662 for (p = 0; p < cd->np; p++)
7664 cd->ind[p].cell2at0[zone] = cgindex[cg];
7665 cg += cd->ind[p].nrecv[zone];
7666 cd->ind[p].cell2at1[zone] = cgindex[cg];
7671 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7677 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7679 if (!bLocalCG[link->a[i]])
7688 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7690 real c[DIM][4]; /* the corners for the non-bonded communication */
7691 real cr0; /* corner for rounding */
7692 real cr1[4]; /* corners for rounding */
7693 real bc[DIM]; /* corners for bounded communication */
7694 real bcr1; /* corner for rounding for bonded communication */
7697 /* Determine the corners of the domain(s) we are communicating with */
7699 set_dd_corners(const gmx_domdec_t *dd,
7700 int dim0, int dim1, int dim2,
7704 const gmx_domdec_comm_t *comm;
7705 const gmx_domdec_zones_t *zones;
7710 zones = &comm->zones;
7712 /* Keep the compiler happy */
7716 /* The first dimension is equal for all cells */
7717 c->c[0][0] = comm->cell_x0[dim0];
7720 c->bc[0] = c->c[0][0];
7725 /* This cell row is only seen from the first row */
7726 c->c[1][0] = comm->cell_x0[dim1];
7727 /* All rows can see this row */
7728 c->c[1][1] = comm->cell_x0[dim1];
7731 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7734 /* For the multi-body distance we need the maximum */
7735 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7738 /* Set the upper-right corner for rounding */
7739 c->cr0 = comm->cell_x1[dim0];
7744 for (j = 0; j < 4; j++)
7746 c->c[2][j] = comm->cell_x0[dim2];
7750 /* Use the maximum of the i-cells that see a j-cell */
7751 for (i = 0; i < zones->nizone; i++)
7753 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7759 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7765 /* For the multi-body distance we need the maximum */
7766 c->bc[2] = comm->cell_x0[dim2];
7767 for (i = 0; i < 2; i++)
7769 for (j = 0; j < 2; j++)
7771 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7777 /* Set the upper-right corner for rounding */
7778 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7779 * Only cell (0,0,0) can see cell 7 (1,1,1)
7781 c->cr1[0] = comm->cell_x1[dim1];
7782 c->cr1[3] = comm->cell_x1[dim1];
7785 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7788 /* For the multi-body distance we need the maximum */
7789 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7796 /* Determine which cg's we need to send in this pulse from this zone */
7798 get_zone_pulse_cgs(gmx_domdec_t *dd,
7799 int zonei, int zone,
7801 const int *index_gl,
7803 int dim, int dim_ind,
7804 int dim0, int dim1, int dim2,
7805 real r_comm2, real r_bcomm2,
7809 real skew_fac2_d, real skew_fac_01,
7810 rvec *v_d, rvec *v_0, rvec *v_1,
7811 const dd_corners_t *c,
7813 gmx_bool bDistBonded,
7819 gmx_domdec_ind_t *ind,
7820 int **ibuf, int *ibuf_nalloc,
7826 gmx_domdec_comm_t *comm;
7828 gmx_bool bDistMB_pulse;
7830 real r2, rb2, r, tric_sh;
7833 int nsend_z, nsend, nat;
7837 bScrew = (dd->bScrewPBC && dim == XX);
7839 bDistMB_pulse = (bDistMB && bDistBonded);
7845 for (cg = cg0; cg < cg1; cg++)
7849 if (tric_dist[dim_ind] == 0)
7851 /* Rectangular direction, easy */
7852 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7859 r = cg_cm[cg][dim] - c->bc[dim_ind];
7865 /* Rounding gives at most a 16% reduction
7866 * in communicated atoms
7868 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7870 r = cg_cm[cg][dim0] - c->cr0;
7871 /* This is the first dimension, so always r >= 0 */
7878 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7880 r = cg_cm[cg][dim1] - c->cr1[zone];
7887 r = cg_cm[cg][dim1] - c->bcr1;
7897 /* Triclinic direction, more complicated */
7900 /* Rounding, conservative as the skew_fac multiplication
7901 * will slightly underestimate the distance.
7903 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7905 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7906 for (i = dim0+1; i < DIM; i++)
7908 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7910 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7913 rb[dim0] = rn[dim0];
7916 /* Take care that the cell planes along dim0 might not
7917 * be orthogonal to those along dim1 and dim2.
7919 for (i = 1; i <= dim_ind; i++)
7922 if (normal[dim0][dimd] > 0)
7924 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7927 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7932 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7934 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7936 for (i = dim1+1; i < DIM; i++)
7938 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7940 rn[dim1] += tric_sh;
7943 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7944 /* Take care of coupling of the distances
7945 * to the planes along dim0 and dim1 through dim2.
7947 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7948 /* Take care that the cell planes along dim1
7949 * might not be orthogonal to that along dim2.
7951 if (normal[dim1][dim2] > 0)
7953 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7959 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7962 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7963 /* Take care of coupling of the distances
7964 * to the planes along dim0 and dim1 through dim2.
7966 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7967 /* Take care that the cell planes along dim1
7968 * might not be orthogonal to that along dim2.
7970 if (normal[dim1][dim2] > 0)
7972 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7977 /* The distance along the communication direction */
7978 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7980 for (i = dim+1; i < DIM; i++)
7982 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7987 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7988 /* Take care of coupling of the distances
7989 * to the planes along dim0 and dim1 through dim2.
7991 if (dim_ind == 1 && zonei == 1)
7993 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
7999 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
8002 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
8003 /* Take care of coupling of the distances
8004 * to the planes along dim0 and dim1 through dim2.
8006 if (dim_ind == 1 && zonei == 1)
8008 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
8016 ((bDistMB && rb2 < r_bcomm2) ||
8017 (bDist2B && r2 < r_bcomm2)) &&
8019 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
8020 missing_link(comm->cglink, index_gl[cg],
8023 /* Make an index to the local charge groups */
8024 if (nsend+1 > ind->nalloc)
8026 ind->nalloc = over_alloc_large(nsend+1);
8027 srenew(ind->index, ind->nalloc);
8029 if (nsend+1 > *ibuf_nalloc)
8031 *ibuf_nalloc = over_alloc_large(nsend+1);
8032 srenew(*ibuf, *ibuf_nalloc);
8034 ind->index[nsend] = cg;
8035 (*ibuf)[nsend] = index_gl[cg];
8037 vec_rvec_check_alloc(vbuf, nsend+1);
8039 if (dd->ci[dim] == 0)
8041 /* Correct cg_cm for pbc */
8042 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
8045 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8046 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8051 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8054 nat += cgindex[cg+1] - cgindex[cg];
8060 *nsend_z_ptr = nsend_z;
8063 static void setup_dd_communication(gmx_domdec_t *dd,
8064 matrix box, gmx_ddbox_t *ddbox,
8065 t_forcerec *fr, t_state *state, rvec **f)
8067 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8068 int nzone, nzone_send, zone, zonei, cg0, cg1;
8069 int c, i, j, cg, cg_gl, nrcg;
8070 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8071 gmx_domdec_comm_t *comm;
8072 gmx_domdec_zones_t *zones;
8073 gmx_domdec_comm_dim_t *cd;
8074 gmx_domdec_ind_t *ind;
8075 cginfo_mb_t *cginfo_mb;
8076 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8077 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8078 dd_corners_t corners;
8080 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8081 real skew_fac2_d, skew_fac_01;
8088 fprintf(debug, "Setting up DD communication\n");
8093 switch (fr->cutoff_scheme)
8102 gmx_incons("unimplemented");
8106 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8108 dim = dd->dim[dim_ind];
8110 /* Check if we need to use triclinic distances */
8111 tric_dist[dim_ind] = 0;
8112 for (i = 0; i <= dim_ind; i++)
8114 if (ddbox->tric_dir[dd->dim[i]])
8116 tric_dist[dim_ind] = 1;
8121 bBondComm = comm->bBondComm;
8123 /* Do we need to determine extra distances for multi-body bondeds? */
8124 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8126 /* Do we need to determine extra distances for only two-body bondeds? */
8127 bDist2B = (bBondComm && !bDistMB);
8129 r_comm2 = sqr(comm->cutoff);
8130 r_bcomm2 = sqr(comm->cutoff_mbody);
8134 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8137 zones = &comm->zones;
8140 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8141 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8143 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8145 /* Triclinic stuff */
8146 normal = ddbox->normal;
8150 v_0 = ddbox->v[dim0];
8151 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8153 /* Determine the coupling coefficient for the distances
8154 * to the cell planes along dim0 and dim1 through dim2.
8155 * This is required for correct rounding.
8158 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8161 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8167 v_1 = ddbox->v[dim1];
8170 zone_cg_range = zones->cg_range;
8171 index_gl = dd->index_gl;
8172 cgindex = dd->cgindex;
8173 cginfo_mb = fr->cginfo_mb;
8175 zone_cg_range[0] = 0;
8176 zone_cg_range[1] = dd->ncg_home;
8177 comm->zone_ncg1[0] = dd->ncg_home;
8178 pos_cg = dd->ncg_home;
8180 nat_tot = dd->nat_home;
8182 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8184 dim = dd->dim[dim_ind];
8185 cd = &comm->cd[dim_ind];
8187 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8189 /* No pbc in this dimension, the first node should not comm. */
8197 v_d = ddbox->v[dim];
8198 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8200 cd->bInPlace = TRUE;
8201 for (p = 0; p < cd->np; p++)
8203 /* Only atoms communicated in the first pulse are used
8204 * for multi-body bonded interactions or for bBondComm.
8206 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8211 for (zone = 0; zone < nzone_send; zone++)
8213 if (tric_dist[dim_ind] && dim_ind > 0)
8215 /* Determine slightly more optimized skew_fac's
8217 * This reduces the number of communicated atoms
8218 * by about 10% for 3D DD of rhombic dodecahedra.
8220 for (dimd = 0; dimd < dim; dimd++)
8222 sf2_round[dimd] = 1;
8223 if (ddbox->tric_dir[dimd])
8225 for (i = dd->dim[dimd]+1; i < DIM; i++)
8227 /* If we are shifted in dimension i
8228 * and the cell plane is tilted forward
8229 * in dimension i, skip this coupling.
8231 if (!(zones->shift[nzone+zone][i] &&
8232 ddbox->v[dimd][i][dimd] >= 0))
8235 sqr(ddbox->v[dimd][i][dimd]);
8238 sf2_round[dimd] = 1/sf2_round[dimd];
8243 zonei = zone_perm[dim_ind][zone];
8246 /* Here we permutate the zones to obtain a convenient order
8247 * for neighbor searching
8249 cg0 = zone_cg_range[zonei];
8250 cg1 = zone_cg_range[zonei+1];
8254 /* Look only at the cg's received in the previous grid pulse
8256 cg1 = zone_cg_range[nzone+zone+1];
8257 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
8260 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8261 for (th = 0; th < comm->nth; th++)
8263 gmx_domdec_ind_t *ind_p;
8264 int **ibuf_p, *ibuf_nalloc_p;
8266 int *nsend_p, *nat_p;
8272 /* Thread 0 writes in the comm buffers */
8274 ibuf_p = &comm->buf_int;
8275 ibuf_nalloc_p = &comm->nalloc_int;
8276 vbuf_p = &comm->vbuf;
8279 nsend_zone_p = &ind->nsend[zone];
8283 /* Other threads write into temp buffers */
8284 ind_p = &comm->dth[th].ind;
8285 ibuf_p = &comm->dth[th].ibuf;
8286 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8287 vbuf_p = &comm->dth[th].vbuf;
8288 nsend_p = &comm->dth[th].nsend;
8289 nat_p = &comm->dth[th].nat;
8290 nsend_zone_p = &comm->dth[th].nsend_zone;
8292 comm->dth[th].nsend = 0;
8293 comm->dth[th].nat = 0;
8294 comm->dth[th].nsend_zone = 0;
8304 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8305 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8308 /* Get the cg's for this pulse in this zone */
8309 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8311 dim, dim_ind, dim0, dim1, dim2,
8314 normal, skew_fac2_d, skew_fac_01,
8315 v_d, v_0, v_1, &corners, sf2_round,
8316 bDistBonded, bBondComm,
8320 ibuf_p, ibuf_nalloc_p,
8326 /* Append data of threads>=1 to the communication buffers */
8327 for (th = 1; th < comm->nth; th++)
8329 dd_comm_setup_work_t *dth;
8332 dth = &comm->dth[th];
8334 ns1 = nsend + dth->nsend_zone;
8335 if (ns1 > ind->nalloc)
8337 ind->nalloc = over_alloc_dd(ns1);
8338 srenew(ind->index, ind->nalloc);
8340 if (ns1 > comm->nalloc_int)
8342 comm->nalloc_int = over_alloc_dd(ns1);
8343 srenew(comm->buf_int, comm->nalloc_int);
8345 if (ns1 > comm->vbuf.nalloc)
8347 comm->vbuf.nalloc = over_alloc_dd(ns1);
8348 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8351 for (i = 0; i < dth->nsend_zone; i++)
8353 ind->index[nsend] = dth->ind.index[i];
8354 comm->buf_int[nsend] = dth->ibuf[i];
8355 copy_rvec(dth->vbuf.v[i],
8356 comm->vbuf.v[nsend]);
8360 ind->nsend[zone] += dth->nsend_zone;
8363 /* Clear the counts in case we do not have pbc */
8364 for (zone = nzone_send; zone < nzone; zone++)
8366 ind->nsend[zone] = 0;
8368 ind->nsend[nzone] = nsend;
8369 ind->nsend[nzone+1] = nat;
8370 /* Communicate the number of cg's and atoms to receive */
8371 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8372 ind->nsend, nzone+2,
8373 ind->nrecv, nzone+2);
8375 /* The rvec buffer is also required for atom buffers of size nsend
8376 * in dd_move_x and dd_move_f.
8378 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8382 /* We can receive in place if only the last zone is not empty */
8383 for (zone = 0; zone < nzone-1; zone++)
8385 if (ind->nrecv[zone] > 0)
8387 cd->bInPlace = FALSE;
8392 /* The int buffer is only required here for the cg indices */
8393 if (ind->nrecv[nzone] > comm->nalloc_int2)
8395 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8396 srenew(comm->buf_int2, comm->nalloc_int2);
8398 /* The rvec buffer is also required for atom buffers
8399 * of size nrecv in dd_move_x and dd_move_f.
8401 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8402 vec_rvec_check_alloc(&comm->vbuf2, i);
8406 /* Make space for the global cg indices */
8407 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8408 || dd->cg_nalloc == 0)
8410 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8411 srenew(index_gl, dd->cg_nalloc);
8412 srenew(cgindex, dd->cg_nalloc+1);
8414 /* Communicate the global cg indices */
8417 recv_i = index_gl + pos_cg;
8421 recv_i = comm->buf_int2;
8423 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8424 comm->buf_int, nsend,
8425 recv_i, ind->nrecv[nzone]);
8427 /* Make space for cg_cm */
8428 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8429 if (fr->cutoff_scheme == ecutsGROUP)
8437 /* Communicate cg_cm */
8440 recv_vr = cg_cm + pos_cg;
8444 recv_vr = comm->vbuf2.v;
8446 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8447 comm->vbuf.v, nsend,
8448 recv_vr, ind->nrecv[nzone]);
8450 /* Make the charge group index */
8453 zone = (p == 0 ? 0 : nzone - 1);
8454 while (zone < nzone)
8456 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8458 cg_gl = index_gl[pos_cg];
8459 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8460 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8461 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8464 /* Update the charge group presence,
8465 * so we can use it in the next pass of the loop.
8467 comm->bLocalCG[cg_gl] = TRUE;
8473 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8476 zone_cg_range[nzone+zone] = pos_cg;
8481 /* This part of the code is never executed with bBondComm. */
8482 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8483 index_gl, recv_i, cg_cm, recv_vr,
8484 cgindex, fr->cginfo_mb, fr->cginfo);
8485 pos_cg += ind->nrecv[nzone];
8487 nat_tot += ind->nrecv[nzone+1];
8491 /* Store the atom block for easy copying of communication buffers */
8492 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
8496 dd->index_gl = index_gl;
8497 dd->cgindex = cgindex;
8499 dd->ncg_tot = zone_cg_range[zones->n];
8500 dd->nat_tot = nat_tot;
8501 comm->nat[ddnatHOME] = dd->nat_home;
8502 for (i = ddnatZONE; i < ddnatNR; i++)
8504 comm->nat[i] = dd->nat_tot;
8509 /* We don't need to update cginfo, since that was alrady done above.
8510 * So we pass NULL for the forcerec.
8512 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8513 NULL, comm->bLocalCG);
8518 fprintf(debug, "Finished setting up DD communication, zones:");
8519 for (c = 0; c < zones->n; c++)
8521 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8523 fprintf(debug, "\n");
8527 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8531 for (c = 0; c < zones->nizone; c++)
8533 zones->izone[c].cg1 = zones->cg_range[c+1];
8534 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8535 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
8539 static void set_zones_size(gmx_domdec_t *dd,
8540 matrix box, const gmx_ddbox_t *ddbox,
8541 int zone_start, int zone_end)
8543 gmx_domdec_comm_t *comm;
8544 gmx_domdec_zones_t *zones;
8546 int z, zi, zj0, zj1, d, dim;
8549 real size_j, add_tric;
8554 zones = &comm->zones;
8556 /* Do we need to determine extra distances for multi-body bondeds? */
8557 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8559 for (z = zone_start; z < zone_end; z++)
8561 /* Copy cell limits to zone limits.
8562 * Valid for non-DD dims and non-shifted dims.
8564 copy_rvec(comm->cell_x0, zones->size[z].x0);
8565 copy_rvec(comm->cell_x1, zones->size[z].x1);
8568 for (d = 0; d < dd->ndim; d++)
8572 for (z = 0; z < zones->n; z++)
8574 /* With a staggered grid we have different sizes
8575 * for non-shifted dimensions.
8577 if (dd->bGridJump && zones->shift[z][dim] == 0)
8581 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8582 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8586 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8587 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
8593 rcmbs = comm->cutoff_mbody;
8594 if (ddbox->tric_dir[dim])
8596 rcs /= ddbox->skew_fac[dim];
8597 rcmbs /= ddbox->skew_fac[dim];
8600 /* Set the lower limit for the shifted zone dimensions */
8601 for (z = zone_start; z < zone_end; z++)
8603 if (zones->shift[z][dim] > 0)
8606 if (!dd->bGridJump || d == 0)
8608 zones->size[z].x0[dim] = comm->cell_x1[dim];
8609 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8613 /* Here we take the lower limit of the zone from
8614 * the lowest domain of the zone below.
8618 zones->size[z].x0[dim] =
8619 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8625 zones->size[z].x0[dim] =
8626 zones->size[zone_perm[2][z-4]].x0[dim];
8630 zones->size[z].x0[dim] =
8631 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8634 /* A temporary limit, is updated below */
8635 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8639 for (zi = 0; zi < zones->nizone; zi++)
8641 if (zones->shift[zi][dim] == 0)
8643 /* This takes the whole zone into account.
8644 * With multiple pulses this will lead
8645 * to a larger zone then strictly necessary.
8647 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8648 zones->size[zi].x1[dim]+rcmbs);
8656 /* Loop over the i-zones to set the upper limit of each
8659 for (zi = 0; zi < zones->nizone; zi++)
8661 if (zones->shift[zi][dim] == 0)
8663 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8665 if (zones->shift[z][dim] > 0)
8667 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8668 zones->size[zi].x1[dim]+rcs);
8675 for (z = zone_start; z < zone_end; z++)
8677 /* Initialization only required to keep the compiler happy */
8678 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8681 /* To determine the bounding box for a zone we need to find
8682 * the extreme corners of 4, 2 or 1 corners.
8684 nc = 1 << (ddbox->npbcdim - 1);
8686 for (c = 0; c < nc; c++)
8688 /* Set up a zone corner at x=0, ignoring trilinic couplings */
8692 corner[YY] = zones->size[z].x0[YY];
8696 corner[YY] = zones->size[z].x1[YY];
8700 corner[ZZ] = zones->size[z].x0[ZZ];
8704 corner[ZZ] = zones->size[z].x1[ZZ];
8706 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8708 /* With 1D domain decomposition the cg's are not in
8709 * the triclinic box, but triclinic x-y and rectangular y-z.
8710 * Shift y back, so it will later end up at 0.
8712 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8714 /* Apply the triclinic couplings */
8715 assert(ddbox->npbcdim <= DIM);
8716 for (i = YY; i < ddbox->npbcdim; i++)
8718 for (j = XX; j < i; j++)
8720 corner[j] += corner[i]*box[i][j]/box[i][i];
8725 copy_rvec(corner, corner_min);
8726 copy_rvec(corner, corner_max);
8730 for (i = 0; i < DIM; i++)
8732 corner_min[i] = min(corner_min[i], corner[i]);
8733 corner_max[i] = max(corner_max[i], corner[i]);
8737 /* Copy the extreme cornes without offset along x */
8738 for (i = 0; i < DIM; i++)
8740 zones->size[z].bb_x0[i] = corner_min[i];
8741 zones->size[z].bb_x1[i] = corner_max[i];
8743 /* Add the offset along x */
8744 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8745 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
8748 if (zone_start == 0)
8751 for (dim = 0; dim < DIM; dim++)
8753 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8755 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8760 for (z = zone_start; z < zone_end; z++)
8762 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8764 zones->size[z].x0[XX], zones->size[z].x1[XX],
8765 zones->size[z].x0[YY], zones->size[z].x1[YY],
8766 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8767 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8769 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8770 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8771 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
8776 static int comp_cgsort(const void *a, const void *b)
8780 gmx_cgsort_t *cga, *cgb;
8781 cga = (gmx_cgsort_t *)a;
8782 cgb = (gmx_cgsort_t *)b;
8784 comp = cga->nsc - cgb->nsc;
8787 comp = cga->ind_gl - cgb->ind_gl;
8793 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8798 /* Order the data */
8799 for (i = 0; i < n; i++)
8801 buf[i] = a[sort[i].ind];
8804 /* Copy back to the original array */
8805 for (i = 0; i < n; i++)
8811 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8816 /* Order the data */
8817 for (i = 0; i < n; i++)
8819 copy_rvec(v[sort[i].ind], buf[i]);
8822 /* Copy back to the original array */
8823 for (i = 0; i < n; i++)
8825 copy_rvec(buf[i], v[i]);
8829 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8832 int a, atot, cg, cg0, cg1, i;
8834 if (cgindex == NULL)
8836 /* Avoid the useless loop of the atoms within a cg */
8837 order_vec_cg(ncg, sort, v, buf);
8842 /* Order the data */
8844 for (cg = 0; cg < ncg; cg++)
8846 cg0 = cgindex[sort[cg].ind];
8847 cg1 = cgindex[sort[cg].ind+1];
8848 for (i = cg0; i < cg1; i++)
8850 copy_rvec(v[i], buf[a]);
8856 /* Copy back to the original array */
8857 for (a = 0; a < atot; a++)
8859 copy_rvec(buf[a], v[a]);
8863 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8864 int nsort_new, gmx_cgsort_t *sort_new,
8865 gmx_cgsort_t *sort1)
8869 /* The new indices are not very ordered, so we qsort them */
8870 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8872 /* sort2 is already ordered, so now we can merge the two arrays */
8876 while (i2 < nsort2 || i_new < nsort_new)
8880 sort1[i1++] = sort_new[i_new++];
8882 else if (i_new == nsort_new)
8884 sort1[i1++] = sort2[i2++];
8886 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8887 (sort2[i2].nsc == sort_new[i_new].nsc &&
8888 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8890 sort1[i1++] = sort2[i2++];
8894 sort1[i1++] = sort_new[i_new++];
8899 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8901 gmx_domdec_sort_t *sort;
8902 gmx_cgsort_t *cgsort, *sort_i;
8903 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8904 int sort_last, sort_skip;
8906 sort = dd->comm->sort;
8908 a = fr->ns.grid->cell_index;
8910 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8912 if (ncg_home_old >= 0)
8914 /* The charge groups that remained in the same ns grid cell
8915 * are completely ordered. So we can sort efficiently by sorting
8916 * the charge groups that did move into the stationary list.
8921 for (i = 0; i < dd->ncg_home; i++)
8923 /* Check if this cg did not move to another node */
8926 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8928 /* This cg is new on this node or moved ns grid cell */
8929 if (nsort_new >= sort->sort_new_nalloc)
8931 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8932 srenew(sort->sort_new, sort->sort_new_nalloc);
8934 sort_i = &(sort->sort_new[nsort_new++]);
8938 /* This cg did not move */
8939 sort_i = &(sort->sort2[nsort2++]);
8941 /* Sort on the ns grid cell indices
8942 * and the global topology index.
8943 * index_gl is irrelevant with cell ns,
8944 * but we set it here anyhow to avoid a conditional.
8947 sort_i->ind_gl = dd->index_gl[i];
8954 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8957 /* Sort efficiently */
8958 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8963 cgsort = sort->sort;
8965 for (i = 0; i < dd->ncg_home; i++)
8967 /* Sort on the ns grid cell indices
8968 * and the global topology index
8970 cgsort[i].nsc = a[i];
8971 cgsort[i].ind_gl = dd->index_gl[i];
8973 if (cgsort[i].nsc < moved)
8980 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8982 /* Determine the order of the charge groups using qsort */
8983 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
8989 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
8992 int ncg_new, i, *a, na;
8994 sort = dd->comm->sort->sort;
8996 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
8999 for (i = 0; i < na; i++)
9003 sort[ncg_new].ind = a[i];
9011 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
9014 gmx_domdec_sort_t *sort;
9015 gmx_cgsort_t *cgsort, *sort_i;
9017 int ncg_new, i, *ibuf, cgsize;
9020 sort = dd->comm->sort;
9022 if (dd->ncg_home > sort->sort_nalloc)
9024 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9025 srenew(sort->sort, sort->sort_nalloc);
9026 srenew(sort->sort2, sort->sort_nalloc);
9028 cgsort = sort->sort;
9030 switch (fr->cutoff_scheme)
9033 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9036 ncg_new = dd_sort_order_nbnxn(dd, fr);
9039 gmx_incons("unimplemented");
9043 /* We alloc with the old size, since cgindex is still old */
9044 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9045 vbuf = dd->comm->vbuf.v;
9049 cgindex = dd->cgindex;
9056 /* Remove the charge groups which are no longer at home here */
9057 dd->ncg_home = ncg_new;
9060 fprintf(debug, "Set the new home charge group count to %d\n",
9064 /* Reorder the state */
9065 for (i = 0; i < estNR; i++)
9067 if (EST_DISTR(i) && (state->flags & (1<<i)))
9072 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9075 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9078 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9081 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9085 case estDISRE_INITF:
9086 case estDISRE_RM3TAV:
9087 case estORIRE_INITF:
9089 /* No ordering required */
9092 gmx_incons("Unknown state entry encountered in dd_sort_state");
9097 if (fr->cutoff_scheme == ecutsGROUP)
9100 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9103 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9105 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9106 srenew(sort->ibuf, sort->ibuf_nalloc);
9109 /* Reorder the global cg index */
9110 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9111 /* Reorder the cginfo */
9112 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9113 /* Rebuild the local cg index */
9117 for (i = 0; i < dd->ncg_home; i++)
9119 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9120 ibuf[i+1] = ibuf[i] + cgsize;
9122 for (i = 0; i < dd->ncg_home+1; i++)
9124 dd->cgindex[i] = ibuf[i];
9129 for (i = 0; i < dd->ncg_home+1; i++)
9134 /* Set the home atom number */
9135 dd->nat_home = dd->cgindex[dd->ncg_home];
9137 if (fr->cutoff_scheme == ecutsVERLET)
9139 /* The atoms are now exactly in grid order, update the grid order */
9140 nbnxn_set_atomorder(fr->nbv->nbs);
9144 /* Copy the sorted ns cell indices back to the ns grid struct */
9145 for (i = 0; i < dd->ncg_home; i++)
9147 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9149 fr->ns.grid->nr = dd->ncg_home;
9153 static void add_dd_statistics(gmx_domdec_t *dd)
9155 gmx_domdec_comm_t *comm;
9160 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9162 comm->sum_nat[ddnat-ddnatZONE] +=
9163 comm->nat[ddnat] - comm->nat[ddnat-1];
9168 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9170 gmx_domdec_comm_t *comm;
9175 /* Reset all the statistics and counters for total run counting */
9176 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9178 comm->sum_nat[ddnat-ddnatZONE] = 0;
9182 comm->load_step = 0;
9185 clear_ivec(comm->load_lim);
9190 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9192 gmx_domdec_comm_t *comm;
9196 comm = cr->dd->comm;
9198 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9205 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9207 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9209 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9214 " av. #atoms communicated per step for force: %d x %.1f\n",
9218 if (cr->dd->vsite_comm)
9221 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9222 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9227 if (cr->dd->constraint_comm)
9230 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9231 1 + ir->nLincsIter, av);
9235 gmx_incons(" Unknown type for DD statistics");
9238 fprintf(fplog, "\n");
9240 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9242 print_dd_load_av(fplog, cr->dd);
9246 void dd_partition_system(FILE *fplog,
9249 gmx_bool bMasterState,
9251 t_state *state_global,
9252 gmx_mtop_t *top_global,
9254 t_state *state_local,
9257 gmx_localtop_t *top_local,
9260 gmx_shellfc_t shellfc,
9261 gmx_constr_t constr,
9263 gmx_wallcycle_t wcycle,
9267 gmx_domdec_comm_t *comm;
9268 gmx_ddbox_t ddbox = {0};
9270 gmx_int64_t step_pcoupl;
9271 rvec cell_ns_x0, cell_ns_x1;
9272 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9273 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9274 gmx_bool bRedist, bSortCG, bResortAll;
9275 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9282 bBoxChanged = (bMasterState || DEFORM(*ir));
9283 if (ir->epc != epcNO)
9285 /* With nstpcouple > 1 pressure coupling happens.
9286 * one step after calculating the pressure.
9287 * Box scaling happens at the end of the MD step,
9288 * after the DD partitioning.
9289 * We therefore have to do DLB in the first partitioning
9290 * after an MD step where P-coupling occured.
9291 * We need to determine the last step in which p-coupling occurred.
9292 * MRS -- need to validate this for vv?
9297 step_pcoupl = step - 1;
9301 step_pcoupl = ((step - 1)/n)*n + 1;
9303 if (step_pcoupl >= comm->partition_step)
9309 bNStGlobalComm = (step % nstglobalcomm == 0);
9311 if (!comm->bDynLoadBal)
9317 /* Should we do dynamic load balacing this step?
9318 * Since it requires (possibly expensive) global communication,
9319 * we might want to do DLB less frequently.
9321 if (bBoxChanged || ir->epc != epcNO)
9323 bDoDLB = bBoxChanged;
9327 bDoDLB = bNStGlobalComm;
9331 /* Check if we have recorded loads on the nodes */
9332 if (comm->bRecordLoad && dd_load_count(comm))
9334 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9336 /* Check if we should use DLB at the second partitioning
9337 * and every 100 partitionings,
9338 * so the extra communication cost is negligible.
9340 n = max(100, nstglobalcomm);
9341 bCheckDLB = (comm->n_load_collect == 0 ||
9342 comm->n_load_have % n == n-1);
9349 /* Print load every nstlog, first and last step to the log file */
9350 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9351 comm->n_load_collect == 0 ||
9353 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9355 /* Avoid extra communication due to verbose screen output
9356 * when nstglobalcomm is set.
9358 if (bDoDLB || bLogLoad || bCheckDLB ||
9359 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9361 get_load_distribution(dd, wcycle);
9366 dd_print_load(fplog, dd, step-1);
9370 dd_print_load_verbose(dd);
9373 comm->n_load_collect++;
9377 /* Since the timings are node dependent, the master decides */
9381 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
9384 fprintf(debug, "step %s, imb loss %f\n",
9385 gmx_step_str(step, sbuf),
9386 dd_force_imb_perf_loss(dd));
9389 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9392 turn_on_dlb(fplog, cr, step);
9397 comm->n_load_have++;
9400 cgs_gl = &comm->cgs_gl;
9405 /* Clear the old state */
9406 clear_dd_indices(dd, 0, 0);
9409 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9410 TRUE, cgs_gl, state_global->x, &ddbox);
9412 get_cg_distribution(fplog, step, dd, cgs_gl,
9413 state_global->box, &ddbox, state_global->x);
9415 dd_distribute_state(dd, cgs_gl,
9416 state_global, state_local, f);
9418 dd_make_local_cgs(dd, &top_local->cgs);
9420 /* Ensure that we have space for the new distribution */
9421 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9423 if (fr->cutoff_scheme == ecutsGROUP)
9425 calc_cgcm(fplog, 0, dd->ncg_home,
9426 &top_local->cgs, state_local->x, fr->cg_cm);
9429 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9431 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9433 else if (state_local->ddp_count != dd->ddp_count)
9435 if (state_local->ddp_count > dd->ddp_count)
9437 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9440 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9442 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9445 /* Clear the old state */
9446 clear_dd_indices(dd, 0, 0);
9448 /* Build the new indices */
9449 rebuild_cgindex(dd, cgs_gl->index, state_local);
9450 make_dd_indices(dd, cgs_gl->index, 0);
9451 ncgindex_set = dd->ncg_home;
9453 if (fr->cutoff_scheme == ecutsGROUP)
9455 /* Redetermine the cg COMs */
9456 calc_cgcm(fplog, 0, dd->ncg_home,
9457 &top_local->cgs, state_local->x, fr->cg_cm);
9460 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9462 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9464 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9465 TRUE, &top_local->cgs, state_local->x, &ddbox);
9467 bRedist = comm->bDynLoadBal;
9471 /* We have the full state, only redistribute the cgs */
9473 /* Clear the non-home indices */
9474 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9477 /* Avoid global communication for dim's without pbc and -gcom */
9478 if (!bNStGlobalComm)
9480 copy_rvec(comm->box0, ddbox.box0 );
9481 copy_rvec(comm->box_size, ddbox.box_size);
9483 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9484 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9489 /* For dim's without pbc and -gcom */
9490 copy_rvec(ddbox.box0, comm->box0 );
9491 copy_rvec(ddbox.box_size, comm->box_size);
9493 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9496 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9498 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9501 /* Check if we should sort the charge groups */
9502 if (comm->nstSortCG > 0)
9504 bSortCG = (bMasterState ||
9505 (bRedist && (step % comm->nstSortCG == 0)));
9512 ncg_home_old = dd->ncg_home;
9517 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9519 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9521 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9523 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9526 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9528 &comm->cell_x0, &comm->cell_x1,
9529 dd->ncg_home, fr->cg_cm,
9530 cell_ns_x0, cell_ns_x1, &grid_density);
9534 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9537 switch (fr->cutoff_scheme)
9540 copy_ivec(fr->ns.grid->n, ncells_old);
9541 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9542 state_local->box, cell_ns_x0, cell_ns_x1,
9543 fr->rlistlong, grid_density);
9546 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9549 gmx_incons("unimplemented");
9551 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9552 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9556 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9558 /* Sort the state on charge group position.
9559 * This enables exact restarts from this step.
9560 * It also improves performance by about 15% with larger numbers
9561 * of atoms per node.
9564 /* Fill the ns grid with the home cell,
9565 * so we can sort with the indices.
9567 set_zones_ncg_home(dd);
9569 switch (fr->cutoff_scheme)
9572 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9574 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9576 comm->zones.size[0].bb_x0,
9577 comm->zones.size[0].bb_x1,
9579 comm->zones.dens_zone0,
9582 ncg_moved, bRedist ? comm->moved : NULL,
9583 fr->nbv->grp[eintLocal].kernel_type,
9584 fr->nbv->grp[eintLocal].nbat);
9586 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9589 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9590 0, dd->ncg_home, fr->cg_cm);
9592 copy_ivec(fr->ns.grid->n, ncells_new);
9595 gmx_incons("unimplemented");
9598 bResortAll = bMasterState;
9600 /* Check if we can user the old order and ns grid cell indices
9601 * of the charge groups to sort the charge groups efficiently.
9603 if (ncells_new[XX] != ncells_old[XX] ||
9604 ncells_new[YY] != ncells_old[YY] ||
9605 ncells_new[ZZ] != ncells_old[ZZ])
9612 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9613 gmx_step_str(step, sbuf), dd->ncg_home);
9615 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9616 bResortAll ? -1 : ncg_home_old);
9617 /* Rebuild all the indices */
9618 ga2la_clear(dd->ga2la);
9621 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9624 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9626 /* Setup up the communication and communicate the coordinates */
9627 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9629 /* Set the indices */
9630 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9632 /* Set the charge group boundaries for neighbor searching */
9633 set_cg_boundaries(&comm->zones);
9635 if (fr->cutoff_scheme == ecutsVERLET)
9637 set_zones_size(dd, state_local->box, &ddbox,
9638 bSortCG ? 1 : 0, comm->zones.n);
9641 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9644 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9645 -1,state_local->x,state_local->box);
9648 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9650 /* Extract a local topology from the global topology */
9651 for (i = 0; i < dd->ndim; i++)
9653 np[dd->dim[i]] = comm->cd[i].np;
9655 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9656 comm->cellsize_min, np,
9658 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9659 vsite, top_global, top_local);
9661 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9663 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9665 /* Set up the special atom communication */
9666 n = comm->nat[ddnatZONE];
9667 for (i = ddnatZONE+1; i < ddnatNR; i++)
9672 if (vsite && vsite->n_intercg_vsite)
9674 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9678 if (dd->bInterCGcons || dd->bInterCGsettles)
9680 /* Only for inter-cg constraints we need special code */
9681 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9682 constr, ir->nProjOrder,
9683 top_local->idef.il);
9687 gmx_incons("Unknown special atom type setup");
9692 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9694 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9696 /* Make space for the extra coordinates for virtual site
9697 * or constraint communication.
9699 state_local->natoms = comm->nat[ddnatNR-1];
9700 if (state_local->natoms > state_local->nalloc)
9702 dd_realloc_state(state_local, f, state_local->natoms);
9705 if (fr->bF_NoVirSum)
9707 if (vsite && vsite->n_intercg_vsite)
9709 nat_f_novirsum = comm->nat[ddnatVSITE];
9713 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9715 nat_f_novirsum = dd->nat_tot;
9719 nat_f_novirsum = dd->nat_home;
9728 /* Set the number of atoms required for the force calculation.
9729 * Forces need to be constrained when using a twin-range setup
9730 * or with energy minimization. For simple simulations we could
9731 * avoid some allocation, zeroing and copying, but this is
9732 * probably not worth the complications ande checking.
9734 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9735 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9737 /* We make the all mdatoms up to nat_tot_con.
9738 * We could save some work by only setting invmass
9739 * between nat_tot and nat_tot_con.
9741 /* This call also sets the new number of home particles to dd->nat_home */
9742 atoms2md(top_global, ir,
9743 comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
9745 /* Now we have the charges we can sort the FE interactions */
9746 dd_sort_local_top(dd, mdatoms, top_local);
9750 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9751 split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
9756 /* Make the local shell stuff, currently no communication is done */
9757 make_local_shells(cr, mdatoms, shellfc);
9760 if (ir->implicit_solvent)
9762 make_local_gb(cr, fr->born, ir->gb_algorithm);
9765 setup_bonded_threading(fr, &top_local->idef);
9767 if (!(cr->duty & DUTY_PME))
9769 /* Send the charges and/or c6/sigmas to our PME only node */
9770 gmx_pme_send_parameters(cr, mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9771 mdatoms->chargeA, mdatoms->chargeB,
9772 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9773 mdatoms->sigmaA, mdatoms->sigmaB,
9774 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9779 set_constraints(constr, top_local, ir, mdatoms, cr);
9782 if (ir->ePull != epullNO)
9784 /* Update the local pull groups */
9785 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9790 /* Update the local rotation groups */
9791 dd_make_local_rotation_groups(dd, ir->rot);
9794 if (ir->eSwapCoords != eswapNO)
9796 /* Update the local groups needed for ion swapping */
9797 dd_make_local_swap_groups(dd, ir->swap);
9800 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9801 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9803 add_dd_statistics(dd);
9805 /* Make sure we only count the cycles for this DD partitioning */
9806 clear_dd_cycle_counts(dd);
9808 /* Because the order of the atoms might have changed since
9809 * the last vsite construction, we need to communicate the constructing
9810 * atom coordinates again (for spreading the forces this MD step).
9812 dd_move_x_vsites(dd, state_local->box, state_local->x);
9814 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9816 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9818 dd_move_x(dd, state_local->box, state_local->x);
9819 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9820 -1, state_local->x, state_local->box);
9823 /* Store the partitioning step */
9824 comm->partition_step = step;
9826 /* Increase the DD partitioning counter */
9828 /* The state currently matches this DD partitioning count, store it */
9829 state_local->ddp_count = dd->ddp_count;
9832 /* The DD master node knows the complete cg distribution,
9833 * store the count so we can possibly skip the cg info communication.
9835 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9838 if (comm->DD_debug > 0)
9840 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9841 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9842 "after partitioning");