2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gromacs/utility/smalloc.h"
49 #include "gmx_fatal.h"
50 #include "gmx_fatal_collective.h"
53 #include "domdec_network.h"
56 #include "chargegroup.h"
65 #include "mtop_util.h"
66 #include "gmx_ga2la.h"
68 #include "nbnxn_search.h"
70 #include "gmx_omp_nthreads.h"
71 #include "gpu_utils.h"
73 #include "gromacs/fileio/futil.h"
74 #include "gromacs/fileio/gmxfio.h"
75 #include "gromacs/fileio/pdbio.h"
76 #include "gromacs/timing/wallcycle.h"
77 #include "gromacs/utility/gmxmpi.h"
78 #include "gromacs/swap/swapcoords.h"
79 #include "gromacs/utility/qsort_threadsafe.h"
80 #include "gromacs/pulling/pull.h"
81 #include "gromacs/pulling/pull_rotation.h"
82 #include "gromacs/imd/imd.h"
/* Rank-mapping helpers: DDRANK maps a DD node index to its MPI rank
 * (identity mapping here), DDMASTERRANK returns the rank of the DD master. */
84 #define DDRANK(dd, rank) (rank)
85 #define DDMASTERRANK(dd) (dd->masterrank)
/* Bookkeeping kept only on the master rank for scattering/gathering the
 * global charge-group distribution and state vectors.
 * NOTE(review): the extraction dropped some lines here (opening brace,
 * the cell-boundary field) -- compare against the complete file. */
87 typedef struct gmx_domdec_master
89 /* The cell boundaries */
91 /* The global charge group division */
92 int *ncg; /* Number of home charge groups for each node */
93 int *index; /* Index of nnodes+1 into cg */
94 int *cg; /* Global charge group index */
95 int *nat; /* Number of home atoms for each node. */
96 int *ibuf; /* Buffer for communication */
97 rvec *vbuf; /* Buffer for state scattering and gathering */
98 } gmx_domdec_master_t;
/* Per-pulse communication index (what to send/receive for one pulse along
 * one DD dimension). NOTE(review): struct header and some fields are
 * missing from this extraction. */
102 /* The numbers of charge groups to send and receive for each cell
103 * that requires communication, the last entry contains the total
104 * number of atoms that needs to be communicated.
106 int nsend[DD_MAXIZONE+2];
107 int nrecv[DD_MAXIZONE+2];
108 /* The charge groups to send */
111 /* The atom range for non-in-place communication */
112 int cell2at0[DD_MAXIZONE];
113 int cell2at1[DD_MAXIZONE];
/* Per-dimension communication setup: one gmx_domdec_ind_t per pulse. */
118 int np; /* Number of grid pulses in this dimension */
119 int np_dlb; /* For dlb, for use with edlbAUTO */
120 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
122 gmx_bool bInPlace; /* Can we communicate in place? */
123 } gmx_domdec_comm_dim_t;
/* Per-row state used by the DLB master of a cell row to rebalance
 * cell boundaries (fields of the root struct; header line not in view). */
127 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
128 real *cell_f; /* State var.: cell boundaries, box relative */
129 real *old_cell_f; /* Temp. var.: old cell size */
130 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
131 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
132 real *bound_min; /* Temp. var.: lower limit for cell boundary */
133 real *bound_max; /* Temp. var.: upper limit for cell boundary */
134 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
135 real *buf_ncd; /* Temp. var. */
/* Load-measurement storage, sorting buffers, atom-count/DLB enums and the
 * per-zone DLB boundary record. NOTE(review): several struct headers and
 * fields are missing from this extraction. */
138 #define DD_NLOAD_MAX 9
140 /* Here floats are accurate enough, since these variables
141 * only influence the load balancing, not the actual MD results.
168 gmx_cgsort_t *sort_new;
/* Order of the cumulative atom-count ranges in comm->nat[] */
180 /* This enum determines the order of the coordinates.
181 * ddnatHOME and ddnatZONE should be first and second,
182 * the others can be ordered as wanted.
185 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
/* Dynamic load balancing mode and its mdrun option strings */
189 edlbAUTO, edlbNO, edlbYES, edlbNR
191 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
/* PME slab setup along one DD dimension */
195 int dim; /* The dimension */
196 gmx_bool dim_match; /* Tells if DD and PME dims match */
197 int nslab; /* The number of PME slabs in this dimension */
198 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
199 int *pp_min; /* The minimum pp node location, size nslab */
200 int *pp_max; /* The maximum pp node location,size nslab */
201 int maxshift; /* The maximum shift for coordinate redistribution in PME */
/* DLB zone boundary record exchanged between neighbor cells */
206 real min0; /* The minimum bottom of this zone */
207 real max1; /* The maximum top of this zone */
208 real min1; /* The minimum top of this zone */
209 real mch0; /* The maximum bottom communication height for this zone */
210 real mch1; /* The maximum top communication height for this zone */
211 real p1_0; /* The bottom value of the first cell in this zone */
212 real p1_1; /* The top value of the first cell in this zone */
/* Per-thread scratch for parallel communication setup */
217 gmx_domdec_ind_t ind;
224 } dd_comm_setup_work_t;
/* Central communication/bookkeeping struct of the domain decomposition:
 * rank layout (PP/PME), charge-group distribution, DLB state, zone limits,
 * communication buffers and load/cycle statistics.
 * NOTE(review): many field declarations are missing from this extraction;
 * only the surviving lines are shown. */
226 typedef struct gmx_domdec_comm
228 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
229 * unless stated otherwise.
232 /* The number of decomposition dimensions for PME, 0: no PME */
234 /* The number of nodes doing PME (PP/PME or only PME) */
238 /* The communication setup including the PME only nodes */
239 gmx_bool bCartesianPP_PME;
242 int *pmenodes; /* size npmenodes */
243 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
244 * but with bCartesianPP_PME */
245 gmx_ddpme_t ddpme[2];
247 /* The DD particle-particle nodes only */
248 gmx_bool bCartesianPP;
249 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
251 /* The global charge groups */
254 /* Should we sort the cgs */
256 gmx_domdec_sort_t *sort;
258 /* Are there charge groups? */
261 /* Are there bonded and multi-body interactions between charge groups? */
262 gmx_bool bInterCGBondeds;
263 gmx_bool bInterCGMultiBody;
265 /* Data for the optional bonded interaction atom communication range */
272 /* Are we actually using DLB? */
273 gmx_bool bDynLoadBal;
275 /* Cell sizes for static load balancing, first index cartesian */
278 /* The width of the communicated boundaries */
281 /* The minimum cell size (including triclinic correction) */
283 /* For dlb, for use with edlbAUTO */
284 rvec cellsize_min_dlb;
285 /* The lower limit for the DD cell size with DLB */
287 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
288 gmx_bool bVacDLBNoLimit;
290 /* With PME load balancing we set limits on DLB */
291 gmx_bool bPMELoadBalDLBLimits;
292 /* DLB needs to take into account that we want to allow this maximum
293 * cut-off (for PME load balancing), this could limit cell boundaries.
295 real PMELoadBal_max_cutoff;
297 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
299 /* box0 and box_size are required with dim's without pbc and -gcom */
303 /* The cell boundaries */
307 /* The old location of the cell boundaries, to check cg displacements */
311 /* The communication setup and charge group boundaries for the zones */
312 gmx_domdec_zones_t zones;
314 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
315 * cell boundaries of neighboring cells for dynamic load balancing.
317 gmx_ddzone_t zone_d1[2];
318 gmx_ddzone_t zone_d2[2][2];
320 /* The coordinate/force communication setup and indices */
321 gmx_domdec_comm_dim_t cd[DIM];
322 /* The maximum number of cells to communicate with in one dimension */
325 /* Which cg distribution is stored on the master node */
326 int master_cg_ddp_count;
328 /* The number of cg's received from the direct neighbors */
329 int zone_ncg1[DD_MAXZONE];
331 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
334 /* Array for signalling if atoms have moved to another domain */
338 /* Communication buffer for general use */
342 /* Communication buffer for general use */
345 /* Temporary storage for thread parallel communication setup */
347 dd_comm_setup_work_t *dth;
349 /* Communication buffers only used with multiple grid pulses */
354 /* Communication buffers for local redistribution */
356 int cggl_flag_nalloc[DIM*2];
358 int cgcm_state_nalloc[DIM*2];
360 /* Cell sizes for dynamic load balancing */
361 gmx_domdec_root_t **root;
365 real cell_f_max0[DIM];
366 real cell_f_min1[DIM];
368 /* Stuff for load communication */
369 gmx_bool bRecordLoad;
370 gmx_domdec_load_t *load;
371 int nrank_gpu_shared;
373 MPI_Comm *mpi_comm_load;
374 MPI_Comm mpi_comm_gpu_shared;
377 /* Maximum DLB scaling per load balancing step in percent */
381 float cycl[ddCyclNr];
382 int cycl_n[ddCyclNr];
383 float cycl_max[ddCyclNr];
384 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
388 /* How often we have had load measurements */
390 /* How often we have collected the load measurements */
394 double sum_nat[ddnatNR-ddnatZONE];
404 /* The last partition step */
405 gmx_int64_t partition_step;
413 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
416 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
/* Low 16 bits hold the charge-group atom count; bits 16+ encode the
 * forward/backward move direction per DD dimension d. */
417 #define DD_FLAG_NRCG 65535
418 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
419 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
421 /* Zone permutation required to obtain consecutive charge groups
422 * for neighbor searching.
423 */
424 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
426 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
427 * components see only j zones with that component 0.
430 /* The DD zone order */
431 static const ivec dd_zo[DD_MAXZONE] =
432 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
/* i-zone / j-zone pairings for 3D, 2D and 1D decompositions:
 * each triplet is { i-zone, first j-zone, last j-zone (exclusive) }. */
437 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
442 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
447 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
449 /* Factors used to avoid problems due to rounding issues */
450 #define DD_CELL_MARGIN 1.0001
451 #define DD_CELL_MARGIN2 1.00005
452 /* Factor to account for pressure scaling during nstlist steps */
453 #define DD_PRES_SCALE_MARGIN 1.02
455 /* Allowed performance loss before we DLB or warn */
456 #define DD_PERF_LOSS 0.05
/* Size of the cell-fraction buffer for DD dimension index di */
458 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
460 /* Use separate MPI send and receive commands
461 * when nnodes <= GMX_DD_NNODES_SENDRECV.
462 * This saves memory (and some copying for small nnodes).
463 * For high parallelization scatter and gather calls are used.
465 #define GMX_DD_NNODES_SENDRECV 4
/* Superseded z-major rank indexing. dd_index is redefined below with the
 * x-major order required by PME; redefining a macro with a different body
 * and no #undef is a constraint violation, so this dead pair is disabled. */
#if 0
#define dd_index(n, i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])

static void index2xyz(ivec nc, int ind, ivec xyz)
{
    xyz[XX] = ind % nc[XX];
    xyz[YY] = (ind / nc[XX]) % nc[YY];
    xyz[ZZ] = ind / (nc[YY]*nc[XX]);
}
#endif
479 /* This order is required to minimize the coordinate communication in PME
480 * which uses decomposition in the x direction.
482 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
484 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
486 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
487 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
488 xyz[ZZ] = ind % nc[ZZ];
/* Map DD grid coordinates c to the DD node id, honoring the rank layout:
 * an explicit lookup table with combined PP+PME Cartesian communicators,
 * MPI_Cart_rank with a pure-PP Cartesian communicator, or (branch missing
 * from this extraction) the plain dd_index otherwise. */
491 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
496 ddindex = dd_index(dd->nc, c);
497 if (dd->comm->bCartesianPP_PME)
499 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
501 else if (dd->comm->bCartesianPP)
504 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
515 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
517 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
/* Return the 1-based global atom number for local atom index i.
 * NOTE(review): the non-DD branch and range check context are missing
 * from this extraction; visible code validates i against the total local
 * atom count and translates via the global atom index table. */
520 int ddglatnr(gmx_domdec_t *dd, int i)
530 if (i >= dd->comm->nat[ddnatNR-1])
532 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
534 atnr = dd->gatindex[i] + 1;
540 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
542 return &dd->comm->cgs_gl;
/* Initialize an rvec buffer (body missing from this extraction). */
545 static void vec_rvec_init(vec_rvec_t *v)
/* Ensure the rvec buffer can hold n elements, growing with the DD
 * over-allocation factor. NOTE(review): the `if (n > v->nalloc)` guard
 * appears to be missing from this extraction -- confirm before reuse. */
551 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
555 v->nalloc = over_alloc_dd(n);
556 srenew(v->v, v->nalloc);
560 void dd_store_state(gmx_domdec_t *dd, t_state *state)
564 if (state->ddp_count != dd->ddp_count)
566 gmx_incons("The state does not the domain decomposition state");
569 state->ncg_gl = dd->ncg_home;
570 if (state->ncg_gl > state->cg_gl_nalloc)
572 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
573 srenew(state->cg_gl, state->cg_gl_nalloc);
575 for (i = 0; i < state->ncg_gl; i++)
577 state->cg_gl[i] = dd->index_gl[i];
580 state->ddp_count_cg_gl = dd->ddp_count;
583 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
585 return &dd->comm->zones;
/* For home charge group icg, return the j-charge-group range [*jcg0,*jcg1)
 * to search against, plus the shift ranges per dimension for neighbor
 * searching. NOTE(review): several lines (izone loop body, else branch,
 * shift widening) are missing from this extraction. */
588 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
589 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
591 gmx_domdec_zones_t *zones;
594 zones = &dd->comm->zones;
/* Find the i-zone containing icg */
597 while (icg >= zones->izone[izone].cg1)
606 else if (izone < zones->nizone)
608 *jcg0 = zones->izone[izone].jcg0;
612 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
613 icg, izone, zones->nizone);
616 *jcg1 = zones->izone[izone].jcg1;
618 for (d = 0; d < dd->ndim; d++)
621 shift0[dim] = zones->izone[izone].shift0[dim];
622 shift1[dim] = zones->izone[izone].shift1[dim];
/* With triclinic cells or staggered grids the shift range must be widened */
623 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
625 /* A conservative approach, this can be optimized */
632 int dd_natoms_vsite(gmx_domdec_t *dd)
634 return dd->comm->nat[ddnatVSITE];
637 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
639 *at_start = dd->comm->nat[ddnatCON-1];
640 *at_end = dd->comm->nat[ddnatCON];
/* Communicate the halo coordinates x over all DD dimensions and pulses.
 * For each pulse the send list (charge groups -> atom ranges) is packed
 * into a buffer, shifted by the periodic vector where the cell wraps, and
 * exchanged backward with the neighbor. Screw PBC additionally mirrors
 * y and z. NOTE(review): braces, some assignments (comm, cd, ind, buf)
 * and the in-place branch are missing from this extraction. */
643 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
645 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
646 int *index, *cgindex;
647 gmx_domdec_comm_t *comm;
648 gmx_domdec_comm_dim_t *cd;
649 gmx_domdec_ind_t *ind;
650 rvec shift = {0, 0, 0}, *buf, *rbuf;
651 gmx_bool bPBC, bScrew;
655 cgindex = dd->cgindex;
660 nat_tot = dd->nat_home;
661 for (d = 0; d < dd->ndim; d++)
/* Only the cell at the lower edge of the grid applies a PBC shift */
663 bPBC = (dd->ci[dd->dim[d]] == 0);
664 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
667 copy_rvec(box[dd->dim[d]], shift);
670 for (p = 0; p < cd->np; p++)
/* Case 1: no PBC shift needed, plain copy */
677 for (i = 0; i < ind->nsend[nzone]; i++)
679 at0 = cgindex[index[i]];
680 at1 = cgindex[index[i]+1];
681 for (j = at0; j < at1; j++)
683 copy_rvec(x[j], buf[n]);
/* Case 2: ordinary periodic shift */
690 for (i = 0; i < ind->nsend[nzone]; i++)
692 at0 = cgindex[index[i]];
693 at1 = cgindex[index[i]+1];
694 for (j = at0; j < at1; j++)
696 /* We need to shift the coordinates */
697 rvec_add(x[j], shift, buf[n]);
/* Case 3: screw PBC, shift x and mirror y/z */
704 for (i = 0; i < ind->nsend[nzone]; i++)
706 at0 = cgindex[index[i]];
707 at1 = cgindex[index[i]+1];
708 for (j = at0; j < at1; j++)
711 buf[n][XX] = x[j][XX] + shift[XX];
713 * This operation requires a special shift force
714 * treatment, which is performed in calc_vir.
716 buf[n][YY] = box[YY][YY] - x[j][YY];
717 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
729 rbuf = comm->vbuf2.v;
731 /* Send and receive the coordinates */
732 dd_sendrecv_rvec(dd, d, dddirBackward,
733 buf, ind->nsend[nzone+1],
734 rbuf, ind->nrecv[nzone+1]);
/* Non-in-place: scatter the received coordinates per zone */
738 for (zone = 0; zone < nzone; zone++)
740 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
742 copy_rvec(rbuf[j], x[i]);
747 nat_tot += ind->nrecv[nzone+1];
/* Communicate halo forces back to their home ranks, the reverse of
 * dd_move_x: dimensions and pulses are traversed in reverse order, the
 * received forces are accumulated onto the home atoms, and optionally
 * added into fshift for virial computation. Screw PBC mirrors y/z force
 * components. NOTE(review): braces and some assignments (comm, cd, ind,
 * buf, is) are missing from this extraction. */
753 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
755 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
756 int *index, *cgindex;
757 gmx_domdec_comm_t *comm;
758 gmx_domdec_comm_dim_t *cd;
759 gmx_domdec_ind_t *ind;
763 gmx_bool bPBC, bScrew;
767 cgindex = dd->cgindex;
772 nzone = comm->zones.n/2;
773 nat_tot = dd->nat_tot;
774 for (d = dd->ndim-1; d >= 0; d--)
776 bPBC = (dd->ci[dd->dim[d]] == 0);
777 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
/* No shift-force bookkeeping needed in this case */
778 if (fshift == NULL && !bScrew)
782 /* Determine which shift vector we need */
788 for (p = cd->np-1; p >= 0; p--)
791 nat_tot -= ind->nrecv[nzone+1];
798 sbuf = comm->vbuf2.v;
/* Non-in-place: gather the forces to send per zone */
800 for (zone = 0; zone < nzone; zone++)
802 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
804 copy_rvec(f[i], sbuf[j]);
809 /* Communicate the forces */
810 dd_sendrecv_rvec(dd, d, dddirForward,
811 sbuf, ind->nrecv[nzone+1],
812 buf, ind->nsend[nzone+1]);
814 /* Add the received forces */
/* Case 1: plain accumulation */
818 for (i = 0; i < ind->nsend[nzone]; i++)
820 at0 = cgindex[index[i]];
821 at1 = cgindex[index[i]+1];
822 for (j = at0; j < at1; j++)
824 rvec_inc(f[j], buf[n]);
/* Case 2: also accumulate the shift force */
831 for (i = 0; i < ind->nsend[nzone]; i++)
833 at0 = cgindex[index[i]];
834 at1 = cgindex[index[i]+1];
835 for (j = at0; j < at1; j++)
837 rvec_inc(f[j], buf[n]);
838 /* Add this force to the shift force */
839 rvec_inc(fshift[is], buf[n]);
/* Case 3: screw PBC, mirror y/z before accumulating */
846 for (i = 0; i < ind->nsend[nzone]; i++)
848 at0 = cgindex[index[i]];
849 at1 = cgindex[index[i]+1];
850 for (j = at0; j < at1; j++)
852 /* Rotate the force */
853 f[j][XX] += buf[n][XX];
854 f[j][YY] -= buf[n][YY];
855 f[j][ZZ] -= buf[n][ZZ];
858 /* Add this force to the shift force */
859 rvec_inc(fshift[is], buf[n]);
/* Spread a per-atom real array v to the halo atoms, using the same
 * communication pattern as dd_move_x but with scalar reals (the rvec
 * communication buffers are reused as flat real arrays).
 * NOTE(review): braces and buffer pack/unpack statements are missing
 * from this extraction. */
870 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
872 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
873 int *index, *cgindex;
874 gmx_domdec_comm_t *comm;
875 gmx_domdec_comm_dim_t *cd;
876 gmx_domdec_ind_t *ind;
881 cgindex = dd->cgindex;
883 buf = &comm->vbuf.v[0][0];
886 nat_tot = dd->nat_home;
887 for (d = 0; d < dd->ndim; d++)
890 for (p = 0; p < cd->np; p++)
/* Pack the values of the atoms in the send list */
895 for (i = 0; i < ind->nsend[nzone]; i++)
897 at0 = cgindex[index[i]];
898 at1 = cgindex[index[i]+1];
899 for (j = at0; j < at1; j++)
912 rbuf = &comm->vbuf2.v[0][0];
914 /* Send and receive the coordinates */
915 dd_sendrecv_real(dd, d, dddirBackward,
916 buf, ind->nsend[nzone+1],
917 rbuf, ind->nrecv[nzone+1]);
/* Non-in-place: scatter the received values per zone */
921 for (zone = 0; zone < nzone; zone++)
923 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
930 nat_tot += ind->nrecv[nzone+1];
/* Sum a per-atom real array v from the halo atoms back onto the home
 * atoms, the reverse of dd_atom_spread_real (same pattern as dd_move_f
 * but with scalar reals). NOTE(review): braces and some pack/accumulate
 * statements are missing from this extraction. */
936 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
938 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
939 int *index, *cgindex;
940 gmx_domdec_comm_t *comm;
941 gmx_domdec_comm_dim_t *cd;
942 gmx_domdec_ind_t *ind;
947 cgindex = dd->cgindex;
949 buf = &comm->vbuf.v[0][0];
952 nzone = comm->zones.n/2;
953 nat_tot = dd->nat_tot;
954 for (d = dd->ndim-1; d >= 0; d--)
957 for (p = cd->np-1; p >= 0; p--)
960 nat_tot -= ind->nrecv[nzone+1];
967 sbuf = &comm->vbuf2.v[0][0];
/* Non-in-place: gather the values to send per zone */
969 for (zone = 0; zone < nzone; zone++)
971 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
978 /* Communicate the forces */
979 dd_sendrecv_real(dd, d, dddirForward,
980 sbuf, ind->nrecv[nzone+1],
981 buf, ind->nsend[nzone+1]);
983 /* Add the received forces */
985 for (i = 0; i < ind->nsend[nzone]; i++)
987 at0 = cgindex[index[i]];
988 at1 = cgindex[index[i]+1];
989 for (j = at0; j < at1; j++)
1000 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
1002 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1004 zone->min0, zone->max1,
1005 zone->mch0, zone->mch0,
1006 zone->p1_0, zone->p1_1);
/* Up to 5 zone records are exchanged per pulse; each record is packed
 * into 3 rvecs (9 reals, 2 unused) so the generic rvec send/receive
 * routine can be reused. */
1010 #define DDZONECOMM_MAXZONE 5
1011 #define DDZONECOMM_BUFSIZE 3
/* Exchange n_s/n_r gmx_ddzone_t records with the neighbor along DD
 * dimension ddimind in the given direction, via rvec staging buffers. */
1013 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1014 int ddimind, int direction,
1015 gmx_ddzone_t *buf_s, int n_s,
1016 gmx_ddzone_t *buf_r, int n_r)
1018 #define ZBS DDZONECOMM_BUFSIZE
1019 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1020 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
/* Pack each zone record into ZBS consecutive rvecs */
1023 for (i = 0; i < n_s; i++)
1025 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1026 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1027 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1028 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1029 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1030 vbuf_s[i*ZBS+1][2] = 0;
1031 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1032 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1033 vbuf_s[i*ZBS+2][2] = 0;
1036 dd_sendrecv_rvec(dd, ddimind, direction,
/* Unpack the received rvecs back into zone records */
1040 for (i = 0; i < n_r; i++)
1042 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1043 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1044 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1045 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1046 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1047 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1048 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
/* With dynamic load balancing, communicate the staggered cell boundaries
 * between neighboring cells so each rank knows the min/max extents and the
 * required communication heights of the zones it interacts with; on output
 * cell_ns_x0/cell_ns_x1 are widened to cover the neighbor zones.
 * NOTE(review): many lines (braces, pos/buf_size bookkeeping, some
 * assignments) are missing from this extraction. */
1054 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1055 rvec cell_ns_x0, rvec cell_ns_x1)
1057 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1059 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1060 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1061 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1062 rvec extr_s[2], extr_r[2];
1064 real dist_d, c = 0, det;
1065 gmx_domdec_comm_t *comm;
1066 gmx_bool bPBC, bUse;
/* Initialize the local zone records from our own boundaries */
1070 for (d = 1; d < dd->ndim; d++)
1073 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1074 zp->min0 = cell_ns_x0[dim];
1075 zp->max1 = cell_ns_x1[dim];
1076 zp->min1 = cell_ns_x1[dim];
1077 zp->mch0 = cell_ns_x0[dim];
1078 zp->mch1 = cell_ns_x1[dim];
1079 zp->p1_0 = cell_ns_x0[dim];
1080 zp->p1_1 = cell_ns_x1[dim];
/* Loop over the DD dimensions from the inside out */
1083 for (d = dd->ndim-2; d >= 0; d--)
1086 bPBC = (dim < ddbox->npbcdim);
1088 /* Use an rvec to store two reals */
1089 extr_s[d][0] = comm->cell_f0[d+1];
1090 extr_s[d][1] = comm->cell_f1[d+1];
1091 extr_s[d][2] = comm->cell_f1[d+1];
1094 /* Store the extremes in the backward sending buffer,
1095 * so they get updated separately from the forward communication.
1097 for (d1 = d; d1 < dd->ndim-1; d1++)
1099 /* We invert the order to be able to use the same loop for buf_e */
1100 buf_s[pos].min0 = extr_s[d1][1];
1101 buf_s[pos].max1 = extr_s[d1][0];
1102 buf_s[pos].min1 = extr_s[d1][2];
1103 buf_s[pos].mch0 = 0;
1104 buf_s[pos].mch1 = 0;
1105 /* Store the cell corner of the dimension we communicate along */
1106 buf_s[pos].p1_0 = comm->cell_x0[dim];
1107 buf_s[pos].p1_1 = 0;
1111 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1114 if (dd->ndim == 3 && d == 0)
1116 buf_s[pos] = comm->zone_d2[0][1];
1118 buf_s[pos] = comm->zone_d1[0];
1122 /* We only need to communicate the extremes
1123 * in the forward direction
1125 npulse = comm->cd[d].np;
1128 /* Take the minimum to avoid double communication */
1129 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1133 /* Without PBC we should really not communicate over
1134 * the boundaries, but implementing that complicates
1135 * the communication setup and therefore we simply
1136 * do all communication, but ignore some data.
1138 npulse_min = npulse;
1140 for (p = 0; p < npulse_min; p++)
1142 /* Communicate the extremes forward */
1143 bUse = (bPBC || dd->ci[dim] > 0);
1145 dd_sendrecv_rvec(dd, d, dddirForward,
1146 extr_s+d, dd->ndim-d-1,
1147 extr_r+d, dd->ndim-d-1);
1151 for (d1 = d; d1 < dd->ndim-1; d1++)
1153 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1154 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1155 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1161 for (p = 0; p < npulse; p++)
1163 /* Communicate all the zone information backward */
1164 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1166 dd_sendrecv_ddzone(dd, d, dddirBackward,
1173 for (d1 = d+1; d1 < dd->ndim; d1++)
1175 /* Determine the decrease of maximum required
1176 * communication height along d1 due to the distance along d,
1177 * this avoids a lot of useless atom communication.
1179 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1181 if (ddbox->tric_dir[dim])
1183 /* c is the off-diagonal coupling between the cell planes
1184 * along directions d and d1.
1186 c = ddbox->v[dim][dd->dim[d1]][dim];
1192 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1195 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1199 /* A negative value signals out of range */
1205 /* Accumulate the extremes over all pulses */
1206 for (i = 0; i < buf_size; i++)
1210 buf_e[i] = buf_r[i];
1216 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1217 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1218 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1221 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1229 if (bUse && dh[d1] >= 0)
1231 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1232 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1235 /* Copy the received buffer to the send buffer,
1236 * to pass the data through with the next pulse.
1238 buf_s[i] = buf_r[i];
1240 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1241 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1243 /* Store the extremes */
1246 for (d1 = d; d1 < dd->ndim-1; d1++)
1248 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1249 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1250 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1254 if (d == 1 || (d == 0 && dd->ndim == 3))
1256 for (i = d; i < 2; i++)
1258 comm->zone_d2[1-d][i] = buf_e[pos];
1264 comm->zone_d1[1] = buf_e[pos];
/* Widen the neighbor-search bounds by the received zone extents */
1274 for (i = 0; i < 2; i++)
1278 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1280 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1281 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1287 for (i = 0; i < 2; i++)
1289 for (j = 0; j < 2; j++)
1293 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1295 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1296 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1300 for (d = 1; d < dd->ndim; d++)
1302 comm->cell_f_max0[d] = extr_s[d-1][0];
1303 comm->cell_f_min1[d] = extr_s[d-1][1];
1306 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1307 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
/* Gather the charge-group distribution of all ranks on the master, so a
 * state vector can be collected. Early-exits when the master already holds
 * the distribution matching state_local->ddp_count.
 * NOTE(review): braces, the early return, ma assignment and the gatherv
 * call name are missing from this extraction. */
1312 static void dd_collect_cg(gmx_domdec_t *dd,
1313 t_state *state_local)
1315 gmx_domdec_master_t *ma = NULL;
1316 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1319 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1321 /* The master has the correct distribution */
/* Determine which cg list describes this state */
1325 if (state_local->ddp_count == dd->ddp_count)
1327 ncg_home = dd->ncg_home;
1329 nat_home = dd->nat_home;
1331 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1333 cgs_gl = &dd->comm->cgs_gl;
1335 ncg_home = state_local->ncg_gl;
1336 cg = state_local->cg_gl;
1338 for (i = 0; i < ncg_home; i++)
1340 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1345 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1348 buf2[0] = dd->ncg_home;
1349 buf2[1] = dd->nat_home;
1359 /* Collect the charge group and atom counts on the master */
1360 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1365 for (i = 0; i < dd->nnodes; i++)
1367 ma->ncg[i] = ma->ibuf[2*i];
1368 ma->nat[i] = ma->ibuf[2*i+1];
1369 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1372 /* Make byte counts and indices */
1373 for (i = 0; i < dd->nnodes; i++)
1375 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1376 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1380 fprintf(debug, "Initial charge group distribution: ");
1381 for (i = 0; i < dd->nnodes; i++)
1383 fprintf(debug, " %d", ma->ncg[i]);
1385 fprintf(debug, "\n");
1389 /* Collect the charge group indices on the master */
1391 dd->ncg_home*sizeof(int), dd->index_gl,
1392 DDMASTER(dd) ? ma->ibuf : NULL,
1393 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1394 DDMASTER(dd) ? ma->cg : NULL);
1396 dd->comm->master_cg_ddp_count = state_local->ddp_count;
/* Collect the local rvec array lv from all ranks into the global array v
 * on the master, using plain MPI_Send/MPI_Recv (used for small node
 * counts, see GMX_DD_NNODES_SENDRECV). NOTE(review): braces, the
 * DDMASTER branch and the skip-self check are missing from this
 * extraction. */
1399 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1402 gmx_domdec_master_t *ma;
1403 int n, i, c, a, nalloc = 0;
/* Non-master ranks just send their home atoms to the master */
1412 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1413 dd->rank, dd->mpi_comm_all);
1418 /* Copy the master coordinates to the global array */
1419 cgs_gl = &dd->comm->cgs_gl;
1421 n = DDMASTERRANK(dd);
1423 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1425 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1427 copy_rvec(lv[a++], v[c]);
/* Receive and scatter the data of every other rank */
1431 for (n = 0; n < dd->nnodes; n++)
1435 if (ma->nat[n] > nalloc)
1437 nalloc = over_alloc_dd(ma->nat[n]);
1438 srenew(buf, nalloc);
1441 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1442 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1445 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1447 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1449 copy_rvec(buf[a++], v[c]);
/* Build the per-rank byte counts and displacements (in ma->ibuf) for
 * gatherv/scatterv of rvec data; *counts and *disps point into ma->ibuf.
 * NOTE(review): the ma assignment and *counts initialization are missing
 * from this extraction. */
1458 static void get_commbuffer_counts(gmx_domdec_t *dd,
1459 int **counts, int **disps)
1461 gmx_domdec_master_t *ma;
1466 /* Make the rvec count and displacement arrays */
1468 *disps = ma->ibuf + dd->nnodes;
1469 for (n = 0; n < dd->nnodes; n++)
1471 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1472 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
/* Collect the local rvec array lv into the global array v on the master
 * via a single gatherv, then reorder the gathered buffer into global
 * charge-group order. NOTE(review): braces, parameter list continuation
 * and the DDMASTER guards are missing from this extraction. */
1476 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1479 gmx_domdec_master_t *ma;
1480 int *rcounts = NULL, *disps = NULL;
1489 get_commbuffer_counts(dd, &rcounts, &disps);
1494 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
/* Reorder the gathered per-rank data into the global array */
1498 cgs_gl = &dd->comm->cgs_gl;
1501 for (n = 0; n < dd->nnodes; n++)
1503 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1505 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1507 copy_rvec(buf[a++], v[c]);
1514 void dd_collect_vec(gmx_domdec_t *dd,
1515 t_state *state_local, rvec *lv, rvec *v)
1517 gmx_domdec_master_t *ma;
1518 int n, i, c, a, nalloc = 0;
1521 dd_collect_cg(dd, state_local);
1523 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1525 dd_collect_vec_sendrecv(dd, lv, v);
1529 dd_collect_vec_gatherv(dd, lv, v);
/* Collect the full distributed t_state on the master: global quantities
 * (lambdas, box, virials, thermo/barostat chains) are copied directly on
 * the master, distributed per-atom vectors (x, v, sd_X, cg_p) are gathered
 * via dd_collect_vec. NOTE(review): braces, DDMASTER guard and switch/case
 * scaffolding are missing from this extraction. */
1534 void dd_collect_state(gmx_domdec_t *dd,
1535 t_state *state_local, t_state *state)
1539 nh = state->nhchainlength;
1543 for (i = 0; i < efptNR; i++)
1545 state->lambda[i] = state_local->lambda[i];
1547 state->fep_state = state_local->fep_state;
1548 state->veta = state_local->veta;
1549 state->vol0 = state_local->vol0;
1550 copy_mat(state_local->box, state->box);
1551 copy_mat(state_local->boxv, state->boxv);
1552 copy_mat(state_local->svir_prev, state->svir_prev);
1553 copy_mat(state_local->fvir_prev, state->fvir_prev);
1554 copy_mat(state_local->pres_prev, state->pres_prev);
/* Nose-Hoover thermostat chain variables */
1556 for (i = 0; i < state_local->ngtc; i++)
1558 for (j = 0; j < nh; j++)
1560 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1561 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1563 state->therm_integral[i] = state_local->therm_integral[i];
/* Nose-Hoover barostat chain variables */
1565 for (i = 0; i < state_local->nnhpres; i++)
1567 for (j = 0; j < nh; j++)
1569 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1570 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
/* Gather the distributed per-atom vectors */
1574 for (est = 0; est < estNR; est++)
1576 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1581 dd_collect_vec(dd, state_local, state_local->x, state->x);
1584 dd_collect_vec(dd, state_local, state_local->v, state->v);
1587 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1590 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1592 case estDISRE_INITF:
1593 case estDISRE_RM3TAV:
1594 case estORIRE_INITF:
1598 gmx_incons("Unknown state entry encountered in dd_collect_state");
/* Grow all distributed per-atom arrays of state (and the force array *f)
 * to hold at least nalloc atoms, using the DD over-allocation factor.
 * NOTE(review): braces, the debug guard and the switch/case labels are
 * missing from this extraction. */
1604 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1610 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1613 state->nalloc = over_alloc_dd(nalloc);
1615 for (est = 0; est < estNR; est++)
1617 if (EST_DISTR(est) && (state->flags & (1<<est)))
1622 srenew(state->x, state->nalloc);
1625 srenew(state->v, state->nalloc);
1628 srenew(state->sd_X, state->nalloc);
1631 srenew(state->cg_p, state->nalloc);
1633 case estDISRE_INITF:
1634 case estDISRE_RM3TAV:
1635 case estORIRE_INITF:
1637 /* No reallocation required */
1640 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1647 srenew(*f, state->nalloc);
/* Ensure the forcerec charge-group arrays can hold nalloc charge groups;
 * with the Verlet scheme also grow the state/force arrays, since x in
 * state is used to set up the atom communication (no charge groups).
 */
1651 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1654 if (nalloc > fr->cg_nalloc)
1658 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1660 fr->cg_nalloc = over_alloc_dd(nalloc);
1661 srenew(fr->cginfo, fr->cg_nalloc);
/* cg_cm (charge-group centers) only exists with the group cut-off scheme */
1662 if (fr->cutoff_scheme == ecutsGROUP)
1664 srenew(fr->cg_cm, fr->cg_nalloc);
1667 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1669 /* We don't use charge groups, we use x in state to set up
1670 * the atom communication.
1672 dd_realloc_state(state, f, nalloc);
/* Distribute the global per-atom vector v from the master rank to the
 * local vector lv on every DD rank, using point-to-point MPI_Send/Recv.
 * The master packs each destination rank's home atoms (charge group by
 * charge group) into a growable buffer and sends it; non-master ranks
 * receive their slice directly into lv.
 */
1676 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1679 gmx_domdec_master_t *ma;
1680 int n, i, c, a, nalloc = 0;
/* Master: pack and send the home atoms of each (non-master) rank */
1687 for (n = 0; n < dd->nnodes; n++)
1691 if (ma->nat[n] > nalloc)
1693 nalloc = over_alloc_dd(ma->nat[n]);
1694 srenew(buf, nalloc);
1696 /* Use lv as a temporary buffer */
1698 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1700 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1702 copy_rvec(v[c], buf[a++]);
/* Sanity check: packed atom count must match the bookkeeping */
1705 if (a != ma->nat[n])
1707 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1712 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1713 DDRANK(dd, n), n, dd->mpi_comm_all);
/* Master copies its own home atoms straight from v into lv */
1718 n = DDMASTERRANK(dd);
1720 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1722 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1724 copy_rvec(v[c], lv[a++]);
/* Non-master ranks: receive the home-atom block from the master */
1731 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1732 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
/* Distribute v with a single collective scatter: the master packs all
 * ranks' home atoms contiguously (rank order) into buf and scatters it
 * using the per-rank byte counts/displacements from get_commbuffer_counts().
 */
1737 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1740 gmx_domdec_master_t *ma;
1741 int *scounts = NULL, *disps = NULL;
1742 int n, i, c, a, nalloc = 0;
1749 get_commbuffer_counts(dd, &scounts, &disps);
/* Pack atoms per destination rank, charge group by charge group */
1753 for (n = 0; n < dd->nnodes; n++)
1755 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1757 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1759 copy_rvec(v[c], buf[a++]);
/* Every rank receives dd->nat_home rvecs into lv */
1765 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1768 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1770 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1772 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1776 dd_distribute_vec_scatterv(dd, cgs, v, lv);
/* Broadcast the expanded-ensemble / free-energy weight history from the
 * master rank to all ranks (needed e.g. after reading a checkpoint).
 */
1780 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1783 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1784 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1785 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1787 if (dfhist->nlambda > 0)
1789 int nlam = dfhist->nlambda;
/* 1-D per-lambda arrays */
1790 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1791 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1792 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1793 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1794 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1795 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
/* nlam x nlam matrices, broadcast row by row */
1797 for (i = 0; i < nlam; i++)
1799 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1800 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1801 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1802 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1803 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1804 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
/* Distribute the global t_state owned by the master over all DD ranks:
 * scalars and fixed-size arrays are copied on the master and broadcast,
 * while the large per-atom arrays flagged as distributed are scattered
 * by home charge group via dd_distribute_vec().
 */
1809 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1810 t_state *state, t_state *state_local,
1815 nh = state->nhchainlength;
/* Master only: copy the non-distributed entries into its local state */
1819 for (i = 0; i < efptNR; i++)
1821 state_local->lambda[i] = state->lambda[i];
1823 state_local->fep_state = state->fep_state;
1824 state_local->veta = state->veta;
1825 state_local->vol0 = state->vol0;
1826 copy_mat(state->box, state_local->box);
1827 copy_mat(state->box_rel, state_local->box_rel);
1828 copy_mat(state->boxv, state_local->boxv);
1829 copy_mat(state->svir_prev, state_local->svir_prev);
1830 copy_mat(state->fvir_prev, state_local->fvir_prev);
1831 copy_df_history(&state_local->dfhist, &state->dfhist);
/* Nose-Hoover chains: ngtc groups x nh chain entries, stored flat */
1832 for (i = 0; i < state_local->ngtc; i++)
1834 for (j = 0; j < nh; j++)
1836 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1837 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1839 state_local->therm_integral[i] = state->therm_integral[i];
1841 for (i = 0; i < state_local->nnhpres; i++)
1843 for (j = 0; j < nh; j++)
1845 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1846 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
/* Broadcast the (now master-initialized) non-distributed entries */
1850 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1851 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1852 dd_bcast(dd, sizeof(real), &state_local->veta);
1853 dd_bcast(dd, sizeof(real), &state_local->vol0);
1854 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1855 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1856 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1857 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1858 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1859 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1860 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1861 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1862 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1863 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1865 /* communicate df_history -- required for restarting from checkpoint */
1866 dd_distribute_dfhist(dd, &state_local->dfhist);
/* Make sure the local per-atom arrays can hold all home atoms */
1868 if (dd->nat_home > state_local->nalloc)
1870 dd_realloc_state(state_local, f, dd->nat_home);
/* Scatter each distributed per-atom array to its owning ranks */
1872 for (i = 0; i < estNR; i++)
1874 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1879 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1882 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1885 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1888 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1890 case estDISRE_INITF:
1891 case estDISRE_RM3TAV:
1892 case estORIRE_INITF:
1894 /* Not implemented yet */
1897 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1903 static char dim2char(int dim)
1909 case XX: c = 'X'; break;
1910 case YY: c = 'Y'; break;
1911 case ZZ: c = 'Z'; break;
1912 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
/* Write the DD cell grid as a PDB file for visualization: the master
 * gathers the lower/upper cell corners of every rank, then writes one
 * CA pseudo-atom per cell corner (B-factor = cell volume fraction) and
 * CONECT records for the 12 cell edges.
 */
1918 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1919 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1921 rvec grid_s[2], *grid_r = NULL, cx, r;
1922 char fname[STRLEN], format[STRLEN], buf[22];
1924 int a, i, d, z, y, x;
1928 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1929 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1933 snew(grid_r, 2*dd->nnodes);
/* Gather all cell corners on the master */
1936 dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd) ? grid_r[0] : NULL);
/* Build a matrix that maps cell fractions to triclinic coordinates */
1940 for (d = 0; d < DIM; d++)
1942 for (i = 0; i < DIM; i++)
1950 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1952 tric[d][i] = box[i][d]/box[i][i];
1961 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1962 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
1963 out = gmx_fio_fopen(fname, "w");
1964 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1966 for (i = 0; i < dd->nnodes; i++)
1968 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1969 for (d = 0; d < DIM; d++)
1971 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
/* Emit the 8 corners of cell i (loop order fixes the corner numbering
 * that the CONECT records below rely on)
 */
1973 for (z = 0; z < 2; z++)
1975 for (y = 0; y < 2; y++)
1977 for (x = 0; x < 2; x++)
1979 cx[XX] = grid_r[i*2+x][XX];
1980 cx[YY] = grid_r[i*2+y][YY];
1981 cx[ZZ] = grid_r[i*2+z][ZZ];
/* NOTE(review): r is presumably cx transformed by tric (the transform
 * statement is not visible in this excerpt) -- verify against upstream.
 */
1983 fprintf(out, format, "ATOM", a++, "CA", "GLY", ' ', 1+i,
1984 ' ', 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol);
/* Connect corners that differ only in dimension d (atom serial offset 1<<d) */
1988 for (d = 0; d < DIM; d++)
1990 for (x = 0; x < 4; x++)
1994 case 0: y = 1 + i*8 + 2*x; break;
1995 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1996 case 2: y = 1 + i*8 + x; break;
1998 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2002 gmx_fio_fclose(out);
/* Write the local atoms of this rank to a PDB file for debugging.
 * The B-factor column encodes where each atom lives: its DD zone index
 * for zone atoms, zones.n for virtual sites, zones.n+1 for atoms that
 * are only present for constraints.
 */
2007 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
2008 gmx_mtop_t *mtop, t_commrec *cr,
2009 int natoms, rvec x[], matrix box)
2011 char fname[STRLEN], format[STRLEN], format4[STRLEN], buf[22];
2013 int i, ii, resnr, c;
2014 char *atomname, *resname;
/* Restrict output to real + vsite atoms (excludes constraint-only atoms) */
2021 natoms = dd->comm->nat[ddnatVSITE];
2024 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2026 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
2027 sprintf(format4, "%s%s\n", get_pdbformat4(), "%6.2f%6.2f");
2029 out = gmx_fio_fopen(fname, "w");
2031 fprintf(out, "TITLE %s\n", title);
2032 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2033 for (i = 0; i < natoms; i++)
/* Map the local atom index to its global index and name/residue */
2035 ii = dd->gatindex[i];
2036 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
/* Find the zone this atom belongs to by walking the zone cg ranges */
2037 if (i < dd->comm->nat[ddnatZONE])
2040 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2046 else if (i < dd->comm->nat[ddnatVSITE])
2048 b = dd->comm->zones.n;
2052 b = dd->comm->zones.n + 1;
/* (ii+1)%100000 and resnr%10000 keep values inside the fixed-width
 * PDB serial/residue columns
 */
2054 fprintf(out, strlen(atomname) < 4 ? format : format4,
2055 "ATOM", (ii+1)%100000,
2056 atomname, resname, ' ', resnr%10000, ' ',
2057 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b);
2059 fprintf(out, "TER\n");
2061 gmx_fio_fclose(out);
/* Return the cut-off distance that limits multi-body (bonded)
 * interactions between DD cells. With an explicit cutoff_mbody that
 * value is used; otherwise the limit is the smallest decomposed cell
 * size, possibly raised by cutoff_mbody when bonded communication is
 * active, and capped by the non-bonded cut-off.
 * NOTE(review): the initial value of r (for the !bInterCGBondeds case)
 * is not visible in this excerpt -- confirm against upstream.
 */
2064 real dd_cutoff_mbody(gmx_domdec_t *dd)
2066 gmx_domdec_comm_t *comm;
2073 if (comm->bInterCGBondeds)
2075 if (comm->cutoff_mbody > 0)
2077 r = comm->cutoff_mbody;
2081 /* cutoff_mbody=0 means we do not have DLB */
2082 r = comm->cellsize_min[dd->dim[0]];
2083 for (di = 1; di < dd->ndim; di++)
2085 r = min(r, comm->cellsize_min[dd->dim[di]]);
2087 if (comm->bBondComm)
2089 r = max(r, comm->cutoff_mbody);
2093 r = min(r, comm->cutoff);
2101 real dd_cutoff_twobody(gmx_domdec_t *dd)
2105 r_mb = dd_cutoff_mbody(dd);
2107 return max(dd->comm->cutoff, r_mb);
2111 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2115 nc = dd->nc[dd->comm->cartpmedim];
2116 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2117 copy_ivec(coord, coord_pme);
2118 coord_pme[dd->comm->cartpmedim] =
2119 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
{
    /* Assign a PME rank to communicate with DD rank ddindex, assuming
     * the major index of both is x. Adding npme/2 before the division
     * gives an even distribution of DD ranks over PME ranks.
     */
    int scaled = ddindex*npme + npme/2;

    return scaled/ndd;
}
2131 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2133 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2136 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2138 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
/* Build the list of PME-only rank ids for interleaved PP/PME ordering:
 * a PME rank id is inserted directly after the last PP rank that maps
 * to it, so each id is shifted by the number of PME ranks already placed.
 * Returns a newly allocated array of cr->npmenodes entries; caller owns it.
 */
2141 static int *dd_pmenodes(t_commrec *cr)
2146 snew(pmenodes, cr->npmenodes)
2148 for (i = 0; i < cr->dd->nnodes; i++)
/* A PME rank boundary lies after DD rank i when the PME index increases
 * or we reached the last DD rank.
 */
2150 p0 = cr_ddindex2pmeindex(cr, i);
2151 p1 = cr_ddindex2pmeindex(cr, i+1);
2152 if (i+1 == cr->dd->nnodes || p1 > p0)
2156 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2158 pmenodes[n] = i + 1 + n;
/* Return the PME rank index that serves the DD cell at grid coordinate
 * (x,y,z). The effective mapping used is ddindex2pmeindex() on the
 * linearized DD index.
 * NOTE(review): the bCartesian branch below references identifiers
 * (dd->ntot, dd->cartpmedim, dd_coords2pmecoords, ddindex before any
 * visible assignment) that do not match the rest of this file, where
 * these live on dd->comm; in upstream GROMACS this branch is commented
 * out. Verify before ever enabling it.
 */
2166 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2169 ivec coords, coords_pme, nc;
2174 if (dd->comm->bCartesian) {
2175 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2176 dd_coords2pmecoords(dd,coords,coords_pme);
2177 copy_ivec(dd->ntot,nc);
2178 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2179 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2181 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2183 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
/* Actual mapping: linearize (x,y,z) and convert DD index -> PME index */
2189 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
/* Translate the DD grid coordinate (x,y,z) into a simulation rank id.
 * Three cases: a full Cartesian PP+PME communicator (ask MPI), a stored
 * lookup table for Cartesian PP ordering, or interleaved ordering where
 * the rank id is the DD index shifted by the number of preceding PME ranks.
 */
2194 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2196 gmx_domdec_comm_t *comm;
2198 int ddindex, nodeid = -1;
2200 comm = cr->dd->comm;
2205 if (comm->bCartesianPP_PME)
2208 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2213 ddindex = dd_index(cr->dd->nc, coords);
2214 if (comm->bCartesianPP)
2216 nodeid = comm->ddindex2simnodeid[ddindex];
/* Interleaved PP/PME: shift by the PME ranks inserted before this rank */
2222 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
/* Return the simulation rank id of the PME rank that serves PP rank
 * sim_nodeid. NOTE(review): the initialization of pmenode is not
 * visible here; callers (gmx_pmeonlynode) compare the result against -1
 * for PME-only ranks, so it is presumably initialized to -1 -- confirm.
 */
2234 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2237 gmx_domdec_comm_t *comm;
2238 ivec coord, coord_pme;
2245 /* This assumes a uniform x domain decomposition grid cell size */
2246 if (comm->bCartesianPP_PME)
2249 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2250 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2252 /* This is a PP node */
2253 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2254 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2258 else if (comm->bCartesianPP)
2260 if (sim_nodeid < dd->nnodes)
2262 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2267 /* This assumes DD cells with identical x coordinates
2268 * are numbered sequentially.
2270 if (dd->comm->pmenodes == NULL)
2272 if (sim_nodeid < dd->nnodes)
2274 /* The DD index equals the nodeid */
2275 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
/* Interleaved ordering: scan the pmenodes list for the first PME rank
 * with an id above sim_nodeid.
 */
2281 while (sim_nodeid > dd->comm->pmenodes[i])
2285 if (sim_nodeid < dd->comm->pmenodes[i])
2287 pmenode = dd->comm->pmenodes[i];
/* Report the PME grid decomposition (number of PME ranks in x and y).
 * NOTE(review): the branch for dd == NULL is elided in this excerpt;
 * upstream returns 1/1 in that case -- confirm.
 */
2295 void get_pme_nnodes(const gmx_domdec_t *dd,
2296 int *npmenodes_x, int *npmenodes_y)
2300 *npmenodes_x = dd->comm->npmenodes_x;
2301 *npmenodes_y = dd->comm->npmenodes_y;
2310 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2312 gmx_bool bPMEOnlyNode;
2314 if (DOMAINDECOMP(cr))
2316 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2320 bPMEOnlyNode = FALSE;
2323 return bPMEOnlyNode;
/* Build the list of PP rank ids whose coordinates PME rank pmenodeid
 * receives; the caller gets a newly allocated array (*my_ddnodes) with
 * *nmy_ddnodes entries, and *node_peer set to the last PP rank in it.
 */
2326 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2327 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2331 ivec coord, coord_pme;
/* Upper bound: ceil(nnodes/npmenodes) PP ranks per PME rank */
2335 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2338 for (x = 0; x < dd->nc[XX]; x++)
2340 for (y = 0; y < dd->nc[YY]; y++)
2342 for (z = 0; z < dd->nc[ZZ]; z++)
2344 if (dd->comm->bCartesianPP_PME)
/* Cartesian: a PP cell is ours if its PME coordinate equals our own */
2349 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2350 if (dd->ci[XX] == coord_pme[XX] &&
2351 dd->ci[YY] == coord_pme[YY] &&
2352 dd->ci[ZZ] == coord_pme[ZZ])
2354 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2359 /* The slab corresponds to the nodeid in the PME group */
2360 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2362 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2369 /* The last PP-only node is the peer node */
2370 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2374 fprintf(debug, "Receive coordinates from PP nodes:");
2375 for (x = 0; x < *nmy_ddnodes; x++)
2377 fprintf(debug, " %d", (*my_ddnodes)[x]);
2379 fprintf(debug, "\n");
/* Decide whether this PP rank is the one that should communicate the
 * virial/energy with its PME rank: only the last PP rank sharing a PME
 * rank does. NOTE(review): the boolean result variable and its default
 * value are elided in this excerpt -- confirm against upstream.
 */
2383 static gmx_bool receive_vir_ener(t_commrec *cr)
2385 gmx_domdec_comm_t *comm;
2386 int pmenode, coords[DIM], rank;
/* With fewer PME than PP ranks, several PP ranks share one PME rank */
2390 if (cr->npmenodes < cr->dd->nnodes)
2392 comm = cr->dd->comm;
2393 if (comm->bCartesianPP_PME)
/* Cartesian: check whether the next cell along the PME dimension maps
 * to the same PME rank.
 */
2395 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2397 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2398 coords[comm->cartpmedim]++;
2399 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2401 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2402 if (dd_simnode2pmenode(cr, rank) == pmenode)
2404 /* This is not the last PP node for pmenode */
/* Sequential ordering: check whether rank+1 shares the same PME rank */
2412 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2413 if (cr->sim_nodeid+1 < cr->nnodes &&
2414 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2416 /* This is not the last PP node for pmenode */
2425 static void set_zones_ncg_home(gmx_domdec_t *dd)
2427 gmx_domdec_zones_t *zones;
2430 zones = &dd->comm->zones;
2432 zones->cg_range[0] = 0;
2433 for (i = 1; i < zones->n+1; i++)
2435 zones->cg_range[i] = dd->ncg_home;
2437 /* zone_ncg1[0] should always be equal to ncg_home */
2438 dd->comm->zone_ncg1[0] = dd->ncg_home;
/* Rebuild the local charge-group bookkeeping (global cg index list and
 * cumulative atom index) from the cg list stored in state->ncg_gl,
 * e.g. after reading a checkpoint; also recomputes the home counts.
 * NOTE(review): the lines updating cgindex[] and reading state->cg_gl
 * are elided in this excerpt -- confirm against upstream.
 */
2441 static void rebuild_cgindex(gmx_domdec_t *dd,
2442 const int *gcgs_index, t_state *state)
2444 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2447 dd_cg_gl = dd->index_gl;
2448 cgindex = dd->cgindex;
2451 for (i = 0; i < state->ncg_gl; i++)
2455 dd_cg_gl[i] = cg_gl;
/* Accumulate the atom count from the global cg sizes */
2456 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2460 dd->ncg_home = state->ncg_gl;
2463 set_zones_ncg_home(dd);
2466 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2468 while (cg >= cginfo_mb->cg_end)
2473 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2476 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2477 t_forcerec *fr, char *bLocalCG)
2479 cginfo_mb_t *cginfo_mb;
2485 cginfo_mb = fr->cginfo_mb;
2486 cginfo = fr->cginfo;
2488 for (cg = cg0; cg < cg1; cg++)
2490 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2494 if (bLocalCG != NULL)
2496 for (cg = cg0; cg < cg1; cg++)
2498 bLocalCG[index_gl[cg]] = TRUE;
/* Build the local->global (gatindex) and global->local (ga2la) atom
 * index tables for all zones, starting at charge group cg_start.
 * With real charge groups every atom of each cg is entered; without
 * (Verlet scheme) each cg is a single atom.
 */
2503 static void make_dd_indices(gmx_domdec_t *dd,
2504 const int *gcgs_index, int cg_start)
2506 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2507 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2512 bLocalCG = dd->comm->bLocalCG;
/* Grow the local->global table to cover all local atoms */
2514 if (dd->nat_tot > dd->gatindex_nalloc)
2516 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2517 srenew(dd->gatindex, dd->gatindex_nalloc);
2520 nzone = dd->comm->zones.n;
2521 zone2cg = dd->comm->zones.cg_range;
2522 zone_ncg1 = dd->comm->zone_ncg1;
2523 index_gl = dd->index_gl;
2524 gatindex = dd->gatindex;
2525 bCGs = dd->comm->bCGs;
2527 if (zone2cg[1] != dd->ncg_home)
2529 gmx_incons("dd->ncg_zone is not up to date");
2532 /* Make the local to global and global to local atom index */
2533 a = dd->cgindex[cg_start];
2534 for (zone = 0; zone < nzone; zone++)
2542 cg0 = zone2cg[zone];
2544 cg1 = zone2cg[zone+1];
/* cg1_p1 separates first-pulse cgs from cgs communicated further away */
2545 cg1_p1 = cg0 + zone_ncg1[zone];
2547 for (cg = cg0; cg < cg1; cg++)
2552 /* Signal that this cg is from more than one pulse away */
2555 cg_gl = index_gl[cg];
/* With charge groups: register every atom of the cg */
2558 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2561 ga2la_set(dd->ga2la, a_gl, a, zone1);
/* Without charge groups: one atom per cg, global index == cg index */
2567 gatindex[a] = cg_gl;
2568 ga2la_set(dd->ga2la, cg_gl, a, zone1);
/* Debug consistency check of the bLocalCG flags: every local cg must be
 * marked, and the number of marked cgs must equal ncg_tot. Returns the
 * number of inconsistencies found; a NULL bLocalCG is trivially OK.
 */
2575 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2578 int ncg, i, ngl, nerr;
2581 if (bLocalCG == NULL)
2585 for (i = 0; i < dd->ncg_tot; i++)
2587 if (!bLocalCG[dd->index_gl[i]])
2590 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
/* Count all globally marked cgs and compare with the local total */
2595 for (i = 0; i < ncg_sys; i++)
2602 if (ngl != dd->ncg_tot)
2604 fprintf(stderr, "DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
/* Debug check that the local->global (gatindex) and global->local
 * (ga2la) index tables are mutually consistent; aborts with gmx_fatal
 * when any inconsistency is found.
 */
2611 static void check_index_consistency(gmx_domdec_t *dd,
2612 int natoms_sys, int ncg_sys,
2615 int nerr, ngl, i, a, cell;
/* Expensive check: no global atom may appear twice locally */
2620 if (dd->comm->DD_debug > 1)
2622 snew(have, natoms_sys);
2623 for (a = 0; a < dd->nat_tot; a++)
2625 if (have[dd->gatindex[a]] > 0)
2627 fprintf(stderr, "DD node %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2631 have[dd->gatindex[a]] = a + 1;
2637 snew(have, dd->nat_tot);
/* Every ga2la entry must point back to a matching gatindex entry */
2640 for (i = 0; i < natoms_sys; i++)
2642 if (ga2la_get(dd->ga2la, i, &a, &cell))
2644 if (a >= dd->nat_tot)
2646 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2652 if (dd->gatindex[a] != i)
2654 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2661 if (ngl != dd->nat_tot)
2664 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2665 dd->rank, where, ngl, dd->nat_tot);
/* Every local atom must have a ga2la entry */
2667 for (a = 0; a < dd->nat_tot; a++)
2672 "DD node %d, %s: local atom %d, global %d has no global index\n",
2673 dd->rank, where, a+1, dd->gatindex[a]+1);
2678 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2682 gmx_fatal(FARGS, "DD node %d, %s: %d atom/cg index inconsistencies",
2683 dd->rank, where, nerr);
/* Invalidate the global->local lookups for atoms from a_start and charge
 * groups from cg_start onwards, plus the vsite and (when present)
 * constraint index caches. A full clear (from 0) wipes the whole ga2la
 * table without searching.
 */
2687 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2694 /* Clear the whole list without searching */
2695 ga2la_clear(dd->ga2la);
/* Partial clear: delete only the entries being invalidated */
2699 for (i = a_start; i < dd->nat_tot; i++)
2701 ga2la_del(dd->ga2la, dd->gatindex[i]);
2705 bLocalCG = dd->comm->bLocalCG;
2708 for (i = cg_start; i < dd->ncg_tot; i++)
2710 bLocalCG[dd->index_gl[i]] = FALSE;
2714 dd_clear_local_vsite_indices(dd);
2716 if (dd->constraints)
2718 dd_clear_local_constraint_indices(dd);
2722 /* This function should be used for moving the domain boudaries during DLB,
2723 * for obtaining the minimum cell size. It checks the initially set limit
2724 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2725 * and, possibly, a longer cut-off limit set for PME load balancing.
2727 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2731 cellsize_min = comm->cellsize_min[dim];
2733 if (!comm->bVacDLBNoLimit)
2735 /* The cut-off might have changed, e.g. by PME load balacning,
2736 * from the value used to set comm->cellsize_min, so check it.
2738 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2740 if (comm->bPMELoadBalDLBLimits)
2742 /* Check for the cut-off limit set by the PME load balancing */
2743 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2747 return cellsize_min;
2750 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2753 real grid_jump_limit;
2755 /* The distance between the boundaries of cells at distance
2756 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2757 * and by the fact that cells should not be shifted by more than
2758 * half their size, such that cg's only shift by one cell
2759 * at redecomposition.
2761 grid_jump_limit = comm->cellsize_limit;
2762 if (!comm->bVacDLBNoLimit)
2764 if (comm->bPMELoadBalDLBLimits)
2766 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2768 grid_jump_limit = max(grid_jump_limit,
2769 cutoff/comm->cd[dim_ind].np);
2772 return grid_jump_limit;
/* Check that neighboring DLB cell boundaries have not shifted past the
 * allowed jump limit in any decomposed dimension beyond the first;
 * aborts with gmx_fatal on violation.
 * NOTE(review): a bFatal-style parameter and the returned flag are
 * elided in this excerpt -- confirm the non-fatal path upstream.
 */
2775 static gmx_bool check_grid_jump(gmx_int64_t step,
2781 gmx_domdec_comm_t *comm;
2790 for (d = 1; d < dd->ndim; d++)
2793 limit = grid_jump_limit(comm, cutoff, d);
/* Convert cell fractions to distances; correct for triclinic skew */
2794 bfac = ddbox->box_size[dim];
2795 if (ddbox->tric_dir[dim])
2797 bfac *= ddbox->skew_fac[dim];
2799 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2800 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2808 /* This error should never be triggered under normal
2809 * circumstances, but you never know ...
2811 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
2812 gmx_step_str(step, buf),
2813 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2821 static int dd_load_count(gmx_domdec_comm_t *comm)
2823 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
/* Return the load measure of this rank for dynamic load balancing:
 * flop count (optionally randomized to test DLB) or the force cycle
 * count with the maximum of the last n counts subtracted; ranks sharing
 * a GPU additionally replace their GPU wait time by the shared average.
 */
2826 static float dd_force_load(gmx_domdec_comm_t *comm)
/* eFlop > 1: add noise so DLB can be tested on a balanced system */
2833 if (comm->eFlop > 1)
2835 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2840 load = comm->cycl[ddCyclF];
2841 if (comm->cycl_n[ddCyclF] > 1)
2843 /* Subtract the maximum of the last n cycle counts
2844 * to get rid of possible high counts due to other sources,
2845 * for instance system activity, that would otherwise
2846 * affect the dynamic load balancing.
2848 load -= comm->cycl_max[ddCyclF];
2852 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2854 float gpu_wait, gpu_wait_sum;
2856 gpu_wait = comm->cycl[ddCyclWaitGPU];
2857 if (comm->cycl_n[ddCyclF] > 1)
2859 /* We should remove the WaitGPU time of the same MD step
2860 * as the one with the maximum F time, since the F time
2861 * and the wait time are not independent.
2862 * Furthermore, the step for the max F time should be chosen
2863 * the same on all ranks that share the same GPU.
2864 * But to keep the code simple, we remove the average instead.
2865 * The main reason for artificially long times at some steps
2866 * is spurious CPU activity or MPI time, so we don't expect
2867 * that changes in the GPU wait time matter a lot here.
2869 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2871 /* Sum the wait times over the ranks that share the same GPU */
2872 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2873 comm->mpi_comm_gpu_shared);
2874 /* Replace the wait time by the average over the ranks */
2875 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
/* Build the cumulative cell-boundary fractions along dimension dim for
 * static load balancing: user-supplied fractions (slb_frac) when set,
 * otherwise a uniform grid. Allocates and fills *dim_f (nc[dim]+1
 * entries, running from 0 to 1).
 */
2883 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2885 gmx_domdec_comm_t *comm;
2890 snew(*dim_f, dd->nc[dim]+1);
2892 for (i = 1; i < dd->nc[dim]; i++)
2894 if (comm->slb_frac[dim])
2896 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2900 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2903 (*dim_f)[dd->nc[dim]] = 1;
/* Set up the PME slab <-> PP cell mapping for PME decomposition
 * dimension dimind: determines the number of slabs, the min/max PP cell
 * coordinate served by each slab, and the static boundary fractions.
 */
2906 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2908 int pmeindex, slab, nso, i;
/* Special case: DD decomposes y only while PME keeps a single x slab */
2911 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2917 ddpme->dim = dimind;
2919 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2921 ddpme->nslab = (ddpme->dim == 0 ?
2922 dd->comm->npmenodes_x :
2923 dd->comm->npmenodes_y);
2925 if (ddpme->nslab <= 1)
2930 nso = dd->comm->npmenodes/ddpme->nslab;
2931 /* Determine for each PME slab the PP location range for dimension dim */
2932 snew(ddpme->pp_min, ddpme->nslab);
2933 snew(ddpme->pp_max, ddpme->nslab);
/* Start with an empty (inverted) range, then widen with each PP cell */
2934 for (slab = 0; slab < ddpme->nslab; slab++)
2936 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2937 ddpme->pp_max[slab] = 0;
2939 for (i = 0; i < dd->nnodes; i++)
2941 ddindex2xyz(dd->nc, i, xyz);
2942 /* For y only use our y/z slab.
2943 * This assumes that the PME x grid size matches the DD grid size.
2945 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2947 pmeindex = ddindex2pmeindex(dd, i);
2950 slab = pmeindex/nso;
2954 slab = pmeindex % ddpme->nslab;
2956 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
2957 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
2961 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
2964 int dd_pme_maxshift_x(gmx_domdec_t *dd)
2966 if (dd->comm->ddpme[0].dim == XX)
2968 return dd->comm->ddpme[0].maxshift;
2976 int dd_pme_maxshift_y(gmx_domdec_t *dd)
2978 if (dd->comm->ddpme[0].dim == YY)
2980 return dd->comm->ddpme[0].maxshift;
2982 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2984 return dd->comm->ddpme[1].maxshift;
/* Determine the maximum slab shift (ddpme->maxshift) needed for PME
 * coordinate redistribution: the largest number of slabs away from its
 * own slab that any PP cell's atoms might need to be sent, given the
 * current cell boundaries and an atom drift allowance of 2/3 cut-off.
 */
2992 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
2993 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
2995 gmx_domdec_comm_t *comm;
2998 real range, pme_boundary;
3002 nc = dd->nc[ddpme->dim];
3005 if (!ddpme->dim_match)
3007 /* PP decomposition is not along dim: the worst situation */
3010 else if (ns <= 3 || (bUniform && ns == nc))
3012 /* The optimal situation */
3017 /* We need to check for all pme nodes which nodes they
3018 * could possibly need to communicate with.
3020 xmin = ddpme->pp_min;
3021 xmax = ddpme->pp_max;
3022 /* Allow for atoms to be maximally 2/3 times the cut-off
3023 * out of their DD cell. This is a reasonable balance between
3024 * between performance and support for most charge-group/cut-off
3027 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3028 /* Avoid extra communication when we are exactly at a boundary */
/* Increase sh until no slab can receive atoms from beyond sh slabs away;
 * the +-ns variants handle periodic wrap-around.
 */
3032 for (s = 0; s < ns; s++)
3034 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3035 pme_boundary = (real)s/ns;
3038 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3040 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3044 pme_boundary = (real)(s+1)/ns;
3047 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3049 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3056 ddpme->maxshift = sh;
3060 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3061 ddpme->dim, ddpme->maxshift);
/* Verify that every bounded, decomposed box dimension is large enough
 * for the requested number of DD cells at the minimum allowed cell
 * size (with margin); aborts with gmx_fatal otherwise.
 */
3065 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3069 for (d = 0; d < dd->ndim; d++)
/* skew_fac corrects the usable size for triclinic boxes */
3072 if (dim < ddbox->nboundeddim &&
3073 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3074 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3076 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3077 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3078 dd->nc[dim], dd->comm->cellsize_limit);
/* Compute the static (non-DLB) cell boundaries in every dimension and
 * the number of communication pulses (npulse) needed to cover the
 * cut-off. On the master (bMaster) all boundaries are stored in
 * dd->ma->cell_x; otherwise only this rank's cell_x0/cell_x1 are set.
 */
3083 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3084 gmx_bool bMaster, ivec npulse)
3086 gmx_domdec_comm_t *comm;
3089 real *cell_x, cell_dx, cellsize;
3093 for (d = 0; d < DIM; d++)
3095 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
/* Uniform case: all cells in this dimension have the same size */
3097 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3100 cell_dx = ddbox->box_size[d]/dd->nc[d];
3103 for (j = 0; j < dd->nc[d]+1; j++)
3105 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3110 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3111 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
/* npulse[d] = number of neighbor cells needed to reach the cut-off */
3113 cellsize = cell_dx*ddbox->skew_fac[d];
3114 while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
3118 cellsize_min[d] = cellsize;
3122 /* Statically load balanced grid */
3123 /* Also when we are not doing a master distribution we determine
3124 * all cell borders in a loop to obtain identical values
3125 * to the master distribution case and to determine npulse.
3129 cell_x = dd->ma->cell_x[d];
3133 snew(cell_x, dd->nc[d]+1);
3135 cell_x[0] = ddbox->box0[d];
3136 for (j = 0; j < dd->nc[d]; j++)
3138 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3139 cell_x[j+1] = cell_x[j] + cell_dx;
3140 cellsize = cell_dx*ddbox->skew_fac[d];
3141 while (cellsize*npulse[d] < comm->cutoff &&
3142 npulse[d] < dd->nc[d]-1)
3146 cellsize_min[d] = min(cellsize_min[d], cellsize);
3150 comm->cell_x0[d] = cell_x[dd->ci[d]];
3151 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3155 /* The following limitation is to avoid that a cell would receive
3156 * some of its own home charge groups back over the periodic boundary.
3157 * Double charge groups cause trouble with the global indices.
3159 if (d < ddbox->npbcdim &&
3160 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3162 gmx_fatal_collective(FARGS, NULL, dd,
3163 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3164 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3166 dd->nc[d], dd->nc[d],
3167 dd->nnodes > dd->nc[d] ? "cells" : "processors");
3171 if (!comm->bDynLoadBal)
3173 copy_rvec(cellsize_min, comm->cellsize_min);
/* Update the PME slab communication ranges for the new boundaries */
3176 for (d = 0; d < comm->npmedecompdim; d++)
3178 set_pme_maxshift(dd, &comm->ddpme[d],
3179 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3180 comm->ddpme[d].slb_dim_f);
/* Enforce minimum-cell-size and staggering limits on the relative DLB cell
 * boundaries root->cell_f within the half-open index range [range[0], range[1])
 * of decomposition dimension index d (Cartesian dimension dim).
 * Recurses on sub-ranges when the staggering bounds (root->bound_min/
 * root->bound_max) cannot all be satisfied at once.
 * NOTE(review): this excerpt is non-contiguous (original line numbers skip),
 * so some statements of this function are not visible here. */
3185 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3186 int d, int dim, gmx_domdec_root_t *root,
3188 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3190 gmx_domdec_comm_t *comm;
3191 int ncd, i, j, nmin, nmin_old;
3192 gmx_bool bLimLo, bLimHi;
3194 real fac, halfway, cellsize_limit_f_i, region_size;
3195 gmx_bool bPBC, bLastHi = FALSE;
/* nrange holds the sub-range passed to recursive calls; starts as a copy of range */
3196 int nrange[] = {range[0], range[1]};
3198 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
/* Only interior (PBC) dimensions carry a hard lower limit on cell size */
3204 bPBC = (dim < ddbox->npbcdim);
3206 cell_size = root->buf_ncd;
3210 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3213 /* First we need to check if the scaling does not make cells
3214 * smaller than the smallest allowed size.
3215 * We need to do this iteratively, since if a cell is too small,
3216 * it needs to be enlarged, which makes all the other cells smaller,
3217 * which could in turn make another cell smaller than allowed.
3219 for (i = range[0]; i < range[1]; i++)
3221 root->bCellMin[i] = FALSE;
3227 /* We need the total for normalization */
3229 for (i = range[0]; i < range[1]; i++)
3231 if (root->bCellMin[i] == FALSE)
3233 fac += cell_size[i];
/* Rescale the still-free cells so the pinned (minimum-size) cells fit in region_size */
3236 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3237 /* Determine the cell boundaries */
3238 for (i = range[0]; i < range[1]; i++)
3240 if (root->bCellMin[i] == FALSE)
3242 cell_size[i] *= fac;
/* The outermost cells of a non-PBC dimension have no size limit */
3243 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3245 cellsize_limit_f_i = 0;
3249 cellsize_limit_f_i = cellsize_limit_f;
3251 if (cell_size[i] < cellsize_limit_f_i)
3253 root->bCellMin[i] = TRUE;
3254 cell_size[i] = cellsize_limit_f_i;
3258 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
/* Repeat until no additional cell got pinned to the minimum size */
3261 while (nmin > nmin_old);
3264 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3265 /* For this check we should not use DD_CELL_MARGIN,
3266 * but a slightly smaller factor,
3267 * since rounding could get use below the limit.
3269 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3272 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3273 gmx_step_str(step, buf),
3274 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3275 ncd, comm->cellsize_min[dim]);
/* Limited if any cell hit the minimum, or we are working on a sub-range */
3278 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3282 /* Check if the boundary did not displace more than halfway
3283 * each of the cells it bounds, as this could cause problems,
3284 * especially when the differences between cell sizes are large.
3285 * If changes are applied, they will not make cells smaller
3286 * than the cut-off, as we check all the boundaries which
3287 * might be affected by a change and if the old state was ok,
3288 * the cells will at most be shrunk back to their old size.
3290 for (i = range[0]+1; i < range[1]; i++)
3292 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3293 if (root->cell_f[i] < halfway)
3295 root->cell_f[i] = halfway;
3296 /* Check if the change also causes shifts of the next boundaries */
3297 for (j = i+1; j < range[1]; j++)
3299 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3301 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3305 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3306 if (root->cell_f[i] > halfway)
3308 root->cell_f[i] = halfway;
3309 /* Check if the change also causes shifts of the next boundaries */
3310 for (j = i-1; j >= range[0]+1; j--)
3312 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3314 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3321 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3322 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3323 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3324 * for a and b nrange is used */
3327 /* Take care of the staggering of the cell boundaries */
3330 for (i = range[0]; i < range[1]; i++)
3332 root->cell_f_max0[i] = root->cell_f[i];
3333 root->cell_f_min1[i] = root->cell_f[i+1];
3338 for (i = range[0]+1; i < range[1]; i++)
3340 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3341 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3342 if (bLimLo && bLimHi)
3344 /* Both limits violated, try the best we can */
3345 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3346 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3347 nrange[0] = range[0];
3349 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3352 nrange[1] = range[1];
3353 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3359 /* root->cell_f[i] = root->bound_min[i]; */
3360 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3363 else if (bLimHi && !bLastHi)
3366 if (nrange[1] < range[1]) /* found a LimLo before */
3368 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3369 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3370 nrange[0] = nrange[1];
3372 root->cell_f[i] = root->bound_max[i];
3374 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3376 nrange[1] = range[1];
3379 if (nrange[1] < range[1]) /* found last a LimLo */
3381 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3382 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3383 nrange[0] = nrange[1];
3384 nrange[1] = range[1];
3385 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3387 else if (nrange[0] > range[0]) /* found at least one LimHi */
3389 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
/* Row-root (master of a cell row) computation of new dynamic-load-balanced
 * cell boundaries along decomposition dimension d (Cartesian dim).
 * Uses the measured per-cell loads to resize cells with under-relaxation,
 * limits the per-step change, enforces minimum sizes and staggering limits,
 * and appends lower-dimension boundaries and PME maxshift values to
 * root->cell_f for broadcasting to the row.
 * NOTE(review): excerpt is non-contiguous; some statements are not visible. */
3396 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3397 int d, int dim, gmx_domdec_root_t *root,
3398 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3399 gmx_bool bUniform, gmx_int64_t step)
3401 gmx_domdec_comm_t *comm;
3402 int ncd, d1, i, j, pos;
3404 real load_aver, load_i, imbalance, change, change_max, sc;
3405 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3409 int range[] = { 0, 0 };
3413 /* Convert the maximum change from the input percentage to a fraction */
3414 change_limit = comm->dlb_scale_lim*0.01;
3418 bPBC = (dim < ddbox->npbcdim);
3420 cell_size = root->buf_ncd;
3422 /* Store the original boundaries */
3423 for (i = 0; i < ncd+1; i++)
3425 root->old_cell_f[i] = root->cell_f[i];
/* Uniform case: all cells get identical relative size */
3429 for (i = 0; i < ncd; i++)
3431 cell_size[i] = 1.0/ncd;
3434 else if (dd_load_count(comm))
3436 load_aver = comm->load[d].sum_m/ncd;
3438 for (i = 0; i < ncd; i++)
3440 /* Determine the relative imbalance of cell i */
3441 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3442 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3443 /* Determine the change of the cell size using underrelaxation */
3444 change = -relax*imbalance;
3445 change_max = max(change_max, max(change, -change));
3447 /* Limit the amount of scaling.
3448 * We need to use the same rescaling for all cells in one row,
3449 * otherwise the load balancing might not converge.
3452 if (change_max > change_limit)
3454 sc *= change_limit/change_max;
3456 for (i = 0; i < ncd; i++)
3458 /* Determine the relative imbalance of cell i */
3459 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3460 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3461 /* Determine the change of the cell size using underrelaxation */
3462 change = -sc*imbalance;
3463 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
/* Convert the absolute minimum cell size to a box fraction, with margin */
3467 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3468 cellsize_limit_f *= DD_CELL_MARGIN;
3469 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3470 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3471 if (ddbox->tric_dir[dim])
3473 cellsize_limit_f /= ddbox->skew_fac[dim];
3474 dist_min_f /= ddbox->skew_fac[dim];
/* With pressure coupling the box can shrink; leave extra margin */
3476 if (bDynamicBox && d > 0)
3478 dist_min_f *= DD_PRES_SCALE_MARGIN;
3480 if (d > 0 && !bUniform)
3482 /* Make sure that the grid is not shifted too much */
3483 for (i = 1; i < ncd; i++)
3485 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3487 gmx_incons("Inconsistent DD boundary staggering limits!");
3489 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3490 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3493 root->bound_min[i] += 0.5*space;
3495 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3496 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3499 root->bound_max[i] += 0.5*space;
3504 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3506 root->cell_f_max0[i-1] + dist_min_f,
3507 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3508 root->cell_f_min1[i] - dist_min_f);
/* Boundaries of the full row are fixed at 0 and 1 */
3513 root->cell_f[0] = 0;
3514 root->cell_f[ncd] = 1;
3515 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3518 /* After the checks above, the cells should obey the cut-off
3519 * restrictions, but it does not hurt to check.
3521 for (i = 0; i < ncd; i++)
3525 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3526 dim, i, root->cell_f[i], root->cell_f[i+1]);
3529 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3530 root->cell_f[i+1] - root->cell_f[i] <
3531 cellsize_limit_f/DD_CELL_MARGIN)
3535 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3536 gmx_step_str(step, buf), dim2char(dim), i,
3537 (root->cell_f[i+1] - root->cell_f[i])
3538 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3543 /* Store the cell boundaries of the lower dimensions at the end */
3544 for (d1 = 0; d1 < d; d1++)
3546 root->cell_f[pos++] = comm->cell_f0[d1];
3547 root->cell_f[pos++] = comm->cell_f1[d1];
3550 if (d < comm->npmedecompdim)
3552 /* The master determines the maximum shift for
3553 * the coordinate communication between separate PME nodes.
3555 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
/* maxshift values are piggy-backed onto the real-valued broadcast buffer */
3557 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3560 root->cell_f[pos++] = comm->ddpme[1].maxshift;
/* Convert this rank's relative cell fractions (comm->cell_f0/cell_f1) for
 * decomposition dimension index dimind into absolute coordinates
 * comm->cell_x0/cell_x1, shifting by the box origin for unbounded dims. */
3564 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3565 gmx_ddbox_t *ddbox, int dimind)
3567 gmx_domdec_comm_t *comm;
3572 /* Set the cell dimensions */
3573 dim = dd->dim[dimind];
3574 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3575 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
/* Dimensions not bounded by the box are offset by the bounding-box origin */
3576 if (dim >= ddbox->nboundeddim)
3578 comm->cell_x0[dim] += ddbox->box0[dim];
3579 comm->cell_x1[dim] += ddbox->box0[dim];
/* Broadcast the row of DLB cell fractions (cell_f_row) from the row root to
 * all ranks in the row, then extract this rank's own boundary pair, the
 * lower-dimension fractions, and the communicated PME maxshift values. */
3583 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3584 int d, int dim, real *cell_f_row,
3587 gmx_domdec_comm_t *comm;
3593 /* Each node would only need to know two fractions,
3594 * but it is probably cheaper to broadcast the whole array.
3596 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3597 0, comm->mpi_comm_load[d]);
3599 /* Copy the fractions for this dimension from the buffer */
3600 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3601 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3602 /* The whole array was communicated, so set the buffer position */
3603 pos = dd->nc[dim] + 1;
3604 for (d1 = 0; d1 <= d; d1++)
3608 /* Copy the cell fractions of the lower dimensions */
3609 comm->cell_f0[d1] = cell_f_row[pos++];
3610 comm->cell_f1[d1] = cell_f_row[pos++];
3612 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3614 /* Convert the communicated shift from float to int */
/* +0.5 rounds to nearest, since the shift was stored as a real */
3615 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3618 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
/* Recompute DLB cell sizes for every decomposition dimension: the row root
 * computes the new boundaries, then the result is distributed to the row.
 * NOTE(review): excerpt is non-contiguous; the bRowMember/bRowRoot setup is
 * only partially visible here. */
3622 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3623 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3624 gmx_bool bUniform, gmx_int64_t step)
3626 gmx_domdec_comm_t *comm;
3628 gmx_bool bRowMember, bRowRoot;
3633 for (d = 0; d < dd->ndim; d++)
3638 for (d1 = d; d1 < dd->ndim; d1++)
3640 if (dd->ci[dd->dim[d1]] > 0)
/* The row root computes the new boundaries for this dimension */
3653 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3654 ddbox, bDynamicBox, bUniform, step);
3655 cell_f_row = comm->root[d]->cell_f;
3659 cell_f_row = comm->cell_f_row;
3661 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
/* Static-box fast path: refresh only the absolute cell bounds from the
 * stored relative fractions, without recomputing the DLB boundaries. */
3666 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3670 /* This function assumes the box is static and should therefore
3671 * not be called when the box has changed since the last
3672 * call to dd_partition_system.
3674 for (d = 0; d < dd->ndim; d++)
3676 relative_to_absolute_cell_bounds(dd, ddbox, d);
/* Top-level DLB cell-size update. When load balancing is performed (bDoDLB,
 * per the visible call structure) new boundaries are computed under the
 * DDCOMMBOUND wallcycle counter; otherwise, for a dynamic box, only the
 * absolute bounds are refreshed. Dimensions without DD span the whole box. */
3682 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3683 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3684 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3685 gmx_wallcycle_t wcycle)
3687 gmx_domdec_comm_t *comm;
3694 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3695 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3696 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3698 else if (bDynamicBox)
3700 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3703 /* Set the dimensions for which no DD is used */
3704 for (dim = 0; dim < DIM; dim++)
3706 if (dd->nc[dim] == 1)
3708 comm->cell_x0[dim] = 0;
3709 comm->cell_x1[dim] = ddbox->box_size[dim];
3710 if (dim >= ddbox->nboundeddim)
3712 comm->cell_x0[dim] += ddbox->box0[dim];
3713 comm->cell_x1[dim] += ddbox->box0[dim];
/* Grow the per-dimension communication index arrays (cd->ind) when the
 * number of communication pulses npulse exceeds the current allocation.
 * Newly added entries get a NULL index pointer and zero allocation size. */
3719 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3722 gmx_domdec_comm_dim_t *cd;
3724 for (d = 0; d < dd->ndim; d++)
3726 cd = &dd->comm->cd[d];
3727 np = npulse[dd->dim[d]];
3728 if (np > cd->np_nalloc)
3732 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3733 dim2char(dd->dim[d]), np);
/* Only warn on stderr when growing an existing allocation, master only */
3735 if (DDMASTER(dd) && cd->np_nalloc > 0)
3737 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3739 srenew(cd->ind, np);
3740 for (i = cd->np_nalloc; i < np; i++)
3742 cd->ind[i].index = NULL;
3743 cd->ind[i].nalloc = 0;
/* Set the DD cell sizes for this step: saves the old bounds for the charge
 * group displacement check, then dispatches to the DLB or the static (slb)
 * sizing path; the static path also reallocates communication indices. */
3752 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3753 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3754 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3755 gmx_wallcycle_t wcycle)
3757 gmx_domdec_comm_t *comm;
3763 /* Copy the old cell boundaries for the cg displacement check */
3764 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3765 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3767 if (comm->bDynLoadBal)
3771 check_box_size(dd, ddbox);
3773 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
/* Static load balancing: recompute slab sizes and pulse counts */
3777 set_dd_cell_sizes_slb(dd, ddbox, FALSE, npulse);
3778 realloc_comm_ind(dd, npulse);
3783 for (d = 0; d < DIM; d++)
3785 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3786 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
/* Validate the DLB cell sizes against the minimum allowed size (fatal if a
 * cell shrank below it) and, when needed for staggered grids or unbounded
 * dimensions, communicate the zone boundaries used for neighbor searching. */
3791 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3793 rvec cell_ns_x0, rvec cell_ns_x1,
3796 gmx_domdec_comm_t *comm;
3801 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3803 dim = dd->dim[dim_ind];
3805 /* Without PBC we don't have restrictions on the outer cells */
3806 if (!(dim >= ddbox->npbcdim &&
3807 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3808 comm->bDynLoadBal &&
3809 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3810 comm->cellsize_min[dim])
3813 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3814 gmx_step_str(step, buf), dim2char(dim),
3815 comm->cell_x1[dim] - comm->cell_x0[dim],
3816 ddbox->skew_fac[dim],
3817 dd->comm->cellsize_min[dim],
3818 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Staggered (bGridJump) multi-dim grids and unbounded dims need boundary exchange */
3822 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3824 /* Communicate the boundaries and update cell_ns_x0/1 */
3825 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3826 if (dd->bGridJump && dd->ndim > 1)
3828 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
/* Build the triclinic correction matrix tcm from the box: the visible
 * entries map Cartesian coordinates onto triclinic lattice coordinates by
 * removing the off-diagonal box components (divided by the diagonals).
 * NOTE(review): lines for the npbcdim-dependent cases are not visible in
 * this excerpt. */
3833 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3837 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3845 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3846 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
/* Fatal checks on the box shape restrictions required for screw PBC:
 * no off-diagonal x components and a zero zy component. */
3855 static void check_screw_box(matrix box)
3857 /* Mathematical limitation */
3858 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3860 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3863 /* Limitation due to the asymmetry of the eighth shell method */
3864 if (box[ZZ][YY] != 0)
3866 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
/* Master-only initial distribution of charge groups over the DD cells.
 * Computes each charge group's center of geometry, puts it (and its atoms)
 * in the box with PBC — including the screw-PBC y/z mirror — then assigns
 * the group to the DD cell whose coordinate interval contains the center.
 * Results go into the master arrays ma->ncg/nat/cg/index.
 * NOTE(review): excerpt is non-contiguous; some statements are not visible. */
3870 static void distribute_cg(FILE *fplog, gmx_int64_t step,
3871 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3874 gmx_domdec_master_t *ma;
3875 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3876 int i, icg, j, k, k0, k1, d, npbcdim;
3878 rvec box_size, cg_cm;
3880 real nrcg, inv_ncg, pos_d;
3882 gmx_bool bUnbounded, bScrew;
/* Temporary per-node index lists, sized generously to limit reallocations */
3886 if (tmp_ind == NULL)
3888 snew(tmp_nalloc, dd->nnodes);
3889 snew(tmp_ind, dd->nnodes);
3890 for (i = 0; i < dd->nnodes; i++)
3892 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3893 snew(tmp_ind[i], tmp_nalloc[i]);
3897 /* Clear the count */
3898 for (i = 0; i < dd->nnodes; i++)
3904 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3906 cgindex = cgs->index;
3908 /* Compute the center of geometry for all charge groups */
3909 for (icg = 0; icg < cgs->nr; icg++)
3912 k1 = cgindex[icg+1];
/* Single-atom group: the atom position is the center */
3916 copy_rvec(pos[k0], cg_cm);
3923 for (k = k0; (k < k1); k++)
3925 rvec_inc(cg_cm, pos[k]);
3927 for (d = 0; (d < DIM); d++)
3929 cg_cm[d] *= inv_ncg;
3932 /* Put the charge group in the box and determine the cell index */
3933 for (d = DIM-1; d >= 0; d--)
3936 if (d < dd->npbcdim)
3938 bScrew = (dd->bScrewPBC && d == XX);
3939 if (tric_dir[d] && dd->nc[d] > 1)
3941 /* Use triclinic coordintates for this dimension */
3942 for (j = d+1; j < DIM; j++)
3944 pos_d += cg_cm[j]*tcm[j][d];
3947 while (pos_d >= box[d][d])
3950 rvec_dec(cg_cm, box[d]);
/* Screw PBC: shifting over x mirrors y and z */
3953 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3954 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3956 for (k = k0; (k < k1); k++)
3958 rvec_dec(pos[k], box[d]);
3961 pos[k][YY] = box[YY][YY] - pos[k][YY];
3962 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3969 rvec_inc(cg_cm, box[d]);
3972 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3973 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3975 for (k = k0; (k < k1); k++)
3977 rvec_inc(pos[k], box[d]);
3980 pos[k][YY] = box[YY][YY] - pos[k][YY];
3981 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3986 /* This could be done more efficiently */
/* Linear scan over the cell boundaries to locate the owning cell */
3988 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
3993 i = dd_index(dd->nc, ind);
3994 if (ma->ncg[i] == tmp_nalloc[i])
3996 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
3997 srenew(tmp_ind[i], tmp_nalloc[i]);
3999 tmp_ind[i][ma->ncg[i]] = icg;
4001 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
/* Concatenate the per-node lists into the master cg array */
4005 for (i = 0; i < dd->nnodes; i++)
4008 for (k = 0; k < ma->ncg[i]; k++)
4010 ma->cg[k1++] = tmp_ind[i][k];
4013 ma->index[dd->nnodes] = k1;
4015 for (i = 0; i < dd->nnodes; i++)
4025 fprintf(fplog, "Charge group distribution at step %s:",
4026 gmx_step_str(step, buf));
4027 for (i = 0; i < dd->nnodes; i++)
4029 fprintf(fplog, " %d", ma->ncg[i]);
4031 fprintf(fplog, "\n");
/* Distribute all charge groups over the DD nodes for the initial partition:
 * the master computes the assignment (distribute_cg), then counts and the
 * global-index lists are scattered to every node, which sets up its home
 * cg/atom bookkeeping (index_gl, cgindex, ncg_home, nat_home).
 * NOTE(review): excerpt is non-contiguous; some statements are not visible. */
4035 static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4036 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4039 gmx_domdec_master_t *ma = NULL;
4042 int *ibuf, buf2[2] = { 0, 0 };
4043 gmx_bool bMaster = DDMASTER(dd);
4050 check_screw_box(box);
4053 set_dd_cell_sizes_slb(dd, ddbox, TRUE, npulse);
4055 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
/* Pack per-node cg and atom counts pairwise for the scatter */
4056 for (i = 0; i < dd->nnodes; i++)
4058 ma->ibuf[2*i] = ma->ncg[i];
4059 ma->ibuf[2*i+1] = ma->nat[i];
4067 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4069 dd->ncg_home = buf2[0];
4070 dd->nat_home = buf2[1];
4071 dd->ncg_tot = dd->ncg_home;
4072 dd->nat_tot = dd->nat_home;
4073 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4075 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4076 srenew(dd->index_gl, dd->cg_nalloc);
4077 srenew(dd->cgindex, dd->cg_nalloc+1);
/* Byte counts and displacements for the variable-size scatter of cg indices */
4081 for (i = 0; i < dd->nnodes; i++)
4083 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4084 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4089 DDMASTER(dd) ? ma->ibuf : NULL,
4090 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4091 DDMASTER(dd) ? ma->cg : NULL,
4092 dd->ncg_home*sizeof(int), dd->index_gl);
4094 /* Determine the home charge group sizes */
4096 for (i = 0; i < dd->ncg_home; i++)
4098 cg_gl = dd->index_gl[i];
4100 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4105 fprintf(debug, "Home charge groups:\n");
4106 for (i = 0; i < dd->ncg_home; i++)
4108 fprintf(debug, " %d", dd->index_gl[i]);
4111 fprintf(debug, "\n");
4114 fprintf(debug, "\n");
/* Per-atom vector compaction for cg redistribution: charge groups that stay
 * have their atom rvecs compacted in place in src; groups that move have
 * their rvecs for vector slot 'vec' (of nvec interleaved vectors) copied
 * into the per-direction buffers comm->cgcm_state[m]. Returns via home_pos
 * bookkeeping (return statement not visible in this excerpt). */
4118 static int compact_and_copy_vec_at(int ncg, int *move,
4121 rvec *src, gmx_domdec_comm_t *comm,
4124 int m, icg, i, i0, i1, nrcg;
4130 for (m = 0; m < DIM*2; m++)
4136 for (icg = 0; icg < ncg; icg++)
4138 i1 = cgindex[icg+1];
4144 /* Compact the home array in place */
4145 for (i = i0; i < i1; i++)
4147 copy_rvec(src[i], src[home_pos++]);
4153 /* Copy to the communication buffer */
/* Skip the cg_cm entry plus the 'vec' preceding vectors of this group */
4155 pos_vec[m] += 1 + vec*nrcg;
4156 for (i = i0; i < i1; i++)
4158 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
/* Advance past the remaining vector slots of this charge group */
4160 pos_vec[m] += (nvec - vec - 1)*nrcg;
4164 home_pos += i1 - i0;
/* Per-charge-group vector compaction (one rvec per cg, e.g. cg_cm): staying
 * groups are compacted in place; moving groups have their single rvec copied
 * to the head of their slot in comm->cgcm_state[m], leaving room for the
 * nvec per-atom vectors that follow. */
4172 static int compact_and_copy_vec_cg(int ncg, int *move,
4174 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4177 int m, icg, i0, i1, nrcg;
4183 for (m = 0; m < DIM*2; m++)
4189 for (icg = 0; icg < ncg; icg++)
4191 i1 = cgindex[icg+1];
4197 /* Compact the home array in place */
4198 copy_rvec(src[icg], src[home_pos++]);
4204 /* Copy to the communication buffer */
4205 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
/* Reserve space for the cg_cm entry plus nvec per-atom vectors */
4206 pos_vec[m] += 1 + nrcg*nvec;
/* Compact the home index arrays after redistribution: staying charge groups
 * get their cgindex/gatindex/index_gl/cginfo entries moved down and their
 * global-to-local atom lookup updated; moved groups get their global atom
 * indices deleted and their bLocalCG flag cleared. */
4218 static int compact_ind(int ncg, int *move,
4219 int *index_gl, int *cgindex,
4221 gmx_ga2la_t ga2la, char *bLocalCG,
4224 int cg, nat, a0, a1, a, a_gl;
4229 for (cg = 0; cg < ncg; cg++)
4235 /* Compact the home arrays in place.
4236 * Anything that can be done here avoids access to global arrays.
4238 cgindex[home_pos] = nat;
4239 for (a = a0; a < a1; a++)
4242 gatindex[nat] = a_gl;
4243 /* The cell number stays 0, so we don't need to set it */
4244 ga2la_change_la(ga2la, a_gl, nat);
4247 index_gl[home_pos] = index_gl[cg];
4248 cginfo[home_pos] = cginfo[cg];
4249 /* The charge group remains local, so bLocalCG does not change */
4254 /* Clear the global indices */
4255 for (a = a0; a < a1; a++)
4257 ga2la_del(ga2la, gatindex[a]);
4261 bLocalCG[index_gl[cg]] = FALSE;
/* Terminate the compacted cgindex array */
4265 cgindex[home_pos] = nat;
/* Non-compacting variant: for moved charge groups only, delete the global
 * atom indices from ga2la, clear bLocalCG, and flag the move in the
 * neighbor-search cell index so fill_grid can recognize it. */
4270 static void clear_and_mark_ind(int ncg, int *move,
4271 int *index_gl, int *cgindex, int *gatindex,
4272 gmx_ga2la_t ga2la, char *bLocalCG,
4277 for (cg = 0; cg < ncg; cg++)
4283 /* Clear the global indices */
4284 for (a = a0; a < a1; a++)
4286 ga2la_del(ga2la, gatindex[a]);
4290 bLocalCG[index_gl[cg]] = FALSE;
4292 /* Signal that this cg has moved using the ns cell index.
4293 * Here we set it to -1. fill_grid will change it
4294 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4296 cell_index[cg] = -1;
/* Print diagnostics for a charge group that crossed a cell boundary by more
 * than allowed: which group, direction, old/new centers and cell bounds.
 * bHaveLimitdAndCMOld selects the detailed message with the allowed distance. */
4301 static void print_cg_move(FILE *fplog,
4303 gmx_int64_t step, int cg, int dim, int dir,
4304 gmx_bool bHaveLimitdAndCMOld, real limitd,
4305 rvec cm_old, rvec cm_new, real pos_d)
4307 gmx_domdec_comm_t *comm;
4312 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4313 if (bHaveLimitdAndCMOld)
4315 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4316 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
/* NOTE(review): the message below appears to be missing the word "more"
 * ("moved than the distance allowed") — candidate wording fix. */
4320 fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4321 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4323 fprintf(fplog, "distance out of cell %f\n",
4324 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4325 if (bHaveLimitdAndCMOld)
4327 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4328 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4330 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4331 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4332 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4334 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4335 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4337 comm->cell_x0[dim], comm->cell_x1[dim]);
/* Report a too-large charge group displacement to the log and stderr, then
 * abort with a fatal error explaining the likely cause (poor equilibration). */
4340 static void cg_move_error(FILE *fplog,
4342 gmx_int64_t step, int cg, int dim, int dir,
4343 gmx_bool bHaveLimitdAndCMOld, real limitd,
4344 rvec cm_old, rvec cm_new, real pos_d)
4348 print_cg_move(fplog, dd, step, cg, dim, dir,
4349 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4351 print_cg_move(stderr, dd, step, cg, dim, dir,
4352 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4354 "A charge group moved too far between two domain decomposition steps\n"
4355 "This usually means that your system is not well equilibrated");
/* Apply the screw-PBC rotation to atom a for every distributed state entry:
 * positions are mirrored in y and z relative to the box, velocity-like
 * quantities (v, sd_X, cg_p) have their y and z components negated.
 * Distance-valued state entries are unaffected; unknown entries are fatal. */
4358 static void rotate_state_atom(t_state *state, int a)
4362 for (est = 0; est < estNR; est++)
4364 if (EST_DISTR(est) && (state->flags & (1<<est)))
4369 /* Rotate the complete state; for a rectangular box only */
4370 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4371 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4374 state->v[a][YY] = -state->v[a][YY];
4375 state->v[a][ZZ] = -state->v[a][ZZ];
4378 state->sd_X[a][YY] = -state->sd_X[a][YY];
4379 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4382 state->cg_p[a][YY] = -state->cg_p[a][YY];
4383 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4385 case estDISRE_INITF:
4386 case estDISRE_RM3TAV:
4387 case estORIRE_INITF:
4389 /* These are distances, so not affected by rotation */
4392 gmx_incons("Unknown state entry encountered in rotate_state_atom");
/* Return comm->moved, grown (contents preserved by srenew) to hold at least
 * natoms entries. */
4398 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4400 if (natoms > comm->moved_nalloc)
4402 /* Contents should be preserved here */
4403 comm->moved_nalloc = over_alloc_dd(natoms);
4404 srenew(comm->moved, comm->moved_nalloc);
/* For home charge groups [cg_start, cg_end): compute the center of geometry,
 * apply PBC (triclinic and screw variants) to the center and the atoms,
 * detect DD cell boundary crossings per dimension, and encode the target
 * communication direction plus flags into move[cg]. Errors out via
 * cg_move_error if a group moved further than allowed in one step.
 * Designed to be called per-thread on a slice of the home cg range.
 * NOTE(review): excerpt is non-contiguous; some statements are not visible. */
4410 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4413 ivec tric_dir, matrix tcm,
4414 rvec cell_x0, rvec cell_x1,
4415 rvec limitd, rvec limit0, rvec limit1,
4417 int cg_start, int cg_end,
4422 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4423 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4427 real inv_ncg, pos_d;
4430 npbcdim = dd->npbcdim;
4432 for (cg = cg_start; cg < cg_end; cg++)
/* Single-atom group: the atom position is the new center */
4439 copy_rvec(state->x[k0], cm_new);
4446 for (k = k0; (k < k1); k++)
4448 rvec_inc(cm_new, state->x[k]);
4450 for (d = 0; (d < DIM); d++)
4452 cm_new[d] = inv_ncg*cm_new[d];
4457 /* Do pbc and check DD cell boundary crossings */
4458 for (d = DIM-1; d >= 0; d--)
4462 bScrew = (dd->bScrewPBC && d == XX);
4463 /* Determine the location of this cg in lattice coordinates */
4467 for (d2 = d+1; d2 < DIM; d2++)
4469 pos_d += cm_new[d2]*tcm[d2][d];
4472 /* Put the charge group in the triclinic unit-cell */
4473 if (pos_d >= cell_x1[d])
/* Moved beyond the allowed displacement: abort with diagnostics */
4475 if (pos_d >= limit1[d])
4477 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4478 cg_cm[cg], cm_new, pos_d);
4481 if (dd->ci[d] == dd->nc[d] - 1)
4483 rvec_dec(cm_new, state->box[d]);
/* Screw PBC: shifting over x mirrors y and z */
4486 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4487 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4489 for (k = k0; (k < k1); k++)
4491 rvec_dec(state->x[k], state->box[d]);
4494 rotate_state_atom(state, k);
4499 else if (pos_d < cell_x0[d])
4501 if (pos_d < limit0[d])
4503 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4504 cg_cm[cg], cm_new, pos_d);
4509 rvec_inc(cm_new, state->box[d]);
4512 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4513 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4515 for (k = k0; (k < k1); k++)
4517 rvec_inc(state->x[k], state->box[d]);
4520 rotate_state_atom(state, k);
4526 else if (d < npbcdim)
4528 /* Put the charge group in the rectangular unit-cell */
4529 while (cm_new[d] >= state->box[d][d])
4531 rvec_dec(cm_new, state->box[d])
4532 for (k = k0; (k < k1); k++)
4534 rvec_dec(state->x[k], state->box[d]);
4537 while (cm_new[d] < 0)
4539 rvec_inc(cm_new, state->box[d]);
4540 for (k = k0; (k < k1); k++)
4542 rvec_inc(state->x[k], state->box[d]);
4548 copy_rvec(cm_new, cg_cm[cg]);
4550 /* Determine where this cg should go */
4553 for (d = 0; d < dd->ndim; d++)
/* dev[dim] == +1: crossed the upper boundary — set the forward flag */
4558 flag |= DD_FLAG_FW(d);
4564 else if (dev[dim] == -1)
4566 flag |= DD_FLAG_BW(d);
4569 if (dd->nc[dim] > 2)
4580 /* Temporarily store the flag in move */
4581 move[cg] = mc + flag;
4585 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4586 gmx_domdec_t *dd, ivec tric_dir,
4587 t_state *state, rvec **f,
4596 int ncg[DIM*2], nat[DIM*2];
4597 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4598 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4599 int sbuf[2], rbuf[2];
4600 int home_pos_cg, home_pos_at, buf_pos;
4602 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4605 real inv_ncg, pos_d;
4607 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4609 cginfo_mb_t *cginfo_mb;
4610 gmx_domdec_comm_t *comm;
4612 int nthread, thread;
4616 check_screw_box(state->box);
4620 if (fr->cutoff_scheme == ecutsGROUP)
4625 for (i = 0; i < estNR; i++)
4631 case estX: /* Always present */ break;
4632 case estV: bV = (state->flags & (1<<i)); break;
4633 case estSDX: bSDX = (state->flags & (1<<i)); break;
4634 case estCGP: bCGP = (state->flags & (1<<i)); break;
4637 case estDISRE_INITF:
4638 case estDISRE_RM3TAV:
4639 case estORIRE_INITF:
4641 /* No processing required */
4644 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4649 if (dd->ncg_tot > comm->nalloc_int)
4651 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4652 srenew(comm->buf_int, comm->nalloc_int);
4654 move = comm->buf_int;
4656 /* Clear the count */
4657 for (c = 0; c < dd->ndim*2; c++)
4663 npbcdim = dd->npbcdim;
4665 for (d = 0; (d < DIM); d++)
4667 limitd[d] = dd->comm->cellsize_min[d];
4668 if (d >= npbcdim && dd->ci[d] == 0)
4670 cell_x0[d] = -GMX_FLOAT_MAX;
4674 cell_x0[d] = comm->cell_x0[d];
4676 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4678 cell_x1[d] = GMX_FLOAT_MAX;
4682 cell_x1[d] = comm->cell_x1[d];
4686 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4687 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4691 /* We check after communication if a charge group moved
4692 * more than one cell. Set the pre-comm check limit to float_max.
4694 limit0[d] = -GMX_FLOAT_MAX;
4695 limit1[d] = GMX_FLOAT_MAX;
4699 make_tric_corr_matrix(npbcdim, state->box, tcm);
4701 cgindex = dd->cgindex;
4703 nthread = gmx_omp_nthreads_get(emntDomdec);
4705 /* Compute the center of geometry for all home charge groups
4706 * and put them in the box and determine where they should go.
4708 #pragma omp parallel for num_threads(nthread) schedule(static)
4709 for (thread = 0; thread < nthread; thread++)
4711 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4712 cell_x0, cell_x1, limitd, limit0, limit1,
4714 ( thread *dd->ncg_home)/nthread,
4715 ((thread+1)*dd->ncg_home)/nthread,
4716 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4720 for (cg = 0; cg < dd->ncg_home; cg++)
4725 flag = mc & ~DD_FLAG_NRCG;
4726 mc = mc & DD_FLAG_NRCG;
4729 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4731 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4732 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4734 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4735 /* We store the cg size in the lower 16 bits
4736 * and the place where the charge group should go
4737 * in the next 6 bits. This saves some communication volume.
4739 nrcg = cgindex[cg+1] - cgindex[cg];
4740 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4746 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4747 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4750 for (i = 0; i < dd->ndim*2; i++)
4752 *ncg_moved += ncg[i];
4769 /* Make sure the communication buffers are large enough */
4770 for (mc = 0; mc < dd->ndim*2; mc++)
4772 nvr = ncg[mc] + nat[mc]*nvec;
4773 if (nvr > comm->cgcm_state_nalloc[mc])
4775 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4776 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4780 switch (fr->cutoff_scheme)
4783 /* Recalculating cg_cm might be cheaper than communicating,
4784 * but that could give rise to rounding issues.
4787 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4788 nvec, cg_cm, comm, bCompact);
4791 /* Without charge groups we send the moved atom coordinates
4792 * over twice. This is so the code below can be used without
4793 * many conditionals for both for with and without charge groups.
4796 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4797 nvec, state->x, comm, FALSE);
4800 home_pos_cg -= *ncg_moved;
4804 gmx_incons("unimplemented");
4810 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4811 nvec, vec++, state->x, comm, bCompact);
4814 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4815 nvec, vec++, state->v, comm, bCompact);
4819 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4820 nvec, vec++, state->sd_X, comm, bCompact);
4824 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4825 nvec, vec++, state->cg_p, comm, bCompact);
4830 compact_ind(dd->ncg_home, move,
4831 dd->index_gl, dd->cgindex, dd->gatindex,
4832 dd->ga2la, comm->bLocalCG,
4837 if (fr->cutoff_scheme == ecutsVERLET)
4839 moved = get_moved(comm, dd->ncg_home);
4841 for (k = 0; k < dd->ncg_home; k++)
4848 moved = fr->ns.grid->cell_index;
4851 clear_and_mark_ind(dd->ncg_home, move,
4852 dd->index_gl, dd->cgindex, dd->gatindex,
4853 dd->ga2la, comm->bLocalCG,
4857 cginfo_mb = fr->cginfo_mb;
4859 *ncg_stay_home = home_pos_cg;
4860 for (d = 0; d < dd->ndim; d++)
4866 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4869 /* Communicate the cg and atom counts */
4874 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4875 d, dir, sbuf[0], sbuf[1]);
4877 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4879 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4881 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4882 srenew(comm->buf_int, comm->nalloc_int);
4885 /* Communicate the charge group indices, sizes and flags */
4886 dd_sendrecv_int(dd, d, dir,
4887 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4888 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4890 nvs = ncg[cdd] + nat[cdd]*nvec;
4891 i = rbuf[0] + rbuf[1] *nvec;
4892 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4894 /* Communicate cgcm and state */
4895 dd_sendrecv_rvec(dd, d, dir,
4896 comm->cgcm_state[cdd], nvs,
4897 comm->vbuf.v+nvr, i);
4898 ncg_recv += rbuf[0];
4899 nat_recv += rbuf[1];
4903 /* Process the received charge groups */
4905 for (cg = 0; cg < ncg_recv; cg++)
4907 flag = comm->buf_int[cg*DD_CGIBS+1];
4909 if (dim >= npbcdim && dd->nc[dim] > 2)
4911 /* No pbc in this dim and more than one domain boundary.
4912 * We do a separate check if a charge group didn't move too far.
4914 if (((flag & DD_FLAG_FW(d)) &&
4915 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4916 ((flag & DD_FLAG_BW(d)) &&
4917 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4919 cg_move_error(fplog, dd, step, cg, dim,
4920 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4922 comm->vbuf.v[buf_pos],
4923 comm->vbuf.v[buf_pos],
4924 comm->vbuf.v[buf_pos][dim]);
4931 /* Check which direction this cg should go */
4932 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4936 /* The cell boundaries for dimension d2 are not equal
4937 * for each cell row of the lower dimension(s),
4938 * therefore we might need to redetermine where
4939 * this cg should go.
4942 /* If this cg crosses the box boundary in dimension d2
4943 * we can use the communicated flag, so we do not
4944 * have to worry about pbc.
4946 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4947 (flag & DD_FLAG_FW(d2))) ||
4948 (dd->ci[dim2] == 0 &&
4949 (flag & DD_FLAG_BW(d2)))))
4951 /* Clear the two flags for this dimension */
4952 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4953 /* Determine the location of this cg
4954 * in lattice coordinates
4956 pos_d = comm->vbuf.v[buf_pos][dim2];
4959 for (d3 = dim2+1; d3 < DIM; d3++)
4962 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4965 /* Check of we are not at the box edge.
4966 * pbc is only handled in the first step above,
4967 * but this check could move over pbc while
4968 * the first step did not due to different rounding.
4970 if (pos_d >= cell_x1[dim2] &&
4971 dd->ci[dim2] != dd->nc[dim2]-1)
4973 flag |= DD_FLAG_FW(d2);
4975 else if (pos_d < cell_x0[dim2] &&
4978 flag |= DD_FLAG_BW(d2);
4980 comm->buf_int[cg*DD_CGIBS+1] = flag;
4983 /* Set to which neighboring cell this cg should go */
4984 if (flag & DD_FLAG_FW(d2))
4988 else if (flag & DD_FLAG_BW(d2))
4990 if (dd->nc[dd->dim[d2]] > 2)
5002 nrcg = flag & DD_FLAG_NRCG;
5005 if (home_pos_cg+1 > dd->cg_nalloc)
5007 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5008 srenew(dd->index_gl, dd->cg_nalloc);
5009 srenew(dd->cgindex, dd->cg_nalloc+1);
5011 /* Set the global charge group index and size */
5012 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5013 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5014 /* Copy the state from the buffer */
5015 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5016 if (fr->cutoff_scheme == ecutsGROUP)
5019 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5023 /* Set the cginfo */
5024 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5025 dd->index_gl[home_pos_cg]);
5028 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5031 if (home_pos_at+nrcg > state->nalloc)
5033 dd_realloc_state(state, f, home_pos_at+nrcg);
5035 for (i = 0; i < nrcg; i++)
5037 copy_rvec(comm->vbuf.v[buf_pos++],
5038 state->x[home_pos_at+i]);
5042 for (i = 0; i < nrcg; i++)
5044 copy_rvec(comm->vbuf.v[buf_pos++],
5045 state->v[home_pos_at+i]);
5050 for (i = 0; i < nrcg; i++)
5052 copy_rvec(comm->vbuf.v[buf_pos++],
5053 state->sd_X[home_pos_at+i]);
5058 for (i = 0; i < nrcg; i++)
5060 copy_rvec(comm->vbuf.v[buf_pos++],
5061 state->cg_p[home_pos_at+i]);
5065 home_pos_at += nrcg;
5069 /* Reallocate the buffers if necessary */
5070 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5072 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5073 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5075 nvr = ncg[mc] + nat[mc]*nvec;
5076 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5078 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5079 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5081 /* Copy from the receive to the send buffers */
5082 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5083 comm->buf_int + cg*DD_CGIBS,
5084 DD_CGIBS*sizeof(int));
5085 memcpy(comm->cgcm_state[mc][nvr],
5086 comm->vbuf.v[buf_pos],
5087 (1+nrcg*nvec)*sizeof(rvec));
5088 buf_pos += 1 + nrcg*nvec;
5095 /* With sorting (!bCompact) the indices are now only partially up to date
5096 * and ncg_home and nat_home are not the real count, since there are
5097 * "holes" in the arrays for the charge groups that moved to neighbors.
5099 if (fr->cutoff_scheme == ecutsVERLET)
5101 moved = get_moved(comm, home_pos_cg);
5103 for (i = dd->ncg_home; i < home_pos_cg; i++)
5108 dd->ncg_home = home_pos_cg;
5109 dd->nat_home = home_pos_at;
5114 "Finished repartitioning: cgs moved out %d, new home %d\n",
5115 *ncg_moved, dd->ncg_home-*ncg_moved);
5120 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5122 dd->comm->cycl[ddCycl] += cycles;
5123 dd->comm->cycl_n[ddCycl]++;
5124 if (cycles > dd->comm->cycl_max[ddCycl])
5126 dd->comm->cycl_max[ddCycl] = cycles;
/* Estimate the flop count of the force calculation by weighting each
 * nrnb interaction counter i with its relative cost from cost_nrnb(i).
 * Used as a load measure when cycle counting is not available.
 */
5130 static double force_flop_count(t_nrnb *nrnb)
/* Plain non-bonded kernel counters come first in the nrnb table */
5137 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5139 /* To get closer to the real timings, we half the count
5140 * for the normal loops and again half it for water loops.
/* "W3"/"W4" in the counter name marks water-loop kernels */
5143 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5145 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5149 sum += nrnb->n[i]*0.50*cost_nrnb(i);
/* Free-energy kernels up to and including the 1-4 interaction counters */
5152 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5155 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5157 sum += nrnb->n[i]*cost_nrnb(i);
/* Bonded interaction counters through the wall counters, at full cost */
5160 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5162 sum += nrnb->n[i]*cost_nrnb(i);
5168 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5170 if (dd->comm->eFlop)
5172 dd->comm->flop -= force_flop_count(nrnb);
5175 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5177 if (dd->comm->eFlop)
5179 dd->comm->flop += force_flop_count(nrnb);
/* Reset the per-type DD cycle counters (running sum, sample count and
 * maximum sample) and the flop sample counter, ready for the next
 * load-measurement interval.
 */
5184 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5188 for (i = 0; i < ddCyclNr; i++)
5190 dd->comm->cycl[i] = 0;
5191 dd->comm->cycl_n[i] = 0;
5192 dd->comm->cycl_max[i] = 0;
5195 dd->comm->flop_n = 0;
/* Collect the load measurements of all DD ranks up the dimension
 * hierarchy.  For each DD dimension, last to first, the participating
 * ranks pack their load data into sbuf and gather it at the row root,
 * which accumulates sums and maxima and, with dynamic load balancing,
 * the cell-boundary limits needed by the DLB root.  At the end the
 * run totals used for load reporting are accumulated from comm->load[0].
 */
5198 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5200 gmx_domdec_comm_t *comm;
5201 gmx_domdec_load_t *load;
5202 gmx_domdec_root_t *root = NULL;
5203 int d, dim, cid, i, pos;
5204 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5209 fprintf(debug, "get_load_distribution start\n");
5212 wallcycle_start(wcycle, ewcDDCOMMLOAD);
/* TRUE when separate PME ranks are in use (this rank has a PME peer) */
5216 bSepPME = (dd->pme_nodeid >= 0);
5218 for (d = dd->ndim-1; d >= 0; d--)
5221 /* Check if we participate in the communication in this dimension */
5222 if (d == dd->ndim-1 ||
5223 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5225 load = &comm->load[d];
/* Fraction of the box occupied by this cell row along dimension d */
5228 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
5231 if (d == dd->ndim-1)
5233 sbuf[pos++] = dd_force_load(comm);
5234 sbuf[pos++] = sbuf[0];
5237 sbuf[pos++] = sbuf[0];
5238 sbuf[pos++] = cell_frac;
5241 sbuf[pos++] = comm->cell_f_max0[d];
5242 sbuf[pos++] = comm->cell_f_min1[d];
5247 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5248 sbuf[pos++] = comm->cycl[ddCyclPME];
/* For inner dimensions, forward the already-reduced row data */
5253 sbuf[pos++] = comm->load[d+1].sum;
5254 sbuf[pos++] = comm->load[d+1].max;
5257 sbuf[pos++] = comm->load[d+1].sum_m;
5258 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5259 sbuf[pos++] = comm->load[d+1].flags;
5262 sbuf[pos++] = comm->cell_f_max0[d];
5263 sbuf[pos++] = comm->cell_f_min1[d];
5268 sbuf[pos++] = comm->load[d+1].mdf;
5269 sbuf[pos++] = comm->load[d+1].pme;
5273 /* Communicate a row in DD direction d.
5274 * The communicators are setup such that the root always has rank 0.
/* The floats are gathered as raw bytes; nload is the per-rank count */
5277 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5278 load->load, load->nload*sizeof(float), MPI_BYTE,
5279 0, comm->mpi_comm_load[d]);
5281 if (dd->ci[dim] == dd->master_ci[dim])
5283 /* We are the root, process this row */
5284 if (comm->bDynLoadBal)
5286 root = comm->root[d];
5296 for (i = 0; i < dd->nc[dim]; i++)
5298 load->sum += load->load[pos++];
5299 load->max = max(load->max, load->load[pos]);
5305 /* This direction could not be load balanced properly,
5306 * therefore we need to use the maximum instead of the average load.
5308 load->sum_m = max(load->sum_m, load->load[pos]);
5312 load->sum_m += load->load[pos];
5315 load->cvol_min = min(load->cvol_min, load->load[pos]);
/* flags was transported as a float; round back to int */
5319 load->flags = (int)(load->load[pos++] + 0.5);
5323 root->cell_f_max0[i] = load->load[pos++];
5324 root->cell_f_min1[i] = load->load[pos++];
5329 load->mdf = max(load->mdf, load->load[pos]);
5331 load->pme = max(load->pme, load->load[pos]);
5335 if (comm->bDynLoadBal && root->bLimited)
5337 load->sum_m *= dd->nc[dim];
/* Record that dimension d was limited during this interval */
5338 load->flags |= (1<<d);
/* Accumulate the run totals used by the load reports */
5346 comm->nload += dd_load_count(comm);
5347 comm->load_step += comm->cycl[ddCyclStep];
5348 comm->load_sum += comm->load[0].sum;
5349 comm->load_max += comm->load[0].max;
5350 if (comm->bDynLoadBal)
5352 for (d = 0; d < dd->ndim; d++)
5354 if (comm->load[0].flags & (1<<d))
5356 comm->load_lim[d]++;
5362 comm->load_mdf += comm->load[0].mdf;
5363 comm->load_pme += comm->load[0].pme;
5367 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5371 fprintf(debug, "get_load_distribution finished\n");
/* Relative run-time loss due to force-load imbalance, computed from the
 * run-accumulated totals as (max_load*nranks - sum_load)/(step_time*nranks).
 */
5375 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5377 /* Return the relative performance loss on the total run time
5378 * due to the force calculation load imbalance.
/* Only defined once at least one load collection has happened */
5380 if (dd->comm->nload > 0)
5383 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5384 (dd->comm->load_step*dd->nnodes);
/* Print the load-balance statistics accumulated over the run to the log
 * file and stderr (DD master only): average force imbalance, run time
 * lost waiting, per-dimension DLB limitation, the PP/PME balance, and
 * performance notes when the losses exceed DD_PERF_LOSS.
 */
5392 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5395 int npp, npme, nnodes, d, limp;
5396 float imbal, pme_f_ratio, lossf, lossp = 0;
5398 gmx_domdec_comm_t *comm;
5401 if (DDMASTER(dd) && comm->nload > 0)
5404 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5405 nnodes = npp + npme;
/* Imbalance: maximum over ranks relative to the mean, minus one */
5406 imbal = comm->load_max*npp/comm->load_sum - 1;
5407 lossf = dd_force_imb_perf_loss(dd);
5408 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5409 fprintf(fplog, "%s", buf);
5410 fprintf(stderr, "\n");
5411 fprintf(stderr, "%s", buf);
5412 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5413 fprintf(fplog, "%s", buf);
5414 fprintf(stderr, "%s", buf);
5416 if (comm->bDynLoadBal)
5418 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5419 for (d = 0; d < dd->ndim; d++)
/* Percentage of load-collection intervals limited in this dim, rounded */
5421 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5422 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5428 sprintf(buf+strlen(buf), "\n");
5429 fprintf(fplog, "%s", buf);
5430 fprintf(stderr, "%s", buf);
5434 pme_f_ratio = comm->load_pme/comm->load_mdf;
/* lossp > 0: PME ranks had more work than the PP ranks, and vice versa */
5435 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5438 lossp *= (float)npme/(float)nnodes;
5442 lossp *= (float)npp/(float)nnodes;
5444 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5445 fprintf(fplog, "%s", buf);
5446 fprintf(stderr, "%s", buf);
5447 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5448 fprintf(fplog, "%s", buf);
5449 fprintf(stderr, "%s", buf);
5451 fprintf(fplog, "\n");
5452 fprintf(stderr, "\n");
5454 if (lossf >= DD_PERF_LOSS)
5457 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5458 " in the domain decomposition.\n", lossf*100);
5459 if (!comm->bDynLoadBal)
5461 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5465 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5467 fprintf(fplog, "%s\n", buf);
5468 fprintf(stderr, "%s\n", buf);
5470 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
5473 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5474 " had %s work to do than the PP nodes.\n"
5475 " You might want to %s the number of PME nodes\n"
5476 " or %s the cut-off and the grid spacing.\n",
5478 (lossp < 0) ? "less" : "more",
5479 (lossp < 0) ? "decrease" : "increase",
5480 (lossp < 0) ? "decrease" : "increase");
5481 fprintf(fplog, "%s\n", buf);
5482 fprintf(stderr, "%s\n", buf);
5487 static float dd_vol_min(gmx_domdec_t *dd)
5489 return dd->comm->load[0].cvol_min*dd->nnodes;
5492 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5494 return dd->comm->load[0].flags;
5497 static float dd_f_imbal(gmx_domdec_t *dd)
5499 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
/* Ratio of the measured PME mesh load to the PP force (mdf) load;
 * only computed when PME cycle samples have been collected.
 */
5502 float dd_pme_f_ratio(gmx_domdec_t *dd)
5504 if (dd->comm->cycl_n[ddCyclPME] > 0)
5506 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
/* Write a per-step DD load report to the log file: any cell-size
 * limitation of the load balancing, the min/average cell volume (with
 * DLB), the force imbalance and, when PME cycles were counted, the PME
 * mesh/force ratio.
 */
5514 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5519 flags = dd_load_flags(dd);
5523 "DD load balancing is limited by minimum cell size in dimension");
/* List the dimensions that limited the load balancing */
5524 for (d = 0; d < dd->ndim; d++)
5528 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5531 fprintf(fplog, "\n");
5533 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5534 if (dd->comm->bDynLoadBal)
/* '!' marks a step where cell-size limits were hit */
5536 fprintf(fplog, " vol min/aver %5.3f%c",
5537 dd_vol_min(dd), flags ? '!' : ' ');
5539 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5540 if (dd->comm->cycl_n[ddCyclPME])
5542 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5544 fprintf(fplog, "\n\n");
5547 static void dd_print_load_verbose(gmx_domdec_t *dd)
5549 if (dd->comm->bDynLoadBal)
5551 fprintf(stderr, "vol %4.2f%c ",
5552 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5554 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5555 if (dd->comm->cycl_n[ddCyclPME])
5557 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
/* Create the MPI communicator for one row of DD cells along dimension
 * dim_ind (the other coordinates fixed by loc) and allocate the load
 * buffers; with dynamic load balancing the row root additionally gets
 * the cell-boundary bookkeeping arrays.
 */
5562 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5567 gmx_domdec_root_t *root;
5568 gmx_bool bPartOfGroup = FALSE;
5570 dim = dd->dim[dim_ind];
5571 copy_ivec(loc, loc_c);
/* Walk the row to find out whether this rank belongs to it */
5572 for (i = 0; i < dd->nc[dim]; i++)
5575 rank = dd_index(dd->nc, loc_c);
5576 if (rank == dd->rank)
5578 /* This process is part of the group */
5579 bPartOfGroup = TRUE;
/* Ranks outside the row pass MPI_UNDEFINED and get no communicator */
5582 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5586 dd->comm->mpi_comm_load[dim_ind] = c_row;
5587 if (dd->comm->eDLB != edlbNO)
5589 if (dd->ci[dim] == dd->master_ci[dim])
5591 /* This is the root process of this row */
5592 snew(dd->comm->root[dim_ind], 1);
5593 root = dd->comm->root[dim_ind];
5594 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5595 snew(root->old_cell_f, dd->nc[dim]+1);
5596 snew(root->bCellMin, dd->nc[dim]);
5599 snew(root->cell_f_max0, dd->nc[dim]);
5600 snew(root->cell_f_min1, dd->nc[dim]);
5601 snew(root->bound_min, dd->nc[dim]);
5602 snew(root->bound_max, dd->nc[dim]);
5604 snew(root->buf_ncd, dd->nc[dim]);
5608 /* This is not a root process, we only need to receive cell_f */
5609 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5612 if (dd->ci[dim] == dd->master_ci[dim])
5614 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
/* Set up comm->mpi_comm_gpu_shared: the group of PP ranks on the same
 * physical node that were assigned the same GPU, used for sharing GPU
 * resources with dynamic load balancing.  No-op for non-PP ranks or
 * when no GPUs are in use; the communicator is freed again when this
 * rank turns out not to share its GPU with anyone.
 */
5620 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5621 const gmx_hw_info_t gmx_unused *hwinfo,
5622 const gmx_hw_opt_t gmx_unused *hw_opt)
5625 int physicalnode_id_hash;
5628 MPI_Comm mpi_comm_pp_physicalnode;
5630 if (!(cr->duty & DUTY_PP) ||
5631 hw_opt->gpu_opt.ncuda_dev_use == 0)
5633 /* Only PP nodes (currently) use GPUs.
5634 * If we don't have GPUs, there are no resources to share.
5639 physicalnode_id_hash = gmx_physicalnode_id_hash();
5641 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5647 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5648 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5649 dd->rank, physicalnode_id_hash, gpu_id);
5651 /* Split the PP communicator over the physical nodes */
5652 /* TODO: See if we should store this (before), as it's also used for
5653 * the nodecomm summation.
5655 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5656 &mpi_comm_pp_physicalnode);
/* Within a physical node, group the ranks by assigned GPU id */
5657 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5658 &dd->comm->mpi_comm_gpu_shared);
5659 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5660 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5664 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5667 /* Note that some ranks could share a GPU, while others don't */
5669 if (dd->comm->nrank_gpu_shared == 1)
5671 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
/* Create the load communicators for all DD dimensions: one call to
 * make_load_communicator per row of cells, i.e. per combination of
 * fixed coordinates in the lower dimension(s).
 */
5676 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5679 int dim0, dim1, i, j;
5684 fprintf(debug, "Making load communicators\n");
5687 snew(dd->comm->load, dd->ndim);
5688 snew(dd->comm->mpi_comm_load, dd->ndim);
/* Dimension 0 has a single row */
5691 make_load_communicator(dd, 0, loc);
/* Dimension 1: one row per cell along the first DD dimension */
5695 for (i = 0; i < dd->nc[dim0]; i++)
5698 make_load_communicator(dd, 1, loc);
/* Dimension 2: one row per cell pair along the first two DD dimensions */
5704 for (i = 0; i < dd->nc[dim0]; i++)
5708 for (j = 0; j < dd->nc[dim1]; j++)
5711 make_load_communicator(dd, 2, loc);
5718 fprintf(debug, "Finished making load communicators\n");
/* Set up the DD zone grid: determine the forward/backward neighbor rank
 * in each DD dimension, copy the zone setup (dd_zo/dd_zp*) matching the
 * number of decomposed dimensions, fill in the zone shifts and i-zone
 * (pair-search) ranges with their allowed shift limits, and finally
 * allocate the DLB root array and create the load communicators when
 * load recording is enabled.
 */
5723 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5726 int d, dim, i, j, m;
5729 ivec dd_zp[DD_MAXIZONE];
5730 gmx_domdec_zones_t *zones;
5731 gmx_domdec_ns_ranges_t *izone;
/* Neighbor ranks: +1 and -1 cell along each DD dimension, with pbc wrap */
5733 for (d = 0; d < dd->ndim; d++)
5736 copy_ivec(dd->ci, tmp);
5737 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5738 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5739 copy_ivec(dd->ci, tmp);
5740 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5741 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5744 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5747 dd->neighbor[d][1]);
5753 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5755 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5756 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Select the zone pair table matching the number of DD dimensions */
5763 for (i = 0; i < nzonep; i++)
5765 copy_ivec(dd_zp3[i], dd_zp[i]);
5771 for (i = 0; i < nzonep; i++)
5773 copy_ivec(dd_zp2[i], dd_zp[i]);
5779 for (i = 0; i < nzonep; i++)
5781 copy_ivec(dd_zp1[i], dd_zp[i]);
5785 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5790 zones = &dd->comm->zones;
/* Zone shifts: only the decomposed dimensions get non-zero entries */
5792 for (i = 0; i < nzone; i++)
5795 clear_ivec(zones->shift[i]);
5796 for (d = 0; d < dd->ndim; d++)
5798 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5803 for (i = 0; i < nzone; i++)
5805 for (d = 0; d < DIM; d++)
5807 s[d] = dd->ci[d] - zones->shift[i][d];
5812 else if (s[d] >= dd->nc[d])
5818 zones->nizone = nzonep;
5819 for (i = 0; i < zones->nizone; i++)
5821 if (dd_zp[i][0] != i)
5823 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5825 izone = &zones->izone[i];
/* j0/j1 give the range of j-zones this i-zone interacts with */
5826 izone->j0 = dd_zp[i][1];
5827 izone->j1 = dd_zp[i][2];
5828 for (dim = 0; dim < DIM; dim++)
5830 if (dd->nc[dim] == 1)
5832 /* All shifts should be allowed */
5833 izone->shift0[dim] = -1;
5834 izone->shift1[dim] = 1;
5839 izone->shift0[d] = 0;
5840 izone->shift1[d] = 0;
5841 for(j=izone->j0; j<izone->j1; j++) {
5842 if (dd->shift[j][d] > dd->shift[i][d])
5843 izone->shift0[d] = -1;
5844 if (dd->shift[j][d] < dd->shift[i][d])
5845 izone->shift1[d] = 1;
5851 /* Assume the shifts are not more than 1 cell */
5852 izone->shift0[dim] = 1;
5853 izone->shift1[dim] = -1;
5854 for (j = izone->j0; j < izone->j1; j++)
5856 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5857 if (shift_diff < izone->shift0[dim])
5859 izone->shift0[dim] = shift_diff;
5861 if (shift_diff > izone->shift1[dim])
5863 izone->shift1[dim] = shift_diff;
5870 if (dd->comm->eDLB != edlbNO)
5872 snew(dd->comm->root, dd->ndim);
5875 if (dd->comm->bRecordLoad)
5877 make_load_communicators(dd);
/* Create the particle-particle communicator (optionally Cartesian),
 * set dd->mpi_comm_all, dd->rank and the Cartesian coordinates dd->ci,
 * and determine the DD master rank/coordinates.
 */
5881 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
5884 gmx_domdec_comm_t *comm;
5895 if (comm->bCartesianPP)
5897 /* Set up cartesian communication for the particle-particle part */
5900 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5901 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5904 for (i = 0; i < DIM; i++)
5908 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5910 /* We overwrite the old communicator with the new cartesian one */
5911 cr->mpi_comm_mygroup = comm_cart;
5914 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5915 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5917 if (comm->bCartesianPP_PME)
5919 /* Since we want to use the original cartesian setup for sim,
5920 * and not the one after split, we need to make an index.
5922 snew(comm->ddindex2ddnodeid, dd->nnodes);
5923 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
/* Each rank contributes one entry; the sum assembles the full table */
5924 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5925 /* Get the rank of the DD master,
5926 * above we made sure that the master node is a PP node.
5936 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5938 else if (comm->bCartesianPP)
5940 if (cr->npmenodes == 0)
5942 /* The PP communicator is also
5943 * the communicator for this simulation
5945 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5947 cr->nodeid = dd->rank;
5949 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5951 /* We need to make an index to go from the coordinates
5952 * to the nodeid of this simulation.
5954 snew(comm->ddindex2simnodeid, dd->nnodes);
5955 snew(buf, dd->nnodes);
5956 if (cr->duty & DUTY_PP)
5958 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5960 /* Communicate the ddindex to simulation nodeid index */
5961 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5962 cr->mpi_comm_mysim);
5965 /* Determine the master coordinates and rank.
5966 * The DD master should be the same node as the master of this sim.
5968 for (i = 0; i < dd->nnodes; i++)
5970 if (comm->ddindex2simnodeid[i] == 0)
5972 ddindex2xyz(dd->nc, i, dd->master_ci);
5973 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
5978 fprintf(debug, "The master rank is %d\n", dd->masterrank);
5983 /* No Cartesian communicators */
5984 /* We use the rank in dd->comm->all as DD index */
5985 ddindex2xyz(dd->nc, dd->rank, dd->ci);
5986 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
5988 clear_ivec(dd->master_ci);
5995 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5996 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6001 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6002 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Build the DD-index -> simulation-nodeid table on ranks that did not
 * construct it in make_pp_communicator; only needed with a Cartesian
 * PP-only setup (bCartesianPP without bCartesianPP_PME).
 */
6006 static void receive_ddindex2simnodeid(t_commrec *cr)
6010 gmx_domdec_comm_t *comm;
6017 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6019 snew(comm->ddindex2simnodeid, dd->nnodes);
6020 snew(buf, dd->nnodes);
/* Only PP ranks contribute their own entry; PME ranks send zeros */
6021 if (cr->duty & DUTY_PP)
6023 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6026 /* Communicate the ddindex to simulation nodeid index */
6027 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6028 cr->mpi_comm_mysim);
/* Allocate the master-only bookkeeping for the global charge-group
 * distribution: per-rank counts and indices, per-dimension cell
 * boundaries, and the communication buffers (vbuf only for the
 * send/receive distribution path used with few ranks).
 */
6035 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6036 int ncg, int natoms)
6038 gmx_domdec_master_t *ma;
6043 snew(ma->ncg, dd->nnodes);
6044 snew(ma->index, dd->nnodes+1);
6046 snew(ma->nat, dd->nnodes);
6047 snew(ma->ibuf, dd->nnodes*2);
6048 snew(ma->cell_x, DIM);
6049 for (i = 0; i < DIM; i++)
6051 snew(ma->cell_x[i], dd->nc[i]+1);
6054 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6060 snew(ma->vbuf, natoms);
/* Split the simulation communicator into PP and PME groups.  With a
 * Cartesian node order (and a compatible PME rank count) the PME ranks
 * are stacked along one dimension of an enlarged Cartesian grid;
 * otherwise the split follows the plain or interleaved rank order.
 * Sets cr->duty and cr->mpi_comm_mygroup for this rank.
 */
6066 static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
6067 int gmx_unused reorder)
6070 gmx_domdec_comm_t *comm;
6081 if (comm->bCartesianPP)
/* A Cartesian PP-PME split requires npmenodes*nc[i] divisible by nnodes */
6083 for (i = 1; i < DIM; i++)
6085 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6087 if (bDiv[YY] || bDiv[ZZ])
6089 comm->bCartesianPP_PME = TRUE;
6090 /* If we have 2D PME decomposition, which is always in x+y,
6091 * we stack the PME only nodes in z.
6092 * Otherwise we choose the direction that provides the thinnest slab
6093 * of PME only nodes as this will have the least effect
6094 * on the PP communication.
6095 * But for the PME communication the opposite might be better.
6097 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6099 dd->nc[YY] > dd->nc[ZZ]))
6101 comm->cartpmedim = ZZ;
6105 comm->cartpmedim = YY;
6107 comm->ntot[comm->cartpmedim]
6108 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6112 fprintf(fplog, "#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6114 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6119 if (comm->bCartesianPP_PME)
6123 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6126 for (i = 0; i < DIM; i++)
6130 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6133 MPI_Comm_rank(comm_cart, &rank);
6134 if (MASTERNODE(cr) && rank != 0)
6136 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6139 /* With this assignment we lose the link to the original communicator
6140 * which will usually be MPI_COMM_WORLD, unless we have multisim.
6142 cr->mpi_comm_mysim = comm_cart;
6143 cr->sim_nodeid = rank;
6145 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6149 fprintf(fplog, "Cartesian nodeid %d, coordinates %d %d %d\n\n",
6150 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Ranks beyond the PP grid along cartpmedim become PME-only ranks */
6153 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6157 if (cr->npmenodes == 0 ||
6158 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6160 cr->duty = DUTY_PME;
6163 /* Split the sim communicator into PP and PME only nodes */
6164 MPI_Comm_split(cr->mpi_comm_mysim,
6166 dd_index(comm->ntot, dd->ci),
6167 &cr->mpi_comm_mygroup);
6171 switch (dd_node_order)
6176 fprintf(fplog, "Order of the nodes: PP first, PME last\n");
6179 case ddnoINTERLEAVE:
6180 /* Interleave the PP-only and PME-only nodes,
6181 * as on clusters with dual-core machines this will double
6182 * the communication bandwidth of the PME processes
6183 * and thus speed up the PP <-> PME and inter PME communication.
6187 fprintf(fplog, "Interleaving PP and PME nodes\n");
6189 comm->pmenodes = dd_pmenodes(cr);
6194 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6197 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6199 cr->duty = DUTY_PME;
6206 /* Split the sim communicator into PP and PME only nodes */
6207 MPI_Comm_split(cr->mpi_comm_mysim,
6210 &cr->mpi_comm_mygroup);
6211 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6217 fprintf(fplog, "This is a %s only node\n\n",
6218 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
/* Top-level communicator setup for domain decomposition: split off PME
 * ranks when requested, build the PP communicator, connect each PP rank
 * to its PME peer rank, and allocate the master-only charge-group data
 * on the DD master rank.
 */
6222 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6225 gmx_domdec_comm_t *comm;
6231 copy_ivec(dd->nc, comm->ntot);
6233 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6234 comm->bCartesianPP_PME = FALSE;
6236 /* Reorder the nodes by default. This might change the MPI ranks.
6237 * Real reordering is only supported on very few architectures,
6238 * Blue Gene is one of them.
6240 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6242 if (cr->npmenodes > 0)
6244 /* Split the communicator into a PP and PME part */
6245 split_communicator(fplog, cr, dd_node_order, CartReorder);
6246 if (comm->bCartesianPP_PME)
6248 /* We (possibly) reordered the nodes in split_communicator,
6249 * so it is no longer required in make_pp_communicator.
6251 CartReorder = FALSE;
6256 /* All nodes do PP and PME */
6258 /* We do not require separate communicators */
6259 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6263 if (cr->duty & DUTY_PP)
6265 /* Copy or make a new PP communicator */
6266 make_pp_communicator(fplog, cr, CartReorder);
/* Non-PP ranks still need the ddindex -> sim nodeid table */
6270 receive_ddindex2simnodeid(cr);
6273 if (!(cr->duty & DUTY_PME))
6275 /* Set up the communication to our PME node */
6276 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6277 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6280 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6281 dd->pme_nodeid, dd->pme_receive_vir_ener);
6286 dd->pme_nodeid = -1;
6291 dd->ma = init_gmx_domdec_master_t(dd,
6293 comm->cgs_gl.index[comm->cgs_gl.nr]);
/* Parse the user-supplied static load balancing string for direction
 * dir (nc relative cell sizes, e.g. from -ddcsx/-ddcsy/-ddcsz) and
 * return the cell-size fractions; reports a fatal error on malformed
 * or missing entries.
 */
6297 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6299 real *slb_frac, tot;
/* Static load balancing only applies with more than one cell and a string */
6304 if (nc > 1 && size_string != NULL)
6308 fprintf(fplog, "Using static load balancing for the %s direction\n",
6313 for (i = 0; i < nc; i++)
/* %n records how many characters were consumed for this entry */
6316 sscanf(size_string, "%lf%n", &dbl, &n);
6319 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6328 fprintf(fplog, "Relative cell sizes:");
6330 for (i = 0; i < nc; i++)
6335 fprintf(fplog, " %5.3f", slb_frac[i]);
6340 fprintf(fplog, "\n");
/* Count the multi-body bonded interactions (IF_BOND flagged function
 * types) over the whole topology; each molecule contributes
 * nr/(1+NRAL(ftype)) interactions per interaction-list entry.
 */
6347 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6350 gmx_mtop_ilistloop_t iloop;
/* Loop over all molecule types, nmol gives the copies per type */
6354 iloop = gmx_mtop_ilistloop_init(mtop);
6355 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6357 for (ftype = 0; ftype < F_NRE; ftype++)
6359 if ((interaction_function[ftype].flags & IF_BOND) &&
6362 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
/* Read an integer override from environment variable env_var; when the
 * variable is set and parses, the value is used (and logged), otherwise
 * the default def applies.
 */
6370 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6376 val = getenv(env_var);
/* sscanf returning <= 0 means the variable did not hold an integer */
6379 if (sscanf(val, "%d", &nst) <= 0)
6385 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6393 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6397 fprintf(stderr, "\n%s\n", warn_string);
6401 fprintf(fplog, "\n%s\n", warn_string);
6405 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6406 t_inputrec *ir, FILE *fplog)
6408 if (ir->ePBC == epbcSCREW &&
6409 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6411 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6414 if (ir->ns_type == ensSIMPLE)
6416 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6419 if (ir->nstlist == 0)
6421 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6424 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6426 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
/* Return the smallest average cell size over the decomposed dimensions,
 * starting from the x box length and accounting for box skew.
 */
6430 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6435 r = ddbox->box_size[XX];
6436 for (di = 0; di < dd->ndim; di++)
6439 /* Check using the initial average cell size */
6440 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
/* Translate the -dlb option character ('a'/'n'/'y') into an eDLB value
 * and downgrade it, with a warning, in situations where dynamic load
 * balancing is not supported (rerun, non-dynamical integrator, no cycle
 * counting) or conflicts with a reproducibility request.
 */
6446 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6447 const char *dlb_opt, gmx_bool bRecordLoad,
6448 unsigned long Flags, t_inputrec *ir)
6456 case 'a': eDLB = edlbAUTO; break;
6457 case 'n': eDLB = edlbNO; break;
6458 case 'y': eDLB = edlbYES; break;
6459 default: gmx_incons("Unknown dlb_opt");
6462 if (Flags & MD_RERUN)
6467 if (!EI_DYNAMICS(ir->eI))
6469 if (eDLB == edlbYES)
6471 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI))
6472 dd_warning(cr, fplog, buf);
/* Without load recording (cycle counting) DLB cannot measure anything */
6480 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6485 if (Flags & MD_REPRODUCIBLE)
6492 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6496 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6499 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
/* Fill dd->dim[] with the decomposed dimensions (those with nc>1) and set
 * dd->ndim.  Default order is x,y,z; the GMX_DD_ORDER_ZYX env.var.
 * reverses it to z,y,x (mainly for testing). */
6507 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6512 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6514 /* Decomposition order z,y,x */
6517 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6519 for (dim = DIM-1; dim >= 0; dim--)
6521 if (dd->nc[dim] > 1)
6523 dd->dim[dd->ndim++] = dim;
6529 /* Decomposition order x,y,z */
6530 for (dim = 0; dim < DIM; dim++)
6532 if (dd->nc[dim] > 1)
6534 dd->dim[dd->ndim++] = dim;
/* Allocate and zero-initialize a gmx_domdec_comm_t: per-direction charge
 * group communication buffers (cggl_flag/cgcm_state for 2 directions in
 * each of DIM dimensions), the generic int/rvec buffers, and the load
 * balancing counters.  Returns the new struct; caller owns it. */
6540 static gmx_domdec_comm_t *init_dd_comm()
6542 gmx_domdec_comm_t *comm;
6546 snew(comm->cggl_flag, DIM*2);
6547 snew(comm->cgcm_state, DIM*2);
6548 for (i = 0; i < DIM*2; i++)
6550 comm->cggl_flag_nalloc[i] = 0;
6551 comm->cgcm_state_nalloc[i] = 0;
6554 comm->nalloc_int = 0;
6555 comm->buf_int = NULL;
6557 vec_rvec_init(&comm->vbuf);
6559 comm->n_load_have = 0;
6560 comm->n_load_collect = 0;
6562 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6564 comm->sum_nat[i] = 0;
6568 comm->load_step = 0;
6571 clear_ivec(comm->load_lim);
/* Top-level setup of the domain decomposition: reads env.var. overrides,
 * decides on dynamic load balancing, determines the minimum cell size
 * from bonded interactions and P-LINCS constraints, chooses (or checks)
 * the DD grid and the separate-PME node split, and fills in the
 * communication setup.  Returns the new gmx_domdec_t.
 * The nc grid may be user-supplied (nc[XX] != 0) or chosen automatically
 * by dd_choose_grid(). */
6578 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6579 unsigned long Flags,
6581 real comm_distance_min, real rconstr,
6582 const char *dlb_opt, real dlb_scale,
6583 const char *sizex, const char *sizey, const char *sizez,
6584 gmx_mtop_t *mtop, t_inputrec *ir,
6585 matrix box, rvec *x,
6587 int *npme_x, int *npme_y)
6590 gmx_domdec_comm_t *comm;
6593 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6600 "\nInitializing Domain Decomposition on %d nodes\n", cr->nnodes);
6605 dd->comm = init_dd_comm();
/* NOTE(review): cggl_flag and cgcm_state are already allocated inside
 * init_dd_comm() (lines 6546-6547 above); re-allocating them here leaks
 * the first allocation — verify against the full file. */
6607 snew(comm->cggl_flag, DIM*2);
6608 snew(comm->cgcm_state, DIM*2);
6610 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6611 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
/* Debug/tuning overrides via environment variables */
6613 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
6614 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
6615 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
6616 recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
6617 comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
6618 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
6619 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
6620 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
6622 dd->pme_recv_f_alloc = 0;
6623 dd->pme_recv_f_buf = NULL;
6625 if (dd->bSendRecv2 && fplog)
6627 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n")
;
6633 fprintf(fplog, "Will load balance based on FLOP count\n");
/* eFlop > 1: add rank-dependent noise to the flop count for testing */
6635 if (comm->eFlop > 1)
6637 srand(1+cr->nodeid);
6639 comm->bRecordLoad = TRUE;
6643 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6647 /* Initialize to GPU share count to 0, might change later */
6648 comm->nrank_gpu_shared = 0;
6650 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6652 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6655 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6657 dd->bGridJump = comm->bDynLoadBal;
6658 comm->bPMELoadBalDLBLimits = FALSE;
6660 if (comm->nstSortCG)
6664 if (comm->nstSortCG == 1)
6666 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6670 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6674 snew(comm->sort, 1);
6680 fprintf(fplog, "Will not sort the charge groups\n");
6684 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6686 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6687 if (comm->bInterCGBondeds)
6689 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6693 comm->bInterCGMultiBody = FALSE;
6696 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6697 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6699 if (ir->rlistlong == 0)
6701 /* Set the cut-off to some very large value,
6702 * so we don't need if statements everywhere in the code.
6703 * We use sqrt, since the cut-off is squared in some places.
6705 comm->cutoff = GMX_CUTOFF_INF;
6709 comm->cutoff = ir->rlistlong;
6711 comm->cutoff_mbody = 0;
6713 comm->cellsize_limit = 0;
6714 comm->bBondComm = FALSE;
/* Determine the minimum cell size required by bonded interactions:
 * either from the user-supplied -rdd, a heuristic for periodic
 * molecules, or the measured maximum bonded distances. */
6716 if (comm->bInterCGBondeds)
6718 if (comm_distance_min > 0)
6720 comm->cutoff_mbody = comm_distance_min;
6721 if (Flags & MD_DDBONDCOMM)
6723 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6727 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6729 r_bonded_limit = comm->cutoff_mbody;
6731 else if (ir->bPeriodicMols)
6733 /* Can not easily determine the required cut-off */
6734 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6735 comm->cutoff_mbody = comm->cutoff/2;
6736 r_bonded_limit = comm->cutoff_mbody;
6742 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6743 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6745 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6746 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6748 /* We use an initial margin of 10% for the minimum cell size,
6749 * except when we are just below the non-bonded cut-off.
6751 if (Flags & MD_DDBONDCOMM)
6753 if (max(r_2b, r_mb) > comm->cutoff)
6755 r_bonded = max(r_2b, r_mb);
6756 r_bonded_limit = 1.1*r_bonded;
6757 comm->bBondComm = TRUE;
6762 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6764 /* We determine cutoff_mbody later */
6768 /* No special bonded communication,
6769 * simply increase the DD cut-off.
6771 r_bonded_limit = 1.1*max(r_2b, r_mb);
6772 comm->cutoff_mbody = r_bonded_limit;
6773 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6776 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6780 "Minimum cell size due to bonded interactions: %.3f nm\n",
6781 comm->cellsize_limit);
/* P-LINCS constraint communication can further limit the cell size */
6785 if (dd->bInterCGcons && rconstr <= 0)
6787 /* There is a cell size limit due to the constraints (P-LINCS) */
6788 rconstr = constr_r_max(fplog, mtop, ir);
6792 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6794 if (rconstr > comm->cellsize_limit)
6796 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6800 else if (rconstr > 0 && fplog)
6802 /* Here we do not check for dd->bInterCGcons,
6803 * because one can also set a cell size limit for virtual sites only
6804 * and at this point we don't know yet if there are intercg v-sites.
6807 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6810 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6812 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
/* User-specified grid: validate it against the cell size limit */
6816 copy_ivec(nc, dd->nc);
6817 set_dd_dim(fplog, dd);
6818 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6820 if (cr->npmenodes == -1)
6824 acs = average_cellsize_min(dd, ddbox);
6825 if (acs < comm->cellsize_limit)
6829 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6831 gmx_fatal_collective(FARGS, cr, NULL,
6832 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6833 acs, comm->cellsize_limit);
/* Automatic grid: let dd_choose_grid pick grid and PME node count */
6838 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6840 /* We need to choose the optimal DD grid and possibly PME nodes */
6841 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6842 comm->eDLB != edlbNO, dlb_scale,
6843 comm->cellsize_limit, comm->cutoff,
6844 comm->bInterCGBondeds);
6846 if (dd->nc[XX] == 0)
6848 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6849 sprintf(buf, "Change the number of nodes or mdrun option %s%s%s",
6850 !bC ? "-rdd" : "-rcon",
6851 comm->eDLB != edlbNO ? " or -dds" : "",
6852 bC ? " or your LINCS settings" : "");
6854 gmx_fatal_collective(FARGS, cr, NULL,
6855 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6857 "Look in the log file for details on the domain decomposition",
6858 cr->nnodes-cr->npmenodes, limit, buf);
6860 set_dd_dim(fplog, dd);
6866 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6867 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
/* Sanity checks on the PP/PME node split */
6870 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6871 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6873 gmx_fatal_collective(FARGS, cr, NULL,
6874 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6875 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6877 if (cr->npmenodes > dd->nnodes)
6879 gmx_fatal_collective(FARGS, cr, NULL,
6880 "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6882 if (cr->npmenodes > 0)
6884 comm->npmenodes = cr->npmenodes;
6888 comm->npmenodes = dd->nnodes;
6891 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6893 /* The following choices should match those
6894 * in comm_cost_est in domdec_setup.c.
6895 * Note that here the checks have to take into account
6896 * that the decomposition might occur in a different order than xyz
6897 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6898 * in which case they will not match those in comm_cost_est,
6899 * but since that is mainly for testing purposes that's fine.
6901 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6902 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6903 getenv("GMX_PMEONEDD") == NULL)
6905 comm->npmedecompdim = 2;
6906 comm->npmenodes_x = dd->nc[XX];
6907 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6911 /* In case nc is 1 in both x and y we could still choose to
6912 * decompose pme in y instead of x, but we use x for simplicity.
6914 comm->npmedecompdim = 1;
6915 if (dd->dim[0] == YY)
6917 comm->npmenodes_x = 1;
6918 comm->npmenodes_y = comm->npmenodes;
6922 comm->npmenodes_x = comm->npmenodes;
6923 comm->npmenodes_y = 1;
6928 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6929 comm->npmenodes_x, comm->npmenodes_y, 1);
6934 comm->npmedecompdim = 0;
6935 comm->npmenodes_x = 0;
6936 comm->npmenodes_y = 0;
6939 /* Technically we don't need both of these,
6940 * but it simplifies code not having to recalculate it.
6942 *npme_x = comm->npmenodes_x;
6943 *npme_y = comm->npmenodes_y;
/* Static load balancing: optional user-supplied cell size fractions */
6945 snew(comm->slb_frac, DIM);
6946 if (comm->eDLB == edlbNO)
6948 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6949 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6950 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
6953 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6955 if (comm->bBondComm || comm->eDLB != edlbNO)
6957 /* Set the bonded communication distance to halfway
6958 * the minimum and the maximum,
6959 * since the extra communication cost is nearly zero.
6961 acs = average_cellsize_min(dd, ddbox);
6962 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6963 if (comm->eDLB != edlbNO)
6965 /* Check if this does not limit the scaling */
6966 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
6968 if (!comm->bBondComm)
6970 /* Without bBondComm do not go beyond the n.b. cut-off */
6971 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
6972 if (comm->cellsize_limit >= comm->cutoff)
6974 /* We don't loose a lot of efficieny
6975 * when increasing it to the n.b. cut-off.
6976 * It can even be slightly faster, because we need
6977 * less checks for the communication setup.
6979 comm->cutoff_mbody = comm->cutoff;
6982 /* Check if we did not end up below our original limit */
6983 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
6985 if (comm->cutoff_mbody > comm->cellsize_limit)
6987 comm->cellsize_limit = comm->cutoff_mbody;
6990 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
6995 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
6996 "cellsize limit %f\n",
6997 comm->bBondComm, comm->cellsize_limit);
7002 check_dd_restrictions(cr, dd, ir, fplog);
7005 comm->partition_step = INT_MIN;
7008 clear_dd_cycle_counts(dd);
/* Activate the DLB pulse counts and minimum cell sizes that were
 * precomputed in set_cell_limits_dlb (np_dlb / cellsize_min_dlb). */
7013 static void set_dlb_limits(gmx_domdec_t *dd)
7018 for (d = 0; d < dd->ndim; d++)
7020 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7021 dd->comm->cellsize_min[dd->dim[d]] =
7022 dd->comm->cellsize_min_dlb[dd->dim[d]];
/* Turn on dynamic load balancing during the run (eDLB==auto path).
 * If the current minimum cell size is already within 5% of the limit,
 * DLB cannot help and the choice is changed to "no" instead.
 * Otherwise the DLB grid is initialized as completely uniform. */
7027 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7030 gmx_domdec_comm_t *comm;
7040 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
7043 cellsize_min = comm->cellsize_min[dd->dim[0]];
7044 for (d = 1; d < dd->ndim; d++)
7046 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7049 if (cellsize_min < comm->cellsize_limit*1.05)
7051 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7053 /* Change DLB from "auto" to "no". */
7054 comm->eDLB = edlbNO;
7059 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7060 comm->bDynLoadBal = TRUE;
7061 dd->bGridJump = TRUE;
7065 /* We can set the required cell size info here,
7066 * so we do not need to communicate this.
7067 * The grid is completely uniform.
7069 for (d = 0; d < dd->ndim; d++)
7073 comm->load[d].sum_m = comm->load[d].sum;
7075 nc = dd->nc[dd->dim[d]];
7076 for (i = 0; i < nc; i++)
/* Uniform cell boundaries: cell i starts at fraction i/nc */
7078 comm->root[d]->cell_f[i] = i/(real)nc;
7081 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7082 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7085 comm->root[d]->cell_f[nc] = 1.0;
/* Allocate a per-charge-group boolean array (stored as char), one entry
 * per global charge group, initialized to FALSE.  Caller owns the
 * returned buffer. */
7090 static char *init_bLocalCG(gmx_mtop_t *mtop)
7095 ncg = ncg_mtop(mtop);
7096 snew(bLocalCG, ncg);
7097 for (cg = 0; cg < ncg; cg++)
7099 bLocalCG[cg] = FALSE;
/* Set up bonded-interaction bookkeeping: build the reverse topology and,
 * when bBondComm is active, the charge-group link table and the local-CG
 * flags used to communicate atoms beyond the non-bonded cut-off. */
7105 void dd_init_bondeds(FILE *fplog,
7106 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7108 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7110 gmx_domdec_comm_t *comm;
7114 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7118 if (comm->bBondComm)
7120 /* Communicate atoms beyond the cut-off for bonded interactions */
7123 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7125 comm->bLocalCG = init_bLocalCG(mtop);
7129 /* Only communicate atoms based on cut-off */
7130 comm->cglink = NULL;
7131 comm->bLocalCG = NULL;
/* Write a human-readable summary of the DD setup to the log file:
 * communication pulse counts, cell size limits/shrink factors (DLB) or
 * initial cell sizes (static), and the maximum allowed interaction
 * distances for bondeds, vsites and constraints. */
7135 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7137 gmx_bool bDynLoadBal, real dlb_scale,
7140 gmx_domdec_comm_t *comm;
7155 fprintf(fplog, "The maximum number of communication pulses is:");
7156 for (d = 0; d < dd->ndim; d++)
7158 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7160 fprintf(fplog, "\n");
7161 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7162 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7163 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7164 for (d = 0; d < DIM; d++)
7168 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
/* Shrink factor relative to the uniform cell size in this dimension */
7175 comm->cellsize_min_dlb[d]/
7176 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7178 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7181 fprintf(fplog, "\n");
7185 set_dd_cell_sizes_slb(dd, ddbox, FALSE, np);
7186 fprintf(fplog, "The initial number of communication pulses is:");
7187 for (d = 0; d < dd->ndim; d++)
7189 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7191 fprintf(fplog, "\n");
7192 fprintf(fplog, "The initial domain decomposition cell size is:");
7193 for (d = 0; d < DIM; d++)
7197 fprintf(fplog, " %c %.2f nm",
7198 dim2char(d), dd->comm->cellsize_min[d]);
7201 fprintf(fplog, "\n\n");
7204 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7206 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7207 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7208 "non-bonded interactions", "", comm->cutoff);
7212 limit = dd->comm->cellsize_limit;
7216 if (dynamic_dd_box(ddbox, ir))
7218 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7220 limit = dd->comm->cellsize_min[XX];
7221 for (d = 1; d < DIM; d++)
7223 limit = min(limit, dd->comm->cellsize_min[d]);
7227 if (comm->bInterCGBondeds)
7229 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7230 "two-body bonded interactions", "(-rdd)",
7231 max(comm->cutoff, comm->cutoff_mbody));
7232 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7233 "multi-body bonded interactions", "(-rdd)",
7234 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7238 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7239 "virtual site constructions", "(-rcon)", limit);
7241 if (dd->constraint_comm)
7243 sprintf(buf, "atoms separated by up to %d constraints",
7245 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7246 buf, "(-rcon)", limit);
7248 fprintf(fplog, "\n");
/* Precompute the DLB limits: the maximum number of communication pulses
 * per dimension (np_dlb, possibly reduced via dlb_scale or overridden by
 * GMX_DD_NPULSE) and the resulting minimum DLB cell sizes
 * (cellsize_min_dlb).  Also updates cellsize_limit and cutoff_mbody. */
7254 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7256 const t_inputrec *ir,
7257 const gmx_ddbox_t *ddbox)
7259 gmx_domdec_comm_t *comm;
7260 int d, dim, npulse, npulse_d_max, npulse_d;
/* "No cut-off" here means plain cut-off free Coulomb or vdW */
7265 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7267 /* Determine the maximum number of comm. pulses in one dimension */
7269 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7271 /* Determine the maximum required number of grid pulses */
7272 if (comm->cellsize_limit >= comm->cutoff)
7274 /* Only a single pulse is required */
7277 else if (!bNoCutOff && comm->cellsize_limit > 0)
7279 /* We round down slightly here to avoid overhead due to the latency
7280 * of extra communication calls when the cut-off
7281 * would be only slightly longer than the cell size.
7282 * Later cellsize_limit is redetermined,
7283 * so we can not miss interactions due to this rounding.
7285 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7289 /* There is no cell size limit */
7290 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7293 if (!bNoCutOff && npulse > 1)
7295 /* See if we can do with less pulses, based on dlb_scale */
7297 for (d = 0; d < dd->ndim; d++)
7300 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7301 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7302 npulse_d_max = max(npulse_d_max, npulse_d);
7304 npulse = min(npulse, npulse_d_max);
7307 /* This env var can override npulse */
7308 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
7315 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7316 for (d = 0; d < dd->ndim; d++)
/* A dimension can never need more pulses than cells minus one */
7318 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7319 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7320 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7321 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7322 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7324 comm->bVacDLBNoLimit = FALSE;
7328 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7329 if (!comm->bVacDLBNoLimit)
7331 comm->cellsize_limit = max(comm->cellsize_limit,
7332 comm->cutoff/comm->maxpulse);
7334 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7335 /* Set the minimum cell size for each DD dimension */
7336 for (d = 0; d < dd->ndim; d++)
7338 if (comm->bVacDLBNoLimit ||
7339 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7341 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7345 comm->cellsize_min_dlb[dd->dim[d]] =
7346 comm->cutoff/comm->cd[d].np_dlb;
7349 if (comm->cutoff_mbody <= 0)
7351 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7353 if (comm->bDynLoadBal)
/* Return whether bonded interactions need PBC treatment: TRUE when there
 * are inter-charge-group bondeds under PBC and not every periodic
 * dimension is decomposed.  (Sampled chunk: the nc[XX]/nc[YY] part of
 * the condition between these lines is not visible here.) */
7359 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7361 /* If each molecule is a single charge group
7362 * or we use domain decomposition for each periodic dimension,
7363 * we do not need to take pbc into account for the bonded interactions.
7365 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7368 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
/* Finalize DD parameters once the thread count is known: allocate the
 * per-thread work data, initialize the PME DD communicators (or verify
 * none are requested without PME), compute the DLB cell limits, print
 * the settings, and size the global-to-local atom hash (ga2la) from the
 * estimated zone volume fraction. */
7371 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7372 t_inputrec *ir, gmx_ddbox_t *ddbox)
7374 gmx_domdec_comm_t *comm;
7380 /* Initialize the thread data.
7381 * This can not be done in init_domain_decomposition,
7382 * as the numbers of threads is determined later.
7384 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7387 snew(comm->dth, comm->nth);
7390 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7392 init_ddpme(dd, &comm->ddpme[0], 0);
7393 if (comm->npmedecompdim >= 2)
7395 init_ddpme(dd, &comm->ddpme[1], 1);
7400 comm->npmenodes = 0;
7401 if (dd->pme_nodeid >= 0)
7403 gmx_fatal_collective(FARGS, NULL, dd,
7404 "Can not have separate PME nodes without PME electrostatics");
7410 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7412 if (comm->eDLB != edlbNO)
7414 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7417 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7418 if (comm->eDLB == edlbAUTO)
7422 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7424 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
/* Estimate the fraction of all atoms a rank will hold (home + halo) to
 * size the ga2la hash table */
7427 if (ir->ePBC == epbcNONE)
7429 vol_frac = 1 - 1/(double)dd->nnodes;
7434 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7438 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7440 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7442 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
/* Test whether the cut-off can be changed to cutoff_req with the current
 * DD grid and state: checks the required number of pulses per dimension
 * against the DLB limits and the current local cell sizes, and (with DLB
 * active) the grid jump restrictions.  Collective: all ranks must agree,
 * hence the gmx_sumi of the LocallyLimited flag. */
7445 static gmx_bool test_dd_cutoff(t_commrec *cr,
7446 t_state *state, t_inputrec *ir,
7457 set_ddbox(dd, FALSE, cr, ir, state->box,
7458 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7462 for (d = 0; d < dd->ndim; d++)
/* DD_CELL_MARGIN guards against cells shrinking under pressure coupling */
7466 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7467 if (dynamic_dd_box(&ddbox, ir))
7469 inv_cell_size *= DD_PRES_SCALE_MARGIN;
/* Number of pulses needed for the requested cut-off in this dimension */
7472 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7474 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7475 dd->comm->cd[d].np_dlb > 0)
7477 if (np > dd->comm->cd[d].np_dlb)
7482 /* If a current local cell size is smaller than the requested
7483 * cut-off, we could still fix it, but this gets very complicated.
7484 * Without fixing here, we might actually need more checks.
7486 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7493 if (dd->comm->eDLB != edlbNO)
7495 /* If DLB is not active yet, we don't need to check the grid jumps.
7496 * Actually we shouldn't, because then the grid jump data is not set.
7498 if (dd->comm->bDynLoadBal &&
7499 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7504 gmx_sumi(1, &LocallyLimited, cr);
7506 if (LocallyLimited > 0)
/* Change the DD communication cut-off to cutoff_req if the current grid
 * allows it (see test_dd_cutoff); returns whether the change was made. */
7515 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7518 gmx_bool bCutoffAllowed;
7520 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7524 cr->dd->comm->cutoff = cutoff_req;
7527 return bCutoffAllowed;
/* Enable the PME load balancing limit on DLB and record the current
 * cut-off as the maximum that PME tuning may request. */
7530 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7532 gmx_domdec_comm_t *comm;
7534 comm = cr->dd->comm;
7536 /* Turn on the DLB limiting (might have been on already) */
7537 comm->bPMELoadBalDLBLimits = TRUE;
7539 /* Change the cut-off limit */
7540 comm->PMELoadBal_max_cutoff = comm->cutoff;
/* Merge the charge groups received in communication pulse `pulse` into
 * the per-zone arrays (index_gl, cg_cm, cgindex, cginfo).  First shifts
 * the data from previous pulses to make room (updating the stored send
 * indices accordingly), then copies in the newly received groups and
 * extends the zone cg ranges in ncg_cell. */
7543 static void merge_cg_buffers(int ncell,
7544 gmx_domdec_comm_dim_t *cd, int pulse,
7546 int *index_gl, int *recv_i,
7547 rvec *cg_cm, rvec *recv_vr,
7549 cginfo_mb_t *cginfo_mb, int *cginfo)
7551 gmx_domdec_ind_t *ind, *ind_p;
7552 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7553 int shift, shift_at;
7555 ind = &cd->ind[pulse];
7557 /* First correct the already stored data */
7558 shift = ind->nrecv[ncell];
/* Walk zones backwards so shifted data never overwrites unread data */
7559 for (cell = ncell-1; cell >= 0; cell--)
7561 shift -= ind->nrecv[cell];
7564 /* Move the cg's present from previous grid pulses */
7565 cg0 = ncg_cell[ncell+cell];
7566 cg1 = ncg_cell[ncell+cell+1];
7567 cgindex[cg1+shift] = cgindex[cg1];
7568 for (cg = cg1-1; cg >= cg0; cg--)
7570 index_gl[cg+shift] = index_gl[cg];
7571 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7572 cgindex[cg+shift] = cgindex[cg];
7573 cginfo[cg+shift] = cginfo[cg];
7575 /* Correct the already stored send indices for the shift */
7576 for (p = 1; p <= pulse; p++)
7578 ind_p = &cd->ind[p];
7580 for (c = 0; c < cell; c++)
7582 cg0 += ind_p->nsend[c];
7584 cg1 = cg0 + ind_p->nsend[cell];
7585 for (cg = cg0; cg < cg1; cg++)
7587 ind_p->index[cg] += shift;
7593 /* Merge in the communicated buffers */
7597 for (cell = 0; cell < ncell; cell++)
7599 cg1 = ncg_cell[ncell+cell+1] + shift;
7602 /* Correct the old cg indices */
7603 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7605 cgindex[cg+1] += shift_at;
7608 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7610 /* Copy this charge group from the buffer */
7611 index_gl[cg1] = recv_i[cg0];
7612 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7613 /* Add it to the cgindex */
7614 cg_gl = index_gl[cg1];
7615 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7616 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7617 cgindex[cg1+1] = cgindex[cg1] + nat;
7622 shift += ind->nrecv[cell];
7623 ncg_cell[ncell+cell+1] = cg1;
/* Record, per zone and per pulse, the atom index range (cell2at0/
 * cell2at1) corresponding to the received charge groups, for fast
 * copying of communication buffers. */
7627 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7628 int nzone, int cg0, const int *cgindex)
7632 /* Store the atom block boundaries for easy copying of communication buffers
7635 for (zone = 0; zone < nzone; zone++)
7637 for (p = 0; p < cd->np; p++)
7639 cd->ind[p].cell2at0[zone] = cgindex[cg];
7640 cg += cd->ind[p].nrecv[zone];
7641 cd->ind[p].cell2at1[zone] = cgindex[cg];
/* Return TRUE when global charge group cg_gl has a bonded link to at
 * least one charge group that is not local (bLocalCG[..] == FALSE).
 * (Sampled chunk: the return statements between/after these lines are
 * not visible here.) */
7646 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7652 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7654 if (!bLocalCG[link->a[i]])
7663 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7665 real c[DIM][4]; /* the corners for the non-bonded communication */
7666 real cr0; /* corner for rounding */
7667 real cr1[4]; /* corners for rounding */
7668 real bc[DIM]; /* corners for bounded communication */
7669 real bcr1; /* corner for rounding for bonded communication */
7672 /* Determine the corners of the domain(s) we are communicating with */
/* Fill a dd_corners_t with the cell boundaries relevant for halo
 * communication along up to three DD dimensions (dim0/dim1/dim2):
 * plain corners c[][], rounding corners cr0/cr1[], and the bonded
 * (multi-body) corners bc[]/bcr1, which take the maximum over the
 * staggered neighbor cells when DLB is active (zone_d1/zone_d2). */
7674 set_dd_corners(const gmx_domdec_t *dd,
7675 int dim0, int dim1, int dim2,
7679 const gmx_domdec_comm_t *comm;
7680 const gmx_domdec_zones_t *zones;
7685 zones = &comm->zones;
7687 /* Keep the compiler happy */
7691 /* The first dimension is equal for all cells */
7692 c->c[0][0] = comm->cell_x0[dim0];
7695 c->bc[0] = c->c[0][0];
7700 /* This cell row is only seen from the first row */
7701 c->c[1][0] = comm->cell_x0[dim1];
7702 /* All rows can see this row */
7703 c->c[1][1] = comm->cell_x0[dim1];
/* With DLB (staggered grid) the neighbor's boundary can differ */
7706 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7709 /* For the multi-body distance we need the maximum */
7710 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7713 /* Set the upper-right corner for rounding */
7714 c->cr0 = comm->cell_x1[dim0];
7719 for (j = 0; j < 4; j++)
7721 c->c[2][j] = comm->cell_x0[dim2];
7725 /* Use the maximum of the i-cells that see a j-cell */
7726 for (i = 0; i < zones->nizone; i++)
7728 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7734 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7740 /* For the multi-body distance we need the maximum */
7741 c->bc[2] = comm->cell_x0[dim2];
7742 for (i = 0; i < 2; i++)
7744 for (j = 0; j < 2; j++)
7746 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7752 /* Set the upper-right corner for rounding */
7753 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7754 * Only cell (0,0,0) can see cell 7 (1,1,1)
7756 c->cr1[0] = comm->cell_x1[dim1];
7757 c->cr1[3] = comm->cell_x1[dim1];
7760 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7763 /* For the multi-body distance we need the maximum */
7764 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7771 /* Determine which cg's we need to send in this pulse from this zone */
/* Scan charge groups [cg0,cg1) of zone `zonei` and select those within
 * the communication distance (r_comm2) or, for bonded communication,
 * the bonded distance (r_bcomm2) of the receiving domain's corners.
 * Handles both rectangular and triclinic cells (with corner "rounding"
 * to reduce the send volume), applies PBC/screw-PBC corrections to the
 * coordinates placed in vbuf, and appends the selected indices to
 * ind->index and the global indices to *ibuf. */
7773 get_zone_pulse_cgs(gmx_domdec_t *dd,
7774 int zonei, int zone,
7776 const int *index_gl,
7778 int dim, int dim_ind,
7779 int dim0, int dim1, int dim2,
7780 real r_comm2, real r_bcomm2,
7784 real skew_fac2_d, real skew_fac_01,
7785 rvec *v_d, rvec *v_0, rvec *v_1,
7786 const dd_corners_t *c,
7788 gmx_bool bDistBonded,
7794 gmx_domdec_ind_t *ind,
7795 int **ibuf, int *ibuf_nalloc,
7801 gmx_domdec_comm_t *comm;
7803 gmx_bool bDistMB_pulse;
7805 real r2, rb2, r, tric_sh;
7808 int nsend_z, nsend, nat;
/* Screw PBC only applies when communicating along x */
7812 bScrew = (dd->bScrewPBC && dim == XX);
7814 bDistMB_pulse = (bDistMB && bDistBonded);
7820 for (cg = cg0; cg < cg1; cg++)
7824 if (tric_dist[dim_ind] == 0)
7826 /* Rectangular direction, easy */
7827 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
/* Separate (larger) corner distance for multi-body bondeds */
7834 r = cg_cm[cg][dim] - c->bc[dim_ind];
7840 /* Rounding gives at most a 16% reduction
7841 * in communicated atoms
7843 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7845 r = cg_cm[cg][dim0] - c->cr0;
7846 /* This is the first dimension, so always r >= 0 */
7853 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7855 r = cg_cm[cg][dim1] - c->cr1[zone];
7862 r = cg_cm[cg][dim1] - c->bcr1;
7872 /* Triclinic direction, more complicated */
7875 /* Rounding, conservative as the skew_fac multiplication
7876 * will slightly underestimate the distance.
7878 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7880 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7881 for (i = dim0+1; i < DIM; i++)
7883 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7885 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7888 rb[dim0] = rn[dim0];
7891 /* Take care that the cell planes along dim0 might not
7892 * be orthogonal to those along dim1 and dim2.
7894 for (i = 1; i <= dim_ind; i++)
7897 if (normal[dim0][dimd] > 0)
7899 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7902 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7907 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7909 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7911 for (i = dim1+1; i < DIM; i++)
7913 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7915 rn[dim1] += tric_sh;
7918 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7919 /* Take care of coupling of the distances
7920 * to the planes along dim0 and dim1 through dim2.
7922 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7923 /* Take care that the cell planes along dim1
7924 * might not be orthogonal to that along dim2.
7926 if (normal[dim1][dim2] > 0)
7928 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7934 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7937 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7938 /* Take care of coupling of the distances
7939 * to the planes along dim0 and dim1 through dim2.
7941 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7942 /* Take care that the cell planes along dim1
7943 * might not be orthogonal to that along dim2.
7945 if (normal[dim1][dim2] > 0)
7947 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7952 /* The distance along the communication direction */
7953 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7955 for (i = dim+1; i < DIM; i++)
7957 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7962 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7963 /* Take care of coupling of the distances
7964 * to the planes along dim0 and dim1 through dim2.
7966 if (dim_ind == 1 && zonei == 1)
7968 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
7974 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
7977 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
7978 /* Take care of coupling of the distances
7979 * to the planes along dim0 and dim1 through dim2.
7981 if (dim_ind == 1 && zonei == 1)
7983 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
/* Bonded selection: within bonded distance AND the cg has an
 * inter-cg bonded interaction with a missing (non-local) link */
7991 ((bDistMB && rb2 < r_bcomm2) ||
7992 (bDist2B && r2 < r_bcomm2)) &&
7994 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
7995 missing_link(comm->cglink, index_gl[cg],
7998 /* Make an index to the local charge groups */
7999 if (nsend+1 > ind->nalloc)
8001 ind->nalloc = over_alloc_large(nsend+1);
8002 srenew(ind->index, ind->nalloc);
8004 if (nsend+1 > *ibuf_nalloc)
8006 *ibuf_nalloc = over_alloc_large(nsend+1);
8007 srenew(*ibuf, *ibuf_nalloc);
8009 ind->index[nsend] = cg;
8010 (*ibuf)[nsend] = index_gl[cg];
8012 vec_rvec_check_alloc(vbuf, nsend+1);
8014 if (dd->ci[dim] == 0)
8016 /* Correct cg_cm for pbc */
8017 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
/* Screw PBC: the periodic image is also rotated by pi around x */
8020 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8021 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8026 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8029 nat += cgindex[cg+1] - cgindex[cg];
8035 *nsend_z_ptr = nsend_z;
/* Set up the halo (zone) communication for all decomposition dimensions
 * and pulses. For each dimension/pulse this selects the home/previously
 * received charge groups to send, exchanges the send/receive counts, the
 * global cg indices and (with the group cut-off scheme) the cg centers of
 * mass with the neighboring domain, and updates the zone cg ranges and
 * the total atom count of the local + halo state.
 *
 * NOTE(review): this extract is missing brace/continuation lines; the
 * comments below describe only the logic visible here.
 */
8038 static void setup_dd_communication(gmx_domdec_t *dd,
8039 matrix box, gmx_ddbox_t *ddbox,
8040 t_forcerec *fr, t_state *state, rvec **f)
8042 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8043 int nzone, nzone_send, zone, zonei, cg0, cg1;
8044 int c, i, j, cg, cg_gl, nrcg;
8045 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8046 gmx_domdec_comm_t *comm;
8047 gmx_domdec_zones_t *zones;
8048 gmx_domdec_comm_dim_t *cd;
8049 gmx_domdec_ind_t *ind;
8050 cginfo_mb_t *cginfo_mb;
8051 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8052 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8053 dd_corners_t corners;
8055 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8056 real skew_fac2_d, skew_fac_01;
8063 fprintf(debug, "Setting up DD communication\n");
/* cg_cm is presumably set per cut-off scheme here; the group scheme is
 * handled, others fall through to the incons below — TODO confirm. */
8068 switch (fr->cutoff_scheme)
8077 gmx_incons("unimplemented");
/* A dimension needs triclinic distance handling if any decomposition
 * dimension up to and including it is triclinic. */
8081 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8083 dim = dd->dim[dim_ind];
8085 /* Check if we need to use triclinic distances */
8086 tric_dist[dim_ind] = 0;
8087 for (i = 0; i <= dim_ind; i++)
8089 if (ddbox->tric_dir[dd->dim[i]])
8091 tric_dist[dim_ind] = 1;
8096 bBondComm = comm->bBondComm;
8098 /* Do we need to determine extra distances for multi-body bondeds? */
8099 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8101 /* Do we need to determine extra distances for only two-body bondeds? */
8102 bDist2B = (bBondComm && !bDistMB);
/* Squared cut-offs: non-bonded communication and bonded (mbody) range */
8104 r_comm2 = sqr(comm->cutoff);
8105 r_bcomm2 = sqr(comm->cutoff_mbody);
8109 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8112 zones = &comm->zones;
/* dim1/dim2 are -1 when the decomposition has fewer than 2/3 dimensions */
8115 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8116 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8118 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8120 /* Triclinic stuff */
8121 normal = ddbox->normal;
8125 v_0 = ddbox->v[dim0];
8126 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8128 /* Determine the coupling coefficient for the distances
8129 * to the cell planes along dim0 and dim1 through dim2.
8130 * This is required for correct rounding.
8133 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8136 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8142 v_1 = ddbox->v[dim1];
8145 zone_cg_range = zones->cg_range;
8146 index_gl = dd->index_gl;
8147 cgindex = dd->cgindex;
8148 cginfo_mb = fr->cginfo_mb;
/* Zone 0 is the home zone; halo cgs are appended starting at pos_cg */
8150 zone_cg_range[0] = 0;
8151 zone_cg_range[1] = dd->ncg_home;
8152 comm->zone_ncg1[0] = dd->ncg_home;
8153 pos_cg = dd->ncg_home;
8155 nat_tot = dd->nat_home;
/* Main loop: one communication setup per decomposition dimension */
8157 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8159 dim = dd->dim[dim_ind];
8160 cd = &comm->cd[dim_ind];
8162 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8164 /* No pbc in this dimension, the first node should not comm. */
8172 v_d = ddbox->v[dim];
8173 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8175 cd->bInPlace = TRUE;
/* Loop over the communication pulses along this dimension */
8176 for (p = 0; p < cd->np; p++)
8178 /* Only atoms communicated in the first pulse are used
8179 * for multi-body bonded interactions or for bBondComm.
8181 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8186 for (zone = 0; zone < nzone_send; zone++)
8188 if (tric_dist[dim_ind] && dim_ind > 0)
8190 /* Determine slightly more optimized skew_fac's
8192 * This reduces the number of communicated atoms
8193 * by about 10% for 3D DD of rhombic dodecahedra.
8195 for (dimd = 0; dimd < dim; dimd++)
8197 sf2_round[dimd] = 1;
8198 if (ddbox->tric_dir[dimd])
8200 for (i = dd->dim[dimd]+1; i < DIM; i++)
8202 /* If we are shifted in dimension i
8203 * and the cell plane is tilted forward
8204 * in dimension i, skip this coupling.
8206 if (!(zones->shift[nzone+zone][i] &&
8207 ddbox->v[dimd][i][dimd] >= 0))
8210 sqr(ddbox->v[dimd][i][dimd]);
8213 sf2_round[dimd] = 1/sf2_round[dimd];
8218 zonei = zone_perm[dim_ind][zone];
8221 /* Here we permute the zones to obtain a convenient order
8222 * for neighbor searching
8224 cg0 = zone_cg_range[zonei];
8225 cg1 = zone_cg_range[zonei+1];
8229 /* Look only at the cg's received in the previous grid pulse
8231 cg1 = zone_cg_range[nzone+zone+1];
8232 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
/* Thread-parallel selection of the cgs to send for this zone/pulse;
 * thread 0 writes directly into the communication buffers, the other
 * threads fill per-thread scratch buffers that are appended below. */
8235 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8236 for (th = 0; th < comm->nth; th++)
8238 gmx_domdec_ind_t *ind_p;
8239 int **ibuf_p, *ibuf_nalloc_p;
8241 int *nsend_p, *nat_p;
8247 /* Thread 0 writes in the comm buffers */
8249 ibuf_p = &comm->buf_int;
8250 ibuf_nalloc_p = &comm->nalloc_int;
8251 vbuf_p = &comm->vbuf;
8254 nsend_zone_p = &ind->nsend[zone];
8258 /* Other threads write into temp buffers */
8259 ind_p = &comm->dth[th].ind;
8260 ibuf_p = &comm->dth[th].ibuf;
8261 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8262 vbuf_p = &comm->dth[th].vbuf;
8263 nsend_p = &comm->dth[th].nsend;
8264 nat_p = &comm->dth[th].nat;
8265 nsend_zone_p = &comm->dth[th].nsend_zone;
8267 comm->dth[th].nsend = 0;
8268 comm->dth[th].nat = 0;
8269 comm->dth[th].nsend_zone = 0;
/* Static partition of the cg range [cg0,cg1) over the threads */
8279 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8280 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8283 /* Get the cg's for this pulse in this zone */
8284 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8286 dim, dim_ind, dim0, dim1, dim2,
8289 normal, skew_fac2_d, skew_fac_01,
8290 v_d, v_0, v_1, &corners, sf2_round,
8291 bDistBonded, bBondComm,
8295 ibuf_p, ibuf_nalloc_p,
8301 /* Append data of threads>=1 to the communication buffers */
8302 for (th = 1; th < comm->nth; th++)
8304 dd_comm_setup_work_t *dth;
8307 dth = &comm->dth[th];
8309 ns1 = nsend + dth->nsend_zone;
8310 if (ns1 > ind->nalloc)
8312 ind->nalloc = over_alloc_dd(ns1);
8313 srenew(ind->index, ind->nalloc);
8315 if (ns1 > comm->nalloc_int)
8317 comm->nalloc_int = over_alloc_dd(ns1);
8318 srenew(comm->buf_int, comm->nalloc_int);
8320 if (ns1 > comm->vbuf.nalloc)
8322 comm->vbuf.nalloc = over_alloc_dd(ns1);
8323 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8326 for (i = 0; i < dth->nsend_zone; i++)
8328 ind->index[nsend] = dth->ind.index[i];
8329 comm->buf_int[nsend] = dth->ibuf[i];
8330 copy_rvec(dth->vbuf.v[i],
8331 comm->vbuf.v[nsend]);
8335 ind->nsend[zone] += dth->nsend_zone;
8338 /* Clear the counts in case we do not have pbc */
8339 for (zone = nzone_send; zone < nzone; zone++)
8341 ind->nsend[zone] = 0;
/* The two extra slots hold the total cg and atom send counts */
8343 ind->nsend[nzone] = nsend;
8344 ind->nsend[nzone+1] = nat;
8345 /* Communicate the number of cg's and atoms to receive */
8346 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8347 ind->nsend, nzone+2,
8348 ind->nrecv, nzone+2);
8350 /* The rvec buffer is also required for atom buffers of size nsend
8351 * in dd_move_x and dd_move_f.
8353 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8357 /* We can receive in place if only the last zone is not empty */
8358 for (zone = 0; zone < nzone-1; zone++)
8360 if (ind->nrecv[zone] > 0)
8362 cd->bInPlace = FALSE;
8367 /* The int buffer is only required here for the cg indices */
8368 if (ind->nrecv[nzone] > comm->nalloc_int2)
8370 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8371 srenew(comm->buf_int2, comm->nalloc_int2);
8373 /* The rvec buffer is also required for atom buffers
8374 * of size nrecv in dd_move_x and dd_move_f.
8376 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8377 vec_rvec_check_alloc(&comm->vbuf2, i);
8381 /* Make space for the global cg indices */
8382 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8383 || dd->cg_nalloc == 0)
8385 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8386 srenew(index_gl, dd->cg_nalloc);
8387 srenew(cgindex, dd->cg_nalloc+1);
8389 /* Communicate the global cg indices */
8392 recv_i = index_gl + pos_cg;
8396 recv_i = comm->buf_int2;
8398 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8399 comm->buf_int, nsend,
8400 recv_i, ind->nrecv[nzone]);
8402 /* Make space for cg_cm */
8403 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8404 if (fr->cutoff_scheme == ecutsGROUP)
8412 /* Communicate cg_cm */
8415 recv_vr = cg_cm + pos_cg;
8419 recv_vr = comm->vbuf2.v;
8421 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8422 comm->vbuf.v, nsend,
8423 recv_vr, ind->nrecv[nzone]);
8425 /* Make the charge group index */
/* In the first pulse all zones get new cgs; in later pulses only the
 * last zone receives — presumably why zone starts at nzone-1 then. */
8428 zone = (p == 0 ? 0 : nzone - 1);
8429 while (zone < nzone)
8431 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8433 cg_gl = index_gl[pos_cg];
8434 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8435 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8436 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8439 /* Update the charge group presence,
8440 * so we can use it in the next pass of the loop.
8442 comm->bLocalCG[cg_gl] = TRUE;
8448 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8451 zone_cg_range[nzone+zone] = pos_cg;
8456 /* This part of the code is never executed with bBondComm. */
8457 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8458 index_gl, recv_i, cg_cm, recv_vr,
8459 cgindex, fr->cginfo_mb, fr->cginfo)
8460 pos_cg += ind->nrecv[nzone];
8462 nat_tot += ind->nrecv[nzone+1];
8466 /* Store the atom block for easy copying of communication buffers */
8467 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
/* srenew may have moved the arrays: store the (possibly new) pointers */
8471 dd->index_gl = index_gl;
8472 dd->cgindex = cgindex;
8474 dd->ncg_tot = zone_cg_range[zones->n];
8475 dd->nat_tot = nat_tot;
8476 comm->nat[ddnatHOME] = dd->nat_home;
8477 for (i = ddnatZONE; i < ddnatNR; i++)
8479 comm->nat[i] = dd->nat_tot;
8484 /* We don't need to update cginfo, since that was already done above.
8485 * So we pass NULL for the forcerec.
8487 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8488 NULL, comm->bLocalCG);
8493 fprintf(debug, "Finished setting up DD communication, zones:");
8494 for (c = 0; c < zones->n; c++)
8496 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8498 fprintf(debug, "\n");
/* Derive the charge-group boundaries used for neighbor searching from the
 * per-zone cg ranges: for each i-zone set its own cg end (cg1) and the
 * cg range [jcg0,jcg1) of the j-zones it interacts with. */
8502 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8506 for (c = 0; c < zones->nizone; c++)
8508 zones->izone[c].cg1 = zones->cg_range[c+1];
8509 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8510 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
/* Determine the spatial extent (x0/x1 per dimension) of the DD zones in the
 * range [zone_start,zone_end), including the extra distance required for
 * multi-body bonded interactions with dynamic load balancing, and compute
 * the triclinic-corrected bounding boxes (bb_x0/bb_x1) used by the Verlet
 * scheme's pair search. Also records the home-zone cg density when zone 0
 * is included.
 *
 * NOTE(review): brace/continuation lines are missing from this extract;
 * comments describe only the visible logic.
 */
8514 static void set_zones_size(gmx_domdec_t *dd,
8515 matrix box, const gmx_ddbox_t *ddbox,
8516 int zone_start, int zone_end)
8518 gmx_domdec_comm_t *comm;
8519 gmx_domdec_zones_t *zones;
8521 int z, zi, zj0, zj1, d, dim;
8524 real size_j, add_tric;
8529 zones = &comm->zones;
8531 /* Do we need to determine extra distances for multi-body bondeds? */
8532 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8534 for (z = zone_start; z < zone_end; z++)
8536 /* Copy cell limits to zone limits.
8537 * Valid for non-DD dims and non-shifted dims.
8539 copy_rvec(comm->cell_x0, zones->size[z].x0);
8540 copy_rvec(comm->cell_x1, zones->size[z].x1);
8543 for (d = 0; d < dd->ndim; d++)
8547 for (z = 0; z < zones->n; z++)
8549 /* With a staggered grid we have different sizes
8550 * for non-shifted dimensions.
8552 if (dd->bGridJump && zones->shift[z][dim] == 0)
/* zone_d1/zone_d2 hold the staggered min/max extents exchanged from
 * the neighboring domains along the earlier DD dimensions */
8556 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8557 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8561 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8562 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
/* Convert cut-offs to distances along this (possibly skewed) dim */
8568 rcmbs = comm->cutoff_mbody;
8569 if (ddbox->tric_dir[dim])
8571 rcs /= ddbox->skew_fac[dim];
8572 rcmbs /= ddbox->skew_fac[dim];
8575 /* Set the lower limit for the shifted zone dimensions */
8576 for (z = zone_start; z < zone_end; z++)
8578 if (zones->shift[z][dim] > 0)
8581 if (!dd->bGridJump || d == 0)
8583 zones->size[z].x0[dim] = comm->cell_x1[dim];
8584 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8588 /* Here we take the lower limit of the zone from
8589 * the lowest domain of the zone below.
8593 zones->size[z].x0[dim] =
8594 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8600 zones->size[z].x0[dim] =
8601 zones->size[zone_perm[2][z-4]].x0[dim];
8605 zones->size[z].x0[dim] =
8606 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8609 /* A temporary limit, is updated below */
8610 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8614 for (zi = 0; zi < zones->nizone; zi++)
8616 if (zones->shift[zi][dim] == 0)
8618 /* This takes the whole zone into account.
8619 * With multiple pulses this will lead
8620 * to a larger zone than strictly necessary.
8622 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8623 zones->size[zi].x1[dim]+rcmbs);
8631 /* Loop over the i-zones to set the upper limit of each
8634 for (zi = 0; zi < zones->nizone; zi++)
8636 if (zones->shift[zi][dim] == 0)
8638 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8640 if (zones->shift[z][dim] > 0)
8642 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8643 zones->size[zi].x1[dim]+rcs);
/* Second phase: build the triclinic-corrected bounding box of each zone
 * from the extreme corners of the rectangular zone limits. */
8650 for (z = zone_start; z < zone_end; z++)
8652 /* Initialization only required to keep the compiler happy */
8653 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8656 /* To determine the bounding box for a zone we need to find
8657 * the extreme corners of 4, 2 or 1 corners.
8659 nc = 1 << (ddbox->npbcdim - 1);
8661 for (c = 0; c < nc; c++)
8663 /* Set up a zone corner at x=0, ignoring triclinic couplings */
8667 corner[YY] = zones->size[z].x0[YY];
8671 corner[YY] = zones->size[z].x1[YY];
8675 corner[ZZ] = zones->size[z].x0[ZZ];
8679 corner[ZZ] = zones->size[z].x1[ZZ];
8681 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8683 /* With 1D domain decomposition the cg's are not in
8684 * the triclinic box, but triclinic x-y and rectangular y-z.
8685 * Shift y back, so it will later end up at 0.
8687 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8689 /* Apply the triclinic couplings */
8690 assert(ddbox->npbcdim <= DIM);
8691 for (i = YY; i < ddbox->npbcdim; i++)
8693 for (j = XX; j < i; j++)
8695 corner[j] += corner[i]*box[i][j]/box[i][i];
8700 copy_rvec(corner, corner_min);
8701 copy_rvec(corner, corner_max);
8705 for (i = 0; i < DIM; i++)
8707 corner_min[i] = min(corner_min[i], corner[i]);
8708 corner_max[i] = max(corner_max[i], corner[i]);
8712 /* Copy the extreme corners without offset along x */
8713 for (i = 0; i < DIM; i++)
8715 zones->size[z].bb_x0[i] = corner_min[i];
8716 zones->size[z].bb_x1[i] = corner_max[i];
8718 /* Add the offset along x */
8719 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8720 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
/* When the home zone is included, record its cg number density
 * (used, presumably, for grid sizing in the pair search — TODO confirm) */
8723 if (zone_start == 0)
8726 for (dim = 0; dim < DIM; dim++)
8728 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8730 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8735 for (z = zone_start; z < zone_end; z++)
8737 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8739 zones->size[z].x0[XX], zones->size[z].x1[XX],
8740 zones->size[z].x0[YY], zones->size[z].x1[YY],
8741 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8742 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8744 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8745 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8746 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
/* qsort()-style comparator for gmx_cgsort_t: orders primarily by ns grid
 * cell index (nsc) and breaks ties on the global cg index (ind_gl), which
 * makes the sort order reproducible across runs. */
8751 static int comp_cgsort(const void *a, const void *b)
8755 gmx_cgsort_t *cga, *cgb;
8756 cga = (gmx_cgsort_t *)a;
8757 cgb = (gmx_cgsort_t *)b;
8759 comp = cga->nsc - cgb->nsc;
/* Tie-break on the global index (reached only when nsc compares equal) */
8762 comp = cga->ind_gl - cgb->ind_gl;
/* Permute the int array a (length n) into the order given by sort[].ind,
 * using buf as scratch space of at least n elements, then copy back. */
8768 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8773 /* Order the data */
8774 for (i = 0; i < n; i++)
8776 buf[i] = a[sort[i].ind];
8779 /* Copy back to the original array */
8780 for (i = 0; i < n; i++)
/* Permute the rvec array v (length n) into the order given by sort[].ind,
 * using buf as scratch space of at least n rvecs, then copy back. */
8786 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8791 /* Order the data */
8792 for (i = 0; i < n; i++)
8794 copy_rvec(v[sort[i].ind], buf[i]);
8797 /* Copy back to the original array */
8798 for (i = 0; i < n; i++)
8800 copy_rvec(buf[i], v[i]);
/* Permute a per-atom rvec array v into charge-group sort order: for each
 * cg (in sorted order) copy its atom range [cgindex[ind], cgindex[ind+1])
 * into buf, then copy the reordered data back. With cgindex == NULL each
 * cg is a single atom and the cheaper per-cg permutation is used. */
8804 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8807 int a, atot, cg, cg0, cg1, i;
8809 if (cgindex == NULL)
8811 /* Avoid the useless loop of the atoms within a cg */
8812 order_vec_cg(ncg, sort, v, buf);
8817 /* Order the data */
8819 for (cg = 0; cg < ncg; cg++)
8821 cg0 = cgindex[sort[cg].ind];
8822 cg1 = cgindex[sort[cg].ind+1];
8823 for (i = cg0; i < cg1; i++)
8825 copy_rvec(v[i], buf[a]);
8831 /* Copy back to the original array */
8832 for (a = 0; a < atot; a++)
8834 copy_rvec(buf[a], v[a]);
/* Merge two cg sort lists into sort1: sort2 is already ordered, sort_new
 * is first qsorted here, then the two are merged in (nsc, ind_gl) order.
 * This is cheaper than re-sorting everything when most cgs kept their
 * ns grid cell. */
8838 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8839 int nsort_new, gmx_cgsort_t *sort_new,
8840 gmx_cgsort_t *sort1)
8844 /* The new indices are not very ordered, so we qsort them */
8845 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8847 /* sort2 is already ordered, so now we can merge the two arrays */
8851 while (i2 < nsort2 || i_new < nsort_new)
/* One input exhausted: drain the other */
8855 sort1[i1++] = sort_new[i_new++];
8857 else if (i_new == nsort_new)
8859 sort1[i1++] = sort2[i2++];
/* Both available: take the smaller element, same key order as comp_cgsort */
8861 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8862 (sort2[i2].nsc == sort_new[i_new].nsc &&
8863 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8865 sort1[i1++] = sort2[i2++];
8869 sort1[i1++] = sort_new[i_new++];
/* Determine the sort order of the home charge groups for the group
 * cut-off scheme, based on their ns grid cell index. When ncg_home_old
 * >= 0 the previous order is exploited: cgs that stayed in the same grid
 * cell are already ordered (sort2) and only the moved/new ones (sort_new)
 * need qsorting before an O(n) merge; otherwise a full qsort is done.
 * Returns the new number of home cgs (cgs flagged as moved to another
 * node are excluded from the count — TODO confirm, counting lines are
 * missing from this extract). */
8874 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8876 gmx_domdec_sort_t *sort;
8877 gmx_cgsort_t *cgsort, *sort_i;
8878 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8879 int sort_last, sort_skip;
8881 sort = dd->comm->sort;
8883 a = fr->ns.grid->cell_index;
/* Cell indices >= moved signal cgs that left this node (see nsgrid) */
8885 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8887 if (ncg_home_old >= 0)
8889 /* The charge groups that remained in the same ns grid cell
8890 * are completely ordered. So we can sort efficiently by sorting
8891 * the charge groups that did move into the stationary list.
8896 for (i = 0; i < dd->ncg_home; i++)
8898 /* Check if this cg did not move to another node */
8901 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8903 /* This cg is new on this node or moved ns grid cell */
8904 if (nsort_new >= sort->sort_new_nalloc)
8906 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8907 srenew(sort->sort_new, sort->sort_new_nalloc);
8909 sort_i = &(sort->sort_new[nsort_new++]);
8913 /* This cg did not move */
8914 sort_i = &(sort->sort2[nsort2++]);
8916 /* Sort on the ns grid cell indices
8917 * and the global topology index.
8918 * index_gl is irrelevant with cell ns,
8919 * but we set it here anyhow to avoid a conditional.
8922 sort_i->ind_gl = dd->index_gl[i];
8929 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8932 /* Sort efficiently */
8933 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
/* Fallback: no usable previous order, fill and qsort the whole list */
8938 cgsort = sort->sort;
8940 for (i = 0; i < dd->ncg_home; i++)
8942 /* Sort on the ns grid cell indices
8943 * and the global topology index
8945 cgsort[i].nsc = a[i];
8946 cgsort[i].ind_gl = dd->index_gl[i];
8948 if (cgsort[i].nsc < moved)
8955 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8957 /* Determine the order of the charge groups using qsort */
8958 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
/* Determine the home cg sort order for the Verlet scheme: the nbnxn
 * module already knows the grid order, so just copy its atom order into
 * sort[].ind. Returns the number of entries kept (ncg_new); entries for
 * atoms that left the node are presumably filtered by lines missing from
 * this extract — TODO confirm. */
8964 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
8967 int ncg_new, i, *a, na;
8969 sort = dd->comm->sort->sort;
8971 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
8974 for (i = 0; i < na; i++)
8978 sort[ncg_new].ind = a[i];
/* Sort the local state on charge group (grid cell) position. Determines
 * the sort order per cut-off scheme, drops cgs that moved to other nodes
 * (dd->ncg_home is reduced to ncg_new), then applies the permutation to
 * all distributed state vectors (x, v, sd_X, cg_p), to cg_cm (group
 * scheme), the global cg index, cginfo, and rebuilds the local cgindex.
 * ncg_home_old < 0 forces a full resort (see dd_sort_order). */
8986 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
8989 gmx_domdec_sort_t *sort;
8990 gmx_cgsort_t *cgsort, *sort_i;
8992 int ncg_new, i, *ibuf, cgsize;
8995 sort = dd->comm->sort;
8997 if (dd->ncg_home > sort->sort_nalloc)
8999 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9000 srenew(sort->sort, sort->sort_nalloc);
9001 srenew(sort->sort2, sort->sort_nalloc);
9003 cgsort = sort->sort;
/* The sort order is scheme specific */
9005 switch (fr->cutoff_scheme)
9008 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9011 ncg_new = dd_sort_order_nbnxn(dd, fr);
9014 gmx_incons("unimplemented");
9018 /* We alloc with the old size, since cgindex is still old */
9019 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9020 vbuf = dd->comm->vbuf.v;
9024 cgindex = dd->cgindex;
9031 /* Remove the charge groups which are no longer at home here */
9032 dd->ncg_home = ncg_new;
9035 fprintf(debug, "Set the new home charge group count to %d\n",
9039 /* Reorder the state */
9040 for (i = 0; i < estNR; i++)
9042 if (EST_DISTR(i) && (state->flags & (1<<i)))
9047 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9050 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9053 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9056 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9060 case estDISRE_INITF:
9061 case estDISRE_RM3TAV:
9062 case estORIRE_INITF:
9064 /* No ordering required */
9067 gmx_incons("Unknown state entry encountered in dd_sort_state");
9072 if (fr->cutoff_scheme == ecutsGROUP)
9075 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9078 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9080 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9081 srenew(sort->ibuf, sort->ibuf_nalloc);
9084 /* Reorder the global cg index */
9085 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9086 /* Reorder the cginfo */
9087 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9088 /* Rebuild the local cg index */
/* Prefix-sum the (permuted) cg sizes into ibuf, then copy to cgindex */
9092 for (i = 0; i < dd->ncg_home; i++)
9094 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9095 ibuf[i+1] = ibuf[i] + cgsize;
9097 for (i = 0; i < dd->ncg_home+1; i++)
9099 dd->cgindex[i] = ibuf[i];
9104 for (i = 0; i < dd->ncg_home+1; i++)
9109 /* Set the home atom number */
9110 dd->nat_home = dd->cgindex[dd->ncg_home];
9112 if (fr->cutoff_scheme == ecutsVERLET)
9114 /* The atoms are now exactly in grid order, update the grid order */
9115 nbnxn_set_atomorder(fr->nbv->nbs);
9119 /* Copy the sorted ns cell indices back to the ns grid struct */
9120 for (i = 0; i < dd->ncg_home; i++)
9122 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9124 fr->ns.grid->nr = dd->ncg_home;
/* Accumulate the per-category communicated-atom counts of this step into
 * the running sums; each category stores the count difference with the
 * previous category (nat[] is cumulative). */
9128 static void add_dd_statistics(gmx_domdec_t *dd)
9130 gmx_domdec_comm_t *comm;
9135 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9137 comm->sum_nat[ddnat-ddnatZONE] +=
9138 comm->nat[ddnat] - comm->nat[ddnat-1];
/* Reset all DD statistics accumulators (communicated-atom sums, load
 * measurements and flop/cycle counters) for total-run counting, e.g.
 * after equilibration. */
9143 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9145 gmx_domdec_comm_t *comm;
9150 /* Reset all the statistics and counters for total run counting */
9151 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9153 comm->sum_nat[ddnat-ddnatZONE] = 0;
9157 comm->load_step = 0;
9160 clear_ivec(comm->load_lim);
/* Print the run-averaged DD communication statistics to the log file:
 * for each atom category (zone/force, vsites, LINCS constraints) the
 * average number of atoms communicated per step, followed by the DD
 * load-balance report when loads were recorded. The sums are reduced
 * over all ranks first. */
9165 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9167 gmx_domdec_comm_t *comm;
9171 comm = cr->dd->comm;
/* Sum the per-rank counters over the whole simulation */
9173 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9180 fprintf(fplog, "\n    D O M A I N   D E C O M P O S I T I O N   S T A T I S T I C S\n\n");
9182 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9184 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9189 " av. #atoms communicated per step for force:  %d x %.1f\n",
9193 if (cr->dd->vsite_comm)
/* Vsite coordinates+forces: 3 passes with PME/Ewald, else 2 —
 * matches the multiplier printed here */
9196 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9197 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9202 if (cr->dd->constraint_comm)
9205 " av. #atoms communicated per step for LINCS:  %d x %.1f\n",
9206 1 + ir->nLincsIter, av);
9210 gmx_incons(" Unknown type for DD statistics");
9213 fprintf(fplog, "\n");
9215 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9217 print_dd_load_av(fplog, cr->dd);
9221 void dd_partition_system(FILE *fplog,
9224 gmx_bool bMasterState,
9226 t_state *state_global,
9227 gmx_mtop_t *top_global,
9229 t_state *state_local,
9232 gmx_localtop_t *top_local,
9235 gmx_shellfc_t shellfc,
9236 gmx_constr_t constr,
9238 gmx_wallcycle_t wcycle,
9242 gmx_domdec_comm_t *comm;
9243 gmx_ddbox_t ddbox = {0};
9245 gmx_int64_t step_pcoupl;
9246 rvec cell_ns_x0, cell_ns_x1;
9247 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9248 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9249 gmx_bool bRedist, bSortCG, bResortAll;
9250 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9257 bBoxChanged = (bMasterState || DEFORM(*ir));
9258 if (ir->epc != epcNO)
9260 /* With nstpcouple > 1 pressure coupling happens.
9261 * one step after calculating the pressure.
9262 * Box scaling happens at the end of the MD step,
9263 * after the DD partitioning.
9264 * We therefore have to do DLB in the first partitioning
9265 * after an MD step where P-coupling occured.
9266 * We need to determine the last step in which p-coupling occurred.
9267 * MRS -- need to validate this for vv?
9272 step_pcoupl = step - 1;
9276 step_pcoupl = ((step - 1)/n)*n + 1;
9278 if (step_pcoupl >= comm->partition_step)
9284 bNStGlobalComm = (step % nstglobalcomm == 0);
9286 if (!comm->bDynLoadBal)
9292 /* Should we do dynamic load balacing this step?
9293 * Since it requires (possibly expensive) global communication,
9294 * we might want to do DLB less frequently.
9296 if (bBoxChanged || ir->epc != epcNO)
9298 bDoDLB = bBoxChanged;
9302 bDoDLB = bNStGlobalComm;
9306 /* Check if we have recorded loads on the nodes */
9307 if (comm->bRecordLoad && dd_load_count(comm))
9309 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9311 /* Check if we should use DLB at the second partitioning
9312 * and every 100 partitionings,
9313 * so the extra communication cost is negligible.
9315 n = max(100, nstglobalcomm);
9316 bCheckDLB = (comm->n_load_collect == 0 ||
9317 comm->n_load_have % n == n-1);
9324 /* Print load every nstlog, first and last step to the log file */
9325 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9326 comm->n_load_collect == 0 ||
9328 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9330 /* Avoid extra communication due to verbose screen output
9331 * when nstglobalcomm is set.
9333 if (bDoDLB || bLogLoad || bCheckDLB ||
9334 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9336 get_load_distribution(dd, wcycle);
9341 dd_print_load(fplog, dd, step-1);
9345 dd_print_load_verbose(dd);
9348 comm->n_load_collect++;
9352 /* Since the timings are node dependent, the master decides */
9356 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
9359 fprintf(debug, "step %s, imb loss %f\n",
9360 gmx_step_str(step, sbuf),
9361 dd_force_imb_perf_loss(dd));
9364 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9367 turn_on_dlb(fplog, cr, step);
9372 comm->n_load_have++;
9375 cgs_gl = &comm->cgs_gl;
9380 /* Clear the old state */
9381 clear_dd_indices(dd, 0, 0);
9384 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9385 TRUE, cgs_gl, state_global->x, &ddbox);
9387 get_cg_distribution(fplog, step, dd, cgs_gl,
9388 state_global->box, &ddbox, state_global->x);
9390 dd_distribute_state(dd, cgs_gl,
9391 state_global, state_local, f);
9393 dd_make_local_cgs(dd, &top_local->cgs);
9395 /* Ensure that we have space for the new distribution */
9396 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9398 if (fr->cutoff_scheme == ecutsGROUP)
9400 calc_cgcm(fplog, 0, dd->ncg_home,
9401 &top_local->cgs, state_local->x, fr->cg_cm);
9404 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9406 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9408 else if (state_local->ddp_count != dd->ddp_count)
9410 if (state_local->ddp_count > dd->ddp_count)
9412 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9415 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9417 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9420 /* Clear the old state */
9421 clear_dd_indices(dd, 0, 0);
9423 /* Build the new indices */
9424 rebuild_cgindex(dd, cgs_gl->index, state_local);
9425 make_dd_indices(dd, cgs_gl->index, 0);
9426 ncgindex_set = dd->ncg_home;
9428 if (fr->cutoff_scheme == ecutsGROUP)
9430 /* Redetermine the cg COMs */
9431 calc_cgcm(fplog, 0, dd->ncg_home,
9432 &top_local->cgs, state_local->x, fr->cg_cm);
9435 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9437 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9439 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9440 TRUE, &top_local->cgs, state_local->x, &ddbox);
9442 bRedist = comm->bDynLoadBal;
9446 /* We have the full state, only redistribute the cgs */
9448 /* Clear the non-home indices */
9449 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9452 /* Avoid global communication for dim's without pbc and -gcom */
9453 if (!bNStGlobalComm)
9455 copy_rvec(comm->box0, ddbox.box0 );
9456 copy_rvec(comm->box_size, ddbox.box_size);
9458 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9459 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9464 /* For dim's without pbc and -gcom */
9465 copy_rvec(ddbox.box0, comm->box0 );
9466 copy_rvec(ddbox.box_size, comm->box_size);
9468 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9471 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9473 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9476 /* Check if we should sort the charge groups */
9477 if (comm->nstSortCG > 0)
9479 bSortCG = (bMasterState ||
9480 (bRedist && (step % comm->nstSortCG == 0)));
/* Remember the old home charge-group count: dd_sort_state below can do an
 * incremental sort of only the first ncg_home_old groups when a full resort
 * is not required.
 */
9487 ncg_home_old = dd->ncg_home;
9492 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9494 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9496 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9498 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
/* Determine the neighbor-search grid extent and density from the home
 * charge-group positions.
 */
9501 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9503 &comm->cell_x0, &comm->cell_x1,
9504 dd->ncg_home, fr->cg_cm,
9505 cell_ns_x0, cell_ns_x1, &grid_density);
9509 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
/* Grid bookkeeping differs between the cutoff schemes: the group scheme
 * uses fr->ns.grid, the Verlet scheme uses the nbnxn grid in fr->nbv->nbs.
 * ncells_old is saved so we can detect below whether the grid dimensions
 * changed since the previous partitioning.
 */
9512 switch (fr->cutoff_scheme)
9515 copy_ivec(fr->ns.grid->n, ncells_old);
9516 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9517 state_local->box, cell_ns_x0, cell_ns_x1,
9518 fr->rlistlong, grid_density);
9521 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9524 gmx_incons("unimplemented");
9526 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9527 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9531 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9533 /* Sort the state on charge group position.
9534 * This enables exact restarts from this step.
9535 * It also improves performance by about 15% with larger numbers
9536 * of atoms per node.
9539 /* Fill the ns grid with the home cell,
9540 * so we can sort with the indices.
9542 set_zones_ncg_home(dd);
9544 switch (fr->cutoff_scheme)
9547 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9549 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9551 comm->zones.size[0].bb_x0,
9552 comm->zones.size[0].bb_x1,
9554 comm->zones.dens_zone0,
9557 ncg_moved, bRedist ? comm->moved : NULL,
9558 fr->nbv->grp[eintLocal].kernel_type,
9559 fr->nbv->grp[eintLocal].nbat);
9561 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9564 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9565 0, dd->ncg_home, fr->cg_cm);
9567 copy_ivec(fr->ns.grid->n, ncells_new);
/* After a master-state (re)distribution the previous local order is
 * meaningless, so everything has to be resorted from scratch.
 */
9570 gmx_incons("unimplemented");
9573 bResortAll = bMasterState;
9575 /* Check if we can use the old order and ns grid cell indices
9576 * of the charge groups to sort the charge groups efficiently.
9578 if (ncells_new[XX] != ncells_old[XX] ||
9579 ncells_new[YY] != ncells_old[YY] ||
9580 ncells_new[ZZ] != ncells_old[ZZ])
9587 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9588 gmx_step_str(step, sbuf), dd->ncg_home);
/* Incremental sort: -1 tells dd_sort_state to sort all groups; otherwise
 * only the first ncg_home_old groups keep a usable previous order.
 */
9590 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9591 bResortAll ? -1 : ncg_home_old);
9592 /* Rebuild all the indices */
9593 ga2la_clear(dd->ga2la);
9596 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9599 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9601 /* Set up the communication and communicate the coordinates */
9602 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9604 /* Set the indices */
9605 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9607 /* Set the charge group boundaries for neighbor searching */
9608 set_cg_boundaries(&comm->zones);
9610 if (fr->cutoff_scheme == ecutsVERLET)
9612 set_zones_size(dd, state_local->box, &ddbox,
9613 bSortCG ? 1 : 0, comm->zones.n);
9616 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9619 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9620 -1,state_local->x,state_local->box);
9623 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9625 /* Extract a local topology from the global topology */
9626 for (i = 0; i < dd->ndim; i++)
9628 np[dd->dim[i]] = comm->cd[i].np;
9630 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9631 comm->cellsize_min, np,
9633 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9634 vsite, top_global, top_local);
9636 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9638 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9640 /* Set up the special atom communication */
9641 n = comm->nat[ddnatZONE];
9642 for (i = ddnatZONE+1; i < ddnatNR; i++)
9647 if (vsite && vsite->n_intercg_vsite)
9649 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9653 if (dd->bInterCGcons || dd->bInterCGsettles)
9655 /* Only for inter-cg constraints we need special code */
9656 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9657 constr, ir->nProjOrder,
9658 top_local->idef.il);
9662 gmx_incons("Unknown special atom type setup");
9667 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9669 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9671 /* Make space for the extra coordinates for virtual site
9672 * or constraint communication.
9674 state_local->natoms = comm->nat[ddnatNR-1];
9675 if (state_local->natoms > state_local->nalloc)
9677 dd_realloc_state(state_local, f, state_local->natoms);
/* Choose how many atoms need virial-free force accumulation: with inter-cg
 * vsites all vsite-communicated atoms, with full electrostatics and inter-cg
 * exclusions all communicated atoms, otherwise only home atoms.
 */
9680 if (fr->bF_NoVirSum)
9682 if (vsite && vsite->n_intercg_vsite)
9684 nat_f_novirsum = comm->nat[ddnatVSITE];
9688 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9690 nat_f_novirsum = dd->nat_tot;
9694 nat_f_novirsum = dd->nat_home;
9703 /* Set the number of atoms required for the force calculation.
9704 * Forces need to be constrained when using a twin-range setup
9705 * or with energy minimization. For simple simulations we could
9706 * avoid some allocation, zeroing and copying, but this is
9707 * probably not worth the complications and checking.
9709 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9710 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9712 /* We make the all mdatoms up to nat_tot_con.
9713 * We could save some work by only setting invmass
9714 * between nat_tot and nat_tot_con.
9716 /* This call also sets the new number of home particles to dd->nat_home */
9717 atoms2md(top_global, ir,
9718 comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
9720 /* Now we have the charges we can sort the FE interactions */
9721 dd_sort_local_top(dd, mdatoms, top_local);
9725 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9726 split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
9731 /* Make the local shell stuff, currently no communication is done */
9732 make_local_shells(cr, mdatoms, shellfc);
9735 if (ir->implicit_solvent)
9737 make_local_gb(cr, fr->born, ir->gb_algorithm);
9740 setup_bonded_threading(fr, &top_local->idef);
/* This rank does not compute PME itself: forward the (possibly changed)
 * per-atom parameters to its dedicated PME-only node.
 */
9742 if (!(cr->duty & DUTY_PME))
9744 /* Send the charges and/or c6/sigmas to our PME only node */
9745 gmx_pme_send_parameters(cr, mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9746 mdatoms->chargeA, mdatoms->chargeB,
9747 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9748 mdatoms->sigmaA, mdatoms->sigmaB,
9749 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9754 set_constraints(constr, top_local, ir, mdatoms, cr);
/* Rebuild the local group lists of the optional modules (pull, enforced
 * rotation, ion swapping, IMD) for the new atom distribution.
 */
9757 if (ir->ePull != epullNO)
9759 /* Update the local pull groups */
9760 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9765 /* Update the local rotation groups */
9766 dd_make_local_rotation_groups(dd, ir->rot);
9769 if (ir->eSwapCoords != eswapNO)
9771 /* Update the local groups needed for ion swapping */
9772 dd_make_local_swap_groups(dd, ir->swap);
9775 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9776 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9778 add_dd_statistics(dd);
9780 /* Make sure we only count the cycles for this DD partitioning */
9781 clear_dd_cycle_counts(dd);
9783 /* Because the order of the atoms might have changed since
9784 * the last vsite construction, we need to communicate the constructing
9785 * atom coordinates again (for spreading the forces this MD step).
9787 dd_move_x_vsites(dd, state_local->box, state_local->x);
9789 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9791 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9793 dd_move_x(dd, state_local->box, state_local->x);
9794 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9795 -1, state_local->x, state_local->box);
9798 /* Store the partitioning step */
9799 comm->partition_step = step;
9801 /* Increase the DD partitioning counter */
/* NOTE(review): the ddp_count increment itself is not visible in this
 * excerpt — presumably it sits between these two comments; confirm in the
 * full source.
 */
9803 /* The state currently matches this DD partitioning count, store it */
9804 state_local->ddp_count = dd->ddp_count;
9807 /* The DD master node knows the complete cg distribution,
9808 * store the count so we can possibly skip the cg info communication.
9810 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9813 if (comm->DD_debug > 0)
9815 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9816 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9817 "after partitioning");