2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "gmx_fatal.h"
48 #include "gmx_fatal_collective.h"
51 #include "domdec_network.h"
54 #include "chargegroup.h"
63 #include "mtop_util.h"
64 #include "gmx_ga2la.h"
66 #include "nbnxn_search.h"
68 #include "gmx_omp_nthreads.h"
69 #include "gpu_utils.h"
71 #include "gromacs/fileio/futil.h"
72 #include "gromacs/fileio/gmxfio.h"
73 #include "gromacs/fileio/pdbio.h"
74 #include "gromacs/timing/wallcycle.h"
75 #include "gromacs/utility/gmxmpi.h"
76 #include "gromacs/swap/swapcoords.h"
77 #include "gromacs/utility/qsort_threadsafe.h"
78 #include "gromacs/pulling/pull.h"
79 #include "gromacs/pulling/pull_rotation.h"
81 #define DDRANK(dd, rank) (rank)
82 #define DDMASTERRANK(dd) (dd->masterrank)
/* Master-rank bookkeeping for gathering/scattering the global state:
 * per-node charge-group counts, the global charge-group division and
 * communication buffers. Only allocated on the DD master rank.
 * NOTE(review): some member lines appear missing from this extract
 * (e.g. under "The cell boundaries"). */
84 typedef struct gmx_domdec_master
86 /* The cell boundaries */
88 /* The global charge group division */
89 int *ncg; /* Number of home charge groups for each node */
90 int *index; /* Index of nnodes+1 into cg */
91 int *cg; /* Global charge group index */
92 int *nat; /* Number of home atoms for each node. */
93 int *ibuf; /* Buffer for communication */
94 rvec *vbuf; /* Buffer for state scattering and gathering */
95 } gmx_domdec_master_t;
99 /* The numbers of charge groups to send and receive for each cell
100 * that requires communication, the last entry contains the total
101 * number of atoms that needs to be communicated.
103 int nsend[DD_MAXIZONE+2];
104 int nrecv[DD_MAXIZONE+2];
105 /* The charge groups to send */
108 /* The atom range for non-in-place communication */
109 int cell2at0[DD_MAXIZONE];
110 int cell2at1[DD_MAXIZONE];
115 int np; /* Number of grid pulses in this dimension */
116 int np_dlb; /* For dlb, for use with edlbAUTO */
117 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
119 gmx_bool bInPlace; /* Can we communicate in place? */
120 } gmx_domdec_comm_dim_t;
124 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
125 real *cell_f; /* State var.: cell boundaries, box relative */
126 real *old_cell_f; /* Temp. var.: old cell size */
127 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
128 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
129 real *bound_min; /* Temp. var.: lower limit for cell boundary */
130 real *bound_max; /* Temp. var.: upper limit for cell boundary */
131 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
132 real *buf_ncd; /* Temp. var. */
135 #define DD_NLOAD_MAX 9
137 /* Here floats are accurate enough, since these variables
138 * only influence the load balancing, not the actual MD results.
165 gmx_cgsort_t *sort_new;
177 /* This enum determines the order of the coordinates.
178 * ddnatHOME and ddnatZONE should be first and second,
179 * the others can be ordered as wanted.
182 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
186 edlbAUTO, edlbNO, edlbYES, edlbNR
188 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
192 int dim; /* The dimension */
193 gmx_bool dim_match; /* Tells if DD and PME dims match */
194 int nslab; /* The number of PME slabs in this dimension */
195 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
196 int *pp_min; /* The minimum pp node location, size nslab */
197 int *pp_max; /* The maximum pp node location,size nslab */
198 int maxshift; /* The maximum shift for coordinate redistribution in PME */
203 real min0; /* The minimum bottom of this zone */
204 real max1; /* The maximum top of this zone */
205 real min1; /* The minimum top of this zone */
206 real mch0; /* The maximum bottom communication height for this zone */
207 real mch1; /* The maximum top communication height for this zone */
208 real p1_0; /* The bottom value of the first cell in this zone */
209 real p1_1; /* The top value of the first cell in this zone */
214 gmx_domdec_ind_t ind;
221 } dd_comm_setup_work_t;
223 typedef struct gmx_domdec_comm
225 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
226 * unless stated otherwise.
229 /* The number of decomposition dimensions for PME, 0: no PME */
231 /* The number of nodes doing PME (PP/PME or only PME) */
235 /* The communication setup including the PME only nodes */
236 gmx_bool bCartesianPP_PME;
239 int *pmenodes; /* size npmenodes */
240 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
241 * but with bCartesianPP_PME */
242 gmx_ddpme_t ddpme[2];
244 /* The DD particle-particle nodes only */
245 gmx_bool bCartesianPP;
246 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
248 /* The global charge groups */
251 /* Should we sort the cgs */
253 gmx_domdec_sort_t *sort;
255 /* Are there charge groups? */
258 /* Are there bonded and multi-body interactions between charge groups? */
259 gmx_bool bInterCGBondeds;
260 gmx_bool bInterCGMultiBody;
262 /* Data for the optional bonded interaction atom communication range */
269 /* Are we actually using DLB? */
270 gmx_bool bDynLoadBal;
272 /* Cell sizes for static load balancing, first index cartesian */
275 /* The width of the communicated boundaries */
278 /* The minimum cell size (including triclinic correction) */
280 /* For dlb, for use with edlbAUTO */
281 rvec cellsize_min_dlb;
282 /* The lower limit for the DD cell size with DLB */
284 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
285 gmx_bool bVacDLBNoLimit;
287 /* With PME load balancing we set limits on DLB */
288 gmx_bool bPMELoadBalDLBLimits;
289 /* DLB needs to take into account that we want to allow this maximum
290 * cut-off (for PME load balancing), this could limit cell boundaries.
292 real PMELoadBal_max_cutoff;
294 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
296 /* box0 and box_size are required with dim's without pbc and -gcom */
300 /* The cell boundaries */
304 /* The old location of the cell boundaries, to check cg displacements */
308 /* The communication setup and charge group boundaries for the zones */
309 gmx_domdec_zones_t zones;
311 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
312 * cell boundaries of neighboring cells for dynamic load balancing.
314 gmx_ddzone_t zone_d1[2];
315 gmx_ddzone_t zone_d2[2][2];
317 /* The coordinate/force communication setup and indices */
318 gmx_domdec_comm_dim_t cd[DIM];
319 /* The maximum number of cells to communicate with in one dimension */
322 /* Which cg distribution is stored on the master node */
323 int master_cg_ddp_count;
325 /* The number of cg's received from the direct neighbors */
326 int zone_ncg1[DD_MAXZONE];
328 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
331 /* Array for signalling if atoms have moved to another domain */
335 /* Communication buffer for general use */
339 /* Communication buffer for general use */
342 /* Temporary storage for thread parallel communication setup */
344 dd_comm_setup_work_t *dth;
346 /* Communication buffers only used with multiple grid pulses */
351 /* Communication buffers for local redistribution */
353 int cggl_flag_nalloc[DIM*2];
355 int cgcm_state_nalloc[DIM*2];
357 /* Cell sizes for dynamic load balancing */
358 gmx_domdec_root_t **root;
362 real cell_f_max0[DIM];
363 real cell_f_min1[DIM];
365 /* Stuff for load communication */
366 gmx_bool bRecordLoad;
367 gmx_domdec_load_t *load;
368 int nrank_gpu_shared;
370 MPI_Comm *mpi_comm_load;
371 MPI_Comm mpi_comm_gpu_shared;
374 /* Maximum DLB scaling per load balancing step in percent */
378 float cycl[ddCyclNr];
379 int cycl_n[ddCyclNr];
380 float cycl_max[ddCyclNr];
381 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
385 /* How often we have had load measurements */
387 /* How often we have collected the load measurements */
391 double sum_nat[ddnatNR-ddnatZONE];
401 /* The last partition step */
402 gmx_int64_t partition_step;
410 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
413 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
414 #define DD_FLAG_NRCG 65535
415 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
416 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
418 /* Zone permutation required to obtain consecutive charge groups
419 * for neighbor searching.
421 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
423 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
424 * components see only j zones with that component 0.
427 /* The DD zone order */
428 static const ivec dd_zo[DD_MAXZONE] =
429 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
434 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
439 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
444 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
446 /* Factors used to avoid problems due to rounding issues */
447 #define DD_CELL_MARGIN 1.0001
448 #define DD_CELL_MARGIN2 1.00005
449 /* Factor to account for pressure scaling during nstlist steps */
450 #define DD_PRES_SCALE_MARGIN 1.02
452 /* Allowed performance loss before we DLB or warn */
453 #define DD_PERF_LOSS 0.05
455 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
457 /* Use separate MPI send and receive commands
458 * when nnodes <= GMX_DD_NNODES_SENDRECV.
459 * This saves memory (and some copying for small nnodes).
460 * For high parallelization scatter and gather calls are used.
462 #define GMX_DD_NNODES_SENDRECV 4
466 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
468 static void index2xyz(ivec nc,int ind,ivec xyz)
470 xyz[XX] = ind % nc[XX];
471 xyz[YY] = (ind / nc[XX]) % nc[YY];
472 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
476 /* This order is required to minimize the coordinate communication in PME
477 * which uses decomposition in the x direction.
479 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
/* Inverse of dd_index(): convert a linear DD rank index into (x,y,z)
 * cell coordinates. z runs fastest, matching the x-major ordering that
 * minimizes PME coordinate communication. */
481 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
483 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
484 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
485 xyz[ZZ] = ind % nc[ZZ];
/* Map DD cell coordinates c to the node id (rank) of that cell.
 * With a combined PP+PME Cartesian setup a precomputed lookup table is
 * used; with a PP-only Cartesian communicator MPI_Cart_rank is asked
 * directly.
 * NOTE(review): the plain (non-Cartesian) fall-through case and the
 * return statement are missing from this extract. */
488 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
493 ddindex = dd_index(dd->nc, c);
494 if (dd->comm->bCartesianPP_PME)
496 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
498 else if (dd->comm->bCartesianPP)
501 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
/* Return whether the DD box can change during the run: true when not
 * all dimensions are bounded (so cell sizes can vary) or when the box
 * itself is dynamic, e.g. under pressure coupling. */
512 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
514 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
/* Return the global atom number (1-based, as used in user-visible
 * output) for local atom index i; fatal if i is outside the range of
 * locally known atoms. */
517 int ddglatnr(gmx_domdec_t *dd, int i)
527 if (i >= dd->comm->nat[ddnatNR-1])
529 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
531 atnr = dd->gatindex[i] + 1;
/* Accessor: the global charge-group block stored with the DD setup. */
537 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
539 return &dd->comm->cgs_gl;
/* Initialize an rvec buffer to empty (no storage allocated yet). */
542 static void vec_rvec_init(vec_rvec_t *v)
/* Ensure the rvec buffer can hold at least n elements, growing with
 * the usual DD over-allocation factor to amortize reallocations. */
548 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
552 v->nalloc = over_alloc_dd(n);
553 srenew(v->v, v->nalloc);
557 void dd_store_state(gmx_domdec_t *dd, t_state *state)
561 if (state->ddp_count != dd->ddp_count)
563 gmx_incons("The state does not the domain decomposition state");
566 state->ncg_gl = dd->ncg_home;
567 if (state->ncg_gl > state->cg_gl_nalloc)
569 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
570 srenew(state->cg_gl, state->cg_gl_nalloc);
572 for (i = 0; i < state->ncg_gl; i++)
574 state->cg_gl[i] = dd->index_gl[i];
577 state->ddp_count_cg_gl = dd->ddp_count;
/* Accessor: the zone setup of the domain decomposition. */
580 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
582 return &dd->comm->zones;
585 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
586 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
588 gmx_domdec_zones_t *zones;
591 zones = &dd->comm->zones;
594 while (icg >= zones->izone[izone].cg1)
603 else if (izone < zones->nizone)
605 *jcg0 = zones->izone[izone].jcg0;
609 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
610 icg, izone, zones->nizone);
613 *jcg1 = zones->izone[izone].jcg1;
615 for (d = 0; d < dd->ndim; d++)
618 shift0[dim] = zones->izone[izone].shift0[dim];
619 shift1[dim] = zones->izone[izone].shift1[dim];
620 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
622 /* A conservative approach, this can be optimized */
/* Return the number of local atoms up to and including the vsite
 * communication range (see the ddnat* ordering enum). */
629 int dd_natoms_vsite(gmx_domdec_t *dd)
631 return dd->comm->nat[ddnatVSITE];
/* Return the local atom index range [at_start, at_end) of the atoms
 * communicated for constraints (the slot before ddnatCON up to
 * ddnatCON in the cumulative atom counts). */
634 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
636 *at_start = dd->comm->nat[ddnatCON-1];
637 *at_end = dd->comm->nat[ddnatCON];
640 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
642 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
643 int *index, *cgindex;
644 gmx_domdec_comm_t *comm;
645 gmx_domdec_comm_dim_t *cd;
646 gmx_domdec_ind_t *ind;
647 rvec shift = {0, 0, 0}, *buf, *rbuf;
648 gmx_bool bPBC, bScrew;
652 cgindex = dd->cgindex;
657 nat_tot = dd->nat_home;
658 for (d = 0; d < dd->ndim; d++)
660 bPBC = (dd->ci[dd->dim[d]] == 0);
661 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
664 copy_rvec(box[dd->dim[d]], shift);
667 for (p = 0; p < cd->np; p++)
674 for (i = 0; i < ind->nsend[nzone]; i++)
676 at0 = cgindex[index[i]];
677 at1 = cgindex[index[i]+1];
678 for (j = at0; j < at1; j++)
680 copy_rvec(x[j], buf[n]);
687 for (i = 0; i < ind->nsend[nzone]; i++)
689 at0 = cgindex[index[i]];
690 at1 = cgindex[index[i]+1];
691 for (j = at0; j < at1; j++)
693 /* We need to shift the coordinates */
694 rvec_add(x[j], shift, buf[n]);
701 for (i = 0; i < ind->nsend[nzone]; i++)
703 at0 = cgindex[index[i]];
704 at1 = cgindex[index[i]+1];
705 for (j = at0; j < at1; j++)
708 buf[n][XX] = x[j][XX] + shift[XX];
710 * This operation requires a special shift force
711 * treatment, which is performed in calc_vir.
713 buf[n][YY] = box[YY][YY] - x[j][YY];
714 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
726 rbuf = comm->vbuf2.v;
728 /* Send and receive the coordinates */
729 dd_sendrecv_rvec(dd, d, dddirBackward,
730 buf, ind->nsend[nzone+1],
731 rbuf, ind->nrecv[nzone+1]);
735 for (zone = 0; zone < nzone; zone++)
737 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
739 copy_rvec(rbuf[j], x[i]);
744 nat_tot += ind->nrecv[nzone+1];
750 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
752 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
753 int *index, *cgindex;
754 gmx_domdec_comm_t *comm;
755 gmx_domdec_comm_dim_t *cd;
756 gmx_domdec_ind_t *ind;
760 gmx_bool bPBC, bScrew;
764 cgindex = dd->cgindex;
769 nzone = comm->zones.n/2;
770 nat_tot = dd->nat_tot;
771 for (d = dd->ndim-1; d >= 0; d--)
773 bPBC = (dd->ci[dd->dim[d]] == 0);
774 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
775 if (fshift == NULL && !bScrew)
779 /* Determine which shift vector we need */
785 for (p = cd->np-1; p >= 0; p--)
788 nat_tot -= ind->nrecv[nzone+1];
795 sbuf = comm->vbuf2.v;
797 for (zone = 0; zone < nzone; zone++)
799 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
801 copy_rvec(f[i], sbuf[j]);
806 /* Communicate the forces */
807 dd_sendrecv_rvec(dd, d, dddirForward,
808 sbuf, ind->nrecv[nzone+1],
809 buf, ind->nsend[nzone+1]);
811 /* Add the received forces */
815 for (i = 0; i < ind->nsend[nzone]; i++)
817 at0 = cgindex[index[i]];
818 at1 = cgindex[index[i]+1];
819 for (j = at0; j < at1; j++)
821 rvec_inc(f[j], buf[n]);
828 for (i = 0; i < ind->nsend[nzone]; i++)
830 at0 = cgindex[index[i]];
831 at1 = cgindex[index[i]+1];
832 for (j = at0; j < at1; j++)
834 rvec_inc(f[j], buf[n]);
835 /* Add this force to the shift force */
836 rvec_inc(fshift[is], buf[n]);
843 for (i = 0; i < ind->nsend[nzone]; i++)
845 at0 = cgindex[index[i]];
846 at1 = cgindex[index[i]+1];
847 for (j = at0; j < at1; j++)
849 /* Rotate the force */
850 f[j][XX] += buf[n][XX];
851 f[j][YY] -= buf[n][YY];
852 f[j][ZZ] -= buf[n][ZZ];
855 /* Add this force to the shift force */
856 rvec_inc(fshift[is], buf[n]);
867 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
869 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
870 int *index, *cgindex;
871 gmx_domdec_comm_t *comm;
872 gmx_domdec_comm_dim_t *cd;
873 gmx_domdec_ind_t *ind;
878 cgindex = dd->cgindex;
880 buf = &comm->vbuf.v[0][0];
883 nat_tot = dd->nat_home;
884 for (d = 0; d < dd->ndim; d++)
887 for (p = 0; p < cd->np; p++)
892 for (i = 0; i < ind->nsend[nzone]; i++)
894 at0 = cgindex[index[i]];
895 at1 = cgindex[index[i]+1];
896 for (j = at0; j < at1; j++)
909 rbuf = &comm->vbuf2.v[0][0];
911 /* Send and receive the coordinates */
912 dd_sendrecv_real(dd, d, dddirBackward,
913 buf, ind->nsend[nzone+1],
914 rbuf, ind->nrecv[nzone+1]);
918 for (zone = 0; zone < nzone; zone++)
920 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
927 nat_tot += ind->nrecv[nzone+1];
933 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
935 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
936 int *index, *cgindex;
937 gmx_domdec_comm_t *comm;
938 gmx_domdec_comm_dim_t *cd;
939 gmx_domdec_ind_t *ind;
944 cgindex = dd->cgindex;
946 buf = &comm->vbuf.v[0][0];
949 nzone = comm->zones.n/2;
950 nat_tot = dd->nat_tot;
951 for (d = dd->ndim-1; d >= 0; d--)
954 for (p = cd->np-1; p >= 0; p--)
957 nat_tot -= ind->nrecv[nzone+1];
964 sbuf = &comm->vbuf2.v[0][0];
966 for (zone = 0; zone < nzone; zone++)
968 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
975 /* Communicate the forces */
976 dd_sendrecv_real(dd, d, dddirForward,
977 sbuf, ind->nrecv[nzone+1],
978 buf, ind->nsend[nzone+1]);
980 /* Add the received forces */
982 for (i = 0; i < ind->nsend[nzone]; i++)
984 at0 = cgindex[index[i]];
985 at1 = cgindex[index[i]+1];
986 for (j = at0; j < at1; j++)
997 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
999 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1001 zone->min0, zone->max1,
1002 zone->mch0, zone->mch0,
1003 zone->p1_0, zone->p1_1);
1007 #define DDZONECOMM_MAXZONE 5
1008 #define DDZONECOMM_BUFSIZE 3
/* Send/receive gmx_ddzone_t structs along DD dimension ddimind by
 * packing each struct into DDZONECOMM_BUFSIZE rvecs (3 reals each),
 * so the existing rvec send/receive routine can be reused. */
1010 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1011 int ddimind, int direction,
1012 gmx_ddzone_t *buf_s, int n_s,
1013 gmx_ddzone_t *buf_r, int n_r)
1015 #define ZBS DDZONECOMM_BUFSIZE
1016 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1017 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
/* Pack the send buffer; the two unused slots are zeroed */
1020 for (i = 0; i < n_s; i++)
1022 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1023 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1024 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1025 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1026 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1027 vbuf_s[i*ZBS+1][2] = 0;
1028 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1029 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1030 vbuf_s[i*ZBS+2][2] = 0;
/* NOTE(review): the buffer/count arguments of this call are missing
 * from this extract (presumably vbuf_s/n_s*ZBS and vbuf_r/n_r*ZBS). */
1033 dd_sendrecv_rvec(dd, ddimind, direction,
/* Unpack the receive buffer into the zone structs */
1037 for (i = 0; i < n_r; i++)
1039 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1040 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1041 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1042 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1043 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1044 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1045 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
1051 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1052 rvec cell_ns_x0, rvec cell_ns_x1)
1054 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1056 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1057 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1058 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1059 rvec extr_s[2], extr_r[2];
1061 real dist_d, c = 0, det;
1062 gmx_domdec_comm_t *comm;
1063 gmx_bool bPBC, bUse;
1067 for (d = 1; d < dd->ndim; d++)
1070 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1071 zp->min0 = cell_ns_x0[dim];
1072 zp->max1 = cell_ns_x1[dim];
1073 zp->min1 = cell_ns_x1[dim];
1074 zp->mch0 = cell_ns_x0[dim];
1075 zp->mch1 = cell_ns_x1[dim];
1076 zp->p1_0 = cell_ns_x0[dim];
1077 zp->p1_1 = cell_ns_x1[dim];
1080 for (d = dd->ndim-2; d >= 0; d--)
1083 bPBC = (dim < ddbox->npbcdim);
1085 /* Use an rvec to store two reals */
1086 extr_s[d][0] = comm->cell_f0[d+1];
1087 extr_s[d][1] = comm->cell_f1[d+1];
1088 extr_s[d][2] = comm->cell_f1[d+1];
1091 /* Store the extremes in the backward sending buffer,
1092 * so they get updated separately from the forward communication.
1094 for (d1 = d; d1 < dd->ndim-1; d1++)
1096 /* We invert the order to be able to use the same loop for buf_e */
1097 buf_s[pos].min0 = extr_s[d1][1];
1098 buf_s[pos].max1 = extr_s[d1][0];
1099 buf_s[pos].min1 = extr_s[d1][2];
1100 buf_s[pos].mch0 = 0;
1101 buf_s[pos].mch1 = 0;
1102 /* Store the cell corner of the dimension we communicate along */
1103 buf_s[pos].p1_0 = comm->cell_x0[dim];
1104 buf_s[pos].p1_1 = 0;
1108 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1111 if (dd->ndim == 3 && d == 0)
1113 buf_s[pos] = comm->zone_d2[0][1];
1115 buf_s[pos] = comm->zone_d1[0];
1119 /* We only need to communicate the extremes
1120 * in the forward direction
1122 npulse = comm->cd[d].np;
1125 /* Take the minimum to avoid double communication */
1126 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1130 /* Without PBC we should really not communicate over
1131 * the boundaries, but implementing that complicates
1132 * the communication setup and therefore we simply
1133 * do all communication, but ignore some data.
1135 npulse_min = npulse;
1137 for (p = 0; p < npulse_min; p++)
1139 /* Communicate the extremes forward */
1140 bUse = (bPBC || dd->ci[dim] > 0);
1142 dd_sendrecv_rvec(dd, d, dddirForward,
1143 extr_s+d, dd->ndim-d-1,
1144 extr_r+d, dd->ndim-d-1);
1148 for (d1 = d; d1 < dd->ndim-1; d1++)
1150 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1151 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1152 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1158 for (p = 0; p < npulse; p++)
1160 /* Communicate all the zone information backward */
1161 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1163 dd_sendrecv_ddzone(dd, d, dddirBackward,
1170 for (d1 = d+1; d1 < dd->ndim; d1++)
1172 /* Determine the decrease of maximum required
1173 * communication height along d1 due to the distance along d,
1174 * this avoids a lot of useless atom communication.
1176 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1178 if (ddbox->tric_dir[dim])
1180 /* c is the off-diagonal coupling between the cell planes
1181 * along directions d and d1.
1183 c = ddbox->v[dim][dd->dim[d1]][dim];
1189 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1192 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1196 /* A negative value signals out of range */
1202 /* Accumulate the extremes over all pulses */
1203 for (i = 0; i < buf_size; i++)
1207 buf_e[i] = buf_r[i];
1213 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1214 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1215 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1218 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1226 if (bUse && dh[d1] >= 0)
1228 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1229 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1232 /* Copy the received buffer to the send buffer,
1233 * to pass the data through with the next pulse.
1235 buf_s[i] = buf_r[i];
1237 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1238 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1240 /* Store the extremes */
1243 for (d1 = d; d1 < dd->ndim-1; d1++)
1245 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1246 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1247 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1251 if (d == 1 || (d == 0 && dd->ndim == 3))
1253 for (i = d; i < 2; i++)
1255 comm->zone_d2[1-d][i] = buf_e[pos];
1261 comm->zone_d1[1] = buf_e[pos];
1271 for (i = 0; i < 2; i++)
1275 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1277 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1278 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1284 for (i = 0; i < 2; i++)
1286 for (j = 0; j < 2; j++)
1290 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1292 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1293 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1297 for (d = 1; d < dd->ndim; d++)
1299 comm->cell_f_max0[d] = extr_s[d-1][0];
1300 comm->cell_f_min1[d] = extr_s[d-1][1];
1303 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1304 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1309 static void dd_collect_cg(gmx_domdec_t *dd,
1310 t_state *state_local)
1312 gmx_domdec_master_t *ma = NULL;
1313 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1316 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1318 /* The master has the correct distribution */
1322 if (state_local->ddp_count == dd->ddp_count)
1324 ncg_home = dd->ncg_home;
1326 nat_home = dd->nat_home;
1328 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1330 cgs_gl = &dd->comm->cgs_gl;
1332 ncg_home = state_local->ncg_gl;
1333 cg = state_local->cg_gl;
1335 for (i = 0; i < ncg_home; i++)
1337 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1342 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1345 buf2[0] = dd->ncg_home;
1346 buf2[1] = dd->nat_home;
1356 /* Collect the charge group and atom counts on the master */
1357 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1362 for (i = 0; i < dd->nnodes; i++)
1364 ma->ncg[i] = ma->ibuf[2*i];
1365 ma->nat[i] = ma->ibuf[2*i+1];
1366 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1369 /* Make byte counts and indices */
1370 for (i = 0; i < dd->nnodes; i++)
1372 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1373 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1377 fprintf(debug, "Initial charge group distribution: ");
1378 for (i = 0; i < dd->nnodes; i++)
1380 fprintf(debug, " %d", ma->ncg[i]);
1382 fprintf(debug, "\n");
1386 /* Collect the charge group indices on the master */
1388 dd->ncg_home*sizeof(int), dd->index_gl,
1389 DDMASTER(dd) ? ma->ibuf : NULL,
1390 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1391 DDMASTER(dd) ? ma->cg : NULL);
1393 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1396 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1399 gmx_domdec_master_t *ma;
1400 int n, i, c, a, nalloc = 0;
1409 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1410 dd->rank, dd->mpi_comm_all);
1415 /* Copy the master coordinates to the global array */
1416 cgs_gl = &dd->comm->cgs_gl;
1418 n = DDMASTERRANK(dd);
1420 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1422 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1424 copy_rvec(lv[a++], v[c]);
1428 for (n = 0; n < dd->nnodes; n++)
1432 if (ma->nat[n] > nalloc)
1434 nalloc = over_alloc_dd(ma->nat[n]);
1435 srenew(buf, nalloc);
1438 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1439 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1442 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1444 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1446 copy_rvec(buf[a++], v[c]);
1455 static void get_commbuffer_counts(gmx_domdec_t *dd,
1456 int **counts, int **disps)
1458 gmx_domdec_master_t *ma;
1463 /* Make the rvec count and displacment arrays */
1465 *disps = ma->ibuf + dd->nnodes;
1466 for (n = 0; n < dd->nnodes; n++)
1468 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1469 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1473 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1476 gmx_domdec_master_t *ma;
1477 int *rcounts = NULL, *disps = NULL;
1486 get_commbuffer_counts(dd, &rcounts, &disps);
1491 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1495 cgs_gl = &dd->comm->cgs_gl;
1498 for (n = 0; n < dd->nnodes; n++)
1500 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1502 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1504 copy_rvec(buf[a++], v[c]);
1511 void dd_collect_vec(gmx_domdec_t *dd,
1512 t_state *state_local, rvec *lv, rvec *v)
1514 gmx_domdec_master_t *ma;
1515 int n, i, c, a, nalloc = 0;
1518 dd_collect_cg(dd, state_local);
1520 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1522 dd_collect_vec_sendrecv(dd, lv, v);
1526 dd_collect_vec_gatherv(dd, lv, v);
1531 void dd_collect_state(gmx_domdec_t *dd,
1532 t_state *state_local, t_state *state)
1536 nh = state->nhchainlength;
1540 for (i = 0; i < efptNR; i++)
1542 state->lambda[i] = state_local->lambda[i];
1544 state->fep_state = state_local->fep_state;
1545 state->veta = state_local->veta;
1546 state->vol0 = state_local->vol0;
1547 copy_mat(state_local->box, state->box);
1548 copy_mat(state_local->boxv, state->boxv);
1549 copy_mat(state_local->svir_prev, state->svir_prev);
1550 copy_mat(state_local->fvir_prev, state->fvir_prev);
1551 copy_mat(state_local->pres_prev, state->pres_prev);
1553 for (i = 0; i < state_local->ngtc; i++)
1555 for (j = 0; j < nh; j++)
1557 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1558 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1560 state->therm_integral[i] = state_local->therm_integral[i];
1562 for (i = 0; i < state_local->nnhpres; i++)
1564 for (j = 0; j < nh; j++)
1566 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1567 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1571 for (est = 0; est < estNR; est++)
1573 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1578 dd_collect_vec(dd, state_local, state_local->x, state->x);
1581 dd_collect_vec(dd, state_local, state_local->v, state->v);
1584 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1587 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1589 case estDISRE_INITF:
1590 case estDISRE_RM3TAV:
1591 case estORIRE_INITF:
1595 gmx_incons("Unknown state entry encountered in dd_collect_state");
1601 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1607 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1610 state->nalloc = over_alloc_dd(nalloc);
1612 for (est = 0; est < estNR; est++)
1614 if (EST_DISTR(est) && (state->flags & (1<<est)))
1619 srenew(state->x, state->nalloc);
1622 srenew(state->v, state->nalloc);
1625 srenew(state->sd_X, state->nalloc);
1628 srenew(state->cg_p, state->nalloc);
1630 case estDISRE_INITF:
1631 case estDISRE_RM3TAV:
1632 case estORIRE_INITF:
1634 /* No reallocation required */
1637 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1644 srenew(*f, state->nalloc);
/* Ensure the forcerec charge-group arrays (and, with the Verlet scheme,
 * the state arrays) can hold nalloc charge groups/atoms.
 */
1648 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1651 if (nalloc > fr->cg_nalloc)
1655 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1657 fr->cg_nalloc = over_alloc_dd(nalloc);
1658 srenew(fr->cginfo, fr->cg_nalloc);
/* cg_cm (charge-group centers) only exists with the group cut-off scheme. */
1659 if (fr->cutoff_scheme == ecutsGROUP)
1661 srenew(fr->cg_cm, fr->cg_nalloc);
1664 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1666 /* We don't use charge groups, we use x in state to set up
1667 * the atom communication.
1669 dd_realloc_state(state, f, nalloc);
/* Distribute a global rvec array v over the DD ranks into local lv using
 * point-to-point MPI_Send/MPI_Recv.  The master packs each rank's atoms
 * (in charge-group order, via ma->cg / cgs->index) into a temporary buffer
 * and sends it; non-master ranks receive their home atoms directly.
 */
1673 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1676 gmx_domdec_master_t *ma;
1677 int n, i, c, a, nalloc = 0;
1684 for (n = 0; n < dd->nnodes; n++)
1688 if (ma->nat[n] > nalloc)
1690 nalloc = over_alloc_dd(ma->nat[n]);
1691 srenew(buf, nalloc);
1693 /* Use lv as a temporary buffer */
/* Gather the atoms of charge groups assigned to rank n into buf. */
1695 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1697 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1699 copy_rvec(v[c], buf[a++]);
/* Sanity check: the packed count must match the bookkeeping. */
1702 if (a != ma->nat[n])
1704 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1709 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1710 DDRANK(dd, n), n, dd->mpi_comm_all);
/* Master's own home atoms are copied directly into lv, no MPI needed. */
1715 n = DDMASTERRANK(dd);
1717 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1719 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1721 copy_rvec(v[c], lv[a++]);
1728 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1729 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
/* Distribute a global rvec array v over the DD ranks into local lv using a
 * single collective MPI_Scatterv (wrapped by dd_scatterv).  The master
 * packs all ranks' atoms contiguously into buf in rank order.
 */
1734 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1737 gmx_domdec_master_t *ma;
1738 int *scounts = NULL, *disps = NULL;
1739 int n, i, c, a, nalloc = 0;
/* scounts/disps give each rank's byte count and offset into buf. */
1746 get_commbuffer_counts(dd, &scounts, &disps);
1750 for (n = 0; n < dd->nnodes; n++)
1752 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1754 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1756 copy_rvec(v[c], buf[a++]);
1762 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1765 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1767 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1769 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1773 dd_distribute_vec_scatterv(dd, cgs, v, lv);
/* Broadcast the expanded-ensemble / free-energy weight history (df_history_t)
 * from the master to all DD ranks; needed when restarting from a checkpoint.
 */
1777 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1780 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1781 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1782 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1784 if (dfhist->nlambda > 0)
1786 int nlam = dfhist->nlambda;
1787 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1788 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1789 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1790 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1791 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1792 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
/* The transition matrices are stored row by row; broadcast each row. */
1794 for (i = 0; i < nlam; i++)
1796 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1797 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1798 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1799 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1800 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1801 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
/* Distribute the full global t_state from the master to the local states
 * on all DD ranks: replicated scalars/matrices are copied (master) and
 * broadcast; the per-atom distributed arrays are scattered per rank.
 */
1806 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1807 t_state *state, t_state *state_local,
1812 nh = state->nhchainlength;
/* Master copies the replicated entries into its own local state first. */
1816 for (i = 0; i < efptNR; i++)
1818 state_local->lambda[i] = state->lambda[i];
1820 state_local->fep_state = state->fep_state;
1821 state_local->veta = state->veta;
1822 state_local->vol0 = state->vol0;
1823 copy_mat(state->box, state_local->box);
1824 copy_mat(state->box_rel, state_local->box_rel);
1825 copy_mat(state->boxv, state_local->boxv);
1826 copy_mat(state->svir_prev, state_local->svir_prev);
1827 copy_mat(state->fvir_prev, state_local->fvir_prev);
1828 copy_df_history(&state_local->dfhist, &state->dfhist);
/* Nose-Hoover chains: ngtc coupling groups x nh chain entries. */
1829 for (i = 0; i < state_local->ngtc; i++)
1831 for (j = 0; j < nh; j++)
1833 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1834 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1836 state_local->therm_integral[i] = state->therm_integral[i];
1838 for (i = 0; i < state_local->nnhpres; i++)
1840 for (j = 0; j < nh; j++)
1842 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1843 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
/* Broadcast the replicated entries to all ranks. */
1847 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1848 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1849 dd_bcast(dd, sizeof(real), &state_local->veta);
1850 dd_bcast(dd, sizeof(real), &state_local->vol0);
1851 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1852 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1853 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1854 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1855 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1856 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1857 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1858 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1859 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1860 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1862 /* communicate df_history -- required for restarting from checkpoint */
1863 dd_distribute_dfhist(dd, &state_local->dfhist);
/* Make sure the local per-atom arrays can hold this rank's home atoms. */
1865 if (dd->nat_home > state_local->nalloc)
1867 dd_realloc_state(state_local, f, dd->nat_home);
/* Scatter the distributed per-atom arrays that are active in flags. */
1869 for (i = 0; i < estNR; i++)
1871 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1876 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1879 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1882 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1885 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1887 case estDISRE_INITF:
1888 case estDISRE_RM3TAV:
1889 case estORIRE_INITF:
1891 /* Not implemented yet */
1894 gmx_incons("Unknown state entry encountered in dd_distribute_state");
/* Map a Cartesian dimension index (XX/YY/ZZ) to its axis letter for
 * diagnostics; any other value is a fatal internal error.
 */
1900 static char dim2char(int dim)
1906 case XX: c = 'X'; break;
1907 case YY: c = 'Y'; break;
1908 case ZZ: c = 'Z'; break;
1909 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
/* Debug output: write the DD cell grid as a PDB file, with the 8 corners
 * of each rank's cell as CA atoms (b-factor = relative cell volume) and
 * CONECT records drawing the cell edges.  All ranks send their cell
 * boundaries to the master, which writes the file.
 */
1915 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1916 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1918 rvec grid_s[2], *grid_r = NULL, cx, r;
1919 char fname[STRLEN], format[STRLEN], buf[22];
1921 int a, i, d, z, y, x;
1925 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1926 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1930 snew(grid_r, 2*dd->nnodes);
/* Gather all cell lower/upper corners on the master rank. */
1933 dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd) ? grid_r[0] : NULL);
/* Build the triclinic skew matrix used to map cell fractions to coords. */
1937 for (d = 0; d < DIM; d++)
1939 for (i = 0; i < DIM; i++)
1947 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1949 tric[d][i] = box[i][d]/box[i][i];
1958 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1959 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
1960 out = gmx_fio_fopen(fname, "w");
1961 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1963 for (i = 0; i < dd->nnodes; i++)
/* vol: this cell's volume relative to the average cell volume. */
1965 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1966 for (d = 0; d < DIM; d++)
1968 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
/* Emit the 8 corners of cell i; coordinates are in Angstrom (10*nm). */
1970 for (z = 0; z < 2; z++)
1972 for (y = 0; y < 2; y++)
1974 for (x = 0; x < 2; x++)
1976 cx[XX] = grid_r[i*2+x][XX];
1977 cx[YY] = grid_r[i*2+y][YY];
1978 cx[ZZ] = grid_r[i*2+z][ZZ];
1980 fprintf(out, format, "ATOM", a++, "CA", "GLY", ' ', 1+i,
1981 ' ', 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol);
/* CONECT records: connect corner pairs along each dimension d. */
1985 for (d = 0; d < DIM; d++)
1987 for (x = 0; x < 4; x++)
1991 case 0: y = 1 + i*8 + 2*x; break;
1992 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1993 case 2: y = 1 + i*8 + x; break;
1995 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
1999 gmx_fio_fclose(out);
/* Debug output: write this rank's local atoms (home + communicated zones
 * + vsites) to a per-rank PDB file.  The b-factor column encodes the DD
 * zone index (zones.n for vsites, zones.n+1 for constraint atoms).
 */
2004 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
2005 gmx_mtop_t *mtop, t_commrec *cr,
2006 int natoms, rvec x[], matrix box)
2008 char fname[STRLEN], format[STRLEN], format4[STRLEN], buf[22];
2010 int i, ii, resnr, c;
2011 char *atomname, *resname;
2018 natoms = dd->comm->nat[ddnatVSITE];
2021 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2023 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
2024 sprintf(format4, "%s%s\n", get_pdbformat4(), "%6.2f%6.2f");
2026 out = gmx_fio_fopen(fname, "w");
2028 fprintf(out, "TITLE %s\n", title);
2029 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2030 for (i = 0; i < natoms; i++)
/* ii: global atom index of local atom i. */
2032 ii = dd->gatindex[i];
2033 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
/* Determine which DD zone local atom i belongs to (stored in b). */
2034 if (i < dd->comm->nat[ddnatZONE])
2037 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2043 else if (i < dd->comm->nat[ddnatVSITE])
2045 b = dd->comm->zones.n;
2049 b = dd->comm->zones.n + 1;
/* PDB fixed columns: wrap serials at 100000 and resnr at 10000. */
2051 fprintf(out, strlen(atomname) < 4 ? format : format4,
2052 "ATOM", (ii+1)%100000,
2053 atomname, resname, ' ', resnr%10000, ' ',
2054 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b);
2056 fprintf(out, "TER\n");
2058 gmx_fio_fclose(out);
/* Return the effective multi-body (bonded) communication cut-off.
 * With an explicit cutoff_mbody that value is used; otherwise (no DLB)
 * the minimum cell size over the decomposed dimensions acts as the limit,
 * possibly raised by cutoff_mbody when bonded communication is filtered.
 * The result never exceeds the non-bonded cut-off.
 */
2061 real dd_cutoff_mbody(gmx_domdec_t *dd)
2063 gmx_domdec_comm_t *comm;
2070 if (comm->bInterCGBondeds)
2072 if (comm->cutoff_mbody > 0)
2074 r = comm->cutoff_mbody;
2078 /* cutoff_mbody=0 means we do not have DLB */
2079 r = comm->cellsize_min[dd->dim[0]];
2080 for (di = 1; di < dd->ndim; di++)
2082 r = min(r, comm->cellsize_min[dd->dim[di]]);
2084 if (comm->bBondComm)
2086 r = max(r, comm->cutoff_mbody);
2090 r = min(r, comm->cutoff);
2098 real dd_cutoff_twobody(gmx_domdec_t *dd)
2102 r_mb = dd_cutoff_mbody(dd);
2104 return max(dd->comm->cutoff, r_mb);
/* Convert a DD (PP) Cartesian coordinate to the Cartesian coordinate of
 * its PME partner rank.  Only the PME-decomposed dimension changes: the
 * PP index is mapped proportionally onto the (ntot - nc) PME slots that
 * follow the nc PP slots, with rounding to the nearest slot.
 */
2108 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2112 nc = dd->nc[dd->comm->cartpmedim];
2113 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2114 copy_ivec(coord, coord_pme);
2115 coord_pme[dd->comm->cartpmedim] =
2116 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
{
    /* Assign a PME rank to each DD rank, assuming the major index of
     * both decompositions is x.  Adding npme/2 before the integer
     * division spreads the assignment evenly over the PME ranks.
     */
    int shifted = ddindex*npme + npme/2;

    return shifted/ndd;
}
2128 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2130 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2133 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2135 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
/* Build and return the list of simulation ranks that are PME-only ranks,
 * assuming PME ranks are interleaved after each group of DD ranks that
 * maps to the same PME index.  Caller owns the returned array.
 */
2138 static int *dd_pmenodes(t_commrec *cr)
2143 snew(pmenodes, cr->npmenodes);
2145 for (i = 0; i < cr->dd->nnodes; i++)
2147 p0 = cr_ddindex2pmeindex(cr, i);
2148 p1 = cr_ddindex2pmeindex(cr, i+1);
/* A PME rank is inserted after DD rank i whenever the PME index
 * increases (or after the last DD rank). */
2149 if (i+1 == cr->dd->nnodes || p1 > p0)
2153 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2155 pmenodes[n] = i + 1 + n;
/* Return the PME slab/rank index responsible for the DD cell at grid
 * coordinates (x,y,z).
 * NOTE(review): the bCartesian branch below uses older call names
 * (dd_coords2pmecoords, dd->ntot) that do not match the rest of this
 * file -- it looks like legacy/disabled code; presumably only the
 * ddindex2pmeindex path at the end is active.  Verify against the
 * unelided source.
 */
2163 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2166 ivec coords, coords_pme, nc;
2171 if (dd->comm->bCartesian) {
2172 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2173 dd_coords2pmecoords(dd,coords,coords_pme);
2174 copy_ivec(dd->ntot,nc);
2175 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2176 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2178 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2180 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2186 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
/* Return the simulation rank id of the DD cell at grid coordinates
 * (x,y,z), handling the three rank-layout modes: full Cartesian PP+PME
 * communicator, Cartesian PP ordering, and interleaved PP/PME ranks.
 */
2191 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2193 gmx_domdec_comm_t *comm;
2195 int ddindex, nodeid = -1;
2197 comm = cr->dd->comm;
2202 if (comm->bCartesianPP_PME)
/* MPI assigned the ranks: ask the Cartesian communicator. */
2205 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2210 ddindex = dd_index(cr->dd->nc, coords);
2211 if (comm->bCartesianPP)
2213 nodeid = comm->ddindex2simnodeid[ddindex];
/* Interleaved layout: PME ranks are inserted between DD ranks. */
2219 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
/* Return the simulation rank id of the PME rank that serves PP rank
 * sim_nodeid; presumably returns -1 when sim_nodeid is itself a PME-only
 * rank (initial value elided here) -- see gmx_pmeonlynode().
 */
2231 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2234 gmx_domdec_comm_t *comm;
2235 ivec coord, coord_pme;
2242 /* This assumes a uniform x domain decomposition grid cell size */
2243 if (comm->bCartesianPP_PME)
2246 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2247 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2249 /* This is a PP node */
2250 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2251 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2255 else if (comm->bCartesianPP)
2257 if (sim_nodeid < dd->nnodes)
/* PME ranks are numbered after all DD ranks. */
2259 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2264 /* This assumes DD cells with identical x coordinates
2265 * are numbered sequentially.
2267 if (dd->comm->pmenodes == NULL)
2269 if (sim_nodeid < dd->nnodes)
2271 /* The DD index equals the nodeid */
2272 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid)
/* Interleaved layout: search the sorted pmenodes list for the first
 * PME rank with an id above sim_nodeid. */
2278 while (sim_nodeid > dd->comm->pmenodes[i])
2282 if (sim_nodeid < dd->comm->pmenodes[i])
2284 pmenode = dd->comm->pmenodes[i];
/* Report the PME grid decomposition (ranks in x and y) from the DD setup.
 * NOTE(review): the branch for dd == NULL is elided here; presumably it
 * falls back to 1x1 -- confirm against the unelided source.
 */
2292 void get_pme_nnodes(const gmx_domdec_t *dd,
2293 int *npmenodes_x, int *npmenodes_y)
2297 *npmenodes_x = dd->comm->npmenodes_x;
2298 *npmenodes_y = dd->comm->npmenodes_y;
/* Return TRUE when sim_nodeid is a PME-only rank: with DD active this is
 * the case exactly when it has no PME partner (dd_simnode2pmenode == -1);
 * without DD there are no PME-only ranks.
 */
2307 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2309 gmx_bool bPMEOnlyNode;
2311 if (DOMAINDECOMP(cr))
2313 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2317 bPMEOnlyNode = FALSE;
2320 return bPMEOnlyNode;
/* For PME rank pmenodeid, collect the list of PP (DD) ranks it receives
 * coordinates from (*my_ddnodes, count *nmy_ddnodes) and the peer PP rank
 * (*node_peer) used for signalling.  Caller owns *my_ddnodes.
 */
2323 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2324 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2328 ivec coord, coord_pme;
/* Upper bound: each PME rank serves at most ceil(nnodes/npmenodes) PP ranks. */
2332 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
/* Scan all DD cells and keep those whose PME partner is this rank. */
2335 for (x = 0; x < dd->nc[XX]; x++)
2337 for (y = 0; y < dd->nc[YY]; y++)
2339 for (z = 0; z < dd->nc[ZZ]; z++)
2341 if (dd->comm->bCartesianPP_PME)
2346 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2347 if (dd->ci[XX] == coord_pme[XX] &&
2348 dd->ci[YY] == coord_pme[YY] &&
2349 dd->ci[ZZ] == coord_pme[ZZ])
2351 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2356 /* The slab corresponds to the nodeid in the PME group */
2357 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2359 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2366 /* The last PP-only node is the peer node */
2367 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2371 fprintf(debug, "Receive coordinates from PP nodes:");
2372 for (x = 0; x < *nmy_ddnodes; x++)
2374 fprintf(debug, " %d", (*my_ddnodes)[x]);
2376 fprintf(debug, "\n");
/* Decide whether this PP rank should receive virial/energy contributions
 * back from its PME rank: with separate PME ranks, only the LAST PP rank
 * that maps to a given PME rank does, to avoid double counting.
 */
2380 static gmx_bool receive_vir_ener(t_commrec *cr)
2382 gmx_domdec_comm_t *comm;
2383 int pmenode, coords[DIM], rank;
2387 if (cr->npmenodes < cr->dd->nnodes)
2389 comm = cr->dd->comm;
2390 if (comm->bCartesianPP_PME)
2392 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
/* Check whether our Cartesian neighbour in the PME dimension maps
 * to the same PME rank; if so, we are not the last PP rank. */
2394 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2395 coords[comm->cartpmedim]++;
2396 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2398 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2399 if (dd_simnode2pmenode(cr, rank) == pmenode)
2401 /* This is not the last PP node for pmenode */
2409 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2410 if (cr->sim_nodeid+1 < cr->nnodes &&
2411 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2413 /* This is not the last PP node for pmenode */
/* Reset the zone charge-group ranges so that zone 0 holds all home charge
 * groups and every further zone is empty (used right after repartitioning,
 * before the non-home zones are communicated).
 */
2422 static void set_zones_ncg_home(gmx_domdec_t *dd)
2424 gmx_domdec_zones_t *zones;
2427 zones = &dd->comm->zones;
2429 zones->cg_range[0] = 0;
2430 for (i = 1; i < zones->n+1; i++)
2432 zones->cg_range[i] = dd->ncg_home;
2434 /* zone_ncg1[0] should always be equal to ncg_home */
2435 dd->comm->zone_ncg1[0] = dd->ncg_home;
/* Rebuild the local charge-group bookkeeping (global cg indices, local
 * cgindex, home counts) from the cg list stored in a checkpointed state.
 * gcgs_index is the global charge-group -> first-atom index table.
 */
2438 static void rebuild_cgindex(gmx_domdec_t *dd,
2439 const int *gcgs_index, t_state *state)
2441 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2444 dd_cg_gl = dd->index_gl;
2445 cgindex = dd->cgindex;
2448 for (i = 0; i < state->ncg_gl; i++)
2452 dd_cg_gl[i] = cg_gl;
/* Accumulate the atom count from the global cg size. */
2453 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2457 dd->ncg_home = state->ncg_gl;
2460 set_zones_ncg_home(dd);
/* Look up the cginfo entry for global charge group cg in the per-molblock
 * cginfo list; entries repeat with period cg_mod within a block.
 * NOTE(review): the loop body advancing cginfo_mb is elided in this
 * extract -- presumably `cginfo_mb++`; confirm against the full source.
 */
2463 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2465 while (cg >= cginfo_mb->cg_end)
2470 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
/* Fill fr->cginfo for the local charge groups [cg0,cg1) from the global
 * per-molblock tables, and mark them as local in bLocalCG (if used).
 */
2473 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2474 t_forcerec *fr, char *bLocalCG)
2476 cginfo_mb_t *cginfo_mb;
2482 cginfo_mb = fr->cginfo_mb;
2483 cginfo = fr->cginfo;
2485 for (cg = cg0; cg < cg1; cg++)
2487 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2491 if (bLocalCG != NULL)
2493 for (cg = cg0; cg < cg1; cg++)
2495 bLocalCG[index_gl[cg]] = TRUE;
/* (Re)build the local<->global atom index tables starting from charge
 * group cg_start: fills dd->gatindex (local -> global) and the ga2la hash
 * (global -> local + zone) for all zones.
 */
2500 static void make_dd_indices(gmx_domdec_t *dd,
2501 const int *gcgs_index, int cg_start)
2503 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2504 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2509 bLocalCG = dd->comm->bLocalCG;
2511 if (dd->nat_tot > dd->gatindex_nalloc)
2513 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2514 srenew(dd->gatindex, dd->gatindex_nalloc);
2517 nzone = dd->comm->zones.n;
2518 zone2cg = dd->comm->zones.cg_range;
2519 zone_ncg1 = dd->comm->zone_ncg1;
2520 index_gl = dd->index_gl;
2521 gatindex = dd->gatindex;
2522 bCGs = dd->comm->bCGs;
2524 if (zone2cg[1] != dd->ncg_home)
2526 gmx_incons("dd->ncg_zone is not up to date");
2529 /* Make the local to global and global to local atom index */
2530 a = dd->cgindex[cg_start];
2531 for (zone = 0; zone < nzone; zone++)
2539 cg0 = zone2cg[zone];
2541 cg1 = zone2cg[zone+1];
/* cg1_p1: end of the charge groups reachable within one comm. pulse. */
2542 cg1_p1 = cg0 + zone_ncg1[zone];
2544 for (cg = cg0; cg < cg1; cg++)
2549 /* Signal that this cg is from more than one pulse away */
2552 cg_gl = index_gl[cg];
/* With charge groups each cg spans several atoms; without,
 * each cg is a single atom and cg_gl is the global atom index. */
2555 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2558 ga2la_set(dd->ga2la, a_gl, a, zone1);
2564 gatindex[a] = cg_gl;
2565 ga2la_set(dd->ga2la, cg_gl, a, zone1);
/* Consistency check (debug): verify that bLocalCG marks exactly the charge
 * groups present in dd->index_gl.  Returns the number of errors found.
 */
2572 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2575 int ncg, i, ngl, nerr;
2578 if (bLocalCG == NULL)
2582 for (i = 0; i < dd->ncg_tot; i++)
2584 if (!bLocalCG[dd->index_gl[i]])
2587 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
/* Count how many cgs are marked local over the whole system. */
2592 for (i = 0; i < ncg_sys; i++)
2599 if (ngl != dd->ncg_tot)
2601 fprintf(stderr, "DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
/* Consistency check (debug): verify the local<->global atom index tables
 * agree in both directions; fatal error when any inconsistency is found.
 */
2608 static void check_index_consistency(gmx_domdec_t *dd,
2609 int natoms_sys, int ncg_sys,
2612 int nerr, ngl, i, a, cell;
2617 if (dd->comm->DD_debug > 1)
/* Expensive check: no global atom may appear twice locally. */
2619 snew(have, natoms_sys);
2620 for (a = 0; a < dd->nat_tot; a++)
2622 if (have[dd->gatindex[a]] > 0)
2624 fprintf(stderr, "DD node %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2628 have[dd->gatindex[a]] = a + 1;
2634 snew(have, dd->nat_tot);
/* Check ga2la -> gatindex round trips for every global atom. */
2637 for (i = 0; i < natoms_sys; i++)
2639 if (ga2la_get(dd->ga2la, i, &a, &cell))
2641 if (a >= dd->nat_tot)
2643 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2649 if (dd->gatindex[a] != i)
2651 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2658 if (ngl != dd->nat_tot)
2661 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2662 dd->rank, where, ngl, dd->nat_tot);
/* Every local atom must have been reached via ga2la above. */
2664 for (a = 0; a < dd->nat_tot; a++)
2669 "DD node %d, %s: local atom %d, global %d has no global index\n",
2670 dd->rank, where, a+1, dd->gatindex[a]+1);
2675 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2679 gmx_fatal(FARGS, "DD node %d, %s: %d atom/cg index inconsistencies",
2680 dd->rank, where, nerr);
/* Invalidate local<->global index data from charge group cg_start / atom
 * a_start onward, ahead of a repartitioning; also clears any local vsite
 * and constraint index caches.
 */
2684 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2691 /* Clear the whole list without searching */
2692 ga2la_clear(dd->ga2la);
/* Partial clear: remove only the entries from a_start onward. */
2696 for (i = a_start; i < dd->nat_tot; i++)
2698 ga2la_del(dd->ga2la, dd->gatindex[i]);
2702 bLocalCG = dd->comm->bLocalCG;
2705 for (i = cg_start; i < dd->ncg_tot; i++)
2707 bLocalCG[dd->index_gl[i]] = FALSE;
2711 dd_clear_local_vsite_indices(dd);
2713 if (dd->constraints)
2715 dd_clear_local_constraint_indices(dd);
2719 /* This function should be used for moving the domain boudaries during DLB,
2720 * for obtaining the minimum cell size. It checks the initially set limit
2721 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2722 * and, possibly, a longer cut-off limit set for PME load balancing.
2724 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2728 cellsize_min = comm->cellsize_min[dim];
2730 if (!comm->bVacDLBNoLimit)
2732 /* The cut-off might have changed, e.g. by PME load balacning,
2733 * from the value used to set comm->cellsize_min, so check it.
/* np_dlb pulses must together cover the cut-off, so each cell must be
 * at least cutoff/np_dlb wide. */
2735 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2737 if (comm->bPMELoadBalDLBLimits)
2739 /* Check for the cut-off limit set by the PME load balancing */
2740 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2744 return cellsize_min;
/* Return the minimum allowed distance between staggered cell boundaries
 * of neighbouring rows/columns under DLB, so that charge groups can move
 * at most one cell per redecomposition.
 */
2747 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2750 real grid_jump_limit;
2752 /* The distance between the boundaries of cells at distance
2753 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2754 * and by the fact that cells should not be shifted by more than
2755 * half their size, such that cg's only shift by one cell
2756 * at redecomposition.
2758 grid_jump_limit = comm->cellsize_limit;
2759 if (!comm->bVacDLBNoLimit)
2761 if (comm->bPMELoadBalDLBLimits)
2763 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2765 grid_jump_limit = max(grid_jump_limit,
2766 cutoff/comm->cd[dim_ind].np);
2769 return grid_jump_limit;
/* Check whether the DLB cell boundaries of this rank have shifted more
 * than the allowed grid-jump limit in any staggered dimension; either
 * reports the violation (returning it) or aborts with a fatal error.
 */
2772 static gmx_bool check_grid_jump(gmx_int64_t step,
2778 gmx_domdec_comm_t *comm;
/* Dimension 0 has no staggering, so start at d = 1. */
2787 for (d = 1; d < dd->ndim; d++)
2790 limit = grid_jump_limit(comm, cutoff, d);
2791 bfac = ddbox->box_size[dim];
2792 if (ddbox->tric_dir[dim])
2794 bfac *= ddbox->skew_fac[dim];
/* Compare our boundaries against the extremes of the neighbours. */
2796 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2797 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2805 /* This error should never be triggered under normal
2806 * circumstances, but you never know ...
2808 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
2809 gmx_step_str(step, buf),
2810 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2818 static int dd_load_count(gmx_domdec_comm_t *comm)
2820 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
/* Return this rank's force-computation load measure for DLB: either a
 * (optionally randomized) flop count or the force cycle count, corrected
 * for outliers and, with shared GPUs, averaged GPU wait time.
 */
2823 static float dd_force_load(gmx_domdec_comm_t *comm)
2830 if (comm->eFlop > 1)
/* eFlop > 1: add up to +-5% random noise to test DLB robustness. */
2832 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2837 load = comm->cycl[ddCyclF];
2838 if (comm->cycl_n[ddCyclF] > 1)
2840 /* Subtract the maximum of the last n cycle counts
2841 * to get rid of possible high counts due to other sources,
2842 * for instance system activity, that would otherwise
2843 * affect the dynamic load balancing.
2845 load -= comm->cycl_max[ddCyclF];
2849 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2851 float gpu_wait, gpu_wait_sum;
2853 gpu_wait = comm->cycl[ddCyclWaitGPU];
2854 if (comm->cycl_n[ddCyclF] > 1)
2856 /* We should remove the WaitGPU time of the same MD step
2857 * as the one with the maximum F time, since the F time
2858 * and the wait time are not independent.
2859 * Furthermore, the step for the max F time should be chosen
2860 * the same on all ranks that share the same GPU.
2861 * But to keep the code simple, we remove the average instead.
2862 * The main reason for artificially long times at some steps
2863 * is spurious CPU activity or MPI time, so we don't expect
2864 * that changes in the GPU wait time matter a lot here.
2866 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2868 /* Sum the wait times over the ranks that share the same GPU */
2869 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2870 comm->mpi_comm_gpu_shared);
2871 /* Replace the wait time by the average over the ranks */
2872 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
/* Build the cumulative cell-boundary fractions (*dim_f, length nc[dim]+1,
 * from 0 to 1) along dim for static (non-DLB) load balancing, using the
 * user-supplied relative cell sizes slb_frac if present, else uniform.
 */
2880 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2882 gmx_domdec_comm_t *comm;
2887 snew(*dim_f, dd->nc[dim]+1);
2889 for (i = 1; i < dd->nc[dim]; i++)
2891 if (comm->slb_frac[dim])
2893 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2897 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2900 (*dim_f)[dd->nc[dim]] = 1;
/* Initialize a gmx_ddpme_t for PME decomposition dimension dimind:
 * determine the number of PME slabs, which DD cell range (pp_min/pp_max)
 * each slab communicates with, and the static boundary fractions.
 */
2903 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2905 int pmeindex, slab, nso, i;
2908 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2914 ddpme->dim = dimind;
2916 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2918 ddpme->nslab = (ddpme->dim == 0 ?
2919 dd->comm->npmenodes_x :
2920 dd->comm->npmenodes_y);
2922 if (ddpme->nslab <= 1)
/* nso: number of PME rank indices that map to one slab of this dim. */
2927 nso = dd->comm->npmenodes/ddpme->nslab;
2928 /* Determine for each PME slab the PP location range for dimension dim */
2929 snew(ddpme->pp_min, ddpme->nslab);
2930 snew(ddpme->pp_max, ddpme->nslab);
/* Start with an empty (inverted) range, then widen with each DD cell. */
2931 for (slab = 0; slab < ddpme->nslab; slab++)
2933 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2934 ddpme->pp_max[slab] = 0;
2936 for (i = 0; i < dd->nnodes; i++)
2938 ddindex2xyz(dd->nc, i, xyz);
2939 /* For y only use our y/z slab.
2940 * This assumes that the PME x grid size matches the DD grid size.
2942 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2944 pmeindex = ddindex2pmeindex(dd, i);
2947 slab = pmeindex/nso;
2951 slab = pmeindex % ddpme->nslab;
2953 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
2954 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
2958 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
/* Return the maximum PME slab communication shift along x.
 * NOTE(review): the fallback branch is elided here; presumably it
 * returns 0 when ddpme[0] is not decomposed along XX -- confirm.
 */
2961 int dd_pme_maxshift_x(gmx_domdec_t *dd)
2963 if (dd->comm->ddpme[0].dim == XX)
2965 return dd->comm->ddpme[0].maxshift;
/* Return the maximum PME slab communication shift along y, whether y is
 * the first or second PME decomposition dimension.
 * NOTE(review): the final fallback branch is elided here; presumably it
 * returns 0 when neither ddpme entry is along YY -- confirm.
 */
2973 int dd_pme_maxshift_y(gmx_domdec_t *dd)
2975 if (dd->comm->ddpme[0].dim == YY)
2977 return dd->comm->ddpme[0].maxshift;
2979 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2981 return dd->comm->ddpme[1].maxshift;
/* Determine ddpme->maxshift: how many neighbouring PME slabs a PME rank
 * may need to communicate with, given the current cell boundary fractions
 * cell_f and how far atoms can drift out of their DD cell.
 */
2989 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
2990 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
2992 gmx_domdec_comm_t *comm;
2995 real range, pme_boundary;
2999 nc = dd->nc[ddpme->dim];
3002 if (!ddpme->dim_match)
3004 /* PP decomposition is not along dim: the worst situation */
3007 else if (ns <= 3 || (bUniform && ns == nc))
3009 /* The optimal situation */
3014 /* We need to check for all pme nodes which nodes they
3015 * could possibly need to communicate with.
3017 xmin = ddpme->pp_min;
3018 xmax = ddpme->pp_max;
3019 /* Allow for atoms to be maximally 2/3 times the cut-off
3020 * out of their DD cell. This is a reasonable balance between
3021 * between performance and support for most charge-group/cut-off
3024 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3025 /* Avoid extra communication when we are exactly at a boundary */
/* Increase sh until no slab boundary is within range of a cell edge;
 * the +-ns variants handle periodic wrap-around of the slab index. */
3029 for (s = 0; s < ns; s++)
3031 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3032 pme_boundary = (real)s/ns;
3035 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3037 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3041 pme_boundary = (real)(s+1)/ns;
3044 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3046 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3053 ddpme->maxshift = sh;
3057 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3058 ddpme->dim, ddpme->maxshift);
/* Verify that in every decomposed, bounded dimension the (skew-corrected)
 * box is large enough for the requested number of DD cells; fatal error
 * with a descriptive message otherwise.
 */
3062 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3066 for (d = 0; d < dd->ndim; d++)
3069 if (dim < ddbox->nboundeddim &&
3070 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3071 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3073 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3074 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3075 dd->nc[dim], dd->comm->cellsize_limit);
/* Compute the static (non-DLB) DD cell boundaries for all dimensions,
 * either uniformly or from user-supplied fractions (slb_frac), and the
 * number of communication pulses (npulse) each dimension needs to cover
 * the cut-off.  With bMaster set, all boundaries are stored in dd->ma.
 */
3080 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3081 gmx_bool bMaster, ivec npulse)
3083 gmx_domdec_comm_t *comm;
3086 real *cell_x, cell_dx, cellsize;
3090 for (d = 0; d < DIM; d++)
3092 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
/* Uniform case: equal cell widths along d. */
3094 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3097 cell_dx = ddbox->box_size[d]/dd->nc[d];
3100 for (j = 0; j < dd->nc[d]+1; j++)
3102 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3107 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3108 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
/* npulse[d]: smallest pulse count whose combined width covers cutoff. */
3110 cellsize = cell_dx*ddbox->skew_fac[d];
3111 while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
3115 cellsize_min[d] = cellsize;
3119 /* Statically load balanced grid */
3120 /* Also when we are not doing a master distribution we determine
3121 * all cell borders in a loop to obtain identical values
3122 * to the master distribution case and to determine npulse.
3126 cell_x = dd->ma->cell_x[d];
3130 snew(cell_x, dd->nc[d]+1);
3132 cell_x[0] = ddbox->box0[d];
3133 for (j = 0; j < dd->nc[d]; j++)
3135 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3136 cell_x[j+1] = cell_x[j] + cell_dx;
3137 cellsize = cell_dx*ddbox->skew_fac[d];
3138 while (cellsize*npulse[d] < comm->cutoff &&
3139 npulse[d] < dd->nc[d]-1)
3143 cellsize_min[d] = min(cellsize_min[d], cellsize);
3147 comm->cell_x0[d] = cell_x[dd->ci[d]];
3148 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3152 /* The following limitation is to avoid that a cell would receive
3153 * some of its own home charge groups back over the periodic boundary.
3154 * Double charge groups cause trouble with the global indices.
3156 if (d < ddbox->npbcdim &&
3157 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3159 gmx_fatal_collective(FARGS, NULL, dd,
3160 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3161 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3163 dd->nc[d], dd->nc[d],
3164 dd->nnodes > dd->nc[d] ? "cells" : "processors");
3168 if (!comm->bDynLoadBal)
3170 copy_rvec(cellsize_min, comm->cellsize_min);
/* With static balancing the PME maxshift can be set once, here. */
3173 for (d = 0; d < comm->npmedecompdim; d++)
3175 set_pme_maxshift(dd, &comm->ddpme[d],
3176 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3177 comm->ddpme[d].slb_dim_f);
/* Enforce the minimum cell size limit on the DLB row-master's relative
 * cell boundaries root->cell_f, restricted to boundaries in the half-open
 * range [range[0], range[1]).  Cells below the limit are pinned to the
 * limit and the remaining cells are rescaled iteratively; boundary moves
 * are additionally capped at halfway into the neighboring old cells, and
 * staggering-bound (bound_min/bound_max) violations are resolved by
 * recursing on sub-ranges.
 * NOTE(review): interleaved source lines are missing from this excerpt
 * (the embedded original line numbers skip), so the comments below
 * describe only the code that is visible here.
 */
3182 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3183 int d, int dim, gmx_domdec_root_t *root,
3185 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3187 gmx_domdec_comm_t *comm;
3188 int ncd, i, j, nmin, nmin_old;
3189 gmx_bool bLimLo, bLimHi;
3191 real fac, halfway, cellsize_limit_f_i, region_size;
3192 gmx_bool bPBC, bLastHi = FALSE;
/* nrange holds the sub-range handed to the recursive calls below */
3193 int nrange[] = {range[0], range[1]};
/* Total relative size of the region being balanced */
3195 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
/* Without PBC the outermost cells are exempt from the size limit */
3201 bPBC = (dim < ddbox->npbcdim);
3203 cell_size = root->buf_ncd;
3207 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3210 /* First we need to check if the scaling does not make cells
3211 * smaller than the smallest allowed size.
3212 * We need to do this iteratively, since if a cell is too small,
3213 * it needs to be enlarged, which makes all the other cells smaller,
3214 * which could in turn make another cell smaller than allowed.
3216 for (i = range[0]; i < range[1]; i++)
3218 root->bCellMin[i] = FALSE;
3224 /* We need the total for normalization */
3226 for (i = range[0]; i < range[1]; i++)
3228 if (root->bCellMin[i] == FALSE)
3230 fac += cell_size[i];
3233 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3234 /* Determine the cell boundaries */
3235 for (i = range[0]; i < range[1]; i++)
3237 if (root->bCellMin[i] == FALSE)
3239 cell_size[i] *= fac;
/* First/last cell without PBC: no lower size limit applies */
3240 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3242 cellsize_limit_f_i = 0;
3246 cellsize_limit_f_i = cellsize_limit_f;
/* Pin too-small cells to the limit; rescaling repeats in the outer loop */
3248 if (cell_size[i] < cellsize_limit_f_i)
3250 root->bCellMin[i] = TRUE;
3251 cell_size[i] = cellsize_limit_f_i;
3255 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
/* Iterate until no additional cell got pinned to the minimum size */
3258 while (nmin > nmin_old);
3261 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3262 /* For this check we should not use DD_CELL_MARGIN,
3263 * but a slightly smaller factor,
3264 * since rounding could get use below the limit.
3266 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3269 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3270 gmx_step_str(step, buf),
3271 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3272 ncd, comm->cellsize_min[dim]);
/* Balancing was limited if any cell got pinned or we are in a sub-range */
3275 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3279 /* Check if the boundary did not displace more than halfway
3280 * each of the cells it bounds, as this could cause problems,
3281 * especially when the differences between cell sizes are large.
3282 * If changes are applied, they will not make cells smaller
3283 * than the cut-off, as we check all the boundaries which
3284 * might be affected by a change and if the old state was ok,
3285 * the cells will at most be shrunk back to their old size.
3287 for (i = range[0]+1; i < range[1]; i++)
3289 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3290 if (root->cell_f[i] < halfway)
3292 root->cell_f[i] = halfway;
3293 /* Check if the change also causes shifts of the next boundaries */
3294 for (j = i+1; j < range[1]; j++)
3296 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3298 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
/* Same halfway cap for a boundary that moved down (toward cell i) */
3302 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3303 if (root->cell_f[i] > halfway)
3305 root->cell_f[i] = halfway;
3306 /* Check if the change also causes shifts of the next boundaries */
3307 for (j = i-1; j >= range[0]+1; j--)
3309 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3311 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3318 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3319 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3320 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3321 * for a and b nrange is used */
3324 /* Take care of the staggering of the cell boundaries */
3327 for (i = range[0]; i < range[1]; i++)
3329 root->cell_f_max0[i] = root->cell_f[i];
3330 root->cell_f_min1[i] = root->cell_f[i+1];
3335 for (i = range[0]+1; i < range[1]; i++)
3337 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3338 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3339 if (bLimLo && bLimHi)
3341 /* Both limits violated, try the best we can */
3342 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3343 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3344 nrange[0] = range[0];
3346 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3349 nrange[1] = range[1];
3350 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3356 /* root->cell_f[i] = root->bound_min[i]; */
3357 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3360 else if (bLimHi && !bLastHi)
3363 if (nrange[1] < range[1]) /* found a LimLo before */
3365 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3366 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3367 nrange[0] = nrange[1];
3369 root->cell_f[i] = root->bound_max[i];
3371 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3373 nrange[1] = range[1];
/* After the scan: handle a trailing LimLo violation, if one was stored */
3376 if (nrange[1] < range[1]) /* found last a LimLo */
3378 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3379 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3380 nrange[0] = nrange[1];
3381 nrange[1] = range[1];
3382 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3384 else if (nrange[0] > range[0]) /* found at least one LimHi */
3386 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
/* Row-master computation of new DLB cell boundaries for decomposition
 * index d (Cartesian dimension dim): rescale cell sizes from the measured
 * per-cell loads with underrelaxation, cap the total scaling, set the
 * staggering bounds for non-uniform higher dimensions, enforce the
 * minimum-size limits, and append lower-dimension boundaries plus PME
 * maxshift values to root->cell_f for broadcast to the row.
 * NOTE(review): interleaved source lines are missing from this excerpt;
 * comments describe only the visible code.
 */
3393 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3394 int d, int dim, gmx_domdec_root_t *root,
3395 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3396 gmx_bool bUniform, gmx_int64_t step)
3398 gmx_domdec_comm_t *comm;
3399 int ncd, d1, i, j, pos;
3401 real load_aver, load_i, imbalance, change, change_max, sc;
3402 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3406 int range[] = { 0, 0 };
3410 /* Convert the maximum change from the input percentage to a fraction */
3411 change_limit = comm->dlb_scale_lim*0.01;
3415 bPBC = (dim < ddbox->npbcdim);
3417 cell_size = root->buf_ncd;
3419 /* Store the original boundaries */
3420 for (i = 0; i < ncd+1; i++)
3422 root->old_cell_f[i] = root->cell_f[i];
/* Uniform grid requested: equal-sized cells */
3426 for (i = 0; i < ncd; i++)
3428 cell_size[i] = 1.0/ncd;
3431 else if (dd_load_count(comm))
3433 load_aver = comm->load[d].sum_m/ncd;
3435 for (i = 0; i < ncd; i++)
3437 /* Determine the relative imbalance of cell i */
3438 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3439 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3440 /* Determine the change of the cell size using underrelaxation */
3441 change = -relax*imbalance;
3442 change_max = max(change_max, max(change, -change));
3444 /* Limit the amount of scaling.
3445 * We need to use the same rescaling for all cells in one row,
3446 * otherwise the load balancing might not converge.
3449 if (change_max > change_limit)
3451 sc *= change_limit/change_max;
3453 for (i = 0; i < ncd; i++)
3455 /* Determine the relative imbalance of cell i */
3456 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3457 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3458 /* Determine the change of the cell size using underrelaxation */
3459 change = -sc*imbalance;
3460 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
/* Relative (box-fraction) cell size and boundary distance limits */
3464 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3465 cellsize_limit_f *= DD_CELL_MARGIN;
3466 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3467 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3468 if (ddbox->tric_dir[dim])
3470 cellsize_limit_f /= ddbox->skew_fac[dim];
3471 dist_min_f /= ddbox->skew_fac[dim];
/* With pressure scaling leave extra room so box shrinking stays safe */
3473 if (bDynamicBox && d > 0)
3475 dist_min_f *= DD_PRES_SCALE_MARGIN;
3477 if (d > 0 && !bUniform)
3479 /* Make sure that the grid is not shifted too much */
3480 for (i = 1; i < ncd; i++)
3482 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3484 gmx_incons("Inconsistent DD boundary staggering limits!");
3486 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3487 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3490 root->bound_min[i] += 0.5*space;
3492 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3493 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3496 root->bound_max[i] += 0.5*space;
3501 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3503 root->cell_f_max0[i-1] + dist_min_f,
3504 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3505 root->cell_f_min1[i] - dist_min_f);
/* The end points of the whole row are fixed */
3510 root->cell_f[0] = 0;
3511 root->cell_f[ncd] = 1;
3512 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3515 /* After the checks above, the cells should obey the cut-off
3516 * restrictions, but it does not hurt to check.
3518 for (i = 0; i < ncd; i++)
3522 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3523 dim, i, root->cell_f[i], root->cell_f[i+1]);
3526 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3527 root->cell_f[i+1] - root->cell_f[i] <
3528 cellsize_limit_f/DD_CELL_MARGIN)
3532 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3533 gmx_step_str(step, buf), dim2char(dim), i,
3534 (root->cell_f[i+1] - root->cell_f[i])
3535 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3540 /* Store the cell boundaries of the lower dimensions at the end */
3541 for (d1 = 0; d1 < d; d1++)
3543 root->cell_f[pos++] = comm->cell_f0[d1];
3544 root->cell_f[pos++] = comm->cell_f1[d1];
3547 if (d < comm->npmedecompdim)
3549 /* The master determines the maximum shift for
3550 * the coordinate communication between separate PME nodes.
3552 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
/* maxshift values are appended as reals and rounded back after bcast */
3554 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3557 root->cell_f[pos++] = comm->ddpme[1].maxshift;
/* Convert this rank's relative cell fractions (comm->cell_f0/f1) along
 * decomposition index dimind into absolute coordinates comm->cell_x0/x1,
 * scaling by the box size and offsetting by box0 for dimensions that are
 * not bounded by the box.
 * NOTE(review): closing braces and some declarations are missing from
 * this excerpt (original line numbers skip).
 */
3561 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3562 gmx_ddbox_t *ddbox, int dimind)
3564 gmx_domdec_comm_t *comm;
3569 /* Set the cell dimensions */
3570 dim = dd->dim[dimind];
3571 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3572 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3573 if (dim >= ddbox->nboundeddim)
3575 comm->cell_x0[dim] += ddbox->box0[dim];
3576 comm->cell_x1[dim] += ddbox->box0[dim];
/* Broadcast the row-master's cell boundary fractions to all ranks in the
 * load-balancing row for dimension d, then extract this rank's own
 * fractions, the lower-dimension fractions, and the communicated PME
 * maxshift values from the buffer.
 * NOTE(review): interleaved source lines are missing from this excerpt.
 */
3580 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3581 int d, int dim, real *cell_f_row,
3584 gmx_domdec_comm_t *comm;
3590 /* Each node would only need to know two fractions,
3591 * but it is probably cheaper to broadcast the whole array.
3593 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3594 0, comm->mpi_comm_load[d]);
3596 /* Copy the fractions for this dimension from the buffer */
3597 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3598 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3599 /* The whole array was communicated, so set the buffer position */
3600 pos = dd->nc[dim] + 1;
3601 for (d1 = 0; d1 <= d; d1++)
3605 /* Copy the cell fractions of the lower dimensions */
3606 comm->cell_f0[d1] = cell_f_row[pos++];
3607 comm->cell_f1[d1] = cell_f_row[pos++];
3609 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3611 /* Convert the communicated shift from float to int */
3612 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3615 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
/* Recompute DLB cell sizes for every decomposed dimension: the row root
 * computes the new boundaries from the loads, then the result is
 * distributed to all ranks in the row.
 * NOTE(review): interleaved source lines are missing from this excerpt;
 * the row-membership/root determination logic is only partially visible.
 */
3619 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3620 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3621 gmx_bool bUniform, gmx_int64_t step)
3623 gmx_domdec_comm_t *comm;
3625 gmx_bool bRowMember, bRowRoot;
3630 for (d = 0; d < dd->ndim; d++)
/* A rank is row root only if its index is 0 in all higher dims */
3635 for (d1 = d; d1 < dd->ndim; d1++)
3637 if (dd->ci[dd->dim[d1]] > 0)
3650 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3651 ddbox, bDynamicBox, bUniform, step);
3652 cell_f_row = comm->root[d]->cell_f;
3656 cell_f_row = comm->cell_f_row;
3658 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
/* Refresh absolute cell bounds from the unchanged relative fractions;
 * used when the DLB boundaries themselves did not change.
 */
3663 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3667 /* This function assumes the box is static and should therefore
3668 * not be called when the box has changed since the last
3669 * call to dd_partition_system.
3671 for (d = 0; d < dd->ndim; d++)
3673 relative_to_absolute_cell_bounds(dd, ddbox, d);
/* Top-level DLB cell-size update: when load balancing should run (bDoDLB),
 * recompute boundaries (timed under ewcDDCOMMBOUND); otherwise only
 * refresh absolute bounds for a dynamic box.  Dimensions without
 * decomposition span the whole box.
 * NOTE(review): interleaved source lines are missing from this excerpt.
 */
3679 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3680 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3681 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3682 gmx_wallcycle_t wcycle)
3684 gmx_domdec_comm_t *comm;
3691 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3692 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3693 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3695 else if (bDynamicBox)
3697 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3700 /* Set the dimensions for which no DD is used */
3701 for (dim = 0; dim < DIM; dim++)
3703 if (dd->nc[dim] == 1)
3705 comm->cell_x0[dim] = 0;
3706 comm->cell_x1[dim] = ddbox->box_size[dim];
3707 if (dim >= ddbox->nboundeddim)
3709 comm->cell_x0[dim] += ddbox->box0[dim];
3710 comm->cell_x1[dim] += ddbox->box0[dim];
/* Grow the per-dimension communication index arrays (comm->cd[d].ind) so
 * they can hold npulse[dim] pulses; newly added pulse entries start with
 * a NULL index array and zero allocation.
 */
3716 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3719 gmx_domdec_comm_dim_t *cd;
3721 for (d = 0; d < dd->ndim; d++)
3723 cd = &dd->comm->cd[d];
3724 np = npulse[dd->dim[d]];
3725 if (np > cd->np_nalloc)
3729 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3730 dim2char(dd->dim[d]), np);
/* Only warn after the first allocation (np_nalloc > 0) */
3732 if (DDMASTER(dd) && cd->np_nalloc > 0)
3734 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3736 srenew(cd->ind, np);
3737 for (i = cd->np_nalloc; i < np; i++)
3739 cd->ind[i].index = NULL;
3740 cd->ind[i].nalloc = 0;
/* Set the DD cell sizes for this step: save the old bounds for the
 * charge-group displacement check, then use either dynamic load
 * balancing or static (slb) sizing, reallocating the communication
 * index arrays in the static case.
 * NOTE(review): interleaved source lines are missing from this excerpt.
 */
3749 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3750 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3751 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3752 gmx_wallcycle_t wcycle)
3754 gmx_domdec_comm_t *comm;
3760 /* Copy the old cell boundaries for the cg displacement check */
3761 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3762 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3764 if (comm->bDynLoadBal)
3768 check_box_size(dd, ddbox);
3770 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
/* Static load balancing: cell sizes follow the box directly */
3774 set_dd_cell_sizes_slb(dd, ddbox, FALSE, npulse);
3775 realloc_comm_ind(dd, npulse);
3780 for (d = 0; d < DIM; d++)
3782 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3783 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
/* Validate and communicate the cell sizes used for neighbor searching:
 * abort when a DLB cell became smaller than the minimum allowed size,
 * then exchange zone boundaries (cell_ns_x0/x1) and, for staggered
 * grids, check the grid-jump limit.
 * NOTE(review): interleaved source lines are missing from this excerpt.
 */
3788 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3790 rvec cell_ns_x0, rvec cell_ns_x1,
3793 gmx_domdec_comm_t *comm;
3798 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3800 dim = dd->dim[dim_ind];
3802 /* Without PBC we don't have restrictions on the outer cells */
3803 if (!(dim >= ddbox->npbcdim &&
3804 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3805 comm->bDynLoadBal &&
3806 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3807 comm->cellsize_min[dim])
3810 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3811 gmx_step_str(step, buf), dim2char(dim),
3812 comm->cell_x1[dim] - comm->cell_x0[dim],
3813 ddbox->skew_fac[dim],
3814 dd->comm->cellsize_min[dim],
3815 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3819 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3821 /* Communicate the boundaries and update cell_ns_x0/1 */
3822 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3823 if (dd->bGridJump && dd->ndim > 1)
3825 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
/* Build the triclinic correction matrix tcm used to map Cartesian
 * coordinates to lattice coordinates for the decomposed dimensions.
 * NOTE(review): only the YY/ZZ off-diagonal assignments are visible in
 * this excerpt; the remaining element setup is missing (original line
 * numbers skip), so this documents only the visible part.
 */
3830 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3834 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3842 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3843 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
/* Reject box shapes that are unsupported with screw PBC: any non-zero
 * off-diagonal x-component, and a non-zero box_zy component.
 */
3852 static void check_screw_box(matrix box)
3854 /* Mathematical limitation */
3855 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3857 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3860 /* Limitation due to the asymmetry of the eighth shell method */
3861 if (box[ZZ][YY] != 0)
3863 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
/* Master-only initial distribution of charge groups over the DD cells:
 * compute each charge group's center of geometry, put it (and its atoms)
 * in the box handling triclinic and screw PBC, find the owning cell from
 * the master's cell boundaries, and fill the master's per-cell cg index
 * and atom-count arrays.
 * NOTE(review): interleaved source lines are missing from this excerpt
 * (e.g. the single-atom fast path and several brace/else lines); the
 * comments describe only the visible code.
 */
3867 static void distribute_cg(FILE *fplog, gmx_int64_t step,
3868 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3871 gmx_domdec_master_t *ma;
3872 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3873 int i, icg, j, k, k0, k1, d, npbcdim;
3875 rvec box_size, cg_cm;
3877 real nrcg, inv_ncg, pos_d;
3879 gmx_bool bUnbounded, bScrew;
/* Temporary per-cell index lists, grown on demand below */
3883 if (tmp_ind == NULL)
3885 snew(tmp_nalloc, dd->nnodes);
3886 snew(tmp_ind, dd->nnodes);
3887 for (i = 0; i < dd->nnodes; i++)
3889 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3890 snew(tmp_ind[i], tmp_nalloc[i]);
3894 /* Clear the count */
3895 for (i = 0; i < dd->nnodes; i++)
3901 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3903 cgindex = cgs->index;
3905 /* Compute the center of geometry for all charge groups */
3906 for (icg = 0; icg < cgs->nr; icg++)
3909 k1 = cgindex[icg+1];
3913 copy_rvec(pos[k0], cg_cm);
/* Multi-atom cg: average the positions of atoms k0..k1-1 */
3920 for (k = k0; (k < k1); k++)
3922 rvec_inc(cg_cm, pos[k]);
3924 for (d = 0; (d < DIM); d++)
3926 cg_cm[d] *= inv_ncg;
3929 /* Put the charge group in the box and determine the cell index */
3930 for (d = DIM-1; d >= 0; d--)
3933 if (d < dd->npbcdim)
3935 bScrew = (dd->bScrewPBC && d == XX);
3936 if (tric_dir[d] && dd->nc[d] > 1)
3938 /* Use triclinic coordintates for this dimension */
3939 for (j = d+1; j < DIM; j++)
3941 pos_d += cg_cm[j]*tcm[j][d];
/* Shift down while above the upper box edge */
3944 while (pos_d >= box[d][d])
3947 rvec_dec(cg_cm, box[d]);
/* Screw PBC: crossing x flips y and z */
3950 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3951 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3953 for (k = k0; (k < k1); k++)
3955 rvec_dec(pos[k], box[d]);
3958 pos[k][YY] = box[YY][YY] - pos[k][YY];
3959 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
/* Mirror branch: shift up while below the lower box edge */
3966 rvec_inc(cg_cm, box[d]);
3969 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3970 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3972 for (k = k0; (k < k1); k++)
3974 rvec_inc(pos[k], box[d]);
3977 pos[k][YY] = box[YY][YY] - pos[k][YY];
3978 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3983 /* This could be done more efficiently */
/* Linear scan over the master's cell boundaries to find the cell */
3985 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
3990 i = dd_index(dd->nc, ind);
3991 if (ma->ncg[i] == tmp_nalloc[i])
3993 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
3994 srenew(tmp_ind[i], tmp_nalloc[i]);
3996 tmp_ind[i][ma->ncg[i]] = icg;
3998 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
/* Concatenate the per-cell lists into the master's cg array */
4002 for (i = 0; i < dd->nnodes; i++)
4005 for (k = 0; k < ma->ncg[i]; k++)
4007 ma->cg[k1++] = tmp_ind[i][k];
4010 ma->index[dd->nnodes] = k1;
4012 for (i = 0; i < dd->nnodes; i++)
4022 fprintf(fplog, "Charge group distribution at step %s:",
4023 gmx_step_str(step, buf));
4024 for (i = 0; i < dd->nnodes; i++)
4026 fprintf(fplog, " %d", ma->ncg[i]);
4028 fprintf(fplog, "\n");
/* Compute and scatter the initial charge-group distribution: the master
 * distributes the charge groups over cells (distribute_cg), scatters the
 * per-rank cg/atom counts, then scatters each rank's global cg indices
 * and builds the local cg index (dd->cgindex).
 * NOTE(review): interleaved source lines are missing from this excerpt.
 */
4032 static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4033 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4036 gmx_domdec_master_t *ma = NULL;
4039 int *ibuf, buf2[2] = { 0, 0 };
4040 gmx_bool bMaster = DDMASTER(dd);
4047 check_screw_box(box);
4050 set_dd_cell_sizes_slb(dd, ddbox, TRUE, npulse);
4052 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
/* Pack per-rank (ncg, nat) pairs for the scatter */
4053 for (i = 0; i < dd->nnodes; i++)
4055 ma->ibuf[2*i] = ma->ncg[i];
4056 ma->ibuf[2*i+1] = ma->nat[i];
4064 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4066 dd->ncg_home = buf2[0];
4067 dd->nat_home = buf2[1];
4068 dd->ncg_tot = dd->ncg_home;
4069 dd->nat_tot = dd->nat_home;
4070 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4072 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4073 srenew(dd->index_gl, dd->cg_nalloc);
4074 srenew(dd->cgindex, dd->cg_nalloc+1);
/* Byte counts and displacements for the variable-size scatter */
4078 for (i = 0; i < dd->nnodes; i++)
4080 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4081 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4086 DDMASTER(dd) ? ma->ibuf : NULL,
4087 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4088 DDMASTER(dd) ? ma->cg : NULL,
4089 dd->ncg_home*sizeof(int), dd->index_gl);
4091 /* Determine the home charge group sizes */
4093 for (i = 0; i < dd->ncg_home; i++)
4095 cg_gl = dd->index_gl[i];
4097 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4102 fprintf(debug, "Home charge groups:\n");
4103 for (i = 0; i < dd->ncg_home; i++)
4105 fprintf(debug, " %d", dd->index_gl[i]);
4108 fprintf(debug, "\n");
4111 fprintf(debug, "\n");
/* Compact one per-atom vector array (src) in place for charge groups that
 * stay home, and copy the data of moving charge groups into the per-
 * direction communication buffers (comm->cgcm_state), interleaved with
 * the other nvec vectors at slot 'vec'.  Returns the new home position.
 * NOTE(review): interleaved source lines are missing from this excerpt.
 */
4115 static int compact_and_copy_vec_at(int ncg, int *move,
4118 rvec *src, gmx_domdec_comm_t *comm,
4121 int m, icg, i, i0, i1, nrcg;
/* pos_vec tracks the write position per communication direction */
4127 for (m = 0; m < DIM*2; m++)
4133 for (icg = 0; icg < ncg; icg++)
4135 i1 = cgindex[icg+1];
4141 /* Compact the home array in place */
4142 for (i = i0; i < i1; i++)
4144 copy_rvec(src[i], src[home_pos++]);
4150 /* Copy to the communication buffer */
/* Skip the cg_cm slot plus the preceding 'vec' vectors of this cg */
4152 pos_vec[m] += 1 + vec*nrcg;
4153 for (i = i0; i < i1; i++)
4155 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
/* Skip past the remaining vectors of this cg */
4157 pos_vec[m] += (nvec - vec - 1)*nrcg;
4161 home_pos += i1 - i0;
/* Compact the per-charge-group vector array (cg centers) in place for
 * charge groups that stay, and copy moving cg entries into the head slot
 * of their communication buffer record.  Returns the new home position.
 * NOTE(review): interleaved source lines are missing from this excerpt.
 */
4169 static int compact_and_copy_vec_cg(int ncg, int *move,
4171 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4174 int m, icg, i0, i1, nrcg;
4180 for (m = 0; m < DIM*2; m++)
4186 for (icg = 0; icg < ncg; icg++)
4188 i1 = cgindex[icg+1];
4194 /* Compact the home array in place */
4195 copy_rvec(src[icg], src[home_pos++]);
4201 /* Copy to the communication buffer */
4202 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
/* Reserve room for this cg's center plus its nvec per-atom vectors */
4203 pos_vec[m] += 1 + nrcg*nvec;
/* Compact the home index arrays (global cg indices, cg boundaries, global
 * atom indices, cg info) in place for charge groups that stay, updating
 * the global-to-local atom lookup; for charge groups that leave, remove
 * their atoms from ga2la and clear their bLocalCG flag.  Returns the new
 * number of home charge groups.
 * NOTE(review): interleaved source lines are missing from this excerpt.
 */
4215 static int compact_ind(int ncg, int *move,
4216 int *index_gl, int *cgindex,
4218 gmx_ga2la_t ga2la, char *bLocalCG,
4221 int cg, nat, a0, a1, a, a_gl;
4226 for (cg = 0; cg < ncg; cg++)
4232 /* Compact the home arrays in place.
4233 * Anything that can be done here avoids access to global arrays.
4235 cgindex[home_pos] = nat;
4236 for (a = a0; a < a1; a++)
4239 gatindex[nat] = a_gl;
4240 /* The cell number stays 0, so we don't need to set it */
4241 ga2la_change_la(ga2la, a_gl, nat);
4244 index_gl[home_pos] = index_gl[cg];
4245 cginfo[home_pos] = cginfo[cg];
4246 /* The charge group remains local, so bLocalCG does not change */
4251 /* Clear the global indices */
4252 for (a = a0; a < a1; a++)
4254 ga2la_del(ga2la, gatindex[a]);
4258 bLocalCG[index_gl[cg]] = FALSE;
/* Close the compacted cg index with the final atom count */
4262 cgindex[home_pos] = nat;
/* For each charge group that moves away (without compacting): remove its
 * atoms from the global-to-local lookup, clear its bLocalCG flag, and
 * mark it as moved in the neighbor-search cell index.
 * NOTE(review): interleaved source lines are missing from this excerpt.
 */
4267 static void clear_and_mark_ind(int ncg, int *move,
4268 int *index_gl, int *cgindex, int *gatindex,
4269 gmx_ga2la_t ga2la, char *bLocalCG,
4274 for (cg = 0; cg < ncg; cg++)
4280 /* Clear the global indices */
4281 for (a = a0; a < a1; a++)
4283 ga2la_del(ga2la, gatindex[a]);
4287 bLocalCG[index_gl[cg]] = FALSE;
4289 /* Signal that this cg has moved using the ns cell index.
4290 * Here we set it to -1. fill_grid will change it
4291 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4293 cell_index[cg] = -1;
4298 static void print_cg_move(FILE *fplog,
4300 gmx_int64_t step, int cg, int dim, int dir,
4301 gmx_bool bHaveLimitdAndCMOld, real limitd,
4302 rvec cm_old, rvec cm_new, real pos_d)
4304 gmx_domdec_comm_t *comm;
4309 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4310 if (bHaveLimitdAndCMOld)
4312 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4313 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4317 fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4318 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4320 fprintf(fplog, "distance out of cell %f\n",
4321 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4322 if (bHaveLimitdAndCMOld)
4324 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4325 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4327 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4328 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4329 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4331 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4332 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4334 comm->cell_x0[dim], comm->cell_x1[dim]);
/* Report a charge group that moved too far (to the log file when
 * available, and to stderr) and abort with a fatal error.
 */
4337 static void cg_move_error(FILE *fplog,
4339 gmx_int64_t step, int cg, int dim, int dir,
4340 gmx_bool bHaveLimitdAndCMOld, real limitd,
4341 rvec cm_old, rvec cm_new, real pos_d)
4345 print_cg_move(fplog, dd, step, cg, dim, dir,
4346 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4348 print_cg_move(stderr, dd, step, cg, dim, dir,
4349 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4351 "A charge group moved too far between two domain decomposition steps\n"
4352 "This usually means that your system is not well equilibrated");
/* Apply the screw-PBC rotation to atom a in all distributed state arrays:
 * positions are mirrored in y and z (rectangular box only), velocity-like
 * quantities have their y and z components negated, and distance-based
 * entries are left untouched.
 * NOTE(review): case labels and closing braces are missing from this
 * excerpt (original line numbers skip).
 */
4355 static void rotate_state_atom(t_state *state, int a)
4359 for (est = 0; est < estNR; est++)
/* Only distributed state entries that are present need rotating */
4361 if (EST_DISTR(est) && (state->flags & (1<<est)))
4366 /* Rotate the complete state; for a rectangular box only */
4367 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4368 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4371 state->v[a][YY] = -state->v[a][YY];
4372 state->v[a][ZZ] = -state->v[a][ZZ];
4375 state->sd_X[a][YY] = -state->sd_X[a][YY];
4376 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4379 state->cg_p[a][YY] = -state->cg_p[a][YY];
4380 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4382 case estDISRE_INITF:
4383 case estDISRE_RM3TAV:
4384 case estORIRE_INITF:
4386 /* These are distances, so not affected by rotation */
4389 gmx_incons("Unknown state entry encountered in rotate_state_atom");
/* Return the 'moved' flag array, growing it to hold natoms entries;
 * existing contents are preserved by srenew.
 * NOTE(review): the return statement is missing from this excerpt.
 */
4395 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4397 if (natoms > comm->moved_nalloc)
4399 /* Contents should be preserved here */
4400 comm->moved_nalloc = over_alloc_dd(natoms);
4401 srenew(comm->moved, comm->moved_nalloc);
/* For charge groups cg_start..cg_end-1: compute the center of geometry,
 * apply PBC (including screw PBC), detect crossings of this rank's cell
 * boundaries, abort via cg_move_error when a cg moved further than
 * allowed, and encode the target communication direction plus flags into
 * move[cg].
 * NOTE(review): interleaved source lines are missing from this excerpt
 * (single-atom path, dev[] computation, several braces); comments
 * describe only the visible code.
 */
4407 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4410 ivec tric_dir, matrix tcm,
4411 rvec cell_x0, rvec cell_x1,
4412 rvec limitd, rvec limit0, rvec limit1,
4414 int cg_start, int cg_end,
4419 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4420 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4424 real inv_ncg, pos_d;
4427 npbcdim = dd->npbcdim;
4429 for (cg = cg_start; cg < cg_end; cg++)
4436 copy_rvec(state->x[k0], cm_new);
/* Multi-atom cg: average the atom positions */
4443 for (k = k0; (k < k1); k++)
4445 rvec_inc(cm_new, state->x[k]);
4447 for (d = 0; (d < DIM); d++)
4449 cm_new[d] = inv_ncg*cm_new[d];
4454 /* Do pbc and check DD cell boundary crossings */
4455 for (d = DIM-1; d >= 0; d--)
4459 bScrew = (dd->bScrewPBC && d == XX);
4460 /* Determine the location of this cg in lattice coordinates */
4464 for (d2 = d+1; d2 < DIM; d2++)
4466 pos_d += cm_new[d2]*tcm[d2][d];
4469 /* Put the charge group in the triclinic unit-cell */
4470 if (pos_d >= cell_x1[d])
/* Crossing beyond limit1 means the cg moved more than allowed */
4472 if (pos_d >= limit1[d])
4474 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4475 cg_cm[cg], cm_new, pos_d);
/* Wrap around only at the upper edge of the grid */
4478 if (dd->ci[d] == dd->nc[d] - 1)
4480 rvec_dec(cm_new, state->box[d]);
4483 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4484 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4486 for (k = k0; (k < k1); k++)
4488 rvec_dec(state->x[k], state->box[d]);
4491 rotate_state_atom(state, k);
4496 else if (pos_d < cell_x0[d])
4498 if (pos_d < limit0[d])
4500 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4501 cg_cm[cg], cm_new, pos_d);
4506 rvec_inc(cm_new, state->box[d]);
4509 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4510 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4512 for (k = k0; (k < k1); k++)
4514 rvec_inc(state->x[k], state->box[d]);
4517 rotate_state_atom(state, k);
4523 else if (d < npbcdim)
4525 /* Put the charge group in the rectangular unit-cell */
4526 while (cm_new[d] >= state->box[d][d])
4528 rvec_dec(cm_new, state->box[d]);
4529 for (k = k0; (k < k1); k++)
4531 rvec_dec(state->x[k], state->box[d]);
4534 while (cm_new[d] < 0)
4536 rvec_inc(cm_new, state->box[d]);
4537 for (k = k0; (k < k1); k++)
4539 rvec_inc(state->x[k], state->box[d]);
4545 copy_rvec(cm_new, cg_cm[cg]);
4547 /* Determine where this cg should go */
4550 for (d = 0; d < dd->ndim; d++)
/* Encode forward/backward crossing flags per decomposed dimension */
4555 flag |= DD_FLAG_FW(d);
4561 else if (dev[dim] == -1)
4563 flag |= DD_FLAG_BW(d);
/* With more than 2 cells, backward goes through a separate direction */
4566 if (dd->nc[dim] > 2)
4577 /* Temporarily store the flag in move */
4578 move[cg] = mc + flag;
4582 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4583 gmx_domdec_t *dd, ivec tric_dir,
4584 t_state *state, rvec **f,
4593 int ncg[DIM*2], nat[DIM*2];
4594 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4595 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4596 int sbuf[2], rbuf[2];
4597 int home_pos_cg, home_pos_at, buf_pos;
4599 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4602 real inv_ncg, pos_d;
4604 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4606 cginfo_mb_t *cginfo_mb;
4607 gmx_domdec_comm_t *comm;
4609 int nthread, thread;
4613 check_screw_box(state->box);
4617 if (fr->cutoff_scheme == ecutsGROUP)
4622 for (i = 0; i < estNR; i++)
4628 case estX: /* Always present */ break;
4629 case estV: bV = (state->flags & (1<<i)); break;
4630 case estSDX: bSDX = (state->flags & (1<<i)); break;
4631 case estCGP: bCGP = (state->flags & (1<<i)); break;
4634 case estDISRE_INITF:
4635 case estDISRE_RM3TAV:
4636 case estORIRE_INITF:
4638 /* No processing required */
4641 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4646 if (dd->ncg_tot > comm->nalloc_int)
4648 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4649 srenew(comm->buf_int, comm->nalloc_int);
4651 move = comm->buf_int;
4653 /* Clear the count */
4654 for (c = 0; c < dd->ndim*2; c++)
4660 npbcdim = dd->npbcdim;
4662 for (d = 0; (d < DIM); d++)
4664 limitd[d] = dd->comm->cellsize_min[d];
4665 if (d >= npbcdim && dd->ci[d] == 0)
4667 cell_x0[d] = -GMX_FLOAT_MAX;
4671 cell_x0[d] = comm->cell_x0[d];
4673 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4675 cell_x1[d] = GMX_FLOAT_MAX;
4679 cell_x1[d] = comm->cell_x1[d];
4683 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4684 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4688 /* We check after communication if a charge group moved
4689 * more than one cell. Set the pre-comm check limit to float_max.
4691 limit0[d] = -GMX_FLOAT_MAX;
4692 limit1[d] = GMX_FLOAT_MAX;
4696 make_tric_corr_matrix(npbcdim, state->box, tcm);
4698 cgindex = dd->cgindex;
4700 nthread = gmx_omp_nthreads_get(emntDomdec);
4702 /* Compute the center of geometry for all home charge groups
4703 * and put them in the box and determine where they should go.
4705 #pragma omp parallel for num_threads(nthread) schedule(static)
4706 for (thread = 0; thread < nthread; thread++)
4708 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4709 cell_x0, cell_x1, limitd, limit0, limit1,
4711 ( thread *dd->ncg_home)/nthread,
4712 ((thread+1)*dd->ncg_home)/nthread,
4713 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4717 for (cg = 0; cg < dd->ncg_home; cg++)
4722 flag = mc & ~DD_FLAG_NRCG;
4723 mc = mc & DD_FLAG_NRCG;
4726 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4728 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4729 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4731 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4732 /* We store the cg size in the lower 16 bits
4733 * and the place where the charge group should go
4734 * in the next 6 bits. This saves some communication volume.
4736 nrcg = cgindex[cg+1] - cgindex[cg];
4737 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4743 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4744 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4747 for (i = 0; i < dd->ndim*2; i++)
4749 *ncg_moved += ncg[i];
4766 /* Make sure the communication buffers are large enough */
4767 for (mc = 0; mc < dd->ndim*2; mc++)
4769 nvr = ncg[mc] + nat[mc]*nvec;
4770 if (nvr > comm->cgcm_state_nalloc[mc])
4772 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4773 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4777 switch (fr->cutoff_scheme)
4780 /* Recalculating cg_cm might be cheaper than communicating,
4781 * but that could give rise to rounding issues.
4784 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4785 nvec, cg_cm, comm, bCompact);
4788 /* Without charge groups we send the moved atom coordinates
4789 * over twice. This is so the code below can be used without
4790 * many conditionals for both for with and without charge groups.
4793 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4794 nvec, state->x, comm, FALSE);
4797 home_pos_cg -= *ncg_moved;
4801 gmx_incons("unimplemented");
4807 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4808 nvec, vec++, state->x, comm, bCompact);
4811 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4812 nvec, vec++, state->v, comm, bCompact);
4816 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4817 nvec, vec++, state->sd_X, comm, bCompact);
4821 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4822 nvec, vec++, state->cg_p, comm, bCompact);
4827 compact_ind(dd->ncg_home, move,
4828 dd->index_gl, dd->cgindex, dd->gatindex,
4829 dd->ga2la, comm->bLocalCG,
4834 if (fr->cutoff_scheme == ecutsVERLET)
4836 moved = get_moved(comm, dd->ncg_home);
4838 for (k = 0; k < dd->ncg_home; k++)
4845 moved = fr->ns.grid->cell_index;
4848 clear_and_mark_ind(dd->ncg_home, move,
4849 dd->index_gl, dd->cgindex, dd->gatindex,
4850 dd->ga2la, comm->bLocalCG,
4854 cginfo_mb = fr->cginfo_mb;
4856 *ncg_stay_home = home_pos_cg;
4857 for (d = 0; d < dd->ndim; d++)
4863 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4866 /* Communicate the cg and atom counts */
4871 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4872 d, dir, sbuf[0], sbuf[1]);
4874 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4876 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4878 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4879 srenew(comm->buf_int, comm->nalloc_int);
4882 /* Communicate the charge group indices, sizes and flags */
4883 dd_sendrecv_int(dd, d, dir,
4884 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4885 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4887 nvs = ncg[cdd] + nat[cdd]*nvec;
4888 i = rbuf[0] + rbuf[1] *nvec;
4889 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4891 /* Communicate cgcm and state */
4892 dd_sendrecv_rvec(dd, d, dir,
4893 comm->cgcm_state[cdd], nvs,
4894 comm->vbuf.v+nvr, i);
4895 ncg_recv += rbuf[0];
4896 nat_recv += rbuf[1];
4900 /* Process the received charge groups */
4902 for (cg = 0; cg < ncg_recv; cg++)
4904 flag = comm->buf_int[cg*DD_CGIBS+1];
4906 if (dim >= npbcdim && dd->nc[dim] > 2)
4908 /* No pbc in this dim and more than one domain boundary.
4909 * We do a separate check if a charge group didn't move too far.
4911 if (((flag & DD_FLAG_FW(d)) &&
4912 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4913 ((flag & DD_FLAG_BW(d)) &&
4914 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4916 cg_move_error(fplog, dd, step, cg, dim,
4917 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4919 comm->vbuf.v[buf_pos],
4920 comm->vbuf.v[buf_pos],
4921 comm->vbuf.v[buf_pos][dim]);
4928 /* Check which direction this cg should go */
4929 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4933 /* The cell boundaries for dimension d2 are not equal
4934 * for each cell row of the lower dimension(s),
4935 * therefore we might need to redetermine where
4936 * this cg should go.
4939 /* If this cg crosses the box boundary in dimension d2
4940 * we can use the communicated flag, so we do not
4941 * have to worry about pbc.
4943 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4944 (flag & DD_FLAG_FW(d2))) ||
4945 (dd->ci[dim2] == 0 &&
4946 (flag & DD_FLAG_BW(d2)))))
4948 /* Clear the two flags for this dimension */
4949 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4950 /* Determine the location of this cg
4951 * in lattice coordinates
4953 pos_d = comm->vbuf.v[buf_pos][dim2];
4956 for (d3 = dim2+1; d3 < DIM; d3++)
4959 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4962 /* Check of we are not at the box edge.
4963 * pbc is only handled in the first step above,
4964 * but this check could move over pbc while
4965 * the first step did not due to different rounding.
4967 if (pos_d >= cell_x1[dim2] &&
4968 dd->ci[dim2] != dd->nc[dim2]-1)
4970 flag |= DD_FLAG_FW(d2);
4972 else if (pos_d < cell_x0[dim2] &&
4975 flag |= DD_FLAG_BW(d2);
4977 comm->buf_int[cg*DD_CGIBS+1] = flag;
4980 /* Set to which neighboring cell this cg should go */
4981 if (flag & DD_FLAG_FW(d2))
4985 else if (flag & DD_FLAG_BW(d2))
4987 if (dd->nc[dd->dim[d2]] > 2)
4999 nrcg = flag & DD_FLAG_NRCG;
5002 if (home_pos_cg+1 > dd->cg_nalloc)
5004 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5005 srenew(dd->index_gl, dd->cg_nalloc);
5006 srenew(dd->cgindex, dd->cg_nalloc+1);
5008 /* Set the global charge group index and size */
5009 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5010 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5011 /* Copy the state from the buffer */
5012 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5013 if (fr->cutoff_scheme == ecutsGROUP)
5016 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5020 /* Set the cginfo */
5021 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5022 dd->index_gl[home_pos_cg]);
5025 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5028 if (home_pos_at+nrcg > state->nalloc)
5030 dd_realloc_state(state, f, home_pos_at+nrcg);
5032 for (i = 0; i < nrcg; i++)
5034 copy_rvec(comm->vbuf.v[buf_pos++],
5035 state->x[home_pos_at+i]);
5039 for (i = 0; i < nrcg; i++)
5041 copy_rvec(comm->vbuf.v[buf_pos++],
5042 state->v[home_pos_at+i]);
5047 for (i = 0; i < nrcg; i++)
5049 copy_rvec(comm->vbuf.v[buf_pos++],
5050 state->sd_X[home_pos_at+i]);
5055 for (i = 0; i < nrcg; i++)
5057 copy_rvec(comm->vbuf.v[buf_pos++],
5058 state->cg_p[home_pos_at+i]);
5062 home_pos_at += nrcg;
5066 /* Reallocate the buffers if necessary */
5067 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5069 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5070 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5072 nvr = ncg[mc] + nat[mc]*nvec;
5073 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5075 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5076 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5078 /* Copy from the receive to the send buffers */
5079 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5080 comm->buf_int + cg*DD_CGIBS,
5081 DD_CGIBS*sizeof(int));
5082 memcpy(comm->cgcm_state[mc][nvr],
5083 comm->vbuf.v[buf_pos],
5084 (1+nrcg*nvec)*sizeof(rvec));
5085 buf_pos += 1 + nrcg*nvec;
5092 /* With sorting (!bCompact) the indices are now only partially up to date
5093 * and ncg_home and nat_home are not the real count, since there are
5094 * "holes" in the arrays for the charge groups that moved to neighbors.
5096 if (fr->cutoff_scheme == ecutsVERLET)
5098 moved = get_moved(comm, home_pos_cg);
5100 for (i = dd->ncg_home; i < home_pos_cg; i++)
5105 dd->ncg_home = home_pos_cg;
5106 dd->nat_home = home_pos_at;
5111 "Finished repartitioning: cgs moved out %d, new home %d\n",
5112 *ncg_moved, dd->ncg_home-*ncg_moved);
/* Accumulate 'cycles' into the DD cycle counter for category ddCycl,
 * increment that category's sample count, and remember the largest
 * single contribution seen so far.
 * NOTE(review): this chunk appears to have lines dropped by extraction
 * (original line numbers are fused into the text); braces are missing. */
5117 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5119 dd->comm->cycl[ddCycl] += cycles;
5120 dd->comm->cycl_n[ddCycl]++;
/* Track the per-category maximum cycle count */
5121 if (cycles > dd->comm->cycl_max[ddCycl])
5123 dd->comm->cycl_max[ddCycl] = cycles;
/* Estimate the force-calculation flop count from the nrnb counters.
 * Non-bonded kernel counts are weighted down (0.50, and 0.25 for the
 * W3/W4 water kernels) to get closer to real timings; free-energy/1-4
 * and bonded terms are added at full cost. */
5127 static double force_flop_count(t_nrnb *nrnb)
5134 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5136 /* To get closer to the real timings, we half the count
5137 * for the normal loops and again half it for water loops.
5140 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5142 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5146 sum += nrnb->n[i]*0.50*cost_nrnb(i);
/* Free-energy and 1-4 interaction kernels, counted at full cost */
5149 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5152 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5154 sum += nrnb->n[i]*cost_nrnb(i);
/* Bonded interactions up to walls */
5157 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5159 sum += nrnb->n[i]*cost_nrnb(i);
/* When flop counting is enabled (comm->eFlop), subtract the current
 * count so that a later dd_force_flop_stop() yields the delta. */
5165 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5167 if (dd->comm->eFlop)
5169 dd->comm->flop -= force_flop_count(nrnb);
/* Counterpart of dd_force_flop_start(): add the current count so that
 * comm->flop holds the flops accumulated between start and stop. */
5172 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5174 if (dd->comm->eFlop)
5176 dd->comm->flop += force_flop_count(nrnb);
/* Reset all DD cycle counters (sum, sample count, max per category)
 * and the flop sample counter. */
5181 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5185 for (i = 0; i < ddCyclNr; i++)
5187 dd->comm->cycl[i] = 0;
5188 dd->comm->cycl_n[i] = 0;
5189 dd->comm->cycl_max[i] = 0;
5192 dd->comm->flop_n = 0;
/* Gather per-rank load measurements up the DD dimension hierarchy.
 * Going from the last DD dimension to the first, each participating rank
 * packs its load data into sbuf and the row root gathers and reduces it
 * (sum, max, min cell volume, DLB limit flags, and with separate PME
 * ranks also PP/PME cycle counts). Finally the global accumulators
 * (comm->load_*, comm->nload) are updated on the overall root.
 * Timed under the ewcDDCOMMLOAD wallcycle counter. */
5195 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5197 gmx_domdec_comm_t *comm;
5198 gmx_domdec_load_t *load;
5199 gmx_domdec_root_t *root = NULL;
5200 int d, dim, cid, i, pos;
5201 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5206 fprintf(debug, "get_load_distribution start\n");
5209 wallcycle_start(wcycle, ewcDDCOMMLOAD);
/* bSepPME: this run has dedicated PME ranks */
5213 bSepPME = (dd->pme_nodeid >= 0);
5215 for (d = dd->ndim-1; d >= 0; d--)
5218 /* Check if we participate in the communication in this dimension */
5219 if (d == dd->ndim-1 ||
5220 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5222 load = &comm->load[d]
5225 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
/* Pack this rank's contribution; layout depends on whether this is
 * the innermost dimension and on DLB/separate-PME settings. */
5228 if (d == dd->ndim-1)
5230 sbuf[pos++] = dd_force_load(comm);
5231 sbuf[pos++] = sbuf[0];
5234 sbuf[pos++] = sbuf[0];
5235 sbuf[pos++] = cell_frac;
5238 sbuf[pos++] = comm->cell_f_max0[d];
5239 sbuf[pos++] = comm->cell_f_min1[d];
5244 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5245 sbuf[pos++] = comm->cycl[ddCyclPME];
5250 sbuf[pos++] = comm->load[d+1].sum;
5251 sbuf[pos++] = comm->load[d+1].max;
5254 sbuf[pos++] = comm->load[d+1].sum_m;
5255 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5256 sbuf[pos++] = comm->load[d+1].flags;
5259 sbuf[pos++] = comm->cell_f_max0[d];
5260 sbuf[pos++] = comm->cell_f_min1[d];
5265 sbuf[pos++] = comm->load[d+1].mdf;
5266 sbuf[pos++] = comm->load[d+1].pme;
5270 /* Communicate a row in DD direction d.
5271 * The communicators are setup such that the root always has rank 0.
5274 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5275 load->load, load->nload*sizeof(float), MPI_BYTE,
5276 0, comm->mpi_comm_load[d]);
5278 if (dd->ci[dim] == dd->master_ci[dim])
5280 /* We are the root, process this row */
5281 if (comm->bDynLoadBal)
5283 root = comm->root[d];
/* Reduce the gathered per-cell entries */
5293 for (i = 0; i < dd->nc[dim]; i++)
5295 load->sum += load->load[pos++];
5296 load->max = max(load->max, load->load[pos]);
5302 /* This direction could not be load balanced properly,
5303 * therefore we need to use the maximum iso the average load.
5305 load->sum_m = max(load->sum_m, load->load[pos]);
5309 load->sum_m += load->load[pos];
5312 load->cvol_min = min(load->cvol_min, load->load[pos]);
5316 load->flags = (int)(load->load[pos++] + 0.5);
5320 root->cell_f_max0[i] = load->load[pos++];
5321 root->cell_f_min1[i] = load->load[pos++];
5326 load->mdf = max(load->mdf, load->load[pos]);
5328 load->pme = max(load->pme, load->load[pos]);
/* Record that DLB was limited in this direction */
5332 if (comm->bDynLoadBal && root->bLimited)
5334 load->sum_m *= dd->nc[dim];
5335 load->flags |= (1<<d);
/* Update the global accumulators from the fully reduced load[0] */
5343 comm->nload += dd_load_count(comm);
5344 comm->load_step += comm->cycl[ddCyclStep];
5345 comm->load_sum += comm->load[0].sum;
5346 comm->load_max += comm->load[0].max;
5347 if (comm->bDynLoadBal)
5349 for (d = 0; d < dd->ndim; d++)
5351 if (comm->load[0].flags & (1<<d))
5353 comm->load_lim[d]++;
5359 comm->load_mdf += comm->load[0].mdf;
5360 comm->load_pme += comm->load[0].pme;
5364 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5368 fprintf(debug, "get_load_distribution finished\n");
/* Relative performance loss due to force-load imbalance:
 * (max_load*nnodes - sum_load) / (step_cycles*nnodes), i.e. the
 * fraction of total run time spent waiting. Valid only when load
 * samples exist (comm->nload > 0). */
5372 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5374 /* Return the relative performance loss on the total run time
5375 * due to the force calculation load imbalance.
5377 if (dd->comm->nload > 0)
5380 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5381 (dd->comm->load_step*dd->nnodes);
/* Print the end-of-run load-balance report (DD master only, and only
 * when load samples were collected): average force-load imbalance,
 * time lost waiting, DLB limitation per dimension, PME/PP ratio, and
 * NOTE messages with tuning advice when losses exceed DD_PERF_LOSS.
 * Output goes to both fplog and stderr. */
5389 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5392 int npp, npme, nnodes, d, limp;
5393 float imbal, pme_f_ratio, lossf, lossp = 0;
5395 gmx_domdec_comm_t *comm;
5398 if (DDMASTER(dd) && comm->nload > 0)
5401 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5402 nnodes = npp + npme;
/* Imbalance: max load relative to the average over PP ranks */
5403 imbal = comm->load_max*npp/comm->load_sum - 1;
5404 lossf = dd_force_imb_perf_loss(dd);
5405 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5406 fprintf(fplog, "%s", buf);
5407 fprintf(stderr, "\n");
5408 fprintf(stderr, "%s", buf);
5409 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5410 fprintf(fplog, "%s", buf);
5411 fprintf(stderr, "%s", buf);
5413 if (comm->bDynLoadBal)
5415 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5416 for (d = 0; d < dd->ndim; d++)
/* Percentage of steps where DLB was limited in dimension d, rounded */
5418 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5419 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5425 sprintf(buf+strlen(buf), "\n");
5426 fprintf(fplog, "%s", buf);
5427 fprintf(stderr, "%s", buf);
/* PP/PME balance: sign of lossp indicates which side waited */
5431 pme_f_ratio = comm->load_pme/comm->load_mdf;
5432 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5435 lossp *= (float)npme/(float)nnodes;
5439 lossp *= (float)npp/(float)nnodes;
5441 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5442 fprintf(fplog, "%s", buf);
5443 fprintf(stderr, "%s", buf);
5444 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5445 fprintf(fplog, "%s", buf);
5446 fprintf(stderr, "%s", buf);
5448 fprintf(fplog, "\n");
5449 fprintf(stderr, "\n");
5451 if (lossf >= DD_PERF_LOSS)
5454 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5455 " in the domain decomposition.\n", lossf*100);
5456 if (!comm->bDynLoadBal)
5458 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5462 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5464 fprintf(fplog, "%s\n", buf);
5465 fprintf(stderr, "%s\n", buf);
5467 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
5470 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5471 " had %s work to do than the PP nodes.\n"
5472 " You might want to %s the number of PME nodes\n"
5473 " or %s the cut-off and the grid spacing.\n",
5475 (lossp < 0) ? "less" : "more",
5476 (lossp < 0) ? "decrease" : "increase",
5477 (lossp < 0) ? "decrease" : "increase");
5478 fprintf(fplog, "%s\n", buf);
5479 fprintf(stderr, "%s\n", buf);
/* Minimum cell volume, normalized by the number of DD cells. */
5484 static float dd_vol_min(gmx_domdec_t *dd)
5486 return dd->comm->load[0].cvol_min*dd->nnodes;
/* Bitmask of DD dimensions where load balancing was limited
 * (non-zero means limited in at least one dimension). */
5489 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5491 return dd->comm->load[0].flags;
/* Force-load imbalance for the last measurement:
 * max load over the average load, minus 1 (0 = perfectly balanced). */
5494 static float dd_f_imbal(gmx_domdec_t *dd)
5496 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
/* Ratio of PME mesh load to PP force load for the last measurement;
 * only meaningful when PME cycle samples were recorded. */
5499 float dd_pme_f_ratio(gmx_domdec_t *dd)
5501 if (dd->comm->cycl_n[ddCyclPME] > 0)
5503 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
/* Write a one-shot DD load report for the given step to the log file:
 * DLB limitation per dimension (if any), volume min/aver with DLB,
 * force imbalance percentage, and PME mesh/force ratio when available. */
5511 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5516 flags = dd_load_flags(dd);
5520 "DD load balancing is limited by minimum cell size in dimension");
5521 for (d = 0; d < dd->ndim; d++)
5525 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5528 fprintf(fplog, "\n");
5530 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5531 if (dd->comm->bDynLoadBal)
/* '!' marks steps where DLB was limited */
5533 fprintf(fplog, " vol min/aver %5.3f%c",
5534 dd_vol_min(dd), flags ? '!' : ' ');
5536 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5537 if (dd->comm->cycl_n[ddCyclPME])
5539 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5541 fprintf(fplog, "\n\n");
/* Compact stderr version of the load report, printed each measurement:
 * volume (with DLB), force imbalance, and PME/force ratio if sampled. */
5544 static void dd_print_load_verbose(gmx_domdec_t *dd)
5546 if (dd->comm->bDynLoadBal)
5548 fprintf(stderr, "vol %4.2f%c ",
5549 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5551 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5552 if (dd->comm->cycl_n[ddCyclPME])
5554 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
/* Create the MPI communicator for one row of cells along DD dimension
 * dim_ind (via MPI_Comm_split: members of the row get color 0, others
 * MPI_UNDEFINED). On the row root, allocate the DLB bookkeeping arrays
 * (cell fractions, bounds, buffers); non-roots only need cell_f_row. */
5559 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5564 gmx_domdec_root_t *root;
5565 gmx_bool bPartOfGroup = FALSE;
5567 dim = dd->dim[dim_ind];
5568 copy_ivec(loc, loc_c);
/* Walk the row of cells to see if this rank belongs to it */
5569 for (i = 0; i < dd->nc[dim]; i++)
5572 rank = dd_index(dd->nc, loc_c);
5573 if (rank == dd->rank)
5575 /* This process is part of the group */
5576 bPartOfGroup = TRUE;
5579 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5583 dd->comm->mpi_comm_load[dim_ind] = c_row;
5584 if (dd->comm->eDLB != edlbNO)
5586 if (dd->ci[dim] == dd->master_ci[dim])
5588 /* This is the root process of this row */
5589 snew(dd->comm->root[dim_ind], 1);
5590 root = dd->comm->root[dim_ind];
5591 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5592 snew(root->old_cell_f, dd->nc[dim]+1);
5593 snew(root->bCellMin, dd->nc[dim]);
5596 snew(root->cell_f_max0, dd->nc[dim]);
5597 snew(root->cell_f_min1, dd->nc[dim]);
5598 snew(root->bound_min, dd->nc[dim]);
5599 snew(root->bound_max, dd->nc[dim]);
5601 snew(root->buf_ncd, dd->nc[dim]);
5605 /* This is not a root process, we only need to receive cell_f */
5606 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5609 if (dd->ci[dim] == dd->master_ci[dim])
5611 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
/* Set up the communicator of PP ranks that share a GPU on the same
 * physical node (used for DLB resource sharing). Splits the PP
 * communicator first by physical-node hash, then by GPU id; frees the
 * communicator again when only a single rank uses the GPU.
 * No-op for non-PP ranks or runs without GPUs. */
5617 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5618 const gmx_hw_info_t gmx_unused *hwinfo,
5619 const gmx_hw_opt_t gmx_unused *hw_opt)
5622 int physicalnode_id_hash;
5625 MPI_Comm mpi_comm_pp_physicalnode;
5627 if (!(cr->duty & DUTY_PP) ||
5628 hw_opt->gpu_opt.ncuda_dev_use == 0)
5630 /* Only PP nodes (currently) use GPUs.
5631 * If we don't have GPUs, there are no resources to share.
5636 physicalnode_id_hash = gmx_physicalnode_id_hash();
5638 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5644 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5645 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5646 dd->rank, physicalnode_id_hash, gpu_id);
5648 /* Split the PP communicator over the physical nodes */
5649 /* TODO: See if we should store this (before), as it's also used for
5650 * for the nodecomm summution.
5652 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5653 &mpi_comm_pp_physicalnode);
5654 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5655 &dd->comm->mpi_comm_gpu_shared);
/* The intermediate per-node communicator is no longer needed */
5656 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5657 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5661 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5664 /* Note that some ranks could share a GPU, while others don't */
5666 if (dd->comm->nrank_gpu_shared == 1)
5668 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
/* Create the load communicators for all DD dimensions: one row
 * communicator per dimension (nested loops enumerate the rows for
 * dimensions 1 and 2). MPI-only; gmx_unused otherwise. */
5673 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5676 int dim0, dim1, i, j;
5681 fprintf(debug, "Making load communicators\n");
5684 snew(dd->comm->load, dd->ndim);
5685 snew(dd->comm->mpi_comm_load, dd->ndim);
5688 make_load_communicator(dd, 0, loc);
5692 for (i = 0; i < dd->nc[dim0]; i++)
5695 make_load_communicator(dd, 1, loc);
5701 for (i = 0; i < dd->nc[dim0]; i++)
5705 for (j = 0; j < dd->nc[dim1]; j++)
5708 make_load_communicator(dd, 2, loc);
5715 fprintf(debug, "Finished making load communicators\n");
/* Set up the DD grid topology for this rank: neighbor ranks in each DD
 * dimension (with periodic wrap), the communication zones (1, 2 or 4/8
 * zones for 1D/2D/3D decomposition, via the dd_zp1/2/3 tables), the
 * zone shifts, and the i-zone shift ranges used by neighbor searching.
 * Also allocates DLB root storage and creates the load communicators
 * when load recording is enabled. */
5720 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5723 int d, dim, i, j, m;
5726 ivec dd_zp[DD_MAXIZONE];
5727 gmx_domdec_zones_t *zones;
5728 gmx_domdec_ns_ranges_t *izone;
/* Forward/backward neighbor rank in each decomposed dimension */
5730 for (d = 0; d < dd->ndim; d++)
5733 copy_ivec(dd->ci, tmp);
5734 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5735 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5736 copy_ivec(dd->ci, tmp);
5737 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5738 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5741 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5744 dd->neighbor[d][1]);
5750 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5752 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5753 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Select the zone pair table matching the decomposition dimensionality */
5760 for (i = 0; i < nzonep; i++)
5762 copy_ivec(dd_zp3[i], dd_zp[i]);
5768 for (i = 0; i < nzonep; i++)
5770 copy_ivec(dd_zp2[i], dd_zp[i]);
5776 for (i = 0; i < nzonep; i++)
5778 copy_ivec(dd_zp1[i], dd_zp[i]);
5782 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5787 zones = &dd->comm->zones;
5789 for (i = 0; i < nzone; i++)
5792 clear_ivec(zones->shift[i]);
5793 for (d = 0; d < dd->ndim; d++)
5795 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5800 for (i = 0; i < nzone; i++)
5802 for (d = 0; d < DIM; d++)
5804 s[d] = dd->ci[d] - zones->shift[i][d];
5809 else if (s[d] >= dd->nc[d])
5815 zones->nizone = nzonep;
5816 for (i = 0; i < zones->nizone; i++)
5818 if (dd_zp[i][0] != i)
5820 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5822 izone = &zones->izone[i];
5823 izone->j0 = dd_zp[i][1];
5824 izone->j1 = dd_zp[i][2];
5825 for (dim = 0; dim < DIM; dim++)
5827 if (dd->nc[dim] == 1)
5829 /* All shifts should be allowed */
5830 izone->shift0[dim] = -1;
5831 izone->shift1[dim] = 1;
5836 izone->shift0[d] = 0;
5837 izone->shift1[d] = 0;
5838 for(j=izone->j0; j<izone->j1; j++) {
5839 if (dd->shift[j][d] > dd->shift[i][d])
5840 izone->shift0[d] = -1;
5841 if (dd->shift[j][d] < dd->shift[i][d])
5842 izone->shift1[d] = 1;
5848 /* Assume the shift are not more than 1 cell */
5849 izone->shift0[dim] = 1;
5850 izone->shift1[dim] = -1;
5851 for (j = izone->j0; j < izone->j1; j++)
5853 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5854 if (shift_diff < izone->shift0[dim])
5856 izone->shift0[dim] = shift_diff;
5858 if (shift_diff > izone->shift1[dim])
5860 izone->shift1[dim] = shift_diff;
5867 if (dd->comm->eDLB != edlbNO)
5869 snew(dd->comm->root, dd->ndim);
5872 if (dd->comm->bRecordLoad)
5874 make_load_communicators(dd);
/* Create (or adopt) the particle-particle MPI communicator.
 * With Cartesian PP setup a new MPI_Cart communicator replaces
 * cr->mpi_comm_mygroup; this rank's DD coordinates, the DD master
 * rank, and (when needed) ddindex<->nodeid lookup tables are then
 * established via Allreduce sums over zero-initialized buffers. */
5878 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
5881 gmx_domdec_comm_t *comm;
5892 if (comm->bCartesianPP)
5894 /* Set up cartesian communication for the particle-particle part */
5897 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5898 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5901 for (i = 0; i < DIM; i++)
5905 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5907 /* We overwrite the old communicator with the new cartesian one */
5908 cr->mpi_comm_mygroup = comm_cart;
5911 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5912 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5914 if (comm->bCartesianPP_PME)
5916 /* Since we want to use the original cartesian setup for sim,
5917 * and not the one after split, we need to make an index.
5919 snew(comm->ddindex2ddnodeid, dd->nnodes);
5920 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5921 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5922 /* Get the rank of the DD master,
5923 * above we made sure that the master node is a PP node.
5933 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5935 else if (comm->bCartesianPP)
5937 if (cr->npmenodes == 0)
5939 /* The PP communicator is also
5940 * the communicator for this simulation
5942 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5944 cr->nodeid = dd->rank;
5946 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5948 /* We need to make an index to go from the coordinates
5949 * to the nodeid of this simulation.
5951 snew(comm->ddindex2simnodeid, dd->nnodes);
5952 snew(buf, dd->nnodes);
5953 if (cr->duty & DUTY_PP)
5955 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5957 /* Communicate the ddindex to simulation nodeid index */
5958 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5959 cr->mpi_comm_mysim);
5962 /* Determine the master coordinates and rank.
5963 * The DD master should be the same node as the master of this sim.
5965 for (i = 0; i < dd->nnodes; i++)
5967 if (comm->ddindex2simnodeid[i] == 0)
5969 ddindex2xyz(dd->nc, i, dd->master_ci);
5970 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
5975 fprintf(debug, "The master rank is %d\n", dd->masterrank);
5980 /* No Cartesian communicators */
5981 /* We use the rank in dd->comm->all as DD index */
5982 ddindex2xyz(dd->nc, dd->rank, dd->ci);
5983 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
5985 clear_ivec(dd->master_ci);
5992 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5993 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5998 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5999 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Build the ddindex -> simulation-nodeid table on ranks that did not
 * construct it in make_pp_communicator (Cartesian PP without the
 * combined PP/PME Cartesian setup): each PP rank writes its
 * sim_nodeid into a zeroed buffer, then an Allreduce sum fills the
 * complete table on all ranks. */
6003 static void receive_ddindex2simnodeid(t_commrec *cr)
6007 gmx_domdec_comm_t *comm;
6014 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6016 snew(comm->ddindex2simnodeid, dd->nnodes);
6017 snew(buf, dd->nnodes);
6018 if (cr->duty & DUTY_PP)
6020 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6023 /* Communicate the ddindex to simulation nodeid index */
6024 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6025 cr->mpi_comm_mysim);
/* Allocate the DD master data structure: per-rank charge-group counts
 * and index ranges, communication buffers, and per-dimension cell
 * boundary arrays. With few ranks (<= GMX_DD_NNODES_SENDRECV) an
 * additional whole-system rvec buffer of 'natoms' is allocated. */
6032 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6033 int ncg, int natoms)
6035 gmx_domdec_master_t *ma;
6040 snew(ma->ncg, dd->nnodes);
6041 snew(ma->index, dd->nnodes+1);
6043 snew(ma->nat, dd->nnodes);
6044 snew(ma->ibuf, dd->nnodes*2);
6045 snew(ma->cell_x, DIM);
6046 for (i = 0; i < DIM; i++)
6048 snew(ma->cell_x[i], dd->nc[i]+1);
6051 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6057 snew(ma->vbuf, natoms);
/* Split the simulation communicator into PP and PME-only groups.
 * If the PME rank count divides evenly into a y or z layer, a combined
 * Cartesian PP/PME communicator is created (PME ranks stacked along
 * cartpmedim); otherwise PME ranks are assigned per dd_node_order
 * (PP-first or interleaved) and MPI_Comm_split separates the duties.
 * Sets cr->duty and cr->mpi_comm_mygroup. */
6063 static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
6064 int gmx_unused reorder)
6067 gmx_domdec_comm_t *comm;
6078 if (comm->bCartesianPP)
6080 for (i = 1; i < DIM; i++)
6082 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6084 if (bDiv[YY] || bDiv[ZZ])
6086 comm->bCartesianPP_PME = TRUE;
6087 /* If we have 2D PME decomposition, which is always in x+y,
6088 * we stack the PME only nodes in z.
6089 * Otherwise we choose the direction that provides the thinnest slab
6090 * of PME only nodes as this will have the least effect
6091 * on the PP communication.
6092 * But for the PME communication the opposite might be better.
6094 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6096 dd->nc[YY] > dd->nc[ZZ]))
6098 comm->cartpmedim = ZZ;
6102 comm->cartpmedim = YY;
6104 comm->ntot[comm->cartpmedim]
6105 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6109 fprintf(fplog, "#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6111 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6116 if (comm->bCartesianPP_PME)
6120 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6123 for (i = 0; i < DIM; i++)
6127 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6130 MPI_Comm_rank(comm_cart, &rank);
6131 if (MASTERNODE(cr) && rank != 0)
6133 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6136 /* With this assigment we loose the link to the original communicator
6137 * which will usually be MPI_COMM_WORLD, unless have multisim.
6139 cr->mpi_comm_mysim = comm_cart;
6140 cr->sim_nodeid = rank;
6142 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6146 fprintf(fplog, "Cartesian nodeid %d, coordinates %d %d %d\n\n",
6147 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6150 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6154 if (cr->npmenodes == 0 ||
6155 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6157 cr->duty = DUTY_PME;
6160 /* Split the sim communicator into PP and PME only nodes */
6161 MPI_Comm_split(cr->mpi_comm_mysim,
6163 dd_index(comm->ntot, dd->ci),
6164 &cr->mpi_comm_mygroup);
6168 switch (dd_node_order)
6173 fprintf(fplog, "Order of the nodes: PP first, PME last\n");
6176 case ddnoINTERLEAVE:
6177 /* Interleave the PP-only and PME-only nodes,
6178 * as on clusters with dual-core machines this will double
6179 * the communication bandwidth of the PME processes
6180 * and thus speed up the PP <-> PME and inter PME communication.
6184 fprintf(fplog, "Interleaving PP and PME nodes\n");
6186 comm->pmenodes = dd_pmenodes(cr);
6191 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6194 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6196 cr->duty = DUTY_PME;
6203 /* Split the sim communicator into PP and PME only nodes */
6204 MPI_Comm_split(cr->mpi_comm_mysim,
6207 &cr->mpi_comm_mygroup);
6208 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6214 fprintf(fplog, "This is a %s only node\n\n",
6215 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
/* Top-level communicator setup: optionally split off PME-only ranks
 * (split_communicator), create/copy the PP communicator
 * (make_pp_communicator), link each PP rank to its PME rank, and on
 * the DD master allocate the master data structure.
 * Cartesian reordering defaults on; disabled by GMX_NO_CART_REORDER
 * or when split_communicator already reordered. */
6219 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6222 gmx_domdec_comm_t *comm;
6228 copy_ivec(dd->nc, comm->ntot);
6230 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6231 comm->bCartesianPP_PME = FALSE;
6233 /* Reorder the nodes by default. This might change the MPI ranks.
6234 * Real reordering is only supported on very few architectures,
6235 * Blue Gene is one of them.
6237 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6239 if (cr->npmenodes > 0)
6241 /* Split the communicator into a PP and PME part */
6242 split_communicator(fplog, cr, dd_node_order, CartReorder);
6243 if (comm->bCartesianPP_PME)
6245 /* We (possibly) reordered the nodes in split_communicator,
6246 * so it is no longer required in make_pp_communicator.
6248 CartReorder = FALSE;
6253 /* All nodes do PP and PME */
6255 /* We do not require separate communicators */
6256 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6260 if (cr->duty & DUTY_PP)
6262 /* Copy or make a new PP communicator */
6263 make_pp_communicator(fplog, cr, CartReorder);
6267 receive_ddindex2simnodeid(cr);
6270 if (!(cr->duty & DUTY_PME))
6272 /* Set up the commnuication to our PME node */
6273 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6274 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6277 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6278 dd->pme_nodeid, dd->pme_receive_vir_ener);
6283 dd->pme_nodeid = -1;
6288 dd->ma = init_gmx_domdec_master_t(dd,
6290 comm->cgs_gl.index[comm->cgs_gl.nr]);
/* Parse the static load balancing cell-size string for direction 'dir'
 * (nc whitespace-separated fractions, e.g. from -ddcsx/y/z), returning
 * a newly allocated array of relative cell sizes. Fails with gmx_fatal
 * on missing/invalid entries; returns without parsing when nc <= 1 or
 * no string was given. */
6294 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6296 real *slb_frac, tot;
6301 if (nc > 1 && size_string != NULL)
6305 fprintf(fplog, "Using static load balancing for the %s direction\n",
6310 for (i = 0; i < nc; i++)
/* %n records how many characters were consumed so we can advance */
6313 sscanf(size_string, "%lf%n", &dbl, &n);
6316 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6325 fprintf(fplog, "Relative cell sizes:");
6327 for (i = 0; i < nc; i++)
6332 fprintf(fplog, " %5.3f", slb_frac[i]);
6337 fprintf(fplog, "\n");
/* Count the multi-body bonded interactions in the topology: loop over
 * all molecule-type interaction lists and sum the number of bonded
 * (IF_BOND) interactions, weighted by molecule count. */
6344 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6347 gmx_mtop_ilistloop_t iloop;
6351 iloop = gmx_mtop_ilistloop_init(mtop);
6352 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6354 for (ftype = 0; ftype < F_NRE; ftype++)
6356 if ((interaction_function[ftype].flags & IF_BOND) &&
/* nr entries per interaction = 1 type index + NRAL(ftype) atoms */
6359 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
/* Read an integer from environment variable env_var, logging the value
 * found; 'def' is the fallback default (per the signature). */
6367 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6373 val = getenv(env_var);
6376 if (sscanf(val, "%d", &nst) <= 0)
6382 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
/* Emit a warning string to stderr and, when available, to the log. */
6390 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6394 fprintf(stderr, "\n%s\n", warn_string);
6398 fprintf(fplog, "\n%s\n", warn_string);
/* Validate input-record settings against DD limitations: screw PBC only
 * decomposes in x, simple neighbor search and nstlist=0 are fatal, and
 * angular COM removal with PBC only triggers a warning. */
6402 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6403 t_inputrec *ir, FILE *fplog)
6405 if (ir->ePBC == epbcSCREW &&
6406 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6408 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6411 if (ir->ns_type == ensSIMPLE)
6413 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6416 if (ir->nstlist == 0)
6418 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6421 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6423 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
/* Smallest average cell size over the decomposed dimensions, using the
 * initial box size and skew factors divided by the cell counts. */
6427 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6432 r = ddbox->box_size[XX];
6433 for (di = 0; di < dd->ndim; di++)
6436 /* Check using the initial average cell size */
6437 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
/* Determine the dynamic load balancing mode from the -dlb option
 * ('a'/'n'/'y'), downgrading it with a warning when DLB is unsupported:
 * rerun, non-dynamic integrators, missing cycle counting, or (for auto
 * mode) a reproducibility request; dlb=yes + reproducibility only warns
 * that the run will not be binary reproducible. */
6443 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6444 const char *dlb_opt, gmx_bool bRecordLoad,
6445 unsigned long Flags, t_inputrec *ir)
6453 case 'a': eDLB = edlbAUTO; break;
6454 case 'n': eDLB = edlbNO; break;
6455 case 'y': eDLB = edlbYES; break;
6456 default: gmx_incons("Unknown dlb_opt");
6459 if (Flags & MD_RERUN)
6464 if (!EI_DYNAMICS(ir->eI))
6466 if (eDLB == edlbYES)
6468 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6469 dd_warning(cr, fplog, buf);
6477 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6482 if (Flags & MD_REPRODUCIBLE)
6489 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6493 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6496 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
/* Fill dd->dim[] with the decomposed Cartesian dimensions (those with
 * nc > 1), in x,y,z order by default or z,y,x when the GMX_DD_ORDER_ZYX
 * environment variable is set. dd->ndim is incremented per dimension.
 */
6504 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6509 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6511 /* Decomposition order z,y,x */
6514 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6516 for (dim = DIM-1; dim >= 0; dim--)
6518 if (dd->nc[dim] > 1)
6520 dd->dim[dd->ndim++] = dim;
6526 /* Decomposition order x,y,z */
6527 for (dim = 0; dim < DIM; dim++)
6529 if (dd->nc[dim] > 1)
6531 dd->dim[dd->ndim++] = dim;
/* Allocate and zero-initialize a gmx_domdec_comm_t: per-direction charge
 * group communication buffers (2 per dimension), the integer/rvec scratch
 * buffers, load-balancing counters and atom-count accumulators.
 */
6537 static gmx_domdec_comm_t *init_dd_comm()
6539 gmx_domdec_comm_t *comm;
/* One flag/state buffer pair per dimension and direction (DIM*2) */
6543 snew(comm->cggl_flag, DIM*2);
6544 snew(comm->cgcm_state, DIM*2);
6545 for (i = 0; i < DIM*2; i++)
6547 comm->cggl_flag_nalloc[i] = 0;
6548 comm->cgcm_state_nalloc[i] = 0;
6551 comm->nalloc_int = 0;
6552 comm->buf_int = NULL;
6554 vec_rvec_init(&comm->vbuf);
6556 comm->n_load_have = 0;
6557 comm->n_load_collect = 0;
/* Zero the per-category atom counters used for load statistics */
6559 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6561 comm->sum_nat[i] = 0;
6565 comm->load_step = 0;
6568 clear_ivec(comm->load_lim);
/* Create and initialize the domain decomposition master structure.
 * Reads tuning environment variables, determines DLB support, derives the
 * minimum cell size from bonded interactions and constraints (P-LINCS),
 * chooses or validates the DD grid and the PME node split, and sets the
 * static load balancing fractions. Returns the new gmx_domdec_t.
 */
6575 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6576 unsigned long Flags,
6578 real comm_distance_min, real rconstr,
6579 const char *dlb_opt, real dlb_scale,
6580 const char *sizex, const char *sizey, const char *sizez,
6581 gmx_mtop_t *mtop, t_inputrec *ir,
6582 matrix box, rvec *x,
6584 int *npme_x, int *npme_y)
6587 gmx_domdec_comm_t *comm;
6590 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6597 "\nInitializing Domain Decomposition on %d nodes\n", cr->nnodes);
6602 dd->comm = init_dd_comm();
/* NOTE(review): cggl_flag and cgcm_state were already allocated inside
 * init_dd_comm() above (lines 6543-6544); re-allocating them here appears
 * to leak the earlier allocations — confirm against upstream history.
 */
6604 snew(comm->cggl_flag, DIM*2);
6605 snew(comm->cgcm_state, DIM*2);
6607 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6608 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
/* Debug/tuning knobs read from the environment */
6610 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
6611 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
6612 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
6613 recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
6614 comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
6615 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
6616 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
6617 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
6619 dd->pme_recv_f_alloc = 0;
6620 dd->pme_recv_f_buf = NULL;
6622 if (dd->bSendRecv2 && fplog)
6624 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6630 fprintf(fplog, "Will load balance based on FLOP count\n");
/* eFlop > 1 adds per-rank noise to the FLOP count; seed per node id */
6632 if (comm->eFlop > 1)
6634 srand(1+cr->nodeid);
6636 comm->bRecordLoad = TRUE;
6640 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6644 /* Initialize to GPU share count to 0, might change later */
6645 comm->nrank_gpu_shared = 0;
6647 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6649 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6652 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6654 dd->bGridJump = comm->bDynLoadBal;
6655 comm->bPMELoadBalDLBLimits = FALSE;
6657 if (comm->nstSortCG)
6661 if (comm->nstSortCG == 1)
6663 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6667 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6671 snew(comm->sort, 1);
6677 fprintf(fplog, "Will not sort the charge groups\n");
/* bCGs: true when real (multi-atom) charge groups are present */
6681 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
/* Bonded interactions between charge groups exist when there are more
 * charge groups than molecules */
6683 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6684 if (comm->bInterCGBondeds)
6686 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6690 comm->bInterCGMultiBody = FALSE;
6693 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6694 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6696 if (ir->rlistlong == 0)
6698 /* Set the cut-off to some very large value,
6699 * so we don't need if statements everywhere in the code.
6700 * We use sqrt, since the cut-off is squared in some places.
6702 comm->cutoff = GMX_CUTOFF_INF;
6706 comm->cutoff = ir->rlistlong;
6708 comm->cutoff_mbody = 0;
6710 comm->cellsize_limit = 0;
6711 comm->bBondComm = FALSE;
/* Derive the minimum cell size required for bonded interactions */
6713 if (comm->bInterCGBondeds)
6715 if (comm_distance_min > 0)
/* User-specified bonded communication distance (-rdd) */
6717 comm->cutoff_mbody = comm_distance_min;
6718 if (Flags & MD_DDBONDCOMM)
6720 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6724 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6726 r_bonded_limit = comm->cutoff_mbody;
6728 else if (ir->bPeriodicMols)
6730 /* Can not easily determine the required cut-off */
6731 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6732 comm->cutoff_mbody = comm->cutoff/2;
6733 r_bonded_limit = comm->cutoff_mbody;
/* Measure the actual bonded distances in the starting conformation */
6739 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6740 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6742 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6743 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6745 /* We use an initial margin of 10% for the minimum cell size,
6746 * except when we are just below the non-bonded cut-off.
6748 if (Flags & MD_DDBONDCOMM)
6750 if (max(r_2b, r_mb) > comm->cutoff)
6752 r_bonded = max(r_2b, r_mb);
6753 r_bonded_limit = 1.1*r_bonded;
6754 comm->bBondComm = TRUE;
6759 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6761 /* We determine cutoff_mbody later */
6765 /* No special bonded communication,
6766 * simply increase the DD cut-off.
6768 r_bonded_limit = 1.1*max(r_2b, r_mb);
6769 comm->cutoff_mbody = r_bonded_limit;
6770 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6773 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6777 "Minimum cell size due to bonded interactions: %.3f nm\n",
6778 comm->cellsize_limit);
/* Cell size limit due to constraints (P-LINCS), estimated or user-set */
6782 if (dd->bInterCGcons && rconstr <= 0)
6784 /* There is a cell size limit due to the constraints (P-LINCS) */
6785 rconstr = constr_r_max(fplog, mtop, ir);
6789 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6791 if (rconstr > comm->cellsize_limit)
6793 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6797 else if (rconstr > 0 && fplog)
6799 /* Here we do not check for dd->bInterCGcons,
6800 * because one can also set a cell size limit for virtual sites only
6801 * and at this point we don't know yet if there are intercg v-sites.
6804 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6807 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6809 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
/* If a DD grid was supplied (-dd), validate it; otherwise choose one */
6813 copy_ivec(nc, dd->nc);
6814 set_dd_dim(fplog, dd);
6815 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6817 if (cr->npmenodes == -1)
6821 acs = average_cellsize_min(dd, ddbox);
6822 if (acs < comm->cellsize_limit)
6826 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6828 gmx_fatal_collective(FARGS, cr, NULL,
6829 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6830 acs, comm->cellsize_limit);
6835 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6837 /* We need to choose the optimal DD grid and possibly PME nodes */
6838 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6839 comm->eDLB != edlbNO, dlb_scale,
6840 comm->cellsize_limit, comm->cutoff,
6841 comm->bInterCGBondeds);
/* nc[XX] == 0 signals that no compatible grid was found */
6843 if (dd->nc[XX] == 0)
6845 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6846 sprintf(buf, "Change the number of nodes or mdrun option %s%s%s",
6847 !bC ? "-rdd" : "-rcon",
6848 comm->eDLB != edlbNO ? " or -dds" : "",
6849 bC ? " or your LINCS settings" : "");
6851 gmx_fatal_collective(FARGS, cr, NULL,
6852 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6854 "Look in the log file for details on the domain decomposition",
6855 cr->nnodes-cr->npmenodes, limit, buf);
6857 set_dd_dim(fplog, dd);
6863 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6864 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
/* Sanity checks: PP nodes must match the grid, PME nodes cannot exceed PP */
6867 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6868 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6870 gmx_fatal_collective(FARGS, cr, NULL,
6871 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6872 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6874 if (cr->npmenodes > dd->nnodes)
6876 gmx_fatal_collective(FARGS, cr, NULL,
6877 "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6879 if (cr->npmenodes > 0)
6881 comm->npmenodes = cr->npmenodes;
6885 comm->npmenodes = dd->nnodes;
/* Decide the PME decomposition (1D or 2D) for PME electrostatics/VdW */
6888 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6890 /* The following choices should match those
6891 * in comm_cost_est in domdec_setup.c.
6892 * Note that here the checks have to take into account
6893 * that the decomposition might occur in a different order than xyz
6894 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6895 * in which case they will not match those in comm_cost_est,
6896 * but since that is mainly for testing purposes that's fine.
6898 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6899 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6900 getenv("GMX_PMEONEDD") == NULL)
6902 comm->npmedecompdim = 2;
6903 comm->npmenodes_x = dd->nc[XX];
6904 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6908 /* In case nc is 1 in both x and y we could still choose to
6909 * decompose pme in y instead of x, but we use x for simplicity.
6911 comm->npmedecompdim = 1;
6912 if (dd->dim[0] == YY)
6914 comm->npmenodes_x = 1;
6915 comm->npmenodes_y = comm->npmenodes;
6919 comm->npmenodes_x = comm->npmenodes;
6920 comm->npmenodes_y = 1;
6925 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6926 comm->npmenodes_x, comm->npmenodes_y, 1);
6931 comm->npmedecompdim = 0;
6932 comm->npmenodes_x = 0;
6933 comm->npmenodes_y = 0;
6936 /* Technically we don't need both of these,
6937 * but it simplifies code not having to recalculate it.
6939 *npme_x = comm->npmenodes_x;
6940 *npme_y = comm->npmenodes_y;
/* Static load balancing: per-dimension cell size fractions (-ddcsx etc.) */
6942 snew(comm->slb_frac, DIM);
6943 if (comm->eDLB == edlbNO)
6945 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6946 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6947 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
/* If cutoff_mbody was not set yet, pick a value now */
6950 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6952 if (comm->bBondComm || comm->eDLB != edlbNO)
6954 /* Set the bonded communication distance to halfway
6955 * the minimum and the maximum,
6956 * since the extra communication cost is nearly zero.
6958 acs = average_cellsize_min(dd, ddbox);
6959 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6960 if (comm->eDLB != edlbNO)
6962 /* Check if this does not limit the scaling */
6963 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
6965 if (!comm->bBondComm)
6967 /* Without bBondComm do not go beyond the n.b. cut-off */
6968 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
6969 if (comm->cellsize_limit >= comm->cutoff)
6971 /* We don't lose a lot of efficiency
6972 * when increasing it to the n.b. cut-off.
6973 * It can even be slightly faster, because we need
6974 * less checks for the communication setup.
6976 comm->cutoff_mbody = comm->cutoff;
6979 /* Check if we did not end up below our original limit */
6980 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
6982 if (comm->cutoff_mbody > comm->cellsize_limit)
6984 comm->cellsize_limit = comm->cutoff_mbody;
6987 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
6992 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
6993 "cellsize limit %f\n",
6994 comm->bBondComm, comm->cellsize_limit);
6999 check_dd_restrictions(cr, dd, ir, fplog);
/* INT_MIN marks "no partitioning done yet" */
7002 comm->partition_step = INT_MIN;
7005 clear_dd_cycle_counts(dd);
/* Activate the DLB pulse counts and minimum cell sizes that were
 * precomputed for dynamic load balancing (np_dlb/cellsize_min_dlb).
 */
7010 static void set_dlb_limits(gmx_domdec_t *dd)
7015 for (d = 0; d < dd->ndim; d++)
7017 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7018 dd->comm->cellsize_min[dd->dim[d]] =
7019 dd->comm->cellsize_min_dlb[dd->dim[d]];
/* Switch dynamic load balancing on at the given step (used with dlb=auto
 * when measured load imbalance warrants it). Refuses — and permanently
 * disables DLB — when the current minimum cell size leaves no room for
 * rebalancing. Initializes the uniform cell boundary fractions.
 */
7024 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7027 gmx_domdec_comm_t *comm;
7037 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
/* Find the smallest current cell size over all DD dimensions */
7040 cellsize_min = comm->cellsize_min[dd->dim[0]];
7041 for (d = 1; d < dd->ndim; d++)
7043 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
/* Need at least 5% headroom above the limit to balance anything */
7046 if (cellsize_min < comm->cellsize_limit*1.05)
7048 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7050 /* Change DLB from "auto" to "no". */
7051 comm->eDLB = edlbNO;
7056 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7057 comm->bDynLoadBal = TRUE;
7058 dd->bGridJump = TRUE;
7062 /* We can set the required cell size info here,
7063 * so we do not need to communicate this.
7064 * The grid is completely uniform.
7066 for (d = 0; d < dd->ndim; d++)
7070 comm->load[d].sum_m = comm->load[d].sum;
7072 nc = dd->nc[dd->dim[d]];
7073 for (i = 0; i < nc; i++)
/* Uniform boundaries: cell i starts at fraction i/nc */
7075 comm->root[d]->cell_f[i] = i/(real)nc;
7078 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7079 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7082 comm->root[d]->cell_f[nc] = 1.0;
/* Allocate a per-charge-group boolean array (one entry per global charge
 * group), initialized to FALSE; used to track which cg's are local.
 */
7087 static char *init_bLocalCG(gmx_mtop_t *mtop)
7092 ncg = ncg_mtop(mtop);
7093 snew(bLocalCG, ncg);
7094 for (cg = 0; cg < ncg; cg++)
7096 bLocalCG[cg] = FALSE;
/* Initialize bonded-interaction bookkeeping for DD: builds the reverse
 * topology and, when communicating atoms beyond the cut-off for bondeds
 * (bBondComm), the charge-group link table and locality flags.
 */
7102 void dd_init_bondeds(FILE *fplog,
7103 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7105 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7107 gmx_domdec_comm_t *comm;
7111 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7115 if (comm->bBondComm)
7117 /* Communicate atoms beyond the cut-off for bonded interactions */
7120 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7122 comm->bLocalCG = init_bLocalCG(mtop);
7126 /* Only communicate atoms based on cut-off */
7127 comm->cglink = NULL;
7128 comm->bLocalCG = NULL;
/* Write a summary of the DD setup to the log file: communication pulse
 * counts, cell size limits and allowed shrink (with DLB), initial cell
 * sizes (without DLB), and the maximum allowed interaction distances for
 * non-bonded, bonded, virtual-site and constraint communication.
 */
7132 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7134 gmx_bool bDynLoadBal, real dlb_scale,
7137 gmx_domdec_comm_t *comm;
7152 fprintf(fplog, "The maximum number of communication pulses is:");
7153 for (d = 0; d < dd->ndim; d++)
7155 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7157 fprintf(fplog, "\n");
7158 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7159 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7160 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7161 for (d = 0; d < DIM; d++)
/* A non-PBC dimension with only 2 cells cannot shrink (special-cased) */
7165 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7172 comm->cellsize_min_dlb[d]/
7173 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7175 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7178 fprintf(fplog, "\n");
/* Without DLB: report the initial pulse counts and cell sizes instead */
7182 set_dd_cell_sizes_slb(dd, ddbox, FALSE, np);
7183 fprintf(fplog, "The initial number of communication pulses is:");
7184 for (d = 0; d < dd->ndim; d++)
7186 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7188 fprintf(fplog, "\n");
7189 fprintf(fplog, "The initial domain decomposition cell size is:");
7190 for (d = 0; d < DIM; d++)
7194 fprintf(fplog, " %c %.2f nm",
7195 dim2char(d), dd->comm->cellsize_min[d]);
7198 fprintf(fplog, "\n\n");
7201 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7203 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7204 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7205 "non-bonded interactions", "", comm->cutoff);
7209 limit = dd->comm->cellsize_limit;
7213 if (dynamic_dd_box(ddbox, ir))
7215 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7217 limit = dd->comm->cellsize_min[XX];
7218 for (d = 1; d < DIM; d++)
7220 limit = min(limit, dd->comm->cellsize_min[d]);
7224 if (comm->bInterCGBondeds)
7226 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7227 "two-body bonded interactions", "(-rdd)",
7228 max(comm->cutoff, comm->cutoff_mbody));
7229 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7230 "multi-body bonded interactions", "(-rdd)",
7231 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7235 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7236 "virtual site constructions", "(-rcon)", limit);
7238 if (dd->constraint_comm)
7240 sprintf(buf, "atoms separated by up to %d constraints",
7242 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7243 buf, "(-rcon)", limit);
7245 fprintf(fplog, "\n");
/* Compute the DLB cell size limits: the maximum number of communication
 * pulses per dimension (np_dlb), the global maximum (maxpulse), and the
 * per-dimension minimum cell sizes for dynamic load balancing.
 */
7251 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7253 const t_inputrec *ir,
7254 const gmx_ddbox_t *ddbox)
7256 gmx_domdec_comm_t *comm;
7257 int d, dim, npulse, npulse_d_max, npulse_d;
/* No interaction cut-off means cells can shrink arbitrarily */
7262 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7264 /* Determine the maximum number of comm. pulses in one dimension */
7266 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7268 /* Determine the maximum required number of grid pulses */
7269 if (comm->cellsize_limit >= comm->cutoff)
7271 /* Only a single pulse is required */
7274 else if (!bNoCutOff && comm->cellsize_limit > 0)
7276 /* We round down slightly here to avoid overhead due to the latency
7277 * of extra communication calls when the cut-off
7278 * would be only slightly longer than the cell size.
7279 * Later cellsize_limit is redetermined,
7280 * so we can not miss interactions due to this rounding.
7282 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7286 /* There is no cell size limit */
7287 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7290 if (!bNoCutOff && npulse > 1)
7292 /* See if we can do with less pulses, based on dlb_scale */
7294 for (d = 0; d < dd->ndim; d++)
7297 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7298 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7299 npulse_d_max = max(npulse_d_max, npulse_d);
7301 npulse = min(npulse, npulse_d_max);
7304 /* This env var can override npulse */
7305 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
/* Vacuum (no PBC) with enough pulses imposes no DLB cell size limit */
7312 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7313 for (d = 0; d < dd->ndim; d++)
/* A pulse count is capped at nc-1 (cannot pulse past all cells) */
7315 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7316 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7317 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7318 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7319 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7321 comm->bVacDLBNoLimit = FALSE;
7325 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7326 if (!comm->bVacDLBNoLimit)
7328 comm->cellsize_limit = max(comm->cellsize_limit,
7329 comm->cutoff/comm->maxpulse);
7331 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7332 /* Set the minimum cell size for each DD dimension */
7333 for (d = 0; d < dd->ndim; d++)
7335 if (comm->bVacDLBNoLimit ||
7336 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7338 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7342 comm->cellsize_min_dlb[dd->dim[d]] =
7343 comm->cutoff/comm->cd[d].np_dlb;
7346 if (comm->cutoff_mbody <= 0)
7348 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7350 if (comm->bDynLoadBal)
/* Return whether PBC must be applied to bonded interactions: needed when
 * there is PBC, inter-charge-group bondeds exist, and not all periodic
 * dimensions are decomposed.
 */
7356 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7358 /* If each molecule is a single charge group
7359 * or we use domain decomposition for each periodic dimension,
7360 * we do not need to take pbc into account for the bonded interactions.
7362 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7365 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
/* Finish DD initialization that depends on information only available
 * after init_domain_decomposition: OpenMP thread count, PME DD setup,
 * DLB cell limits, settings printout, and the global-to-local atom
 * lookup table sized by the estimated zone volume fraction.
 */
7368 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7369 t_inputrec *ir, gmx_ddbox_t *ddbox)
7371 gmx_domdec_comm_t *comm;
7377 /* Initialize the thread data.
7378 * This can not be done in init_domain_decomposition,
7379 * as the numbers of threads is determined later.
7381 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7384 snew(comm->dth, comm->nth);
7387 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7389 init_ddpme(dd, &comm->ddpme[0], 0);
7390 if (comm->npmedecompdim >= 2)
7392 init_ddpme(dd, &comm->ddpme[1], 1);
7397 comm->npmenodes = 0;
/* Having a PME node id without PME electrostatics is a setup error */
7398 if (dd->pme_nodeid >= 0)
7400 gmx_fatal_collective(FARGS, NULL, dd,
7401 "Can not have separate PME nodes without PME electrostatics");
7407 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7409 if (comm->eDLB != edlbNO)
7411 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7414 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
/* With dlb=auto, also show what the settings would become with DLB on */
7415 if (comm->eDLB == edlbAUTO)
7419 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7421 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
/* Estimate the fraction of all atoms a home zone plus halo will hold */
7424 if (ir->ePBC == epbcNONE)
7426 vol_frac = 1 - 1/(double)dd->nnodes;
7431 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7435 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7437 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7439 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
/* Test whether the requested cut-off cutoff_req fits the current DD grid:
 * checks the required number of pulses per dimension against the DLB pulse
 * limits and current cell sizes, and (with DLB active) grid jump limits.
 * The per-rank verdict is summed over all ranks so the answer is global.
 */
7442 static gmx_bool test_dd_cutoff(t_commrec *cr,
7443 t_state *state, t_inputrec *ir,
7454 set_ddbox(dd, FALSE, cr, ir, state->box,
7455 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7459 for (d = 0; d < dd->ndim; d++)
/* DD_CELL_MARGIN guards against cells shrinking after this check */
7463 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7464 if (dynamic_dd_box(&ddbox, ir))
7466 inv_cell_size *= DD_PRES_SCALE_MARGIN;
/* Pulses needed to reach cutoff_req along this dimension */
7469 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7471 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7472 dd->comm->cd[d].np_dlb > 0)
7474 if (np > dd->comm->cd[d].np_dlb)
7479 /* If a current local cell size is smaller than the requested
7480 * cut-off, we could still fix it, but this gets very complicated.
7481 * Without fixing here, we might actually need more checks.
7483 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7490 if (dd->comm->eDLB != edlbNO)
7492 /* If DLB is not active yet, we don't need to check the grid jumps.
7493 * Actually we shouldn't, because then the grid jump data is not set.
7495 if (dd->comm->bDynLoadBal &&
7496 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
/* Make the decision collective: any locally-limited rank counts */
7501 gmx_sumi(1, &LocallyLimited, cr);
7503 if (LocallyLimited > 0)
/* Try to change the DD communication cut-off to cutoff_req; applies the
 * new value only when test_dd_cutoff accepts it. Returns whether the
 * change was allowed.
 */
7512 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7515 gmx_bool bCutoffAllowed;
7517 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7521 cr->dd->comm->cutoff = cutoff_req;
7524 return bCutoffAllowed;
/* Enable the DLB cut-off limit used during PME load balancing and record
 * the current cut-off as the maximum allowed value.
 */
7527 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7529 gmx_domdec_comm_t *comm;
7531 comm = cr->dd->comm;
7533 /* Turn on the DLB limiting (might have been on already) */
7534 comm->bPMELoadBalDLBLimits = TRUE;
7536 /* Change the cut-off limit */
7537 comm->PMELoadBal_max_cutoff = comm->cutoff;
/* Merge charge groups received in communication pulse 'pulse' into the
 * per-zone arrays (global indices, positions, cg index, cg info). First
 * shifts the data from previous pulses to make room, correcting the stored
 * send indices, then appends the newly received cg's per zone.
 */
7540 static void merge_cg_buffers(int ncell,
7541 gmx_domdec_comm_dim_t *cd, int pulse,
7543 int *index_gl, int *recv_i,
7544 rvec *cg_cm, rvec *recv_vr,
7546 cginfo_mb_t *cginfo_mb, int *cginfo)
7548 gmx_domdec_ind_t *ind, *ind_p;
7549 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7550 int shift, shift_at;
7552 ind = &cd->ind[pulse];
7554 /* First correct the already stored data */
7555 shift = ind->nrecv[ncell];
/* Work backwards so shifted data is not overwritten before it is moved */
7556 for (cell = ncell-1; cell >= 0; cell--)
7558 shift -= ind->nrecv[cell];
7561 /* Move the cg's present from previous grid pulses */
7562 cg0 = ncg_cell[ncell+cell];
7563 cg1 = ncg_cell[ncell+cell+1];
7564 cgindex[cg1+shift] = cgindex[cg1];
7565 for (cg = cg1-1; cg >= cg0; cg--)
7567 index_gl[cg+shift] = index_gl[cg];
7568 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7569 cgindex[cg+shift] = cgindex[cg];
7570 cginfo[cg+shift] = cginfo[cg];
7572 /* Correct the already stored send indices for the shift */
7573 for (p = 1; p <= pulse; p++)
7575 ind_p = &cd->ind[p];
7577 for (c = 0; c < cell; c++)
7579 cg0 += ind_p->nsend[c];
7581 cg1 = cg0 + ind_p->nsend[cell];
7582 for (cg = cg0; cg < cg1; cg++)
7584 ind_p->index[cg] += shift;
7590 /* Merge in the communicated buffers */
7594 for (cell = 0; cell < ncell; cell++)
7596 cg1 = ncg_cell[ncell+cell+1] + shift;
7599 /* Correct the old cg indices */
7600 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7602 cgindex[cg+1] += shift_at;
7605 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7607 /* Copy this charge group from the buffer */
7608 index_gl[cg1] = recv_i[cg0];
7609 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7610 /* Add it to the cgindex */
7611 cg_gl = index_gl[cg1];
7612 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7613 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7614 cgindex[cg1+1] = cgindex[cg1] + nat;
7619 shift += ind->nrecv[cell];
7620 ncg_cell[ncell+cell+1] = cg1;
/* Precompute per-zone, per-pulse atom index ranges (cell2at0/cell2at1)
 * from the charge group index, so communication buffers can be copied
 * with simple block operations.
 */
7624 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7625 int nzone, int cg0, const int *cgindex)
7629 /* Store the atom block boundaries for easy copying of communication buffers
7632 for (zone = 0; zone < nzone; zone++)
7634 for (p = 0; p < cd->np; p++)
7636 cd->ind[p].cell2at0[zone] = cgindex[cg];
7637 cg += cd->ind[p].nrecv[zone];
7638 cd->ind[p].cell2at1[zone] = cgindex[cg];
/* Return whether charge group cg_gl is bonded-linked to any charge group
 * that is not local (bLocalCG false), i.e. whether a bonded partner still
 * needs to be communicated.
 */
7643 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7649 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7651 if (!bLocalCG[link->a[i]])
7660 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7662 real c[DIM][4]; /* the corners for the non-bonded communication */
7663 real cr0; /* corner for rounding */
7664 real cr1[4]; /* corners for rounding */
7665 real bc[DIM]; /* corners for bonded communication */
7666 real bcr1; /* corner for rounding for bonded communication */
7669 /* Determine the corners of the domain(s) we are communicating with */
7671 set_dd_corners(const gmx_domdec_t *dd,
7672 int dim0, int dim1, int dim2,
7676 const gmx_domdec_comm_t *comm;
7677 const gmx_domdec_zones_t *zones;
7682 zones = &comm->zones;
7684 /* Keep the compiler happy */
7688 /* The first dimension is equal for all cells */
7689 c->c[0][0] = comm->cell_x0[dim0];
7692 c->bc[0] = c->c[0][0];
7697 /* This cell row is only seen from the first row */
7698 c->c[1][0] = comm->cell_x0[dim1];
7699 /* All rows can see this row */
7700 c->c[1][1] = comm->cell_x0[dim1];
/* With staggered grids (DLB) use the neighbor's communicated extremes */
7703 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7706 /* For the multi-body distance we need the maximum */
7707 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7710 /* Set the upper-right corner for rounding */
7711 c->cr0 = comm->cell_x1[dim0];
7716 for (j = 0; j < 4; j++)
7718 c->c[2][j] = comm->cell_x0[dim2];
7722 /* Use the maximum of the i-cells that see a j-cell */
7723 for (i = 0; i < zones->nizone; i++)
7725 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7731 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7737 /* For the multi-body distance we need the maximum */
7738 c->bc[2] = comm->cell_x0[dim2];
7739 for (i = 0; i < 2; i++)
7741 for (j = 0; j < 2; j++)
7743 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7749 /* Set the upper-right corner for rounding */
7750 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7751 * Only cell (0,0,0) can see cell 7 (1,1,1)
7753 c->cr1[0] = comm->cell_x1[dim1];
7754 c->cr1[3] = comm->cell_x1[dim1];
7757 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7760 /* For the multi-body distance we need the maximum */
7761 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7768 /* Determine which cg's we need to send in this pulse from this zone */
7770 get_zone_pulse_cgs(gmx_domdec_t *dd,
7771 int zonei, int zone,
7773 const int *index_gl,
7775 int dim, int dim_ind,
7776 int dim0, int dim1, int dim2,
7777 real r_comm2, real r_bcomm2,
7781 real skew_fac2_d, real skew_fac_01,
7782 rvec *v_d, rvec *v_0, rvec *v_1,
7783 const dd_corners_t *c,
7785 gmx_bool bDistBonded,
7791 gmx_domdec_ind_t *ind,
7792 int **ibuf, int *ibuf_nalloc,
7798 gmx_domdec_comm_t *comm;
7800 gmx_bool bDistMB_pulse;
7802 real r2, rb2, r, tric_sh;
7805 int nsend_z, nsend, nat;
/* Screw PBC mirrors coordinates when crossing the x boundary */
7809 bScrew = (dd->bScrewPBC && dim == XX);
7811 bDistMB_pulse = (bDistMB && bDistBonded);
/* Loop over the candidate charge groups and compute their (squared)
 * distance to the receiving zone's corner planes; r2 is the non-bonded
 * distance, rb2 the (larger-corner) multi-body bonded distance. */
7817 for (cg = cg0; cg < cg1; cg++)
7821 if (tric_dist[dim_ind] == 0)
7823 /* Rectangular direction, easy */
7824 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7831 r = cg_cm[cg][dim] - c->bc[dim_ind];
7837 /* Rounding gives at most a 16% reduction
7838 * in communicated atoms
7840 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7842 r = cg_cm[cg][dim0] - c->cr0;
7843 /* This is the first dimension, so always r >= 0 */
7850 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7852 r = cg_cm[cg][dim1] - c->cr1[zone];
7859 r = cg_cm[cg][dim1] - c->bcr1;
7869 /* Triclinic direction, more complicated */
7872 /* Rounding, conservative as the skew_fac multiplication
7873 * will slightly underestimate the distance.
7875 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7877 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7878 for (i = dim0+1; i < DIM; i++)
7880 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7882 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7885 rb[dim0] = rn[dim0];
7888 /* Take care that the cell planes along dim0 might not
7889 * be orthogonal to those along dim1 and dim2.
7891 for (i = 1; i <= dim_ind; i++)
7894 if (normal[dim0][dimd] > 0)
7896 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7899 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7904 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7906 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7908 for (i = dim1+1; i < DIM; i++)
7910 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7912 rn[dim1] += tric_sh;
7915 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7916 /* Take care of coupling of the distances
7917 * to the planes along dim0 and dim1 through dim2.
7919 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7920 /* Take care that the cell planes along dim1
7921 * might not be orthogonal to that along dim2.
7923 if (normal[dim1][dim2] > 0)
7925 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7931 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7934 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7935 /* Take care of coupling of the distances
7936 * to the planes along dim0 and dim1 through dim2.
7938 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7939 /* Take care that the cell planes along dim1
7940 * might not be orthogonal to that along dim2.
7942 if (normal[dim1][dim2] > 0)
7944 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7949 /* The distance along the communication direction */
7950 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7952 for (i = dim+1; i < DIM; i++)
7954 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7959 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7960 /* Take care of coupling of the distances
7961 * to the planes along dim0 and dim1 through dim2.
7963 if (dim_ind == 1 && zonei == 1)
7965 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
7971 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
7974 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
7975 /* Take care of coupling of the distances
7976 * to the planes along dim0 and dim1 through dim2.
7978 if (dim_ind == 1 && zonei == 1)
7980 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
/* Select the cg when within the non-bonded range, or within the bonded
 * range and linked to a bonded partner that is not yet local */
7988 ((bDistMB && rb2 < r_bcomm2) ||
7989 (bDist2B && r2 < r_bcomm2)) &&
7991 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
7992 missing_link(comm->cglink, index_gl[cg],
7995 /* Make an index to the local charge groups */
7996 if (nsend+1 > ind->nalloc)
7998 ind->nalloc = over_alloc_large(nsend+1);
7999 srenew(ind->index, ind->nalloc);
8001 if (nsend+1 > *ibuf_nalloc)
8003 *ibuf_nalloc = over_alloc_large(nsend+1);
8004 srenew(*ibuf, *ibuf_nalloc);
8006 ind->index[nsend] = cg;
8007 (*ibuf)[nsend] = index_gl[cg];
8009 vec_rvec_check_alloc(vbuf, nsend+1);
8011 if (dd->ci[dim] == 0)
8013 /* Correct cg_cm for pbc */
8014 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
/* Screw PBC: mirror y and z when wrapping in x */
8017 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8018 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8023 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8026 nat += cgindex[cg+1] - cgindex[cg];
8032 *nsend_z_ptr = nsend_z;
/* Set up the halo (zone) communication for domain decomposition.
 *
 * For each DD dimension and each communication pulse this routine:
 *  1. selects the charge groups to send via get_zone_pulse_cgs()
 *     (parallelized over comm->nth OpenMP threads, thread 0 writing
 *     directly into the communication buffers, threads >= 1 into
 *     per-thread scratch buffers that are appended afterwards),
 *  2. exchanges the send/receive counts, the global charge-group
 *     indices and the charge-group centers of mass with the backward
 *     neighbor rank (dd_sendrecv_int / dd_sendrecv_rvec),
 *  3. extends the zone charge-group ranges (zones->cg_range), the
 *     local cg index (dd->cgindex) and the cginfo array with the
 *     received charge groups.
 * Finally it updates dd->ncg_tot / dd->nat_tot and the per-category
 * atom counts in comm->nat.
 *
 * NOTE(review): this extracted listing is non-contiguous (gaps in the
 * original line numbering); some statements and braces between the
 * lines shown here are not visible in this view.
 */
8035 static void setup_dd_communication(gmx_domdec_t *dd,
8036 matrix box, gmx_ddbox_t *ddbox,
8037 t_forcerec *fr, t_state *state, rvec **f)
8039 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8040 int nzone, nzone_send, zone, zonei, cg0, cg1;
8041 int c, i, j, cg, cg_gl, nrcg;
8042 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8043 gmx_domdec_comm_t *comm;
8044 gmx_domdec_zones_t *zones;
8045 gmx_domdec_comm_dim_t *cd;
8046 gmx_domdec_ind_t *ind;
8047 cginfo_mb_t *cginfo_mb;
8048 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8049 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8050 dd_corners_t corners;
8052 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8053 real skew_fac2_d, skew_fac_01;
8060 fprintf(debug, "Setting up DD communication\n");
8065 switch (fr->cutoff_scheme)
8074 gmx_incons("unimplemented");
/* Mark for each DD dimension whether triclinic distance corrections
 * are needed for it or any preceding DD dimension.
 */
8078 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8080 dim = dd->dim[dim_ind];
8082 /* Check if we need to use triclinic distances */
8083 tric_dist[dim_ind] = 0;
8084 for (i = 0; i <= dim_ind; i++)
8086 if (ddbox->tric_dir[dd->dim[i]])
8088 tric_dist[dim_ind] = 1;
8093 bBondComm = comm->bBondComm;
8095 /* Do we need to determine extra distances for multi-body bondeds? */
8096 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8098 /* Do we need to determine extra distances for only two-body bondeds? */
8099 bDist2B = (bBondComm && !bDistMB);
/* Squared cutoffs: non-bonded communication and bonded (mbody) communication */
8101 r_comm2 = sqr(comm->cutoff);
8102 r_bcomm2 = sqr(comm->cutoff_mbody);
8106 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8109 zones = &comm->zones;
/* dim1/dim2 are -1 when fewer than 2/3 DD dimensions are used */
8112 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8113 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8115 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8117 /* Triclinic stuff */
8118 normal = ddbox->normal;
8122 v_0 = ddbox->v[dim0];
8123 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8125 /* Determine the coupling coefficient for the distances
8126 * to the cell planes along dim0 and dim1 through dim2.
8127 * This is required for correct rounding.
8130 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8133 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8139 v_1 = ddbox->v[dim1];
8142 zone_cg_range = zones->cg_range;
8143 index_gl = dd->index_gl;
8144 cgindex = dd->cgindex;
8145 cginfo_mb = fr->cginfo_mb;
/* Zone 0 is the home zone; the received zones are appended after it */
8147 zone_cg_range[0] = 0;
8148 zone_cg_range[1] = dd->ncg_home;
8149 comm->zone_ncg1[0] = dd->ncg_home;
8150 pos_cg = dd->ncg_home;
8152 nat_tot = dd->nat_home;
8154 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8156 dim = dd->dim[dim_ind];
8157 cd = &comm->cd[dim_ind];
8159 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8161 /* No pbc in this dimension, the first node should not comm. */
8169 v_d = ddbox->v[dim];
8170 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8172 cd->bInPlace = TRUE;
8173 for (p = 0; p < cd->np; p++)
8175 /* Only atoms communicated in the first pulse are used
8176 * for multi-body bonded interactions or for bBondComm.
8178 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8183 for (zone = 0; zone < nzone_send; zone++)
8185 if (tric_dist[dim_ind] && dim_ind > 0)
8187 /* Determine slightly more optimized skew_fac's
8189 * This reduces the number of communicated atoms
8190 * by about 10% for 3D DD of rhombic dodecahedra.
8192 for (dimd = 0; dimd < dim; dimd++)
8194 sf2_round[dimd] = 1;
8195 if (ddbox->tric_dir[dimd])
8197 for (i = dd->dim[dimd]+1; i < DIM; i++)
8199 /* If we are shifted in dimension i
8200 * and the cell plane is tilted forward
8201 * in dimension i, skip this coupling.
8203 if (!(zones->shift[nzone+zone][i] &&
8204 ddbox->v[dimd][i][dimd] >= 0))
8207 sqr(ddbox->v[dimd][i][dimd]);
8210 sf2_round[dimd] = 1/sf2_round[dimd];
8215 zonei = zone_perm[dim_ind][zone];
8218 /* Here we permute the zones to obtain a convenient order
8219 * for neighbor searching
8221 cg0 = zone_cg_range[zonei];
8222 cg1 = zone_cg_range[zonei+1];
8226 /* Look only at the cg's received in the previous grid pulse
8228 cg1 = zone_cg_range[nzone+zone+1];
8229 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
8232 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8233 for (th = 0; th < comm->nth; th++)
8235 gmx_domdec_ind_t *ind_p;
8236 int **ibuf_p, *ibuf_nalloc_p;
8238 int *nsend_p, *nat_p;
8244 /* Thread 0 writes in the comm buffers */
8246 ibuf_p = &comm->buf_int;
8247 ibuf_nalloc_p = &comm->nalloc_int;
8248 vbuf_p = &comm->vbuf;
8251 nsend_zone_p = &ind->nsend[zone];
8255 /* Other threads write into temp buffers */
8256 ind_p = &comm->dth[th].ind;
8257 ibuf_p = &comm->dth[th].ibuf;
8258 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8259 vbuf_p = &comm->dth[th].vbuf;
8260 nsend_p = &comm->dth[th].nsend;
8261 nat_p = &comm->dth[th].nat;
8262 nsend_zone_p = &comm->dth[th].nsend_zone;
8264 comm->dth[th].nsend = 0;
8265 comm->dth[th].nat = 0;
8266 comm->dth[th].nsend_zone = 0;
/* Static partitioning of the cg range [cg0,cg1) over the threads */
8276 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8277 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8280 /* Get the cg's for this pulse in this zone */
8281 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8283 dim, dim_ind, dim0, dim1, dim2,
8286 normal, skew_fac2_d, skew_fac_01,
8287 v_d, v_0, v_1, &corners, sf2_round,
8288 bDistBonded, bBondComm,
8292 ibuf_p, ibuf_nalloc_p,
8298 /* Append data of threads>=1 to the communication buffers */
8299 for (th = 1; th < comm->nth; th++)
8301 dd_comm_setup_work_t *dth;
8304 dth = &comm->dth[th];
8306 ns1 = nsend + dth->nsend_zone;
8307 if (ns1 > ind->nalloc)
8309 ind->nalloc = over_alloc_dd(ns1);
8310 srenew(ind->index, ind->nalloc);
8312 if (ns1 > comm->nalloc_int)
8314 comm->nalloc_int = over_alloc_dd(ns1);
8315 srenew(comm->buf_int, comm->nalloc_int);
8317 if (ns1 > comm->vbuf.nalloc)
8319 comm->vbuf.nalloc = over_alloc_dd(ns1);
8320 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8323 for (i = 0; i < dth->nsend_zone; i++)
8325 ind->index[nsend] = dth->ind.index[i];
8326 comm->buf_int[nsend] = dth->ibuf[i];
8327 copy_rvec(dth->vbuf.v[i],
8328 comm->vbuf.v[nsend]);
8332 ind->nsend[zone] += dth->nsend_zone;
8335 /* Clear the counts in case we do not have pbc */
8336 for (zone = nzone_send; zone < nzone; zone++)
8338 ind->nsend[zone] = 0;
/* Entry nzone holds the total cg count, nzone+1 the total atom count */
8340 ind->nsend[nzone] = nsend;
8341 ind->nsend[nzone+1] = nat;
8342 /* Communicate the number of cg's and atoms to receive */
8343 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8344 ind->nsend, nzone+2,
8345 ind->nrecv, nzone+2);
8347 /* The rvec buffer is also required for atom buffers of size nsend
8348 * in dd_move_x and dd_move_f.
8350 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8354 /* We can receive in place if only the last zone is not empty */
8355 for (zone = 0; zone < nzone-1; zone++)
8357 if (ind->nrecv[zone] > 0)
8359 cd->bInPlace = FALSE;
8364 /* The int buffer is only required here for the cg indices */
8365 if (ind->nrecv[nzone] > comm->nalloc_int2)
8367 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8368 srenew(comm->buf_int2, comm->nalloc_int2);
8370 /* The rvec buffer is also required for atom buffers
8371 * of size nrecv in dd_move_x and dd_move_f.
8373 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8374 vec_rvec_check_alloc(&comm->vbuf2, i);
8378 /* Make space for the global cg indices */
8379 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8380 || dd->cg_nalloc == 0)
8382 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8383 srenew(index_gl, dd->cg_nalloc);
8384 srenew(cgindex, dd->cg_nalloc+1);
8386 /* Communicate the global cg indices */
8389 recv_i = index_gl + pos_cg;
8393 recv_i = comm->buf_int2;
8395 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8396 comm->buf_int, nsend,
8397 recv_i, ind->nrecv[nzone]);
8399 /* Make space for cg_cm */
8400 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8401 if (fr->cutoff_scheme == ecutsGROUP)
8409 /* Communicate cg_cm */
8412 recv_vr = cg_cm + pos_cg;
8416 recv_vr = comm->vbuf2.v;
8418 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8419 comm->vbuf.v, nsend,
8420 recv_vr, ind->nrecv[nzone]);
8422 /* Make the charge group index */
8425 zone = (p == 0 ? 0 : nzone - 1);
8426 while (zone < nzone)
8428 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8430 cg_gl = index_gl[pos_cg];
8431 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8432 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8433 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8436 /* Update the charge group presence,
8437 * so we can use it in the next pass of the loop.
8439 comm->bLocalCG[cg_gl] = TRUE;
8445 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8448 zone_cg_range[nzone+zone] = pos_cg;
8453 /* This part of the code is never executed with bBondComm. */
8454 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8455 index_gl, recv_i, cg_cm, recv_vr,
8456 cgindex, fr->cginfo_mb, fr->cginfo);
8457 pos_cg += ind->nrecv[nzone];
8459 nat_tot += ind->nrecv[nzone+1];
8463 /* Store the atom block for easy copying of communication buffers */
8464 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
/* srenew may have moved the arrays; store the (possibly new) pointers */
8468 dd->index_gl = index_gl;
8469 dd->cgindex = cgindex;
8471 dd->ncg_tot = zone_cg_range[zones->n];
8472 dd->nat_tot = nat_tot;
8473 comm->nat[ddnatHOME] = dd->nat_home;
8474 for (i = ddnatZONE; i < ddnatNR; i++)
8476 comm->nat[i] = dd->nat_tot;
8481 /* We don't need to update cginfo, since that was already done above.
8482 * So we pass NULL for the forcerec.
8484 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8485 NULL, comm->bLocalCG);
8490 fprintf(debug, "Finished setting up DD communication, zones:");
8491 for (c = 0; c < zones->n; c++)
8493 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8495 fprintf(debug, "\n");
/* Set the charge-group boundaries used for neighbor searching:
 * for each i-zone, derive cg1 and the j-charge-group range
 * [jcg0, jcg1) from the zone charge-group ranges.
 */
8499 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8503 for (c = 0; c < zones->nizone; c++)
8505 zones->izone[c].cg1 = zones->cg_range[c+1];
8506 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8507 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
/* Determine the extents (x0/x1) and axis-aligned bounding boxes
 * (bb_x0/bb_x1) of the DD zones zone_start..zone_end-1.
 *
 * Handles staggered grids (dd->bGridJump, via comm->zone_d1/zone_d2),
 * triclinic skew corrections (ddbox->skew_fac) and the extra distance
 * needed for multi-body bonded interactions (comm->cutoff_mbody).
 * When zone_start == 0 it also computes the home-zone charge-group
 * density zones->dens_zone0.
 *
 * NOTE(review): this extracted listing is non-contiguous (gaps in the
 * original line numbering); some statements and braces between the
 * lines shown here are not visible in this view.
 */
8511 static void set_zones_size(gmx_domdec_t *dd,
8512 matrix box, const gmx_ddbox_t *ddbox,
8513 int zone_start, int zone_end)
8515 gmx_domdec_comm_t *comm;
8516 gmx_domdec_zones_t *zones;
8518 int z, zi, zj0, zj1, d, dim;
8521 real size_j, add_tric;
8526 zones = &comm->zones;
8528 /* Do we need to determine extra distances for multi-body bondeds? */
8529 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8531 for (z = zone_start; z < zone_end; z++)
8533 /* Copy cell limits to zone limits.
8534 * Valid for non-DD dims and non-shifted dims.
8536 copy_rvec(comm->cell_x0, zones->size[z].x0);
8537 copy_rvec(comm->cell_x1, zones->size[z].x1);
8540 for (d = 0; d < dd->ndim; d++)
8544 for (z = 0; z < zones->n; z++)
8546 /* With a staggered grid we have different sizes
8547 * for non-shifted dimensions.
8549 if (dd->bGridJump && zones->shift[z][dim] == 0)
8553 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8554 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8558 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8559 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
/* Convert the cutoffs to lengths along this (possibly skewed) axis */
8565 rcmbs = comm->cutoff_mbody;
8566 if (ddbox->tric_dir[dim])
8568 rcs /= ddbox->skew_fac[dim];
8569 rcmbs /= ddbox->skew_fac[dim];
8572 /* Set the lower limit for the shifted zone dimensions */
8573 for (z = zone_start; z < zone_end; z++)
8575 if (zones->shift[z][dim] > 0)
8578 if (!dd->bGridJump || d == 0)
8580 zones->size[z].x0[dim] = comm->cell_x1[dim];
8581 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8585 /* Here we take the lower limit of the zone from
8586 * the lowest domain of the zone below.
8590 zones->size[z].x0[dim] =
8591 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8597 zones->size[z].x0[dim] =
8598 zones->size[zone_perm[2][z-4]].x0[dim];
8602 zones->size[z].x0[dim] =
8603 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8606 /* A temporary limit, is updated below */
8607 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8611 for (zi = 0; zi < zones->nizone; zi++)
8613 if (zones->shift[zi][dim] == 0)
8615 /* This takes the whole zone into account.
8616 * With multiple pulses this will lead
8617 * to a larger zone than strictly necessary.
8619 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8620 zones->size[zi].x1[dim]+rcmbs);
8628 /* Loop over the i-zones to set the upper limit of each
8631 for (zi = 0; zi < zones->nizone; zi++)
8633 if (zones->shift[zi][dim] == 0)
8635 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8637 if (zones->shift[z][dim] > 0)
8639 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8640 zones->size[zi].x1[dim]+rcs);
/* Second pass: compute a bounding box per zone from its extreme corners */
8647 for (z = zone_start; z < zone_end; z++)
8649 /* Initialization only required to keep the compiler happy */
8650 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8653 /* To determine the bounding box for a zone we need to find
8654 * the extreme corners of 4, 2 or 1 corners.
8656 nc = 1 << (ddbox->npbcdim - 1);
8658 for (c = 0; c < nc; c++)
8660 /* Set up a zone corner at x=0, ignoring triclinic couplings */
8664 corner[YY] = zones->size[z].x0[YY];
8668 corner[YY] = zones->size[z].x1[YY];
8672 corner[ZZ] = zones->size[z].x0[ZZ];
8676 corner[ZZ] = zones->size[z].x1[ZZ];
8678 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8680 /* With 1D domain decomposition the cg's are not in
8681 * the triclinic box, but triclinic x-y and rectangular y-z.
8682 * Shift y back, so it will later end up at 0.
8684 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8686 /* Apply the triclinic couplings */
8687 for (i = YY; i < ddbox->npbcdim; i++)
8689 for (j = XX; j < i; j++)
8691 corner[j] += corner[i]*box[i][j]/box[i][i];
8696 copy_rvec(corner, corner_min);
8697 copy_rvec(corner, corner_max);
8701 for (i = 0; i < DIM; i++)
8703 corner_min[i] = min(corner_min[i], corner[i]);
8704 corner_max[i] = max(corner_max[i], corner[i]);
8708 /* Copy the extreme corners without offset along x */
8709 for (i = 0; i < DIM; i++)
8711 zones->size[z].bb_x0[i] = corner_min[i];
8712 zones->size[z].bb_x1[i] = corner_max[i];
8714 /* Add the offset along x */
8715 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8716 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
8719 if (zone_start == 0)
/* Home-zone charge-group density, used for grid setup elsewhere */
8722 for (dim = 0; dim < DIM; dim++)
8724 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8726 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8731 for (z = zone_start; z < zone_end; z++)
8733 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8735 zones->size[z].x0[XX], zones->size[z].x1[XX],
8736 zones->size[z].x0[YY], zones->size[z].x1[YY],
8737 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8738 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8740 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8741 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8742 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
/* qsort()-style comparator for gmx_cgsort_t: primary key is the
 * ns grid cell index (nsc), ties are broken on the global topology
 * index (ind_gl). Used by gmx_qsort_threadsafe in dd_sort_order and
 * ordered_sort.
 */
8747 static int comp_cgsort(const void *a, const void *b)
8751 gmx_cgsort_t *cga, *cgb;
8752 cga = (gmx_cgsort_t *)a;
8753 cgb = (gmx_cgsort_t *)b;
8755 comp = cga->nsc - cgb->nsc;
8758 comp = cga->ind_gl - cgb->ind_gl;
/* Permute the int array a in place according to the precomputed
 * sort order sort[i].ind, using buf as scratch space (buf must hold
 * at least n ints). Gather into buf, then copy back.
 */
8764 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8769 /* Order the data */
8770 for (i = 0; i < n; i++)
8772 buf[i] = a[sort[i].ind];
8775 /* Copy back to the original array */
8776 for (i = 0; i < n; i++)
/* Permute the rvec array v in place according to the precomputed
 * sort order sort[i].ind, using buf as scratch space (buf must hold
 * at least n rvecs). Gather into buf, then copy back.
 */
8782 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8787 /* Order the data */
8788 for (i = 0; i < n; i++)
8790 copy_rvec(v[sort[i].ind], buf[i]);
8793 /* Copy back to the original array */
8794 for (i = 0; i < n; i++)
8796 copy_rvec(buf[i], v[i]);
/* Permute a per-atom rvec array v according to the charge-group sort
 * order: for each charge group (in sorted order) copy its atom block
 * [cgindex[ind], cgindex[ind+1]) into buf, then copy back.
 * When cgindex == NULL there is one atom per charge group and the
 * simpler per-element order_vec_cg is used instead.
 */
8800 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8803 int a, atot, cg, cg0, cg1, i;
8805 if (cgindex == NULL)
8807 /* Avoid the useless loop of the atoms within a cg */
8808 order_vec_cg(ncg, sort, v, buf);
8813 /* Order the data */
8815 for (cg = 0; cg < ncg; cg++)
8817 cg0 = cgindex[sort[cg].ind];
8818 cg1 = cgindex[sort[cg].ind+1];
8819 for (i = cg0; i < cg1; i++)
8821 copy_rvec(v[i], buf[a]);
8827 /* Copy back to the original array */
8828 for (a = 0; a < atot; a++)
8830 copy_rvec(buf[a], v[a]);
/* Produce a fully sorted array sort1 by qsort-ing the small unsorted
 * list sort_new (the charge groups that moved) and merging it with the
 * already-sorted list sort2 (the stationary charge groups). Ordering
 * matches comp_cgsort: by nsc, then by ind_gl.
 */
8834 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8835 int nsort_new, gmx_cgsort_t *sort_new,
8836 gmx_cgsort_t *sort1)
8840 /* The new indices are not very ordered, so we qsort them */
8841 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8843 /* sort2 is already ordered, so now we can merge the two arrays */
8847 while (i2 < nsort2 || i_new < nsort_new)
8851 sort1[i1++] = sort_new[i_new++];
8853 else if (i_new == nsort_new)
8855 sort1[i1++] = sort2[i2++];
8857 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8858 (sort2[i2].nsc == sort_new[i_new].nsc &&
8859 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8861 sort1[i1++] = sort2[i2++];
8865 sort1[i1++] = sort_new[i_new++];
/* Determine the sort order of the home charge groups for the group
 * cutoff scheme, keyed on ns grid cell index (with global index as
 * tie-breaker).
 *
 * When ncg_home_old >= 0, the groups that stayed in the same ns grid
 * cell are already in order, so only the moved/new groups are qsorted
 * and then merged via ordered_sort (the fast incremental path).
 * Otherwise all dd->ncg_home entries are qsorted from scratch.
 *
 * Returns the new home charge-group count (groups flagged as moved off
 * this node, nsc >= moved, are excluded) — presumably; the return
 * statement is outside this extracted view, confirm against the full
 * source.
 */
8870 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8872 gmx_domdec_sort_t *sort;
8873 gmx_cgsort_t *cgsort, *sort_i;
8874 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8875 int sort_last, sort_skip;
8877 sort = dd->comm->sort;
8879 a = fr->ns.grid->cell_index;
/* Cell indices >= this value signal cgs that moved to another node */
8881 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8883 if (ncg_home_old >= 0)
8885 /* The charge groups that remained in the same ns grid cell
8886 * are completely ordered. So we can sort efficiently by sorting
8887 * the charge groups that did move into the stationary list.
8892 for (i = 0; i < dd->ncg_home; i++)
8894 /* Check if this cg did not move to another node */
8897 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8899 /* This cg is new on this node or moved ns grid cell */
8900 if (nsort_new >= sort->sort_new_nalloc)
8902 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8903 srenew(sort->sort_new, sort->sort_new_nalloc);
8905 sort_i = &(sort->sort_new[nsort_new++]);
8909 /* This cg did not move */
8910 sort_i = &(sort->sort2[nsort2++]);
8912 /* Sort on the ns grid cell indices
8913 * and the global topology index.
8914 * index_gl is irrelevant with cell ns,
8915 * but we set it here anyhow to avoid a conditional.
8918 sort_i->ind_gl = dd->index_gl[i];
8925 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8928 /* Sort efficiently */
8929 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8934 cgsort = sort->sort;
8936 for (i = 0; i < dd->ncg_home; i++)
8938 /* Sort on the ns grid cell indices
8939 * and the global topology index
8941 cgsort[i].nsc = a[i];
8942 cgsort[i].ind_gl = dd->index_gl[i];
8944 if (cgsort[i].nsc < moved)
8951 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8953 /* Determine the order of the charge groups using qsort */
8954 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
/* Determine the sort order of the home atoms for the Verlet (nbnxn)
 * cutoff scheme: take the atom order from the nbnxn search grid
 * (nbnxn_get_atomorder) and store it in sort[].ind. Returns the new
 * home count ncg_new — presumably the number of valid entries copied;
 * the filter condition and return statement are outside this extracted
 * view, confirm against the full source.
 */
8960 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
8963 int ncg_new, i, *a, na;
8965 sort = dd->comm->sort->sort;
8967 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
8970 for (i = 0; i < na; i++)
8974 sort[ncg_new].ind = a[i];
/* Sort the local state on charge-group (or nbnxn grid) position.
 *
 * Computes the sort order via dd_sort_order (group scheme, optionally
 * incremental against ncg_home_old) or dd_sort_order_nbnxn (Verlet
 * scheme), then reorders every distributed state vector (x, v, sd_X,
 * cg_p), the cg centers of mass (group scheme only), the global cg
 * index, the cginfo array and the local cg index, and finally updates
 * dd->nat_home and the ns/nbnxn grid bookkeeping.
 *
 * NOTE(review): this extracted listing is non-contiguous (gaps in the
 * original line numbering); some statements and braces between the
 * lines shown here are not visible in this view.
 */
8982 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
8985 gmx_domdec_sort_t *sort;
8986 gmx_cgsort_t *cgsort, *sort_i;
8988 int ncg_new, i, *ibuf, cgsize;
8991 sort = dd->comm->sort;
8993 if (dd->ncg_home > sort->sort_nalloc)
8995 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
8996 srenew(sort->sort, sort->sort_nalloc);
8997 srenew(sort->sort2, sort->sort_nalloc);
8999 cgsort = sort->sort;
9001 switch (fr->cutoff_scheme)
9004 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9007 ncg_new = dd_sort_order_nbnxn(dd, fr);
9010 gmx_incons("unimplemented");
9014 /* We alloc with the old size, since cgindex is still old */
9015 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9016 vbuf = dd->comm->vbuf.v;
9020 cgindex = dd->cgindex;
9027 /* Remove the charge groups which are no longer at home here */
9028 dd->ncg_home = ncg_new;
9031 fprintf(debug, "Set the new home charge group count to %d\n",
9035 /* Reorder the state */
9036 for (i = 0; i < estNR; i++)
9038 if (EST_DISTR(i) && (state->flags & (1<<i)))
9043 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9046 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9049 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9052 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9056 case estDISRE_INITF:
9057 case estDISRE_RM3TAV:
9058 case estORIRE_INITF:
9060 /* No ordering required */
9063 gmx_incons("Unknown state entry encountered in dd_sort_state");
9068 if (fr->cutoff_scheme == ecutsGROUP)
/* Reorder the charge group centers of mass */
9071 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9074 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9076 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9077 srenew(sort->ibuf, sort->ibuf_nalloc);
9080 /* Reorder the global cg index */
9081 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9082 /* Reorder the cginfo */
9083 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9084 /* Rebuild the local cg index */
9088 for (i = 0; i < dd->ncg_home; i++)
9090 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9091 ibuf[i+1] = ibuf[i] + cgsize;
9093 for (i = 0; i < dd->ncg_home+1; i++)
9095 dd->cgindex[i] = ibuf[i];
9100 for (i = 0; i < dd->ncg_home+1; i++)
9105 /* Set the home atom number */
9106 dd->nat_home = dd->cgindex[dd->ncg_home];
9108 if (fr->cutoff_scheme == ecutsVERLET)
9110 /* The atoms are now exactly in grid order, update the grid order */
9111 nbnxn_set_atomorder(fr->nbv->nbs);
9115 /* Copy the sorted ns cell indices back to the ns grid struct */
9116 for (i = 0; i < dd->ncg_home; i++)
9118 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9120 fr->ns.grid->nr = dd->ncg_home;
/* Accumulate the per-category communicated-atom counts for this
 * partitioning into the running totals comm->sum_nat[], as the
 * difference between consecutive entries of comm->nat[].
 * Consumed later by print_dd_statistics.
 */
9124 static void add_dd_statistics(gmx_domdec_t *dd)
9126 gmx_domdec_comm_t *comm;
9131 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9133 comm->sum_nat[ddnat-ddnatZONE] +=
9134 comm->nat[ddnat] - comm->nat[ddnat-1];
/* Reset all DD statistics accumulators and load counters, so totals
 * reported at the end of the run cover only the production part.
 */
9139 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9141 gmx_domdec_comm_t *comm;
9146 /* Reset all the statistics and counters for total run counting */
9147 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9149 comm->sum_nat[ddnat-ddnatZONE] = 0;
9153 comm->load_step = 0;
9156 clear_ivec(comm->load_lim);
/* Print the accumulated domain-decomposition communication statistics
 * to the log file: the average number of atoms communicated per step
 * for forces, vsites and LINCS constraints (sums are first reduced
 * over all ranks with gmx_sumd), followed by the DD load balance
 * report when load was recorded for a dynamics run.
 */
9161 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9163 gmx_domdec_comm_t *comm;
9167 comm = cr->dd->comm;
/* Sum the per-rank statistics over all ranks */
9169 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9176 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9178 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
/* Average number of atoms communicated per partitioning */
9180 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9185 " av. #atoms communicated per step for force: %d x %.1f\n",
9189 if (cr->dd->vsite_comm)
9192 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9193 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9198 if (cr->dd->constraint_comm)
9201 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9202 1 + ir->nLincsIter, av);
9206 gmx_incons(" Unknown type for DD statistics");
9209 fprintf(fplog, "\n");
9211 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9213 print_dd_load_av(fplog, cr->dd);
9217 void dd_partition_system(FILE *fplog,
9220 gmx_bool bMasterState,
9222 t_state *state_global,
9223 gmx_mtop_t *top_global,
9225 t_state *state_local,
9228 gmx_localtop_t *top_local,
9231 gmx_shellfc_t shellfc,
9232 gmx_constr_t constr,
9234 gmx_wallcycle_t wcycle,
9238 gmx_domdec_comm_t *comm;
9239 gmx_ddbox_t ddbox = {0};
9241 gmx_int64_t step_pcoupl;
9242 rvec cell_ns_x0, cell_ns_x1;
9243 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9244 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9245 gmx_bool bRedist, bSortCG, bResortAll;
9246 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9253 bBoxChanged = (bMasterState || DEFORM(*ir));
9254 if (ir->epc != epcNO)
9256 /* With nstpcouple > 1 pressure coupling happens.
9257 * one step after calculating the pressure.
9258 * Box scaling happens at the end of the MD step,
9259 * after the DD partitioning.
9260 * We therefore have to do DLB in the first partitioning
9261 * after an MD step where P-coupling occured.
9262 * We need to determine the last step in which p-coupling occurred.
9263 * MRS -- need to validate this for vv?
9268 step_pcoupl = step - 1;
9272 step_pcoupl = ((step - 1)/n)*n + 1;
9274 if (step_pcoupl >= comm->partition_step)
9280 bNStGlobalComm = (step % nstglobalcomm == 0);
9282 if (!comm->bDynLoadBal)
9288 /* Should we do dynamic load balacing this step?
9289 * Since it requires (possibly expensive) global communication,
9290 * we might want to do DLB less frequently.
9292 if (bBoxChanged || ir->epc != epcNO)
9294 bDoDLB = bBoxChanged;
9298 bDoDLB = bNStGlobalComm;
9302 /* Check if we have recorded loads on the nodes */
9303 if (comm->bRecordLoad && dd_load_count(comm))
9305 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9307 /* Check if we should use DLB at the second partitioning
9308 * and every 100 partitionings,
9309 * so the extra communication cost is negligible.
9311 n = max(100, nstglobalcomm);
9312 bCheckDLB = (comm->n_load_collect == 0 ||
9313 comm->n_load_have % n == n-1);
9320 /* Print load every nstlog, first and last step to the log file */
9321 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9322 comm->n_load_collect == 0 ||
9324 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9326 /* Avoid extra communication due to verbose screen output
9327 * when nstglobalcomm is set.
9329 if (bDoDLB || bLogLoad || bCheckDLB ||
9330 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9332 get_load_distribution(dd, wcycle);
9337 dd_print_load(fplog, dd, step-1);
9341 dd_print_load_verbose(dd);
9344 comm->n_load_collect++;
9348 /* Since the timings are node dependent, the master decides */
9352 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
9355 fprintf(debug, "step %s, imb loss %f\n",
9356 gmx_step_str(step, sbuf),
9357 dd_force_imb_perf_loss(dd));
9360 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9363 turn_on_dlb(fplog, cr, step);
9368 comm->n_load_have++;
9371 cgs_gl = &comm->cgs_gl;
9376 /* Clear the old state */
9377 clear_dd_indices(dd, 0, 0);
9380 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9381 TRUE, cgs_gl, state_global->x, &ddbox);
9383 get_cg_distribution(fplog, step, dd, cgs_gl,
9384 state_global->box, &ddbox, state_global->x);
9386 dd_distribute_state(dd, cgs_gl,
9387 state_global, state_local, f);
9389 dd_make_local_cgs(dd, &top_local->cgs);
9391 /* Ensure that we have space for the new distribution */
9392 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9394 if (fr->cutoff_scheme == ecutsGROUP)
9396 calc_cgcm(fplog, 0, dd->ncg_home,
9397 &top_local->cgs, state_local->x, fr->cg_cm);
9400 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9402 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9404 else if (state_local->ddp_count != dd->ddp_count)
9406 if (state_local->ddp_count > dd->ddp_count)
9408 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9411 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9413 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9416 /* Clear the old state */
9417 clear_dd_indices(dd, 0, 0);
9419 /* Build the new indices */
9420 rebuild_cgindex(dd, cgs_gl->index, state_local);
9421 make_dd_indices(dd, cgs_gl->index, 0);
9422 ncgindex_set = dd->ncg_home;
9424 if (fr->cutoff_scheme == ecutsGROUP)
9426 /* Redetermine the cg COMs */
9427 calc_cgcm(fplog, 0, dd->ncg_home,
9428 &top_local->cgs, state_local->x, fr->cg_cm);
9431 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9433 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9435 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9436 TRUE, &top_local->cgs, state_local->x, &ddbox);
9438 bRedist = comm->bDynLoadBal;
9442 /* We have the full state, only redistribute the cgs */
9444 /* Clear the non-home indices */
9445 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9448 /* Avoid global communication for dim's without pbc and -gcom */
9449 if (!bNStGlobalComm)
9451 copy_rvec(comm->box0, ddbox.box0 );
9452 copy_rvec(comm->box_size, ddbox.box_size);
9454 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9455 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9460 /* For dim's without pbc and -gcom */
9461 copy_rvec(ddbox.box0, comm->box0 );
9462 copy_rvec(ddbox.box_size, comm->box_size);
9464 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9467 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9469 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
/* NOTE(review): this span is the interior of what appears to be
 * dd_partition_system() — the fragment starts and ends mid-function, and the
 * embedded original line numbers (9472..9810) are non-consecutive, so braces,
 * else-branches and switch case labels have been elided by extraction.
 * Comments below annotate the visible statements only; confirm against the
 * full source before acting on them.
 */
9472 /* Check if we should sort the charge groups */
9473 if (comm->nstSortCG > 0)
/* Sort on a master-state rebuild, or every nstSortCG redistribution steps. */
9475 bSortCG = (bMasterState ||
9476 (bRedist && (step % comm->nstSortCG == 0)));
/* Remember the old home charge-group count; used later by dd_sort_state
 * to sort only the newly received groups when a full resort is not needed. */
9483 ncg_home_old = dd->ncg_home;
9488 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
/* Move charge groups that left this domain to their new home ranks.
 * Outputs: ncgindex_set (index bookkeeping) and ncg_moved (count of movers). */
9490 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9492 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9494 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
/* Determine the neighbor-search grid extent and density for the home cell. */
9497 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9499 &comm->cell_x0, &comm->cell_x1,
9500 dd->ncg_home, fr->cg_cm,
9501 cell_ns_x0, cell_ns_x1, &grid_density);
9505 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
/* Record the old search-grid dimensions per cutoff scheme (group vs Verlet);
 * compared against the new dimensions below to decide on a full resort.
 * NOTE(review): case labels elided from this view. */
9508 switch (fr->cutoff_scheme)
9511 copy_ivec(fr->ns.grid->n, ncells_old);
9512 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9513 state_local->box, cell_ns_x0, cell_ns_x1,
9514 fr->rlistlong, grid_density);
9517 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9520 gmx_incons("unimplemented");
9522 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9523 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9527 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9529 /* Sort the state on charge group position.
9530 * This enables exact restarts from this step.
9531 * It also improves performance by about 15% with larger numbers
9532 * of atoms per node.
9535 /* Fill the ns grid with the home cell,
9536 * so we can sort with the indices.
9538 set_zones_ncg_home(dd);
/* Bin the home charge groups on the search grid (Verlet: nbnxn grid with
 * moved-group exclusion; group scheme: classic ns grid via fill_grid).
 * NOTE(review): case labels elided from this view. */
9540 switch (fr->cutoff_scheme)
9543 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9545 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9547 comm->zones.size[0].bb_x0,
9548 comm->zones.size[0].bb_x1,
9550 comm->zones.dens_zone0,
/* Skip groups flagged as moved when we redistributed this step. */
9553 ncg_moved, bRedist ? comm->moved : NULL,
9554 fr->nbv->grp[eintLocal].kernel_type,
9555 fr->nbv->grp[eintLocal].nbat);
9557 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9560 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9561 0, dd->ncg_home, fr->cg_cm);
9563 copy_ivec(fr->ns.grid->n, ncells_new);
9566 gmx_incons("unimplemented");
9569 bResortAll = bMasterState;
9571 /* Check if we can use the old order and ns grid cell indices
9572 * of the charge groups to sort the charge groups efficiently.
/* If the grid dimensions changed, the old cell indices are invalid and a
 * full resort is required. */
9574 if (ncells_new[XX] != ncells_old[XX] ||
9575 ncells_new[YY] != ncells_old[YY] ||
9576 ncells_new[ZZ] != ncells_old[ZZ])
9583 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9584 gmx_step_str(step, sbuf), dd->ncg_home);
/* -1 means sort everything; otherwise only groups past the old home count. */
9586 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9587 bResortAll ? -1 : ncg_home_old);
9588 /* Rebuild all the indices */
9589 ga2la_clear(dd->ga2la);
9592 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9595 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9597 /* Set up the communication and communicate the coordinates */
9598 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9600 /* Set the indices */
9601 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9603 /* Set the charge group boundaries for neighbor searching */
9604 set_cg_boundaries(&comm->zones);
9606 if (fr->cutoff_scheme == ecutsVERLET)
/* With Verlet lists, zone sizes are now set for all zones; zone 0 was
 * already handled above when the grid was filled (hence the bSortCG offset). */
9608 set_zones_size(dd, state_local->box, &ddbox,
9609 bSortCG ? 1 : 0, comm->zones.n);
9612 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
/* Debug dump of the home-zone coordinates as a PDB file.
 * NOTE(review): the guarding condition is outside this view. */
9615 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9616 -1,state_local->x,state_local->box);
9619 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9621 /* Extract a local topology from the global topology */
/* Collect the number of communication pulses per decomposition dimension. */
9622 for (i = 0; i < dd->ndim; i++)
9624 np[dd->dim[i]] = comm->cd[i].np;
9626 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9627 comm->cellsize_min, np,
/* Group scheme assigns interactions by charge-group centers, Verlet by atoms. */
9629 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9630 vsite, top_global, top_local);
9632 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9634 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9636 /* Set up the special atom communication */
/* n accumulates the atom count as vsite/constraint atoms are appended
 * after the zone atoms; comm->nat[] records the count per category.
 * NOTE(review): the surrounding switch/loop structure is elided here. */
9637 n = comm->nat[ddnatZONE];
9638 for (i = ddnatZONE+1; i < ddnatNR; i++)
9643 if (vsite && vsite->n_intercg_vsite)
9645 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9649 if (dd->bInterCGcons || dd->bInterCGsettles)
9651 /* Only for inter-cg constraints we need special code */
9652 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9653 constr, ir->nProjOrder,
9654 top_local->idef.il);
9658 gmx_incons("Unknown special atom type setup");
9663 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9665 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9667 /* Make space for the extra coordinates for virtual site
9668 * or constraint communication.
/* ddnatNR-1 is the last (largest) category: total atoms incl. special atoms. */
9670 state_local->natoms = comm->nat[ddnatNR-1];
9671 if (state_local->natoms > state_local->nalloc)
9673 dd_realloc_state(state_local, f, state_local->natoms);
/* Choose how many atoms need a separate no-virial-summation force buffer.
 * NOTE(review): else-branches between these cases are elided from this view. */
9676 if (fr->bF_NoVirSum)
9678 if (vsite && vsite->n_intercg_vsite)
9680 nat_f_novirsum = comm->nat[ddnatVSITE];
9684 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9686 nat_f_novirsum = dd->nat_tot;
9690 nat_f_novirsum = dd->nat_home;
9699 /* Set the number of atoms required for the force calculation.
9700 * Forces need to be constrained when using a twin-range setup
9701 * or with energy minimization. For simple simulations we could
9702 * avoid some allocation, zeroing and copying, but this is
9703 * probably not worth the complications and checking.
9705 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9706 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9708 /* We make all the mdatoms up to nat_tot_con.
9709 * We could save some work by only setting invmass
9710 * between nat_tot and nat_tot_con.
9712 /* This call also sets the new number of home particles to dd->nat_home */
9713 atoms2md(top_global, ir,
9714 comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
9716 /* Now we have the charges we can sort the FE interactions */
9717 dd_sort_local_top(dd, mdatoms, top_local);
9721 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9722 split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
9727 /* Make the local shell stuff, currently no communication is done */
9728 make_local_shells(cr, mdatoms, shellfc);
9731 if (ir->implicit_solvent)
9733 make_local_gb(cr, fr->born, ir->gb_algorithm);
9736 setup_bonded_threading(fr, &top_local->idef);
/* On a rank that computes PP (particle-particle) but not PME ... */
9738 if (!(cr->duty & DUTY_PME))
9740 /* Send the charges and/or c6/sigmas to our PME only node */
9741 gmx_pme_send_parameters(cr, mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9742 mdatoms->chargeA, mdatoms->chargeB,
9743 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9744 mdatoms->sigmaA, mdatoms->sigmaB,
9745 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
/* NOTE(review): guard (likely `if (constr)`) elided from this view. */
9750 set_constraints(constr, top_local, ir, mdatoms, cr);
9753 if (ir->ePull != epullNO)
9755 /* Update the local pull groups */
9756 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
/* NOTE(review): guard (likely `if (ir->bRot)`) elided from this view. */
9761 /* Update the local rotation groups */
9762 dd_make_local_rotation_groups(dd, ir->rot);
9765 if (ir->eSwapCoords != eswapNO)
9767 /* Update the local groups needed for ion swapping */
9768 dd_make_local_swap_groups(dd, ir->swap);
9771 add_dd_statistics(dd);
9773 /* Make sure we only count the cycles for this DD partitioning */
9774 clear_dd_cycle_counts(dd);
9776 /* Because the order of the atoms might have changed since
9777 * the last vsite construction, we need to communicate the constructing
9778 * atom coordinates again (for spreading the forces this MD step).
9780 dd_move_x_vsites(dd, state_local->box, state_local->x);
9782 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
/* Periodic full-system debug dump: gather coordinates, then write PDB. */
9784 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9786 dd_move_x(dd, state_local->box, state_local->x);
9787 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9788 -1, state_local->x, state_local->box);
9791 /* Store the partitioning step */
9792 comm->partition_step = step;
9794 /* Increase the DD partitioning counter */
/* NOTE(review): the dd->ddp_count++ statement itself is elided here. */
9796 /* The state currently matches this DD partitioning count, store it */
9797 state_local->ddp_count = dd->ddp_count;
9800 /* The DD master node knows the complete cg distribution,
9801 * store the count so we can possibly skip the cg info communication.
/* A sort invalidates the master's cached distribution, hence 0. */
9803 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9806 if (comm->DD_debug > 0)
9808 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9809 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9810 "after partitioning");