2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
49 #include "gmx_fatal.h"
50 #include "gmx_fatal_collective.h"
53 #include "domdec_network.h"
56 #include "chargegroup.h"
65 #include "mtop_util.h"
66 #include "gmx_ga2la.h"
68 #include "nbnxn_search.h"
70 #include "gmx_omp_nthreads.h"
71 #include "gpu_utils.h"
73 #include "gromacs/fileio/futil.h"
74 #include "gromacs/fileio/gmxfio.h"
75 #include "gromacs/fileio/pdbio.h"
76 #include "gromacs/timing/wallcycle.h"
77 #include "gromacs/utility/gmxmpi.h"
78 #include "gromacs/swap/swapcoords.h"
79 #include "gromacs/utility/qsort_threadsafe.h"
80 #include "gromacs/pulling/pull.h"
81 #include "gromacs/pulling/pull_rotation.h"
83 #define DDRANK(dd, rank) (rank)
84 #define DDMASTERRANK(dd) (dd->masterrank)
86 typedef struct gmx_domdec_master
88 /* The cell boundaries */
90 /* The global charge group division */
91 int *ncg; /* Number of home charge groups for each node */
92 int *index; /* Index of nnodes+1 into cg */
93 int *cg; /* Global charge group index */
94 int *nat; /* Number of home atoms for each node. */
95 int *ibuf; /* Buffer for communication */
96 rvec *vbuf; /* Buffer for state scattering and gathering */
97 } gmx_domdec_master_t;
101 /* The numbers of charge groups to send and receive for each cell
102 * that requires communication, the last entry contains the total
103 * number of atoms that needs to be communicated.
105 int nsend[DD_MAXIZONE+2];
106 int nrecv[DD_MAXIZONE+2];
107 /* The charge groups to send */
110 /* The atom range for non-in-place communication */
111 int cell2at0[DD_MAXIZONE];
112 int cell2at1[DD_MAXIZONE];
117 int np; /* Number of grid pulses in this dimension */
118 int np_dlb; /* For dlb, for use with edlbAUTO */
119 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
121 gmx_bool bInPlace; /* Can we communicate in place? */
122 } gmx_domdec_comm_dim_t;
126 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
127 real *cell_f; /* State var.: cell boundaries, box relative */
128 real *old_cell_f; /* Temp. var.: old cell size */
129 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
130 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
131 real *bound_min; /* Temp. var.: lower limit for cell boundary */
132 real *bound_max; /* Temp. var.: upper limit for cell boundary */
133 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
134 real *buf_ncd; /* Temp. var. */
137 #define DD_NLOAD_MAX 9
139 /* Here floats are accurate enough, since these variables
140 * only influence the load balancing, not the actual MD results.
167 gmx_cgsort_t *sort_new;
179 /* This enum determines the order of the coordinates.
180 * ddnatHOME and ddnatZONE should be first and second,
181 * the others can be ordered as wanted.
184 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
188 edlbAUTO, edlbNO, edlbYES, edlbNR
190 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
194 int dim; /* The dimension */
195 gmx_bool dim_match; /* Tells if DD and PME dims match */
196 int nslab; /* The number of PME slabs in this dimension */
197 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
198 int *pp_min; /* The minimum pp node location, size nslab */
199 int *pp_max; /* The maximum pp node location,size nslab */
200 int maxshift; /* The maximum shift for coordinate redistribution in PME */
205 real min0; /* The minimum bottom of this zone */
206 real max1; /* The maximum top of this zone */
207 real min1; /* The minimum top of this zone */
208 real mch0; /* The maximum bottom communicaton height for this zone */
209 real mch1; /* The maximum top communicaton height for this zone */
210 real p1_0; /* The bottom value of the first cell in this zone */
211 real p1_1; /* The top value of the first cell in this zone */
216 gmx_domdec_ind_t ind;
223 } dd_comm_setup_work_t;
225 typedef struct gmx_domdec_comm
227 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
228 * unless stated otherwise.
231 /* The number of decomposition dimensions for PME, 0: no PME */
233 /* The number of nodes doing PME (PP/PME or only PME) */
237 /* The communication setup including the PME only nodes */
238 gmx_bool bCartesianPP_PME;
241 int *pmenodes; /* size npmenodes */
242 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
243 * but with bCartesianPP_PME */
244 gmx_ddpme_t ddpme[2];
246 /* The DD particle-particle nodes only */
247 gmx_bool bCartesianPP;
248 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
250 /* The global charge groups */
253 /* Should we sort the cgs */
255 gmx_domdec_sort_t *sort;
257 /* Are there charge groups? */
260 /* Are there bonded and multi-body interactions between charge groups? */
261 gmx_bool bInterCGBondeds;
262 gmx_bool bInterCGMultiBody;
264 /* Data for the optional bonded interaction atom communication range */
271 /* Are we actually using DLB? */
272 gmx_bool bDynLoadBal;
274 /* Cell sizes for static load balancing, first index cartesian */
277 /* The width of the communicated boundaries */
280 /* The minimum cell size (including triclinic correction) */
282 /* For dlb, for use with edlbAUTO */
283 rvec cellsize_min_dlb;
284 /* The lower limit for the DD cell size with DLB */
286 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
287 gmx_bool bVacDLBNoLimit;
289 /* With PME load balancing we set limits on DLB */
290 gmx_bool bPMELoadBalDLBLimits;
291 /* DLB needs to take into account that we want to allow this maximum
292 * cut-off (for PME load balancing), this could limit cell boundaries.
294 real PMELoadBal_max_cutoff;
296 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
298 /* box0 and box_size are required with dim's without pbc and -gcom */
302 /* The cell boundaries */
306 /* The old location of the cell boundaries, to check cg displacements */
310 /* The communication setup and charge group boundaries for the zones */
311 gmx_domdec_zones_t zones;
313 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
314 * cell boundaries of neighboring cells for dynamic load balancing.
316 gmx_ddzone_t zone_d1[2];
317 gmx_ddzone_t zone_d2[2][2];
319 /* The coordinate/force communication setup and indices */
320 gmx_domdec_comm_dim_t cd[DIM];
321 /* The maximum number of cells to communicate with in one dimension */
324 /* Which cg distribution is stored on the master node */
325 int master_cg_ddp_count;
327 /* The number of cg's received from the direct neighbors */
328 int zone_ncg1[DD_MAXZONE];
330 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
333 /* Array for signalling if atoms have moved to another domain */
337 /* Communication buffer for general use */
341 /* Communication buffer for general use */
344 /* Temporary storage for thread parallel communication setup */
346 dd_comm_setup_work_t *dth;
348 /* Communication buffers only used with multiple grid pulses */
353 /* Communication buffers for local redistribution */
355 int cggl_flag_nalloc[DIM*2];
357 int cgcm_state_nalloc[DIM*2];
359 /* Cell sizes for dynamic load balancing */
360 gmx_domdec_root_t **root;
364 real cell_f_max0[DIM];
365 real cell_f_min1[DIM];
367 /* Stuff for load communication */
368 gmx_bool bRecordLoad;
369 gmx_domdec_load_t *load;
370 int nrank_gpu_shared;
372 MPI_Comm *mpi_comm_load;
373 MPI_Comm mpi_comm_gpu_shared;
376 /* Maximum DLB scaling per load balancing step in percent */
380 float cycl[ddCyclNr];
381 int cycl_n[ddCyclNr];
382 float cycl_max[ddCyclNr];
383 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
387 /* Have often have did we have load measurements */
389 /* Have often have we collected the load measurements */
393 double sum_nat[ddnatNR-ddnatZONE];
403 /* The last partition step */
404 gmx_int64_t partition_step;
412 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
415 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
416 #define DD_FLAG_NRCG 65535
417 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
418 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
420 /* Zone permutation required to obtain consecutive charge groups
421 * for neighbor searching.
423 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
425 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
426 * components see only j zones with that component 0.
429 /* The DD zone order */
430 static const ivec dd_zo[DD_MAXZONE] =
431 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
436 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
441 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
446 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
448 /* Factors used to avoid problems due to rounding issues */
449 #define DD_CELL_MARGIN 1.0001
450 #define DD_CELL_MARGIN2 1.00005
451 /* Factor to account for pressure scaling during nstlist steps */
452 #define DD_PRES_SCALE_MARGIN 1.02
454 /* Allowed performance loss before we DLB or warn */
455 #define DD_PERF_LOSS 0.05
457 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
459 /* Use separate MPI send and receive commands
460 * when nnodes <= GMX_DD_NNODES_SENDRECV.
461 * This saves memory (and some copying for small nnodes).
462 * For high parallelization scatter and gather calls are used.
464 #define GMX_DD_NNODES_SENDRECV 4
468 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
470 static void index2xyz(ivec nc,int ind,ivec xyz)
472 xyz[XX] = ind % nc[XX];
473 xyz[YY] = (ind / nc[XX]) % nc[YY];
474 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
478 /* This order is required to minimize the coordinate communication in PME
479 * which uses decomposition in the x direction.
481 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
483 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
485 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
486 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
487 xyz[ZZ] = ind % nc[ZZ];
/* Map a DD grid coordinate c to the DD node (rank) id.
 * NOTE(review): the declarations, braces, the non-Cartesian fallback
 * branch and the return statement are missing from this corrupted copy;
 * restore them from the original source before compiling.
 */
490 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
/* Lexicographic index of the coordinate in the DD grid */
495 ddindex = dd_index(dd->nc, c);
496 if (dd->comm->bCartesianPP_PME)
/* Combined PP+PME Cartesian setup: use the precomputed lookup table */
498 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
500 else if (dd->comm->bCartesianPP)
/* Pure PP Cartesian communicator: ask MPI for the rank */
503 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
514 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
516 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
/* Return the 1-based global atom number for local atom index i.
 * NOTE(review): the branch handling dd == NULL, the declaration of
 * atnr, braces and the return statement are missing from this corrupted
 * copy.
 */
519 int ddglatnr(gmx_domdec_t *dd, int i)
/* nat[ddnatNR-1] is the total local atom count (last entry of the
 * cumulative atom ranges) */
529 if (i >= dd->comm->nat[ddnatNR-1])
531 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
/* gatindex maps local to global (0-based); +1 for 1-based numbering */
533 atnr = dd->gatindex[i] + 1;
539 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
541 return &dd->comm->cgs_gl;
/* Initialize an rvec buffer to empty.
 * NOTE(review): the body of this function (presumably zeroing the
 * allocation count and pointer) is missing from this corrupted copy --
 * confirm against the original source.
 */
544 static void vec_rvec_init(vec_rvec_t *v)
/* Ensure the rvec buffer v can hold at least n elements, growing it
 * with the standard DD over-allocation factor.
 * NOTE(review): the guarding condition (presumably "if (n > v->nalloc)")
 * and the braces are missing from this corrupted copy -- the
 * reallocation must be conditional, not unconditional.
 */
550 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
554 v->nalloc = over_alloc_dd(n);
555 srenew(v->v, v->nalloc);
/* Store the home charge-group distribution of the current domain
 * decomposition in the global t_state (cg_gl list plus ddp_count
 * bookkeeping), so a later collect can verify it matches.
 * NOTE(review): the declaration of i and several braces are missing
 * from this corrupted copy.
 */
559 void dd_store_state(gmx_domdec_t *dd, t_state *state)
/* The state must correspond to this DD partitioning generation */
563 if (state->ddp_count != dd->ddp_count)
565 gmx_incons("The state does not the domain decomposition state")
568 state->ncg_gl = dd->ncg_home;
569 if (state->ncg_gl > state->cg_gl_nalloc)
571 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
572 srenew(state->cg_gl, state->cg_gl_nalloc);
/* Copy the global indices of the home charge groups */
574 for (i = 0; i < state->ncg_gl; i++)
576 state->cg_gl[i] = dd->index_gl[i];
579 state->ddp_count_cg_gl = dd->ddp_count;
582 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
584 return &dd->comm->zones;
/* For home charge group icg, return the j-charge-group range
 * [*jcg0, *jcg1) to search against and the corresponding shift ranges
 * shift0/shift1 per dimension.
 * NOTE(review): the izone search loop body, the first-izone branch,
 * declarations and several braces are missing from this corrupted copy.
 */
587 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
588 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
590 gmx_domdec_zones_t *zones;
593 zones = &dd->comm->zones;
/* Find the i-zone containing icg */
596 while (icg >= zones->izone[izone].cg1)
605 else if (izone < zones->nizone)
607 *jcg0 = zones->izone[izone].jcg0;
611 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
612 icg, izone, zones->nizone);
615 *jcg1 = zones->izone[izone].jcg1;
617 for (d = 0; d < dd->ndim; d++)
620 shift0[dim] = zones->izone[izone].shift0[dim];
621 shift1[dim] = zones->izone[izone].shift1[dim];
/* With triclinic cells or dynamic load balancing the boundaries are
 * not rectangular, so widen the shift range conservatively. */
622 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
624 /* A conservative approach, this can be optimized */
631 int dd_natoms_vsite(gmx_domdec_t *dd)
633 return dd->comm->nat[ddnatVSITE];
636 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
638 *at_start = dd->comm->nat[ddnatCON-1];
639 *at_end = dd->comm->nat[ddnatCON];
/* Communicate halo coordinates: for each DD dimension and pulse, pack
 * the coordinates of the charge groups to send (plain copy, shifted by
 * a box vector across periodic boundaries, or rotated for screw PBC),
 * exchange with the neighbor and unpack beyond the home atoms in x[].
 * NOTE(review): many lines (zone counting, buffer selection, the
 * in-place vs. out-of-place receive logic, braces) are missing from
 * this corrupted copy.
 */
642 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
644 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
645 int *index, *cgindex;
646 gmx_domdec_comm_t *comm;
647 gmx_domdec_comm_dim_t *cd;
648 gmx_domdec_ind_t *ind;
649 rvec shift = {0, 0, 0}, *buf, *rbuf;
650 gmx_bool bPBC, bScrew;
654 cgindex = dd->cgindex;
659 nat_tot = dd->nat_home;
660 for (d = 0; d < dd->ndim; d++)
/* Only the cells at the lower edge of the grid cross the periodic
 * boundary in this dimension */
662 bPBC = (dd->ci[dd->dim[d]] == 0);
663 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
666 copy_rvec(box[dd->dim[d]], shift);
669 for (p = 0; p < cd->np; p++)
/* Case 1: no PBC shift needed, plain copy into the send buffer */
676 for (i = 0; i < ind->nsend[nzone]; i++)
678 at0 = cgindex[index[i]];
679 at1 = cgindex[index[i]+1];
680 for (j = at0; j < at1; j++)
682 copy_rvec(x[j], buf[n]);
/* Case 2: shift coordinates by the box vector across the boundary */
689 for (i = 0; i < ind->nsend[nzone]; i++)
691 at0 = cgindex[index[i]];
692 at1 = cgindex[index[i]+1];
693 for (j = at0; j < at1; j++)
695 /* We need to shift the coordinates */
696 rvec_add(x[j], shift, buf[n]);
/* Case 3: screw PBC, shift x and mirror y/z */
703 for (i = 0; i < ind->nsend[nzone]; i++)
705 at0 = cgindex[index[i]];
706 at1 = cgindex[index[i]+1];
707 for (j = at0; j < at1; j++)
710 buf[n][XX] = x[j][XX] + shift[XX];
712 * This operation requires a special shift force
713 * treatment, which is performed in calc_vir.
715 buf[n][YY] = box[YY][YY] - x[j][YY];
716 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
728 rbuf = comm->vbuf2.v;
730 /* Send and receive the coordinates */
731 dd_sendrecv_rvec(dd, d, dddirBackward,
732 buf, ind->nsend[nzone+1],
733 rbuf, ind->nrecv[nzone+1]);
/* Out-of-place receive: copy from the receive buffer into the
 * per-zone atom ranges */
737 for (zone = 0; zone < nzone; zone++)
739 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
741 copy_rvec(rbuf[j], x[i]);
746 nat_tot += ind->nrecv[nzone+1];
/* Communicate halo forces back to their home ranks: the reverse of
 * dd_move_x. Received forces are accumulated onto the local atoms;
 * when fshift != NULL the matching periodic shift forces are
 * accumulated too, and screw PBC rotates the force components back.
 * NOTE(review): many lines (shift-vector index computation, buffer
 * selection, braces) are missing from this corrupted copy.
 */
752 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
754 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
755 int *index, *cgindex;
756 gmx_domdec_comm_t *comm;
757 gmx_domdec_comm_dim_t *cd;
758 gmx_domdec_ind_t *ind;
762 gmx_bool bPBC, bScrew;
766 cgindex = dd->cgindex;
/* Walk the dimensions in reverse order of dd_move_x */
771 nzone = comm->zones.n/2;
772 nat_tot = dd->nat_tot;
773 for (d = dd->ndim-1; d >= 0; d--)
775 bPBC = (dd->ci[dd->dim[d]] == 0);
776 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
/* Without shift-force accumulation and without screw PBC there is
 * no shift bookkeeping to do */
777 if (fshift == NULL && !bScrew)
781 /* Determine which shift vector we need */
787 for (p = cd->np-1; p >= 0; p--)
790 nat_tot -= ind->nrecv[nzone+1];
797 sbuf = comm->vbuf2.v;
/* Out-of-place case: gather the halo forces into the send buffer */
799 for (zone = 0; zone < nzone; zone++)
801 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
803 copy_rvec(f[i], sbuf[j]);
808 /* Communicate the forces */
809 dd_sendrecv_rvec(dd, d, dddirForward,
810 sbuf, ind->nrecv[nzone+1],
811 buf, ind->nsend[nzone+1]);
813 /* Add the received forces */
/* Case 1: no shift force accumulation */
817 for (i = 0; i < ind->nsend[nzone]; i++)
819 at0 = cgindex[index[i]];
820 at1 = cgindex[index[i]+1];
821 for (j = at0; j < at1; j++)
823 rvec_inc(f[j], buf[n]);
/* Case 2: also accumulate the periodic shift force */
830 for (i = 0; i < ind->nsend[nzone]; i++)
832 at0 = cgindex[index[i]];
833 at1 = cgindex[index[i]+1];
834 for (j = at0; j < at1; j++)
836 rvec_inc(f[j], buf[n]);
837 /* Add this force to the shift force */
838 rvec_inc(fshift[is], buf[n]);
/* Case 3: screw PBC, rotate the force back (negate y/z) */
845 for (i = 0; i < ind->nsend[nzone]; i++)
847 at0 = cgindex[index[i]];
848 at1 = cgindex[index[i]+1];
849 for (j = at0; j < at1; j++)
851 /* Rotate the force */
852 f[j][XX] += buf[n][XX];
853 f[j][YY] -= buf[n][YY];
854 f[j][ZZ] -= buf[n][ZZ];
857 /* Add this force to the shift force */
858 rvec_inc(fshift[is], buf[n]);
/* Spread a single real value per atom to the halo atoms: same
 * communication pattern as dd_move_x, but for a scalar array v[]
 * instead of rvecs (buffers are reused by casting rvecs to reals).
 * NOTE(review): the packing/unpacking statements inside the inner
 * loops and several braces are missing from this corrupted copy.
 */
869 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
871 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
872 int *index, *cgindex;
873 gmx_domdec_comm_t *comm;
874 gmx_domdec_comm_dim_t *cd;
875 gmx_domdec_ind_t *ind;
880 cgindex = dd->cgindex;
/* Reuse the rvec communication buffer as a flat real buffer */
882 buf = &comm->vbuf.v[0][0];
885 nat_tot = dd->nat_home;
886 for (d = 0; d < dd->ndim; d++)
889 for (p = 0; p < cd->np; p++)
894 for (i = 0; i < ind->nsend[nzone]; i++)
896 at0 = cgindex[index[i]];
897 at1 = cgindex[index[i]+1];
898 for (j = at0; j < at1; j++)
911 rbuf = &comm->vbuf2.v[0][0];
913 /* Send and receive the coordinates */
914 dd_sendrecv_real(dd, d, dddirBackward,
915 buf, ind->nsend[nzone+1],
916 rbuf, ind->nrecv[nzone+1]);
920 for (zone = 0; zone < nzone; zone++)
922 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
929 nat_tot += ind->nrecv[nzone+1];
/* Sum a single real value per atom back from the halo atoms onto the
 * home atoms: same communication pattern as dd_move_f, but for a
 * scalar array v[] instead of rvecs.
 * NOTE(review): the accumulation statements inside the inner loops and
 * several braces are missing from this corrupted copy.
 */
935 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
937 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
938 int *index, *cgindex;
939 gmx_domdec_comm_t *comm;
940 gmx_domdec_comm_dim_t *cd;
941 gmx_domdec_ind_t *ind;
946 cgindex = dd->cgindex;
/* Reuse the rvec communication buffer as a flat real buffer */
948 buf = &comm->vbuf.v[0][0];
/* Walk the dimensions in reverse order of dd_atom_spread_real */
951 nzone = comm->zones.n/2;
952 nat_tot = dd->nat_tot;
953 for (d = dd->ndim-1; d >= 0; d--)
956 for (p = cd->np-1; p >= 0; p--)
959 nat_tot -= ind->nrecv[nzone+1];
966 sbuf = &comm->vbuf2.v[0][0];
968 for (zone = 0; zone < nzone; zone++)
970 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
977 /* Communicate the forces */
978 dd_sendrecv_real(dd, d, dddirForward,
979 sbuf, ind->nrecv[nzone+1],
980 buf, ind->nsend[nzone+1]);
982 /* Add the received forces */
984 for (i = 0; i < ind->nsend[nzone]; i++)
986 at0 = cgindex[index[i]];
987 at1 = cgindex[index[i]+1];
988 for (j = at0; j < at1; j++)
999 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
1001 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1003 zone->min0, zone->max1,
1004 zone->mch0, zone->mch0,
1005 zone->p1_0, zone->p1_1);
1009 #define DDZONECOMM_MAXZONE 5
1010 #define DDZONECOMM_BUFSIZE 3
1012 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1013 int ddimind, int direction,
1014 gmx_ddzone_t *buf_s, int n_s,
1015 gmx_ddzone_t *buf_r, int n_r)
1017 #define ZBS DDZONECOMM_BUFSIZE
1018 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1019 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
1022 for (i = 0; i < n_s; i++)
1024 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1025 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1026 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1027 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1028 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1029 vbuf_s[i*ZBS+1][2] = 0;
1030 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1031 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1032 vbuf_s[i*ZBS+2][2] = 0;
1035 dd_sendrecv_rvec(dd, ddimind, direction,
1039 for (i = 0; i < n_r; i++)
1041 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1042 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1043 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1044 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1045 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1046 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1047 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
/* For dynamic load balancing: exchange zone boundary information
 * (min/max cell boundaries and maximum communication heights) between
 * neighboring DD cells along each DD dimension, accumulate the
 * extremes over all pulses, and finally widen the non-bonded search
 * bounds cell_ns_x0/cell_ns_x1 accordingly.
 * NOTE(review): this corrupted copy is missing many lines (dim
 * assignments, pos/buf_size bookkeeping, the dd_sendrecv_ddzone
 * argument list, the dh[] declaration, braces); the comments below
 * annotate only the visible structure.
 */
1053 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1054 rvec cell_ns_x0, rvec cell_ns_x1)
1056 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1058 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1059 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1060 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1061 rvec extr_s[2], extr_r[2];
1063 real dist_d, c = 0, det;
1064 gmx_domdec_comm_t *comm;
1065 gmx_bool bPBC, bUse;
/* Initialize the zone records for dimensions 1 and 2 from the current
 * non-bonded search bounds of this cell */
1069 for (d = 1; d < dd->ndim; d++)
1072 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1073 zp->min0 = cell_ns_x0[dim];
1074 zp->max1 = cell_ns_x1[dim];
1075 zp->min1 = cell_ns_x1[dim];
1076 zp->mch0 = cell_ns_x0[dim];
1077 zp->mch1 = cell_ns_x1[dim];
1078 zp->p1_0 = cell_ns_x0[dim];
1079 zp->p1_1 = cell_ns_x1[dim];
/* Loop over the dimensions from inner to outer */
1082 for (d = dd->ndim-2; d >= 0; d--)
1085 bPBC = (dim < ddbox->npbcdim);
1087 /* Use an rvec to store two reals */
1088 extr_s[d][0] = comm->cell_f0[d+1];
1089 extr_s[d][1] = comm->cell_f1[d+1];
1090 extr_s[d][2] = comm->cell_f1[d+1];
1093 /* Store the extremes in the backward sending buffer,
1094 * so the get updated separately from the forward communication.
1096 for (d1 = d; d1 < dd->ndim-1; d1++)
1098 /* We invert the order to be able to use the same loop for buf_e */
1099 buf_s[pos].min0 = extr_s[d1][1];
1100 buf_s[pos].max1 = extr_s[d1][0];
1101 buf_s[pos].min1 = extr_s[d1][2];
1102 buf_s[pos].mch0 = 0;
1103 buf_s[pos].mch1 = 0;
1104 /* Store the cell corner of the dimension we communicate along */
1105 buf_s[pos].p1_0 = comm->cell_x0[dim];
1106 buf_s[pos].p1_1 = 0;
1110 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1113 if (dd->ndim == 3 && d == 0)
1115 buf_s[pos] = comm->zone_d2[0][1];
1117 buf_s[pos] = comm->zone_d1[0];
1121 /* We only need to communicate the extremes
1122 * in the forward direction
1124 npulse = comm->cd[d].np;
1127 /* Take the minimum to avoid double communication */
1128 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1132 /* Without PBC we should really not communicate over
1133 * the boundaries, but implementing that complicates
1134 * the communication setup and therefore we simply
1135 * do all communication, but ignore some data.
1137 npulse_min = npulse;
1139 for (p = 0; p < npulse_min; p++)
1141 /* Communicate the extremes forward */
1142 bUse = (bPBC || dd->ci[dim] > 0);
1144 dd_sendrecv_rvec(dd, d, dddirForward,
1145 extr_s+d, dd->ndim-d-1,
1146 extr_r+d, dd->ndim-d-1);
1150 for (d1 = d; d1 < dd->ndim-1; d1++)
1152 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1153 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1154 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1160 for (p = 0; p < npulse; p++)
1162 /* Communicate all the zone information backward */
1163 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1165 dd_sendrecv_ddzone(dd, d, dddirBackward,
1172 for (d1 = d+1; d1 < dd->ndim; d1++)
1174 /* Determine the decrease of maximum required
1175 * communication height along d1 due to the distance along d,
1176 * this avoids a lot of useless atom communication.
1178 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1180 if (ddbox->tric_dir[dim])
1182 /* c is the off-diagonal coupling between the cell planes
1183 * along directions d and d1.
1185 c = ddbox->v[dim][dd->dim[d1]][dim];
1191 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1194 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1198 /* A negative value signals out of range */
1204 /* Accumulate the extremes over all pulses */
1205 for (i = 0; i < buf_size; i++)
1209 buf_e[i] = buf_r[i];
1215 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1216 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1217 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1220 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1228 if (bUse && dh[d1] >= 0)
1230 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1231 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1234 /* Copy the received buffer to the send buffer,
1235 * to pass the data through with the next pulse.
1237 buf_s[i] = buf_r[i];
1239 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1240 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1242 /* Store the extremes */
1245 for (d1 = d; d1 < dd->ndim-1; d1++)
1247 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1248 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1249 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1253 if (d == 1 || (d == 0 && dd->ndim == 3))
1255 for (i = d; i < 2; i++)
1257 comm->zone_d2[1-d][i] = buf_e[pos];
1263 comm->zone_d1[1] = buf_e[pos];
/* Widen the non-bonded search bounds with the accumulated zone
 * extremes for dimension 1 ... */
1273 for (i = 0; i < 2; i++)
1277 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1279 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1280 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
/* ... and for dimension 2 */
1286 for (i = 0; i < 2; i++)
1288 for (j = 0; j < 2; j++)
1292 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1294 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1295 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
/* Record the accumulated cell-fraction extremes per dimension */
1299 for (d = 1; d < dd->ndim; d++)
1301 comm->cell_f_max0[d] = extr_s[d-1][0];
1302 comm->cell_f_min1[d] = extr_s[d-1][1];
1305 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1306 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
/* Gather the charge-group counts and global charge-group indices of
 * all DD nodes on the master, so state vectors can subsequently be
 * collected in the correct global order. A no-op when the master
 * already has the distribution for this ddp_count.
 * NOTE(review): several lines (early return, ibuf selection, DDMASTER
 * guards, the dd_gatherv call's first argument line, braces) are
 * missing from this corrupted copy.
 */
1311 static void dd_collect_cg(gmx_domdec_t *dd,
1312 t_state *state_local)
1314 gmx_domdec_master_t *ma = NULL;
1315 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1318 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1320 /* The master has the correct distribution */
/* Determine which cg list describes this state */
1324 if (state_local->ddp_count == dd->ddp_count)
1326 ncg_home = dd->ncg_home;
1328 nat_home = dd->nat_home;
1330 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1332 cgs_gl = &dd->comm->cgs_gl;
1334 ncg_home = state_local->ncg_gl;
1335 cg = state_local->cg_gl;
/* Count the atoms from the charge-group sizes */
1337 for (i = 0; i < ncg_home; i++)
1339 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1344 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1347 buf2[0] = dd->ncg_home;
1348 buf2[1] = dd->nat_home;
1358 /* Collect the charge group and atom counts on the master */
1359 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1364 for (i = 0; i < dd->nnodes; i++)
1366 ma->ncg[i] = ma->ibuf[2*i];
1367 ma->nat[i] = ma->ibuf[2*i+1];
1368 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1371 /* Make byte counts and indices */
1372 for (i = 0; i < dd->nnodes; i++)
1374 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1375 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1379 fprintf(debug, "Initial charge group distribution: ");
1380 for (i = 0; i < dd->nnodes; i++)
1382 fprintf(debug, " %d", ma->ncg[i]);
1384 fprintf(debug, "\n");
1388 /* Collect the charge group indices on the master */
1390 dd->ncg_home*sizeof(int), dd->index_gl,
1391 DDMASTER(dd) ? ma->ibuf : NULL,
1392 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1393 DDMASTER(dd) ? ma->cg : NULL);
1395 dd->comm->master_cg_ddp_count = state_local->ddp_count;
/* Collect a per-atom rvec array on the master using point-to-point
 * MPI_Send/MPI_Recv (used for small node counts, see
 * GMX_DD_NNODES_SENDRECV). Non-master ranks send their local vector;
 * the master scatters each rank's data into the global array v in
 * charge-group order.
 * NOTE(review): the lv/v parameter line, DDMASTER branching, the
 * skip-self check in the receive loop, buffer declarations and braces
 * are missing from this corrupted copy.
 */
1398 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1401 gmx_domdec_master_t *ma;
1402 int n, i, c, a, nalloc = 0;
/* Non-master: send the home-atom vector, tagged with our rank */
1411 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1412 dd->rank, dd->mpi_comm_all);
1417 /* Copy the master coordinates to the global array */
1418 cgs_gl = &dd->comm->cgs_gl;
1420 n = DDMASTERRANK(dd);
1422 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1424 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1426 copy_rvec(lv[a++], v[c]);
/* Receive from every other rank and scatter into v */
1430 for (n = 0; n < dd->nnodes; n++)
1434 if (ma->nat[n] > nalloc)
1436 nalloc = over_alloc_dd(ma->nat[n]);
1437 srenew(buf, nalloc);
1440 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1441 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1444 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1446 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1448 copy_rvec(buf[a++], v[c]);
/* Build the byte counts and displacements arrays (in ma->ibuf) needed
 * for gatherv/scatterv of per-atom rvec data; master only.
 * NOTE(review): the assignment of ma, the "*counts = ma->ibuf;" line
 * and braces are missing from this corrupted copy.
 */
1457 static void get_commbuffer_counts(gmx_domdec_t *dd,
1458 int **counts, int **disps)
1460 gmx_domdec_master_t *ma;
1465 /* Make the rvec count and displacment arrays */
1467 *disps = ma->ibuf + dd->nnodes;
1468 for (n = 0; n < dd->nnodes; n++)
1470 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1471 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
/* Collect a per-atom rvec array on the master with a single gatherv
 * (used for larger node counts). The gathered buffer is then scattered
 * into the global array v in charge-group order.
 * NOTE(review): the lv/v parameter line, DDMASTER guards, buffer
 * declarations and braces are missing from this corrupted copy.
 */
1475 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1478 gmx_domdec_master_t *ma;
1479 int *rcounts = NULL, *disps = NULL;
1488 get_commbuffer_counts(dd, &rcounts, &disps);
1493 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1497 cgs_gl = &dd->comm->cgs_gl;
/* Reorder the gathered data from per-node order into global
 * charge-group order */
1500 for (n = 0; n < dd->nnodes; n++)
1502 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1504 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1506 copy_rvec(buf[a++], v[c]);
/* Collect the local per-atom rvec array lv of all ranks into the
 * global array v on the master, first making sure the master knows the
 * current charge-group distribution. Uses send/recv for few nodes and
 * gatherv otherwise.
 * NOTE(review): braces and the else keyword are missing from this
 * corrupted copy; ma/n/i/c/a/nalloc appear unused here (as in the
 * original source).
 */
1513 void dd_collect_vec(gmx_domdec_t *dd,
1514 t_state *state_local, rvec *lv, rvec *v)
1516 gmx_domdec_master_t *ma;
1517 int n, i, c, a, nalloc = 0;
1520 dd_collect_cg(dd, state_local);
1522 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1524 dd_collect_vec_sendrecv(dd, lv, v);
1528 dd_collect_vec_gatherv(dd, lv, v);
/* Collect the complete t_state on the master: scalar state (lambdas,
 * box matrices, thermostat/barostat variables) is copied directly on
 * the master, and every distributed per-atom array (x, v, sd_X, cg_p)
 * is gathered with dd_collect_vec.
 * NOTE(review): declarations, DDMASTER guards, switch/case keywords and
 * braces are missing from this corrupted copy.
 */
1533 void dd_collect_state(gmx_domdec_t *dd,
1534 t_state *state_local, t_state *state)
1538 nh = state->nhchainlength;
1542 for (i = 0; i < efptNR; i++)
1544 state->lambda[i] = state_local->lambda[i];
1546 state->fep_state = state_local->fep_state;
1547 state->veta = state_local->veta;
1548 state->vol0 = state_local->vol0;
1549 copy_mat(state_local->box, state->box);
1550 copy_mat(state_local->boxv, state->boxv);
1551 copy_mat(state_local->svir_prev, state->svir_prev);
1552 copy_mat(state_local->fvir_prev, state->fvir_prev);
1553 copy_mat(state_local->pres_prev, state->pres_prev);
/* Nose-Hoover chains per temperature-coupling group */
1555 for (i = 0; i < state_local->ngtc; i++)
1557 for (j = 0; j < nh; j++)
1559 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1560 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1562 state->therm_integral[i] = state_local->therm_integral[i];
/* Nose-Hoover chains for pressure coupling */
1564 for (i = 0; i < state_local->nnhpres; i++)
1566 for (j = 0; j < nh; j++)
1568 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1569 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
/* Gather every distributed per-atom array present in this state */
1573 for (est = 0; est < estNR; est++)
1575 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1580 dd_collect_vec(dd, state_local, state_local->x, state->x);
1583 dd_collect_vec(dd, state_local, state_local->v, state->v);
1586 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1589 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1591 case estDISRE_INITF:
1592 case estDISRE_RM3TAV:
1593 case estORIRE_INITF:
1597 gmx_incons("Unknown state entry encountered in dd_collect_state");
/* Grow all distributed per-atom arrays in state (and the force array
 * *f, when present) to hold at least nalloc atoms, using the standard
 * DD over-allocation factor.
 * NOTE(review): switch/case keywords, the f != NULL guard and braces
 * are missing from this corrupted copy.
 */
1603 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1609 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1612 state->nalloc = over_alloc_dd(nalloc);
1614 for (est = 0; est < estNR; est++)
1616 if (EST_DISTR(est) && (state->flags & (1<<est)))
1621 srenew(state->x, state->nalloc);
1624 srenew(state->v, state->nalloc);
1627 srenew(state->sd_X, state->nalloc);
1630 srenew(state->cg_p, state->nalloc);
1632 case estDISRE_INITF:
1633 case estDISRE_RM3TAV:
1634 case estORIRE_INITF:
1636 /* No reallocation required */
1639 gmx_incons("Unknown state entry encountered in dd_realloc_state")
1646 srenew(*f, state->nalloc);
/* Ensure the forcerec charge-group arrays (cginfo, and cg_cm for the group
 * scheme) can hold nalloc charge groups; with the Verlet scheme the state
 * x array is used for atom communication instead, so state is grown too.
 */
1650 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1653 if (nalloc > fr->cg_nalloc)
1657 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1659 fr->cg_nalloc = over_alloc_dd(nalloc);
1660 srenew(fr->cginfo, fr->cg_nalloc);
1661 if (fr->cutoff_scheme == ecutsGROUP)
1663 srenew(fr->cg_cm, fr->cg_nalloc);
1666 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1668 /* We don't use charge groups, we use x in state to set up
1669 * the atom communication.
1671 dd_realloc_state(state, f, nalloc);
/* Master-driven distribution of a global rvec array v to the local array lv
 * of every DD rank, using one MPI_Send per rank; the master copies its own
 * part directly. Non-master ranks only execute the final MPI_Recv.
 */
1675 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1678 gmx_domdec_master_t *ma;
1679 int n, i, c, a, nalloc = 0;
/* Master loop: pack and send each rank's home atoms */
1686 for (n = 0; n < dd->nnodes; n++)
1690 if (ma->nat[n] > nalloc)
1692 nalloc = over_alloc_dd(ma->nat[n]);
1693 srenew(buf, nalloc);
1695 /* Use lv as a temporary buffer */
/* Gather the atoms of all charge groups assigned to rank n */
1697 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1699 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1701 copy_rvec(v[c], buf[a++]);
/* Sanity check: packed atom count must match the bookkeeping */
1704 if (a != ma->nat[n])
1706 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1711 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1712 DDRANK(dd, n), n, dd->mpi_comm_all);
/* Master copies its own home atoms straight into lv */
1717 n = DDMASTERRANK(dd);
1719 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1721 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1723 copy_rvec(v[c], lv[a++]);
/* Non-master path: receive our home-atom coordinates from the master */
1730 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1731 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
/* Distribute a global rvec array v to the per-rank local arrays lv with a
 * single collective MPI scatterv; the master packs all ranks' atoms into
 * one contiguous buffer ordered by rank.
 */
1736 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1739 gmx_domdec_master_t *ma;
1740 int *scounts = NULL, *disps = NULL;
1741 int n, i, c, a, nalloc = 0;
/* Per-rank byte counts and displacements for the scatterv call */
1748 get_commbuffer_counts(dd, &scounts, &disps);
1752 for (n = 0; n < dd->nnodes; n++)
1754 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1756 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1758 copy_rvec(v[c], buf[a++]);
1764 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
/* Dispatch: for small node counts point-to-point send/recv is cheaper,
 * otherwise use the collective scatterv implementation.
 */
1767 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1769 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1771 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1775 dd_distribute_vec_scatterv(dd, cgs, v, lv);
/* Broadcast the free-energy (df) history from the master to all ranks,
 * scalar fields first, then the per-lambda arrays and the nlam x nlam
 * accumulation matrices row by row.
 */
1779 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1782 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1783 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1784 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1786 if (dfhist->nlambda > 0)
1788 int nlam = dfhist->nlambda;
1789 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1790 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1791 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1792 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1793 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1794 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
/* Matrix-valued history: broadcast one row per lambda state */
1796 for (i = 0; i < nlam; i++)
1798 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1799 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1800 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1801 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1802 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1803 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
/* Distribute the full global t_state from the master to the local state of
 * every DD rank: on the master, copy the replicated scalars/matrices into
 * state_local, broadcast them to all ranks, then scatter the distributed
 * per-atom vectors (x, v, sd_X, cg_p).
 */
1808 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1809 t_state *state, t_state *state_local,
1814 nh = state->nhchainlength;
/* Master only: copy the replicated (non-distributed) state entries */
1818 for (i = 0; i < efptNR; i++)
1820 state_local->lambda[i] = state->lambda[i];
1822 state_local->fep_state = state->fep_state;
1823 state_local->veta = state->veta;
1824 state_local->vol0 = state->vol0;
1825 copy_mat(state->box, state_local->box);
1826 copy_mat(state->box_rel, state_local->box_rel);
1827 copy_mat(state->boxv, state_local->boxv);
1828 copy_mat(state->svir_prev, state_local->svir_prev);
1829 copy_mat(state->fvir_prev, state_local->fvir_prev);
1830 copy_df_history(&state_local->dfhist, &state->dfhist);
/* Nose-Hoover chains: nh entries per temperature-coupling group */
1831 for (i = 0; i < state_local->ngtc; i++)
1833 for (j = 0; j < nh; j++)
1835 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1836 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1838 state_local->therm_integral[i] = state->therm_integral[i];
1840 for (i = 0; i < state_local->nnhpres; i++)
1842 for (j = 0; j < nh; j++)
1844 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1845 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
/* Broadcast the replicated entries from the master to all ranks */
1849 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1850 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1851 dd_bcast(dd, sizeof(real), &state_local->veta);
1852 dd_bcast(dd, sizeof(real), &state_local->vol0);
1853 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1854 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1855 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1856 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1857 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1858 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1859 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1860 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1861 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1862 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1864 /* communicate df_history -- required for restarting from checkpoint */
1865 dd_distribute_dfhist(dd, &state_local->dfhist);
/* Make sure the local arrays can hold this rank's home atoms */
1867 if (dd->nat_home > state_local->nalloc)
1869 dd_realloc_state(state_local, f, dd->nat_home);
/* Scatter the distributed per-atom vectors */
1871 for (i = 0; i < estNR; i++)
1873 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1878 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1881 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1884 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1887 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1889 case estDISRE_INITF:
1890 case estDISRE_RM3TAV:
1891 case estORIRE_INITF:
1893 /* Not implemented yet */
1896 gmx_incons("Unknown state entry encountered in dd_distribute_state");
/* Map a dimension index (XX/YY/ZZ) to its character 'X'/'Y'/'Z' for
 * user-facing messages; fatal error on any other value.
 */
1902 static char dim2char(int dim)
1908 case XX: c = 'X'; break;
1909 case YY: c = 'Y'; break;
1910 case ZZ: c = 'Z'; break;
1911 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
/* Debug output: gather all DD cell corner coordinates on the master and
 * write them as a PDB file (one 8-corner "GLY" cage per DD cell, connected
 * with CONECT records) so the decomposition grid can be visualized.
 */
1917 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1918 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1920 rvec grid_s[2], *grid_r = NULL, cx, r;
1921 char fname[STRLEN], format[STRLEN], buf[22];
1923 int a, i, d, z, y, x;
/* Each rank contributes its cell lower (cell_x0) and upper (cell_x1) corner */
1927 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1928 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1932 snew(grid_r, 2*dd->nnodes);
1935 dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd) ? grid_r[0] : NULL);
/* Triclinic skew factors for converting cell fractions to coordinates */
1939 for (d = 0; d < DIM; d++)
1941 for (i = 0; i < DIM; i++)
1949 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1951 tric[d][i] = box[i][d]/box[i][i];
1960 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1961 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
1962 out = gmx_fio_fopen(fname, "w");
1963 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1965 for (i = 0; i < dd->nnodes; i++)
/* Relative cell volume, stored in the B-factor column */
1967 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1968 for (d = 0; d < DIM; d++)
1970 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
/* Emit the 8 corners of cell i (coordinates in Angstrom, hence *10) */
1972 for (z = 0; z < 2; z++)
1974 for (y = 0; y < 2; y++)
1976 for (x = 0; x < 2; x++)
1978 cx[XX] = grid_r[i*2+x][XX];
1979 cx[YY] = grid_r[i*2+y][YY];
1980 cx[ZZ] = grid_r[i*2+z][ZZ];
1982 fprintf(out, format, "ATOM", a++, "CA", "GLY", ' ', 1+i,
1983 ' ', 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol);
/* CONECT records joining the cage corners along each dimension */
1987 for (d = 0; d < DIM; d++)
1989 for (x = 0; x < 4; x++)
1993 case 0: y = 1 + i*8 + 2*x; break;
1994 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1995 case 2: y = 1 + i*8 + x; break;
1997 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2001 gmx_fio_fclose(out);
/* Debug output: write this rank's local atoms to a per-node PDB file,
 * with the B-factor column encoding the DD zone (or zone count +0/+1 for
 * vsite/constraint-communicated atoms) each atom belongs to.
 */
2006 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
2007 gmx_mtop_t *mtop, t_commrec *cr,
2008 int natoms, rvec x[], matrix box)
2010 char fname[STRLEN], format[STRLEN], format4[STRLEN], buf[22];
2012 int i, ii, resnr, c;
2013 char *atomname, *resname;
/* Default: write all atoms up to and including communicated vsites */
2020 natoms = dd->comm->nat[ddnatVSITE];
2023 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2025 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
2026 sprintf(format4, "%s%s\n", get_pdbformat4(), "%6.2f%6.2f");
2028 out = gmx_fio_fopen(fname, "w");
2030 fprintf(out, "TITLE %s\n", title);
2031 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2032 for (i = 0; i < natoms; i++)
2034 ii = dd->gatindex[i];
2035 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
/* Determine which zone (b) local atom i falls in */
2036 if (i < dd->comm->nat[ddnatZONE])
2039 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2045 else if (i < dd->comm->nat[ddnatVSITE])
2047 b = dd->comm->zones.n;
2051 b = dd->comm->zones.n + 1;
/* Long atom names need the 4-character PDB name format */
2053 fprintf(out, strlen(atomname) < 4 ? format : format4,
2054 "ATOM", (ii+1)%100000,
2055 atomname, resname, ' ', resnr%10000, ' ',
2056 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b);
2058 fprintf(out, "TER\n");
2060 gmx_fio_fclose(out);
/* Return the effective cut-off distance available for multi-body bonded
 * interactions: the configured cutoff_mbody when set, otherwise the
 * smallest DD cell size (the implicit limit without DLB), capped by the
 * non-bonded cut-off.
 */
2063 real dd_cutoff_mbody(gmx_domdec_t *dd)
2065 gmx_domdec_comm_t *comm;
2072 if (comm->bInterCGBondeds)
2074 if (comm->cutoff_mbody > 0)
2076 r = comm->cutoff_mbody;
2080 /* cutoff_mbody=0 means we do not have DLB */
2081 r = comm->cellsize_min[dd->dim[0]];
2082 for (di = 1; di < dd->ndim; di++)
2084 r = min(r, comm->cellsize_min[dd->dim[di]]);
2086 if (comm->bBondComm)
2088 r = max(r, comm->cutoff_mbody);
2092 r = min(r, comm->cutoff);
/* Return the effective cut-off for two-body interactions: at least the
 * non-bonded cut-off, extended to the multi-body limit if that is larger.
 */
2100 real dd_cutoff_twobody(gmx_domdec_t *dd)
2104 r_mb = dd_cutoff_mbody(dd);
2106 return max(dd->comm->cutoff, r_mb);
/* Convert a PP Cartesian grid coordinate to the coordinate of its PME
 * node: along cartpmedim the PME nodes occupy indices [nc, ntot); the PP
 * coordinate is mapped proportionally (with rounding) into that range.
 */
2110 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2114 nc = dd->nc[dd->comm->cartpmedim];
2115 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2116 copy_ivec(coord, coord_pme);
2117 coord_pme[dd->comm->cartpmedim] =
2118 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
/* Map a DD rank index onto the index of the PME rank it should
 * communicate with, assuming both decompositions use x as the major
 * index. Adding npme/2 before the division spreads the DD ranks
 * evenly over the PME ranks.
 */
static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
{
    /* Proportional mapping with rounding toward the slot center */
    int scaled = ddindex*npme + npme/2;

    return scaled/ndd;
}
/* PME index for a DD index, using this rank's DD/PME node counts. */
2130 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2132 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
/* Same mapping as ddindex2pmeindex, but taking the counts from t_commrec. */
2135 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2137 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
/* Build the list of simulation node ids that are PME-only nodes, assuming
 * PME nodes are interleaved after each group of DD nodes mapping to the
 * same PME index. Returns a newly allocated array of cr->npmenodes ids;
 * caller owns the memory.
 */
2140 static int *dd_pmenodes(t_commrec *cr)
2145 snew(pmenodes, cr->npmenodes);
2147 for (i = 0; i < cr->dd->nnodes; i++)
2149 p0 = cr_ddindex2pmeindex(cr, i);
2150 p1 = cr_ddindex2pmeindex(cr, i+1);
/* A PME node is inserted after DD node i when the PME index advances */
2151 if (i+1 == cr->dd->nnodes || p1 > p0)
2155 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2157 pmenodes[n] = i + 1 + n;
/* Return the PME slab/node index serving the DD cell at grid coordinates
 * (x,y,z); with a Cartesian PP+PME layout the slab is derived from the
 * PME Cartesian coordinate, otherwise from the linear DD index.
 * NOTE(review): this section appears to be older/elided code (inconsistent
 * brace style and member access vs. the rest of the file) - verify against
 * the full source.
 */
2165 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2168 ivec coords, coords_pme, nc;
2173 if (dd->comm->bCartesian) {
2174 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2175 dd_coords2pmecoords(dd,coords,coords_pme);
2176 copy_ivec(dd->ntot,nc);
/* Restrict the grid to the PME-only part along cartpmedim */
2177 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2178 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2180 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2182 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2188 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
/* Translate DD grid coordinates (x,y,z) to a simulation node id, handling
 * the three rank layouts: full Cartesian PP+PME (MPI_Cart_rank), Cartesian
 * PP only (lookup table), or interleaved PP/PME ranks.
 */
2193 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2195 gmx_domdec_comm_t *comm;
2197 int ddindex, nodeid = -1;
2199 comm = cr->dd->comm;
2204 if (comm->bCartesianPP_PME)
2207 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2212 ddindex = dd_index(cr->dd->nc, coords);
2213 if (comm->bCartesianPP)
2215 nodeid = comm->ddindex2simnodeid[ddindex];
/* Interleaved layout: PME ranks inserted before this DD index shift it */
2221 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
/* Return the simulation node id of the PME node serving PP node
 * sim_nodeid (apparently -1 when sim_nodeid is itself a PME node),
 * for each of the three rank layouts.
 */
2233 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2236 gmx_domdec_comm_t *comm;
2237 ivec coord, coord_pme;
2244 /* This assumes a uniform x domain decomposition grid cell size */
2245 if (comm->bCartesianPP_PME)
2248 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2249 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2251 /* This is a PP node */
2252 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2253 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2257 else if (comm->bCartesianPP)
2259 if (sim_nodeid < dd->nnodes)
2261 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2266 /* This assumes DD cells with identical x coordinates
2267 * are numbered sequentially.
2269 if (dd->comm->pmenodes == NULL)
2271 if (sim_nodeid < dd->nnodes)
2273 /* The DD index equals the nodeid */
2274 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
/* Interleaved layout: scan the sorted pmenodes list for the next PME rank */
2280 while (sim_nodeid > dd->comm->pmenodes[i])
2284 if (sim_nodeid < dd->comm->pmenodes[i])
2286 pmenode = dd->comm->pmenodes[i];
/* Report the PME grid decomposition: number of PME nodes in x and y. */
2294 void get_pme_nnodes(const gmx_domdec_t *dd,
2295 int *npmenodes_x, int *npmenodes_y)
2299 *npmenodes_x = dd->comm->npmenodes_x;
2300 *npmenodes_y = dd->comm->npmenodes_y;
/* TRUE when sim_nodeid is a PME-only node (it has no PME peer of its own);
 * always FALSE without domain decomposition.
 */
2309 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2311 gmx_bool bPMEOnlyNode;
2313 if (DOMAINDECOMP(cr))
2315 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2319 bPMEOnlyNode = FALSE;
2322 return bPMEOnlyNode;
/* For PME node pmenodeid, collect the simulation node ids of all PP nodes
 * that send it coordinates (*my_ddnodes, allocated here, count in
 * *nmy_ddnodes) and the peer PP node (*node_peer) used for other traffic.
 */
2325 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2326 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2330 ivec coord, coord_pme;
/* Upper bound: PP nodes divided evenly over the PME nodes */
2334 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2337 for (x = 0; x < dd->nc[XX]; x++)
2339 for (y = 0; y < dd->nc[YY]; y++)
2341 for (z = 0; z < dd->nc[ZZ]; z++)
2343 if (dd->comm->bCartesianPP_PME)
2348 dd_cart_coord2pmecoord(dd, coord, coord_pme);
/* Select PP cells whose PME coordinate equals our own location */
2349 if (dd->ci[XX] == coord_pme[XX] &&
2350 dd->ci[YY] == coord_pme[YY] &&
2351 dd->ci[ZZ] == coord_pme[ZZ])
2353 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2358 /* The slab corresponds to the nodeid in the PME group */
2359 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2361 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2368 /* The last PP-only node is the peer node */
2369 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2373 fprintf(debug, "Receive coordinates from PP nodes:");
2374 for (x = 0; x < *nmy_ddnodes; x++)
2376 fprintf(debug, " %d", (*my_ddnodes)[x]);
2378 fprintf(debug, "\n");
/* Decide whether this PP rank is the one that receives virial/energy data
 * from its PME node: only the LAST PP node mapped to a given PME node does,
 * so ranks whose successor shares the same PME node return FALSE.
 */
2382 static gmx_bool receive_vir_ener(t_commrec *cr)
2384 gmx_domdec_comm_t *comm;
2385 int pmenode, coords[DIM], rank;
/* With as many PME as PP nodes the mapping is 1:1 and every rank receives */
2389 if (cr->npmenodes < cr->dd->nnodes)
2391 comm = cr->dd->comm;
2392 if (comm->bCartesianPP_PME)
2394 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2396 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
/* Check whether the next PP cell along cartpmedim maps to the same PME node */
2397 coords[comm->cartpmedim]++;
2398 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2400 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2401 if (dd_simnode2pmenode(cr, rank) == pmenode)
2403 /* This is not the last PP node for pmenode */
2411 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2412 if (cr->sim_nodeid+1 < cr->nnodes &&
2413 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2415 /* This is not the last PP node for pmenode */
/* Reset the zone charge-group ranges so only zone 0 (the home zone) is
 * populated: all range boundaries beyond 0 are set to ncg_home.
 */
2424 static void set_zones_ncg_home(gmx_domdec_t *dd)
2426 gmx_domdec_zones_t *zones;
2429 zones = &dd->comm->zones;
2431 zones->cg_range[0] = 0;
2433 for (i = 1; i < zones->n+1; i++)
2434 zones->cg_range[i] = dd->ncg_home;
2436 /* zone_ncg1[0] should always be equal to ncg_home */
2437 dd->comm->zone_ncg1[0] = dd->ncg_home;
/* Rebuild the local charge-group index (dd->cgindex) and the local-to-global
 * charge-group map (dd->index_gl) from the list of global charge groups
 * stored in state->ncg_gl, e.g. after reading a checkpoint.
 */
2440 static void rebuild_cgindex(gmx_domdec_t *dd,
2441 const int *gcgs_index, t_state *state)
2443 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2446 dd_cg_gl = dd->index_gl;
2447 cgindex = dd->cgindex;
2450 for (i = 0; i < state->ncg_gl; i++)
2454 dd_cg_gl[i] = cg_gl;
/* Accumulate atom count from the global charge-group index */
2455 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2459 dd->ncg_home = state->ncg_gl;
2462 set_zones_ncg_home(dd);
/* Look up the cginfo entry for global charge group cg: advance through the
 * per-moleculetype blocks to the one containing cg, then index into its
 * (possibly molecule-periodic, hence the modulo) cginfo array.
 */
2465 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2467 while (cg >= cginfo_mb->cg_end)
2472 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
/* Fill fr->cginfo for local charge groups [cg0,cg1) from the global tables
 * and, when the bLocalCG mask is used, mark those groups as local.
 */
2475 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2476 t_forcerec *fr, char *bLocalCG)
2478 cginfo_mb_t *cginfo_mb;
2484 cginfo_mb = fr->cginfo_mb;
2485 cginfo = fr->cginfo;
2487 for (cg = cg0; cg < cg1; cg++)
2489 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2493 if (bLocalCG != NULL)
2495 for (cg = cg0; cg < cg1; cg++)
2497 bLocalCG[index_gl[cg]] = TRUE;
/* Build the local-to-global (gatindex) and global-to-local (ga2la) atom
 * index tables for all zones, starting from charge group cg_start; handles
 * both charge-group (bCGs) and single-atom bookkeeping.
 */
2502 static void make_dd_indices(gmx_domdec_t *dd,
2503 const int *gcgs_index, int cg_start)
2505 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2506 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2511 bLocalCG = dd->comm->bLocalCG;
2513 if (dd->nat_tot > dd->gatindex_nalloc)
2515 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2516 srenew(dd->gatindex, dd->gatindex_nalloc);
2519 nzone = dd->comm->zones.n;
2520 zone2cg = dd->comm->zones.cg_range;
2521 zone_ncg1 = dd->comm->zone_ncg1;
2522 index_gl = dd->index_gl;
2523 gatindex = dd->gatindex;
2524 bCGs = dd->comm->bCGs;
2526 if (zone2cg[1] != dd->ncg_home)
2528 gmx_incons("dd->ncg_zone is not up to date");
2531 /* Make the local to global and global to local atom index */
2532 a = dd->cgindex[cg_start];
2533 for (zone = 0; zone < nzone; zone++)
2541 cg0 = zone2cg[zone];
2543 cg1 = zone2cg[zone+1];
/* cg1_p1 separates first-pulse groups from multi-pulse groups */
2544 cg1_p1 = cg0 + zone_ncg1[zone];
2546 for (cg = cg0; cg < cg1; cg++)
2551 /* Signal that this cg is from more than one pulse away */
2554 cg_gl = index_gl[cg];
/* Charge-group case: register every atom of the group */
2557 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2560 ga2la_set(dd->ga2la, a_gl, a, zone1);
/* Single-atom case: the cg index is the global atom index */
2566 gatindex[a] = cg_gl;
2567 ga2la_set(dd->ga2la, cg_gl, a, zone1);
/* Consistency check (debug): verify that every local charge group is marked
 * in bLocalCG and that the number of marked groups matches ncg_tot.
 * Returns the number of inconsistencies found.
 */
2574 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2577 int ncg, i, ngl, nerr;
2580 if (bLocalCG == NULL)
2584 for (i = 0; i < dd->ncg_tot; i++)
2586 if (!bLocalCG[dd->index_gl[i]])
2589 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
/* Count marks over all system charge groups and compare to ncg_tot */
2594 for (i = 0; i < ncg_sys; i++)
2601 if (ngl != dd->ncg_tot)
2603 fprintf(stderr, "DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
/* Debug-level consistency check of the DD index tables: detects duplicate
 * local copies of a global atom, ga2la entries out of range or mismatching
 * gatindex, and wrong bLocalCG marks. Fatal error on any inconsistency.
 */
2610 static void check_index_consistency(gmx_domdec_t *dd,
2611 int natoms_sys, int ncg_sys,
2614 int nerr, ngl, i, a, cell;
/* Expensive duplicate check only at the highest debug level */
2619 if (dd->comm->DD_debug > 1)
2621 snew(have, natoms_sys);
2622 for (a = 0; a < dd->nat_tot; a++)
2624 if (have[dd->gatindex[a]] > 0)
2626 fprintf(stderr, "DD node %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2630 have[dd->gatindex[a]] = a + 1;
2636 snew(have, dd->nat_tot);
/* Cross-check ga2la against gatindex for every global atom */
2639 for (i = 0; i < natoms_sys; i++)
2641 if (ga2la_get(dd->ga2la, i, &a, &cell))
2643 if (a >= dd->nat_tot)
2645 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2651 if (dd->gatindex[a] != i)
2653 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2660 if (ngl != dd->nat_tot)
2663 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2664 dd->rank, where, ngl, dd->nat_tot);
/* Every local atom must have been reached through ga2la */
2666 for (a = 0; a < dd->nat_tot; a++)
2671 "DD node %d, %s: local atom %d, global %d has no global index\n",
2672 dd->rank, where, a+1, dd->gatindex[a]+1);
2677 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2681 gmx_fatal(FARGS, "DD node %d, %s: %d atom/cg index inconsistencies",
2682 dd->rank, where, nerr);
/* Invalidate the DD index tables from cg_start/a_start onward: clear the
 * ga2la map (wholesale or per-atom), unset bLocalCG marks, and clear the
 * local vsite and (if present) constraint indices.
 */
2686 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2693 /* Clear the whole list without searching */
2694 ga2la_clear(dd->ga2la);
2698 for (i = a_start; i < dd->nat_tot; i++)
2700 ga2la_del(dd->ga2la, dd->gatindex[i]);
2704 bLocalCG = dd->comm->bLocalCG;
2707 for (i = cg_start; i < dd->ncg_tot; i++)
2709 bLocalCG[dd->index_gl[i]] = FALSE;
2713 dd_clear_local_vsite_indices(dd);
2715 if (dd->constraints)
2717 dd_clear_local_constraint_indices(dd);
2721 /* This function should be used for moving the domain boundaries during DLB,
2722 * for obtaining the minimum cell size. It checks the initially set limit
2723 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2724 * and, possibly, a longer cut-off limit set for PME load balancing.
2726 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2730 cellsize_min = comm->cellsize_min[dim];
2732 if (!comm->bVacDLBNoLimit)
2734 /* The cut-off might have changed, e.g. by PME load balancing,
2735 * from the value used to set comm->cellsize_min, so check it.
/* Each of the np_dlb communication pulses must span at least cutoff */
2737 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2739 if (comm->bPMELoadBalDLBLimits)
2741 /* Check for the cut-off limit set by the PME load balancing */
2742 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2746 return cellsize_min;
/* Return the maximum allowed displacement ("jump") of a cell boundary
 * between neighboring rows of cells, limited by the cell size limit and
 * by the cut-off divided over the number of communication pulses.
 */
2749 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2752 real grid_jump_limit;
2754 /* The distance between the boundaries of cells at distance
2755 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2756 * and by the fact that cells should not be shifted by more than
2757 * half their size, such that cg's only shift by one cell
2758 * at redecomposition.
2760 grid_jump_limit = comm->cellsize_limit;
2761 if (!comm->bVacDLBNoLimit)
2763 if (comm->bPMELoadBalDLBLimits)
2765 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2767 grid_jump_limit = max(grid_jump_limit,
2768 cutoff/comm->cd[dim_ind].np);
2771 return grid_jump_limit;
/* Check, for each decomposed dimension beyond the first, that the DLB cell
 * boundaries have not shifted beyond the allowed jump limit relative to
 * the neighboring rows; issues a fatal error when they have.
 */
2774 static gmx_bool check_grid_jump(gmx_int64_t step,
2780 gmx_domdec_comm_t *comm;
2789 for (d = 1; d < dd->ndim; d++)
2792 limit = grid_jump_limit(comm, cutoff, d);
2793 bfac = ddbox->box_size[dim];
2794 if (ddbox->tric_dir[dim])
2796 bfac *= ddbox->skew_fac[dim];
/* Compare our cell boundaries against the neighbor extremes, scaled to
 * absolute distances via bfac */
2798 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2799 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2807 /* This error should never be triggered under normal
2808 * circumstances, but you never know ...
2810 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
2811 gmx_step_str(step, buf),
2812 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Number of load measurements taken: flop counts when flop-based load
 * balancing is enabled, otherwise force-cycle counts. */
2820 static int dd_load_count(gmx_domdec_comm_t *comm)
2822 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
/* Return this rank's force-computation load measure for DLB: flop count
 * (optionally randomly perturbed for testing) or force cycle count with
 * outlier and GPU-wait corrections.
 */
2825 static float dd_force_load(gmx_domdec_comm_t *comm)
2832 if (comm->eFlop > 1)
/* eFlop > 1: add artificial load imbalance, used for DLB testing */
2834 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2839 load = comm->cycl[ddCyclF];
2840 if (comm->cycl_n[ddCyclF] > 1)
2842 /* Subtract the maximum of the last n cycle counts
2843 * to get rid of possible high counts due to other sources,
2844 * for instance system activity, that would otherwise
2845 * affect the dynamic load balancing.
2847 load -= comm->cycl_max[ddCyclF];
2851 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2853 float gpu_wait, gpu_wait_sum;
2855 gpu_wait = comm->cycl[ddCyclWaitGPU];
2856 if (comm->cycl_n[ddCyclF] > 1)
2858 /* We should remove the WaitGPU time of the same MD step
2859 * as the one with the maximum F time, since the F time
2860 * and the wait time are not independent.
2861 * Furthermore, the step for the max F time should be chosen
2862 * the same on all ranks that share the same GPU.
2863 * But to keep the code simple, we remove the average instead.
2864 * The main reason for artificially long times at some steps
2865 * is spurious CPU activity or MPI time, so we don't expect
2866 * that changes in the GPU wait time matter a lot here.
2868 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2870 /* Sum the wait times over the ranks that share the same GPU */
2871 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2872 comm->mpi_comm_gpu_shared);
2873 /* Replace the wait time by the average over the ranks */
2874 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
/* Build the cumulative cell-boundary fractions along dim for static load
 * balancing: from the user-supplied slb_frac when given, otherwise
 * uniform; *dim_f is allocated here with nc[dim]+1 entries in [0,1].
 */
2882 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2884 gmx_domdec_comm_t *comm;
2889 snew(*dim_f, dd->nc[dim]+1);
2891 for (i = 1; i < dd->nc[dim]; i++)
2893 if (comm->slb_frac[dim])
2895 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2899 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2902 (*dim_f)[dd->nc[dim]] = 1;
/* Initialize the PME slab bookkeeping for decomposition dimension dimind:
 * number of slabs, per-slab min/max PP cell location along that dimension,
 * and the static-load-balancing boundary fractions.
 */
2905 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2907 int pmeindex, slab, nso, i;
2910 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2916 ddpme->dim = dimind;
2918 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2920 ddpme->nslab = (ddpme->dim == 0 ?
2921 dd->comm->npmenodes_x :
2922 dd->comm->npmenodes_y);
2924 if (ddpme->nslab <= 1)
2929 nso = dd->comm->npmenodes/ddpme->nslab;
2930 /* Determine for each PME slab the PP location range for dimension dim */
2931 snew(ddpme->pp_min, ddpme->nslab);
2932 snew(ddpme->pp_max, ddpme->nslab);
/* Initialize ranges empty (min at max value, max at 0) */
2933 for (slab = 0; slab < ddpme->nslab; slab++)
2935 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2936 ddpme->pp_max[slab] = 0;
2938 for (i = 0; i < dd->nnodes; i++)
2940 ddindex2xyz(dd->nc, i, xyz);
2941 /* For y only use our y/z slab.
2942 * This assumes that the PME x grid size matches the DD grid size.
2944 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2946 pmeindex = ddindex2pmeindex(dd, i);
2949 slab = pmeindex/nso;
2953 slab = pmeindex % ddpme->nslab;
2955 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
2956 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
2960 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
/* Maximum PME slab communication shift in x; 0 when PME slab dimension 0
 * is not along x (the elided fallback return is not visible here). */
2963 int dd_pme_maxshift_x(gmx_domdec_t *dd)
2965 if (dd->comm->ddpme[0].dim == XX)
2967 return dd->comm->ddpme[0].maxshift;
/* Maximum PME slab communication shift in y, taken from whichever PME
 * decomposition dimension lies along y (the elided fallback return is
 * not visible here). */
2975 int dd_pme_maxshift_y(gmx_domdec_t *dd)
2977 if (dd->comm->ddpme[0].dim == YY)
2979 return dd->comm->ddpme[0].maxshift;
2981 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2983 return dd->comm->ddpme[1].maxshift;
/* Determine maxshift: how many PME slabs away a PP cell may need to
 * communicate, given the current cell boundaries (cell_f) and the atom
 * displacement allowance of 2/3 of the cut-off.
 */
2991 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
2992 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
2994 gmx_domdec_comm_t *comm;
2997 real range, pme_boundary;
3001 nc = dd->nc[ddpme->dim];
3004 if (!ddpme->dim_match)
3006 /* PP decomposition is not along dim: the worst situation */
3009 else if (ns <= 3 || (bUniform && ns == nc))
3011 /* The optimal situation */
3016 /* We need to check for all pme nodes which nodes they
3017 * could possibly need to communicate with.
3019 xmin = ddpme->pp_min;
3020 xmax = ddpme->pp_max;
3021 /* Allow for atoms to be maximally 2/3 times the cut-off
3022 * out of their DD cell. This is a reasonable balance between
3023 * between performance and support for most charge-group/cut-off
3026 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3027 /* Avoid extra communication when we are exactly at a boundary */
/* Increase sh while any slab boundary (with PBC wrap-around terms)
 * is within range of a cell sh+1 slabs away */
3031 for (s = 0; s < ns; s++)
3033 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3034 pme_boundary = (real)s/ns;
3037 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3039 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3043 pme_boundary = (real)(s+1)/ns;
3046 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3048 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3055 ddpme->maxshift = sh;
3059 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3060 ddpme->dim, ddpme->maxshift);
/* Verify that along each decomposed, bounded dimension the (skew-corrected)
 * box is large enough for nc cells of at least the minimum cell size;
 * fatal error otherwise.
 */
3064 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3068 for (d = 0; d < dd->ndim; d++)
3071 if (dim < ddbox->nboundeddim &&
3072 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3073 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3075 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3076 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3077 dd->nc[dim], dd->comm->cellsize_limit);
/* Compute static (non-DLB) cell boundaries for every dimension, either
 * uniform or from user-supplied fractions; also determines the number of
 * communication pulses (npulse) needed to cover the cut-off, and checks
 * that no cell would communicate with its own periodic image.
 */
3082 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3083 gmx_bool bMaster, ivec npulse)
3085 gmx_domdec_comm_t *comm;
3088 real *cell_x, cell_dx, cellsize;
3092 for (d = 0; d < DIM; d++)
3094 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
/* Uniform grid along d: equal-width cells */
3096 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3099 cell_dx = ddbox->box_size[d]/dd->nc[d];
3102 for (j = 0; j < dd->nc[d]+1; j++)
3104 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3109 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3110 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
3112 cellsize = cell_dx*ddbox->skew_fac[d];
/* Add pulses until npulse cells cover the cut-off distance */
3113 while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
3117 cellsize_min[d] = cellsize;
3121 /* Statically load balanced grid */
3122 /* Also when we are not doing a master distribution we determine
3123 * all cell borders in a loop to obtain identical values
3124 * to the master distribution case and to determine npulse.
3128 cell_x = dd->ma->cell_x[d];
3132 snew(cell_x, dd->nc[d]+1);
3134 cell_x[0] = ddbox->box0[d];
3135 for (j = 0; j < dd->nc[d]; j++)
3137 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3138 cell_x[j+1] = cell_x[j] + cell_dx;
3139 cellsize = cell_dx*ddbox->skew_fac[d];
3140 while (cellsize*npulse[d] < comm->cutoff &&
3141 npulse[d] < dd->nc[d]-1)
3145 cellsize_min[d] = min(cellsize_min[d], cellsize);
3149 comm->cell_x0[d] = cell_x[dd->ci[d]];
3150 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3154 /* The following limitation is to avoid that a cell would receive
3155 * some of its own home charge groups back over the periodic boundary.
3156 * Double charge groups cause trouble with the global indices.
3158 if (d < ddbox->npbcdim &&
3159 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3161 gmx_fatal_collective(FARGS, NULL, dd,
3162 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3163 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3165 dd->nc[d], dd->nc[d],
3166 dd->nnodes > dd->nc[d] ? "cells" : "processors");
/* Without DLB these minimum sizes are the final ones */
3170 if (!comm->bDynLoadBal)
3172 copy_rvec(cellsize_min, comm->cellsize_min);
3175 for (d = 0; d < comm->npmedecompdim; d++)
3177 set_pme_maxshift(dd, &comm->ddpme[d],
3178 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3179 comm->ddpme[d].slb_dim_f);
/* Enforce minimum-size and staggering limits on the DLB root's relative
 * cell boundaries root->cell_f[] within the half-open index range
 * [range[0], range[1]).  The routine first iteratively clamps cells that
 * fall below cellsize_limit_f (each clamp shrinks the remaining cells,
 * so this repeats until the clamped-cell count stops growing), then keeps
 * each boundary within halfway of the two cells it separates, and finally
 * handles the staggering bounds (root->bound_min/bound_max) by recursing
 * on sub-ranges around violating boundaries.
 *
 * NOTE(review): this excerpt is missing lines (braces and a few
 * statements); the comments below describe only the visible logic.
 */
3184 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3185 int d, int dim, gmx_domdec_root_t *root,
3187 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3189 gmx_domdec_comm_t *comm;
3190 int ncd, i, j, nmin, nmin_old;
3191 gmx_bool bLimLo, bLimHi;
3193 real fac, halfway, cellsize_limit_f_i, region_size;
3194 gmx_bool bPBC, bLastHi = FALSE;
3195 int nrange[] = {range[0], range[1]};
3197 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
3203 bPBC = (dim < ddbox->npbcdim);
3205 cell_size = root->buf_ncd;
3209 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3212 /* First we need to check if the scaling does not make cells
3213 * smaller than the smallest allowed size.
3214 * We need to do this iteratively, since if a cell is too small,
3215 * it needs to be enlarged, which makes all the other cells smaller,
3216 * which could in turn make another cell smaller than allowed.
3218 for (i = range[0]; i < range[1]; i++)
3220 root->bCellMin[i] = FALSE;
3226 /* We need the total for normalization */
3228 for (i = range[0]; i < range[1]; i++)
3230 if (root->bCellMin[i] == FALSE)
3232 fac += cell_size[i];
3235 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3236 /* Determine the cell boundaries */
3237 for (i = range[0]; i < range[1]; i++)
3239 if (root->bCellMin[i] == FALSE)
3241 cell_size[i] *= fac;
/* Without PBC the outermost cells of the box have no lower size limit */
3242 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3244 cellsize_limit_f_i = 0;
3248 cellsize_limit_f_i = cellsize_limit_f;
3250 if (cell_size[i] < cellsize_limit_f_i)
3252 root->bCellMin[i] = TRUE;
3253 cell_size[i] = cellsize_limit_f_i;
3257 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
/* Repeat while clamping created new minimum-size cells */
3260 while (nmin > nmin_old);
3263 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3264 /* For this check we should not use DD_CELL_MARGIN,
3265 * but a slightly smaller factor,
3266 * since rounding could get use below the limit.
3268 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3271 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3272 gmx_step_str(step, buf),
3273 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3274 ncd, comm->cellsize_min[dim]);
3277 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3281 /* Check if the boundary did not displace more than halfway
3282 * each of the cells it bounds, as this could cause problems,
3283 * especially when the differences between cell sizes are large.
3284 * If changes are applied, they will not make cells smaller
3285 * than the cut-off, as we check all the boundaries which
3286 * might be affected by a change and if the old state was ok,
3287 * the cells will at most be shrunk back to their old size.
3289 for (i = range[0]+1; i < range[1]; i++)
3291 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3292 if (root->cell_f[i] < halfway)
3294 root->cell_f[i] = halfway;
3295 /* Check if the change also causes shifts of the next boundaries */
3296 for (j = i+1; j < range[1]; j++)
3298 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3300 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3304 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3305 if (root->cell_f[i] > halfway)
3307 root->cell_f[i] = halfway;
3308 /* Check if the change also causes shifts of the next boundaries */
3309 for (j = i-1; j >= range[0]+1; j--)
3311 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3313 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3320 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3321 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3322 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3323 * for a and b nrange is used */
3326 /* Take care of the staggering of the cell boundaries */
3329 for (i = range[0]; i < range[1]; i++)
3331 root->cell_f_max0[i] = root->cell_f[i];
3332 root->cell_f_min1[i] = root->cell_f[i+1];
3337 for (i = range[0]+1; i < range[1]; i++)
3339 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3340 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3341 if (bLimLo && bLimHi)
3343 /* Both limits violated, try the best we can */
3344 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3345 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3346 nrange[0] = range[0];
3348 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3351 nrange[1] = range[1];
3352 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3358 /* root->cell_f[i] = root->bound_min[i]; */
3359 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3362 else if (bLimHi && !bLastHi)
3365 if (nrange[1] < range[1]) /* found a LimLo before */
3367 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3368 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3369 nrange[0] = nrange[1];
3371 root->cell_f[i] = root->bound_max[i];
3373 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3375 nrange[1] = range[1];
3378 if (nrange[1] < range[1]) /* found last a LimLo */
3380 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3381 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3382 nrange[0] = nrange[1];
3383 nrange[1] = range[1];
3384 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3386 else if (nrange[0] > range[0]) /* found at least one LimHi */
3388 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
/* On the row-root rank: compute new relative cell boundaries for DD
 * dimension index d (Cartesian dimension dim) from the measured per-cell
 * loads, using under-relaxation and a per-step change limit so the load
 * balancing converges.  Enforces minimum cell sizes and (for staggered
 * grids) the bound_min/bound_max staggering limits via
 * dd_cell_sizes_dlb_root_enforce_limits, then appends the lower-dimension
 * boundary fractions and PME maxshift values to root->cell_f so the whole
 * array can be broadcast to the row.
 *
 * NOTE(review): this excerpt is missing lines (braces and a few
 * statements); the comments describe only the visible logic.
 */
3395 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3396 int d, int dim, gmx_domdec_root_t *root,
3397 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3398 gmx_bool bUniform, gmx_int64_t step)
3400 gmx_domdec_comm_t *comm;
3401 int ncd, d1, i, j, pos;
3403 real load_aver, load_i, imbalance, change, change_max, sc;
3404 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3408 int range[] = { 0, 0 };
3412 /* Convert the maximum change from the input percentage to a fraction */
3413 change_limit = comm->dlb_scale_lim*0.01;
3417 bPBC = (dim < ddbox->npbcdim);
3419 cell_size = root->buf_ncd;
3421 /* Store the original boundaries */
3422 for (i = 0; i < ncd+1; i++)
3424 root->old_cell_f[i] = root->cell_f[i];
3428 for (i = 0; i < ncd; i++)
3430 cell_size[i] = 1.0/ncd;
3433 else if (dd_load_count(comm))
3435 load_aver = comm->load[d].sum_m/ncd;
3437 for (i = 0; i < ncd; i++)
3439 /* Determine the relative imbalance of cell i */
3440 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3441 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3442 /* Determine the change of the cell size using underrelaxation */
3443 change = -relax*imbalance;
3444 change_max = max(change_max, max(change, -change));
3446 /* Limit the amount of scaling.
3447 * We need to use the same rescaling for all cells in one row,
3448 * otherwise the load balancing might not converge.
3451 if (change_max > change_limit)
3453 sc *= change_limit/change_max;
3455 for (i = 0; i < ncd; i++)
3457 /* Determine the relative imbalance of cell i */
3458 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3459 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3460 /* Determine the change of the cell size using underrelaxation */
3461 change = -sc*imbalance;
3462 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
/* Derive the minimum cell size and boundary-distance limits as fractions
 * of the box size, corrected for triclinic skew and dynamic-box margins. */
3466 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3467 cellsize_limit_f *= DD_CELL_MARGIN;
3468 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3469 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3470 if (ddbox->tric_dir[dim])
3472 cellsize_limit_f /= ddbox->skew_fac[dim];
3473 dist_min_f /= ddbox->skew_fac[dim];
3475 if (bDynamicBox && d > 0)
3477 dist_min_f *= DD_PRES_SCALE_MARGIN;
3479 if (d > 0 && !bUniform)
3481 /* Make sure that the grid is not shifted too much */
3482 for (i = 1; i < ncd; i++)
3484 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3486 gmx_incons("Inconsistent DD boundary staggering limits!");
3488 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3489 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3492 root->bound_min[i] += 0.5*space;
3494 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3495 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3498 root->bound_max[i] += 0.5*space;
3503 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3505 root->cell_f_max0[i-1] + dist_min_f,
3506 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3507 root->cell_f_min1[i] - dist_min_f);
3512 root->cell_f[0] = 0;
3513 root->cell_f[ncd] = 1;
3514 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3517 /* After the checks above, the cells should obey the cut-off
3518 * restrictions, but it does not hurt to check.
3520 for (i = 0; i < ncd; i++)
3524 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3525 dim, i, root->cell_f[i], root->cell_f[i+1]);
3528 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3529 root->cell_f[i+1] - root->cell_f[i] <
3530 cellsize_limit_f/DD_CELL_MARGIN)
3534 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3535 gmx_step_str(step, buf), dim2char(dim), i,
3536 (root->cell_f[i+1] - root->cell_f[i])
3537 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3542 /* Store the cell boundaries of the lower dimensions at the end */
3543 for (d1 = 0; d1 < d; d1++)
3545 root->cell_f[pos++] = comm->cell_f0[d1];
3546 root->cell_f[pos++] = comm->cell_f1[d1];
3549 if (d < comm->npmedecompdim)
3551 /* The master determines the maximum shift for
3552 * the coordinate communication between separate PME nodes.
3554 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
/* The integer maxshift values are appended to the real array and
 * converted back to int on the receiving side */
3556 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3559 root->cell_f[pos++] = comm->ddpme[1].maxshift;
/* Convert this rank's relative cell fractions cell_f0/cell_f1 for DD
 * dimension index dimind into absolute coordinates cell_x0/cell_x1 by
 * scaling with the box size.  Dimensions not bounded by the box
 * (dim >= nboundeddim) are additionally shifted by the box origin box0.
 */
3563 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3564 gmx_ddbox_t *ddbox, int dimind)
3566 gmx_domdec_comm_t *comm;
3571 /* Set the cell dimensions */
3572 dim = dd->dim[dimind];
3573 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3574 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3575 if (dim >= ddbox->nboundeddim)
3577 comm->cell_x0[dim] += ddbox->box0[dim];
3578 comm->cell_x1[dim] += ddbox->box0[dim];
/* Broadcast the row-root's boundary-fraction array cell_f_row over the
 * row load communicator and unpack it on every rank of the row: this
 * rank's own cell_f0/cell_f1 for dimension index d, the lower-dimension
 * fractions (converted to absolute bounds), and the PME maxshift values
 * that were appended to the array as reals.
 *
 * NOTE(review): some lines are missing from this excerpt; comments cover
 * only the visible logic.
 */
3582 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3583 int d, int dim, real *cell_f_row,
3586 gmx_domdec_comm_t *comm;
3592 /* Each node would only need to know two fractions,
3593 * but it is probably cheaper to broadcast the whole array.
3595 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3596 0, comm->mpi_comm_load[d]);
3598 /* Copy the fractions for this dimension from the buffer */
3599 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3600 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3601 /* The whole array was communicated, so set the buffer position */
3602 pos = dd->nc[dim] + 1;
3603 for (d1 = 0; d1 <= d; d1++)
3607 /* Copy the cell fractions of the lower dimensions */
3608 comm->cell_f0[d1] = cell_f_row[pos++];
3609 comm->cell_f1[d1] = cell_f_row[pos++];
3611 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3613 /* Convert the communicated shift from float to int */
3614 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3617 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
/* Recompute the DLB cell boundaries for every DD dimension: the row-root
 * rank computes the new boundaries (set_dd_cell_sizes_dlb_root) and the
 * result is distributed over the row (distribute_dd_cell_sizes_dlb).
 *
 * NOTE(review): some lines are missing from this excerpt (including the
 * bRowMember/bRowRoot setup); comments cover only the visible logic.
 */
3621 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3622 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3623 gmx_bool bUniform, gmx_int64_t step)
3625 gmx_domdec_comm_t *comm;
3627 gmx_bool bRowMember, bRowRoot;
3632 for (d = 0; d < dd->ndim; d++)
3637 for (d1 = d; d1 < dd->ndim; d1++)
3639 if (dd->ci[dd->dim[d1]] > 0)
3652 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3653 ddbox, bDynamicBox, bUniform, step);
3654 cell_f_row = comm->root[d]->cell_f;
3658 cell_f_row = comm->cell_f_row;
3660 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
/* Refresh the absolute cell bounds from the unchanged relative fractions
 * for all DD dimensions; valid only while the box itself is unchanged.
 */
3665 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3669 /* This function assumes the box is static and should therefore
3670 * not be called when the box has changed since the last
3671 * call to dd_partition_system.
3673 for (d = 0; d < dd->ndim; d++)
3675 relative_to_absolute_cell_bounds(dd, ddbox, d);
/* Set the DD cell sizes with dynamic load balancing: either recompute
 * the boundaries from the measured loads (timed under ewcDDCOMMBOUND),
 * or just rescale the existing fractions when only the box changed.
 * Dimensions without decomposition (nc[dim] == 1) span the whole box.
 *
 * NOTE(review): some lines are missing from this excerpt (including the
 * condition guarding the wallcycle branch); comments cover only the
 * visible logic.
 */
3681 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3682 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3683 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3684 gmx_wallcycle_t wcycle)
3686 gmx_domdec_comm_t *comm;
3693 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3694 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3695 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3697 else if (bDynamicBox)
3699 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3702 /* Set the dimensions for which no DD is used */
3703 for (dim = 0; dim < DIM; dim++)
3705 if (dd->nc[dim] == 1)
3707 comm->cell_x0[dim] = 0;
3708 comm->cell_x1[dim] = ddbox->box_size[dim];
3709 if (dim >= ddbox->nboundeddim)
3711 comm->cell_x0[dim] += ddbox->box0[dim];
3712 comm->cell_x1[dim] += ddbox->box0[dim];
/* Grow the per-dimension communication index storage (cd->ind) so each
 * DD dimension can hold npulse[dim] pulses; newly added entries get a
 * NULL index array and zero capacity.  Existing entries are preserved.
 */
3718 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3721 gmx_domdec_comm_dim_t *cd;
3723 for (d = 0; d < dd->ndim; d++)
3725 cd = &dd->comm->cd[d];
3726 np = npulse[dd->dim[d]];
3727 if (np > cd->np_nalloc)
3731 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3732 dim2char(dd->dim[d]), np);
3734 if (DDMASTER(dd) && cd->np_nalloc > 0)
3736 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3738 srenew(cd->ind, np);
3739 for (i = cd->np_nalloc; i < np; i++)
3741 cd->ind[i].index = NULL;
3742 cd->ind[i].nalloc = 0;
/* Top-level entry for setting the DD cell sizes each (re)partitioning:
 * saves the previous boundaries (needed later to check how far charge
 * groups moved), then dispatches to the dynamic-load-balancing path or
 * the static (slb) path, reallocating communication indices as needed.
 */
3751 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3752 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3753 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3754 gmx_wallcycle_t wcycle)
3756 gmx_domdec_comm_t *comm;
3762 /* Copy the old cell boundaries for the cg displacement check */
3763 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3764 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3766 if (comm->bDynLoadBal)
3770 check_box_size(dd, ddbox);
3772 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3776 set_dd_cell_sizes_slb(dd, ddbox, FALSE, npulse);
3777 realloc_comm_ind(dd, npulse);
3782 for (d = 0; d < DIM; d++)
3784 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3785 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
/* Validate the (DLB) cell sizes before neighbor searching and, when
 * staggered grids or unbounded dimensions are in use, communicate the
 * zone boundaries to fill cell_ns_x0/cell_ns_x1.  Fails hard when a
 * cell's skew-corrected extent drops below the minimum allowed size.
 */
3790 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3792 rvec cell_ns_x0, rvec cell_ns_x1,
3795 gmx_domdec_comm_t *comm;
3800 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3802 dim = dd->dim[dim_ind];
3804 /* Without PBC we don't have restrictions on the outer cells */
3805 if (!(dim >= ddbox->npbcdim &&
3806 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3807 comm->bDynLoadBal &&
3808 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3809 comm->cellsize_min[dim])
3812 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3813 gmx_step_str(step, buf), dim2char(dim),
3814 comm->cell_x1[dim] - comm->cell_x0[dim],
3815 ddbox->skew_fac[dim],
3816 dd->comm->cellsize_min[dim],
3817 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3821 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3823 /* Communicate the boundaries and update cell_ns_x0/1 */
3824 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3825 if (dd->bGridJump && dd->ndim > 1)
3827 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
/* Build the triclinic correction matrix tcm used to transform Cartesian
 * coordinates to box-lattice coordinates: the visible entries derive the
 * y->x, z->x and z->y corrections from the off-diagonal box components.
 * NOTE(review): most of this function's body (presumably branches on
 * npbcdim and zero-initialization of tcm) is missing from this excerpt —
 * confirm against the full source before relying on these comments.
 */
3832 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3836 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3844 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3845 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3854 static void check_screw_box(matrix box)
3856 /* Mathematical limitation */
3857 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3859 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3862 /* Limitation due to the asymmetry of the eighth shell method */
3863 if (box[ZZ][YY] != 0)
3865 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
/* Master-only: assign every charge group to a DD cell.  Computes each
 * group's center of geometry, wraps it (and its atoms) into the unit cell
 * — with the screw-PBC y/z mirroring for the x dimension — locates the
 * owning cell along each dimension, and fills the master's per-node
 * charge-group lists (ma->ncg/nat/cg/index).
 *
 * NOTE(review): lines are missing from this excerpt (braces, some
 * assignments such as the initial pos_d and ind[] updates); comments
 * cover only the visible logic.
 */
3869 static void distribute_cg(FILE *fplog, gmx_int64_t step,
3870 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3873 gmx_domdec_master_t *ma;
3874 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3875 int i, icg, j, k, k0, k1, d, npbcdim;
3877 rvec box_size, cg_cm;
3879 real nrcg, inv_ncg, pos_d;
3881 gmx_bool bUnbounded, bScrew;
3885 if (tmp_ind == NULL)
3887 snew(tmp_nalloc, dd->nnodes);
3888 snew(tmp_ind, dd->nnodes);
3889 for (i = 0; i < dd->nnodes; i++)
3891 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3892 snew(tmp_ind[i], tmp_nalloc[i]);
3896 /* Clear the count */
3897 for (i = 0; i < dd->nnodes; i++)
3903 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3905 cgindex = cgs->index;
3907 /* Compute the center of geometry for all charge groups */
3908 for (icg = 0; icg < cgs->nr; icg++)
3911 k1 = cgindex[icg+1];
3915 copy_rvec(pos[k0], cg_cm);
3922 for (k = k0; (k < k1); k++)
3924 rvec_inc(cg_cm, pos[k]);
3926 for (d = 0; (d < DIM); d++)
3928 cg_cm[d] *= inv_ncg;
3931 /* Put the charge group in the box and determine the cell index */
3932 for (d = DIM-1; d >= 0; d--)
3935 if (d < dd->npbcdim)
3937 bScrew = (dd->bScrewPBC && d == XX);
3938 if (tric_dir[d] && dd->nc[d] > 1)
3940 /* Use triclinic coordintates for this dimension */
3941 for (j = d+1; j < DIM; j++)
3943 pos_d += cg_cm[j]*tcm[j][d];
3946 while (pos_d >= box[d][d])
3949 rvec_dec(cg_cm, box[d]);
/* Screw PBC: shifting over x mirrors y and z */
3952 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3953 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3955 for (k = k0; (k < k1); k++)
3957 rvec_dec(pos[k], box[d]);
3960 pos[k][YY] = box[YY][YY] - pos[k][YY];
3961 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3968 rvec_inc(cg_cm, box[d]);
3971 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3972 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3974 for (k = k0; (k < k1); k++)
3976 rvec_inc(pos[k], box[d]);
3979 pos[k][YY] = box[YY][YY] - pos[k][YY];
3980 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3985 /* This could be done more efficiently */
3987 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
3992 i = dd_index(dd->nc, ind);
3993 if (ma->ncg[i] == tmp_nalloc[i])
3995 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
3996 srenew(tmp_ind[i], tmp_nalloc[i]);
3998 tmp_ind[i][ma->ncg[i]] = icg;
4000 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
/* Flatten the per-node lists into the master's cg/index arrays */
4004 for (i = 0; i < dd->nnodes; i++)
4007 for (k = 0; k < ma->ncg[i]; k++)
4009 ma->cg[k1++] = tmp_ind[i][k];
4012 ma->index[dd->nnodes] = k1;
4014 for (i = 0; i < dd->nnodes; i++)
4024 fprintf(fplog, "Charge group distribution at step %s:",
4025 gmx_step_str(step, buf));
4026 for (i = 0; i < dd->nnodes; i++)
4028 fprintf(fplog, " %d", ma->ncg[i]);
4030 fprintf(fplog, "\n");
/* Distribute the charge groups over all DD ranks at (re)initialization:
 * the master computes the assignment (distribute_cg), scatters per-rank
 * charge-group/atom counts, then scatters the global charge-group index
 * lists; every rank finally rebuilds its local cgindex from the sizes.
 *
 * NOTE(review): lines are missing from this excerpt (braces and a few
 * statements, e.g. the dd_scatterv call's first arguments); comments
 * cover only the visible logic.
 */
4034 static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4035 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4038 gmx_domdec_master_t *ma = NULL;
4041 int *ibuf, buf2[2] = { 0, 0 };
4042 gmx_bool bMaster = DDMASTER(dd);
4049 check_screw_box(box);
4052 set_dd_cell_sizes_slb(dd, ddbox, TRUE, npulse);
4054 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
4055 for (i = 0; i < dd->nnodes; i++)
4057 ma->ibuf[2*i] = ma->ncg[i];
4058 ma->ibuf[2*i+1] = ma->nat[i];
4066 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4068 dd->ncg_home = buf2[0];
4069 dd->nat_home = buf2[1];
4070 dd->ncg_tot = dd->ncg_home;
4071 dd->nat_tot = dd->nat_home;
4072 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4074 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4075 srenew(dd->index_gl, dd->cg_nalloc);
4076 srenew(dd->cgindex, dd->cg_nalloc+1);
4080 for (i = 0; i < dd->nnodes; i++)
4082 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4083 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4088 DDMASTER(dd) ? ma->ibuf : NULL,
4089 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4090 DDMASTER(dd) ? ma->cg : NULL,
4091 dd->ncg_home*sizeof(int), dd->index_gl);
4093 /* Determine the home charge group sizes */
4095 for (i = 0; i < dd->ncg_home; i++)
4097 cg_gl = dd->index_gl[i];
4099 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4104 fprintf(debug, "Home charge groups:\n");
4105 for (i = 0; i < dd->ncg_home; i++)
4107 fprintf(debug, " %d", dd->index_gl[i]);
4110 fprintf(debug, "\n");
4113 fprintf(debug, "\n");
/* Per-atom variant of the redistribute compaction: for each charge
 * group, either compact its atoms of vector `src` in place (group stays
 * home) or copy them into the communication buffer cgcm_state[m] for the
 * destination direction m, skipping the slots reserved for the other
 * nvec state vectors.  Presumably returns the new home position count —
 * the return statement is not visible in this excerpt; confirm.
 */
4117 static int compact_and_copy_vec_at(int ncg, int *move,
4120 rvec *src, gmx_domdec_comm_t *comm,
4123 int m, icg, i, i0, i1, nrcg;
4129 for (m = 0; m < DIM*2; m++)
4135 for (icg = 0; icg < ncg; icg++)
4137 i1 = cgindex[icg+1];
4143 /* Compact the home array in place */
4144 for (i = i0; i < i1; i++)
4146 copy_rvec(src[i], src[home_pos++]);
4152 /* Copy to the communication buffer */
4154 pos_vec[m] += 1 + vec*nrcg;
4155 for (i = i0; i < i1; i++)
4157 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4159 pos_vec[m] += (nvec - vec - 1)*nrcg;
4163 home_pos += i1 - i0;
/* Per-charge-group variant of the redistribute compaction: compacts the
 * cg-center vector `src` in place for groups that stay home, or copies
 * the single cg entry into cgcm_state[m] and advances past the space
 * reserved for the nvec per-atom vectors of that group.  Presumably
 * returns the new home position count — the return statement is not
 * visible in this excerpt; confirm.
 */
4171 static int compact_and_copy_vec_cg(int ncg, int *move,
4173 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4176 int m, icg, i0, i1, nrcg;
4182 for (m = 0; m < DIM*2; m++)
4188 for (icg = 0; icg < ncg; icg++)
4190 i1 = cgindex[icg+1];
4196 /* Compact the home array in place */
4197 copy_rvec(src[icg], src[home_pos++]);
4203 /* Copy to the communication buffer */
4204 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
4205 pos_vec[m] += 1 + nrcg*nvec;
/* Compact the home index arrays after redistribution: groups that stay
 * keep their entries (global index, cginfo, global atom indices, and the
 * global-to-local lookup is updated in place); groups that leave have
 * their atoms removed from ga2la and their bLocalCG flag cleared.
 * Presumably returns the number of remaining home charge groups — the
 * return statement is not visible in this excerpt; confirm.
 */
4217 static int compact_ind(int ncg, int *move,
4218 int *index_gl, int *cgindex,
4220 gmx_ga2la_t ga2la, char *bLocalCG,
4223 int cg, nat, a0, a1, a, a_gl;
4228 for (cg = 0; cg < ncg; cg++)
4234 /* Compact the home arrays in place.
4235 * Anything that can be done here avoids access to global arrays.
4237 cgindex[home_pos] = nat;
4238 for (a = a0; a < a1; a++)
4241 gatindex[nat] = a_gl;
4242 /* The cell number stays 0, so we don't need to set it */
4243 ga2la_change_la(ga2la, a_gl, nat);
4246 index_gl[home_pos] = index_gl[cg];
4247 cginfo[home_pos] = cginfo[cg];
4248 /* The charge group remains local, so bLocalCG does not change */
4253 /* Clear the global indices */
4254 for (a = a0; a < a1; a++)
4256 ga2la_del(ga2la, gatindex[a]);
4260 bLocalCG[index_gl[cg]] = FALSE;
4264 cgindex[home_pos] = nat;
/* Non-compacting counterpart of compact_ind: for charge groups that
 * moved to another domain, remove their atoms from the global-to-local
 * lookup, clear their bLocalCG flag, and mark them as moved in the
 * neighbor-search cell index array (set to -1).
 */
4269 static void clear_and_mark_ind(int ncg, int *move,
4270 int *index_gl, int *cgindex, int *gatindex,
4271 gmx_ga2la_t ga2la, char *bLocalCG,
4276 for (cg = 0; cg < ncg; cg++)
4282 /* Clear the global indices */
4283 for (a = a0; a < a1; a++)
4285 ga2la_del(ga2la, gatindex[a]);
4289 bLocalCG[index_gl[cg]] = FALSE;
4291 /* Signal that this cg has moved using the ns cell index.
4292 * Here we set it to -1. fill_grid will change it
4293 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4295 cell_index[cg] = -1;
4300 static void print_cg_move(FILE *fplog,
4302 gmx_int64_t step, int cg, int dim, int dir,
4303 gmx_bool bHaveLimitdAndCMOld, real limitd,
4304 rvec cm_old, rvec cm_new, real pos_d)
4306 gmx_domdec_comm_t *comm;
4311 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4312 if (bHaveLimitdAndCMOld)
4314 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4315 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4319 fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4320 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4322 fprintf(fplog, "distance out of cell %f\n",
4323 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4324 if (bHaveLimitdAndCMOld)
4326 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4327 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4329 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4330 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4331 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4333 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4334 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4336 comm->cell_x0[dim], comm->cell_x1[dim]);
/* Report a too-large charge-group displacement to the log file (when
 * available) and to stderr, then abort with a fatal error suggesting the
 * system is not well equilibrated.
 */
4339 static void cg_move_error(FILE *fplog,
4341 gmx_int64_t step, int cg, int dim, int dir,
4342 gmx_bool bHaveLimitdAndCMOld, real limitd,
4343 rvec cm_old, rvec cm_new, real pos_d)
4347 print_cg_move(fplog, dd, step, cg, dim, dir,
4348 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4350 print_cg_move(stderr, dd, step, cg, dim, dir,
4351 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4353 "A charge group moved too far between two domain decomposition steps\n"
4354 "This usually means that your system is not well equilibrated");
/* Apply the screw-PBC rotation (180 degrees about x) to atom a for every
 * distributed state vector that is present: positions are mirrored in
 * y/z about the box lengths, while velocity-like vectors (v, sd_X, cg_p)
 * simply flip sign in y and z.  Distance-type state entries are left
 * untouched; unknown entries trigger an internal inconsistency error.
 */
4357 static void rotate_state_atom(t_state *state, int a)
4361 for (est = 0; est < estNR; est++)
4363 if (EST_DISTR(est) && (state->flags & (1<<est)))
4368 /* Rotate the complete state; for a rectangular box only */
4369 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4370 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4373 state->v[a][YY] = -state->v[a][YY];
4374 state->v[a][ZZ] = -state->v[a][ZZ];
4377 state->sd_X[a][YY] = -state->sd_X[a][YY];
4378 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4381 state->cg_p[a][YY] = -state->cg_p[a][YY];
4382 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4384 case estDISRE_INITF:
4385 case estDISRE_RM3TAV:
4386 case estORIRE_INITF:
4388 /* These are distances, so not affected by rotation */
4391 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4397 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4399 if (natoms > comm->moved_nalloc)
4401 /* Contents should be preserved here */
4402 comm->moved_nalloc = over_alloc_dd(natoms);
4403 srenew(comm->moved, comm->moved_nalloc);
/* For the charge groups in [cg_start, cg_end): compute the new center of
 * geometry, wrap group and atoms back into the unit cell (triclinic or
 * rectangular, with screw-PBC rotation where applicable), error out via
 * cg_move_error when a group moved past the allowed limits, and encode
 * the destination direction plus per-dimension flags into move[cg].
 * Designed to be called from an OpenMP-parallel loop over cg ranges.
 *
 * NOTE(review): lines are missing from this excerpt (braces, the dev[]
 * bookkeeping and the mc computation among them); comments cover only
 * the visible logic.
 */
4409 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4412 ivec tric_dir, matrix tcm,
4413 rvec cell_x0, rvec cell_x1,
4414 rvec limitd, rvec limit0, rvec limit1,
4416 int cg_start, int cg_end,
4421 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4422 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4426 real inv_ncg, pos_d;
4429 npbcdim = dd->npbcdim;
4431 for (cg = cg_start; cg < cg_end; cg++)
4438 copy_rvec(state->x[k0], cm_new);
4445 for (k = k0; (k < k1); k++)
4447 rvec_inc(cm_new, state->x[k]);
4449 for (d = 0; (d < DIM); d++)
4451 cm_new[d] = inv_ncg*cm_new[d];
4456 /* Do pbc and check DD cell boundary crossings */
4457 for (d = DIM-1; d >= 0; d--)
4461 bScrew = (dd->bScrewPBC && d == XX);
4462 /* Determine the location of this cg in lattice coordinates */
4466 for (d2 = d+1; d2 < DIM; d2++)
4468 pos_d += cm_new[d2]*tcm[d2][d];
4471 /* Put the charge group in the triclinic unit-cell */
4472 if (pos_d >= cell_x1[d])
4474 if (pos_d >= limit1[d])
4476 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4477 cg_cm[cg], cm_new, pos_d);
4480 if (dd->ci[d] == dd->nc[d] - 1)
4482 rvec_dec(cm_new, state->box[d]);
/* Screw PBC: shifting over x mirrors the y/z coordinates */
4485 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4486 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4488 for (k = k0; (k < k1); k++)
4490 rvec_dec(state->x[k], state->box[d]);
4493 rotate_state_atom(state, k);
4498 else if (pos_d < cell_x0[d])
4500 if (pos_d < limit0[d])
4502 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4503 cg_cm[cg], cm_new, pos_d);
4508 rvec_inc(cm_new, state->box[d]);
4511 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4512 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4514 for (k = k0; (k < k1); k++)
4516 rvec_inc(state->x[k], state->box[d]);
4519 rotate_state_atom(state, k);
4525 else if (d < npbcdim)
4527 /* Put the charge group in the rectangular unit-cell */
4528 while (cm_new[d] >= state->box[d][d])
4530 rvec_dec(cm_new, state->box[d]);
4531 for (k = k0; (k < k1); k++)
4533 rvec_dec(state->x[k], state->box[d]);
4536 while (cm_new[d] < 0)
4538 rvec_inc(cm_new, state->box[d]);
4539 for (k = k0; (k < k1); k++)
4541 rvec_inc(state->x[k], state->box[d]);
4547 copy_rvec(cm_new, cg_cm[cg]);
4549 /* Determine where this cg should go */
4552 for (d = 0; d < dd->ndim; d++)
4557 flag |= DD_FLAG_FW(d);
4563 else if (dev[dim] == -1)
4565 flag |= DD_FLAG_BW(d);
4568 if (dd->nc[dim] > 2)
4579 /* Temporarily store the flag in move */
4580 move[cg] = mc + flag;
4584 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4585 gmx_domdec_t *dd, ivec tric_dir,
4586 t_state *state, rvec **f,
4595 int ncg[DIM*2], nat[DIM*2];
4596 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4597 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4598 int sbuf[2], rbuf[2];
4599 int home_pos_cg, home_pos_at, buf_pos;
4601 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4604 real inv_ncg, pos_d;
4606 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4608 cginfo_mb_t *cginfo_mb;
4609 gmx_domdec_comm_t *comm;
4611 int nthread, thread;
4615 check_screw_box(state->box);
4619 if (fr->cutoff_scheme == ecutsGROUP)
4624 for (i = 0; i < estNR; i++)
4630 case estX: /* Always present */ break;
4631 case estV: bV = (state->flags & (1<<i)); break;
4632 case estSDX: bSDX = (state->flags & (1<<i)); break;
4633 case estCGP: bCGP = (state->flags & (1<<i)); break;
4636 case estDISRE_INITF:
4637 case estDISRE_RM3TAV:
4638 case estORIRE_INITF:
4640 /* No processing required */
4643 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4648 if (dd->ncg_tot > comm->nalloc_int)
4650 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4651 srenew(comm->buf_int, comm->nalloc_int);
4653 move = comm->buf_int;
4655 /* Clear the count */
4656 for (c = 0; c < dd->ndim*2; c++)
4662 npbcdim = dd->npbcdim;
4664 for (d = 0; (d < DIM); d++)
4666 limitd[d] = dd->comm->cellsize_min[d];
4667 if (d >= npbcdim && dd->ci[d] == 0)
4669 cell_x0[d] = -GMX_FLOAT_MAX;
4673 cell_x0[d] = comm->cell_x0[d];
4675 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4677 cell_x1[d] = GMX_FLOAT_MAX;
4681 cell_x1[d] = comm->cell_x1[d];
4685 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4686 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4690 /* We check after communication if a charge group moved
4691 * more than one cell. Set the pre-comm check limit to float_max.
4693 limit0[d] = -GMX_FLOAT_MAX;
4694 limit1[d] = GMX_FLOAT_MAX;
4698 make_tric_corr_matrix(npbcdim, state->box, tcm);
4700 cgindex = dd->cgindex;
4702 nthread = gmx_omp_nthreads_get(emntDomdec);
4704 /* Compute the center of geometry for all home charge groups
4705 * and put them in the box and determine where they should go.
4707 #pragma omp parallel for num_threads(nthread) schedule(static)
4708 for (thread = 0; thread < nthread; thread++)
4710 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4711 cell_x0, cell_x1, limitd, limit0, limit1,
4713 ( thread *dd->ncg_home)/nthread,
4714 ((thread+1)*dd->ncg_home)/nthread,
4715 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4719 for (cg = 0; cg < dd->ncg_home; cg++)
4724 flag = mc & ~DD_FLAG_NRCG;
4725 mc = mc & DD_FLAG_NRCG;
4728 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4730 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4731 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4733 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4734 /* We store the cg size in the lower 16 bits
4735 * and the place where the charge group should go
4736 * in the next 6 bits. This saves some communication volume.
4738 nrcg = cgindex[cg+1] - cgindex[cg];
4739 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4745 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4746 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4749 for (i = 0; i < dd->ndim*2; i++)
4751 *ncg_moved += ncg[i];
4768 /* Make sure the communication buffers are large enough */
4769 for (mc = 0; mc < dd->ndim*2; mc++)
4771 nvr = ncg[mc] + nat[mc]*nvec;
4772 if (nvr > comm->cgcm_state_nalloc[mc])
4774 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4775 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4779 switch (fr->cutoff_scheme)
4782 /* Recalculating cg_cm might be cheaper than communicating,
4783 * but that could give rise to rounding issues.
4786 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4787 nvec, cg_cm, comm, bCompact);
4790 /* Without charge groups we send the moved atom coordinates
4791 * over twice. This is so the code below can be used without
4792 * many conditionals for both for with and without charge groups.
4795 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4796 nvec, state->x, comm, FALSE);
4799 home_pos_cg -= *ncg_moved;
4803 gmx_incons("unimplemented");
4809 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4810 nvec, vec++, state->x, comm, bCompact);
4813 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4814 nvec, vec++, state->v, comm, bCompact);
4818 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4819 nvec, vec++, state->sd_X, comm, bCompact);
4823 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4824 nvec, vec++, state->cg_p, comm, bCompact);
4829 compact_ind(dd->ncg_home, move,
4830 dd->index_gl, dd->cgindex, dd->gatindex,
4831 dd->ga2la, comm->bLocalCG,
4836 if (fr->cutoff_scheme == ecutsVERLET)
4838 moved = get_moved(comm, dd->ncg_home);
4840 for (k = 0; k < dd->ncg_home; k++)
4847 moved = fr->ns.grid->cell_index;
4850 clear_and_mark_ind(dd->ncg_home, move,
4851 dd->index_gl, dd->cgindex, dd->gatindex,
4852 dd->ga2la, comm->bLocalCG,
4856 cginfo_mb = fr->cginfo_mb;
4858 *ncg_stay_home = home_pos_cg;
4859 for (d = 0; d < dd->ndim; d++)
4865 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4868 /* Communicate the cg and atom counts */
4873 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4874 d, dir, sbuf[0], sbuf[1]);
4876 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4878 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4880 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4881 srenew(comm->buf_int, comm->nalloc_int);
4884 /* Communicate the charge group indices, sizes and flags */
4885 dd_sendrecv_int(dd, d, dir,
4886 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4887 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4889 nvs = ncg[cdd] + nat[cdd]*nvec;
4890 i = rbuf[0] + rbuf[1] *nvec;
4891 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4893 /* Communicate cgcm and state */
4894 dd_sendrecv_rvec(dd, d, dir,
4895 comm->cgcm_state[cdd], nvs,
4896 comm->vbuf.v+nvr, i);
4897 ncg_recv += rbuf[0];
4898 nat_recv += rbuf[1];
4902 /* Process the received charge groups */
4904 for (cg = 0; cg < ncg_recv; cg++)
4906 flag = comm->buf_int[cg*DD_CGIBS+1];
4908 if (dim >= npbcdim && dd->nc[dim] > 2)
4910 /* No pbc in this dim and more than one domain boundary.
4911 * We do a separate check if a charge group didn't move too far.
4913 if (((flag & DD_FLAG_FW(d)) &&
4914 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4915 ((flag & DD_FLAG_BW(d)) &&
4916 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4918 cg_move_error(fplog, dd, step, cg, dim,
4919 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4921 comm->vbuf.v[buf_pos],
4922 comm->vbuf.v[buf_pos],
4923 comm->vbuf.v[buf_pos][dim]);
4930 /* Check which direction this cg should go */
4931 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4935 /* The cell boundaries for dimension d2 are not equal
4936 * for each cell row of the lower dimension(s),
4937 * therefore we might need to redetermine where
4938 * this cg should go.
4941 /* If this cg crosses the box boundary in dimension d2
4942 * we can use the communicated flag, so we do not
4943 * have to worry about pbc.
4945 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4946 (flag & DD_FLAG_FW(d2))) ||
4947 (dd->ci[dim2] == 0 &&
4948 (flag & DD_FLAG_BW(d2)))))
4950 /* Clear the two flags for this dimension */
4951 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4952 /* Determine the location of this cg
4953 * in lattice coordinates
4955 pos_d = comm->vbuf.v[buf_pos][dim2];
4958 for (d3 = dim2+1; d3 < DIM; d3++)
4961 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4964 /* Check of we are not at the box edge.
4965 * pbc is only handled in the first step above,
4966 * but this check could move over pbc while
4967 * the first step did not due to different rounding.
4969 if (pos_d >= cell_x1[dim2] &&
4970 dd->ci[dim2] != dd->nc[dim2]-1)
4972 flag |= DD_FLAG_FW(d2);
4974 else if (pos_d < cell_x0[dim2] &&
4977 flag |= DD_FLAG_BW(d2);
4979 comm->buf_int[cg*DD_CGIBS+1] = flag;
4982 /* Set to which neighboring cell this cg should go */
4983 if (flag & DD_FLAG_FW(d2))
4987 else if (flag & DD_FLAG_BW(d2))
4989 if (dd->nc[dd->dim[d2]] > 2)
5001 nrcg = flag & DD_FLAG_NRCG;
5004 if (home_pos_cg+1 > dd->cg_nalloc)
5006 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5007 srenew(dd->index_gl, dd->cg_nalloc);
5008 srenew(dd->cgindex, dd->cg_nalloc+1);
5010 /* Set the global charge group index and size */
5011 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5012 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5013 /* Copy the state from the buffer */
5014 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5015 if (fr->cutoff_scheme == ecutsGROUP)
5018 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5022 /* Set the cginfo */
5023 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5024 dd->index_gl[home_pos_cg]);
5027 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5030 if (home_pos_at+nrcg > state->nalloc)
5032 dd_realloc_state(state, f, home_pos_at+nrcg);
5034 for (i = 0; i < nrcg; i++)
5036 copy_rvec(comm->vbuf.v[buf_pos++],
5037 state->x[home_pos_at+i]);
5041 for (i = 0; i < nrcg; i++)
5043 copy_rvec(comm->vbuf.v[buf_pos++],
5044 state->v[home_pos_at+i]);
5049 for (i = 0; i < nrcg; i++)
5051 copy_rvec(comm->vbuf.v[buf_pos++],
5052 state->sd_X[home_pos_at+i]);
5057 for (i = 0; i < nrcg; i++)
5059 copy_rvec(comm->vbuf.v[buf_pos++],
5060 state->cg_p[home_pos_at+i]);
5064 home_pos_at += nrcg;
5068 /* Reallocate the buffers if necessary */
5069 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5071 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5072 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5074 nvr = ncg[mc] + nat[mc]*nvec;
5075 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5077 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5078 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5080 /* Copy from the receive to the send buffers */
5081 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5082 comm->buf_int + cg*DD_CGIBS,
5083 DD_CGIBS*sizeof(int));
5084 memcpy(comm->cgcm_state[mc][nvr],
5085 comm->vbuf.v[buf_pos],
5086 (1+nrcg*nvec)*sizeof(rvec));
5087 buf_pos += 1 + nrcg*nvec;
5094 /* With sorting (!bCompact) the indices are now only partially up to date
5095 * and ncg_home and nat_home are not the real count, since there are
5096 * "holes" in the arrays for the charge groups that moved to neighbors.
5098 if (fr->cutoff_scheme == ecutsVERLET)
5100 moved = get_moved(comm, home_pos_cg);
5102 for (i = dd->ncg_home; i < home_pos_cg; i++)
5107 dd->ncg_home = home_pos_cg;
5108 dd->nat_home = home_pos_at;
5113 "Finished repartitioning: cgs moved out %d, new home %d\n",
5114 *ncg_moved, dd->ncg_home-*ncg_moved);
5119 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5121 dd->comm->cycl[ddCycl] += cycles;
5122 dd->comm->cycl_n[ddCycl]++;
5123 if (cycles > dd->comm->cycl_max[ddCycl])
5125 dd->comm->cycl_max[ddCycl] = cycles;
5129 static double force_flop_count(t_nrnb *nrnb)
5136 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5138 /* To get closer to the real timings, we half the count
5139 * for the normal loops and again half it for water loops.
5142 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5144 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5148 sum += nrnb->n[i]*0.50*cost_nrnb(i);
5151 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5154 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5156 sum += nrnb->n[i]*cost_nrnb(i);
5159 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5161 sum += nrnb->n[i]*cost_nrnb(i);
5167 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5169 if (dd->comm->eFlop)
5171 dd->comm->flop -= force_flop_count(nrnb);
5174 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5176 if (dd->comm->eFlop)
5178 dd->comm->flop += force_flop_count(nrnb);
5183 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5187 for (i = 0; i < ddCyclNr; i++)
5189 dd->comm->cycl[i] = 0;
5190 dd->comm->cycl_n[i] = 0;
5191 dd->comm->cycl_max[i] = 0;
5194 dd->comm->flop_n = 0;
/* Gather the per-rank load measurements up the DD dimension hierarchy.
 * Each rank packs its force load (and, with DLB, cell fractions and
 * boundaries; with separate PME ranks, PP/PME cycle counts) into sbuf,
 * gathers a row along each DD dimension to the row root, and the row
 * root reduces sum/max/min statistics into comm->load. Finally the
 * global accumulators (load_step, load_sum, load_max, ...) are updated.
 *
 * NOTE(review): this extraction is decimated — braces, else-branches and
 * some statements between the numbered fragments are missing; read the
 * fragment order, not the (broken) syntax.
 */
5197 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5199 gmx_domdec_comm_t *comm;
5200 gmx_domdec_load_t *load;
5201 gmx_domdec_root_t *root = NULL;
5202 int d, dim, cid, i, pos;
5203 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5208 fprintf(debug, "get_load_distribution start\n");
5211 wallcycle_start(wcycle, ewcDDCOMMLOAD);
5215 bSepPME = (dd->pme_nodeid >= 0);
/* Walk dimensions from last to first so lower dimensions can forward
 * the already-reduced loads of the higher ones. */
5217 for (d = dd->ndim-1; d >= 0; d--)
5220 /* Check if we participate in the communication in this dimension */
5221 if (d == dd->ndim-1 ||
5222 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5224 load = &comm->load[d];
5227 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
/* Pack the send buffer; the layout depends on DLB and separate PME */
5230 if (d == dd->ndim-1)
5232 sbuf[pos++] = dd_force_load(comm);
5233 sbuf[pos++] = sbuf[0];
5236 sbuf[pos++] = sbuf[0];
5237 sbuf[pos++] = cell_frac;
5240 sbuf[pos++] = comm->cell_f_max0[d];
5241 sbuf[pos++] = comm->cell_f_min1[d];
5246 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5247 sbuf[pos++] = comm->cycl[ddCyclPME];
5252 sbuf[pos++] = comm->load[d+1].sum;
5253 sbuf[pos++] = comm->load[d+1].max;
5256 sbuf[pos++] = comm->load[d+1].sum_m;
5257 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5258 sbuf[pos++] = comm->load[d+1].flags;
5261 sbuf[pos++] = comm->cell_f_max0[d];
5262 sbuf[pos++] = comm->cell_f_min1[d];
5267 sbuf[pos++] = comm->load[d+1].mdf;
5268 sbuf[pos++] = comm->load[d+1].pme;
5272 /* Communicate a row in DD direction d.
5273 * The communicators are setup such that the root always has rank 0.
5276 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5277 load->load, load->nload*sizeof(float), MPI_BYTE,
5278 0, comm->mpi_comm_load[d]);
5280 if (dd->ci[dim] == dd->master_ci[dim])
5282 /* We are the root, process this row */
5283 if (comm->bDynLoadBal)
5285 root = comm->root[d];
/* Reduce the gathered per-cell entries into the row statistics */
5295 for (i = 0; i < dd->nc[dim]; i++)
5297 load->sum += load->load[pos++];
5298 load->max = max(load->max, load->load[pos]);
5304 /* This direction could not be load balanced properly,
5305 * therefore we need to use the maximum iso the average load.
5307 load->sum_m = max(load->sum_m, load->load[pos]);
5311 load->sum_m += load->load[pos];
5314 load->cvol_min = min(load->cvol_min, load->load[pos]);
5318 load->flags = (int)(load->load[pos++] + 0.5);
5322 root->cell_f_max0[i] = load->load[pos++];
5323 root->cell_f_min1[i] = load->load[pos++];
5328 load->mdf = max(load->mdf, load->load[pos]);
5330 load->pme = max(load->pme, load->load[pos]);
5334 if (comm->bDynLoadBal && root->bLimited)
5336 load->sum_m *= dd->nc[dim];
5337 load->flags |= (1<<d);
/* Update the global accumulators on the DD master */
5345 comm->nload += dd_load_count(comm);
5346 comm->load_step += comm->cycl[ddCyclStep];
5347 comm->load_sum += comm->load[0].sum;
5348 comm->load_max += comm->load[0].max;
5349 if (comm->bDynLoadBal)
5351 for (d = 0; d < dd->ndim; d++)
5353 if (comm->load[0].flags & (1<<d))
5355 comm->load_lim[d]++;
5361 comm->load_mdf += comm->load[0].mdf;
5362 comm->load_pme += comm->load[0].pme;
5366 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5370 fprintf(debug, "get_load_distribution finished\n");
5374 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5376 /* Return the relative performance loss on the total run time
5377 * due to the force calculation load imbalance.
5379 if (dd->comm->nload > 0)
5382 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5383 (dd->comm->load_step*dd->nnodes);
/* Print the average DD load-balance statistics at the end of the run:
 * average force imbalance, time lost waiting, DLB limitation per
 * dimension, PME mesh/force ratio, and advisory NOTEs when the loss
 * exceeds DD_PERF_LOSS. Output goes to both fplog and stderr; only the
 * DD master with collected samples prints.
 *
 * NOTE(review): decimated extraction — intermediate lines (braces,
 * buf declaration, some assignments such as npp) are missing.
 */
5391 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5394 int npp, npme, nnodes, d, limp;
5395 float imbal, pme_f_ratio, lossf, lossp = 0;
5397 gmx_domdec_comm_t *comm;
5400 if (DDMASTER(dd) && comm->nload > 0)
5403 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5404 nnodes = npp + npme;
5405 imbal = comm->load_max*npp/comm->load_sum - 1;
5406 lossf = dd_force_imb_perf_loss(dd);
5407 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5408 fprintf(fplog, "%s", buf);
5409 fprintf(stderr, "\n");
5410 fprintf(stderr, "%s", buf);
5411 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5412 fprintf(fplog, "%s", buf);
5413 fprintf(stderr, "%s", buf);
/* With DLB: report per-dimension fraction of steps where balancing hit
 * the cell-size limits (rounded to whole percent). */
5415 if (comm->bDynLoadBal)
5417 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5418 for (d = 0; d < dd->ndim; d++)
5420 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5421 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5427 sprintf(buf+strlen(buf), "\n");
5428 fprintf(fplog, "%s", buf);
5429 fprintf(stderr, "%s", buf);
/* With separate PME ranks: PME/force load ratio and PP/PME wait loss */
5433 pme_f_ratio = comm->load_pme/comm->load_mdf;
5434 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5437 lossp *= (float)npme/(float)nnodes;
5441 lossp *= (float)npp/(float)nnodes;
5443 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5444 fprintf(fplog, "%s", buf);
5445 fprintf(stderr, "%s", buf);
5446 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5447 fprintf(fplog, "%s", buf);
5448 fprintf(stderr, "%s", buf);
5450 fprintf(fplog, "\n");
5451 fprintf(stderr, "\n");
5453 if (lossf >= DD_PERF_LOSS)
5456 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5457 " in the domain decomposition.\n", lossf*100);
5458 if (!comm->bDynLoadBal)
5460 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5464 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5466 fprintf(fplog, "%s\n", buf);
5467 fprintf(stderr, "%s\n", buf);
/* lossp sign encodes which side waits: <0 means PME had less work */
5469 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
5472 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5473 " had %s work to do than the PP nodes.\n"
5474 " You might want to %s the number of PME nodes\n"
5475 " or %s the cut-off and the grid spacing.\n",
5477 (lossp < 0) ? "less" : "more",
5478 (lossp < 0) ? "decrease" : "increase",
5479 (lossp < 0) ? "decrease" : "increase");
5480 fprintf(fplog, "%s\n", buf);
5481 fprintf(stderr, "%s\n", buf);
5486 static float dd_vol_min(gmx_domdec_t *dd)
5488 return dd->comm->load[0].cvol_min*dd->nnodes;
5491 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5493 return dd->comm->load[0].flags;
5496 static float dd_f_imbal(gmx_domdec_t *dd)
5498 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5501 float dd_pme_f_ratio(gmx_domdec_t *dd)
5503 if (dd->comm->cycl_n[ddCyclPME] > 0)
5505 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
/* Write a one-line DD load report for this step to the log file:
 * optional DLB-limitation note, min/aver cell volume (with '!' when
 * limited), force imbalance percentage and, when measured, the PME
 * mesh/force ratio.
 *
 * NOTE(review): decimated extraction — the flags!=0 guard and buf
 * declaration between the numbered fragments are missing.
 */
5513 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5518 flags = dd_load_flags(dd);
5522 "DD load balancing is limited by minimum cell size in dimension");
5523 for (d = 0; d < dd->ndim; d++)
5527 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5530 fprintf(fplog, "\n");
5532 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5533 if (dd->comm->bDynLoadBal)
5535 fprintf(fplog, " vol min/aver %5.3f%c",
5536 dd_vol_min(dd), flags ? '!' : ' ');
5538 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5539 if (dd->comm->cycl_n[ddCyclPME])
5541 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5543 fprintf(fplog, "\n\n");
5546 static void dd_print_load_verbose(gmx_domdec_t *dd)
5548 if (dd->comm->bDynLoadBal)
5550 fprintf(stderr, "vol %4.2f%c ",
5551 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5553 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5554 if (dd->comm->cycl_n[ddCyclPME])
5556 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
/* Create the MPI communicator for one row of cells along DD dimension
 * dim_ind (all ranks sharing the other coordinates in loc), and on the
 * row root allocate the DLB bookkeeping arrays (cell boundaries etc.)
 * and the load-gather buffer.
 *
 * NOTE(review): decimated extraction — loop body advancing loc_c[dim],
 * MPI declarations and braces are missing between fragments.
 */
5561 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5566 gmx_domdec_root_t *root;
5567 gmx_bool bPartOfGroup = FALSE;
5569 dim = dd->dim[dim_ind];
5570 copy_ivec(loc, loc_c);
/* Scan the row: if our rank lies on it, we join the split group */
5571 for (i = 0; i < dd->nc[dim]; i++)
5574 rank = dd_index(dd->nc, loc_c);
5575 if (rank == dd->rank)
5577 /* This process is part of the group */
5578 bPartOfGroup = TRUE;
/* MPI_UNDEFINED keeps non-members out of the new communicator */
5581 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5585 dd->comm->mpi_comm_load[dim_ind] = c_row;
5586 if (dd->comm->eDLB != edlbNO)
5588 if (dd->ci[dim] == dd->master_ci[dim])
5590 /* This is the root process of this row */
5591 snew(dd->comm->root[dim_ind], 1);
5592 root = dd->comm->root[dim_ind];
5593 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5594 snew(root->old_cell_f, dd->nc[dim]+1);
5595 snew(root->bCellMin, dd->nc[dim]);
5598 snew(root->cell_f_max0, dd->nc[dim]);
5599 snew(root->cell_f_min1, dd->nc[dim]);
5600 snew(root->bound_min, dd->nc[dim]);
5601 snew(root->bound_max, dd->nc[dim]);
5603 snew(root->buf_ncd, dd->nc[dim]);
5607 /* This is not a root process, we only need to receive cell_f */
5608 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5611 if (dd->ci[dim] == dd->master_ci[dim])
5613 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
/* Set up the communicator of PP ranks that share a GPU on the same
 * physical node (mpi_comm_gpu_shared / nrank_gpu_shared), used by DLB
 * to account for shared GPU resources. No-op without PP duty or GPUs.
 *
 * NOTE(review): decimated extraction — the enclosing #ifdef GMX_MPI,
 * early return and some declarations are missing between fragments.
 */
5619 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5620 const gmx_hw_info_t gmx_unused *hwinfo,
5621 const gmx_hw_opt_t gmx_unused *hw_opt)
5624 int physicalnode_id_hash;
5627 MPI_Comm mpi_comm_pp_physicalnode;
5629 if (!(cr->duty & DUTY_PP) ||
5630 hw_opt->gpu_opt.ncuda_dev_use == 0)
5632 /* Only PP nodes (currently) use GPUs.
5633 * If we don't have GPUs, there are no resources to share.
5638 physicalnode_id_hash = gmx_physicalnode_id_hash();
5640 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5646 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5647 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5648 dd->rank, physicalnode_id_hash, gpu_id);
5650 /* Split the PP communicator over the physical nodes */
5651 /* TODO: See if we should store this (before), as it's also used for
5652 * for the nodecomm summution.
5654 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5655 &mpi_comm_pp_physicalnode);
/* Second split: ranks using the same gpu_id on this node end up together */
5656 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5657 &dd->comm->mpi_comm_gpu_shared);
5658 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5659 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5663 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5666 /* Note that some ranks could share a GPU, while others don't */
5668 if (dd->comm->nrank_gpu_shared == 1)
5670 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
/* Allocate per-dimension load records and create the row communicators
 * for each DD dimension (0, then all rows in dim 1, then all rows in
 * dims 1x2) via make_load_communicator().
 *
 * NOTE(review): decimated extraction — loc setup, loc[] index updates
 * and the #ifdef GMX_MPI guard are missing between fragments.
 */
5675 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5678 int dim0, dim1, i, j;
5683 fprintf(debug, "Making load communicators\n");
5686 snew(dd->comm->load, dd->ndim);
5687 snew(dd->comm->mpi_comm_load, dd->ndim);
5690 make_load_communicator(dd, 0, loc);
5694 for (i = 0; i < dd->nc[dim0]; i++)
5697 make_load_communicator(dd, 1, loc);
5703 for (i = 0; i < dd->nc[dim0]; i++)
5707 for (j = 0; j < dd->nc[dim1]; j++)
5710 make_load_communicator(dd, 2, loc);
5717 fprintf(debug, "Finished making load communicators\n");
/* Set up the DD grid: forward/backward neighbor ranks per dimension,
 * the communication zones (1, 2, 4 or 8 depending on 1/2/3D
 * decomposition), their shifts, and the i-zone (neighbor-search pair)
 * ranges with allowed shift bounds. Finally allocates DLB row roots
 * and builds the load communicators when load recording is on.
 *
 * NOTE(review): decimated extraction — many lines (tmp/s declarations,
 * nzone/nzonep assignments per ndim case, braces) are missing.
 */
5722 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5725 int d, dim, i, j, m;
5728 ivec dd_zp[DD_MAXIZONE];
5729 gmx_domdec_zones_t *zones;
5730 gmx_domdec_ns_ranges_t *izone;
/* Neighbor ranks: +1 and -1 along each decomposed dimension (periodic) */
5732 for (d = 0; d < dd->ndim; d++)
5735 copy_ivec(dd->ci, tmp);
5736 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5737 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5738 copy_ivec(dd->ci, tmp);
5739 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5740 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5743 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5746 dd->neighbor[d][1]);
5752 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5754 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5755 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Pick the i-zone table for 3D, 2D or 1D decomposition */
5762 for (i = 0; i < nzonep; i++)
5764 copy_ivec(dd_zp3[i], dd_zp[i]);
5770 for (i = 0; i < nzonep; i++)
5772 copy_ivec(dd_zp2[i], dd_zp[i]);
5778 for (i = 0; i < nzonep; i++)
5780 copy_ivec(dd_zp1[i], dd_zp[i]);
5784 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5789 zones = &dd->comm->zones;
5791 for (i = 0; i < nzone; i++)
5794 clear_ivec(zones->shift[i]);
5795 for (d = 0; d < dd->ndim; d++)
5797 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5802 for (i = 0; i < nzone; i++)
5804 for (d = 0; d < DIM; d++)
5806 s[d] = dd->ci[d] - zones->shift[i][d];
5811 else if (s[d] >= dd->nc[d])
5817 zones->nizone = nzonep;
5818 for (i = 0; i < zones->nizone; i++)
5820 if (dd_zp[i][0] != i)
5822 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5824 izone = &zones->izone[i];
5825 izone->j0 = dd_zp[i][1];
5826 izone->j1 = dd_zp[i][2];
5827 for (dim = 0; dim < DIM; dim++)
5829 if (dd->nc[dim] == 1)
5831 /* All shifts should be allowed */
5832 izone->shift0[dim] = -1;
5833 izone->shift1[dim] = 1;
/* NOTE(review): the d-indexed fragment below (5838-5844, K&R braces,
 * different style) looks like commented-out legacy code upstream —
 * confirm before treating it as live. */
5838 izone->shift0[d] = 0;
5839 izone->shift1[d] = 0;
5840 for(j=izone->j0; j<izone->j1; j++) {
5841 if (dd->shift[j][d] > dd->shift[i][d])
5842 izone->shift0[d] = -1;
5843 if (dd->shift[j][d] < dd->shift[i][d])
5844 izone->shift1[d] = 1;
5850 /* Assume the shift are not more than 1 cell */
5851 izone->shift0[dim] = 1;
5852 izone->shift1[dim] = -1;
5853 for (j = izone->j0; j < izone->j1; j++)
5855 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5856 if (shift_diff < izone->shift0[dim])
5858 izone->shift0[dim] = shift_diff;
5860 if (shift_diff > izone->shift1[dim])
5862 izone->shift1[dim] = shift_diff;
5869 if (dd->comm->eDLB != edlbNO)
5871 snew(dd->comm->root, dd->ndim);
5874 if (dd->comm->bRecordLoad)
5876 make_load_communicators(dd);
/* Create the PP-rank communicator, either as an MPI Cartesian
 * communicator (bCartesianPP) or reusing the group communicator, set
 * dd->rank/dd->ci, and determine the DD master rank/coordinates.
 * Builds the ddindex<->nodeid lookup tables needed when a combined
 * PP+PME Cartesian layout is used.
 *
 * NOTE(review): decimated extraction — declarations (comm_cart, buf,
 * periods, rank), #ifdef GMX_MPI guards and braces are missing.
 */
5880 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
5883 gmx_domdec_comm_t *comm;
5894 if (comm->bCartesianPP)
5896 /* Set up cartesian communication for the particle-particle part */
5899 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5900 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5903 for (i = 0; i < DIM; i++)
5907 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5909 /* We overwrite the old communicator with the new cartesian one */
5910 cr->mpi_comm_mygroup = comm_cart;
5913 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5914 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5916 if (comm->bCartesianPP_PME)
5918 /* Since we want to use the original cartesian setup for sim,
5919 * and not the one after split, we need to make an index.
5921 snew(comm->ddindex2ddnodeid, dd->nnodes);
5922 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5923 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5924 /* Get the rank of the DD master,
5925 * above we made sure that the master node is a PP node.
5935 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5937 else if (comm->bCartesianPP)
5939 if (cr->npmenodes == 0)
5941 /* The PP communicator is also
5942 * the communicator for this simulation
5944 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5946 cr->nodeid = dd->rank;
5948 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5950 /* We need to make an index to go from the coordinates
5951 * to the nodeid of this simulation.
5953 snew(comm->ddindex2simnodeid, dd->nnodes);
5954 snew(buf, dd->nnodes);
5955 if (cr->duty & DUTY_PP)
5957 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5959 /* Communicate the ddindex to simulation nodeid index */
5960 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5961 cr->mpi_comm_mysim);
5964 /* Determine the master coordinates and rank.
5965 * The DD master should be the same node as the master of this sim.
5967 for (i = 0; i < dd->nnodes; i++)
5969 if (comm->ddindex2simnodeid[i] == 0)
5971 ddindex2xyz(dd->nc, i, dd->master_ci);
5972 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
5977 fprintf(debug, "The master rank is %d\n", dd->masterrank);
5982 /* No Cartesian communicators */
5983 /* We use the rank in dd->comm->all as DD index */
5984 ddindex2xyz(dd->nc, dd->rank, dd->ci);
5985 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
5987 clear_ivec(dd->master_ci);
5994 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5995 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6000 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6001 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* On ranks that did not build it in make_pp_communicator (PME-only
 * ranks with a Cartesian PP but no combined PP-PME layout), obtain the
 * ddindex -> simulation-nodeid table via an all-reduce of per-rank
 * contributions.
 *
 * NOTE(review): decimated extraction — #ifdef GMX_MPI guard, dd/comm
 * assignments and the sfree(buf) epilogue are missing.
 */
6005 static void receive_ddindex2simnodeid(t_commrec *cr)
6009 gmx_domdec_comm_t *comm;
6016 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6018 snew(comm->ddindex2simnodeid, dd->nnodes);
6019 snew(buf, dd->nnodes);
6020 if (cr->duty & DUTY_PP)
6022 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6025 /* Communicate the ddindex to simulation nodeid index */
6026 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6027 cr->mpi_comm_mysim);
/* Allocate and return the DD master data structure: per-rank charge
 * group counts/indices, atom counts, gather buffers and master cell
 * boundaries. The vbuf coordinate buffer is only needed for the
 * collective (non-sendrecv) distribution path.
 *
 * NOTE(review): decimated extraction — snew(ma, 1), cg/nat index
 * allocations and the branch structure around vbuf are missing.
 */
6034 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6035 int ncg, int natoms)
6037 gmx_domdec_master_t *ma;
6042 snew(ma->ncg, dd->nnodes);
6043 snew(ma->index, dd->nnodes+1);
6045 snew(ma->nat, dd->nnodes);
6046 snew(ma->ibuf, dd->nnodes*2);
6047 snew(ma->cell_x, DIM);
6048 for (i = 0; i < DIM; i++)
6050 snew(ma->cell_x[i], dd->nc[i]+1);
6053 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6059 snew(ma->vbuf, natoms);
/* Split the simulation communicator into PP and PME groups.
 * When the PME count divides the grid, optionally build a combined
 * Cartesian PP+PME layout (stacking PME ranks along y or z); otherwise
 * assign PME duty by node order (PP-first or interleaved) and split
 * with MPI_Comm_split. Sets cr->duty and cr->mpi_comm_mygroup.
 *
 * NOTE(review): decimated extraction — declarations (bDiv, periods,
 * comm_cart, rank), #ifdef GMX_MPI guards, duty assignments for the
 * PP branch and braces are missing between fragments.
 */
6065 static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
6066 int gmx_unused reorder)
6069 gmx_domdec_comm_t *comm;
6080 if (comm->bCartesianPP)
6082 for (i = 1; i < DIM; i++)
6084 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6086 if (bDiv[YY] || bDiv[ZZ])
6088 comm->bCartesianPP_PME = TRUE;
6089 /* If we have 2D PME decomposition, which is always in x+y,
6090 * we stack the PME only nodes in z.
6091 * Otherwise we choose the direction that provides the thinnest slab
6092 * of PME only nodes as this will have the least effect
6093 * on the PP communication.
6094 * But for the PME communication the opposite might be better.
6096 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6098 dd->nc[YY] > dd->nc[ZZ]))
6100 comm->cartpmedim = ZZ;
6104 comm->cartpmedim = YY;
6106 comm->ntot[comm->cartpmedim]
6107 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6111 fprintf(fplog, "#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6113 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6118 if (comm->bCartesianPP_PME)
6122 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6125 for (i = 0; i < DIM; i++)
6129 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6132 MPI_Comm_rank(comm_cart, &rank);
/* Rank 0 must stay the master; reordering it away is not supported */
6133 if (MASTERNODE(cr) && rank != 0)
6135 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6138 /* With this assigment we loose the link to the original communicator
6139 * which will usually be MPI_COMM_WORLD, unless have multisim.
6141 cr->mpi_comm_mysim = comm_cart;
6142 cr->sim_nodeid = rank;
6144 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6148 fprintf(fplog, "Cartesian nodeid %d, coordinates %d %d %d\n\n",
6149 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6152 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6156 if (cr->npmenodes == 0 ||
6157 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6159 cr->duty = DUTY_PME;
6162 /* Split the sim communicator into PP and PME only nodes */
6163 MPI_Comm_split(cr->mpi_comm_mysim,
6165 dd_index(comm->ntot, dd->ci),
6166 &cr->mpi_comm_mygroup);
/* Non-Cartesian PP<->PME: choose ranks by node order instead */
6170 switch (dd_node_order)
6175 fprintf(fplog, "Order of the nodes: PP first, PME last\n");
6178 case ddnoINTERLEAVE:
6179 /* Interleave the PP-only and PME-only nodes,
6180 * as on clusters with dual-core machines this will double
6181 * the communication bandwidth of the PME processes
6182 * and thus speed up the PP <-> PME and inter PME communication.
6186 fprintf(fplog, "Interleaving PP and PME nodes\n");
6188 comm->pmenodes = dd_pmenodes(cr);
6193 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6196 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6198 cr->duty = DUTY_PME;
6205 /* Split the sim communicator into PP and PME only nodes */
6206 MPI_Comm_split(cr->mpi_comm_mysim,
6209 &cr->mpi_comm_mygroup);
6210 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6216 fprintf(fplog, "This is a %s only node\n\n",
6217 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
/* Top-level communicator setup: decide on Cartesian PP layout, split
 * off PME ranks if requested, create the PP communicator, connect each
 * PP rank to its PME rank, and allocate the master data structure on
 * the DD master.
 *
 * NOTE(review): decimated extraction — dd/comm assignments, the
 * CartReorder declaration context and braces are missing.
 */
6221 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6224 gmx_domdec_comm_t *comm;
6230 copy_ivec(dd->nc, comm->ntot);
6232 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6233 comm->bCartesianPP_PME = FALSE;
6235 /* Reorder the nodes by default. This might change the MPI ranks.
6236 * Real reordering is only supported on very few architectures,
6237 * Blue Gene is one of them.
6239 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6241 if (cr->npmenodes > 0)
6243 /* Split the communicator into a PP and PME part */
6244 split_communicator(fplog, cr, dd_node_order, CartReorder);
6245 if (comm->bCartesianPP_PME)
6247 /* We (possibly) reordered the nodes in split_communicator,
6248 * so it is no longer required in make_pp_communicator.
6250 CartReorder = FALSE;
6255 /* All nodes do PP and PME */
6257 /* We do not require separate communicators */
6258 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6262 if (cr->duty & DUTY_PP)
6264 /* Copy or make a new PP communicator */
6265 make_pp_communicator(fplog, cr, CartReorder);
6269 receive_ddindex2simnodeid(cr);
6272 if (!(cr->duty & DUTY_PME))
6274 /* Set up the commnuication to our PME node */
6275 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6276 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6279 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6280 dd->pme_nodeid, dd->pme_receive_vir_ener);
6285 dd->pme_nodeid = -1;
/* The DD master also holds the global charge-group/atom bookkeeping */
6290 dd->ma = init_gmx_domdec_master_t(dd,
6292 comm->cgs_gl.index[comm->cgs_gl.nr]);
/* Parse the static load balancing fractions for one direction (from
 * the -ddcsx/-ddcsy/-ddcsz strings): read nc numbers, normalize them
 * to fractions of the total, log them, and return the allocated array
 * (NULL when nc <= 1 or no string was given).
 *
 * NOTE(review): decimated extraction — snew(slb_frac, nc), the
 * normalization loop and the return statements are missing.
 */
6296 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6298 real *slb_frac, tot;
6303 if (nc > 1 && size_string != NULL)
6307 fprintf(fplog, "Using static load balancing for the %s direction\n",
6312 for (i = 0; i < nc; i++)
/* %n records how many chars were consumed so we can advance the string */
6315 sscanf(size_string, "%lf%n", &dbl, &n);
6318 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6327 fprintf(fplog, "Relative cell sizes:");
6329 for (i = 0; i < nc; i++)
6334 fprintf(fplog, " %5.3f", slb_frac[i]);
6339 fprintf(fplog, "\n");
6346 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6349 gmx_mtop_ilistloop_t iloop;
6353 iloop = gmx_mtop_ilistloop_init(mtop);
6354 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6356 for (ftype = 0; ftype < F_NRE; ftype++)
6358 if ((interaction_function[ftype].flags & IF_BOND) &&
6361 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
/* Read an integer value from environment variable env_var.
 *
 * Returns def when the variable is not set. When it is set but does
 * not parse as an integer, returns 1 (treated as "enabled"). When set,
 * the value found is reported to fplog (if non-NULL).
 * (Reconstructed: declarations, braces and the return were lost in
 * extraction.)
 */
static int dd_getenv(FILE *fplog, const char *env_var, int def)
{
    char *val;
    int   nst;

    nst = def;
    val = getenv(env_var);
    if (val)
    {
        if (sscanf(val, "%d", &nst) <= 0)
        {
            nst = 1;
        }
        if (fplog)
        {
            fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
                    env_var, val, nst);
        }
    }

    return nst;
}
6392 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6396 fprintf(stderr, "\n%s\n", warn_string);
6400 fprintf(fplog, "\n%s\n", warn_string);
/* Validate input-record settings against DD limitations: screw pbc is
 * only allowed with decomposition along x, simple neighbor searching
 * and nstlist=0 are unsupported, and angular comm removal gets a
 * warning under pbc.
 */
6404 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6405 t_inputrec *ir, FILE *fplog)
6407 if (ir->ePBC == epbcSCREW &&
6408 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6410 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6413 if (ir->ns_type == ensSIMPLE)
6415 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6418 if (ir->nstlist == 0)
6420 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6423 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6425 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6429 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6434 r = ddbox->box_size[XX];
6435 for (di = 0; di < dd->ndim; di++)
6438 /* Check using the initial average cell size */
6439 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
/* Determine the dynamic load balancing mode from the -dlb option
 * ('a'uto / 'n'o / 'y'es) and downgrade or warn when DLB is not
 * usable: reruns, non-dynamic integrators, missing cycle counting, or
 * a reproducibility request. Returns the resulting edlb* value.
 *
 * NOTE(review): decimated extraction — the eDLB declaration, early
 * returns and several branch closers are missing between fragments.
 */
6445 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6446 const char *dlb_opt, gmx_bool bRecordLoad,
6447 unsigned long Flags, t_inputrec *ir)
6455 case 'a': eDLB = edlbAUTO; break;
6456 case 'n': eDLB = edlbNO; break;
6457 case 'y': eDLB = edlbYES; break;
6458 default: gmx_incons("Unknown dlb_opt");
6461 if (Flags & MD_RERUN)
6466 if (!EI_DYNAMICS(ir->eI))
6468 if (eDLB == edlbYES)
6470 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6471 dd_warning(cr, fplog, buf);
6479 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6484 if (Flags & MD_REPRODUCIBLE)
6491 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6495 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6498 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
/* Fill dd->dim[] with the dimensions that are actually decomposed
 * (nc > 1) and set dd->ndim. Order is x,y,z by default; the
 * GMX_DD_ORDER_ZYX environment variable reverses it (testing aid). */
6506 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6511     if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6513         /* Decomposition order z,y,x */
6516             fprintf(fplog, "Using domain decomposition order z, y, x\n");
6518         for (dim = DIM-1; dim >= 0; dim--)
6520             if (dd->nc[dim] > 1)
6522                 dd->dim[dd->ndim++] = dim;
6528         /* Decomposition order x,y,z */
6529         for (dim = 0; dim < DIM; dim++)
6531             if (dd->nc[dim] > 1)
6533                 dd->dim[dd->ndim++] = dim;
/* Allocate and zero-initialize a gmx_domdec_comm_t: per-direction
 * charge-group communication buffers (2 directions per dimension),
 * the generic int/rvec buffers and the load-balancing accumulators.
 * The caller (init_domain_decomposition) owns the returned struct. */
6539 static gmx_domdec_comm_t *init_dd_comm()
6541     gmx_domdec_comm_t *comm;
/* One flag and one state buffer per dimension and direction */
6545     snew(comm->cggl_flag, DIM*2);
6546     snew(comm->cgcm_state, DIM*2);
6547     for (i = 0; i < DIM*2; i++)
6549         comm->cggl_flag_nalloc[i]  = 0;
6550         comm->cgcm_state_nalloc[i] = 0;
6553     comm->nalloc_int = 0;
6554     comm->buf_int    = NULL;
6556     vec_rvec_init(&comm->vbuf);
6558     comm->n_load_have    = 0;
6559     comm->n_load_collect = 0;
/* Zero the per-category atom-count accumulators */
6561     for (i = 0; i < ddnatNR-ddnatZONE; i++)
6563         comm->sum_nat[i] = 0;
6567     comm->load_step = 0;
6570     clear_ivec(comm->load_lim);
/* Set up the domain decomposition master structure.
 *
 * Determines cut-offs and cell-size limits (bonded interactions,
 * P-LINCS constraints), chooses or validates the DD grid and the
 * separate-PME-rank split, and initializes the communication struct.
 *
 * fplog             log file (may be NULL on non-master ranks)
 * Flags             MD_* run flags (bonded comm/check, rerun, ...)
 * comm_distance_min user-supplied -rdd minimum bonded comm distance
 * rconstr           user-supplied -rcon constraint distance (<=0: estimate)
 * dlb_opt/dlb_scale -dlb mode and -dds shrink limit
 * sizex/sizey/sizez static load balancing cell-size strings
 * npme_x/npme_y     out: the PME decomposition actually chosen
 *
 * Returns the newly allocated gmx_domdec_t; aborts collectively via
 * gmx_fatal_collective on an infeasible grid. */
6577 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6578                                         unsigned long Flags,
6580                                         real comm_distance_min, real rconstr,
6581                                         const char *dlb_opt, real dlb_scale,
6582                                         const char *sizex, const char *sizey, const char *sizez,
6583                                         gmx_mtop_t *mtop, t_inputrec *ir,
6584                                         matrix box, rvec *x,
6586                                         int *npme_x, int *npme_y)
6589     gmx_domdec_comm_t *comm;
6592     real               r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6599             "\nInitializing Domain Decomposition on %d nodes\n", cr->nnodes);
6604     dd->comm = init_dd_comm();
/* NOTE: init_dd_comm() already allocates comm->cggl_flag and
 * comm->cgcm_state; the duplicate snew() calls that used to follow here
 * leaked the buffers allocated above and have been removed. */
6609     dd->npbcdim   = ePBC2npbcdim(ir->ePBC);
6610     dd->bScrewPBC = (ir->ePBC == epbcSCREW);
/* Debug/tuning knobs read from the environment */
6612     dd->bSendRecv2        = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
6613     comm->dlb_scale_lim   = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
6614     comm->eFlop           = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
6615     recload               = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
6616     comm->nstSortCG       = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
6617     comm->nstDDDump       = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
6618     comm->nstDDDumpGrid   = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
6619     comm->DD_debug        = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
6621     dd->pme_recv_f_alloc = 0;
6622     dd->pme_recv_f_buf   = NULL;
6624     if (dd->bSendRecv2 && fplog)
6626         fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6632             fprintf(fplog, "Will load balance based on FLOP count\n");
6634         if (comm->eFlop > 1)
/* eFlop > 1: perturb the FLOP count per rank to test DLB (debug aid) */
6636             srand(1+cr->nodeid);
6638         comm->bRecordLoad = TRUE;
6642         comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6646     /* Initialize to GPU share count to 0, might change later */
6647     comm->nrank_gpu_shared = 0;
6649     comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6651     comm->bDynLoadBal = (comm->eDLB == edlbYES);
6654         fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6656     dd->bGridJump              = comm->bDynLoadBal;
6657     comm->bPMELoadBalDLBLimits = FALSE;
6659     if (comm->nstSortCG)
6663             if (comm->nstSortCG == 1)
6665                 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6669                 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6673         snew(comm->sort, 1);
6679             fprintf(fplog, "Will not sort the charge groups\n");
6683     comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
/* Bonded interactions between charge groups exist when there are
 * more charge groups than molecules */
6685     comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6686     if (comm->bInterCGBondeds)
6688         comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6692         comm->bInterCGMultiBody = FALSE;
6695     dd->bInterCGcons    = inter_charge_group_constraints(mtop);
6696     dd->bInterCGsettles = inter_charge_group_settles(mtop);
6698     if (ir->rlistlong == 0)
6700         /* Set the cut-off to some very large value,
6701          * so we don't need if statements everywhere in the code.
6702          * We use sqrt, since the cut-off is squared in some places.
6704         comm->cutoff = GMX_CUTOFF_INF;
6708         comm->cutoff = ir->rlistlong;
6710     comm->cutoff_mbody = 0;
6712     comm->cellsize_limit = 0;
6713     comm->bBondComm      = FALSE;
6715     if (comm->bInterCGBondeds)
6717         if (comm_distance_min > 0)
/* The user supplied -rdd: take it as the bonded comm distance */
6719             comm->cutoff_mbody = comm_distance_min;
6720             if (Flags & MD_DDBONDCOMM)
6722                 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6726                 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6728             r_bonded_limit = comm->cutoff_mbody;
6730         else if (ir->bPeriodicMols)
6732             /* Can not easily determine the required cut-off */
6733             dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6734             comm->cutoff_mbody = comm->cutoff/2;
6735             r_bonded_limit     = comm->cutoff_mbody;
/* Otherwise: measure the actual maximum bonded distances on the master
 * rank and broadcast them */
6741                 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6742                                       Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6744             gmx_bcast(sizeof(r_2b), &r_2b, cr);
6745             gmx_bcast(sizeof(r_mb), &r_mb, cr);
6747             /* We use an initial margin of 10% for the minimum cell size,
6748              * except when we are just below the non-bonded cut-off.
6750             if (Flags & MD_DDBONDCOMM)
6752                 if (max(r_2b, r_mb) > comm->cutoff)
6754                     r_bonded       = max(r_2b, r_mb);
6755                     r_bonded_limit = 1.1*r_bonded;
6756                     comm->bBondComm = TRUE;
6761                     r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6763                 /* We determine cutoff_mbody later */
6767                 /* No special bonded communication,
6768                  * simply increase the DD cut-off.
6770                 r_bonded_limit     = 1.1*max(r_2b, r_mb);
6771                 comm->cutoff_mbody = r_bonded_limit;
6772                 comm->cutoff       = max(comm->cutoff, comm->cutoff_mbody);
6775         comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6779                 "Minimum cell size due to bonded interactions: %.3f nm\n",
6780                 comm->cellsize_limit);
6784     if (dd->bInterCGcons && rconstr <= 0)
6786         /* There is a cell size limit due to the constraints (P-LINCS) */
6787         rconstr = constr_r_max(fplog, mtop, ir);
6791                     "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6793             if (rconstr > comm->cellsize_limit)
6795                 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6799     else if (rconstr > 0 && fplog)
6801         /* Here we do not check for dd->bInterCGcons,
6802          * because one can also set a cell size limit for virtual sites only
6803          * and at this point we don't know yet if there are intercg v-sites.
6806                 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6809     comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6811     comm->cgs_gl = gmx_mtop_global_cgs(mtop);
/* A user-specified grid (-dd): validate it against the cell-size limit */
6815         copy_ivec(nc, dd->nc);
6816         set_dd_dim(fplog, dd);
6817         set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6819         if (cr->npmenodes == -1)
6823         acs = average_cellsize_min(dd, ddbox);
6824         if (acs < comm->cellsize_limit)
6828                 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6830             gmx_fatal_collective(FARGS, cr, NULL,
6831                                  "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6832                                  acs, comm->cellsize_limit);
/* No user grid: let dd_choose_grid pick the optimal DD/PME split */
6837         set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6839         /* We need to choose the optimal DD grid and possibly PME nodes */
6840         limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6841                                comm->eDLB != edlbNO, dlb_scale,
6842                                comm->cellsize_limit, comm->cutoff,
6843                                comm->bInterCGBondeds);
6845         if (dd->nc[XX] == 0)
6847             bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6848             sprintf(buf, "Change the number of nodes or mdrun option %s%s%s",
6849                     !bC ? "-rdd" : "-rcon",
6850                     comm->eDLB != edlbNO ? " or -dds" : "",
6851                     bC ? " or your LINCS settings" : "");
6853             gmx_fatal_collective(FARGS, cr, NULL,
6854                                  "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6856                                  "Look in the log file for details on the domain decomposition",
6857                                  cr->nnodes-cr->npmenodes, limit, buf);
6859         set_dd_dim(fplog, dd);
6865                 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6866                 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6869     dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6870     if (cr->nnodes - dd->nnodes != cr->npmenodes)
6872         gmx_fatal_collective(FARGS, cr, NULL,
6873                              "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6874                              dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6876     if (cr->npmenodes > dd->nnodes)
6878         gmx_fatal_collective(FARGS, cr, NULL,
6879                              "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6881     if (cr->npmenodes > 0)
6883         comm->npmenodes = cr->npmenodes;
6887         comm->npmenodes = dd->nnodes;
6890     if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6892         /* The following choices should match those
6893          * in comm_cost_est in domdec_setup.c.
6894          * Note that here the checks have to take into account
6895          * that the decomposition might occur in a different order than xyz
6896          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6897          * in which case they will not match those in comm_cost_est,
6898          * but since that is mainly for testing purposes that's fine.
6900         if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6901             comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6902             getenv("GMX_PMEONEDD") == NULL)
6904             comm->npmedecompdim = 2;
6905             comm->npmenodes_x   = dd->nc[XX];
6906             comm->npmenodes_y   = comm->npmenodes/comm->npmenodes_x;
6910             /* In case nc is 1 in both x and y we could still choose to
6911              * decompose pme in y instead of x, but we use x for simplicity.
6913             comm->npmedecompdim = 1;
6914             if (dd->dim[0] == YY)
6916                 comm->npmenodes_x = 1;
6917                 comm->npmenodes_y = comm->npmenodes;
6921                 comm->npmenodes_x = comm->npmenodes;
6922                 comm->npmenodes_y = 1;
6927             fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6928                     comm->npmenodes_x, comm->npmenodes_y, 1);
/* No PME electrostatics/VdW: no PME decomposition at all */
6933         comm->npmedecompdim = 0;
6934         comm->npmenodes_x   = 0;
6935         comm->npmenodes_y   = 0;
6938     /* Technically we don't need both of these,
6939      * but it simplifies code not having to recalculate it.
6941     *npme_x = comm->npmenodes_x;
6942     *npme_y = comm->npmenodes_y;
6944     snew(comm->slb_frac, DIM);
6945     if (comm->eDLB == edlbNO)
6947         comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6948         comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6949         comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
6952     if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6954         if (comm->bBondComm || comm->eDLB != edlbNO)
6956             /* Set the bonded communication distance to halfway
6957              * the minimum and the maximum,
6958              * since the extra communication cost is nearly zero.
6960             acs                = average_cellsize_min(dd, ddbox);
6961             comm->cutoff_mbody = 0.5*(r_bonded + acs);
6962             if (comm->eDLB != edlbNO)
6964                 /* Check if this does not limit the scaling */
6965                 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
6967             if (!comm->bBondComm)
6969                 /* Without bBondComm do not go beyond the n.b. cut-off */
6970                 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
6971                 if (comm->cellsize_limit >= comm->cutoff)
6973                     /* We don't loose a lot of efficieny
6974                      * when increasing it to the n.b. cut-off.
6975                      * It can even be slightly faster, because we need
6976                      * less checks for the communication setup.
6978                     comm->cutoff_mbody = comm->cutoff;
6981             /* Check if we did not end up below our original limit */
6982             comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
6984             if (comm->cutoff_mbody > comm->cellsize_limit)
6986                 comm->cellsize_limit = comm->cutoff_mbody;
6989         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
6994         fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
6995                 "cellsize limit %f\n",
6996                 comm->bBondComm, comm->cellsize_limit);
7001         check_dd_restrictions(cr, dd, ir, fplog);
7004     comm->partition_step = INT_MIN;
7007     clear_dd_cycle_counts(dd);
/* Activate the DLB pulse counts and minimum cell sizes that were
 * precomputed for dynamic load balancing (np_dlb / cellsize_min_dlb). */
7012 static void set_dlb_limits(gmx_domdec_t *dd)
7017     for (d = 0; d < dd->ndim; d++)
7019         dd->comm->cd[d].np                 = dd->comm->cd[d].np_dlb;
7020         dd->comm->cellsize_min[dd->dim[d]] =
7021             dd->comm->cellsize_min_dlb[dd->dim[d]];
/* Turn on dynamic load balancing at the given step (DLB "auto" mode).
 * Refuses (and switches eDLB to "no") when the current minimum cell size
 * is already within 5% of the limit, since DLB could then not shrink
 * cells enough to help. Otherwise installs a uniform starting grid. */
7026 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7029     gmx_domdec_comm_t *comm;
7039         fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
/* Find the smallest current cell size over all decomposed dimensions */
7042     cellsize_min = comm->cellsize_min[dd->dim[0]];
7043     for (d = 1; d < dd->ndim; d++)
7045         cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7048     if (cellsize_min < comm->cellsize_limit*1.05)
7050         dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7052         /* Change DLB from "auto" to "no". */
7053         comm->eDLB = edlbNO;
7058     dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7059     comm->bDynLoadBal = TRUE;
7060     dd->bGridJump     = TRUE;
7064     /* We can set the required cell size info here,
7065      * so we do not need to communicate this.
7066      * The grid is completely uniform.
7068     for (d = 0; d < dd->ndim; d++)
7072             comm->load[d].sum_m = comm->load[d].sum;
7074             nc = dd->nc[dd->dim[d]];
/* Uniform cell boundaries: cell i starts at fraction i/nc */
7075             for (i = 0; i < nc; i++)
7077                 comm->root[d]->cell_f[i] = i/(real)nc;
7080                     comm->root[d]->cell_f_max0[i] = i     /(real)nc;
7081                     comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7084             comm->root[d]->cell_f[nc] = 1.0;
/* Allocate a per-charge-group boolean array (one entry per global cg),
 * all initialized to FALSE; used to track which cg's are local. */
7089 static char *init_bLocalCG(gmx_mtop_t *mtop)
7094     ncg = ncg_mtop(mtop);
7095     snew(bLocalCG, ncg);
7096     for (cg = 0; cg < ncg; cg++)
7098         bLocalCG[cg] = FALSE;
/* Initialize bonded-interaction bookkeeping for DD: builds the reverse
 * topology and, when bonded communication beyond the cut-off is enabled
 * (bBondComm), the charge-group link table and local-cg flags. */
7104 void dd_init_bondeds(FILE *fplog,
7105                      gmx_domdec_t *dd, gmx_mtop_t *mtop,
7107                      t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7109     gmx_domdec_comm_t *comm;
7113     dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7117     if (comm->bBondComm)
7119         /* Communicate atoms beyond the cut-off for bonded interactions */
7122         comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7124         comm->bLocalCG = init_bLocalCG(mtop);
7128         /* Only communicate atoms based on cut-off */
7129         comm->cglink   = NULL;
7130         comm->bLocalCG = NULL;
/* Write a summary of the DD settings to the log file: communication
 * pulses, cell size limits/shrink factors (with DLB) or initial cell
 * sizes (without), and the maximum interaction distances per category.
 * Called once for the static setup and again for the DLB limits when
 * DLB is in "auto" mode. */
7134 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7136                               gmx_bool bDynLoadBal, real dlb_scale,
7139     gmx_domdec_comm_t *comm;
7154         fprintf(fplog, "The maximum number of communication pulses is:");
7155         for (d = 0; d < dd->ndim; d++)
7157             fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7159         fprintf(fplog, "\n");
7160         fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7161         fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7162         fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7163         for (d = 0; d < DIM; d++)
/* A 2-cell dimension without PBC cannot shrink (shrink factor omitted) */
7167                 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7174                         comm->cellsize_min_dlb[d]/
7175                         (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7177                 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7180         fprintf(fplog, "\n");
/* Static (non-DLB) branch: report initial pulses and cell sizes */
7184         set_dd_cell_sizes_slb(dd, ddbox, FALSE, np);
7185         fprintf(fplog, "The initial number of communication pulses is:");
7186         for (d = 0; d < dd->ndim; d++)
7188             fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7190         fprintf(fplog, "\n");
7191         fprintf(fplog, "The initial domain decomposition cell size is:");
7192         for (d = 0; d < DIM; d++)
7196                 fprintf(fplog, " %c %.2f nm",
7197                         dim2char(d), dd->comm->cellsize_min[d]);
7200         fprintf(fplog, "\n\n");
7203     if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7205         fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7206         fprintf(fplog, "%40s  %-7s %6.3f nm\n",
7207                 "non-bonded interactions", "", comm->cutoff);
7211             limit = dd->comm->cellsize_limit;
7215             if (dynamic_dd_box(ddbox, ir))
7217                 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7219             limit = dd->comm->cellsize_min[XX];
7220             for (d = 1; d < DIM; d++)
7222                 limit = min(limit, dd->comm->cellsize_min[d]);
7226         if (comm->bInterCGBondeds)
7228             fprintf(fplog, "%40s  %-7s %6.3f nm\n",
7229                     "two-body bonded interactions", "(-rdd)",
7230                     max(comm->cutoff, comm->cutoff_mbody));
7231             fprintf(fplog, "%40s  %-7s %6.3f nm\n",
7232                     "multi-body bonded interactions", "(-rdd)",
7233                     (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7237             fprintf(fplog, "%40s  %-7s %6.3f nm\n",
7238                     "virtual site constructions", "(-rcon)", limit);
7240         if (dd->constraint_comm)
7242             sprintf(buf, "atoms separated by up to %d constraints",
7244             fprintf(fplog, "%40s  %-7s %6.3f nm\n",
7245                     buf, "(-rcon)", limit);
7247         fprintf(fplog, "\n");
/* Determine the DLB cell-size limits: the maximum number of
 * communication pulses per dimension (npulse, capped via dlb_scale and
 * the GMX_DD_NPULSE env var) and from that the minimum DLB cell size
 * per dimension (cellsize_min_dlb) and overall cellsize_limit. */
7253 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7255                                 const t_inputrec *ir,
7256                                 const gmx_ddbox_t *ddbox)
7258     gmx_domdec_comm_t *comm;
7259     int                d, dim, npulse, npulse_d_max, npulse_d;
/* rvdw/rcoulomb == 0 means "infinite" cut-off (e.g. vacuum) */
7264     bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7266     /* Determine the maximum number of comm. pulses in one dimension */
7268     comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7270     /* Determine the maximum required number of grid pulses */
7271     if (comm->cellsize_limit >= comm->cutoff)
7273         /* Only a single pulse is required */
7276     else if (!bNoCutOff && comm->cellsize_limit > 0)
7278         /* We round down slightly here to avoid overhead due to the latency
7279          * of extra communication calls when the cut-off
7280          * would be only slightly longer than the cell size.
7281          * Later cellsize_limit is redetermined,
7282          * so we can not miss interactions due to this rounding.
7284         npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7288         /* There is no cell size limit */
7289         npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7292     if (!bNoCutOff && npulse > 1)
7294         /* See if we can do with less pulses, based on dlb_scale */
7296         for (d = 0; d < dd->ndim; d++)
7299             npulse_d     = (int)(1 + dd->nc[dim]*comm->cutoff
7300                                  /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7301             npulse_d_max = max(npulse_d_max, npulse_d);
7303         npulse = min(npulse, npulse_d_max);
7306     /* This env var can override npulse */
7307     d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
7314     comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7315     for (d = 0; d < dd->ndim; d++)
/* Per dimension: at most nc-1 pulses are meaningful */
7317         comm->cd[d].np_dlb    = min(npulse, dd->nc[dd->dim[d]]-1);
7318         comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7319         snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7320         comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7321         if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7323             comm->bVacDLBNoLimit = FALSE;
7327     /* cellsize_limit is set for LINCS in init_domain_decomposition */
7328     if (!comm->bVacDLBNoLimit)
7330         comm->cellsize_limit = max(comm->cellsize_limit,
7331                                    comm->cutoff/comm->maxpulse);
7333     comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7334     /* Set the minimum cell size for each DD dimension */
7335     for (d = 0; d < dd->ndim; d++)
7337         if (comm->bVacDLBNoLimit ||
7338             comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7340             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7344             comm->cellsize_min_dlb[dd->dim[d]] =
7345                 comm->cutoff/comm->cd[d].np_dlb;
7348     if (comm->cutoff_mbody <= 0)
7350         comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7352     if (comm->bDynLoadBal)
/* Return whether bonded interactions must apply PBC to molecules:
 * only needed with PBC, inter-cg bondeds, and some periodic dimension
 * that is not decomposed (DD handles PBC along decomposed dimensions). */
7358 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7360     /* If each molecule is a single charge group
7361      * or we use domain decomposition for each periodic dimension,
7362      * we do not need to take pbc into account for the bonded interactions.
7364     return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7367              (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
/* Finish DD setup that depends on values known only after
 * init_domain_decomposition: OpenMP thread count, PME decomposition
 * initialization, DLB cell limits, settings printout, and the
 * global-to-local atom lookup table (ga2la), sized by the expected
 * fraction of the system seen by one domain's zones. */
7370 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7371                        t_inputrec *ir, gmx_ddbox_t *ddbox)
7373     gmx_domdec_comm_t *comm;
7379     /* Initialize the thread data.
7380      * This can not be done in init_domain_decomposition,
7381      * as the numbers of threads is determined later.
7383     comm->nth = gmx_omp_nthreads_get(emntDomdec);
7386         snew(comm->dth, comm->nth);
7389     if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7391         init_ddpme(dd, &comm->ddpme[0], 0);
7392         if (comm->npmedecompdim >= 2)
7394             init_ddpme(dd, &comm->ddpme[1], 1);
7399         comm->npmenodes = 0;
7400         if (dd->pme_nodeid >= 0)
7402             gmx_fatal_collective(FARGS, NULL, dd,
7403                                  "Can not have separate PME nodes without PME electrostatics");
7409         fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7411     if (comm->eDLB != edlbNO)
7413         set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7416     print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7417     if (comm->eDLB == edlbAUTO)
7421             fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7423         print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
7426     if (ir->ePBC == epbcNONE)
7428         vol_frac = 1 - 1/(double)dd->nnodes;
/* With PBC, add the communicated halo volume fraction */
7433             (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7437         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7439     natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7441     dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
/* Test whether the DD setup can support a larger cut-off (cutoff_req)
 * with the current grid and box: checks the required pulse count per
 * dimension against the DLB limits and the current local cell sizes.
 * The "locally limited" state is reduced over all ranks (gmx_sumi) so
 * the decision is collective. Returns TRUE when the cut-off is allowed. */
7444 static gmx_bool test_dd_cutoff(t_commrec *cr,
7445                                t_state *state, t_inputrec *ir,
7456     set_ddbox(dd, FALSE, cr, ir, state->box,
7457               TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7461     for (d = 0; d < dd->ndim; d++)
/* DD_CELL_MARGIN guards against cells shrinking below the requirement */
7465         inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7466         if (dynamic_dd_box(&ddbox, ir))
7468             inv_cell_size *= DD_PRES_SCALE_MARGIN;
/* Number of communication pulses needed for this cut-off */
7471         np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7473         if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7474             dd->comm->cd[d].np_dlb > 0)
7476             if (np > dd->comm->cd[d].np_dlb)
7481             /* If a current local cell size is smaller than the requested
7482              * cut-off, we could still fix it, but this gets very complicated.
7483              * Without fixing here, we might actually need more checks.
7485             if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7492     if (dd->comm->eDLB != edlbNO)
7494         /* If DLB is not active yet, we don't need to check the grid jumps.
7495          * Actually we shouldn't, because then the grid jump data is not set.
7497         if (dd->comm->bDynLoadBal &&
7498             check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7503             gmx_sumi(1, &LocallyLimited, cr);
7505             if (LocallyLimited > 0)
/* Try to change the DD communication cut-off to cutoff_req.
 * Applies the new cut-off only when test_dd_cutoff allows it;
 * returns whether the change was made. */
7514 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7517     gmx_bool bCutoffAllowed;
7519     bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7523         cr->dd->comm->cutoff = cutoff_req;
7526     return bCutoffAllowed;
/* Enable the DLB cut-off limiting used during PME load balancing and
 * record the current cut-off as the maximum allowed value. */
7529 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7531     gmx_domdec_comm_t *comm;
7533     comm = cr->dd->comm;
7535     /* Turn on the DLB limiting (might have been on already) */
7536     comm->bPMELoadBalDLBLimits = TRUE;
7538     /* Change the cut-off limit */
7539     comm->PMELoadBal_max_cutoff = comm->cutoff;
/* Merge a newly received charge-group buffer (recv_i global indices,
 * recv_vr positions) for communication pulse 'pulse' into the zone
 * arrays. First shifts the cg's from previous pulses to make room
 * (and fixes their stored send indices), then appends the received
 * cg's, updating index_gl, cg_cm, cgindex, cginfo and ncg_cell. */
7542 static void merge_cg_buffers(int ncell,
7543                              gmx_domdec_comm_dim_t *cd, int pulse,
7545                              int *index_gl, int *recv_i,
7546                              rvec *cg_cm, rvec *recv_vr,
7548                              cginfo_mb_t *cginfo_mb, int *cginfo)
7550     gmx_domdec_ind_t *ind, *ind_p;
7551     int               p, cell, c, cg, cg0, cg1, cg_gl, nat;
7552     int               shift, shift_at;
7554     ind = &cd->ind[pulse];
7556     /* First correct the already stored data */
7557     shift = ind->nrecv[ncell];
7558     for (cell = ncell-1; cell >= 0; cell--)
7560         shift -= ind->nrecv[cell];
7563             /* Move the cg's present from previous grid pulses */
7564             cg0                = ncg_cell[ncell+cell];
7565             cg1                = ncg_cell[ncell+cell+1];
7566             cgindex[cg1+shift] = cgindex[cg1];
/* Copy backwards so overlapping source/destination ranges are safe */
7567             for (cg = cg1-1; cg >= cg0; cg--)
7569                 index_gl[cg+shift] = index_gl[cg];
7570                 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7571                 cgindex[cg+shift] = cgindex[cg];
7572                 cginfo[cg+shift]  = cginfo[cg];
7574             /* Correct the already stored send indices for the shift */
7575             for (p = 1; p <= pulse; p++)
7577                 ind_p = &cd->ind[p];
7579                 for (c = 0; c < cell; c++)
7581                     cg0 += ind_p->nsend[c];
7583                 cg1 = cg0 + ind_p->nsend[cell];
7584                 for (cg = cg0; cg < cg1; cg++)
7586                     ind_p->index[cg] += shift;
7592     /* Merge in the communicated buffers */
7596     for (cell = 0; cell < ncell; cell++)
7598         cg1 = ncg_cell[ncell+cell+1] + shift;
7601             /* Correct the old cg indices */
7602             for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7604                 cgindex[cg+1] += shift_at;
7607         for (cg = 0; cg < ind->nrecv[cell]; cg++)
7609             /* Copy this charge group from the buffer */
7610             index_gl[cg1] = recv_i[cg0];
7611             copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7612             /* Add it to the cgindex */
7613             cg_gl          = index_gl[cg1];
7614             cginfo[cg1]    = ddcginfo(cginfo_mb, cg_gl);
7615             nat            = GET_CGINFO_NATOMS(cginfo[cg1]);
7616             cgindex[cg1+1] = cgindex[cg1] + nat;
7621         shift                 += ind->nrecv[cell];
7622         ncg_cell[ncell+cell+1] = cg1;
/* Store, per zone and per communication pulse, the first/last atom
 * indices (cell2at0/cell2at1) derived from the cg index, so that
 * communication buffers can later be copied per atom range directly. */
7626 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7627                                int nzone, int cg0, const int *cgindex)
7631     /* Store the atom block boundaries for easy copying of communication buffers
7634     for (zone = 0; zone < nzone; zone++)
7636         for (p = 0; p < cd->np; p++)
7638             cd->ind[p].cell2at0[zone] = cgindex[cg];
7639             cg += cd->ind[p].nrecv[zone];
7640             cd->ind[p].cell2at1[zone] = cgindex[cg];
/* Return TRUE when charge group cg_gl is bonded-linked to at least one
 * charge group that is not present locally (bLocalCG entry FALSE). */
7645 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7651     for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7653         if (!bLocalCG[link->a[i]])
7662 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7664     real c[DIM][4]; /* the corners for the non-bonded communication */
7665     real cr0;       /* corner for rounding */
7666     real cr1[4];    /* corners for rounding */
7667     real bc[DIM];   /* corners for bonded communication */
7668     real bcr1;      /* corner for rounding for bonded communication */
7671 /* Determine the corners of the domain(s) we are communicating with.
 * Fills dd_corners_t c with, per decomposed dimension, the lower cell
 * boundaries seen by each i-zone (c->c), the corners used for rounding
 * (cr0/cr1) and the corners for bonded communication (bc/bcr1), taking
 * DLB staggering (zone_d1/zone_d2 maxima) into account. */
7673 set_dd_corners(const gmx_domdec_t *dd,
7674                int dim0, int dim1, int dim2,
7678     const gmx_domdec_comm_t  *comm;
7679     const gmx_domdec_zones_t *zones;
7684     zones = &comm->zones;
7686     /* Keep the compiler happy */
7690     /* The first dimension is equal for all cells */
7691     c->c[0][0] = comm->cell_x0[dim0];
7694         c->bc[0] = c->c[0][0];
7699             /* This cell row is only seen from the first row */
7700             c->c[1][0] = comm->cell_x0[dim1];
7701             /* All rows can see this row */
7702             c->c[1][1] = comm->cell_x0[dim1];
/* With DLB, use the staggered neighbor maximum instead */
7705                 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7708                     /* For the multi-body distance we need the maximum */
7709                     c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7712             /* Set the upper-right corner for rounding */
7713             c->cr0 = comm->cell_x1[dim0];
7718                 for (j = 0; j < 4; j++)
7720                     c->c[2][j] = comm->cell_x0[dim2];
7724                 /* Use the maximum of the i-cells that see a j-cell */
7725                 for (i = 0; i < zones->nizone; i++)
7727                     for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7733                                     comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7739                     /* For the multi-body distance we need the maximum */
7740                     c->bc[2] = comm->cell_x0[dim2];
7741                     for (i = 0; i < 2; i++)
7743                         for (j = 0; j < 2; j++)
7745                             c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7751             /* Set the upper-right corner for rounding */
7752             /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7753              * Only cell (0,0,0) can see cell 7 (1,1,1)
7755             c->cr1[0] = comm->cell_x1[dim1];
7756             c->cr1[3] = comm->cell_x1[dim1];
7759                 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7762                     /* For the multi-body distance we need the maximum */
7763                     c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7770 /* Determine which cg's we need to send in this pulse from this zone.
 * For each charge group in [cg0,cg1) this computes the (possibly
 * triclinic-corrected) distance to the receiving domain's corners and
 * selects the cg when it is within the non-bonded distance (r_comm2),
 * or within the bonded distance (r_bcomm2) and actually involved in a
 * missing bonded link. Selected cg's are appended to ind->index, their
 * global indices to *ibuf and their (pbc-corrected) positions to vbuf. */
7772 get_zone_pulse_cgs(gmx_domdec_t *dd,
7773                    int zonei, int zone,
7775                    const int *index_gl,
7777                    int dim, int dim_ind,
7778                    int dim0, int dim1, int dim2,
7779                    real r_comm2, real r_bcomm2,
7783                    real skew_fac2_d, real skew_fac_01,
7784                    rvec *v_d, rvec *v_0, rvec *v_1,
7785                    const dd_corners_t *c,
7787                    gmx_bool bDistBonded,
7793                    gmx_domdec_ind_t *ind,
7794                    int **ibuf, int *ibuf_nalloc,
7800     gmx_domdec_comm_t *comm;
7802     gmx_bool           bDistMB_pulse;
7804     real               r2, rb2, r, tric_sh;
7807     int                nsend_z, nsend, nat;
/* Screw PBC requires special position mirroring along x */
7811     bScrew = (dd->bScrewPBC && dim == XX);
7813     bDistMB_pulse = (bDistMB && bDistBonded);
7819     for (cg = cg0; cg < cg1; cg++)
7823         if (tric_dist[dim_ind] == 0)
7825             /* Rectangular direction, easy */
7826             r = cg_cm[cg][dim] - c->c[dim_ind][zone];
/* Distance to the bonded-communication corner */
7833                 r = cg_cm[cg][dim] - c->bc[dim_ind];
7839             /* Rounding gives at most a 16% reduction
7840              * in communicated atoms
7842             if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7844                 r = cg_cm[cg][dim0] - c->cr0;
7845                 /* This is the first dimension, so always r >= 0 */
7852                 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7854                     r = cg_cm[cg][dim1] - c->cr1[zone];
7861                         r = cg_cm[cg][dim1] - c->bcr1;
7871             /* Triclinic direction, more complicated */
7874             /* Rounding, conservative as the skew_fac multiplication
7875              * will slightly underestimate the distance.
7877             if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7879                 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7880                 for (i = dim0+1; i < DIM; i++)
7882                     rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7884                 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7887                     rb[dim0] = rn[dim0];
7890                 /* Take care that the cell planes along dim0 might not
7891                  * be orthogonal to those along dim1 and dim2.
7893                 for (i = 1; i <= dim_ind; i++)
7896                     if (normal[dim0][dimd] > 0)
7898                         rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7901                             rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7906             if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7908                 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7910                 for (i = dim1+1; i < DIM; i++)
7912                     tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7914                 rn[dim1] += tric_sh;
7917                     r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7918                     /* Take care of coupling of the distances
7919                      * to the planes along dim0 and dim1 through dim2.
7921                     r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7922                     /* Take care that the cell planes along dim1
7923                      * might not be orthogonal to that along dim2.
7925                     if (normal[dim1][dim2] > 0)
7927                         rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7933                         cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7936                         rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7937                         /* Take care of coupling of the distances
7938                          * to the planes along dim0 and dim1 through dim2.
7940                         rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7941                         /* Take care that the cell planes along dim1
7942                          * might not be orthogonal to that along dim2.
7944                         if (normal[dim1][dim2] > 0)
7946                             rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7951             /* The distance along the communication direction */
7952             rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7954             for (i = dim+1; i < DIM; i++)
7956                 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7961                 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7962                 /* Take care of coupling of the distances
7963                  * to the planes along dim0 and dim1 through dim2.
7965                 if (dim_ind == 1 && zonei == 1)
7967                     r2 -= rn[dim0]*rn[dim]*skew_fac_01;
7973                     rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
7976                     rb2 += rb[dim]*rb[dim]*skew_fac2_d;
7977                     /* Take care of coupling of the distances
7978                      * to the planes along dim0 and dim1 through dim2.
7980                     if (dim_ind == 1 && zonei == 1)
7982                         rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
/* Within bonded distance: only send if a bonded link is missing */
7990              ((bDistMB && rb2 < r_bcomm2) ||
7991               (bDist2B && r2 < r_bcomm2)) &&
7993               (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
7994                missing_link(comm->cglink, index_gl[cg],
7997             /* Make an index to the local charge groups */
7998             if (nsend+1 > ind->nalloc)
8000                 ind->nalloc = over_alloc_large(nsend+1);
8001                 srenew(ind->index, ind->nalloc);
8003             if (nsend+1 > *ibuf_nalloc)
8005                 *ibuf_nalloc = over_alloc_large(nsend+1);
8006                 srenew(*ibuf, *ibuf_nalloc);
8008             ind->index[nsend] = cg;
8009             (*ibuf)[nsend]    = index_gl[cg];
8011             vec_rvec_check_alloc(vbuf, nsend+1);
8013             if (dd->ci[dim] == 0)
8015                 /* Correct cg_cm for pbc */
8016                 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
/* Screw PBC: mirror y and z when wrapping along x */
8019                     vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8020                     vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8025                 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8028             nat += cgindex[cg+1] - cgindex[cg];
8034     *nsend_z_ptr = nsend_z;
/* Set up the halo communication for one domain-decomposition step:
 * for each decomposed dimension and each communication pulse, select the
 * charge groups (cgs) to send (multi-threaded via OpenMP), exchange the
 * send/receive counts, global cg indices and cg centers of mass with the
 * neighboring rank, and extend the local zone/cg bookkeeping accordingly.
 *
 * NOTE(review): this chunk is an extraction with original line numbers
 * embedded at the start of each line and with some intermediate lines
 * (braces, assignments, e.g. the dim0 assignment and nzone initialisation)
 * missing; comments below only describe what the visible lines establish.
 */
8037 static void setup_dd_communication(gmx_domdec_t *dd,
8038 matrix box, gmx_ddbox_t *ddbox,
8039 t_forcerec *fr, t_state *state, rvec **f)
8041 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8042 int nzone, nzone_send, zone, zonei, cg0, cg1;
8043 int c, i, j, cg, cg_gl, nrcg;
8044 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8045 gmx_domdec_comm_t *comm;
8046 gmx_domdec_zones_t *zones;
8047 gmx_domdec_comm_dim_t *cd;
8048 gmx_domdec_ind_t *ind;
8049 cginfo_mb_t *cginfo_mb;
8050 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8051 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8052 dd_corners_t corners;
8054 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8055 real skew_fac2_d, skew_fac_01;
8062 fprintf(debug, "Setting up DD communication\n");
/* Dispatch on the cutoff scheme; only the default (unsupported) arm is
 * visible here — presumably group/verlet cases set cg_cm. TODO confirm. */
8067 switch (fr->cutoff_scheme)
8076 gmx_incons("unimplemented");
/* Per decomposed dimension: flag whether any contributing dim is triclinic,
 * so distance checks later use skewed (triclinic) metrics. */
8080 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8082 dim = dd->dim[dim_ind];
8084 /* Check if we need to use triclinic distances */
8085 tric_dist[dim_ind] = 0;
8086 for (i = 0; i <= dim_ind; i++)
8088 if (ddbox->tric_dir[dd->dim[i]])
8090 tric_dist[dim_ind] = 1;
8095 bBondComm = comm->bBondComm;
8097 /* Do we need to determine extra distances for multi-body bondeds? */
8098 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8100 /* Do we need to determine extra distances for only two-body bondeds? */
8101 bDist2B = (bBondComm && !bDistMB);
/* Squared cutoffs: non-bonded comm distance and multi-body bonded distance. */
8103 r_comm2 = sqr(comm->cutoff);
8104 r_bcomm2 = sqr(comm->cutoff_mbody);
8108 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8111 zones = &comm->zones;
/* dim1/dim2 are -1 when fewer than 2/3 dimensions are decomposed. */
8114 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8115 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8117 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8119 /* Triclinic stuff */
8120 normal = ddbox->normal;
8124 v_0 = ddbox->v[dim0];
8125 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8127 /* Determine the coupling coefficient for the distances
8128 * to the cell planes along dim0 and dim1 through dim2.
8129 * This is required for correct rounding.
8132 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8135 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8141 v_1 = ddbox->v[dim1];
/* Local aliases for the zone/cg index arrays we extend below. */
8144 zone_cg_range = zones->cg_range;
8145 index_gl = dd->index_gl;
8146 cgindex = dd->cgindex;
8147 cginfo_mb = fr->cginfo_mb;
/* Zone 0 is the home zone; received cgs are appended starting at pos_cg. */
8149 zone_cg_range[0] = 0;
8150 zone_cg_range[1] = dd->ncg_home;
8151 comm->zone_ncg1[0] = dd->ncg_home;
8152 pos_cg = dd->ncg_home;
8154 nat_tot = dd->nat_home;
/* Main loop: one backward send/receive exchange per dimension per pulse. */
8156 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8158 dim = dd->dim[dim_ind];
8159 cd = &comm->cd[dim_ind];
8161 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8163 /* No pbc in this dimension, the first node should not comm. */
8171 v_d = ddbox->v[dim];
8172 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8174 cd->bInPlace = TRUE;
8175 for (p = 0; p < cd->np; p++)
8177 /* Only atoms communicated in the first pulse are used
8178 * for multi-body bonded interactions or for bBondComm.
8180 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8185 for (zone = 0; zone < nzone_send; zone++)
8187 if (tric_dist[dim_ind] && dim_ind > 0)
8189 /* Determine slightly more optimized skew_fac's
8191 * This reduces the number of communicated atoms
8192 * by about 10% for 3D DD of rhombic dodecahedra.
8194 for (dimd = 0; dimd < dim; dimd++)
8196 sf2_round[dimd] = 1;
8197 if (ddbox->tric_dir[dimd])
8199 for (i = dd->dim[dimd]+1; i < DIM; i++)
8201 /* If we are shifted in dimension i
8202 * and the cell plane is tilted forward
8203 * in dimension i, skip this coupling.
8205 if (!(zones->shift[nzone+zone][i] &&
8206 ddbox->v[dimd][i][dimd] >= 0))
8209 sqr(ddbox->v[dimd][i][dimd]);
8212 sf2_round[dimd] = 1/sf2_round[dimd];
8217 zonei = zone_perm[dim_ind][zone];
8220 /* Here we permutate the zones to obtain a convenient order
8221 * for neighbor searching
8223 cg0 = zone_cg_range[zonei];
8224 cg1 = zone_cg_range[zonei+1];
8228 /* Look only at the cg's received in the previous grid pulse
8230 cg1 = zone_cg_range[nzone+zone+1];
8231 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
/* Thread-parallel cg selection: thread 0 writes directly into the comm
 * buffers, the others fill per-thread scratch that is appended after. */
8234 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8235 for (th = 0; th < comm->nth; th++)
8237 gmx_domdec_ind_t *ind_p;
8238 int **ibuf_p, *ibuf_nalloc_p;
8240 int *nsend_p, *nat_p;
8246 /* Thread 0 writes in the comm buffers */
8248 ibuf_p = &comm->buf_int;
8249 ibuf_nalloc_p = &comm->nalloc_int;
8250 vbuf_p = &comm->vbuf;
8253 nsend_zone_p = &ind->nsend[zone];
8257 /* Other threads write into temp buffers */
8258 ind_p = &comm->dth[th].ind;
8259 ibuf_p = &comm->dth[th].ibuf;
8260 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8261 vbuf_p = &comm->dth[th].vbuf;
8262 nsend_p = &comm->dth[th].nsend;
8263 nat_p = &comm->dth[th].nat;
8264 nsend_zone_p = &comm->dth[th].nsend_zone;
8266 comm->dth[th].nsend = 0;
8267 comm->dth[th].nat = 0;
8268 comm->dth[th].nsend_zone = 0;
/* Static partition of the cg range [cg0,cg1) over the threads. */
8278 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8279 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8282 /* Get the cg's for this pulse in this zone */
8283 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8285 dim, dim_ind, dim0, dim1, dim2,
8288 normal, skew_fac2_d, skew_fac_01,
8289 v_d, v_0, v_1, &corners, sf2_round,
8290 bDistBonded, bBondComm,
8294 ibuf_p, ibuf_nalloc_p,
8300 /* Append data of threads>=1 to the communication buffers */
8301 for (th = 1; th < comm->nth; th++)
8303 dd_comm_setup_work_t *dth;
8306 dth = &comm->dth[th];
8308 ns1 = nsend + dth->nsend_zone;
8309 if (ns1 > ind->nalloc)
8311 ind->nalloc = over_alloc_dd(ns1);
8312 srenew(ind->index, ind->nalloc);
8314 if (ns1 > comm->nalloc_int)
8316 comm->nalloc_int = over_alloc_dd(ns1);
8317 srenew(comm->buf_int, comm->nalloc_int);
8319 if (ns1 > comm->vbuf.nalloc)
8321 comm->vbuf.nalloc = over_alloc_dd(ns1);
8322 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8325 for (i = 0; i < dth->nsend_zone; i++)
8327 ind->index[nsend] = dth->ind.index[i];
8328 comm->buf_int[nsend] = dth->ibuf[i];
8329 copy_rvec(dth->vbuf.v[i],
8330 comm->vbuf.v[nsend]);
8334 ind->nsend[zone] += dth->nsend_zone;
8337 /* Clear the counts in case we do not have pbc */
8338 for (zone = nzone_send; zone < nzone; zone++)
8340 ind->nsend[zone] = 0;
/* Slots nzone / nzone+1 carry the cg total and atom total for this pulse. */
8342 ind->nsend[nzone] = nsend;
8343 ind->nsend[nzone+1] = nat;
8344 /* Communicate the number of cg's and atoms to receive */
8345 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8346 ind->nsend, nzone+2,
8347 ind->nrecv, nzone+2);
8349 /* The rvec buffer is also required for atom buffers of size nsend
8350 * in dd_move_x and dd_move_f.
8352 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8356 /* We can receive in place if only the last zone is not empty */
8357 for (zone = 0; zone < nzone-1; zone++)
8359 if (ind->nrecv[zone] > 0)
8361 cd->bInPlace = FALSE;
8366 /* The int buffer is only required here for the cg indices */
8367 if (ind->nrecv[nzone] > comm->nalloc_int2)
8369 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8370 srenew(comm->buf_int2, comm->nalloc_int2);
8372 /* The rvec buffer is also required for atom buffers
8373 * of size nrecv in dd_move_x and dd_move_f.
8375 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8376 vec_rvec_check_alloc(&comm->vbuf2, i);
8380 /* Make space for the global cg indices */
8381 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8382 || dd->cg_nalloc == 0)
8384 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8385 srenew(index_gl, dd->cg_nalloc);
8386 srenew(cgindex, dd->cg_nalloc+1);
8388 /* Communicate the global cg indices */
/* In-place receive appends directly after the current cgs; otherwise the
 * data lands in a scratch buffer and is merged below. */
8391 recv_i = index_gl + pos_cg;
8395 recv_i = comm->buf_int2;
8397 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8398 comm->buf_int, nsend,
8399 recv_i, ind->nrecv[nzone]);
8401 /* Make space for cg_cm */
8402 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8403 if (fr->cutoff_scheme == ecutsGROUP)
8411 /* Communicate cg_cm */
8414 recv_vr = cg_cm + pos_cg;
8418 recv_vr = comm->vbuf2.v;
8420 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8421 comm->vbuf.v, nsend,
8422 recv_vr, ind->nrecv[nzone]);
8424 /* Make the charge group index */
8427 zone = (p == 0 ? 0 : nzone - 1);
8428 while (zone < nzone)
8430 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8432 cg_gl = index_gl[pos_cg];
8433 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8434 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8435 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8438 /* Update the charge group presence,
8439 * so we can use it in the next pass of the loop.
8441 comm->bLocalCG[cg_gl] = TRUE;
8447 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8450 zone_cg_range[nzone+zone] = pos_cg;
8455 /* This part of the code is never executed with bBondComm. */
8456 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8457 index_gl, recv_i, cg_cm, recv_vr,
8458 cgindex, fr->cginfo_mb, fr->cginfo);
8459 pos_cg += ind->nrecv[nzone];
8461 nat_tot += ind->nrecv[nzone+1];
8465 /* Store the atom block for easy copying of communication buffers */
8466 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
/* Publish the (possibly reallocated) index arrays and the final totals. */
8470 dd->index_gl = index_gl;
8471 dd->cgindex = cgindex;
8473 dd->ncg_tot = zone_cg_range[zones->n];
8474 dd->nat_tot = nat_tot;
8475 comm->nat[ddnatHOME] = dd->nat_home;
8476 for (i = ddnatZONE; i < ddnatNR; i++)
8478 comm->nat[i] = dd->nat_tot;
8483 /* We don't need to update cginfo, since that was alrady done above.
8484 * So we pass NULL for the forcerec.
8486 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8487 NULL, comm->bLocalCG);
8492 fprintf(debug, "Finished setting up DD communication, zones:");
8493 for (c = 0; c < zones->n; c++)
8495 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8497 fprintf(debug, "\n");
/* Translate the per-zone cg ranges into i-zone search boundaries:
 * for each i-zone, set its last cg (cg1) and the j-cg window [jcg0, jcg1)
 * it interacts with, derived from the zone j0/j1 limits.
 * NOTE(review): extraction gap — the cg0 assignment (and braces) between
 * the visible lines is missing from this chunk. */
8501 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8505 for (c = 0; c < zones->nizone; c++)
8507 zones->izone[c].cg1 = zones->cg_range[c+1];
8508 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8509 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
/* Determine the spatial extent of each DD zone in [zone_start, zone_end):
 * start from the home cell limits, adjust shifted/staggered dimensions by
 * the communication cutoffs, then compute a triclinic-corrected bounding
 * box (bb_x0/bb_x1) per zone for the Verlet-scheme pair search.
 * NOTE(review): extraction with embedded original line numbers; some
 * intermediate lines (brace/else lines, a few assignments) are missing. */
8513 static void set_zones_size(gmx_domdec_t *dd,
8514 matrix box, const gmx_ddbox_t *ddbox,
8515 int zone_start, int zone_end)
8517 gmx_domdec_comm_t *comm;
8518 gmx_domdec_zones_t *zones;
8520 int z, zi, zj0, zj1, d, dim;
8523 real size_j, add_tric;
8528 zones = &comm->zones;
8530 /* Do we need to determine extra distances for multi-body bondeds? */
8531 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8533 for (z = zone_start; z < zone_end; z++)
8535 /* Copy cell limits to zone limits.
8536 * Valid for non-DD dims and non-shifted dims.
8538 copy_rvec(comm->cell_x0, zones->size[z].x0);
8539 copy_rvec(comm->cell_x1, zones->size[z].x1);
8542 for (d = 0; d < dd->ndim; d++)
8546 for (z = 0; z < zones->n; z++)
8548 /* With a staggered grid we have different sizes
8549 * for non-shifted dimensions.
8551 if (dd->bGridJump && zones->shift[z][dim] == 0)
/* Staggered limits come from the 1D or 2D min/max tables, indexed by
 * the zone shifts along the earlier decomposed dimensions. */
8555 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8556 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8560 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8561 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
/* Cutoff distances, corrected for the skew of triclinic cells. */
8567 rcmbs = comm->cutoff_mbody;
8568 if (ddbox->tric_dir[dim])
8570 rcs /= ddbox->skew_fac[dim];
8571 rcmbs /= ddbox->skew_fac[dim];
8574 /* Set the lower limit for the shifted zone dimensions */
8575 for (z = zone_start; z < zone_end; z++)
8577 if (zones->shift[z][dim] > 0)
8580 if (!dd->bGridJump || d == 0)
8582 zones->size[z].x0[dim] = comm->cell_x1[dim];
8583 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8587 /* Here we take the lower limit of the zone from
8588 * the lowest domain of the zone below.
8592 zones->size[z].x0[dim] =
8593 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8599 zones->size[z].x0[dim] =
8600 zones->size[zone_perm[2][z-4]].x0[dim];
8604 zones->size[z].x0[dim] =
8605 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8608 /* A temporary limit, is updated below */
8609 zones->size[z].x1[dim] = zones->size[z].x0[dim];
/* Multi-body bondeds may need the whole i-zone extent plus rcmbs. */
8613 for (zi = 0; zi < zones->nizone; zi++)
8615 if (zones->shift[zi][dim] == 0)
8617 /* This takes the whole zone into account.
8618 * With multiple pulses this will lead
8619 * to a larger zone then strictly necessary.
8621 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8622 zones->size[zi].x1[dim]+rcmbs);
8630 /* Loop over the i-zones to set the upper limit of each
8633 for (zi = 0; zi < zones->nizone; zi++)
8635 if (zones->shift[zi][dim] == 0)
8637 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8639 if (zones->shift[z][dim] > 0)
8641 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8642 zones->size[zi].x1[dim]+rcs);
8649 for (z = zone_start; z < zone_end; z++)
8651 /* Initialization only required to keep the compiler happy */
8652 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8655 /* To determine the bounding box for a zone we need to find
8656 * the extreme corners of 4, 2 or 1 corners.
8658 nc = 1 << (ddbox->npbcdim - 1);
8660 for (c = 0; c < nc; c++)
8662 /* Set up a zone corner at x=0, ignoring trilinic couplings */
8666 corner[YY] = zones->size[z].x0[YY];
8670 corner[YY] = zones->size[z].x1[YY];
8674 corner[ZZ] = zones->size[z].x0[ZZ];
8678 corner[ZZ] = zones->size[z].x1[ZZ];
8680 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8682 /* With 1D domain decomposition the cg's are not in
8683 * the triclinic box, but triclinic x-y and rectangular y-z.
8684 * Shift y back, so it will later end up at 0.
8686 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8688 /* Apply the triclinic couplings */
8689 assert(ddbox->npbcdim <= DIM);
8690 for (i = YY; i < ddbox->npbcdim; i++)
8692 for (j = XX; j < i; j++)
8694 corner[j] += corner[i]*box[i][j]/box[i][i];
8699 copy_rvec(corner, corner_min);
8700 copy_rvec(corner, corner_max);
/* Running component-wise min/max over the corners. */
8704 for (i = 0; i < DIM; i++)
8706 corner_min[i] = min(corner_min[i], corner[i]);
8707 corner_max[i] = max(corner_max[i], corner[i]);
8711 /* Copy the extreme cornes without offset along x */
8712 for (i = 0; i < DIM; i++)
8714 zones->size[z].bb_x0[i] = corner_min[i];
8715 zones->size[z].bb_x1[i] = corner_max[i];
8717 /* Add the offset along x */
8718 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8719 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
/* Home-zone cg density, used by the nbnxn grid setup. */
8722 if (zone_start == 0)
8725 for (dim = 0; dim < DIM; dim++)
8727 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8729 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8734 for (z = zone_start; z < zone_end; z++)
8736 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8738 zones->size[z].x0[XX], zones->size[z].x1[XX],
8739 zones->size[z].x0[YY], zones->size[z].x1[YY],
8740 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8741 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8743 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8744 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8745 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
/* qsort comparator for gmx_cgsort_t: order primarily by ns grid cell
 * (nsc), with the global topology index (ind_gl) as tie-breaker so the
 * sort is deterministic across runs.
 * NOTE(review): extraction gap — the tie-break condition and return
 * statement between the two visible assignments are missing here. */
8750 static int comp_cgsort(const void *a, const void *b)
8754 gmx_cgsort_t *cga, *cgb;
8755 cga = (gmx_cgsort_t *)a;
8756 cgb = (gmx_cgsort_t *)b;
8758 comp = cga->nsc - cgb->nsc;
8761 comp = cga->ind_gl - cgb->ind_gl;
/* Permute the int array 'a' into the order given by sort[].ind, using
 * 'buf' as scratch: gather into buf, then copy back in place.
 * NOTE(review): extraction gap — the copy-back assignment inside the
 * second loop is missing from this chunk. */
8767 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8772 /* Order the data */
8773 for (i = 0; i < n; i++)
8775 buf[i] = a[sort[i].ind];
8778 /* Copy back to the original array */
8779 for (i = 0; i < n; i++)
/* Permute the rvec array 'v' into the order given by sort[].ind, using
 * the rvec scratch buffer 'buf' (gather, then copy back). */
8785 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8790 /* Order the data */
8791 for (i = 0; i < n; i++)
8793 copy_rvec(v[sort[i].ind], buf[i]);
8796 /* Copy back to the original array */
8797 for (i = 0; i < n; i++)
8799 copy_rvec(buf[i], v[i]);
/* Permute a per-atom rvec array into charge-group sort order: for each cg
 * (in sorted order) copy its atom range [cgindex[ind], cgindex[ind+1])
 * into 'buf', then copy the whole buffer back. With cgindex == NULL each
 * cg is a single atom and the cheaper per-cg routine is used instead. */
8803 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8806 int a, atot, cg, cg0, cg1, i;
8808 if (cgindex == NULL)
8810 /* Avoid the useless loop of the atoms within a cg */
8811 order_vec_cg(ncg, sort, v, buf);
8816 /* Order the data */
8818 for (cg = 0; cg < ncg; cg++)
8820 cg0 = cgindex[sort[cg].ind];
8821 cg1 = cgindex[sort[cg].ind+1];
8822 for (i = cg0; i < cg1; i++)
8824 copy_rvec(v[i], buf[a]);
8830 /* Copy back to the original array */
8831 for (a = 0; a < atot; a++)
8833 copy_rvec(buf[a], v[a]);
/* Merge sort2 (already ordered) with sort_new (unordered, qsorted here)
 * into sort1. Classic two-pointer merge; ordering key is (nsc, ind_gl),
 * matching comp_cgsort. */
8837 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8838 int nsort_new, gmx_cgsort_t *sort_new,
8839 gmx_cgsort_t *sort1)
8843 /* The new indices are not very ordered, so we qsort them */
8844 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8846 /* sort2 is already ordered, so now we can merge the two arrays */
8850 while (i2 < nsort2 || i_new < nsort_new)
/* One source exhausted: drain the other. */
8854 sort1[i1++] = sort_new[i_new++];
8856 else if (i_new == nsort_new)
8858 sort1[i1++] = sort2[i2++];
8860 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8861 (sort2[i2].nsc == sort_new[i_new].nsc &&
8862 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8864 sort1[i1++] = sort2[i2++];
8868 sort1[i1++] = sort_new[i_new++];
/* Determine the cg sort order for the group cutoff scheme, keyed on ns
 * grid cell index. Fast path (ncg_home_old >= 0): cgs that stayed in the
 * same grid cell are already ordered, so only the moved/new cgs are
 * qsorted and merged in via ordered_sort(). Slow path: full qsort of all
 * home cgs. Returns (presumably) the number of cgs that remain home —
 * cells >= 'moved' signal cgs leaving this rank; TODO confirm, the
 * return statement is outside this extraction. */
8873 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8875 gmx_domdec_sort_t *sort;
8876 gmx_cgsort_t *cgsort, *sort_i;
8877 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8878 int sort_last, sort_skip;
8880 sort = dd->comm->sort;
8882 a = fr->ns.grid->cell_index;
/* Cell indices at or above this value mark cgs that moved off-node. */
8884 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8886 if (ncg_home_old >= 0)
8888 /* The charge groups that remained in the same ns grid cell
8889 * are completely ordered. So we can sort efficiently by sorting
8890 * the charge groups that did move into the stationary list.
8895 for (i = 0; i < dd->ncg_home; i++)
8897 /* Check if this cg did not move to another node */
8900 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8902 /* This cg is new on this node or moved ns grid cell */
8903 if (nsort_new >= sort->sort_new_nalloc)
8905 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8906 srenew(sort->sort_new, sort->sort_new_nalloc);
8908 sort_i = &(sort->sort_new[nsort_new++]);
8912 /* This cg did not move */
8913 sort_i = &(sort->sort2[nsort2++]);
8915 /* Sort on the ns grid cell indices
8916 * and the global topology index.
8917 * index_gl is irrelevant with cell ns,
8918 * but we set it here anyhow to avoid a conditional.
8921 sort_i->ind_gl = dd->index_gl[i];
8928 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8931 /* Sort efficiently */
8932 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8937 cgsort = sort->sort;
8939 for (i = 0; i < dd->ncg_home; i++)
8941 /* Sort on the ns grid cell indices
8942 * and the global topology index
8944 cgsort[i].nsc = a[i];
8945 cgsort[i].ind_gl = dd->index_gl[i];
8947 if (cgsort[i].nsc < moved)
8954 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8956 /* Determine the order of the charge groups using qsort */
8957 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
/* Determine the cg sort order for the Verlet (nbnxn) scheme: the nbnxn
 * module already knows the grid atom order, so simply copy its order
 * array into sort[].ind. Presumably returns the new home count
 * (ncg_new) — the return statement is outside this extraction. */
8963 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
8966 int ncg_new, i, *a, na;
8968 sort = dd->comm->sort->sort;
8970 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
8973 for (i = 0; i < na; i++)
8977 sort[ncg_new].ind = a[i];
/* Sort the local (home) state into charge-group order: compute the order
 * (scheme-dependent), then permute every distributed state vector (x, v,
 * sd_X, cg_p), cg_cm (group scheme only), the global cg index, cginfo,
 * and rebuild the local cgindex. Also drops cgs that moved off-node by
 * shrinking ncg_home to ncg_new.
 * NOTE(review): extraction with embedded line numbers; some lines
 * (brace lines, a few assignments such as ibuf setup) are missing. */
8985 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
8988 gmx_domdec_sort_t *sort;
8989 gmx_cgsort_t *cgsort, *sort_i;
8991 int ncg_new, i, *ibuf, cgsize;
8994 sort = dd->comm->sort;
8996 if (dd->ncg_home > sort->sort_nalloc)
8998 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
8999 srenew(sort->sort, sort->sort_nalloc);
9000 srenew(sort->sort2, sort->sort_nalloc);
9002 cgsort = sort->sort;
/* Scheme-specific order computation; both fill sort->sort. */
9004 switch (fr->cutoff_scheme)
9007 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9010 ncg_new = dd_sort_order_nbnxn(dd, fr);
9013 gmx_incons("unimplemented");
9017 /* We alloc with the old size, since cgindex is still old */
9018 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9019 vbuf = dd->comm->vbuf.v;
9023 cgindex = dd->cgindex;
9030 /* Remove the charge groups which are no longer at home here */
9031 dd->ncg_home = ncg_new;
9034 fprintf(debug, "Set the new home charge group count to %d\n",
9038 /* Reorder the state */
9039 for (i = 0; i < estNR; i++)
9041 if (EST_DISTR(i) && (state->flags & (1<<i)))
9046 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9049 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9052 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9055 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9059 case estDISRE_INITF:
9060 case estDISRE_RM3TAV:
9061 case estORIRE_INITF:
9063 /* No ordering required */
9066 gmx_incons("Unknown state entry encountered in dd_sort_state");
9071 if (fr->cutoff_scheme == ecutsGROUP)
9074 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9077 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9079 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9080 srenew(sort->ibuf, sort->ibuf_nalloc);
9083 /* Reorder the global cg index */
9084 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9085 /* Reorder the cginfo */
9086 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9087 /* Rebuild the local cg index */
/* ibuf accumulates prefix sums of the sorted cg sizes, then replaces
 * dd->cgindex. */
9091 for (i = 0; i < dd->ncg_home; i++)
9093 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9094 ibuf[i+1] = ibuf[i] + cgsize;
9096 for (i = 0; i < dd->ncg_home+1; i++)
9098 dd->cgindex[i] = ibuf[i];
9103 for (i = 0; i < dd->ncg_home+1; i++)
9108 /* Set the home atom number */
9109 dd->nat_home = dd->cgindex[dd->ncg_home];
9111 if (fr->cutoff_scheme == ecutsVERLET)
9113 /* The atoms are now exactly in grid order, update the grid order */
9114 nbnxn_set_atomorder(fr->nbv->nbs);
9118 /* Copy the sorted ns cell indices back to the ns grid struct */
9119 for (i = 0; i < dd->ncg_home; i++)
9121 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9123 fr->ns.grid->nr = dd->ncg_home;
/* Accumulate per-step DD communication statistics: for each atom class
 * beyond the home atoms, add the class's atom-count delta to the running
 * sums used by print_dd_statistics(). */
9127 static void add_dd_statistics(gmx_domdec_t *dd)
9129 gmx_domdec_comm_t *comm;
9134 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9136 comm->sum_nat[ddnat-ddnatZONE] +=
9137 comm->nat[ddnat] - comm->nat[ddnat-1];
/* Reset all DD statistics accumulators and load-balancing counters,
 * e.g. when counters are reset mid-run for total-run accounting. */
9142 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9144 gmx_domdec_comm_t *comm;
9149 /* Reset all the statistics and counters for total run counting */
9150 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9152 comm->sum_nat[ddnat-ddnatZONE] = 0;
9156 comm->load_step = 0;
9159 clear_ivec(comm->load_lim);
/* Reduce the DD statistics over all ranks and print the per-step average
 * number of atoms communicated for forces, vsites and constraints (LINCS)
 * to the log file, followed by the load-balance report when applicable.
 * NOTE(review): extraction gaps — the switch over 'ddnat' and several
 * brace/assignment lines are missing between the visible lines. */
9164 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9166 gmx_domdec_comm_t *comm;
9170 comm = cr->dd->comm;
/* Global sum of the per-rank accumulators before averaging. */
9172 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9179 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9181 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9183 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9188 " av. #atoms communicated per step for force: %d x %.1f\n",
9192 if (cr->dd->vsite_comm)
9195 " av. #atoms communicated per step for vsites: %d x %.1f\n",
/* Vsite coordinates+forces: 3 passes with PME/Ewald, otherwise 2. */
9196 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9201 if (cr->dd->constraint_comm)
9204 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9205 1 + ir->nLincsIter, av);
9209 gmx_incons(" Unknown type for DD statistics");
9212 fprintf(fplog, "\n");
9214 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9216 print_dd_load_av(fplog, cr->dd);
9220 void dd_partition_system(FILE *fplog,
9223 gmx_bool bMasterState,
9225 t_state *state_global,
9226 gmx_mtop_t *top_global,
9228 t_state *state_local,
9231 gmx_localtop_t *top_local,
9234 gmx_shellfc_t shellfc,
9235 gmx_constr_t constr,
9237 gmx_wallcycle_t wcycle,
9241 gmx_domdec_comm_t *comm;
9242 gmx_ddbox_t ddbox = {0};
9244 gmx_int64_t step_pcoupl;
9245 rvec cell_ns_x0, cell_ns_x1;
9246 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9247 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9248 gmx_bool bRedist, bSortCG, bResortAll;
9249 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9256 bBoxChanged = (bMasterState || DEFORM(*ir));
9257 if (ir->epc != epcNO)
9259 /* With nstpcouple > 1 pressure coupling happens.
9260 * one step after calculating the pressure.
9261 * Box scaling happens at the end of the MD step,
9262 * after the DD partitioning.
9263 * We therefore have to do DLB in the first partitioning
9264 * after an MD step where P-coupling occured.
9265 * We need to determine the last step in which p-coupling occurred.
9266 * MRS -- need to validate this for vv?
9271 step_pcoupl = step - 1;
9275 step_pcoupl = ((step - 1)/n)*n + 1;
9277 if (step_pcoupl >= comm->partition_step)
9283 bNStGlobalComm = (step % nstglobalcomm == 0);
9285 if (!comm->bDynLoadBal)
9291 /* Should we do dynamic load balacing this step?
9292 * Since it requires (possibly expensive) global communication,
9293 * we might want to do DLB less frequently.
9295 if (bBoxChanged || ir->epc != epcNO)
9297 bDoDLB = bBoxChanged;
9301 bDoDLB = bNStGlobalComm;
9305 /* Check if we have recorded loads on the nodes */
9306 if (comm->bRecordLoad && dd_load_count(comm))
9308 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9310 /* Check if we should use DLB at the second partitioning
9311 * and every 100 partitionings,
9312 * so the extra communication cost is negligible.
9314 n = max(100, nstglobalcomm);
9315 bCheckDLB = (comm->n_load_collect == 0 ||
9316 comm->n_load_have % n == n-1);
9323 /* Print load every nstlog, first and last step to the log file */
9324 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9325 comm->n_load_collect == 0 ||
9327 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9329 /* Avoid extra communication due to verbose screen output
9330 * when nstglobalcomm is set.
9332 if (bDoDLB || bLogLoad || bCheckDLB ||
9333 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9335 get_load_distribution(dd, wcycle);
9340 dd_print_load(fplog, dd, step-1);
9344 dd_print_load_verbose(dd);
9347 comm->n_load_collect++;
9351 /* Since the timings are node dependent, the master decides */
9355 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
9358 fprintf(debug, "step %s, imb loss %f\n",
9359 gmx_step_str(step, sbuf),
9360 dd_force_imb_perf_loss(dd));
9363 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9366 turn_on_dlb(fplog, cr, step);
9371 comm->n_load_have++;
9374 cgs_gl = &comm->cgs_gl;
9379 /* Clear the old state */
9380 clear_dd_indices(dd, 0, 0);
9383 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9384 TRUE, cgs_gl, state_global->x, &ddbox);
9386 get_cg_distribution(fplog, step, dd, cgs_gl,
9387 state_global->box, &ddbox, state_global->x);
9389 dd_distribute_state(dd, cgs_gl,
9390 state_global, state_local, f);
9392 dd_make_local_cgs(dd, &top_local->cgs);
9394 /* Ensure that we have space for the new distribution */
9395 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9397 if (fr->cutoff_scheme == ecutsGROUP)
9399 calc_cgcm(fplog, 0, dd->ncg_home,
9400 &top_local->cgs, state_local->x, fr->cg_cm);
9403 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9405 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9407 else if (state_local->ddp_count != dd->ddp_count)
9409 if (state_local->ddp_count > dd->ddp_count)
9411 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9414 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9416 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9419 /* Clear the old state */
9420 clear_dd_indices(dd, 0, 0);
9422 /* Build the new indices */
9423 rebuild_cgindex(dd, cgs_gl->index, state_local);
9424 make_dd_indices(dd, cgs_gl->index, 0);
9425 ncgindex_set = dd->ncg_home;
9427 if (fr->cutoff_scheme == ecutsGROUP)
9429 /* Redetermine the cg COMs */
9430 calc_cgcm(fplog, 0, dd->ncg_home,
9431 &top_local->cgs, state_local->x, fr->cg_cm);
9434 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9436 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9438 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9439 TRUE, &top_local->cgs, state_local->x, &ddbox);
9441 bRedist = comm->bDynLoadBal;
9445 /* We have the full state, only redistribute the cgs */
9447 /* Clear the non-home indices */
9448 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9451 /* Avoid global communication for dim's without pbc and -gcom */
9452 if (!bNStGlobalComm)
9454 copy_rvec(comm->box0, ddbox.box0 );
9455 copy_rvec(comm->box_size, ddbox.box_size);
9457 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9458 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9463 /* For dim's without pbc and -gcom */
9464 copy_rvec(ddbox.box0, comm->box0 );
9465 copy_rvec(ddbox.box_size, comm->box_size);
9467 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9470 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9472 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9475 /* Check if we should sort the charge groups */
9476 if (comm->nstSortCG > 0)
9478 bSortCG = (bMasterState ||
9479 (bRedist && (step % comm->nstSortCG == 0)));
9486 ncg_home_old = dd->ncg_home;
9491 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9493 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9495 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9497 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9500 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9502 &comm->cell_x0, &comm->cell_x1,
9503 dd->ncg_home, fr->cg_cm,
9504 cell_ns_x0, cell_ns_x1, &grid_density);
9508 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9511 switch (fr->cutoff_scheme)
9514 copy_ivec(fr->ns.grid->n, ncells_old);
9515 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9516 state_local->box, cell_ns_x0, cell_ns_x1,
9517 fr->rlistlong, grid_density);
9520 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9523 gmx_incons("unimplemented");
9525 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9526 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9530 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9532 /* Sort the state on charge group position.
9533 * This enables exact restarts from this step.
9534 * It also improves performance by about 15% with larger numbers
9535 * of atoms per node.
9538 /* Fill the ns grid with the home cell,
9539 * so we can sort with the indices.
9541 set_zones_ncg_home(dd);
9543 switch (fr->cutoff_scheme)
9546 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9548 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9550 comm->zones.size[0].bb_x0,
9551 comm->zones.size[0].bb_x1,
9553 comm->zones.dens_zone0,
9556 ncg_moved, bRedist ? comm->moved : NULL,
9557 fr->nbv->grp[eintLocal].kernel_type,
9558 fr->nbv->grp[eintLocal].nbat);
9560 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9563 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9564 0, dd->ncg_home, fr->cg_cm);
9566 copy_ivec(fr->ns.grid->n, ncells_new);
9569 gmx_incons("unimplemented");
9572 bResortAll = bMasterState;
9574 /* Check if we can use the old order and ns grid cell indices
9575 * of the charge groups to sort the charge groups efficiently.
9577 if (ncells_new[XX] != ncells_old[XX] ||
9578 ncells_new[YY] != ncells_old[YY] ||
9579 ncells_new[ZZ] != ncells_old[ZZ])
9586 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9587 gmx_step_str(step, sbuf), dd->ncg_home);
9589 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9590 bResortAll ? -1 : ncg_home_old);
9591 /* Rebuild all the indices */
9592 ga2la_clear(dd->ga2la);
9595 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9598 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9600 /* Set up the communication and communicate the coordinates */
9601 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9603 /* Set the indices */
9604 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9606 /* Set the charge group boundaries for neighbor searching */
9607 set_cg_boundaries(&comm->zones);
9609 if (fr->cutoff_scheme == ecutsVERLET)
9611 set_zones_size(dd, state_local->box, &ddbox,
9612 bSortCG ? 1 : 0, comm->zones.n);
9615 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9618 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9619 -1,state_local->x,state_local->box);
9622 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9624 /* Extract a local topology from the global topology */
9625 for (i = 0; i < dd->ndim; i++)
9627 np[dd->dim[i]] = comm->cd[i].np;
9629 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9630 comm->cellsize_min, np,
9632 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9633 vsite, top_global, top_local);
9635 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9637 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9639 /* Set up the special atom communication */
9640 n = comm->nat[ddnatZONE];
9641 for (i = ddnatZONE+1; i < ddnatNR; i++)
9646 if (vsite && vsite->n_intercg_vsite)
9648 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9652 if (dd->bInterCGcons || dd->bInterCGsettles)
9654 /* Only for inter-cg constraints we need special code */
9655 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9656 constr, ir->nProjOrder,
9657 top_local->idef.il);
9661 gmx_incons("Unknown special atom type setup");
9666 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9668 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9670 /* Make space for the extra coordinates for virtual site
9671 * or constraint communication.
9673 state_local->natoms = comm->nat[ddnatNR-1];
9674 if (state_local->natoms > state_local->nalloc)
9676 dd_realloc_state(state_local, f, state_local->natoms);
9679 if (fr->bF_NoVirSum)
9681 if (vsite && vsite->n_intercg_vsite)
9683 nat_f_novirsum = comm->nat[ddnatVSITE];
9687 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9689 nat_f_novirsum = dd->nat_tot;
9693 nat_f_novirsum = dd->nat_home;
9702 /* Set the number of atoms required for the force calculation.
9703 * Forces need to be constrained when using a twin-range setup
9704 * or with energy minimization. For simple simulations we could
9705 * avoid some allocation, zeroing and copying, but this is
9706 * probably not worth the complications and checking.
9708 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9709 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9711 /* We make all the mdatoms up to nat_tot_con.
9712 * We could save some work by only setting invmass
9713 * between nat_tot and nat_tot_con.
9715 /* This call also sets the new number of home particles to dd->nat_home */
9716 atoms2md(top_global, ir,
9717 comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
9719 /* Now we have the charges we can sort the FE interactions */
9720 dd_sort_local_top(dd, mdatoms, top_local);
9724 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9725 split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
9730 /* Make the local shell stuff, currently no communication is done */
9731 make_local_shells(cr, mdatoms, shellfc);
9734 if (ir->implicit_solvent)
9736 make_local_gb(cr, fr->born, ir->gb_algorithm);
9739 setup_bonded_threading(fr, &top_local->idef);
9741 if (!(cr->duty & DUTY_PME))
9743 /* Send the charges and/or c6/sigmas to our PME only node */
9744 gmx_pme_send_parameters(cr, mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9745 mdatoms->chargeA, mdatoms->chargeB,
9746 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9747 mdatoms->sigmaA, mdatoms->sigmaB,
9748 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9753 set_constraints(constr, top_local, ir, mdatoms, cr);
9756 if (ir->ePull != epullNO)
9758 /* Update the local pull groups */
9759 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9764 /* Update the local rotation groups */
9765 dd_make_local_rotation_groups(dd, ir->rot);
9768 if (ir->eSwapCoords != eswapNO)
9770 /* Update the local groups needed for ion swapping */
9771 dd_make_local_swap_groups(dd, ir->swap);
9774 add_dd_statistics(dd);
9776 /* Make sure we only count the cycles for this DD partitioning */
9777 clear_dd_cycle_counts(dd);
9779 /* Because the order of the atoms might have changed since
9780 * the last vsite construction, we need to communicate the constructing
9781 * atom coordinates again (for spreading the forces this MD step).
9783 dd_move_x_vsites(dd, state_local->box, state_local->x);
9785 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9787 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9789 dd_move_x(dd, state_local->box, state_local->x);
9790 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9791 -1, state_local->x, state_local->box);
9794 /* Store the partitioning step */
9795 comm->partition_step = step;
9797 /* Increase the DD partitioning counter */
9799 /* The state currently matches this DD partitioning count, store it */
9800 state_local->ddp_count = dd->ddp_count;
9803 /* The DD master node knows the complete cg distribution,
9804 * store the count so we can possibly skip the cg info communication.
9806 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9809 if (comm->DD_debug > 0)
9811 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9812 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9813 "after partitioning");