2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 #include "gromacs/legacyheaders/domdec.h"
51 #include "gromacs/bonded/bonded.h"
52 #include "gromacs/fileio/gmxfio.h"
53 #include "gromacs/fileio/pdbio.h"
54 #include "gromacs/imd/imd.h"
55 #include "gromacs/legacyheaders/bonded-threading.h"
56 #include "gromacs/legacyheaders/chargegroup.h"
57 #include "gromacs/legacyheaders/constr.h"
58 #include "gromacs/legacyheaders/domdec_network.h"
59 #include "gromacs/legacyheaders/force.h"
60 #include "gromacs/legacyheaders/gmx_ga2la.h"
61 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
62 #include "gromacs/legacyheaders/gpu_utils.h"
63 #include "gromacs/legacyheaders/macros.h"
64 #include "gromacs/legacyheaders/mdatoms.h"
65 #include "gromacs/legacyheaders/mdrun.h"
66 #include "gromacs/legacyheaders/names.h"
67 #include "gromacs/legacyheaders/network.h"
68 #include "gromacs/legacyheaders/nrnb.h"
69 #include "gromacs/legacyheaders/nsgrid.h"
70 #include "gromacs/legacyheaders/pme.h"
71 #include "gromacs/legacyheaders/shellfc.h"
72 #include "gromacs/legacyheaders/typedefs.h"
73 #include "gromacs/math/vec.h"
74 #include "gromacs/mdlib/nb_verlet.h"
75 #include "gromacs/mdlib/nbnxn_search.h"
76 #include "gromacs/pbcutil/ishift.h"
77 #include "gromacs/pbcutil/pbc.h"
78 #include "gromacs/pulling/pull.h"
79 #include "gromacs/pulling/pull_rotation.h"
80 #include "gromacs/swap/swapcoords.h"
81 #include "gromacs/timing/wallcycle.h"
82 #include "gromacs/topology/mtop_util.h"
83 #include "gromacs/utility/basenetwork.h"
84 #include "gromacs/utility/fatalerror.h"
85 #include "gromacs/utility/futil.h"
86 #include "gromacs/utility/gmxmpi.h"
87 #include "gromacs/utility/qsort_threadsafe.h"
88 #include "gromacs/utility/smalloc.h"
90 #define DDRANK(dd, rank) (rank)
91 #define DDMASTERRANK(dd) (dd->masterrank)
93 typedef struct gmx_domdec_master
95 /* The cell boundaries */
97 /* The global charge group division */
98 int *ncg; /* Number of home charge groups for each node */
99 int *index; /* Index of nnodes+1 into cg */
100 int *cg; /* Global charge group index */
101 int *nat; /* Number of home atoms for each node. */
102 int *ibuf; /* Buffer for communication */
103 rvec *vbuf; /* Buffer for state scattering and gathering */
104 } gmx_domdec_master_t;
108 /* The numbers of charge groups to send and receive for each cell
109 * that requires communication, the last entry contains the total
110 * number of atoms that needs to be communicated.
112 int nsend[DD_MAXIZONE+2];
113 int nrecv[DD_MAXIZONE+2];
114 /* The charge groups to send */
117 /* The atom range for non-in-place communication */
118 int cell2at0[DD_MAXIZONE];
119 int cell2at1[DD_MAXIZONE];
124 int np; /* Number of grid pulses in this dimension */
125 int np_dlb; /* For dlb, for use with edlbAUTO */
126 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
128 gmx_bool bInPlace; /* Can we communicate in place? */
129 } gmx_domdec_comm_dim_t;
133 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
134 real *cell_f; /* State var.: cell boundaries, box relative */
135 real *old_cell_f; /* Temp. var.: old cell size */
136 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
137 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
138 real *bound_min; /* Temp. var.: lower limit for cell boundary */
139 real *bound_max; /* Temp. var.: upper limit for cell boundary */
140 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
141 real *buf_ncd; /* Temp. var. */
144 #define DD_NLOAD_MAX 9
146 /* Here floats are accurate enough, since these variables
147 * only influence the load balancing, not the actual MD results.
174 gmx_cgsort_t *sort_new;
186 /* This enum determines the order of the coordinates.
187 * ddnatHOME and ddnatZONE should be first and second,
188 * the others can be ordered as wanted.
191 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
195 edlbAUTO, edlbNO, edlbYES, edlbNR
197 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
201 int dim; /* The dimension */
202 gmx_bool dim_match; /* Tells if DD and PME dims match */
203 int nslab; /* The number of PME slabs in this dimension */
204 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
205 int *pp_min; /* The minimum pp node location, size nslab */
206 int *pp_max; /* The maximum pp node location,size nslab */
207 int maxshift; /* The maximum shift for coordinate redistribution in PME */
212 real min0; /* The minimum bottom of this zone */
213 real max1; /* The maximum top of this zone */
214 real min1; /* The minimum top of this zone */
215 real mch0; /* The maximum bottom communicaton height for this zone */
216 real mch1; /* The maximum top communicaton height for this zone */
217 real p1_0; /* The bottom value of the first cell in this zone */
218 real p1_1; /* The top value of the first cell in this zone */
223 gmx_domdec_ind_t ind;
230 } dd_comm_setup_work_t;
232 typedef struct gmx_domdec_comm
234 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
235 * unless stated otherwise.
238 /* The number of decomposition dimensions for PME, 0: no PME */
240 /* The number of nodes doing PME (PP/PME or only PME) */
244 /* The communication setup including the PME only nodes */
245 gmx_bool bCartesianPP_PME;
248 int *pmenodes; /* size npmenodes */
249 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
250 * but with bCartesianPP_PME */
251 gmx_ddpme_t ddpme[2];
253 /* The DD particle-particle nodes only */
254 gmx_bool bCartesianPP;
255 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
257 /* The global charge groups */
260 /* Should we sort the cgs */
262 gmx_domdec_sort_t *sort;
264 /* Are there charge groups? */
267 /* Are there bonded and multi-body interactions between charge groups? */
268 gmx_bool bInterCGBondeds;
269 gmx_bool bInterCGMultiBody;
271 /* Data for the optional bonded interaction atom communication range */
278 /* Are we actually using DLB? */
279 gmx_bool bDynLoadBal;
281 /* Cell sizes for static load balancing, first index cartesian */
284 /* The width of the communicated boundaries */
287 /* The minimum cell size (including triclinic correction) */
289 /* For dlb, for use with edlbAUTO */
290 rvec cellsize_min_dlb;
291 /* The lower limit for the DD cell size with DLB */
293 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
294 gmx_bool bVacDLBNoLimit;
296 /* With PME load balancing we set limits on DLB */
297 gmx_bool bPMELoadBalDLBLimits;
298 /* DLB needs to take into account that we want to allow this maximum
299 * cut-off (for PME load balancing), this could limit cell boundaries.
301 real PMELoadBal_max_cutoff;
303 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
305 /* box0 and box_size are required with dim's without pbc and -gcom */
309 /* The cell boundaries */
313 /* The old location of the cell boundaries, to check cg displacements */
317 /* The communication setup and charge group boundaries for the zones */
318 gmx_domdec_zones_t zones;
320 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
321 * cell boundaries of neighboring cells for dynamic load balancing.
323 gmx_ddzone_t zone_d1[2];
324 gmx_ddzone_t zone_d2[2][2];
326 /* The coordinate/force communication setup and indices */
327 gmx_domdec_comm_dim_t cd[DIM];
328 /* The maximum number of cells to communicate with in one dimension */
331 /* Which cg distribution is stored on the master node */
332 int master_cg_ddp_count;
334 /* The number of cg's received from the direct neighbors */
335 int zone_ncg1[DD_MAXZONE];
337 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
340 /* Array for signalling if atoms have moved to another domain */
344 /* Communication buffer for general use */
348 /* Communication buffer for general use */
351 /* Temporary storage for thread parallel communication setup */
353 dd_comm_setup_work_t *dth;
355 /* Communication buffers only used with multiple grid pulses */
360 /* Communication buffers for local redistribution */
362 int cggl_flag_nalloc[DIM*2];
364 int cgcm_state_nalloc[DIM*2];
366 /* Cell sizes for dynamic load balancing */
367 gmx_domdec_root_t **root;
371 real cell_f_max0[DIM];
372 real cell_f_min1[DIM];
374 /* Stuff for load communication */
375 gmx_bool bRecordLoad;
376 gmx_domdec_load_t *load;
377 int nrank_gpu_shared;
379 MPI_Comm *mpi_comm_load;
380 MPI_Comm mpi_comm_gpu_shared;
383 /* Maximum DLB scaling per load balancing step in percent */
387 float cycl[ddCyclNr];
388 int cycl_n[ddCyclNr];
389 float cycl_max[ddCyclNr];
390 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
394 /* Have often have did we have load measurements */
396 /* Have often have we collected the load measurements */
400 double sum_nat[ddnatNR-ddnatZONE];
410 /* The last partition step */
411 gmx_int64_t partition_step;
419 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
422 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
423 #define DD_FLAG_NRCG 65535
424 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
425 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
427 /* Zone permutation required to obtain consecutive charge groups
428 * for neighbor searching.
430 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
432 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
433 * components see only j zones with that component 0.
436 /* The DD zone order */
437 static const ivec dd_zo[DD_MAXZONE] =
438 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
443 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
448 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
453 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
455 /* Factors used to avoid problems due to rounding issues */
456 #define DD_CELL_MARGIN 1.0001
457 #define DD_CELL_MARGIN2 1.00005
458 /* Factor to account for pressure scaling during nstlist steps */
459 #define DD_PRES_SCALE_MARGIN 1.02
461 /* Turn on DLB when the load imbalance causes this amount of total loss.
462 * There is a bit of overhead with DLB and it's difficult to achieve
463 * a load imbalance of less than 2% with DLB.
465 #define DD_PERF_LOSS_DLB_ON 0.02
467 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
468 #define DD_PERF_LOSS_WARN 0.05
470 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
472 /* Use separate MPI send and receive commands
473 * when nnodes <= GMX_DD_NNODES_SENDRECV.
474 * This saves memory (and some copying for small nnodes).
475 * For high parallelization scatter and gather calls are used.
477 #define GMX_DD_NNODES_SENDRECV 4
481 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
483 static void index2xyz(ivec nc,int ind,ivec xyz)
485 xyz[XX] = ind % nc[XX];
486 xyz[YY] = (ind / nc[XX]) % nc[YY];
487 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
491 /* This order is required to minimize the coordinate communication in PME
492 * which uses decomposition in the x direction.
494 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
496 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
498 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
499 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
500 xyz[ZZ] = ind % nc[ZZ];
503 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
508 ddindex = dd_index(dd->nc, c);
509 if (dd->comm->bCartesianPP_PME)
511 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
513 else if (dd->comm->bCartesianPP)
516 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
527 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
529 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
532 int ddglatnr(gmx_domdec_t *dd, int i)
542 if (i >= dd->comm->nat[ddnatNR-1])
544 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
546 atnr = dd->gatindex[i] + 1;
552 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
554 return &dd->comm->cgs_gl;
557 static void vec_rvec_init(vec_rvec_t *v)
563 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
567 v->nalloc = over_alloc_dd(n);
568 srenew(v->v, v->nalloc);
572 void dd_store_state(gmx_domdec_t *dd, t_state *state)
576 if (state->ddp_count != dd->ddp_count)
578 gmx_incons("The state does not the domain decomposition state");
581 state->ncg_gl = dd->ncg_home;
582 if (state->ncg_gl > state->cg_gl_nalloc)
584 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
585 srenew(state->cg_gl, state->cg_gl_nalloc);
587 for (i = 0; i < state->ncg_gl; i++)
589 state->cg_gl[i] = dd->index_gl[i];
592 state->ddp_count_cg_gl = dd->ddp_count;
595 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
597 return &dd->comm->zones;
600 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
601 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
603 gmx_domdec_zones_t *zones;
606 zones = &dd->comm->zones;
609 while (icg >= zones->izone[izone].cg1)
618 else if (izone < zones->nizone)
620 *jcg0 = zones->izone[izone].jcg0;
624 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
625 icg, izone, zones->nizone);
628 *jcg1 = zones->izone[izone].jcg1;
630 for (d = 0; d < dd->ndim; d++)
633 shift0[dim] = zones->izone[izone].shift0[dim];
634 shift1[dim] = zones->izone[izone].shift1[dim];
635 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
637 /* A conservative approach, this can be optimized */
644 int dd_natoms_vsite(gmx_domdec_t *dd)
646 return dd->comm->nat[ddnatVSITE];
649 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
651 *at_start = dd->comm->nat[ddnatCON-1];
652 *at_end = dd->comm->nat[ddnatCON];
655 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
657 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
658 int *index, *cgindex;
659 gmx_domdec_comm_t *comm;
660 gmx_domdec_comm_dim_t *cd;
661 gmx_domdec_ind_t *ind;
662 rvec shift = {0, 0, 0}, *buf, *rbuf;
663 gmx_bool bPBC, bScrew;
667 cgindex = dd->cgindex;
672 nat_tot = dd->nat_home;
673 for (d = 0; d < dd->ndim; d++)
675 bPBC = (dd->ci[dd->dim[d]] == 0);
676 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
679 copy_rvec(box[dd->dim[d]], shift);
682 for (p = 0; p < cd->np; p++)
689 for (i = 0; i < ind->nsend[nzone]; i++)
691 at0 = cgindex[index[i]];
692 at1 = cgindex[index[i]+1];
693 for (j = at0; j < at1; j++)
695 copy_rvec(x[j], buf[n]);
702 for (i = 0; i < ind->nsend[nzone]; i++)
704 at0 = cgindex[index[i]];
705 at1 = cgindex[index[i]+1];
706 for (j = at0; j < at1; j++)
708 /* We need to shift the coordinates */
709 rvec_add(x[j], shift, buf[n]);
716 for (i = 0; i < ind->nsend[nzone]; i++)
718 at0 = cgindex[index[i]];
719 at1 = cgindex[index[i]+1];
720 for (j = at0; j < at1; j++)
723 buf[n][XX] = x[j][XX] + shift[XX];
725 * This operation requires a special shift force
726 * treatment, which is performed in calc_vir.
728 buf[n][YY] = box[YY][YY] - x[j][YY];
729 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
741 rbuf = comm->vbuf2.v;
743 /* Send and receive the coordinates */
744 dd_sendrecv_rvec(dd, d, dddirBackward,
745 buf, ind->nsend[nzone+1],
746 rbuf, ind->nrecv[nzone+1]);
750 for (zone = 0; zone < nzone; zone++)
752 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
754 copy_rvec(rbuf[j], x[i]);
759 nat_tot += ind->nrecv[nzone+1];
765 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
767 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
768 int *index, *cgindex;
769 gmx_domdec_comm_t *comm;
770 gmx_domdec_comm_dim_t *cd;
771 gmx_domdec_ind_t *ind;
775 gmx_bool bShiftForcesNeedPbc, bScrew;
779 cgindex = dd->cgindex;
783 nzone = comm->zones.n/2;
784 nat_tot = dd->nat_tot;
785 for (d = dd->ndim-1; d >= 0; d--)
787 /* Only forces in domains near the PBC boundaries need to
788 consider PBC in the treatment of fshift */
789 bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
790 bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
791 if (fshift == NULL && !bScrew)
793 bShiftForcesNeedPbc = FALSE;
795 /* Determine which shift vector we need */
801 for (p = cd->np-1; p >= 0; p--)
804 nat_tot -= ind->nrecv[nzone+1];
811 sbuf = comm->vbuf2.v;
813 for (zone = 0; zone < nzone; zone++)
815 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
817 copy_rvec(f[i], sbuf[j]);
822 /* Communicate the forces */
823 dd_sendrecv_rvec(dd, d, dddirForward,
824 sbuf, ind->nrecv[nzone+1],
825 buf, ind->nsend[nzone+1]);
827 /* Add the received forces */
829 if (!bShiftForcesNeedPbc)
831 for (i = 0; i < ind->nsend[nzone]; i++)
833 at0 = cgindex[index[i]];
834 at1 = cgindex[index[i]+1];
835 for (j = at0; j < at1; j++)
837 rvec_inc(f[j], buf[n]);
844 /* fshift should always be defined if this function is
845 * called when bShiftForcesNeedPbc is true */
846 assert(NULL != fshift);
847 for (i = 0; i < ind->nsend[nzone]; i++)
849 at0 = cgindex[index[i]];
850 at1 = cgindex[index[i]+1];
851 for (j = at0; j < at1; j++)
853 rvec_inc(f[j], buf[n]);
854 /* Add this force to the shift force */
855 rvec_inc(fshift[is], buf[n]);
862 for (i = 0; i < ind->nsend[nzone]; i++)
864 at0 = cgindex[index[i]];
865 at1 = cgindex[index[i]+1];
866 for (j = at0; j < at1; j++)
868 /* Rotate the force */
869 f[j][XX] += buf[n][XX];
870 f[j][YY] -= buf[n][YY];
871 f[j][ZZ] -= buf[n][ZZ];
874 /* Add this force to the shift force */
875 rvec_inc(fshift[is], buf[n]);
886 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
888 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
889 int *index, *cgindex;
890 gmx_domdec_comm_t *comm;
891 gmx_domdec_comm_dim_t *cd;
892 gmx_domdec_ind_t *ind;
897 cgindex = dd->cgindex;
899 buf = &comm->vbuf.v[0][0];
902 nat_tot = dd->nat_home;
903 for (d = 0; d < dd->ndim; d++)
906 for (p = 0; p < cd->np; p++)
911 for (i = 0; i < ind->nsend[nzone]; i++)
913 at0 = cgindex[index[i]];
914 at1 = cgindex[index[i]+1];
915 for (j = at0; j < at1; j++)
928 rbuf = &comm->vbuf2.v[0][0];
930 /* Send and receive the coordinates */
931 dd_sendrecv_real(dd, d, dddirBackward,
932 buf, ind->nsend[nzone+1],
933 rbuf, ind->nrecv[nzone+1]);
937 for (zone = 0; zone < nzone; zone++)
939 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
946 nat_tot += ind->nrecv[nzone+1];
952 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
954 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
955 int *index, *cgindex;
956 gmx_domdec_comm_t *comm;
957 gmx_domdec_comm_dim_t *cd;
958 gmx_domdec_ind_t *ind;
963 cgindex = dd->cgindex;
965 buf = &comm->vbuf.v[0][0];
967 nzone = comm->zones.n/2;
968 nat_tot = dd->nat_tot;
969 for (d = dd->ndim-1; d >= 0; d--)
972 for (p = cd->np-1; p >= 0; p--)
975 nat_tot -= ind->nrecv[nzone+1];
982 sbuf = &comm->vbuf2.v[0][0];
984 for (zone = 0; zone < nzone; zone++)
986 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
993 /* Communicate the forces */
994 dd_sendrecv_real(dd, d, dddirForward,
995 sbuf, ind->nrecv[nzone+1],
996 buf, ind->nsend[nzone+1]);
998 /* Add the received forces */
1000 for (i = 0; i < ind->nsend[nzone]; i++)
1002 at0 = cgindex[index[i]];
1003 at1 = cgindex[index[i]+1];
1004 for (j = at0; j < at1; j++)
1015 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
1017 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1019 zone->min0, zone->max1,
1020 zone->mch0, zone->mch0,
1021 zone->p1_0, zone->p1_1);
1025 #define DDZONECOMM_MAXZONE 5
1026 #define DDZONECOMM_BUFSIZE 3
1028 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1029 int ddimind, int direction,
1030 gmx_ddzone_t *buf_s, int n_s,
1031 gmx_ddzone_t *buf_r, int n_r)
1033 #define ZBS DDZONECOMM_BUFSIZE
1034 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1035 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
1038 for (i = 0; i < n_s; i++)
1040 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1041 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1042 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1043 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1044 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1045 vbuf_s[i*ZBS+1][2] = 0;
1046 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1047 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1048 vbuf_s[i*ZBS+2][2] = 0;
1051 dd_sendrecv_rvec(dd, ddimind, direction,
1055 for (i = 0; i < n_r; i++)
1057 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1058 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1059 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1060 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1061 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1062 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1063 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
1069 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1070 rvec cell_ns_x0, rvec cell_ns_x1)
1072 int d, d1, dim, pos, buf_size, i, j, p, npulse, npulse_min;
1074 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1075 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1076 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1077 rvec extr_s[2], extr_r[2];
1079 real dist_d, c = 0, det;
1080 gmx_domdec_comm_t *comm;
1081 gmx_bool bPBC, bUse;
1085 for (d = 1; d < dd->ndim; d++)
1088 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1089 zp->min0 = cell_ns_x0[dim];
1090 zp->max1 = cell_ns_x1[dim];
1091 zp->min1 = cell_ns_x1[dim];
1092 zp->mch0 = cell_ns_x0[dim];
1093 zp->mch1 = cell_ns_x1[dim];
1094 zp->p1_0 = cell_ns_x0[dim];
1095 zp->p1_1 = cell_ns_x1[dim];
1098 for (d = dd->ndim-2; d >= 0; d--)
1101 bPBC = (dim < ddbox->npbcdim);
1103 /* Use an rvec to store two reals */
1104 extr_s[d][0] = comm->cell_f0[d+1];
1105 extr_s[d][1] = comm->cell_f1[d+1];
1106 extr_s[d][2] = comm->cell_f1[d+1];
1109 /* Store the extremes in the backward sending buffer,
1110 * so the get updated separately from the forward communication.
1112 for (d1 = d; d1 < dd->ndim-1; d1++)
1114 /* We invert the order to be able to use the same loop for buf_e */
1115 buf_s[pos].min0 = extr_s[d1][1];
1116 buf_s[pos].max1 = extr_s[d1][0];
1117 buf_s[pos].min1 = extr_s[d1][2];
1118 buf_s[pos].mch0 = 0;
1119 buf_s[pos].mch1 = 0;
1120 /* Store the cell corner of the dimension we communicate along */
1121 buf_s[pos].p1_0 = comm->cell_x0[dim];
1122 buf_s[pos].p1_1 = 0;
1126 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1129 if (dd->ndim == 3 && d == 0)
1131 buf_s[pos] = comm->zone_d2[0][1];
1133 buf_s[pos] = comm->zone_d1[0];
1137 /* We only need to communicate the extremes
1138 * in the forward direction
1140 npulse = comm->cd[d].np;
1143 /* Take the minimum to avoid double communication */
1144 npulse_min = std::min(npulse, dd->nc[dim]-1-npulse);
1148 /* Without PBC we should really not communicate over
1149 * the boundaries, but implementing that complicates
1150 * the communication setup and therefore we simply
1151 * do all communication, but ignore some data.
1153 npulse_min = npulse;
1155 for (p = 0; p < npulse_min; p++)
1157 /* Communicate the extremes forward */
1158 bUse = (bPBC || dd->ci[dim] > 0);
1160 dd_sendrecv_rvec(dd, d, dddirForward,
1161 extr_s+d, dd->ndim-d-1,
1162 extr_r+d, dd->ndim-d-1);
1166 for (d1 = d; d1 < dd->ndim-1; d1++)
1168 extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
1169 extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
1170 extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]);
1176 for (p = 0; p < npulse; p++)
1178 /* Communicate all the zone information backward */
1179 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1181 dd_sendrecv_ddzone(dd, d, dddirBackward,
1188 for (d1 = d+1; d1 < dd->ndim; d1++)
1190 /* Determine the decrease of maximum required
1191 * communication height along d1 due to the distance along d,
1192 * this avoids a lot of useless atom communication.
1194 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1196 if (ddbox->tric_dir[dim])
1198 /* c is the off-diagonal coupling between the cell planes
1199 * along directions d and d1.
1201 c = ddbox->v[dim][dd->dim[d1]][dim];
1207 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1210 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1214 /* A negative value signals out of range */
1220 /* Accumulate the extremes over all pulses */
1221 for (i = 0; i < buf_size; i++)
1225 buf_e[i] = buf_r[i];
1231 buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
1232 buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
1233 buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
1236 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1244 if (bUse && dh[d1] >= 0)
1246 buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1247 buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1250 /* Copy the received buffer to the send buffer,
1251 * to pass the data through with the next pulse.
1253 buf_s[i] = buf_r[i];
1255 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1256 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1258 /* Store the extremes */
1261 for (d1 = d; d1 < dd->ndim-1; d1++)
1263 extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
1264 extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
1265 extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1);
1269 if (d == 1 || (d == 0 && dd->ndim == 3))
1271 for (i = d; i < 2; i++)
1273 comm->zone_d2[1-d][i] = buf_e[pos];
1279 comm->zone_d1[1] = buf_e[pos];
1289 for (i = 0; i < 2; i++)
1293 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1295 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1296 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1302 for (i = 0; i < 2; i++)
1304 for (j = 0; j < 2; j++)
1308 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1310 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1311 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1315 for (d = 1; d < dd->ndim; d++)
1317 comm->cell_f_max0[d] = extr_s[d-1][0];
1318 comm->cell_f_min1[d] = extr_s[d-1][1];
1321 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1322 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1327 static void dd_collect_cg(gmx_domdec_t *dd,
1328 t_state *state_local)
1330 gmx_domdec_master_t *ma = NULL;
1331 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1333 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1335 /* The master has the correct distribution */
1339 if (state_local->ddp_count == dd->ddp_count)
1341 /* The local state and DD are in sync, use the DD indices */
1342 ncg_home = dd->ncg_home;
1344 nat_home = dd->nat_home;
1346 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1348 /* The DD is out of sync with the local state, but we have stored
1349 * the cg indices with the local state, so we can use those.
1353 cgs_gl = &dd->comm->cgs_gl;
1355 ncg_home = state_local->ncg_gl;
1356 cg = state_local->cg_gl;
1358 for (i = 0; i < ncg_home; i++)
1360 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1365 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1379 /* Collect the charge group and atom counts on the master */
1380 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1385 for (i = 0; i < dd->nnodes; i++)
1387 ma->ncg[i] = ma->ibuf[2*i];
1388 ma->nat[i] = ma->ibuf[2*i+1];
1389 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1392 /* Make byte counts and indices */
1393 for (i = 0; i < dd->nnodes; i++)
1395 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1396 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1400 fprintf(debug, "Initial charge group distribution: ");
1401 for (i = 0; i < dd->nnodes; i++)
1403 fprintf(debug, " %d", ma->ncg[i]);
1405 fprintf(debug, "\n");
1409 /* Collect the charge group indices on the master */
1411 ncg_home*sizeof(int), cg,
1412 DDMASTER(dd) ? ma->ibuf : NULL,
1413 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1414 DDMASTER(dd) ? ma->cg : NULL);
1416 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1419 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1422 gmx_domdec_master_t *ma;
1423 int n, i, c, a, nalloc = 0;
1432 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1433 dd->rank, dd->mpi_comm_all);
1438 /* Copy the master coordinates to the global array */
1439 cgs_gl = &dd->comm->cgs_gl;
1441 n = DDMASTERRANK(dd);
1443 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1445 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1447 copy_rvec(lv[a++], v[c]);
1451 for (n = 0; n < dd->nnodes; n++)
1455 if (ma->nat[n] > nalloc)
1457 nalloc = over_alloc_dd(ma->nat[n]);
1458 srenew(buf, nalloc);
1461 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1462 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1465 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1467 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1469 copy_rvec(buf[a++], v[c]);
1478 static void get_commbuffer_counts(gmx_domdec_t *dd,
1479 int **counts, int **disps)
1481 gmx_domdec_master_t *ma;
1486 /* Make the rvec count and displacment arrays */
1488 *disps = ma->ibuf + dd->nnodes;
1489 for (n = 0; n < dd->nnodes; n++)
1491 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1492 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1496 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1499 gmx_domdec_master_t *ma;
1500 int *rcounts = NULL, *disps = NULL;
1509 get_commbuffer_counts(dd, &rcounts, &disps);
1514 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1518 cgs_gl = &dd->comm->cgs_gl;
1521 for (n = 0; n < dd->nnodes; n++)
1523 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1525 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1527 copy_rvec(buf[a++], v[c]);
1534 void dd_collect_vec(gmx_domdec_t *dd,
1535 t_state *state_local, rvec *lv, rvec *v)
1537 dd_collect_cg(dd, state_local);
1539 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1541 dd_collect_vec_sendrecv(dd, lv, v);
1545 dd_collect_vec_gatherv(dd, lv, v);
1550 void dd_collect_state(gmx_domdec_t *dd,
1551 t_state *state_local, t_state *state)
1555 nh = state->nhchainlength;
1559 for (i = 0; i < efptNR; i++)
1561 state->lambda[i] = state_local->lambda[i];
1563 state->fep_state = state_local->fep_state;
1564 state->veta = state_local->veta;
1565 state->vol0 = state_local->vol0;
1566 copy_mat(state_local->box, state->box);
1567 copy_mat(state_local->boxv, state->boxv);
1568 copy_mat(state_local->svir_prev, state->svir_prev);
1569 copy_mat(state_local->fvir_prev, state->fvir_prev);
1570 copy_mat(state_local->pres_prev, state->pres_prev);
1572 for (i = 0; i < state_local->ngtc; i++)
1574 for (j = 0; j < nh; j++)
1576 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1577 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1579 state->therm_integral[i] = state_local->therm_integral[i];
1581 for (i = 0; i < state_local->nnhpres; i++)
1583 for (j = 0; j < nh; j++)
1585 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1586 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1590 for (est = 0; est < estNR; est++)
1592 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1597 dd_collect_vec(dd, state_local, state_local->x, state->x);
1600 dd_collect_vec(dd, state_local, state_local->v, state->v);
1603 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1606 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1608 case estDISRE_INITF:
1609 case estDISRE_RM3TAV:
1610 case estORIRE_INITF:
1614 gmx_incons("Unknown state entry encountered in dd_collect_state");
1620 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1626 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1629 state->nalloc = over_alloc_dd(nalloc);
1631 for (est = 0; est < estNR; est++)
1633 if (EST_DISTR(est) && (state->flags & (1<<est)))
1638 srenew(state->x, state->nalloc);
1641 srenew(state->v, state->nalloc);
1644 srenew(state->sd_X, state->nalloc);
1647 srenew(state->cg_p, state->nalloc);
1649 case estDISRE_INITF:
1650 case estDISRE_RM3TAV:
1651 case estORIRE_INITF:
1653 /* No reallocation required */
1656 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1663 srenew(*f, state->nalloc);
1667 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1670 if (nalloc > fr->cg_nalloc)
1674 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1676 fr->cg_nalloc = over_alloc_dd(nalloc);
1677 srenew(fr->cginfo, fr->cg_nalloc);
1678 if (fr->cutoff_scheme == ecutsGROUP)
1680 srenew(fr->cg_cm, fr->cg_nalloc);
1683 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1685 /* We don't use charge groups, we use x in state to set up
1686 * the atom communication.
1688 dd_realloc_state(state, f, nalloc);
1692 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1695 gmx_domdec_master_t *ma;
1696 int n, i, c, a, nalloc = 0;
1703 for (n = 0; n < dd->nnodes; n++)
1707 if (ma->nat[n] > nalloc)
1709 nalloc = over_alloc_dd(ma->nat[n]);
1710 srenew(buf, nalloc);
1712 /* Use lv as a temporary buffer */
1714 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1716 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1718 copy_rvec(v[c], buf[a++]);
1721 if (a != ma->nat[n])
1723 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1728 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1729 DDRANK(dd, n), n, dd->mpi_comm_all);
1734 n = DDMASTERRANK(dd);
1736 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1738 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1740 copy_rvec(v[c], lv[a++]);
1747 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1748 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1753 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1756 gmx_domdec_master_t *ma;
1757 int *scounts = NULL, *disps = NULL;
1765 get_commbuffer_counts(dd, &scounts, &disps);
1769 for (n = 0; n < dd->nnodes; n++)
1771 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1773 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1775 copy_rvec(v[c], buf[a++]);
1781 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1784 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1786 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1788 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1792 dd_distribute_vec_scatterv(dd, cgs, v, lv);
1796 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1799 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1800 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1801 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1803 if (dfhist->nlambda > 0)
1805 int nlam = dfhist->nlambda;
1806 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1807 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1808 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1809 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1810 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1811 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
1813 for (i = 0; i < nlam; i++)
1815 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1816 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1817 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1818 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1819 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1820 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
1825 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1826 t_state *state, t_state *state_local,
1831 nh = state->nhchainlength;
1835 for (i = 0; i < efptNR; i++)
1837 state_local->lambda[i] = state->lambda[i];
1839 state_local->fep_state = state->fep_state;
1840 state_local->veta = state->veta;
1841 state_local->vol0 = state->vol0;
1842 copy_mat(state->box, state_local->box);
1843 copy_mat(state->box_rel, state_local->box_rel);
1844 copy_mat(state->boxv, state_local->boxv);
1845 copy_mat(state->svir_prev, state_local->svir_prev);
1846 copy_mat(state->fvir_prev, state_local->fvir_prev);
1847 copy_df_history(&state_local->dfhist, &state->dfhist);
1848 for (i = 0; i < state_local->ngtc; i++)
1850 for (j = 0; j < nh; j++)
1852 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1853 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1855 state_local->therm_integral[i] = state->therm_integral[i];
1857 for (i = 0; i < state_local->nnhpres; i++)
1859 for (j = 0; j < nh; j++)
1861 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1862 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1866 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1867 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1868 dd_bcast(dd, sizeof(real), &state_local->veta);
1869 dd_bcast(dd, sizeof(real), &state_local->vol0);
1870 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1871 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1872 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1873 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1874 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1875 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1876 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1877 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1878 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1879 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1881 /* communicate df_history -- required for restarting from checkpoint */
1882 dd_distribute_dfhist(dd, &state_local->dfhist);
1884 if (dd->nat_home > state_local->nalloc)
1886 dd_realloc_state(state_local, f, dd->nat_home);
1888 for (i = 0; i < estNR; i++)
1890 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1895 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1898 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1901 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1904 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1906 case estDISRE_INITF:
1907 case estDISRE_RM3TAV:
1908 case estORIRE_INITF:
1910 /* Not implemented yet */
1913 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1919 static char dim2char(int dim)
1925 case XX: c = 'X'; break;
1926 case YY: c = 'Y'; break;
1927 case ZZ: c = 'Z'; break;
1928 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
1934 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1935 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1937 rvec grid_s[2], *grid_r = NULL, cx, r;
1938 char fname[STRLEN], buf[22];
1940 int a, i, d, z, y, x;
1944 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1945 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1949 snew(grid_r, 2*dd->nnodes);
1952 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : NULL);
1956 for (d = 0; d < DIM; d++)
1958 for (i = 0; i < DIM; i++)
1966 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1968 tric[d][i] = box[i][d]/box[i][i];
1977 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1978 out = gmx_fio_fopen(fname, "w");
1979 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1981 for (i = 0; i < dd->nnodes; i++)
1983 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1984 for (d = 0; d < DIM; d++)
1986 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1988 for (z = 0; z < 2; z++)
1990 for (y = 0; y < 2; y++)
1992 for (x = 0; x < 2; x++)
1994 cx[XX] = grid_r[i*2+x][XX];
1995 cx[YY] = grid_r[i*2+y][YY];
1996 cx[ZZ] = grid_r[i*2+z][ZZ];
1998 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1999 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
2003 for (d = 0; d < DIM; d++)
2005 for (x = 0; x < 4; x++)
2009 case 0: y = 1 + i*8 + 2*x; break;
2010 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
2011 case 2: y = 1 + i*8 + x; break;
2013 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2017 gmx_fio_fclose(out);
2022 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
2023 gmx_mtop_t *mtop, t_commrec *cr,
2024 int natoms, rvec x[], matrix box)
2026 char fname[STRLEN], buf[22];
2028 int i, ii, resnr, c;
2029 char *atomname, *resname;
2036 natoms = dd->comm->nat[ddnatVSITE];
2039 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2041 out = gmx_fio_fopen(fname, "w");
2043 fprintf(out, "TITLE %s\n", title);
2044 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2045 for (i = 0; i < natoms; i++)
2047 ii = dd->gatindex[i];
2048 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
2049 if (i < dd->comm->nat[ddnatZONE])
2052 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2058 else if (i < dd->comm->nat[ddnatVSITE])
2060 b = dd->comm->zones.n;
2064 b = dd->comm->zones.n + 1;
2066 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
2067 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
2069 fprintf(out, "TER\n");
2071 gmx_fio_fclose(out);
2074 real dd_cutoff_mbody(gmx_domdec_t *dd)
2076 gmx_domdec_comm_t *comm;
2083 if (comm->bInterCGBondeds)
2085 if (comm->cutoff_mbody > 0)
2087 r = comm->cutoff_mbody;
2091 /* cutoff_mbody=0 means we do not have DLB */
2092 r = comm->cellsize_min[dd->dim[0]];
2093 for (di = 1; di < dd->ndim; di++)
2095 r = std::min(r, comm->cellsize_min[dd->dim[di]]);
2097 if (comm->bBondComm)
2099 r = std::max(r, comm->cutoff_mbody);
2103 r = std::min(r, comm->cutoff);
2111 real dd_cutoff_twobody(gmx_domdec_t *dd)
2115 r_mb = dd_cutoff_mbody(dd);
2117 return std::max(dd->comm->cutoff, r_mb);
2121 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2125 nc = dd->nc[dd->comm->cartpmedim];
2126 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2127 copy_ivec(coord, coord_pme);
2128 coord_pme[dd->comm->cartpmedim] =
2129 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
2132 static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
2134 /* Here we assign a PME node to communicate with this DD node
2135 * by assuming that the major index of both is x.
2136 * We add cr->npmenodes/2 to obtain an even distribution.
2138 return (ddindex*npme + npme/2)/ndd;
2141 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2143 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2146 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2148 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
2151 static int *dd_pmenodes(t_commrec *cr)
2156 snew(pmenodes, cr->npmenodes);
2158 for (i = 0; i < cr->dd->nnodes; i++)
2160 p0 = cr_ddindex2pmeindex(cr, i);
2161 p1 = cr_ddindex2pmeindex(cr, i+1);
2162 if (i+1 == cr->dd->nnodes || p1 > p0)
2166 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2168 pmenodes[n] = i + 1 + n;
2176 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2184 if (dd->comm->bCartesian) {
2185 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2186 dd_coords2pmecoords(dd,coords,coords_pme);
2187 copy_ivec(dd->ntot,nc);
2188 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2189 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2191 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2193 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2199 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
2204 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2206 gmx_domdec_comm_t *comm;
2208 int ddindex, nodeid = -1;
2210 comm = cr->dd->comm;
2215 if (comm->bCartesianPP_PME)
2218 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2223 ddindex = dd_index(cr->dd->nc, coords);
2224 if (comm->bCartesianPP)
2226 nodeid = comm->ddindex2simnodeid[ddindex];
2232 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
2244 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2247 gmx_domdec_comm_t *comm;
2254 /* This assumes a uniform x domain decomposition grid cell size */
2255 if (comm->bCartesianPP_PME)
2258 ivec coord, coord_pme;
2259 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2260 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2262 /* This is a PP node */
2263 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2264 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2268 else if (comm->bCartesianPP)
2270 if (sim_nodeid < dd->nnodes)
2272 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2277 /* This assumes DD cells with identical x coordinates
2278 * are numbered sequentially.
2280 if (dd->comm->pmenodes == NULL)
2282 if (sim_nodeid < dd->nnodes)
2284 /* The DD index equals the nodeid */
2285 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2291 while (sim_nodeid > dd->comm->pmenodes[i])
2295 if (sim_nodeid < dd->comm->pmenodes[i])
2297 pmenode = dd->comm->pmenodes[i];
2305 void get_pme_nnodes(const gmx_domdec_t *dd,
2306 int *npmenodes_x, int *npmenodes_y)
2310 *npmenodes_x = dd->comm->npmenodes_x;
2311 *npmenodes_y = dd->comm->npmenodes_y;
2320 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2322 gmx_bool bPMEOnlyNode;
2324 if (DOMAINDECOMP(cr))
2326 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2330 bPMEOnlyNode = FALSE;
2333 return bPMEOnlyNode;
2336 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2337 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2341 ivec coord, coord_pme;
2345 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2348 for (x = 0; x < dd->nc[XX]; x++)
2350 for (y = 0; y < dd->nc[YY]; y++)
2352 for (z = 0; z < dd->nc[ZZ]; z++)
2354 if (dd->comm->bCartesianPP_PME)
2359 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2360 if (dd->ci[XX] == coord_pme[XX] &&
2361 dd->ci[YY] == coord_pme[YY] &&
2362 dd->ci[ZZ] == coord_pme[ZZ])
2364 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2369 /* The slab corresponds to the nodeid in the PME group */
2370 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2372 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2379 /* The last PP-only node is the peer node */
2380 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2384 fprintf(debug, "Receive coordinates from PP ranks:");
2385 for (x = 0; x < *nmy_ddnodes; x++)
2387 fprintf(debug, " %d", (*my_ddnodes)[x]);
2389 fprintf(debug, "\n");
2393 static gmx_bool receive_vir_ener(t_commrec *cr)
2395 gmx_domdec_comm_t *comm;
2400 if (cr->npmenodes < cr->dd->nnodes)
2402 comm = cr->dd->comm;
2403 if (comm->bCartesianPP_PME)
2405 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2408 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2409 coords[comm->cartpmedim]++;
2410 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2413 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2414 if (dd_simnode2pmenode(cr, rank) == pmenode)
2416 /* This is not the last PP node for pmenode */
2424 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2425 if (cr->sim_nodeid+1 < cr->nnodes &&
2426 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2428 /* This is not the last PP node for pmenode */
2437 static void set_zones_ncg_home(gmx_domdec_t *dd)
2439 gmx_domdec_zones_t *zones;
2442 zones = &dd->comm->zones;
2444 zones->cg_range[0] = 0;
2445 for (i = 1; i < zones->n+1; i++)
2447 zones->cg_range[i] = dd->ncg_home;
2449 /* zone_ncg1[0] should always be equal to ncg_home */
2450 dd->comm->zone_ncg1[0] = dd->ncg_home;
2453 static void rebuild_cgindex(gmx_domdec_t *dd,
2454 const int *gcgs_index, t_state *state)
2456 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2459 dd_cg_gl = dd->index_gl;
2460 cgindex = dd->cgindex;
2463 for (i = 0; i < state->ncg_gl; i++)
2467 dd_cg_gl[i] = cg_gl;
2468 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2472 dd->ncg_home = state->ncg_gl;
2475 set_zones_ncg_home(dd);
2478 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2480 while (cg >= cginfo_mb->cg_end)
2485 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2488 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2489 t_forcerec *fr, char *bLocalCG)
2491 cginfo_mb_t *cginfo_mb;
2497 cginfo_mb = fr->cginfo_mb;
2498 cginfo = fr->cginfo;
2500 for (cg = cg0; cg < cg1; cg++)
2502 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2506 if (bLocalCG != NULL)
2508 for (cg = cg0; cg < cg1; cg++)
2510 bLocalCG[index_gl[cg]] = TRUE;
2515 static void make_dd_indices(gmx_domdec_t *dd,
2516 const int *gcgs_index, int cg_start)
2518 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2519 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2522 if (dd->nat_tot > dd->gatindex_nalloc)
2524 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2525 srenew(dd->gatindex, dd->gatindex_nalloc);
2528 nzone = dd->comm->zones.n;
2529 zone2cg = dd->comm->zones.cg_range;
2530 zone_ncg1 = dd->comm->zone_ncg1;
2531 index_gl = dd->index_gl;
2532 gatindex = dd->gatindex;
2533 bCGs = dd->comm->bCGs;
2535 if (zone2cg[1] != dd->ncg_home)
2537 gmx_incons("dd->ncg_zone is not up to date");
2540 /* Make the local to global and global to local atom index */
2541 a = dd->cgindex[cg_start];
2542 for (zone = 0; zone < nzone; zone++)
2550 cg0 = zone2cg[zone];
2552 cg1 = zone2cg[zone+1];
2553 cg1_p1 = cg0 + zone_ncg1[zone];
2555 for (cg = cg0; cg < cg1; cg++)
2560 /* Signal that this cg is from more than one pulse away */
2563 cg_gl = index_gl[cg];
2566 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2569 ga2la_set(dd->ga2la, a_gl, a, zone1);
2575 gatindex[a] = cg_gl;
2576 ga2la_set(dd->ga2la, cg_gl, a, zone1);
2583 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2589 if (bLocalCG == NULL)
2593 for (i = 0; i < dd->ncg_tot; i++)
2595 if (!bLocalCG[dd->index_gl[i]])
2598 "DD rank %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
2603 for (i = 0; i < ncg_sys; i++)
2610 if (ngl != dd->ncg_tot)
2612 fprintf(stderr, "DD rank %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
2619 static void check_index_consistency(gmx_domdec_t *dd,
2620 int natoms_sys, int ncg_sys,
2623 int nerr, ngl, i, a, cell;
2628 if (dd->comm->DD_debug > 1)
2630 snew(have, natoms_sys);
2631 for (a = 0; a < dd->nat_tot; a++)
2633 if (have[dd->gatindex[a]] > 0)
2635 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2639 have[dd->gatindex[a]] = a + 1;
2645 snew(have, dd->nat_tot);
2648 for (i = 0; i < natoms_sys; i++)
2650 if (ga2la_get(dd->ga2la, i, &a, &cell))
2652 if (a >= dd->nat_tot)
2654 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2660 if (dd->gatindex[a] != i)
2662 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2669 if (ngl != dd->nat_tot)
2672 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
2673 dd->rank, where, ngl, dd->nat_tot);
2675 for (a = 0; a < dd->nat_tot; a++)
2680 "DD rank %d, %s: local atom %d, global %d has no global index\n",
2681 dd->rank, where, a+1, dd->gatindex[a]+1);
2686 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2690 gmx_fatal(FARGS, "DD rank %d, %s: %d atom/cg index inconsistencies",
2691 dd->rank, where, nerr);
2695 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2702 /* Clear the whole list without searching */
2703 ga2la_clear(dd->ga2la);
2707 for (i = a_start; i < dd->nat_tot; i++)
2709 ga2la_del(dd->ga2la, dd->gatindex[i]);
2713 bLocalCG = dd->comm->bLocalCG;
2716 for (i = cg_start; i < dd->ncg_tot; i++)
2718 bLocalCG[dd->index_gl[i]] = FALSE;
2722 dd_clear_local_vsite_indices(dd);
2724 if (dd->constraints)
2726 dd_clear_local_constraint_indices(dd);
2730 /* This function should be used for moving the domain boudaries during DLB,
2731 * for obtaining the minimum cell size. It checks the initially set limit
2732 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2733 * and, possibly, a longer cut-off limit set for PME load balancing.
2735 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2739 cellsize_min = comm->cellsize_min[dim];
2741 if (!comm->bVacDLBNoLimit)
2743 /* The cut-off might have changed, e.g. by PME load balacning,
2744 * from the value used to set comm->cellsize_min, so check it.
2746 cellsize_min = std::max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2748 if (comm->bPMELoadBalDLBLimits)
2750 /* Check for the cut-off limit set by the PME load balancing */
2751 cellsize_min = std::max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2755 return cellsize_min;
2758 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2761 real grid_jump_limit;
2763 /* The distance between the boundaries of cells at distance
2764 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2765 * and by the fact that cells should not be shifted by more than
2766 * half their size, such that cg's only shift by one cell
2767 * at redecomposition.
2769 grid_jump_limit = comm->cellsize_limit;
2770 if (!comm->bVacDLBNoLimit)
2772 if (comm->bPMELoadBalDLBLimits)
2774 cutoff = std::max(cutoff, comm->PMELoadBal_max_cutoff);
2776 grid_jump_limit = std::max(grid_jump_limit,
2777 cutoff/comm->cd[dim_ind].np);
2780 return grid_jump_limit;
2783 static gmx_bool check_grid_jump(gmx_int64_t step,
2789 gmx_domdec_comm_t *comm;
2798 for (d = 1; d < dd->ndim; d++)
2801 limit = grid_jump_limit(comm, cutoff, d);
2802 bfac = ddbox->box_size[dim];
2803 if (ddbox->tric_dir[dim])
2805 bfac *= ddbox->skew_fac[dim];
2807 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2808 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2816 /* This error should never be triggered under normal
2817 * circumstances, but you never know ...
2819 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
2820 gmx_step_str(step, buf),
2821 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2829 static int dd_load_count(gmx_domdec_comm_t *comm)
2831 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2834 static float dd_force_load(gmx_domdec_comm_t *comm)
2841 if (comm->eFlop > 1)
2843 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2848 load = comm->cycl[ddCyclF];
2849 if (comm->cycl_n[ddCyclF] > 1)
2851 /* Subtract the maximum of the last n cycle counts
2852 * to get rid of possible high counts due to other sources,
2853 * for instance system activity, that would otherwise
2854 * affect the dynamic load balancing.
2856 load -= comm->cycl_max[ddCyclF];
2860 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2862 float gpu_wait, gpu_wait_sum;
2864 gpu_wait = comm->cycl[ddCyclWaitGPU];
2865 if (comm->cycl_n[ddCyclF] > 1)
2867 /* We should remove the WaitGPU time of the same MD step
2868 * as the one with the maximum F time, since the F time
2869 * and the wait time are not independent.
2870 * Furthermore, the step for the max F time should be chosen
2871 * the same on all ranks that share the same GPU.
2872 * But to keep the code simple, we remove the average instead.
2873 * The main reason for artificially long times at some steps
2874 * is spurious CPU activity or MPI time, so we don't expect
2875 * that changes in the GPU wait time matter a lot here.
2877 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2879 /* Sum the wait times over the ranks that share the same GPU */
2880 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2881 comm->mpi_comm_gpu_shared);
2882 /* Replace the wait time by the average over the ranks */
2883 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
2891 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2893 gmx_domdec_comm_t *comm;
2898 snew(*dim_f, dd->nc[dim]+1);
2900 for (i = 1; i < dd->nc[dim]; i++)
2902 if (comm->slb_frac[dim])
2904 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2908 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2911 (*dim_f)[dd->nc[dim]] = 1;
2914 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2916 int pmeindex, slab, nso, i;
2919 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2925 ddpme->dim = dimind;
2927 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2929 ddpme->nslab = (ddpme->dim == 0 ?
2930 dd->comm->npmenodes_x :
2931 dd->comm->npmenodes_y);
2933 if (ddpme->nslab <= 1)
2938 nso = dd->comm->npmenodes/ddpme->nslab;
2939 /* Determine for each PME slab the PP location range for dimension dim */
2940 snew(ddpme->pp_min, ddpme->nslab);
2941 snew(ddpme->pp_max, ddpme->nslab);
2942 for (slab = 0; slab < ddpme->nslab; slab++)
2944 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2945 ddpme->pp_max[slab] = 0;
2947 for (i = 0; i < dd->nnodes; i++)
2949 ddindex2xyz(dd->nc, i, xyz);
2950 /* For y only use our y/z slab.
2951 * This assumes that the PME x grid size matches the DD grid size.
2953 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2955 pmeindex = ddindex2pmeindex(dd, i);
2958 slab = pmeindex/nso;
2962 slab = pmeindex % ddpme->nslab;
2964 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
2965 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
2969 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
2972 int dd_pme_maxshift_x(gmx_domdec_t *dd)
2974 if (dd->comm->ddpme[0].dim == XX)
2976 return dd->comm->ddpme[0].maxshift;
2984 int dd_pme_maxshift_y(gmx_domdec_t *dd)
2986 if (dd->comm->ddpme[0].dim == YY)
2988 return dd->comm->ddpme[0].maxshift;
2990 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2992 return dd->comm->ddpme[1].maxshift;
3000 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
3001 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
3003 gmx_domdec_comm_t *comm;
3006 real range, pme_boundary;
3010 nc = dd->nc[ddpme->dim];
3013 if (!ddpme->dim_match)
3015 /* PP decomposition is not along dim: the worst situation */
3018 else if (ns <= 3 || (bUniform && ns == nc))
3020 /* The optimal situation */
3025 /* We need to check for all pme nodes which nodes they
3026 * could possibly need to communicate with.
3028 xmin = ddpme->pp_min;
3029 xmax = ddpme->pp_max;
3030 /* Allow for atoms to be maximally 2/3 times the cut-off
3031 * out of their DD cell. This is a reasonable balance between
3032 * between performance and support for most charge-group/cut-off
3035 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3036 /* Avoid extra communication when we are exactly at a boundary */
3040 for (s = 0; s < ns; s++)
3042 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3043 pme_boundary = (real)s/ns;
3046 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3048 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3052 pme_boundary = (real)(s+1)/ns;
3055 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3057 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3064 ddpme->maxshift = sh;
3068 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3069 ddpme->dim, ddpme->maxshift);
3073 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3077 for (d = 0; d < dd->ndim; d++)
3080 if (dim < ddbox->nboundeddim &&
3081 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3082 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3084 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3085 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3086 dd->nc[dim], dd->comm->cellsize_limit);
3092 setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY
3095 /* Set the domain boundaries. Use for static (or no) load balancing,
3096 * and also for the starting state for dynamic load balancing.
3097 * setmode determine if and where the boundaries are stored, use enum above.
3098 * Returns the number communication pulses in npulse.
3100 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3101 int setmode, ivec npulse)
3103 gmx_domdec_comm_t *comm;
3106 real *cell_x, cell_dx, cellsize;
3110 for (d = 0; d < DIM; d++)
3112 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
3114 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3117 cell_dx = ddbox->box_size[d]/dd->nc[d];
3120 case setcellsizeslbMASTER:
3121 for (j = 0; j < dd->nc[d]+1; j++)
3123 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3126 case setcellsizeslbLOCAL:
3127 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3128 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
3133 cellsize = cell_dx*ddbox->skew_fac[d];
3134 while (cellsize*npulse[d] < comm->cutoff)
3138 cellsize_min[d] = cellsize;
3142 /* Statically load balanced grid */
3143 /* Also when we are not doing a master distribution we determine
3144 * all cell borders in a loop to obtain identical values
3145 * to the master distribution case and to determine npulse.
3147 if (setmode == setcellsizeslbMASTER)
3149 cell_x = dd->ma->cell_x[d];
3153 snew(cell_x, dd->nc[d]+1);
3155 cell_x[0] = ddbox->box0[d];
3156 for (j = 0; j < dd->nc[d]; j++)
3158 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3159 cell_x[j+1] = cell_x[j] + cell_dx;
3160 cellsize = cell_dx*ddbox->skew_fac[d];
3161 while (cellsize*npulse[d] < comm->cutoff &&
3162 npulse[d] < dd->nc[d]-1)
3166 cellsize_min[d] = std::min(cellsize_min[d], cellsize);
3168 if (setmode == setcellsizeslbLOCAL)
3170 comm->cell_x0[d] = cell_x[dd->ci[d]];
3171 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3173 if (setmode != setcellsizeslbMASTER)
3178 /* The following limitation is to avoid that a cell would receive
3179 * some of its own home charge groups back over the periodic boundary.
3180 * Double charge groups cause trouble with the global indices.
3182 if (d < ddbox->npbcdim &&
3183 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3185 char error_string[STRLEN];
3187 sprintf(error_string,
3188 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3189 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3191 dd->nc[d], dd->nc[d],
3192 dd->nnodes > dd->nc[d] ? "cells" : "ranks");
3194 if (setmode == setcellsizeslbLOCAL)
3196 gmx_fatal_collective(FARGS, NULL, dd, error_string);
3200 gmx_fatal(FARGS, error_string);
3205 if (!comm->bDynLoadBal)
3207 copy_rvec(cellsize_min, comm->cellsize_min);
3210 for (d = 0; d < comm->npmedecompdim; d++)
3212 set_pme_maxshift(dd, &comm->ddpme[d],
3213 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3214 comm->ddpme[d].slb_dim_f);
3219 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3220 int d, int dim, gmx_domdec_root_t *root,
3222 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3224 gmx_domdec_comm_t *comm;
3225 int ncd, i, j, nmin, nmin_old;
3226 gmx_bool bLimLo, bLimHi;
3228 real fac, halfway, cellsize_limit_f_i, region_size;
3229 gmx_bool bPBC, bLastHi = FALSE;
3230 int nrange[] = {range[0], range[1]};
3232 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
3238 bPBC = (dim < ddbox->npbcdim);
3240 cell_size = root->buf_ncd;
3244 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3247 /* First we need to check if the scaling does not make cells
3248 * smaller than the smallest allowed size.
3249 * We need to do this iteratively, since if a cell is too small,
3250 * it needs to be enlarged, which makes all the other cells smaller,
3251 * which could in turn make another cell smaller than allowed.
3253 for (i = range[0]; i < range[1]; i++)
3255 root->bCellMin[i] = FALSE;
3261 /* We need the total for normalization */
3263 for (i = range[0]; i < range[1]; i++)
3265 if (root->bCellMin[i] == FALSE)
3267 fac += cell_size[i];
3270 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3271 /* Determine the cell boundaries */
3272 for (i = range[0]; i < range[1]; i++)
3274 if (root->bCellMin[i] == FALSE)
3276 cell_size[i] *= fac;
3277 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3279 cellsize_limit_f_i = 0;
3283 cellsize_limit_f_i = cellsize_limit_f;
3285 if (cell_size[i] < cellsize_limit_f_i)
3287 root->bCellMin[i] = TRUE;
3288 cell_size[i] = cellsize_limit_f_i;
3292 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
3295 while (nmin > nmin_old);
3298 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3299 /* For this check we should not use DD_CELL_MARGIN,
3300 * but a slightly smaller factor,
3301 * since rounding could get use below the limit.
3303 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3306 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3307 gmx_step_str(step, buf),
3308 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3309 ncd, comm->cellsize_min[dim]);
3312 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3316 /* Check if the boundary did not displace more than halfway
3317 * each of the cells it bounds, as this could cause problems,
3318 * especially when the differences between cell sizes are large.
3319 * If changes are applied, they will not make cells smaller
3320 * than the cut-off, as we check all the boundaries which
3321 * might be affected by a change and if the old state was ok,
3322 * the cells will at most be shrunk back to their old size.
3324 for (i = range[0]+1; i < range[1]; i++)
3326 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3327 if (root->cell_f[i] < halfway)
3329 root->cell_f[i] = halfway;
3330 /* Check if the change also causes shifts of the next boundaries */
3331 for (j = i+1; j < range[1]; j++)
3333 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3335 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3339 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3340 if (root->cell_f[i] > halfway)
3342 root->cell_f[i] = halfway;
3343 /* Check if the change also causes shifts of the next boundaries */
3344 for (j = i-1; j >= range[0]+1; j--)
3346 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3348 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3355 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3356 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3357 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3358 * for a and b nrange is used */
3361 /* Take care of the staggering of the cell boundaries */
3364 for (i = range[0]; i < range[1]; i++)
3366 root->cell_f_max0[i] = root->cell_f[i];
3367 root->cell_f_min1[i] = root->cell_f[i+1];
3372 for (i = range[0]+1; i < range[1]; i++)
3374 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3375 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3376 if (bLimLo && bLimHi)
3378 /* Both limits violated, try the best we can */
3379 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3380 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3381 nrange[0] = range[0];
3383 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3386 nrange[1] = range[1];
3387 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3393 /* root->cell_f[i] = root->bound_min[i]; */
3394 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3397 else if (bLimHi && !bLastHi)
3400 if (nrange[1] < range[1]) /* found a LimLo before */
3402 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3403 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3404 nrange[0] = nrange[1];
3406 root->cell_f[i] = root->bound_max[i];
3408 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3410 nrange[1] = range[1];
3413 if (nrange[1] < range[1]) /* found last a LimLo */
3415 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3416 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3417 nrange[0] = nrange[1];
3418 nrange[1] = range[1];
3419 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3421 else if (nrange[0] > range[0]) /* found at least one LimHi */
3423 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3430 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3431 int d, int dim, gmx_domdec_root_t *root,
3432 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3433 gmx_bool bUniform, gmx_int64_t step)
3435 gmx_domdec_comm_t *comm;
3436 int ncd, d1, i, pos;
3438 real load_aver, load_i, imbalance, change, change_max, sc;
3439 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3443 int range[] = { 0, 0 };
3447 /* Convert the maximum change from the input percentage to a fraction */
3448 change_limit = comm->dlb_scale_lim*0.01;
3452 bPBC = (dim < ddbox->npbcdim);
3454 cell_size = root->buf_ncd;
3456 /* Store the original boundaries */
3457 for (i = 0; i < ncd+1; i++)
3459 root->old_cell_f[i] = root->cell_f[i];
3463 for (i = 0; i < ncd; i++)
3465 cell_size[i] = 1.0/ncd;
3468 else if (dd_load_count(comm))
3470 load_aver = comm->load[d].sum_m/ncd;
3472 for (i = 0; i < ncd; i++)
3474 /* Determine the relative imbalance of cell i */
3475 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3476 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3477 /* Determine the change of the cell size using underrelaxation */
3478 change = -relax*imbalance;
3479 change_max = std::max(change_max, std::max(change, -change));
3481 /* Limit the amount of scaling.
3482 * We need to use the same rescaling for all cells in one row,
3483 * otherwise the load balancing might not converge.
3486 if (change_max > change_limit)
3488 sc *= change_limit/change_max;
3490 for (i = 0; i < ncd; i++)
3492 /* Determine the relative imbalance of cell i */
3493 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3494 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3495 /* Determine the change of the cell size using underrelaxation */
3496 change = -sc*imbalance;
3497 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3501 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3502 cellsize_limit_f *= DD_CELL_MARGIN;
3503 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3504 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3505 if (ddbox->tric_dir[dim])
3507 cellsize_limit_f /= ddbox->skew_fac[dim];
3508 dist_min_f /= ddbox->skew_fac[dim];
3510 if (bDynamicBox && d > 0)
3512 dist_min_f *= DD_PRES_SCALE_MARGIN;
3514 if (d > 0 && !bUniform)
3516 /* Make sure that the grid is not shifted too much */
3517 for (i = 1; i < ncd; i++)
3519 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3521 gmx_incons("Inconsistent DD boundary staggering limits!");
3523 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3524 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3527 root->bound_min[i] += 0.5*space;
3529 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3530 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3533 root->bound_max[i] += 0.5*space;
3538 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3540 root->cell_f_max0[i-1] + dist_min_f,
3541 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3542 root->cell_f_min1[i] - dist_min_f);
3547 root->cell_f[0] = 0;
3548 root->cell_f[ncd] = 1;
3549 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3552 /* After the checks above, the cells should obey the cut-off
3553 * restrictions, but it does not hurt to check.
3555 for (i = 0; i < ncd; i++)
3559 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3560 dim, i, root->cell_f[i], root->cell_f[i+1]);
3563 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3564 root->cell_f[i+1] - root->cell_f[i] <
3565 cellsize_limit_f/DD_CELL_MARGIN)
3569 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3570 gmx_step_str(step, buf), dim2char(dim), i,
3571 (root->cell_f[i+1] - root->cell_f[i])
3572 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3577 /* Store the cell boundaries of the lower dimensions at the end */
3578 for (d1 = 0; d1 < d; d1++)
3580 root->cell_f[pos++] = comm->cell_f0[d1];
3581 root->cell_f[pos++] = comm->cell_f1[d1];
3584 if (d < comm->npmedecompdim)
3586 /* The master determines the maximum shift for
3587 * the coordinate communication between separate PME nodes.
3589 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
3591 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3594 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3598 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3599 gmx_ddbox_t *ddbox, int dimind)
3601 gmx_domdec_comm_t *comm;
3606 /* Set the cell dimensions */
3607 dim = dd->dim[dimind];
3608 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3609 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3610 if (dim >= ddbox->nboundeddim)
3612 comm->cell_x0[dim] += ddbox->box0[dim];
3613 comm->cell_x1[dim] += ddbox->box0[dim];
3617 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3618 int d, int dim, real *cell_f_row,
3621 gmx_domdec_comm_t *comm;
3627 /* Each node would only need to know two fractions,
3628 * but it is probably cheaper to broadcast the whole array.
3630 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3631 0, comm->mpi_comm_load[d]);
3633 /* Copy the fractions for this dimension from the buffer */
3634 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3635 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3636 /* The whole array was communicated, so set the buffer position */
3637 pos = dd->nc[dim] + 1;
3638 for (d1 = 0; d1 <= d; d1++)
3642 /* Copy the cell fractions of the lower dimensions */
3643 comm->cell_f0[d1] = cell_f_row[pos++];
3644 comm->cell_f1[d1] = cell_f_row[pos++];
3646 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3648 /* Convert the communicated shift from float to int */
3649 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3652 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3656 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3657 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3658 gmx_bool bUniform, gmx_int64_t step)
3660 gmx_domdec_comm_t *comm;
3662 gmx_bool bRowMember, bRowRoot;
3667 for (d = 0; d < dd->ndim; d++)
3672 for (d1 = d; d1 < dd->ndim; d1++)
3674 if (dd->ci[dd->dim[d1]] > 0)
3687 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3688 ddbox, bDynamicBox, bUniform, step);
3689 cell_f_row = comm->root[d]->cell_f;
3693 cell_f_row = comm->cell_f_row;
3695 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
3700 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3704 /* This function assumes the box is static and should therefore
3705 * not be called when the box has changed since the last
3706 * call to dd_partition_system.
3708 for (d = 0; d < dd->ndim; d++)
3710 relative_to_absolute_cell_bounds(dd, ddbox, d);
3716 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3717 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3718 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3719 gmx_wallcycle_t wcycle)
3721 gmx_domdec_comm_t *comm;
3728 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3729 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3730 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3732 else if (bDynamicBox)
3734 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3737 /* Set the dimensions for which no DD is used */
3738 for (dim = 0; dim < DIM; dim++)
3740 if (dd->nc[dim] == 1)
3742 comm->cell_x0[dim] = 0;
3743 comm->cell_x1[dim] = ddbox->box_size[dim];
3744 if (dim >= ddbox->nboundeddim)
3746 comm->cell_x0[dim] += ddbox->box0[dim];
3747 comm->cell_x1[dim] += ddbox->box0[dim];
3753 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3756 gmx_domdec_comm_dim_t *cd;
3758 for (d = 0; d < dd->ndim; d++)
3760 cd = &dd->comm->cd[d];
3761 np = npulse[dd->dim[d]];
3762 if (np > cd->np_nalloc)
3766 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3767 dim2char(dd->dim[d]), np);
3769 if (DDMASTER(dd) && cd->np_nalloc > 0)
3771 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3773 srenew(cd->ind, np);
3774 for (i = cd->np_nalloc; i < np; i++)
3776 cd->ind[i].index = NULL;
3777 cd->ind[i].nalloc = 0;
3786 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3787 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3788 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3789 gmx_wallcycle_t wcycle)
3791 gmx_domdec_comm_t *comm;
3797 /* Copy the old cell boundaries for the cg displacement check */
3798 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3799 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3801 if (comm->bDynLoadBal)
3805 check_box_size(dd, ddbox);
3807 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3811 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
3812 realloc_comm_ind(dd, npulse);
3817 for (d = 0; d < DIM; d++)
3819 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3820 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
3825 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3827 rvec cell_ns_x0, rvec cell_ns_x1,
3830 gmx_domdec_comm_t *comm;
3835 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3837 dim = dd->dim[dim_ind];
3839 /* Without PBC we don't have restrictions on the outer cells */
3840 if (!(dim >= ddbox->npbcdim &&
3841 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3842 comm->bDynLoadBal &&
3843 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3844 comm->cellsize_min[dim])
3847 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3848 gmx_step_str(step, buf), dim2char(dim),
3849 comm->cell_x1[dim] - comm->cell_x0[dim],
3850 ddbox->skew_fac[dim],
3851 dd->comm->cellsize_min[dim],
3852 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3856 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3858 /* Communicate the boundaries and update cell_ns_x0/1 */
3859 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3860 if (dd->bGridJump && dd->ndim > 1)
3862 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
3867 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3871 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3879 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3880 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3889 static void check_screw_box(matrix box)
3891 /* Mathematical limitation */
3892 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3894 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3897 /* Limitation due to the asymmetry of the eighth shell method */
3898 if (box[ZZ][YY] != 0)
3900 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
3904 static void distribute_cg(FILE *fplog, gmx_int64_t step,
3905 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3908 gmx_domdec_master_t *ma;
3909 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3910 int i, icg, j, k, k0, k1, d;
3914 real nrcg, inv_ncg, pos_d;
3920 if (tmp_ind == NULL)
3922 snew(tmp_nalloc, dd->nnodes);
3923 snew(tmp_ind, dd->nnodes);
3924 for (i = 0; i < dd->nnodes; i++)
3926 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3927 snew(tmp_ind[i], tmp_nalloc[i]);
3931 /* Clear the count */
3932 for (i = 0; i < dd->nnodes; i++)
3938 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3940 cgindex = cgs->index;
3942 /* Compute the center of geometry for all charge groups */
3943 for (icg = 0; icg < cgs->nr; icg++)
3946 k1 = cgindex[icg+1];
3950 copy_rvec(pos[k0], cg_cm);
3957 for (k = k0; (k < k1); k++)
3959 rvec_inc(cg_cm, pos[k]);
3961 for (d = 0; (d < DIM); d++)
3963 cg_cm[d] *= inv_ncg;
3966 /* Put the charge group in the box and determine the cell index */
3967 for (d = DIM-1; d >= 0; d--)
3970 if (d < dd->npbcdim)
3972 bScrew = (dd->bScrewPBC && d == XX);
3973 if (tric_dir[d] && dd->nc[d] > 1)
3975 /* Use triclinic coordintates for this dimension */
3976 for (j = d+1; j < DIM; j++)
3978 pos_d += cg_cm[j]*tcm[j][d];
3981 while (pos_d >= box[d][d])
3984 rvec_dec(cg_cm, box[d]);
3987 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3988 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3990 for (k = k0; (k < k1); k++)
3992 rvec_dec(pos[k], box[d]);
3995 pos[k][YY] = box[YY][YY] - pos[k][YY];
3996 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4003 rvec_inc(cg_cm, box[d]);
4006 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
4007 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
4009 for (k = k0; (k < k1); k++)
4011 rvec_inc(pos[k], box[d]);
4014 pos[k][YY] = box[YY][YY] - pos[k][YY];
4015 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4020 /* This could be done more efficiently */
4022 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
4027 i = dd_index(dd->nc, ind);
4028 if (ma->ncg[i] == tmp_nalloc[i])
4030 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
4031 srenew(tmp_ind[i], tmp_nalloc[i]);
4033 tmp_ind[i][ma->ncg[i]] = icg;
4035 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
4039 for (i = 0; i < dd->nnodes; i++)
4042 for (k = 0; k < ma->ncg[i]; k++)
4044 ma->cg[k1++] = tmp_ind[i][k];
4047 ma->index[dd->nnodes] = k1;
4049 for (i = 0; i < dd->nnodes; i++)
4059 fprintf(fplog, "Charge group distribution at step %s:",
4060 gmx_step_str(step, buf));
4061 for (i = 0; i < dd->nnodes; i++)
4063 fprintf(fplog, " %d", ma->ncg[i]);
4065 fprintf(fplog, "\n");
4069 static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4070 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4073 gmx_domdec_master_t *ma = NULL;
4076 int *ibuf, buf2[2] = { 0, 0 };
4077 gmx_bool bMaster = DDMASTER(dd);
4085 check_screw_box(box);
4088 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
4090 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
4091 for (i = 0; i < dd->nnodes; i++)
4093 ma->ibuf[2*i] = ma->ncg[i];
4094 ma->ibuf[2*i+1] = ma->nat[i];
4102 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4104 dd->ncg_home = buf2[0];
4105 dd->nat_home = buf2[1];
4106 dd->ncg_tot = dd->ncg_home;
4107 dd->nat_tot = dd->nat_home;
4108 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4110 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4111 srenew(dd->index_gl, dd->cg_nalloc);
4112 srenew(dd->cgindex, dd->cg_nalloc+1);
4116 for (i = 0; i < dd->nnodes; i++)
4118 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4119 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4124 bMaster ? ma->ibuf : NULL,
4125 bMaster ? ma->ibuf+dd->nnodes : NULL,
4126 bMaster ? ma->cg : NULL,
4127 dd->ncg_home*sizeof(int), dd->index_gl);
4129 /* Determine the home charge group sizes */
4131 for (i = 0; i < dd->ncg_home; i++)
4133 cg_gl = dd->index_gl[i];
4135 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4140 fprintf(debug, "Home charge groups:\n");
4141 for (i = 0; i < dd->ncg_home; i++)
4143 fprintf(debug, " %d", dd->index_gl[i]);
4146 fprintf(debug, "\n");
4149 fprintf(debug, "\n");
4153 static int compact_and_copy_vec_at(int ncg, int *move,
4156 rvec *src, gmx_domdec_comm_t *comm,
4159 int m, icg, i, i0, i1, nrcg;
4165 for (m = 0; m < DIM*2; m++)
4171 for (icg = 0; icg < ncg; icg++)
4173 i1 = cgindex[icg+1];
4179 /* Compact the home array in place */
4180 for (i = i0; i < i1; i++)
4182 copy_rvec(src[i], src[home_pos++]);
4188 /* Copy to the communication buffer */
4190 pos_vec[m] += 1 + vec*nrcg;
4191 for (i = i0; i < i1; i++)
4193 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4195 pos_vec[m] += (nvec - vec - 1)*nrcg;
4199 home_pos += i1 - i0;
4207 static int compact_and_copy_vec_cg(int ncg, int *move,
4209 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4212 int m, icg, i0, i1, nrcg;
4218 for (m = 0; m < DIM*2; m++)
4224 for (icg = 0; icg < ncg; icg++)
4226 i1 = cgindex[icg+1];
4232 /* Compact the home array in place */
4233 copy_rvec(src[icg], src[home_pos++]);
4239 /* Copy to the communication buffer */
4240 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
4241 pos_vec[m] += 1 + nrcg*nvec;
4253 static int compact_ind(int ncg, int *move,
4254 int *index_gl, int *cgindex,
4256 gmx_ga2la_t ga2la, char *bLocalCG,
4259 int cg, nat, a0, a1, a, a_gl;
4264 for (cg = 0; cg < ncg; cg++)
4270 /* Compact the home arrays in place.
4271 * Anything that can be done here avoids access to global arrays.
4273 cgindex[home_pos] = nat;
4274 for (a = a0; a < a1; a++)
4277 gatindex[nat] = a_gl;
4278 /* The cell number stays 0, so we don't need to set it */
4279 ga2la_change_la(ga2la, a_gl, nat);
4282 index_gl[home_pos] = index_gl[cg];
4283 cginfo[home_pos] = cginfo[cg];
4284 /* The charge group remains local, so bLocalCG does not change */
4289 /* Clear the global indices */
4290 for (a = a0; a < a1; a++)
4292 ga2la_del(ga2la, gatindex[a]);
4296 bLocalCG[index_gl[cg]] = FALSE;
4300 cgindex[home_pos] = nat;
4305 static void clear_and_mark_ind(int ncg, int *move,
4306 int *index_gl, int *cgindex, int *gatindex,
4307 gmx_ga2la_t ga2la, char *bLocalCG,
4312 for (cg = 0; cg < ncg; cg++)
4318 /* Clear the global indices */
4319 for (a = a0; a < a1; a++)
4321 ga2la_del(ga2la, gatindex[a]);
4325 bLocalCG[index_gl[cg]] = FALSE;
4327 /* Signal that this cg has moved using the ns cell index.
4328 * Here we set it to -1. fill_grid will change it
4329 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4331 cell_index[cg] = -1;
4336 static void print_cg_move(FILE *fplog,
4338 gmx_int64_t step, int cg, int dim, int dir,
4339 gmx_bool bHaveLimitdAndCMOld, real limitd,
4340 rvec cm_old, rvec cm_new, real pos_d)
4342 gmx_domdec_comm_t *comm;
4347 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4348 if (bHaveLimitdAndCMOld)
4350 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4351 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4355 fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4356 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4358 fprintf(fplog, "distance out of cell %f\n",
4359 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4360 if (bHaveLimitdAndCMOld)
4362 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4363 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4365 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4366 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4367 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4369 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4370 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4372 comm->cell_x0[dim], comm->cell_x1[dim]);
4375 static void cg_move_error(FILE *fplog,
4377 gmx_int64_t step, int cg, int dim, int dir,
4378 gmx_bool bHaveLimitdAndCMOld, real limitd,
4379 rvec cm_old, rvec cm_new, real pos_d)
4383 print_cg_move(fplog, dd, step, cg, dim, dir,
4384 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4386 print_cg_move(stderr, dd, step, cg, dim, dir,
4387 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4389 "A charge group moved too far between two domain decomposition steps\n"
4390 "This usually means that your system is not well equilibrated");
4393 static void rotate_state_atom(t_state *state, int a)
4397 for (est = 0; est < estNR; est++)
4399 if (EST_DISTR(est) && (state->flags & (1<<est)))
4404 /* Rotate the complete state; for a rectangular box only */
4405 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4406 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4409 state->v[a][YY] = -state->v[a][YY];
4410 state->v[a][ZZ] = -state->v[a][ZZ];
4413 state->sd_X[a][YY] = -state->sd_X[a][YY];
4414 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4417 state->cg_p[a][YY] = -state->cg_p[a][YY];
4418 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4420 case estDISRE_INITF:
4421 case estDISRE_RM3TAV:
4422 case estORIRE_INITF:
4424 /* These are distances, so not affected by rotation */
4427 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4433 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4435 if (natoms > comm->moved_nalloc)
4437 /* Contents should be preserved here */
4438 comm->moved_nalloc = over_alloc_dd(natoms);
4439 srenew(comm->moved, comm->moved_nalloc);
4445 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4448 ivec tric_dir, matrix tcm,
4449 rvec cell_x0, rvec cell_x1,
4450 rvec limitd, rvec limit0, rvec limit1,
4452 int cg_start, int cg_end,
4457 int cg, k, k0, k1, d, dim, d2;
4462 real inv_ncg, pos_d;
4465 npbcdim = dd->npbcdim;
4467 for (cg = cg_start; cg < cg_end; cg++)
4474 copy_rvec(state->x[k0], cm_new);
4481 for (k = k0; (k < k1); k++)
4483 rvec_inc(cm_new, state->x[k]);
4485 for (d = 0; (d < DIM); d++)
4487 cm_new[d] = inv_ncg*cm_new[d];
4492 /* Do pbc and check DD cell boundary crossings */
4493 for (d = DIM-1; d >= 0; d--)
4497 bScrew = (dd->bScrewPBC && d == XX);
4498 /* Determine the location of this cg in lattice coordinates */
4502 for (d2 = d+1; d2 < DIM; d2++)
4504 pos_d += cm_new[d2]*tcm[d2][d];
4507 /* Put the charge group in the triclinic unit-cell */
4508 if (pos_d >= cell_x1[d])
4510 if (pos_d >= limit1[d])
4512 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4513 cg_cm[cg], cm_new, pos_d);
4516 if (dd->ci[d] == dd->nc[d] - 1)
4518 rvec_dec(cm_new, state->box[d]);
4521 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4522 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4524 for (k = k0; (k < k1); k++)
4526 rvec_dec(state->x[k], state->box[d]);
4529 rotate_state_atom(state, k);
4534 else if (pos_d < cell_x0[d])
4536 if (pos_d < limit0[d])
4538 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4539 cg_cm[cg], cm_new, pos_d);
4544 rvec_inc(cm_new, state->box[d]);
4547 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4548 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4550 for (k = k0; (k < k1); k++)
4552 rvec_inc(state->x[k], state->box[d]);
4555 rotate_state_atom(state, k);
4561 else if (d < npbcdim)
4563 /* Put the charge group in the rectangular unit-cell */
4564 while (cm_new[d] >= state->box[d][d])
4566 rvec_dec(cm_new, state->box[d]);
4567 for (k = k0; (k < k1); k++)
4569 rvec_dec(state->x[k], state->box[d]);
4572 while (cm_new[d] < 0)
4574 rvec_inc(cm_new, state->box[d]);
4575 for (k = k0; (k < k1); k++)
4577 rvec_inc(state->x[k], state->box[d]);
4583 copy_rvec(cm_new, cg_cm[cg]);
4585 /* Determine where this cg should go */
4588 for (d = 0; d < dd->ndim; d++)
4593 flag |= DD_FLAG_FW(d);
4599 else if (dev[dim] == -1)
4601 flag |= DD_FLAG_BW(d);
4604 if (dd->nc[dim] > 2)
4615 /* Temporarily store the flag in move */
4616 move[cg] = mc + flag;
4620 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4621 gmx_domdec_t *dd, ivec tric_dir,
4622 t_state *state, rvec **f,
4631 int ncg[DIM*2], nat[DIM*2];
4632 int c, i, cg, k, d, dim, dim2, dir, d2, d3;
4633 int mc, cdd, nrcg, ncg_recv, nvs, nvr, nvec, vec;
4634 int sbuf[2], rbuf[2];
4635 int home_pos_cg, home_pos_at, buf_pos;
4637 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4640 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1;
4642 cginfo_mb_t *cginfo_mb;
4643 gmx_domdec_comm_t *comm;
4645 int nthread, thread;
4649 check_screw_box(state->box);
4653 if (fr->cutoff_scheme == ecutsGROUP)
4658 for (i = 0; i < estNR; i++)
4664 case estX: /* Always present */ break;
4665 case estV: bV = (state->flags & (1<<i)); break;
4666 case estSDX: bSDX = (state->flags & (1<<i)); break;
4667 case estCGP: bCGP = (state->flags & (1<<i)); break;
4670 case estDISRE_INITF:
4671 case estDISRE_RM3TAV:
4672 case estORIRE_INITF:
4674 /* No processing required */
4677 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4682 if (dd->ncg_tot > comm->nalloc_int)
4684 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4685 srenew(comm->buf_int, comm->nalloc_int);
4687 move = comm->buf_int;
4689 /* Clear the count */
4690 for (c = 0; c < dd->ndim*2; c++)
4696 npbcdim = dd->npbcdim;
4698 for (d = 0; (d < DIM); d++)
4700 limitd[d] = dd->comm->cellsize_min[d];
4701 if (d >= npbcdim && dd->ci[d] == 0)
4703 cell_x0[d] = -GMX_FLOAT_MAX;
4707 cell_x0[d] = comm->cell_x0[d];
4709 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4711 cell_x1[d] = GMX_FLOAT_MAX;
4715 cell_x1[d] = comm->cell_x1[d];
4719 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4720 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4724 /* We check after communication if a charge group moved
4725 * more than one cell. Set the pre-comm check limit to float_max.
4727 limit0[d] = -GMX_FLOAT_MAX;
4728 limit1[d] = GMX_FLOAT_MAX;
4732 make_tric_corr_matrix(npbcdim, state->box, tcm);
4734 cgindex = dd->cgindex;
4736 nthread = gmx_omp_nthreads_get(emntDomdec);
4738 /* Compute the center of geometry for all home charge groups
4739 * and put them in the box and determine where they should go.
4741 #pragma omp parallel for num_threads(nthread) schedule(static)
4742 for (thread = 0; thread < nthread; thread++)
4744 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4745 cell_x0, cell_x1, limitd, limit0, limit1,
4747 ( thread *dd->ncg_home)/nthread,
4748 ((thread+1)*dd->ncg_home)/nthread,
4749 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4753 for (cg = 0; cg < dd->ncg_home; cg++)
4758 flag = mc & ~DD_FLAG_NRCG;
4759 mc = mc & DD_FLAG_NRCG;
4762 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4764 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4765 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4767 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4768 /* We store the cg size in the lower 16 bits
4769 * and the place where the charge group should go
4770 * in the next 6 bits. This saves some communication volume.
4772 nrcg = cgindex[cg+1] - cgindex[cg];
4773 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4779 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4780 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4783 for (i = 0; i < dd->ndim*2; i++)
4785 *ncg_moved += ncg[i];
4802 /* Make sure the communication buffers are large enough */
4803 for (mc = 0; mc < dd->ndim*2; mc++)
4805 nvr = ncg[mc] + nat[mc]*nvec;
4806 if (nvr > comm->cgcm_state_nalloc[mc])
4808 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4809 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4813 switch (fr->cutoff_scheme)
4816 /* Recalculating cg_cm might be cheaper than communicating,
4817 * but that could give rise to rounding issues.
4820 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4821 nvec, cg_cm, comm, bCompact);
4824 /* Without charge groups we send the moved atom coordinates
4825 * over twice. This is so the code below can be used without
4826 * many conditionals for both for with and without charge groups.
4829 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4830 nvec, state->x, comm, FALSE);
4833 home_pos_cg -= *ncg_moved;
4837 gmx_incons("unimplemented");
4843 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4844 nvec, vec++, state->x, comm, bCompact);
4847 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4848 nvec, vec++, state->v, comm, bCompact);
4852 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4853 nvec, vec++, state->sd_X, comm, bCompact);
4857 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4858 nvec, vec++, state->cg_p, comm, bCompact);
4863 compact_ind(dd->ncg_home, move,
4864 dd->index_gl, dd->cgindex, dd->gatindex,
4865 dd->ga2la, comm->bLocalCG,
4870 if (fr->cutoff_scheme == ecutsVERLET)
4872 moved = get_moved(comm, dd->ncg_home);
4874 for (k = 0; k < dd->ncg_home; k++)
4881 moved = fr->ns.grid->cell_index;
4884 clear_and_mark_ind(dd->ncg_home, move,
4885 dd->index_gl, dd->cgindex, dd->gatindex,
4886 dd->ga2la, comm->bLocalCG,
4890 cginfo_mb = fr->cginfo_mb;
4892 *ncg_stay_home = home_pos_cg;
4893 for (d = 0; d < dd->ndim; d++)
4898 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4901 /* Communicate the cg and atom counts */
4906 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4907 d, dir, sbuf[0], sbuf[1]);
4909 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4911 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4913 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4914 srenew(comm->buf_int, comm->nalloc_int);
4917 /* Communicate the charge group indices, sizes and flags */
4918 dd_sendrecv_int(dd, d, dir,
4919 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4920 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4922 nvs = ncg[cdd] + nat[cdd]*nvec;
4923 i = rbuf[0] + rbuf[1] *nvec;
4924 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4926 /* Communicate cgcm and state */
4927 dd_sendrecv_rvec(dd, d, dir,
4928 comm->cgcm_state[cdd], nvs,
4929 comm->vbuf.v+nvr, i);
4930 ncg_recv += rbuf[0];
4934 /* Process the received charge groups */
4936 for (cg = 0; cg < ncg_recv; cg++)
4938 flag = comm->buf_int[cg*DD_CGIBS+1];
4940 if (dim >= npbcdim && dd->nc[dim] > 2)
4942 /* No pbc in this dim and more than one domain boundary.
4943 * We do a separate check if a charge group didn't move too far.
4945 if (((flag & DD_FLAG_FW(d)) &&
4946 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4947 ((flag & DD_FLAG_BW(d)) &&
4948 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4950 cg_move_error(fplog, dd, step, cg, dim,
4951 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4953 comm->vbuf.v[buf_pos],
4954 comm->vbuf.v[buf_pos],
4955 comm->vbuf.v[buf_pos][dim]);
4962 /* Check which direction this cg should go */
4963 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4967 /* The cell boundaries for dimension d2 are not equal
4968 * for each cell row of the lower dimension(s),
4969 * therefore we might need to redetermine where
4970 * this cg should go.
4973 /* If this cg crosses the box boundary in dimension d2
4974 * we can use the communicated flag, so we do not
4975 * have to worry about pbc.
4977 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4978 (flag & DD_FLAG_FW(d2))) ||
4979 (dd->ci[dim2] == 0 &&
4980 (flag & DD_FLAG_BW(d2)))))
4982 /* Clear the two flags for this dimension */
4983 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4984 /* Determine the location of this cg
4985 * in lattice coordinates
4987 pos_d = comm->vbuf.v[buf_pos][dim2];
4990 for (d3 = dim2+1; d3 < DIM; d3++)
4993 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4996 /* Check of we are not at the box edge.
4997 * pbc is only handled in the first step above,
4998 * but this check could move over pbc while
4999 * the first step did not due to different rounding.
5001 if (pos_d >= cell_x1[dim2] &&
5002 dd->ci[dim2] != dd->nc[dim2]-1)
5004 flag |= DD_FLAG_FW(d2);
5006 else if (pos_d < cell_x0[dim2] &&
5009 flag |= DD_FLAG_BW(d2);
5011 comm->buf_int[cg*DD_CGIBS+1] = flag;
5014 /* Set to which neighboring cell this cg should go */
5015 if (flag & DD_FLAG_FW(d2))
5019 else if (flag & DD_FLAG_BW(d2))
5021 if (dd->nc[dd->dim[d2]] > 2)
5033 nrcg = flag & DD_FLAG_NRCG;
5036 if (home_pos_cg+1 > dd->cg_nalloc)
5038 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5039 srenew(dd->index_gl, dd->cg_nalloc);
5040 srenew(dd->cgindex, dd->cg_nalloc+1);
5042 /* Set the global charge group index and size */
5043 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5044 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5045 /* Copy the state from the buffer */
5046 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5047 if (fr->cutoff_scheme == ecutsGROUP)
5050 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5054 /* Set the cginfo */
5055 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5056 dd->index_gl[home_pos_cg]);
5059 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5062 if (home_pos_at+nrcg > state->nalloc)
5064 dd_realloc_state(state, f, home_pos_at+nrcg);
5066 for (i = 0; i < nrcg; i++)
5068 copy_rvec(comm->vbuf.v[buf_pos++],
5069 state->x[home_pos_at+i]);
5073 for (i = 0; i < nrcg; i++)
5075 copy_rvec(comm->vbuf.v[buf_pos++],
5076 state->v[home_pos_at+i]);
5081 for (i = 0; i < nrcg; i++)
5083 copy_rvec(comm->vbuf.v[buf_pos++],
5084 state->sd_X[home_pos_at+i]);
5089 for (i = 0; i < nrcg; i++)
5091 copy_rvec(comm->vbuf.v[buf_pos++],
5092 state->cg_p[home_pos_at+i]);
5096 home_pos_at += nrcg;
5100 /* Reallocate the buffers if necessary */
5101 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5103 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5104 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5106 nvr = ncg[mc] + nat[mc]*nvec;
5107 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5109 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5110 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5112 /* Copy from the receive to the send buffers */
5113 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5114 comm->buf_int + cg*DD_CGIBS,
5115 DD_CGIBS*sizeof(int));
5116 memcpy(comm->cgcm_state[mc][nvr],
5117 comm->vbuf.v[buf_pos],
5118 (1+nrcg*nvec)*sizeof(rvec));
5119 buf_pos += 1 + nrcg*nvec;
5126 /* With sorting (!bCompact) the indices are now only partially up to date
5127 * and ncg_home and nat_home are not the real count, since there are
5128 * "holes" in the arrays for the charge groups that moved to neighbors.
5130 if (fr->cutoff_scheme == ecutsVERLET)
5132 moved = get_moved(comm, home_pos_cg);
5134 for (i = dd->ncg_home; i < home_pos_cg; i++)
5139 dd->ncg_home = home_pos_cg;
5140 dd->nat_home = home_pos_at;
5145 "Finished repartitioning: cgs moved out %d, new home %d\n",
5146 *ncg_moved, dd->ncg_home-*ncg_moved);
5151 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5153 dd->comm->cycl[ddCycl] += cycles;
5154 dd->comm->cycl_n[ddCycl]++;
5155 if (cycles > dd->comm->cycl_max[ddCycl])
5157 dd->comm->cycl_max[ddCycl] = cycles;
5161 static double force_flop_count(t_nrnb *nrnb)
5168 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5170 /* To get closer to the real timings, we half the count
5171 * for the normal loops and again half it for water loops.
5174 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5176 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5180 sum += nrnb->n[i]*0.50*cost_nrnb(i);
5183 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5186 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5188 sum += nrnb->n[i]*cost_nrnb(i);
5191 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5193 sum += nrnb->n[i]*cost_nrnb(i);
5199 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5201 if (dd->comm->eFlop)
5203 dd->comm->flop -= force_flop_count(nrnb);
5206 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5208 if (dd->comm->eFlop)
5210 dd->comm->flop += force_flop_count(nrnb);
5215 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5219 for (i = 0; i < ddCyclNr; i++)
5221 dd->comm->cycl[i] = 0;
5222 dd->comm->cycl_n[i] = 0;
5223 dd->comm->cycl_max[i] = 0;
5226 dd->comm->flop_n = 0;
5229 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5231 gmx_domdec_comm_t *comm;
5232 gmx_domdec_load_t *load;
5233 gmx_domdec_root_t *root = NULL;
5235 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5240 fprintf(debug, "get_load_distribution start\n");
5243 wallcycle_start(wcycle, ewcDDCOMMLOAD);
5247 bSepPME = (dd->pme_nodeid >= 0);
5249 for (d = dd->ndim-1; d >= 0; d--)
5252 /* Check if we participate in the communication in this dimension */
5253 if (d == dd->ndim-1 ||
5254 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5256 load = &comm->load[d];
5259 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
5262 if (d == dd->ndim-1)
5264 sbuf[pos++] = dd_force_load(comm);
5265 sbuf[pos++] = sbuf[0];
5268 sbuf[pos++] = sbuf[0];
5269 sbuf[pos++] = cell_frac;
5272 sbuf[pos++] = comm->cell_f_max0[d];
5273 sbuf[pos++] = comm->cell_f_min1[d];
5278 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5279 sbuf[pos++] = comm->cycl[ddCyclPME];
5284 sbuf[pos++] = comm->load[d+1].sum;
5285 sbuf[pos++] = comm->load[d+1].max;
5288 sbuf[pos++] = comm->load[d+1].sum_m;
5289 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5290 sbuf[pos++] = comm->load[d+1].flags;
5293 sbuf[pos++] = comm->cell_f_max0[d];
5294 sbuf[pos++] = comm->cell_f_min1[d];
5299 sbuf[pos++] = comm->load[d+1].mdf;
5300 sbuf[pos++] = comm->load[d+1].pme;
5304 /* Communicate a row in DD direction d.
5305 * The communicators are setup such that the root always has rank 0.
5308 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5309 load->load, load->nload*sizeof(float), MPI_BYTE,
5310 0, comm->mpi_comm_load[d]);
5312 if (dd->ci[dim] == dd->master_ci[dim])
5314 /* We are the root, process this row */
5315 if (comm->bDynLoadBal)
5317 root = comm->root[d];
5327 for (i = 0; i < dd->nc[dim]; i++)
5329 load->sum += load->load[pos++];
5330 load->max = std::max(load->max, load->load[pos]);
5336 /* This direction could not be load balanced properly,
5337 * therefore we need to use the maximum iso the average load.
5339 load->sum_m = std::max(load->sum_m, load->load[pos]);
5343 load->sum_m += load->load[pos];
5346 load->cvol_min = std::min(load->cvol_min, load->load[pos]);
5350 load->flags = (int)(load->load[pos++] + 0.5);
5354 root->cell_f_max0[i] = load->load[pos++];
5355 root->cell_f_min1[i] = load->load[pos++];
5360 load->mdf = std::max(load->mdf, load->load[pos]);
5362 load->pme = std::max(load->pme, load->load[pos]);
5366 if (comm->bDynLoadBal && root->bLimited)
5368 load->sum_m *= dd->nc[dim];
5369 load->flags |= (1<<d);
5377 comm->nload += dd_load_count(comm);
5378 comm->load_step += comm->cycl[ddCyclStep];
5379 comm->load_sum += comm->load[0].sum;
5380 comm->load_max += comm->load[0].max;
5381 if (comm->bDynLoadBal)
5383 for (d = 0; d < dd->ndim; d++)
5385 if (comm->load[0].flags & (1<<d))
5387 comm->load_lim[d]++;
5393 comm->load_mdf += comm->load[0].mdf;
5394 comm->load_pme += comm->load[0].pme;
5398 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5402 fprintf(debug, "get_load_distribution finished\n");
5406 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5408 /* Return the relative performance loss on the total run time
5409 * due to the force calculation load imbalance.
5411 if (dd->comm->nload > 0)
5414 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5415 (dd->comm->load_step*dd->nnodes);
5423 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5426 int npp, npme, nnodes, d, limp;
5427 float imbal, pme_f_ratio, lossf, lossp = 0;
5429 gmx_domdec_comm_t *comm;
5432 if (DDMASTER(dd) && comm->nload > 0)
5435 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5436 nnodes = npp + npme;
5437 imbal = comm->load_max*npp/comm->load_sum - 1;
5438 lossf = dd_force_imb_perf_loss(dd);
5439 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5440 fprintf(fplog, "%s", buf);
5441 fprintf(stderr, "\n");
5442 fprintf(stderr, "%s", buf);
5443 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5444 fprintf(fplog, "%s", buf);
5445 fprintf(stderr, "%s", buf);
5447 if (comm->bDynLoadBal)
5449 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5450 for (d = 0; d < dd->ndim; d++)
5452 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5453 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5459 sprintf(buf+strlen(buf), "\n");
5460 fprintf(fplog, "%s", buf);
5461 fprintf(stderr, "%s", buf);
5465 pme_f_ratio = comm->load_pme/comm->load_mdf;
5466 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5469 lossp *= (float)npme/(float)nnodes;
5473 lossp *= (float)npp/(float)nnodes;
5475 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5476 fprintf(fplog, "%s", buf);
5477 fprintf(stderr, "%s", buf);
5478 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5479 fprintf(fplog, "%s", buf);
5480 fprintf(stderr, "%s", buf);
5482 fprintf(fplog, "\n");
5483 fprintf(stderr, "\n");
5485 if (lossf >= DD_PERF_LOSS_WARN)
5488 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5489 " in the domain decomposition.\n", lossf*100);
5490 if (!comm->bDynLoadBal)
5492 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5496 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5498 fprintf(fplog, "%s\n", buf);
5499 fprintf(stderr, "%s\n", buf);
5501 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS_WARN)
5504 "NOTE: %.1f %% performance was lost because the PME ranks\n"
5505 " had %s work to do than the PP ranks.\n"
5506 " You might want to %s the number of PME ranks\n"
5507 " or %s the cut-off and the grid spacing.\n",
5509 (lossp < 0) ? "less" : "more",
5510 (lossp < 0) ? "decrease" : "increase",
5511 (lossp < 0) ? "decrease" : "increase");
5512 fprintf(fplog, "%s\n", buf);
5513 fprintf(stderr, "%s\n", buf);
5518 static float dd_vol_min(gmx_domdec_t *dd)
5520 return dd->comm->load[0].cvol_min*dd->nnodes;
5523 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5525 return dd->comm->load[0].flags;
5528 static float dd_f_imbal(gmx_domdec_t *dd)
5530 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5533 float dd_pme_f_ratio(gmx_domdec_t *dd)
5535 if (dd->comm->cycl_n[ddCyclPME] > 0)
5537 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5545 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5550 flags = dd_load_flags(dd);
5554 "DD load balancing is limited by minimum cell size in dimension");
5555 for (d = 0; d < dd->ndim; d++)
5559 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5562 fprintf(fplog, "\n");
5564 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5565 if (dd->comm->bDynLoadBal)
5567 fprintf(fplog, " vol min/aver %5.3f%c",
5568 dd_vol_min(dd), flags ? '!' : ' ');
5570 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5571 if (dd->comm->cycl_n[ddCyclPME])
5573 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5575 fprintf(fplog, "\n\n");
5578 static void dd_print_load_verbose(gmx_domdec_t *dd)
5580 if (dd->comm->bDynLoadBal)
5582 fprintf(stderr, "vol %4.2f%c ",
5583 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5585 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5586 if (dd->comm->cycl_n[ddCyclPME])
5588 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
5593 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5598 gmx_domdec_root_t *root;
5599 gmx_bool bPartOfGroup = FALSE;
5601 dim = dd->dim[dim_ind];
5602 copy_ivec(loc, loc_c);
5603 for (i = 0; i < dd->nc[dim]; i++)
5606 rank = dd_index(dd->nc, loc_c);
5607 if (rank == dd->rank)
5609 /* This process is part of the group */
5610 bPartOfGroup = TRUE;
5613 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5617 dd->comm->mpi_comm_load[dim_ind] = c_row;
5618 if (dd->comm->eDLB != edlbNO)
5620 if (dd->ci[dim] == dd->master_ci[dim])
5622 /* This is the root process of this row */
5623 snew(dd->comm->root[dim_ind], 1);
5624 root = dd->comm->root[dim_ind];
5625 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5626 snew(root->old_cell_f, dd->nc[dim]+1);
5627 snew(root->bCellMin, dd->nc[dim]);
5630 snew(root->cell_f_max0, dd->nc[dim]);
5631 snew(root->cell_f_min1, dd->nc[dim]);
5632 snew(root->bound_min, dd->nc[dim]);
5633 snew(root->bound_max, dd->nc[dim]);
5635 snew(root->buf_ncd, dd->nc[dim]);
5639 /* This is not a root process, we only need to receive cell_f */
5640 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5643 if (dd->ci[dim] == dd->master_ci[dim])
5645 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
5651 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5652 const gmx_hw_info_t gmx_unused *hwinfo,
5653 const gmx_hw_opt_t gmx_unused *hw_opt)
5656 int physicalnode_id_hash;
5659 MPI_Comm mpi_comm_pp_physicalnode;
5661 if (!(cr->duty & DUTY_PP) ||
5662 hw_opt->gpu_opt.ncuda_dev_use == 0)
5664 /* Only PP nodes (currently) use GPUs.
5665 * If we don't have GPUs, there are no resources to share.
5670 physicalnode_id_hash = gmx_physicalnode_id_hash();
5672 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5678 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5679 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5680 dd->rank, physicalnode_id_hash, gpu_id);
5682 /* Split the PP communicator over the physical nodes */
5683 /* TODO: See if we should store this (before), as it's also used for
5684 * for the nodecomm summution.
5686 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5687 &mpi_comm_pp_physicalnode);
5688 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5689 &dd->comm->mpi_comm_gpu_shared);
5690 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5691 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5695 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5698 /* Note that some ranks could share a GPU, while others don't */
5700 if (dd->comm->nrank_gpu_shared == 1)
5702 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
5707 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5710 int dim0, dim1, i, j;
5715 fprintf(debug, "Making load communicators\n");
5718 snew(dd->comm->load, dd->ndim);
5719 snew(dd->comm->mpi_comm_load, dd->ndim);
5722 make_load_communicator(dd, 0, loc);
5726 for (i = 0; i < dd->nc[dim0]; i++)
5729 make_load_communicator(dd, 1, loc);
5735 for (i = 0; i < dd->nc[dim0]; i++)
5739 for (j = 0; j < dd->nc[dim1]; j++)
5742 make_load_communicator(dd, 2, loc);
5749 fprintf(debug, "Finished making load communicators\n");
5754 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5756 int d, dim, i, j, m;
5759 ivec dd_zp[DD_MAXIZONE];
5760 gmx_domdec_zones_t *zones;
5761 gmx_domdec_ns_ranges_t *izone;
5763 for (d = 0; d < dd->ndim; d++)
5766 copy_ivec(dd->ci, tmp);
5767 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5768 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5769 copy_ivec(dd->ci, tmp);
5770 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5771 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5774 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5777 dd->neighbor[d][1]);
5783 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5785 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5786 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5793 for (i = 0; i < nzonep; i++)
5795 copy_ivec(dd_zp3[i], dd_zp[i]);
5801 for (i = 0; i < nzonep; i++)
5803 copy_ivec(dd_zp2[i], dd_zp[i]);
5809 for (i = 0; i < nzonep; i++)
5811 copy_ivec(dd_zp1[i], dd_zp[i]);
5815 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5820 zones = &dd->comm->zones;
5822 for (i = 0; i < nzone; i++)
5825 clear_ivec(zones->shift[i]);
5826 for (d = 0; d < dd->ndim; d++)
5828 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5833 for (i = 0; i < nzone; i++)
5835 for (d = 0; d < DIM; d++)
5837 s[d] = dd->ci[d] - zones->shift[i][d];
5842 else if (s[d] >= dd->nc[d])
5848 zones->nizone = nzonep;
5849 for (i = 0; i < zones->nizone; i++)
5851 if (dd_zp[i][0] != i)
5853 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5855 izone = &zones->izone[i];
5856 izone->j0 = dd_zp[i][1];
5857 izone->j1 = dd_zp[i][2];
5858 for (dim = 0; dim < DIM; dim++)
5860 if (dd->nc[dim] == 1)
5862 /* All shifts should be allowed */
5863 izone->shift0[dim] = -1;
5864 izone->shift1[dim] = 1;
5869 izone->shift0[d] = 0;
5870 izone->shift1[d] = 0;
5871 for(j=izone->j0; j<izone->j1; j++) {
5872 if (dd->shift[j][d] > dd->shift[i][d])
5873 izone->shift0[d] = -1;
5874 if (dd->shift[j][d] < dd->shift[i][d])
5875 izone->shift1[d] = 1;
5881 /* Assume the shift are not more than 1 cell */
5882 izone->shift0[dim] = 1;
5883 izone->shift1[dim] = -1;
5884 for (j = izone->j0; j < izone->j1; j++)
5886 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5887 if (shift_diff < izone->shift0[dim])
5889 izone->shift0[dim] = shift_diff;
5891 if (shift_diff > izone->shift1[dim])
5893 izone->shift1[dim] = shift_diff;
5900 if (dd->comm->eDLB != edlbNO)
5902 snew(dd->comm->root, dd->ndim);
5905 if (dd->comm->bRecordLoad)
5907 make_load_communicators(dd);
5911 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
5917 gmx_domdec_comm_t *comm;
5924 if (comm->bCartesianPP)
5926 /* Set up cartesian communication for the particle-particle part */
5929 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5930 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5933 for (int i = 0; i < DIM; i++)
5937 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5939 /* We overwrite the old communicator with the new cartesian one */
5940 cr->mpi_comm_mygroup = comm_cart;
5943 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5944 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5946 if (comm->bCartesianPP_PME)
5948 /* Since we want to use the original cartesian setup for sim,
5949 * and not the one after split, we need to make an index.
5951 snew(comm->ddindex2ddnodeid, dd->nnodes);
5952 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5953 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5954 /* Get the rank of the DD master,
5955 * above we made sure that the master node is a PP node.
5965 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5967 else if (comm->bCartesianPP)
5969 if (cr->npmenodes == 0)
5971 /* The PP communicator is also
5972 * the communicator for this simulation
5974 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5976 cr->nodeid = dd->rank;
5978 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5980 /* We need to make an index to go from the coordinates
5981 * to the nodeid of this simulation.
5983 snew(comm->ddindex2simnodeid, dd->nnodes);
5984 snew(buf, dd->nnodes);
5985 if (cr->duty & DUTY_PP)
5987 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5989 /* Communicate the ddindex to simulation nodeid index */
5990 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5991 cr->mpi_comm_mysim);
5994 /* Determine the master coordinates and rank.
5995 * The DD master should be the same node as the master of this sim.
5997 for (int i = 0; i < dd->nnodes; i++)
5999 if (comm->ddindex2simnodeid[i] == 0)
6001 ddindex2xyz(dd->nc, i, dd->master_ci);
6002 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
6007 fprintf(debug, "The master rank is %d\n", dd->masterrank);
6012 /* No Cartesian communicators */
6013 /* We use the rank in dd->comm->all as DD index */
6014 ddindex2xyz(dd->nc, dd->rank, dd->ci);
6015 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
6017 clear_ivec(dd->master_ci);
6024 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
6025 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6030 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
6031 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6035 static void receive_ddindex2simnodeid(t_commrec gmx_unused *cr)
6039 gmx_domdec_comm_t *comm;
6044 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6047 snew(comm->ddindex2simnodeid, dd->nnodes);
6048 snew(buf, dd->nnodes);
6049 if (cr->duty & DUTY_PP)
6051 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6053 /* Communicate the ddindex to simulation nodeid index */
6054 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6055 cr->mpi_comm_mysim);
6061 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6062 int ncg, int natoms)
6064 gmx_domdec_master_t *ma;
6069 snew(ma->ncg, dd->nnodes);
6070 snew(ma->index, dd->nnodes+1);
6072 snew(ma->nat, dd->nnodes);
6073 snew(ma->ibuf, dd->nnodes*2);
6074 snew(ma->cell_x, DIM);
6075 for (i = 0; i < DIM; i++)
6077 snew(ma->cell_x[i], dd->nc[i]+1);
6080 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6086 snew(ma->vbuf, natoms);
6092 static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
6093 int gmx_unused reorder)
6096 gmx_domdec_comm_t *comm;
6106 if (comm->bCartesianPP)
6108 for (i = 1; i < DIM; i++)
6110 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6112 if (bDiv[YY] || bDiv[ZZ])
6114 comm->bCartesianPP_PME = TRUE;
6115 /* If we have 2D PME decomposition, which is always in x+y,
6116 * we stack the PME only nodes in z.
6117 * Otherwise we choose the direction that provides the thinnest slab
6118 * of PME only nodes as this will have the least effect
6119 * on the PP communication.
6120 * But for the PME communication the opposite might be better.
6122 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6124 dd->nc[YY] > dd->nc[ZZ]))
6126 comm->cartpmedim = ZZ;
6130 comm->cartpmedim = YY;
6132 comm->ntot[comm->cartpmedim]
6133 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6137 fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6139 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6144 if (comm->bCartesianPP_PME)
6151 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6154 for (i = 0; i < DIM; i++)
6158 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6160 MPI_Comm_rank(comm_cart, &rank);
6161 if (MASTERNODE(cr) && rank != 0)
6163 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6166 /* With this assigment we loose the link to the original communicator
6167 * which will usually be MPI_COMM_WORLD, unless have multisim.
6169 cr->mpi_comm_mysim = comm_cart;
6170 cr->sim_nodeid = rank;
6172 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6176 fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
6177 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6180 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6184 if (cr->npmenodes == 0 ||
6185 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6187 cr->duty = DUTY_PME;
6190 /* Split the sim communicator into PP and PME only nodes */
6191 MPI_Comm_split(cr->mpi_comm_mysim,
6193 dd_index(comm->ntot, dd->ci),
6194 &cr->mpi_comm_mygroup);
6198 switch (dd_node_order)
6203 fprintf(fplog, "Order of the ranks: PP first, PME last\n");
6206 case ddnoINTERLEAVE:
6207 /* Interleave the PP-only and PME-only nodes,
6208 * as on clusters with dual-core machines this will double
6209 * the communication bandwidth of the PME processes
6210 * and thus speed up the PP <-> PME and inter PME communication.
6214 fprintf(fplog, "Interleaving PP and PME ranks\n");
6216 comm->pmenodes = dd_pmenodes(cr);
6221 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6224 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6226 cr->duty = DUTY_PME;
6233 /* Split the sim communicator into PP and PME only nodes */
6234 MPI_Comm_split(cr->mpi_comm_mysim,
6237 &cr->mpi_comm_mygroup);
6238 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6244 fprintf(fplog, "This rank does only %s work.\n\n",
6245 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
6249 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6252 gmx_domdec_comm_t *comm;
6258 copy_ivec(dd->nc, comm->ntot);
6260 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6261 comm->bCartesianPP_PME = FALSE;
6263 /* Reorder the nodes by default. This might change the MPI ranks.
6264 * Real reordering is only supported on very few architectures,
6265 * Blue Gene is one of them.
6267 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6269 if (cr->npmenodes > 0)
6271 /* Split the communicator into a PP and PME part */
6272 split_communicator(fplog, cr, dd_node_order, CartReorder);
6273 if (comm->bCartesianPP_PME)
6275 /* We (possibly) reordered the nodes in split_communicator,
6276 * so it is no longer required in make_pp_communicator.
6278 CartReorder = FALSE;
6283 /* All nodes do PP and PME */
6285 /* We do not require separate communicators */
6286 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6290 if (cr->duty & DUTY_PP)
6292 /* Copy or make a new PP communicator */
6293 make_pp_communicator(fplog, cr, CartReorder);
6297 receive_ddindex2simnodeid(cr);
6300 if (!(cr->duty & DUTY_PME))
6302 /* Set up the commnuication to our PME node */
6303 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6304 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6307 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6308 dd->pme_nodeid, dd->pme_receive_vir_ener);
6313 dd->pme_nodeid = -1;
6318 dd->ma = init_gmx_domdec_master_t(dd,
6320 comm->cgs_gl.index[comm->cgs_gl.nr]);
6324 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6326 real *slb_frac, tot;
6331 if (nc > 1 && size_string != NULL)
6335 fprintf(fplog, "Using static load balancing for the %s direction\n",
6340 for (i = 0; i < nc; i++)
6343 sscanf(size_string, "%20lf%n", &dbl, &n);
6346 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6355 fprintf(fplog, "Relative cell sizes:");
6357 for (i = 0; i < nc; i++)
6362 fprintf(fplog, " %5.3f", slb_frac[i]);
6367 fprintf(fplog, "\n");
6374 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6377 gmx_mtop_ilistloop_t iloop;
6381 iloop = gmx_mtop_ilistloop_init(mtop);
6382 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6384 for (ftype = 0; ftype < F_NRE; ftype++)
6386 if ((interaction_function[ftype].flags & IF_BOND) &&
6389 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
6397 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6403 val = getenv(env_var);
6406 if (sscanf(val, "%20d", &nst) <= 0)
6412 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6420 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6424 fprintf(stderr, "\n%s\n", warn_string);
6428 fprintf(fplog, "\n%s\n", warn_string);
6432 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6433 t_inputrec *ir, FILE *fplog)
6435 if (ir->ePBC == epbcSCREW &&
6436 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6438 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6441 if (ir->ns_type == ensSIMPLE)
6443 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6446 if (ir->nstlist == 0)
6448 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6451 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6453 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6457 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6462 r = ddbox->box_size[XX];
6463 for (di = 0; di < dd->ndim; di++)
6466 /* Check using the initial average cell size */
6467 r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6473 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6474 const char *dlb_opt, gmx_bool bRecordLoad,
6475 unsigned long Flags, t_inputrec *ir)
6482 case 'a': eDLB = edlbAUTO; break;
6483 case 'n': eDLB = edlbNO; break;
6484 case 'y': eDLB = edlbYES; break;
6485 default: gmx_incons("Unknown dlb_opt");
6488 if (Flags & MD_RERUN)
6493 if (!EI_DYNAMICS(ir->eI))
6495 if (eDLB == edlbYES)
6497 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6498 dd_warning(cr, fplog, buf);
6506 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6511 if (Flags & MD_REPRODUCIBLE)
6518 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6522 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6525 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
6533 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6538 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6540 /* Decomposition order z,y,x */
6543 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6545 for (dim = DIM-1; dim >= 0; dim--)
6547 if (dd->nc[dim] > 1)
6549 dd->dim[dd->ndim++] = dim;
6555 /* Decomposition order x,y,z */
6556 for (dim = 0; dim < DIM; dim++)
6558 if (dd->nc[dim] > 1)
6560 dd->dim[dd->ndim++] = dim;
6566 static gmx_domdec_comm_t *init_dd_comm()
6568 gmx_domdec_comm_t *comm;
6572 snew(comm->cggl_flag, DIM*2);
6573 snew(comm->cgcm_state, DIM*2);
6574 for (i = 0; i < DIM*2; i++)
6576 comm->cggl_flag_nalloc[i] = 0;
6577 comm->cgcm_state_nalloc[i] = 0;
6580 comm->nalloc_int = 0;
6581 comm->buf_int = NULL;
6583 vec_rvec_init(&comm->vbuf);
6585 comm->n_load_have = 0;
6586 comm->n_load_collect = 0;
6588 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6590 comm->sum_nat[i] = 0;
6594 comm->load_step = 0;
6597 clear_ivec(comm->load_lim);
6604 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6605 unsigned long Flags,
6607 real comm_distance_min, real rconstr,
6608 const char *dlb_opt, real dlb_scale,
6609 const char *sizex, const char *sizey, const char *sizez,
6610 gmx_mtop_t *mtop, t_inputrec *ir,
6611 matrix box, rvec *x,
6613 int *npme_x, int *npme_y)
6616 gmx_domdec_comm_t *comm;
6618 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6621 const real tenPercentMargin = 1.1;
6626 "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
6631 dd->comm = init_dd_comm();
6633 snew(comm->cggl_flag, DIM*2);
6634 snew(comm->cgcm_state, DIM*2);
6636 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6637 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6639 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
6640 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
6641 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
6642 recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
6643 comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
6644 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
6645 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
6646 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
6648 dd->pme_recv_f_alloc = 0;
6649 dd->pme_recv_f_buf = NULL;
6651 if (dd->bSendRecv2 && fplog)
6653 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6659 fprintf(fplog, "Will load balance based on FLOP count\n");
6661 if (comm->eFlop > 1)
6663 srand(1+cr->nodeid);
6665 comm->bRecordLoad = TRUE;
6669 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6673 /* Initialize to GPU share count to 0, might change later */
6674 comm->nrank_gpu_shared = 0;
6676 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6678 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6681 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6683 dd->bGridJump = comm->bDynLoadBal;
6684 comm->bPMELoadBalDLBLimits = FALSE;
6686 if (comm->nstSortCG)
6690 if (comm->nstSortCG == 1)
6692 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6696 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6700 snew(comm->sort, 1);
6706 fprintf(fplog, "Will not sort the charge groups\n");
6710 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6712 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6713 if (comm->bInterCGBondeds)
6715 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6719 comm->bInterCGMultiBody = FALSE;
6722 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6723 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6725 if (ir->rlistlong == 0)
6727 /* Set the cut-off to some very large value,
6728 * so we don't need if statements everywhere in the code.
6729 * We use sqrt, since the cut-off is squared in some places.
6731 comm->cutoff = GMX_CUTOFF_INF;
6735 comm->cutoff = ir->rlistlong;
6737 comm->cutoff_mbody = 0;
6739 comm->cellsize_limit = 0;
6740 comm->bBondComm = FALSE;
6742 if (comm->bInterCGBondeds)
6744 if (comm_distance_min > 0)
6746 comm->cutoff_mbody = comm_distance_min;
6747 if (Flags & MD_DDBONDCOMM)
6749 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6753 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
6755 r_bonded_limit = comm->cutoff_mbody;
6757 else if (ir->bPeriodicMols)
6759 /* Can not easily determine the required cut-off */
6760 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6761 comm->cutoff_mbody = comm->cutoff/2;
6762 r_bonded_limit = comm->cutoff_mbody;
6768 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6769 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6771 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6772 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6774 /* We use an initial margin of 10% for the minimum cell size,
6775 * except when we are just below the non-bonded cut-off.
6777 if (Flags & MD_DDBONDCOMM)
6779 if (std::max(r_2b, r_mb) > comm->cutoff)
6781 r_bonded = std::max(r_2b, r_mb);
6782 r_bonded_limit = tenPercentMargin*r_bonded;
6783 comm->bBondComm = TRUE;
6788 r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
6790 /* We determine cutoff_mbody later */
6794 /* No special bonded communication,
6795 * simply increase the DD cut-off.
6797 r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb);
6798 comm->cutoff_mbody = r_bonded_limit;
6799 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
6802 comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
6806 "Minimum cell size due to bonded interactions: %.3f nm\n",
6807 comm->cellsize_limit);
6811 if (dd->bInterCGcons && rconstr <= 0)
6813 /* There is a cell size limit due to the constraints (P-LINCS) */
6814 rconstr = constr_r_max(fplog, mtop, ir);
6818 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6820 if (rconstr > comm->cellsize_limit)
6822 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6826 else if (rconstr > 0 && fplog)
6828 /* Here we do not check for dd->bInterCGcons,
6829 * because one can also set a cell size limit for virtual sites only
6830 * and at this point we don't know yet if there are intercg v-sites.
6833 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6836 comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
6838 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6842 copy_ivec(nc, dd->nc);
6843 set_dd_dim(fplog, dd);
6844 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6846 if (cr->npmenodes == -1)
6850 acs = average_cellsize_min(dd, ddbox);
6851 if (acs < comm->cellsize_limit)
6855 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6857 gmx_fatal_collective(FARGS, cr, NULL,
6858 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6859 acs, comm->cellsize_limit);
6864 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6866 /* We need to choose the optimal DD grid and possibly PME nodes */
6867 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6868 comm->eDLB != edlbNO, dlb_scale,
6869 comm->cellsize_limit, comm->cutoff,
6870 comm->bInterCGBondeds);
6872 if (dd->nc[XX] == 0)
6874 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6875 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
6876 !bC ? "-rdd" : "-rcon",
6877 comm->eDLB != edlbNO ? " or -dds" : "",
6878 bC ? " or your LINCS settings" : "");
6880 gmx_fatal_collective(FARGS, cr, NULL,
6881 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
6883 "Look in the log file for details on the domain decomposition",
6884 cr->nnodes-cr->npmenodes, limit, buf);
6886 set_dd_dim(fplog, dd);
6892 "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
6893 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6896 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6897 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6899 gmx_fatal_collective(FARGS, cr, NULL,
6900 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
6901 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6903 if (cr->npmenodes > dd->nnodes)
6905 gmx_fatal_collective(FARGS, cr, NULL,
6906 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6908 if (cr->npmenodes > 0)
6910 comm->npmenodes = cr->npmenodes;
6914 comm->npmenodes = dd->nnodes;
6917 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6919 /* The following choices should match those
6920 * in comm_cost_est in domdec_setup.c.
6921 * Note that here the checks have to take into account
6922 * that the decomposition might occur in a different order than xyz
6923 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6924 * in which case they will not match those in comm_cost_est,
6925 * but since that is mainly for testing purposes that's fine.
6927 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6928 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6929 getenv("GMX_PMEONEDD") == NULL)
6931 comm->npmedecompdim = 2;
6932 comm->npmenodes_x = dd->nc[XX];
6933 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6937 /* In case nc is 1 in both x and y we could still choose to
6938 * decompose pme in y instead of x, but we use x for simplicity.
6940 comm->npmedecompdim = 1;
6941 if (dd->dim[0] == YY)
6943 comm->npmenodes_x = 1;
6944 comm->npmenodes_y = comm->npmenodes;
6948 comm->npmenodes_x = comm->npmenodes;
6949 comm->npmenodes_y = 1;
6954 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6955 comm->npmenodes_x, comm->npmenodes_y, 1);
6960 comm->npmedecompdim = 0;
6961 comm->npmenodes_x = 0;
6962 comm->npmenodes_y = 0;
6965 /* Technically we don't need both of these,
6966 * but it simplifies code not having to recalculate it.
6968 *npme_x = comm->npmenodes_x;
6969 *npme_y = comm->npmenodes_y;
6971 snew(comm->slb_frac, DIM);
6972 if (comm->eDLB == edlbNO)
6974 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6975 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6976 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
6979 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6981 if (comm->bBondComm || comm->eDLB != edlbNO)
6983 /* Set the bonded communication distance to halfway
6984 * the minimum and the maximum,
6985 * since the extra communication cost is nearly zero.
6987 acs = average_cellsize_min(dd, ddbox);
6988 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6989 if (comm->eDLB != edlbNO)
6991 /* Check if this does not limit the scaling */
6992 comm->cutoff_mbody = std::min(comm->cutoff_mbody, dlb_scale*acs);
6994 if (!comm->bBondComm)
6996 /* Without bBondComm do not go beyond the n.b. cut-off */
6997 comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
6998 if (comm->cellsize_limit >= comm->cutoff)
7000 /* We don't loose a lot of efficieny
7001 * when increasing it to the n.b. cut-off.
7002 * It can even be slightly faster, because we need
7003 * less checks for the communication setup.
7005 comm->cutoff_mbody = comm->cutoff;
7008 /* Check if we did not end up below our original limit */
7009 comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
7011 if (comm->cutoff_mbody > comm->cellsize_limit)
7013 comm->cellsize_limit = comm->cutoff_mbody;
7016 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
7021 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
7022 "cellsize limit %f\n",
7023 comm->bBondComm, comm->cellsize_limit);
7028 check_dd_restrictions(cr, dd, ir, fplog);
7031 comm->partition_step = INT_MIN;
7034 clear_dd_cycle_counts(dd);
7039 static void set_dlb_limits(gmx_domdec_t *dd)
7044 for (d = 0; d < dd->ndim; d++)
7046 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7047 dd->comm->cellsize_min[dd->dim[d]] =
7048 dd->comm->cellsize_min_dlb[dd->dim[d]];
7053 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7056 gmx_domdec_comm_t *comm;
7066 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
7069 cellsize_min = comm->cellsize_min[dd->dim[0]];
7070 for (d = 1; d < dd->ndim; d++)
7072 cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7075 if (cellsize_min < comm->cellsize_limit*1.05)
7077 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7079 /* Change DLB from "auto" to "no". */
7080 comm->eDLB = edlbNO;
7085 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7086 comm->bDynLoadBal = TRUE;
7087 dd->bGridJump = TRUE;
7091 /* We can set the required cell size info here,
7092 * so we do not need to communicate this.
7093 * The grid is completely uniform.
7095 for (d = 0; d < dd->ndim; d++)
7099 comm->load[d].sum_m = comm->load[d].sum;
7101 nc = dd->nc[dd->dim[d]];
7102 for (i = 0; i < nc; i++)
7104 comm->root[d]->cell_f[i] = i/(real)nc;
7107 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7108 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7111 comm->root[d]->cell_f[nc] = 1.0;
7116 static char *init_bLocalCG(gmx_mtop_t *mtop)
7121 ncg = ncg_mtop(mtop);
7122 snew(bLocalCG, ncg);
7123 for (cg = 0; cg < ncg; cg++)
7125 bLocalCG[cg] = FALSE;
7131 void dd_init_bondeds(FILE *fplog,
7132 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7134 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7136 gmx_domdec_comm_t *comm;
7138 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7142 if (comm->bBondComm)
7144 /* Communicate atoms beyond the cut-off for bonded interactions */
7147 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7149 comm->bLocalCG = init_bLocalCG(mtop);
7153 /* Only communicate atoms based on cut-off */
7154 comm->cglink = NULL;
7155 comm->bLocalCG = NULL;
7159 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7161 gmx_bool bDynLoadBal, real dlb_scale,
7164 gmx_domdec_comm_t *comm;
7179 fprintf(fplog, "The maximum number of communication pulses is:");
7180 for (d = 0; d < dd->ndim; d++)
7182 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7184 fprintf(fplog, "\n");
7185 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7186 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7187 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7188 for (d = 0; d < DIM; d++)
7192 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7199 comm->cellsize_min_dlb[d]/
7200 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7202 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7205 fprintf(fplog, "\n");
7209 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
7210 fprintf(fplog, "The initial number of communication pulses is:");
7211 for (d = 0; d < dd->ndim; d++)
7213 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7215 fprintf(fplog, "\n");
7216 fprintf(fplog, "The initial domain decomposition cell size is:");
7217 for (d = 0; d < DIM; d++)
7221 fprintf(fplog, " %c %.2f nm",
7222 dim2char(d), dd->comm->cellsize_min[d]);
7225 fprintf(fplog, "\n\n");
7228 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7230 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7231 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7232 "non-bonded interactions", "", comm->cutoff);
7236 limit = dd->comm->cellsize_limit;
7240 if (dynamic_dd_box(ddbox, ir))
7242 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7244 limit = dd->comm->cellsize_min[XX];
7245 for (d = 1; d < DIM; d++)
7247 limit = std::min(limit, dd->comm->cellsize_min[d]);
7251 if (comm->bInterCGBondeds)
7253 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7254 "two-body bonded interactions", "(-rdd)",
7255 std::max(comm->cutoff, comm->cutoff_mbody));
7256 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7257 "multi-body bonded interactions", "(-rdd)",
7258 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
7262 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7263 "virtual site constructions", "(-rcon)", limit);
7265 if (dd->constraint_comm)
7267 sprintf(buf, "atoms separated by up to %d constraints",
7269 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7270 buf, "(-rcon)", limit);
7272 fprintf(fplog, "\n");
7278 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7280 const t_inputrec *ir,
7281 const gmx_ddbox_t *ddbox)
7283 gmx_domdec_comm_t *comm;
7284 int d, dim, npulse, npulse_d_max, npulse_d;
7289 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7291 /* Determine the maximum number of comm. pulses in one dimension */
7293 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
7295 /* Determine the maximum required number of grid pulses */
7296 if (comm->cellsize_limit >= comm->cutoff)
7298 /* Only a single pulse is required */
7301 else if (!bNoCutOff && comm->cellsize_limit > 0)
7303 /* We round down slightly here to avoid overhead due to the latency
7304 * of extra communication calls when the cut-off
7305 * would be only slightly longer than the cell size.
7306 * Later cellsize_limit is redetermined,
7307 * so we can not miss interactions due to this rounding.
7309 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7313 /* There is no cell size limit */
7314 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7317 if (!bNoCutOff && npulse > 1)
7319 /* See if we can do with less pulses, based on dlb_scale */
7321 for (d = 0; d < dd->ndim; d++)
7324 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7325 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7326 npulse_d_max = std::max(npulse_d_max, npulse_d);
7328 npulse = std::min(npulse, npulse_d_max);
7331 /* This env var can override npulse */
7332 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
7339 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7340 for (d = 0; d < dd->ndim; d++)
7342 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
7343 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7344 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7345 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
7346 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7348 comm->bVacDLBNoLimit = FALSE;
7352 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7353 if (!comm->bVacDLBNoLimit)
7355 comm->cellsize_limit = std::max(comm->cellsize_limit,
7356 comm->cutoff/comm->maxpulse);
7358 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
7359 /* Set the minimum cell size for each DD dimension */
7360 for (d = 0; d < dd->ndim; d++)
7362 if (comm->bVacDLBNoLimit ||
7363 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7365 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7369 comm->cellsize_min_dlb[dd->dim[d]] =
7370 comm->cutoff/comm->cd[d].np_dlb;
7373 if (comm->cutoff_mbody <= 0)
7375 comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
7377 if (comm->bDynLoadBal)
7383 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7385 /* If each molecule is a single charge group
7386 * or we use domain decomposition for each periodic dimension,
7387 * we do not need to take pbc into account for the bonded interactions.
7389 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7392 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
7395 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7396 t_inputrec *ir, gmx_ddbox_t *ddbox)
7398 gmx_domdec_comm_t *comm;
7404 /* Initialize the thread data.
7405 * This can not be done in init_domain_decomposition,
7406 * as the numbers of threads is determined later.
7408 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7411 snew(comm->dth, comm->nth);
7414 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7416 init_ddpme(dd, &comm->ddpme[0], 0);
7417 if (comm->npmedecompdim >= 2)
7419 init_ddpme(dd, &comm->ddpme[1], 1);
7424 comm->npmenodes = 0;
7425 if (dd->pme_nodeid >= 0)
7427 gmx_fatal_collective(FARGS, NULL, dd,
7428 "Can not have separate PME ranks without PME electrostatics");
7434 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7436 if (comm->eDLB != edlbNO)
7438 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7441 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7442 if (comm->eDLB == edlbAUTO)
7446 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7448 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
7451 if (ir->ePBC == epbcNONE)
7453 vol_frac = 1 - 1/(double)dd->nnodes;
7458 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7462 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7464 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7466 dd->ga2la = ga2la_init(natoms_tot, static_cast<int>(vol_frac*natoms_tot));
7469 static gmx_bool test_dd_cutoff(t_commrec *cr,
7470 t_state *state, t_inputrec *ir,
7481 set_ddbox(dd, FALSE, cr, ir, state->box,
7482 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7486 for (d = 0; d < dd->ndim; d++)
7490 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7491 if (dynamic_dd_box(&ddbox, ir))
7493 inv_cell_size *= DD_PRES_SCALE_MARGIN;
7496 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7498 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7499 dd->comm->cd[d].np_dlb > 0)
7501 if (np > dd->comm->cd[d].np_dlb)
7506 /* If a current local cell size is smaller than the requested
7507 * cut-off, we could still fix it, but this gets very complicated.
7508 * Without fixing here, we might actually need more checks.
7510 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7517 if (dd->comm->eDLB != edlbNO)
7519 /* If DLB is not active yet, we don't need to check the grid jumps.
7520 * Actually we shouldn't, because then the grid jump data is not set.
7522 if (dd->comm->bDynLoadBal &&
7523 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7528 gmx_sumi(1, &LocallyLimited, cr);
7530 if (LocallyLimited > 0)
7539 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7542 gmx_bool bCutoffAllowed;
7544 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7548 cr->dd->comm->cutoff = cutoff_req;
7551 return bCutoffAllowed;
7554 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7556 gmx_domdec_comm_t *comm;
7558 comm = cr->dd->comm;
7560 /* Turn on the DLB limiting (might have been on already) */
7561 comm->bPMELoadBalDLBLimits = TRUE;
7563 /* Change the cut-off limit */
7564 comm->PMELoadBal_max_cutoff = comm->cutoff;
7567 static void merge_cg_buffers(int ncell,
7568 gmx_domdec_comm_dim_t *cd, int pulse,
7570 int *index_gl, int *recv_i,
7571 rvec *cg_cm, rvec *recv_vr,
7573 cginfo_mb_t *cginfo_mb, int *cginfo)
7575 gmx_domdec_ind_t *ind, *ind_p;
7576 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7577 int shift, shift_at;
7579 ind = &cd->ind[pulse];
7581 /* First correct the already stored data */
7582 shift = ind->nrecv[ncell];
7583 for (cell = ncell-1; cell >= 0; cell--)
7585 shift -= ind->nrecv[cell];
7588 /* Move the cg's present from previous grid pulses */
7589 cg0 = ncg_cell[ncell+cell];
7590 cg1 = ncg_cell[ncell+cell+1];
7591 cgindex[cg1+shift] = cgindex[cg1];
7592 for (cg = cg1-1; cg >= cg0; cg--)
7594 index_gl[cg+shift] = index_gl[cg];
7595 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7596 cgindex[cg+shift] = cgindex[cg];
7597 cginfo[cg+shift] = cginfo[cg];
7599 /* Correct the already stored send indices for the shift */
7600 for (p = 1; p <= pulse; p++)
7602 ind_p = &cd->ind[p];
7604 for (c = 0; c < cell; c++)
7606 cg0 += ind_p->nsend[c];
7608 cg1 = cg0 + ind_p->nsend[cell];
7609 for (cg = cg0; cg < cg1; cg++)
7611 ind_p->index[cg] += shift;
7617 /* Merge in the communicated buffers */
7621 for (cell = 0; cell < ncell; cell++)
7623 cg1 = ncg_cell[ncell+cell+1] + shift;
7626 /* Correct the old cg indices */
7627 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7629 cgindex[cg+1] += shift_at;
7632 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7634 /* Copy this charge group from the buffer */
7635 index_gl[cg1] = recv_i[cg0];
7636 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7637 /* Add it to the cgindex */
7638 cg_gl = index_gl[cg1];
7639 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7640 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7641 cgindex[cg1+1] = cgindex[cg1] + nat;
7646 shift += ind->nrecv[cell];
7647 ncg_cell[ncell+cell+1] = cg1;
7651 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7652 int nzone, int cg0, const int *cgindex)
7656 /* Store the atom block boundaries for easy copying of communication buffers
7659 for (zone = 0; zone < nzone; zone++)
7661 for (p = 0; p < cd->np; p++)
7663 cd->ind[p].cell2at0[zone] = cgindex[cg];
7664 cg += cd->ind[p].nrecv[zone];
7665 cd->ind[p].cell2at1[zone] = cgindex[cg];
7670 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7676 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7678 if (!bLocalCG[link->a[i]])
7687 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7689 real c[DIM][4]; /* the corners for the non-bonded communication */
7690 real cr0; /* corner for rounding */
7691 real cr1[4]; /* corners for rounding */
7692 real bc[DIM]; /* corners for bounded communication */
7693 real bcr1; /* corner for rounding for bonded communication */
7696 /* Determine the corners of the domain(s) we are communicating with */
7698 set_dd_corners(const gmx_domdec_t *dd,
7699 int dim0, int dim1, int dim2,
7703 const gmx_domdec_comm_t *comm;
7704 const gmx_domdec_zones_t *zones;
7709 zones = &comm->zones;
7711 /* Keep the compiler happy */
7715 /* The first dimension is equal for all cells */
7716 c->c[0][0] = comm->cell_x0[dim0];
7719 c->bc[0] = c->c[0][0];
7724 /* This cell row is only seen from the first row */
7725 c->c[1][0] = comm->cell_x0[dim1];
7726 /* All rows can see this row */
7727 c->c[1][1] = comm->cell_x0[dim1];
7730 c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7733 /* For the multi-body distance we need the maximum */
7734 c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7737 /* Set the upper-right corner for rounding */
7738 c->cr0 = comm->cell_x1[dim0];
7743 for (j = 0; j < 4; j++)
7745 c->c[2][j] = comm->cell_x0[dim2];
7749 /* Use the maximum of the i-cells that see a j-cell */
7750 for (i = 0; i < zones->nizone; i++)
7752 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7757 std::max(c->c[2][j-4],
7758 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7764 /* For the multi-body distance we need the maximum */
7765 c->bc[2] = comm->cell_x0[dim2];
7766 for (i = 0; i < 2; i++)
7768 for (j = 0; j < 2; j++)
7770 c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0);
7776 /* Set the upper-right corner for rounding */
7777 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7778 * Only cell (0,0,0) can see cell 7 (1,1,1)
7780 c->cr1[0] = comm->cell_x1[dim1];
7781 c->cr1[3] = comm->cell_x1[dim1];
7784 c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7787 /* For the multi-body distance we need the maximum */
7788 c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7795 /* Determine which cg's we need to send in this pulse from this zone */
7797 get_zone_pulse_cgs(gmx_domdec_t *dd,
7798 int zonei, int zone,
7800 const int *index_gl,
7802 int dim, int dim_ind,
7803 int dim0, int dim1, int dim2,
7804 real r_comm2, real r_bcomm2,
7808 real skew_fac2_d, real skew_fac_01,
7809 rvec *v_d, rvec *v_0, rvec *v_1,
7810 const dd_corners_t *c,
7812 gmx_bool bDistBonded,
7818 gmx_domdec_ind_t *ind,
7819 int **ibuf, int *ibuf_nalloc,
7825 gmx_domdec_comm_t *comm;
7827 gmx_bool bDistMB_pulse;
7829 real r2, rb2, r, tric_sh;
7832 int nsend_z, nsend, nat;
7836 bScrew = (dd->bScrewPBC && dim == XX);
7838 bDistMB_pulse = (bDistMB && bDistBonded);
7844 for (cg = cg0; cg < cg1; cg++)
7848 if (tric_dist[dim_ind] == 0)
7850 /* Rectangular direction, easy */
7851 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7858 r = cg_cm[cg][dim] - c->bc[dim_ind];
7864 /* Rounding gives at most a 16% reduction
7865 * in communicated atoms
7867 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7869 r = cg_cm[cg][dim0] - c->cr0;
7870 /* This is the first dimension, so always r >= 0 */
7877 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7879 r = cg_cm[cg][dim1] - c->cr1[zone];
7886 r = cg_cm[cg][dim1] - c->bcr1;
7896 /* Triclinic direction, more complicated */
7899 /* Rounding, conservative as the skew_fac multiplication
7900 * will slightly underestimate the distance.
7902 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7904 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7905 for (i = dim0+1; i < DIM; i++)
7907 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7909 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7912 rb[dim0] = rn[dim0];
7915 /* Take care that the cell planes along dim0 might not
7916 * be orthogonal to those along dim1 and dim2.
7918 for (i = 1; i <= dim_ind; i++)
7921 if (normal[dim0][dimd] > 0)
7923 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7926 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7931 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7933 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7935 for (i = dim1+1; i < DIM; i++)
7937 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7939 rn[dim1] += tric_sh;
7942 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7943 /* Take care of coupling of the distances
7944 * to the planes along dim0 and dim1 through dim2.
7946 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7947 /* Take care that the cell planes along dim1
7948 * might not be orthogonal to that along dim2.
7950 if (normal[dim1][dim2] > 0)
7952 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7958 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7961 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7962 /* Take care of coupling of the distances
7963 * to the planes along dim0 and dim1 through dim2.
7965 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7966 /* Take care that the cell planes along dim1
7967 * might not be orthogonal to that along dim2.
7969 if (normal[dim1][dim2] > 0)
7971 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7976 /* The distance along the communication direction */
7977 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7979 for (i = dim+1; i < DIM; i++)
7981 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7986 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7987 /* Take care of coupling of the distances
7988 * to the planes along dim0 and dim1 through dim2.
7990 if (dim_ind == 1 && zonei == 1)
7992 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
7998 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
8001 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
8002 /* Take care of coupling of the distances
8003 * to the planes along dim0 and dim1 through dim2.
8005 if (dim_ind == 1 && zonei == 1)
8007 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
8015 ((bDistMB && rb2 < r_bcomm2) ||
8016 (bDist2B && r2 < r_bcomm2)) &&
8018 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
8019 missing_link(comm->cglink, index_gl[cg],
8022 /* Make an index to the local charge groups */
8023 if (nsend+1 > ind->nalloc)
8025 ind->nalloc = over_alloc_large(nsend+1);
8026 srenew(ind->index, ind->nalloc);
8028 if (nsend+1 > *ibuf_nalloc)
8030 *ibuf_nalloc = over_alloc_large(nsend+1);
8031 srenew(*ibuf, *ibuf_nalloc);
8033 ind->index[nsend] = cg;
8034 (*ibuf)[nsend] = index_gl[cg];
8036 vec_rvec_check_alloc(vbuf, nsend+1);
8038 if (dd->ci[dim] == 0)
8040 /* Correct cg_cm for pbc */
8041 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
8044 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8045 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8050 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8053 nat += cgindex[cg+1] - cgindex[cg];
8059 *nsend_z_ptr = nsend_z;
8062 static void setup_dd_communication(gmx_domdec_t *dd,
8063 matrix box, gmx_ddbox_t *ddbox,
8064 t_forcerec *fr, t_state *state, rvec **f)
8066 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8067 int nzone, nzone_send, zone, zonei, cg0, cg1;
8068 int c, i, cg, cg_gl, nrcg;
8069 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8070 gmx_domdec_comm_t *comm;
8071 gmx_domdec_zones_t *zones;
8072 gmx_domdec_comm_dim_t *cd;
8073 gmx_domdec_ind_t *ind;
8074 cginfo_mb_t *cginfo_mb;
8075 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8076 real r_comm2, r_bcomm2;
8077 dd_corners_t corners;
8079 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8080 real skew_fac2_d, skew_fac_01;
8087 fprintf(debug, "Setting up DD communication\n");
8092 switch (fr->cutoff_scheme)
8101 gmx_incons("unimplemented");
8105 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8107 /* Check if we need to use triclinic distances */
8108 tric_dist[dim_ind] = 0;
8109 for (i = 0; i <= dim_ind; i++)
8111 if (ddbox->tric_dir[dd->dim[i]])
8113 tric_dist[dim_ind] = 1;
8118 bBondComm = comm->bBondComm;
8120 /* Do we need to determine extra distances for multi-body bondeds? */
8121 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8123 /* Do we need to determine extra distances for only two-body bondeds? */
8124 bDist2B = (bBondComm && !bDistMB);
8126 r_comm2 = sqr(comm->cutoff);
8127 r_bcomm2 = sqr(comm->cutoff_mbody);
8131 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8134 zones = &comm->zones;
8137 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8138 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8140 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8142 /* Triclinic stuff */
8143 normal = ddbox->normal;
8147 v_0 = ddbox->v[dim0];
8148 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8150 /* Determine the coupling coefficient for the distances
8151 * to the cell planes along dim0 and dim1 through dim2.
8152 * This is required for correct rounding.
8155 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8158 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8164 v_1 = ddbox->v[dim1];
8167 zone_cg_range = zones->cg_range;
8168 index_gl = dd->index_gl;
8169 cgindex = dd->cgindex;
8170 cginfo_mb = fr->cginfo_mb;
8172 zone_cg_range[0] = 0;
8173 zone_cg_range[1] = dd->ncg_home;
8174 comm->zone_ncg1[0] = dd->ncg_home;
8175 pos_cg = dd->ncg_home;
8177 nat_tot = dd->nat_home;
8179 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8181 dim = dd->dim[dim_ind];
8182 cd = &comm->cd[dim_ind];
8184 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8186 /* No pbc in this dimension, the first node should not comm. */
8194 v_d = ddbox->v[dim];
8195 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8197 cd->bInPlace = TRUE;
8198 for (p = 0; p < cd->np; p++)
8200 /* Only atoms communicated in the first pulse are used
8201 * for multi-body bonded interactions or for bBondComm.
8203 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8208 for (zone = 0; zone < nzone_send; zone++)
8210 if (tric_dist[dim_ind] && dim_ind > 0)
8212 /* Determine slightly more optimized skew_fac's
8214 * This reduces the number of communicated atoms
8215 * by about 10% for 3D DD of rhombic dodecahedra.
8217 for (dimd = 0; dimd < dim; dimd++)
8219 sf2_round[dimd] = 1;
8220 if (ddbox->tric_dir[dimd])
8222 for (i = dd->dim[dimd]+1; i < DIM; i++)
8224 /* If we are shifted in dimension i
8225 * and the cell plane is tilted forward
8226 * in dimension i, skip this coupling.
8228 if (!(zones->shift[nzone+zone][i] &&
8229 ddbox->v[dimd][i][dimd] >= 0))
8232 sqr(ddbox->v[dimd][i][dimd]);
8235 sf2_round[dimd] = 1/sf2_round[dimd];
8240 zonei = zone_perm[dim_ind][zone];
8243 /* Here we permutate the zones to obtain a convenient order
8244 * for neighbor searching
8246 cg0 = zone_cg_range[zonei];
8247 cg1 = zone_cg_range[zonei+1];
8251 /* Look only at the cg's received in the previous grid pulse
8253 cg1 = zone_cg_range[nzone+zone+1];
8254 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
8257 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8258 for (th = 0; th < comm->nth; th++)
8260 gmx_domdec_ind_t *ind_p;
8261 int **ibuf_p, *ibuf_nalloc_p;
8263 int *nsend_p, *nat_p;
8269 /* Thread 0 writes in the comm buffers */
8271 ibuf_p = &comm->buf_int;
8272 ibuf_nalloc_p = &comm->nalloc_int;
8273 vbuf_p = &comm->vbuf;
8276 nsend_zone_p = &ind->nsend[zone];
8280 /* Other threads write into temp buffers */
8281 ind_p = &comm->dth[th].ind;
8282 ibuf_p = &comm->dth[th].ibuf;
8283 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8284 vbuf_p = &comm->dth[th].vbuf;
8285 nsend_p = &comm->dth[th].nsend;
8286 nat_p = &comm->dth[th].nat;
8287 nsend_zone_p = &comm->dth[th].nsend_zone;
8289 comm->dth[th].nsend = 0;
8290 comm->dth[th].nat = 0;
8291 comm->dth[th].nsend_zone = 0;
8301 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8302 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8305 /* Get the cg's for this pulse in this zone */
8306 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8308 dim, dim_ind, dim0, dim1, dim2,
8311 normal, skew_fac2_d, skew_fac_01,
8312 v_d, v_0, v_1, &corners, sf2_round,
8313 bDistBonded, bBondComm,
8317 ibuf_p, ibuf_nalloc_p,
8323 /* Append data of threads>=1 to the communication buffers */
8324 for (th = 1; th < comm->nth; th++)
8326 dd_comm_setup_work_t *dth;
8329 dth = &comm->dth[th];
8331 ns1 = nsend + dth->nsend_zone;
8332 if (ns1 > ind->nalloc)
8334 ind->nalloc = over_alloc_dd(ns1);
8335 srenew(ind->index, ind->nalloc);
8337 if (ns1 > comm->nalloc_int)
8339 comm->nalloc_int = over_alloc_dd(ns1);
8340 srenew(comm->buf_int, comm->nalloc_int);
8342 if (ns1 > comm->vbuf.nalloc)
8344 comm->vbuf.nalloc = over_alloc_dd(ns1);
8345 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8348 for (i = 0; i < dth->nsend_zone; i++)
8350 ind->index[nsend] = dth->ind.index[i];
8351 comm->buf_int[nsend] = dth->ibuf[i];
8352 copy_rvec(dth->vbuf.v[i],
8353 comm->vbuf.v[nsend]);
8357 ind->nsend[zone] += dth->nsend_zone;
8360 /* Clear the counts in case we do not have pbc */
8361 for (zone = nzone_send; zone < nzone; zone++)
8363 ind->nsend[zone] = 0;
8365 ind->nsend[nzone] = nsend;
8366 ind->nsend[nzone+1] = nat;
8367 /* Communicate the number of cg's and atoms to receive */
8368 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8369 ind->nsend, nzone+2,
8370 ind->nrecv, nzone+2);
8372 /* The rvec buffer is also required for atom buffers of size nsend
8373 * in dd_move_x and dd_move_f.
8375 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8379 /* We can receive in place if only the last zone is not empty */
8380 for (zone = 0; zone < nzone-1; zone++)
8382 if (ind->nrecv[zone] > 0)
8384 cd->bInPlace = FALSE;
8389 /* The int buffer is only required here for the cg indices */
8390 if (ind->nrecv[nzone] > comm->nalloc_int2)
8392 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8393 srenew(comm->buf_int2, comm->nalloc_int2);
8395 /* The rvec buffer is also required for atom buffers
8396 * of size nrecv in dd_move_x and dd_move_f.
8398 i = std::max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8399 vec_rvec_check_alloc(&comm->vbuf2, i);
8403 /* Make space for the global cg indices */
8404 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8405 || dd->cg_nalloc == 0)
8407 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8408 srenew(index_gl, dd->cg_nalloc);
8409 srenew(cgindex, dd->cg_nalloc+1);
8411 /* Communicate the global cg indices */
8414 recv_i = index_gl + pos_cg;
8418 recv_i = comm->buf_int2;
8420 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8421 comm->buf_int, nsend,
8422 recv_i, ind->nrecv[nzone]);
8424 /* Make space for cg_cm */
8425 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8426 if (fr->cutoff_scheme == ecutsGROUP)
8434 /* Communicate cg_cm */
8437 recv_vr = cg_cm + pos_cg;
8441 recv_vr = comm->vbuf2.v;
8443 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8444 comm->vbuf.v, nsend,
8445 recv_vr, ind->nrecv[nzone]);
8447 /* Make the charge group index */
8450 zone = (p == 0 ? 0 : nzone - 1);
8451 while (zone < nzone)
8453 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8455 cg_gl = index_gl[pos_cg];
8456 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8457 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8458 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8461 /* Update the charge group presence,
8462 * so we can use it in the next pass of the loop.
8464 comm->bLocalCG[cg_gl] = TRUE;
8470 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8473 zone_cg_range[nzone+zone] = pos_cg;
8478 /* This part of the code is never executed with bBondComm. */
8479 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8480 index_gl, recv_i, cg_cm, recv_vr,
8481 cgindex, fr->cginfo_mb, fr->cginfo);
8482 pos_cg += ind->nrecv[nzone];
8484 nat_tot += ind->nrecv[nzone+1];
8488 /* Store the atom block for easy copying of communication buffers */
8489 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
8493 dd->index_gl = index_gl;
8494 dd->cgindex = cgindex;
8496 dd->ncg_tot = zone_cg_range[zones->n];
8497 dd->nat_tot = nat_tot;
8498 comm->nat[ddnatHOME] = dd->nat_home;
8499 for (i = ddnatZONE; i < ddnatNR; i++)
8501 comm->nat[i] = dd->nat_tot;
8506 /* We don't need to update cginfo, since that was alrady done above.
8507 * So we pass NULL for the forcerec.
8509 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8510 NULL, comm->bLocalCG);
8515 fprintf(debug, "Finished setting up DD communication, zones:");
8516 for (c = 0; c < zones->n; c++)
8518 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8520 fprintf(debug, "\n");
8524 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8528 for (c = 0; c < zones->nizone; c++)
8530 zones->izone[c].cg1 = zones->cg_range[c+1];
8531 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8532 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
8536 static void set_zones_size(gmx_domdec_t *dd,
8537 matrix box, const gmx_ddbox_t *ddbox,
8538 int zone_start, int zone_end)
8540 gmx_domdec_comm_t *comm;
8541 gmx_domdec_zones_t *zones;
8550 zones = &comm->zones;
8552 /* Do we need to determine extra distances for multi-body bondeds? */
8553 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8555 for (z = zone_start; z < zone_end; z++)
8557 /* Copy cell limits to zone limits.
8558 * Valid for non-DD dims and non-shifted dims.
8560 copy_rvec(comm->cell_x0, zones->size[z].x0);
8561 copy_rvec(comm->cell_x1, zones->size[z].x1);
8564 for (d = 0; d < dd->ndim; d++)
8568 for (z = 0; z < zones->n; z++)
8570 /* With a staggered grid we have different sizes
8571 * for non-shifted dimensions.
8573 if (dd->bGridJump && zones->shift[z][dim] == 0)
8577 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8578 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8582 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8583 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
8589 rcmbs = comm->cutoff_mbody;
8590 if (ddbox->tric_dir[dim])
8592 rcs /= ddbox->skew_fac[dim];
8593 rcmbs /= ddbox->skew_fac[dim];
8596 /* Set the lower limit for the shifted zone dimensions */
8597 for (z = zone_start; z < zone_end; z++)
8599 if (zones->shift[z][dim] > 0)
8602 if (!dd->bGridJump || d == 0)
8604 zones->size[z].x0[dim] = comm->cell_x1[dim];
8605 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8609 /* Here we take the lower limit of the zone from
8610 * the lowest domain of the zone below.
8614 zones->size[z].x0[dim] =
8615 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8621 zones->size[z].x0[dim] =
8622 zones->size[zone_perm[2][z-4]].x0[dim];
8626 zones->size[z].x0[dim] =
8627 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8630 /* A temporary limit, is updated below */
8631 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8635 for (zi = 0; zi < zones->nizone; zi++)
8637 if (zones->shift[zi][dim] == 0)
8639 /* This takes the whole zone into account.
8640 * With multiple pulses this will lead
8641 * to a larger zone then strictly necessary.
8643 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
8644 zones->size[zi].x1[dim]+rcmbs);
8652 /* Loop over the i-zones to set the upper limit of each
8655 for (zi = 0; zi < zones->nizone; zi++)
8657 if (zones->shift[zi][dim] == 0)
8659 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8661 if (zones->shift[z][dim] > 0)
8663 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
8664 zones->size[zi].x1[dim]+rcs);
8671 for (z = zone_start; z < zone_end; z++)
8673 /* Initialization only required to keep the compiler happy */
8674 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8677 /* To determine the bounding box for a zone we need to find
8678 * the extreme corners of 4, 2 or 1 corners.
8680 nc = 1 << (ddbox->npbcdim - 1);
8682 for (c = 0; c < nc; c++)
8684 /* Set up a zone corner at x=0, ignoring trilinic couplings */
8688 corner[YY] = zones->size[z].x0[YY];
8692 corner[YY] = zones->size[z].x1[YY];
8696 corner[ZZ] = zones->size[z].x0[ZZ];
8700 corner[ZZ] = zones->size[z].x1[ZZ];
8702 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8704 /* With 1D domain decomposition the cg's are not in
8705 * the triclinic box, but triclinic x-y and rectangular y-z.
8706 * Shift y back, so it will later end up at 0.
8708 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8710 /* Apply the triclinic couplings */
8711 assert(ddbox->npbcdim <= DIM);
8712 for (i = YY; i < ddbox->npbcdim; i++)
8714 for (j = XX; j < i; j++)
8716 corner[j] += corner[i]*box[i][j]/box[i][i];
8721 copy_rvec(corner, corner_min);
8722 copy_rvec(corner, corner_max);
8726 for (i = 0; i < DIM; i++)
8728 corner_min[i] = std::min(corner_min[i], corner[i]);
8729 corner_max[i] = std::max(corner_max[i], corner[i]);
8733 /* Copy the extreme cornes without offset along x */
8734 for (i = 0; i < DIM; i++)
8736 zones->size[z].bb_x0[i] = corner_min[i];
8737 zones->size[z].bb_x1[i] = corner_max[i];
8739 /* Add the offset along x */
8740 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8741 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
8744 if (zone_start == 0)
8747 for (dim = 0; dim < DIM; dim++)
8749 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8751 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8756 for (z = zone_start; z < zone_end; z++)
8758 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8760 zones->size[z].x0[XX], zones->size[z].x1[XX],
8761 zones->size[z].x0[YY], zones->size[z].x1[YY],
8762 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8763 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8765 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8766 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8767 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
8772 static int comp_cgsort(const void *a, const void *b)
8776 gmx_cgsort_t *cga, *cgb;
8777 cga = (gmx_cgsort_t *)a;
8778 cgb = (gmx_cgsort_t *)b;
8780 comp = cga->nsc - cgb->nsc;
8783 comp = cga->ind_gl - cgb->ind_gl;
8789 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8794 /* Order the data */
8795 for (i = 0; i < n; i++)
8797 buf[i] = a[sort[i].ind];
8800 /* Copy back to the original array */
8801 for (i = 0; i < n; i++)
8807 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8812 /* Order the data */
8813 for (i = 0; i < n; i++)
8815 copy_rvec(v[sort[i].ind], buf[i]);
8818 /* Copy back to the original array */
8819 for (i = 0; i < n; i++)
8821 copy_rvec(buf[i], v[i]);
8825 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8828 int a, atot, cg, cg0, cg1, i;
8830 if (cgindex == NULL)
8832 /* Avoid the useless loop of the atoms within a cg */
8833 order_vec_cg(ncg, sort, v, buf);
8838 /* Order the data */
8840 for (cg = 0; cg < ncg; cg++)
8842 cg0 = cgindex[sort[cg].ind];
8843 cg1 = cgindex[sort[cg].ind+1];
8844 for (i = cg0; i < cg1; i++)
8846 copy_rvec(v[i], buf[a]);
8852 /* Copy back to the original array */
8853 for (a = 0; a < atot; a++)
8855 copy_rvec(buf[a], v[a]);
8859 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8860 int nsort_new, gmx_cgsort_t *sort_new,
8861 gmx_cgsort_t *sort1)
8865 /* The new indices are not very ordered, so we qsort them */
8866 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8868 /* sort2 is already ordered, so now we can merge the two arrays */
8872 while (i2 < nsort2 || i_new < nsort_new)
8876 sort1[i1++] = sort_new[i_new++];
8878 else if (i_new == nsort_new)
8880 sort1[i1++] = sort2[i2++];
8882 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8883 (sort2[i2].nsc == sort_new[i_new].nsc &&
8884 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8886 sort1[i1++] = sort2[i2++];
8890 sort1[i1++] = sort_new[i_new++];
8895 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8897 gmx_domdec_sort_t *sort;
8898 gmx_cgsort_t *cgsort, *sort_i;
8899 int ncg_new, nsort2, nsort_new, i, *a, moved;
8901 sort = dd->comm->sort;
8903 a = fr->ns.grid->cell_index;
8905 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8907 if (ncg_home_old >= 0)
8909 /* The charge groups that remained in the same ns grid cell
8910 * are completely ordered. So we can sort efficiently by sorting
8911 * the charge groups that did move into the stationary list.
8916 for (i = 0; i < dd->ncg_home; i++)
8918 /* Check if this cg did not move to another node */
8921 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8923 /* This cg is new on this node or moved ns grid cell */
8924 if (nsort_new >= sort->sort_new_nalloc)
8926 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8927 srenew(sort->sort_new, sort->sort_new_nalloc);
8929 sort_i = &(sort->sort_new[nsort_new++]);
8933 /* This cg did not move */
8934 sort_i = &(sort->sort2[nsort2++]);
8936 /* Sort on the ns grid cell indices
8937 * and the global topology index.
8938 * index_gl is irrelevant with cell ns,
8939 * but we set it here anyhow to avoid a conditional.
8942 sort_i->ind_gl = dd->index_gl[i];
8949 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8952 /* Sort efficiently */
8953 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8958 cgsort = sort->sort;
8960 for (i = 0; i < dd->ncg_home; i++)
8962 /* Sort on the ns grid cell indices
8963 * and the global topology index
8965 cgsort[i].nsc = a[i];
8966 cgsort[i].ind_gl = dd->index_gl[i];
8968 if (cgsort[i].nsc < moved)
8975 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8977 /* Determine the order of the charge groups using qsort */
8978 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
8984 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
8987 int ncg_new, i, *a, na;
8989 sort = dd->comm->sort->sort;
8991 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
8994 for (i = 0; i < na; i++)
8998 sort[ncg_new].ind = a[i];
9006 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
9009 gmx_domdec_sort_t *sort;
9010 gmx_cgsort_t *cgsort;
9012 int ncg_new, i, *ibuf, cgsize;
9015 sort = dd->comm->sort;
9017 if (dd->ncg_home > sort->sort_nalloc)
9019 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9020 srenew(sort->sort, sort->sort_nalloc);
9021 srenew(sort->sort2, sort->sort_nalloc);
9023 cgsort = sort->sort;
9025 switch (fr->cutoff_scheme)
9028 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9031 ncg_new = dd_sort_order_nbnxn(dd, fr);
9034 gmx_incons("unimplemented");
9038 /* We alloc with the old size, since cgindex is still old */
9039 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9040 vbuf = dd->comm->vbuf.v;
9044 cgindex = dd->cgindex;
9051 /* Remove the charge groups which are no longer at home here */
9052 dd->ncg_home = ncg_new;
9055 fprintf(debug, "Set the new home charge group count to %d\n",
9059 /* Reorder the state */
9060 for (i = 0; i < estNR; i++)
9062 if (EST_DISTR(i) && (state->flags & (1<<i)))
9067 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9070 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9073 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9076 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9080 case estDISRE_INITF:
9081 case estDISRE_RM3TAV:
9082 case estORIRE_INITF:
9084 /* No ordering required */
9087 gmx_incons("Unknown state entry encountered in dd_sort_state");
9092 if (fr->cutoff_scheme == ecutsGROUP)
9095 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9098 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9100 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9101 srenew(sort->ibuf, sort->ibuf_nalloc);
9104 /* Reorder the global cg index */
9105 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9106 /* Reorder the cginfo */
9107 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9108 /* Rebuild the local cg index */
9112 for (i = 0; i < dd->ncg_home; i++)
9114 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9115 ibuf[i+1] = ibuf[i] + cgsize;
9117 for (i = 0; i < dd->ncg_home+1; i++)
9119 dd->cgindex[i] = ibuf[i];
9124 for (i = 0; i < dd->ncg_home+1; i++)
9129 /* Set the home atom number */
9130 dd->nat_home = dd->cgindex[dd->ncg_home];
9132 if (fr->cutoff_scheme == ecutsVERLET)
9134 /* The atoms are now exactly in grid order, update the grid order */
9135 nbnxn_set_atomorder(fr->nbv->nbs);
9139 /* Copy the sorted ns cell indices back to the ns grid struct */
9140 for (i = 0; i < dd->ncg_home; i++)
9142 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9144 fr->ns.grid->nr = dd->ncg_home;
9148 static void add_dd_statistics(gmx_domdec_t *dd)
9150 gmx_domdec_comm_t *comm;
9155 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9157 comm->sum_nat[ddnat-ddnatZONE] +=
9158 comm->nat[ddnat] - comm->nat[ddnat-1];
9163 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9165 gmx_domdec_comm_t *comm;
9170 /* Reset all the statistics and counters for total run counting */
9171 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9173 comm->sum_nat[ddnat-ddnatZONE] = 0;
9177 comm->load_step = 0;
9180 clear_ivec(comm->load_lim);
9185 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9187 gmx_domdec_comm_t *comm;
9191 comm = cr->dd->comm;
9193 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9200 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9202 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9204 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9209 " av. #atoms communicated per step for force: %d x %.1f\n",
9213 if (cr->dd->vsite_comm)
9216 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9217 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9222 if (cr->dd->constraint_comm)
9225 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9226 1 + ir->nLincsIter, av);
9230 gmx_incons(" Unknown type for DD statistics");
9233 fprintf(fplog, "\n");
9235 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9237 print_dd_load_av(fplog, cr->dd);
9241 void dd_partition_system(FILE *fplog,
9244 gmx_bool bMasterState,
9246 t_state *state_global,
9247 gmx_mtop_t *top_global,
9249 t_state *state_local,
9252 gmx_localtop_t *top_local,
9255 gmx_shellfc_t shellfc,
9256 gmx_constr_t constr,
9258 gmx_wallcycle_t wcycle,
9262 gmx_domdec_comm_t *comm;
9263 gmx_ddbox_t ddbox = {0};
9265 gmx_int64_t step_pcoupl;
9266 rvec cell_ns_x0, cell_ns_x1;
9267 int i, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9268 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9269 gmx_bool bRedist, bSortCG, bResortAll;
9270 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9277 bBoxChanged = (bMasterState || DEFORM(*ir));
9278 if (ir->epc != epcNO)
9280 /* With nstpcouple > 1 pressure coupling happens.
9281 * one step after calculating the pressure.
9282 * Box scaling happens at the end of the MD step,
9283 * after the DD partitioning.
9284 * We therefore have to do DLB in the first partitioning
9285 * after an MD step where P-coupling occured.
9286 * We need to determine the last step in which p-coupling occurred.
9287 * MRS -- need to validate this for vv?
9292 step_pcoupl = step - 1;
9296 step_pcoupl = ((step - 1)/n)*n + 1;
9298 if (step_pcoupl >= comm->partition_step)
9304 bNStGlobalComm = (step % nstglobalcomm == 0);
9306 if (!comm->bDynLoadBal)
9312 /* Should we do dynamic load balacing this step?
9313 * Since it requires (possibly expensive) global communication,
9314 * we might want to do DLB less frequently.
9316 if (bBoxChanged || ir->epc != epcNO)
9318 bDoDLB = bBoxChanged;
9322 bDoDLB = bNStGlobalComm;
9326 /* Check if we have recorded loads on the nodes */
9327 if (comm->bRecordLoad && dd_load_count(comm))
9329 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9331 /* Check if we should use DLB at the second partitioning
9332 * and every 100 partitionings,
9333 * so the extra communication cost is negligible.
9335 n = std::max(100, nstglobalcomm);
9336 bCheckDLB = (comm->n_load_collect == 0 ||
9337 comm->n_load_have % n == n-1);
9344 /* Print load every nstlog, first and last step to the log file */
9345 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9346 comm->n_load_collect == 0 ||
9348 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9350 /* Avoid extra communication due to verbose screen output
9351 * when nstglobalcomm is set.
9353 if (bDoDLB || bLogLoad || bCheckDLB ||
9354 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9356 get_load_distribution(dd, wcycle);
9361 dd_print_load(fplog, dd, step-1);
9365 dd_print_load_verbose(dd);
9368 comm->n_load_collect++;
9372 /* Since the timings are node dependent, the master decides */
9376 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
9379 fprintf(debug, "step %s, imb loss %f\n",
9380 gmx_step_str(step, sbuf),
9381 dd_force_imb_perf_loss(dd));
9384 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9387 turn_on_dlb(fplog, cr, step);
9392 comm->n_load_have++;
9395 cgs_gl = &comm->cgs_gl;
9400 /* Clear the old state */
9401 clear_dd_indices(dd, 0, 0);
9404 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9405 TRUE, cgs_gl, state_global->x, &ddbox);
9407 get_cg_distribution(fplog, step, dd, cgs_gl,
9408 state_global->box, &ddbox, state_global->x);
9410 dd_distribute_state(dd, cgs_gl,
9411 state_global, state_local, f);
9413 dd_make_local_cgs(dd, &top_local->cgs);
9415 /* Ensure that we have space for the new distribution */
9416 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9418 if (fr->cutoff_scheme == ecutsGROUP)
9420 calc_cgcm(fplog, 0, dd->ncg_home,
9421 &top_local->cgs, state_local->x, fr->cg_cm);
9424 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9426 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9428 else if (state_local->ddp_count != dd->ddp_count)
9430 if (state_local->ddp_count > dd->ddp_count)
9432 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9435 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9437 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9440 /* Clear the old state */
9441 clear_dd_indices(dd, 0, 0);
9443 /* Build the new indices */
9444 rebuild_cgindex(dd, cgs_gl->index, state_local);
9445 make_dd_indices(dd, cgs_gl->index, 0);
9446 ncgindex_set = dd->ncg_home;
9448 if (fr->cutoff_scheme == ecutsGROUP)
9450 /* Redetermine the cg COMs */
9451 calc_cgcm(fplog, 0, dd->ncg_home,
9452 &top_local->cgs, state_local->x, fr->cg_cm);
9455 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9457 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9459 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9460 TRUE, &top_local->cgs, state_local->x, &ddbox);
9462 bRedist = comm->bDynLoadBal;
9466 /* We have the full state, only redistribute the cgs */
9468 /* Clear the non-home indices */
9469 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9472 /* Avoid global communication for dim's without pbc and -gcom */
9473 if (!bNStGlobalComm)
9475 copy_rvec(comm->box0, ddbox.box0 );
9476 copy_rvec(comm->box_size, ddbox.box_size);
9478 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9479 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9484 /* For dim's without pbc and -gcom */
9485 copy_rvec(ddbox.box0, comm->box0 );
9486 copy_rvec(ddbox.box_size, comm->box_size);
9488 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9491 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9493 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9496 /* Check if we should sort the charge groups */
9497 if (comm->nstSortCG > 0)
9499 bSortCG = (bMasterState ||
9500 (bRedist && (step % comm->nstSortCG == 0)));
9507 ncg_home_old = dd->ncg_home;
9512 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9514 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9516 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9518 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9521 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9523 &comm->cell_x0, &comm->cell_x1,
9524 dd->ncg_home, fr->cg_cm,
9525 cell_ns_x0, cell_ns_x1, &grid_density);
9529 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9532 switch (fr->cutoff_scheme)
9535 copy_ivec(fr->ns.grid->n, ncells_old);
9536 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9537 state_local->box, cell_ns_x0, cell_ns_x1,
9538 fr->rlistlong, grid_density);
9541 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9544 gmx_incons("unimplemented");
9546 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9547 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9551 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9553 /* Sort the state on charge group position.
9554 * This enables exact restarts from this step.
9555 * It also improves performance by about 15% with larger numbers
9556 * of atoms per node.
9559 /* Fill the ns grid with the home cell,
9560 * so we can sort with the indices.
9562 set_zones_ncg_home(dd);
9564 switch (fr->cutoff_scheme)
9567 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9569 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9571 comm->zones.size[0].bb_x0,
9572 comm->zones.size[0].bb_x1,
9574 comm->zones.dens_zone0,
9577 ncg_moved, bRedist ? comm->moved : NULL,
9578 fr->nbv->grp[eintLocal].kernel_type,
9579 fr->nbv->grp[eintLocal].nbat);
9581 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9584 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9585 0, dd->ncg_home, fr->cg_cm);
9587 copy_ivec(fr->ns.grid->n, ncells_new);
9590 gmx_incons("unimplemented");
9593 bResortAll = bMasterState;
9595 /* Check if we can user the old order and ns grid cell indices
9596 * of the charge groups to sort the charge groups efficiently.
9598 if (ncells_new[XX] != ncells_old[XX] ||
9599 ncells_new[YY] != ncells_old[YY] ||
9600 ncells_new[ZZ] != ncells_old[ZZ])
9607 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9608 gmx_step_str(step, sbuf), dd->ncg_home);
9610 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9611 bResortAll ? -1 : ncg_home_old);
9612 /* Rebuild all the indices */
9613 ga2la_clear(dd->ga2la);
9616 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9619 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9621 /* Setup up the communication and communicate the coordinates */
9622 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9624 /* Set the indices */
9625 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9627 /* Set the charge group boundaries for neighbor searching */
9628 set_cg_boundaries(&comm->zones);
9630 if (fr->cutoff_scheme == ecutsVERLET)
9632 set_zones_size(dd, state_local->box, &ddbox,
9633 bSortCG ? 1 : 0, comm->zones.n);
9636 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9639 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9640 -1,state_local->x,state_local->box);
9643 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9645 /* Extract a local topology from the global topology */
9646 for (i = 0; i < dd->ndim; i++)
9648 np[dd->dim[i]] = comm->cd[i].np;
9650 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9651 comm->cellsize_min, np,
9653 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9654 vsite, top_global, top_local);
9656 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9658 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9660 /* Set up the special atom communication */
9661 n = comm->nat[ddnatZONE];
9662 for (i = ddnatZONE+1; i < ddnatNR; i++)
9667 if (vsite && vsite->n_intercg_vsite)
9669 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9673 if (dd->bInterCGcons || dd->bInterCGsettles)
9675 /* Only for inter-cg constraints we need special code */
9676 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9677 constr, ir->nProjOrder,
9678 top_local->idef.il);
9682 gmx_incons("Unknown special atom type setup");
9687 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9689 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9691 /* Make space for the extra coordinates for virtual site
9692 * or constraint communication.
9694 state_local->natoms = comm->nat[ddnatNR-1];
9695 if (state_local->natoms > state_local->nalloc)
9697 dd_realloc_state(state_local, f, state_local->natoms);
9700 if (fr->bF_NoVirSum)
9702 if (vsite && vsite->n_intercg_vsite)
9704 nat_f_novirsum = comm->nat[ddnatVSITE];
9708 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9710 nat_f_novirsum = dd->nat_tot;
9714 nat_f_novirsum = dd->nat_home;
9723 /* Set the number of atoms required for the force calculation.
9724 * Forces need to be constrained when using a twin-range setup
9725 * or with energy minimization. For simple simulations we could
9726 * avoid some allocation, zeroing and copying, but this is
9727 * probably not worth the complications ande checking.
9729 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9730 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9732 /* We make the all mdatoms up to nat_tot_con.
9733 * We could save some work by only setting invmass
9734 * between nat_tot and nat_tot_con.
9736 /* This call also sets the new number of home particles to dd->nat_home */
9737 atoms2md(top_global, ir,
9738 comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
9740 /* Now we have the charges we can sort the FE interactions */
9741 dd_sort_local_top(dd, mdatoms, top_local);
9745 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9746 split_vsites_over_threads(top_local->idef.il, top_local->idef.iparams,
9747 mdatoms, FALSE, vsite);
9752 /* Make the local shell stuff, currently no communication is done */
9753 make_local_shells(cr, mdatoms, shellfc);
9756 if (ir->implicit_solvent)
9758 make_local_gb(cr, fr->born, ir->gb_algorithm);
9761 setup_bonded_threading(fr, &top_local->idef);
9763 if (!(cr->duty & DUTY_PME))
9765 /* Send the charges and/or c6/sigmas to our PME only node */
9766 gmx_pme_send_parameters(cr,
9768 mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9769 mdatoms->chargeA, mdatoms->chargeB,
9770 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9771 mdatoms->sigmaA, mdatoms->sigmaB,
9772 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9777 set_constraints(constr, top_local, ir, mdatoms, cr);
9780 if (ir->ePull != epullNO)
9782 /* Update the local pull groups */
9783 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9788 /* Update the local rotation groups */
9789 dd_make_local_rotation_groups(dd, ir->rot);
9792 if (ir->eSwapCoords != eswapNO)
9794 /* Update the local groups needed for ion swapping */
9795 dd_make_local_swap_groups(dd, ir->swap);
9798 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9799 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9801 add_dd_statistics(dd);
9803 /* Make sure we only count the cycles for this DD partitioning */
9804 clear_dd_cycle_counts(dd);
9806 /* Because the order of the atoms might have changed since
9807 * the last vsite construction, we need to communicate the constructing
9808 * atom coordinates again (for spreading the forces this MD step).
9810 dd_move_x_vsites(dd, state_local->box, state_local->x);
9812 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9814 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9816 dd_move_x(dd, state_local->box, state_local->x);
9817 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9818 -1, state_local->x, state_local->box);
9821 /* Store the partitioning step */
9822 comm->partition_step = step;
9824 /* Increase the DD partitioning counter */
9826 /* The state currently matches this DD partitioning count, store it */
9827 state_local->ddp_count = dd->ddp_count;
9830 /* The DD master node knows the complete cg distribution,
9831 * store the count so we can possibly skip the cg info communication.
9833 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9836 if (comm->DD_debug > 0)
9838 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9839 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9840 "after partitioning");