2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 #include "gromacs/legacyheaders/domdec.h"
49 #include "gromacs/bonded/bonded.h"
50 #include "gromacs/fileio/gmxfio.h"
51 #include "gromacs/fileio/pdbio.h"
52 #include "gromacs/imd/imd.h"
53 #include "gromacs/legacyheaders/bonded-threading.h"
54 #include "gromacs/legacyheaders/chargegroup.h"
55 #include "gromacs/legacyheaders/constr.h"
56 #include "gromacs/legacyheaders/domdec_network.h"
57 #include "gromacs/legacyheaders/force.h"
58 #include "gromacs/legacyheaders/gmx_ga2la.h"
59 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
60 #include "gromacs/legacyheaders/gpu_utils.h"
61 #include "gromacs/legacyheaders/macros.h"
62 #include "gromacs/legacyheaders/mdatoms.h"
63 #include "gromacs/legacyheaders/mdrun.h"
64 #include "gromacs/legacyheaders/names.h"
65 #include "gromacs/legacyheaders/network.h"
66 #include "gromacs/legacyheaders/nrnb.h"
67 #include "gromacs/legacyheaders/nsgrid.h"
68 #include "gromacs/legacyheaders/pme.h"
69 #include "gromacs/legacyheaders/shellfc.h"
70 #include "gromacs/legacyheaders/typedefs.h"
71 #include "gromacs/math/vec.h"
72 #include "gromacs/mdlib/nb_verlet.h"
73 #include "gromacs/mdlib/nbnxn_search.h"
74 #include "gromacs/pbcutil/ishift.h"
75 #include "gromacs/pbcutil/pbc.h"
76 #include "gromacs/pulling/pull.h"
77 #include "gromacs/pulling/pull_rotation.h"
78 #include "gromacs/swap/swapcoords.h"
79 #include "gromacs/timing/wallcycle.h"
80 #include "gromacs/topology/mtop_util.h"
81 #include "gromacs/utility/basenetwork.h"
82 #include "gromacs/utility/fatalerror.h"
83 #include "gromacs/utility/futil.h"
84 #include "gromacs/utility/gmxmpi.h"
85 #include "gromacs/utility/qsort_threadsafe.h"
86 #include "gromacs/utility/smalloc.h"
88 #define DDRANK(dd, rank) (rank)
89 #define DDMASTERRANK(dd) (dd->masterrank)
91 typedef struct gmx_domdec_master
93 /* The cell boundaries */
95 /* The global charge group division */
96 int *ncg; /* Number of home charge groups for each node */
97 int *index; /* Index of nnodes+1 into cg */
98 int *cg; /* Global charge group index */
99 int *nat; /* Number of home atoms for each node. */
100 int *ibuf; /* Buffer for communication */
101 rvec *vbuf; /* Buffer for state scattering and gathering */
102 } gmx_domdec_master_t;
106 /* The numbers of charge groups to send and receive for each cell
107 * that requires communication, the last entry contains the total
108 * number of atoms that needs to be communicated.
110 int nsend[DD_MAXIZONE+2];
111 int nrecv[DD_MAXIZONE+2];
112 /* The charge groups to send */
115 /* The atom range for non-in-place communication */
116 int cell2at0[DD_MAXIZONE];
117 int cell2at1[DD_MAXIZONE];
122 int np; /* Number of grid pulses in this dimension */
123 int np_dlb; /* For dlb, for use with edlbAUTO */
124 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
126 gmx_bool bInPlace; /* Can we communicate in place? */
127 } gmx_domdec_comm_dim_t;
131 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
132 real *cell_f; /* State var.: cell boundaries, box relative */
133 real *old_cell_f; /* Temp. var.: old cell size */
134 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
135 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
136 real *bound_min; /* Temp. var.: lower limit for cell boundary */
137 real *bound_max; /* Temp. var.: upper limit for cell boundary */
138 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
139 real *buf_ncd; /* Temp. var. */
142 #define DD_NLOAD_MAX 9
144 /* Here floats are accurate enough, since these variables
145 * only influence the load balancing, not the actual MD results.
172 gmx_cgsort_t *sort_new;
184 /* This enum determines the order of the coordinates.
185 * ddnatHOME and ddnatZONE should be first and second,
186 * the others can be ordered as wanted.
189 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
193 edlbAUTO, edlbNO, edlbYES, edlbNR
195 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
199 int dim; /* The dimension */
200 gmx_bool dim_match; /* Tells if DD and PME dims match */
201 int nslab; /* The number of PME slabs in this dimension */
202 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
203 int *pp_min; /* The minimum pp node location, size nslab */
204 int *pp_max; /* The maximum pp node location,size nslab */
205 int maxshift; /* The maximum shift for coordinate redistribution in PME */
210 real min0; /* The minimum bottom of this zone */
211 real max1; /* The maximum top of this zone */
212 real min1; /* The minimum top of this zone */
213 real mch0; /* The maximum bottom communicaton height for this zone */
214 real mch1; /* The maximum top communicaton height for this zone */
215 real p1_0; /* The bottom value of the first cell in this zone */
216 real p1_1; /* The top value of the first cell in this zone */
221 gmx_domdec_ind_t ind;
228 } dd_comm_setup_work_t;
230 typedef struct gmx_domdec_comm
232 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
233 * unless stated otherwise.
236 /* The number of decomposition dimensions for PME, 0: no PME */
238 /* The number of nodes doing PME (PP/PME or only PME) */
242 /* The communication setup including the PME only nodes */
243 gmx_bool bCartesianPP_PME;
246 int *pmenodes; /* size npmenodes */
247 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
248 * but with bCartesianPP_PME */
249 gmx_ddpme_t ddpme[2];
251 /* The DD particle-particle nodes only */
252 gmx_bool bCartesianPP;
253 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
255 /* The global charge groups */
258 /* Should we sort the cgs */
260 gmx_domdec_sort_t *sort;
262 /* Are there charge groups? */
265 /* Are there bonded and multi-body interactions between charge groups? */
266 gmx_bool bInterCGBondeds;
267 gmx_bool bInterCGMultiBody;
269 /* Data for the optional bonded interaction atom communication range */
276 /* Are we actually using DLB? */
277 gmx_bool bDynLoadBal;
279 /* Cell sizes for static load balancing, first index cartesian */
282 /* The width of the communicated boundaries */
285 /* The minimum cell size (including triclinic correction) */
287 /* For dlb, for use with edlbAUTO */
288 rvec cellsize_min_dlb;
289 /* The lower limit for the DD cell size with DLB */
291 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
292 gmx_bool bVacDLBNoLimit;
294 /* With PME load balancing we set limits on DLB */
295 gmx_bool bPMELoadBalDLBLimits;
296 /* DLB needs to take into account that we want to allow this maximum
297 * cut-off (for PME load balancing), this could limit cell boundaries.
299 real PMELoadBal_max_cutoff;
301 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
303 /* box0 and box_size are required with dim's without pbc and -gcom */
307 /* The cell boundaries */
311 /* The old location of the cell boundaries, to check cg displacements */
315 /* The communication setup and charge group boundaries for the zones */
316 gmx_domdec_zones_t zones;
318 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
319 * cell boundaries of neighboring cells for dynamic load balancing.
321 gmx_ddzone_t zone_d1[2];
322 gmx_ddzone_t zone_d2[2][2];
324 /* The coordinate/force communication setup and indices */
325 gmx_domdec_comm_dim_t cd[DIM];
326 /* The maximum number of cells to communicate with in one dimension */
329 /* Which cg distribution is stored on the master node */
330 int master_cg_ddp_count;
332 /* The number of cg's received from the direct neighbors */
333 int zone_ncg1[DD_MAXZONE];
335 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
338 /* Array for signalling if atoms have moved to another domain */
342 /* Communication buffer for general use */
346 /* Communication buffer for general use */
349 /* Temporary storage for thread parallel communication setup */
351 dd_comm_setup_work_t *dth;
353 /* Communication buffers only used with multiple grid pulses */
358 /* Communication buffers for local redistribution */
360 int cggl_flag_nalloc[DIM*2];
362 int cgcm_state_nalloc[DIM*2];
364 /* Cell sizes for dynamic load balancing */
365 gmx_domdec_root_t **root;
369 real cell_f_max0[DIM];
370 real cell_f_min1[DIM];
372 /* Stuff for load communication */
373 gmx_bool bRecordLoad;
374 gmx_domdec_load_t *load;
375 int nrank_gpu_shared;
377 MPI_Comm *mpi_comm_load;
378 MPI_Comm mpi_comm_gpu_shared;
381 /* Maximum DLB scaling per load balancing step in percent */
385 float cycl[ddCyclNr];
386 int cycl_n[ddCyclNr];
387 float cycl_max[ddCyclNr];
388 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
392 /* Have often have did we have load measurements */
394 /* Have often have we collected the load measurements */
398 double sum_nat[ddnatNR-ddnatZONE];
408 /* The last partition step */
409 gmx_int64_t partition_step;
417 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
420 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
421 #define DD_FLAG_NRCG 65535
422 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
423 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
425 /* Zone permutation required to obtain consecutive charge groups
426 * for neighbor searching.
428 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
430 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
431 * components see only j zones with that component 0.
434 /* The DD zone order */
435 static const ivec dd_zo[DD_MAXZONE] =
436 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
441 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
446 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
451 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
453 /* Factors used to avoid problems due to rounding issues */
454 #define DD_CELL_MARGIN 1.0001
455 #define DD_CELL_MARGIN2 1.00005
456 /* Factor to account for pressure scaling during nstlist steps */
457 #define DD_PRES_SCALE_MARGIN 1.02
459 /* Turn on DLB when the load imbalance causes this amount of total loss.
460 * There is a bit of overhead with DLB and it's difficult to achieve
461 * a load imbalance of less than 2% with DLB.
463 #define DD_PERF_LOSS_DLB_ON 0.02
465 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
466 #define DD_PERF_LOSS_WARN 0.05
468 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
470 /* Use separate MPI send and receive commands
471 * when nnodes <= GMX_DD_NNODES_SENDRECV.
472 * This saves memory (and some copying for small nnodes).
473 * For high parallelization scatter and gather calls are used.
475 #define GMX_DD_NNODES_SENDRECV 4
479 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
481 static void index2xyz(ivec nc,int ind,ivec xyz)
483 xyz[XX] = ind % nc[XX];
484 xyz[YY] = (ind / nc[XX]) % nc[YY];
485 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
489 /* This order is required to minimize the coordinate communication in PME
490 * which uses decomposition in the x direction.
492 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
494 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
496 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
497 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
498 xyz[ZZ] = ind % nc[ZZ];
501 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
506 ddindex = dd_index(dd->nc, c);
507 if (dd->comm->bCartesianPP_PME)
509 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
511 else if (dd->comm->bCartesianPP)
514 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
525 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
527 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
530 int ddglatnr(gmx_domdec_t *dd, int i)
540 if (i >= dd->comm->nat[ddnatNR-1])
542 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
544 atnr = dd->gatindex[i] + 1;
550 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
552 return &dd->comm->cgs_gl;
555 static void vec_rvec_init(vec_rvec_t *v)
561 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
565 v->nalloc = over_alloc_dd(n);
566 srenew(v->v, v->nalloc);
570 void dd_store_state(gmx_domdec_t *dd, t_state *state)
574 if (state->ddp_count != dd->ddp_count)
576 gmx_incons("The state does not the domain decomposition state");
579 state->ncg_gl = dd->ncg_home;
580 if (state->ncg_gl > state->cg_gl_nalloc)
582 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
583 srenew(state->cg_gl, state->cg_gl_nalloc);
585 for (i = 0; i < state->ncg_gl; i++)
587 state->cg_gl[i] = dd->index_gl[i];
590 state->ddp_count_cg_gl = dd->ddp_count;
593 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
595 return &dd->comm->zones;
598 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
599 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
601 gmx_domdec_zones_t *zones;
604 zones = &dd->comm->zones;
607 while (icg >= zones->izone[izone].cg1)
616 else if (izone < zones->nizone)
618 *jcg0 = zones->izone[izone].jcg0;
622 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
623 icg, izone, zones->nizone);
626 *jcg1 = zones->izone[izone].jcg1;
628 for (d = 0; d < dd->ndim; d++)
631 shift0[dim] = zones->izone[izone].shift0[dim];
632 shift1[dim] = zones->izone[izone].shift1[dim];
633 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
635 /* A conservative approach, this can be optimized */
642 int dd_natoms_vsite(gmx_domdec_t *dd)
644 return dd->comm->nat[ddnatVSITE];
647 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
649 *at_start = dd->comm->nat[ddnatCON-1];
650 *at_end = dd->comm->nat[ddnatCON];
653 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
655 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
656 int *index, *cgindex;
657 gmx_domdec_comm_t *comm;
658 gmx_domdec_comm_dim_t *cd;
659 gmx_domdec_ind_t *ind;
660 rvec shift = {0, 0, 0}, *buf, *rbuf;
661 gmx_bool bPBC, bScrew;
665 cgindex = dd->cgindex;
670 nat_tot = dd->nat_home;
671 for (d = 0; d < dd->ndim; d++)
673 bPBC = (dd->ci[dd->dim[d]] == 0);
674 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
677 copy_rvec(box[dd->dim[d]], shift);
680 for (p = 0; p < cd->np; p++)
687 for (i = 0; i < ind->nsend[nzone]; i++)
689 at0 = cgindex[index[i]];
690 at1 = cgindex[index[i]+1];
691 for (j = at0; j < at1; j++)
693 copy_rvec(x[j], buf[n]);
700 for (i = 0; i < ind->nsend[nzone]; i++)
702 at0 = cgindex[index[i]];
703 at1 = cgindex[index[i]+1];
704 for (j = at0; j < at1; j++)
706 /* We need to shift the coordinates */
707 rvec_add(x[j], shift, buf[n]);
714 for (i = 0; i < ind->nsend[nzone]; i++)
716 at0 = cgindex[index[i]];
717 at1 = cgindex[index[i]+1];
718 for (j = at0; j < at1; j++)
721 buf[n][XX] = x[j][XX] + shift[XX];
723 * This operation requires a special shift force
724 * treatment, which is performed in calc_vir.
726 buf[n][YY] = box[YY][YY] - x[j][YY];
727 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
739 rbuf = comm->vbuf2.v;
741 /* Send and receive the coordinates */
742 dd_sendrecv_rvec(dd, d, dddirBackward,
743 buf, ind->nsend[nzone+1],
744 rbuf, ind->nrecv[nzone+1]);
748 for (zone = 0; zone < nzone; zone++)
750 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
752 copy_rvec(rbuf[j], x[i]);
757 nat_tot += ind->nrecv[nzone+1];
763 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
765 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
766 int *index, *cgindex;
767 gmx_domdec_comm_t *comm;
768 gmx_domdec_comm_dim_t *cd;
769 gmx_domdec_ind_t *ind;
773 gmx_bool bPBC, bScrew;
777 cgindex = dd->cgindex;
782 nzone = comm->zones.n/2;
783 nat_tot = dd->nat_tot;
784 for (d = dd->ndim-1; d >= 0; d--)
786 bPBC = (dd->ci[dd->dim[d]] == 0);
787 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
788 if (fshift == NULL && !bScrew)
792 /* Determine which shift vector we need */
798 for (p = cd->np-1; p >= 0; p--)
801 nat_tot -= ind->nrecv[nzone+1];
808 sbuf = comm->vbuf2.v;
810 for (zone = 0; zone < nzone; zone++)
812 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
814 copy_rvec(f[i], sbuf[j]);
819 /* Communicate the forces */
820 dd_sendrecv_rvec(dd, d, dddirForward,
821 sbuf, ind->nrecv[nzone+1],
822 buf, ind->nsend[nzone+1]);
824 /* Add the received forces */
828 for (i = 0; i < ind->nsend[nzone]; i++)
830 at0 = cgindex[index[i]];
831 at1 = cgindex[index[i]+1];
832 for (j = at0; j < at1; j++)
834 rvec_inc(f[j], buf[n]);
841 for (i = 0; i < ind->nsend[nzone]; i++)
843 at0 = cgindex[index[i]];
844 at1 = cgindex[index[i]+1];
845 for (j = at0; j < at1; j++)
847 rvec_inc(f[j], buf[n]);
848 /* Add this force to the shift force */
849 rvec_inc(fshift[is], buf[n]);
856 for (i = 0; i < ind->nsend[nzone]; i++)
858 at0 = cgindex[index[i]];
859 at1 = cgindex[index[i]+1];
860 for (j = at0; j < at1; j++)
862 /* Rotate the force */
863 f[j][XX] += buf[n][XX];
864 f[j][YY] -= buf[n][YY];
865 f[j][ZZ] -= buf[n][ZZ];
868 /* Add this force to the shift force */
869 rvec_inc(fshift[is], buf[n]);
880 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
882 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
883 int *index, *cgindex;
884 gmx_domdec_comm_t *comm;
885 gmx_domdec_comm_dim_t *cd;
886 gmx_domdec_ind_t *ind;
891 cgindex = dd->cgindex;
893 buf = &comm->vbuf.v[0][0];
896 nat_tot = dd->nat_home;
897 for (d = 0; d < dd->ndim; d++)
900 for (p = 0; p < cd->np; p++)
905 for (i = 0; i < ind->nsend[nzone]; i++)
907 at0 = cgindex[index[i]];
908 at1 = cgindex[index[i]+1];
909 for (j = at0; j < at1; j++)
922 rbuf = &comm->vbuf2.v[0][0];
924 /* Send and receive the coordinates */
925 dd_sendrecv_real(dd, d, dddirBackward,
926 buf, ind->nsend[nzone+1],
927 rbuf, ind->nrecv[nzone+1]);
931 for (zone = 0; zone < nzone; zone++)
933 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
940 nat_tot += ind->nrecv[nzone+1];
946 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
948 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
949 int *index, *cgindex;
950 gmx_domdec_comm_t *comm;
951 gmx_domdec_comm_dim_t *cd;
952 gmx_domdec_ind_t *ind;
957 cgindex = dd->cgindex;
959 buf = &comm->vbuf.v[0][0];
962 nzone = comm->zones.n/2;
963 nat_tot = dd->nat_tot;
964 for (d = dd->ndim-1; d >= 0; d--)
967 for (p = cd->np-1; p >= 0; p--)
970 nat_tot -= ind->nrecv[nzone+1];
977 sbuf = &comm->vbuf2.v[0][0];
979 for (zone = 0; zone < nzone; zone++)
981 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
988 /* Communicate the forces */
989 dd_sendrecv_real(dd, d, dddirForward,
990 sbuf, ind->nrecv[nzone+1],
991 buf, ind->nsend[nzone+1]);
993 /* Add the received forces */
995 for (i = 0; i < ind->nsend[nzone]; i++)
997 at0 = cgindex[index[i]];
998 at1 = cgindex[index[i]+1];
999 for (j = at0; j < at1; j++)
1010 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
1012 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1014 zone->min0, zone->max1,
1015 zone->mch0, zone->mch0,
1016 zone->p1_0, zone->p1_1);
1020 #define DDZONECOMM_MAXZONE 5
1021 #define DDZONECOMM_BUFSIZE 3
1023 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1024 int ddimind, int direction,
1025 gmx_ddzone_t *buf_s, int n_s,
1026 gmx_ddzone_t *buf_r, int n_r)
1028 #define ZBS DDZONECOMM_BUFSIZE
1029 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1030 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
1033 for (i = 0; i < n_s; i++)
1035 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1036 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1037 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1038 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1039 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1040 vbuf_s[i*ZBS+1][2] = 0;
1041 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1042 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1043 vbuf_s[i*ZBS+2][2] = 0;
1046 dd_sendrecv_rvec(dd, ddimind, direction,
1050 for (i = 0; i < n_r; i++)
1052 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1053 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1054 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1055 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1056 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1057 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1058 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
1064 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1065 rvec cell_ns_x0, rvec cell_ns_x1)
1067 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1069 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1070 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1071 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1072 rvec extr_s[2], extr_r[2];
1074 real dist_d, c = 0, det;
1075 gmx_domdec_comm_t *comm;
1076 gmx_bool bPBC, bUse;
1080 for (d = 1; d < dd->ndim; d++)
1083 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1084 zp->min0 = cell_ns_x0[dim];
1085 zp->max1 = cell_ns_x1[dim];
1086 zp->min1 = cell_ns_x1[dim];
1087 zp->mch0 = cell_ns_x0[dim];
1088 zp->mch1 = cell_ns_x1[dim];
1089 zp->p1_0 = cell_ns_x0[dim];
1090 zp->p1_1 = cell_ns_x1[dim];
1093 for (d = dd->ndim-2; d >= 0; d--)
1096 bPBC = (dim < ddbox->npbcdim);
1098 /* Use an rvec to store two reals */
1099 extr_s[d][0] = comm->cell_f0[d+1];
1100 extr_s[d][1] = comm->cell_f1[d+1];
1101 extr_s[d][2] = comm->cell_f1[d+1];
1104 /* Store the extremes in the backward sending buffer,
1105 * so the get updated separately from the forward communication.
1107 for (d1 = d; d1 < dd->ndim-1; d1++)
1109 /* We invert the order to be able to use the same loop for buf_e */
1110 buf_s[pos].min0 = extr_s[d1][1];
1111 buf_s[pos].max1 = extr_s[d1][0];
1112 buf_s[pos].min1 = extr_s[d1][2];
1113 buf_s[pos].mch0 = 0;
1114 buf_s[pos].mch1 = 0;
1115 /* Store the cell corner of the dimension we communicate along */
1116 buf_s[pos].p1_0 = comm->cell_x0[dim];
1117 buf_s[pos].p1_1 = 0;
1121 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1124 if (dd->ndim == 3 && d == 0)
1126 buf_s[pos] = comm->zone_d2[0][1];
1128 buf_s[pos] = comm->zone_d1[0];
1132 /* We only need to communicate the extremes
1133 * in the forward direction
1135 npulse = comm->cd[d].np;
1138 /* Take the minimum to avoid double communication */
1139 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1143 /* Without PBC we should really not communicate over
1144 * the boundaries, but implementing that complicates
1145 * the communication setup and therefore we simply
1146 * do all communication, but ignore some data.
1148 npulse_min = npulse;
1150 for (p = 0; p < npulse_min; p++)
1152 /* Communicate the extremes forward */
1153 bUse = (bPBC || dd->ci[dim] > 0);
1155 dd_sendrecv_rvec(dd, d, dddirForward,
1156 extr_s+d, dd->ndim-d-1,
1157 extr_r+d, dd->ndim-d-1);
1161 for (d1 = d; d1 < dd->ndim-1; d1++)
1163 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1164 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1165 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1171 for (p = 0; p < npulse; p++)
1173 /* Communicate all the zone information backward */
1174 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1176 dd_sendrecv_ddzone(dd, d, dddirBackward,
1183 for (d1 = d+1; d1 < dd->ndim; d1++)
1185 /* Determine the decrease of maximum required
1186 * communication height along d1 due to the distance along d,
1187 * this avoids a lot of useless atom communication.
1189 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1191 if (ddbox->tric_dir[dim])
1193 /* c is the off-diagonal coupling between the cell planes
1194 * along directions d and d1.
1196 c = ddbox->v[dim][dd->dim[d1]][dim];
1202 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1205 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1209 /* A negative value signals out of range */
1215 /* Accumulate the extremes over all pulses */
1216 for (i = 0; i < buf_size; i++)
1220 buf_e[i] = buf_r[i];
1226 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1227 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1228 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1231 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1239 if (bUse && dh[d1] >= 0)
1241 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1242 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1245 /* Copy the received buffer to the send buffer,
1246 * to pass the data through with the next pulse.
1248 buf_s[i] = buf_r[i];
1250 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1251 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1253 /* Store the extremes */
1256 for (d1 = d; d1 < dd->ndim-1; d1++)
1258 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1259 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1260 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1264 if (d == 1 || (d == 0 && dd->ndim == 3))
1266 for (i = d; i < 2; i++)
1268 comm->zone_d2[1-d][i] = buf_e[pos];
1274 comm->zone_d1[1] = buf_e[pos];
1284 for (i = 0; i < 2; i++)
1288 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1290 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1291 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1297 for (i = 0; i < 2; i++)
1299 for (j = 0; j < 2; j++)
1303 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1305 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1306 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1310 for (d = 1; d < dd->ndim; d++)
1312 comm->cell_f_max0[d] = extr_s[d-1][0];
1313 comm->cell_f_min1[d] = extr_s[d-1][1];
1316 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1317 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1322 static void dd_collect_cg(gmx_domdec_t *dd,
1323 t_state *state_local)
1325 gmx_domdec_master_t *ma = NULL;
1326 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1328 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1330 /* The master has the correct distribution */
1334 if (state_local->ddp_count == dd->ddp_count)
1336 /* The local state and DD are in sync, use the DD indices */
1337 ncg_home = dd->ncg_home;
1339 nat_home = dd->nat_home;
1341 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1343 /* The DD is out of sync with the local state, but we have stored
1344 * the cg indices with the local state, so we can use those.
1348 cgs_gl = &dd->comm->cgs_gl;
1350 ncg_home = state_local->ncg_gl;
1351 cg = state_local->cg_gl;
1353 for (i = 0; i < ncg_home; i++)
1355 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1360 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1374 /* Collect the charge group and atom counts on the master */
1375 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1380 for (i = 0; i < dd->nnodes; i++)
1382 ma->ncg[i] = ma->ibuf[2*i];
1383 ma->nat[i] = ma->ibuf[2*i+1];
1384 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1387 /* Make byte counts and indices */
1388 for (i = 0; i < dd->nnodes; i++)
1390 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1391 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1395 fprintf(debug, "Initial charge group distribution: ");
1396 for (i = 0; i < dd->nnodes; i++)
1398 fprintf(debug, " %d", ma->ncg[i]);
1400 fprintf(debug, "\n");
1404 /* Collect the charge group indices on the master */
1406 ncg_home*sizeof(int), cg,
1407 DDMASTER(dd) ? ma->ibuf : NULL,
1408 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1409 DDMASTER(dd) ? ma->cg : NULL);
1411 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1414 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1417 gmx_domdec_master_t *ma;
1418 int n, i, c, a, nalloc = 0;
1427 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1428 dd->rank, dd->mpi_comm_all);
1433 /* Copy the master coordinates to the global array */
1434 cgs_gl = &dd->comm->cgs_gl;
1436 n = DDMASTERRANK(dd);
1438 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1440 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1442 copy_rvec(lv[a++], v[c]);
1446 for (n = 0; n < dd->nnodes; n++)
1450 if (ma->nat[n] > nalloc)
1452 nalloc = over_alloc_dd(ma->nat[n]);
1453 srenew(buf, nalloc);
1456 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1457 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1460 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1462 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1464 copy_rvec(buf[a++], v[c]);
1473 static void get_commbuffer_counts(gmx_domdec_t *dd,
1474 int **counts, int **disps)
1476 gmx_domdec_master_t *ma;
1481 /* Make the rvec count and displacment arrays */
1483 *disps = ma->ibuf + dd->nnodes;
1484 for (n = 0; n < dd->nnodes; n++)
1486 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1487 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1491 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1494 gmx_domdec_master_t *ma;
1495 int *rcounts = NULL, *disps = NULL;
1504 get_commbuffer_counts(dd, &rcounts, &disps);
1509 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1513 cgs_gl = &dd->comm->cgs_gl;
1516 for (n = 0; n < dd->nnodes; n++)
1518 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1520 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1522 copy_rvec(buf[a++], v[c]);
1529 void dd_collect_vec(gmx_domdec_t *dd,
1530 t_state *state_local, rvec *lv, rvec *v)
1532 gmx_domdec_master_t *ma;
1533 int n, i, c, a, nalloc = 0;
1536 dd_collect_cg(dd, state_local);
1538 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1540 dd_collect_vec_sendrecv(dd, lv, v);
1544 dd_collect_vec_gatherv(dd, lv, v);
1549 void dd_collect_state(gmx_domdec_t *dd,
1550 t_state *state_local, t_state *state)
1554 nh = state->nhchainlength;
1558 for (i = 0; i < efptNR; i++)
1560 state->lambda[i] = state_local->lambda[i];
1562 state->fep_state = state_local->fep_state;
1563 state->veta = state_local->veta;
1564 state->vol0 = state_local->vol0;
1565 copy_mat(state_local->box, state->box);
1566 copy_mat(state_local->boxv, state->boxv);
1567 copy_mat(state_local->svir_prev, state->svir_prev);
1568 copy_mat(state_local->fvir_prev, state->fvir_prev);
1569 copy_mat(state_local->pres_prev, state->pres_prev);
1571 for (i = 0; i < state_local->ngtc; i++)
1573 for (j = 0; j < nh; j++)
1575 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1576 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1578 state->therm_integral[i] = state_local->therm_integral[i];
1580 for (i = 0; i < state_local->nnhpres; i++)
1582 for (j = 0; j < nh; j++)
1584 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1585 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1589 for (est = 0; est < estNR; est++)
1591 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1596 dd_collect_vec(dd, state_local, state_local->x, state->x);
1599 dd_collect_vec(dd, state_local, state_local->v, state->v);
1602 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1605 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1607 case estDISRE_INITF:
1608 case estDISRE_RM3TAV:
1609 case estORIRE_INITF:
1613 gmx_incons("Unknown state entry encountered in dd_collect_state");
1619 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1625 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1628 state->nalloc = over_alloc_dd(nalloc);
1630 for (est = 0; est < estNR; est++)
1632 if (EST_DISTR(est) && (state->flags & (1<<est)))
1637 srenew(state->x, state->nalloc);
1640 srenew(state->v, state->nalloc);
1643 srenew(state->sd_X, state->nalloc);
1646 srenew(state->cg_p, state->nalloc);
1648 case estDISRE_INITF:
1649 case estDISRE_RM3TAV:
1650 case estORIRE_INITF:
1652 /* No reallocation required */
1655 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1662 srenew(*f, state->nalloc);
1666 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1669 if (nalloc > fr->cg_nalloc)
1673 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1675 fr->cg_nalloc = over_alloc_dd(nalloc);
1676 srenew(fr->cginfo, fr->cg_nalloc);
1677 if (fr->cutoff_scheme == ecutsGROUP)
1679 srenew(fr->cg_cm, fr->cg_nalloc);
1682 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1684 /* We don't use charge groups, we use x in state to set up
1685 * the atom communication.
1687 dd_realloc_state(state, f, nalloc);
1691 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1694 gmx_domdec_master_t *ma;
1695 int n, i, c, a, nalloc = 0;
1702 for (n = 0; n < dd->nnodes; n++)
1706 if (ma->nat[n] > nalloc)
1708 nalloc = over_alloc_dd(ma->nat[n]);
1709 srenew(buf, nalloc);
1711 /* Use lv as a temporary buffer */
1713 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1715 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1717 copy_rvec(v[c], buf[a++]);
1720 if (a != ma->nat[n])
1722 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1727 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1728 DDRANK(dd, n), n, dd->mpi_comm_all);
1733 n = DDMASTERRANK(dd);
1735 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1737 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1739 copy_rvec(v[c], lv[a++]);
1746 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1747 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1752 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1755 gmx_domdec_master_t *ma;
1756 int *scounts = NULL, *disps = NULL;
1757 int n, i, c, a, nalloc = 0;
1764 get_commbuffer_counts(dd, &scounts, &disps);
1768 for (n = 0; n < dd->nnodes; n++)
1770 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1772 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1774 copy_rvec(v[c], buf[a++]);
1780 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1783 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1785 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1787 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1791 dd_distribute_vec_scatterv(dd, cgs, v, lv);
1795 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1798 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1799 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1800 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1802 if (dfhist->nlambda > 0)
1804 int nlam = dfhist->nlambda;
1805 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1806 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1807 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1808 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1809 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1810 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
1812 for (i = 0; i < nlam; i++)
1814 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1815 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1816 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1817 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1818 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1819 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
1824 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1825 t_state *state, t_state *state_local,
1830 nh = state->nhchainlength;
1834 for (i = 0; i < efptNR; i++)
1836 state_local->lambda[i] = state->lambda[i];
1838 state_local->fep_state = state->fep_state;
1839 state_local->veta = state->veta;
1840 state_local->vol0 = state->vol0;
1841 copy_mat(state->box, state_local->box);
1842 copy_mat(state->box_rel, state_local->box_rel);
1843 copy_mat(state->boxv, state_local->boxv);
1844 copy_mat(state->svir_prev, state_local->svir_prev);
1845 copy_mat(state->fvir_prev, state_local->fvir_prev);
1846 copy_df_history(&state_local->dfhist, &state->dfhist);
1847 for (i = 0; i < state_local->ngtc; i++)
1849 for (j = 0; j < nh; j++)
1851 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1852 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1854 state_local->therm_integral[i] = state->therm_integral[i];
1856 for (i = 0; i < state_local->nnhpres; i++)
1858 for (j = 0; j < nh; j++)
1860 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1861 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1865 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1866 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1867 dd_bcast(dd, sizeof(real), &state_local->veta);
1868 dd_bcast(dd, sizeof(real), &state_local->vol0);
1869 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1870 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1871 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1872 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1873 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1874 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1875 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1876 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1877 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1878 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1880 /* communicate df_history -- required for restarting from checkpoint */
1881 dd_distribute_dfhist(dd, &state_local->dfhist);
1883 if (dd->nat_home > state_local->nalloc)
1885 dd_realloc_state(state_local, f, dd->nat_home);
1887 for (i = 0; i < estNR; i++)
1889 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1894 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1897 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1900 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1903 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1905 case estDISRE_INITF:
1906 case estDISRE_RM3TAV:
1907 case estORIRE_INITF:
1909 /* Not implemented yet */
1912 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1918 static char dim2char(int dim)
1924 case XX: c = 'X'; break;
1925 case YY: c = 'Y'; break;
1926 case ZZ: c = 'Z'; break;
1927 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
1933 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1934 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1936 rvec grid_s[2], *grid_r = NULL, cx, r;
1937 char fname[STRLEN], buf[22];
1939 int a, i, d, z, y, x;
1943 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1944 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1948 snew(grid_r, 2*dd->nnodes);
1951 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : NULL);
1955 for (d = 0; d < DIM; d++)
1957 for (i = 0; i < DIM; i++)
1965 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1967 tric[d][i] = box[i][d]/box[i][i];
1976 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1977 out = gmx_fio_fopen(fname, "w");
1978 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1980 for (i = 0; i < dd->nnodes; i++)
1982 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1983 for (d = 0; d < DIM; d++)
1985 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1987 for (z = 0; z < 2; z++)
1989 for (y = 0; y < 2; y++)
1991 for (x = 0; x < 2; x++)
1993 cx[XX] = grid_r[i*2+x][XX];
1994 cx[YY] = grid_r[i*2+y][YY];
1995 cx[ZZ] = grid_r[i*2+z][ZZ];
1997 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1998 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
2002 for (d = 0; d < DIM; d++)
2004 for (x = 0; x < 4; x++)
2008 case 0: y = 1 + i*8 + 2*x; break;
2009 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
2010 case 2: y = 1 + i*8 + x; break;
2012 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2016 gmx_fio_fclose(out);
2021 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
2022 gmx_mtop_t *mtop, t_commrec *cr,
2023 int natoms, rvec x[], matrix box)
2025 char fname[STRLEN], buf[22];
2027 int i, ii, resnr, c;
2028 char *atomname, *resname;
2035 natoms = dd->comm->nat[ddnatVSITE];
2038 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2040 out = gmx_fio_fopen(fname, "w");
2042 fprintf(out, "TITLE %s\n", title);
2043 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2044 for (i = 0; i < natoms; i++)
2046 ii = dd->gatindex[i];
2047 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
2048 if (i < dd->comm->nat[ddnatZONE])
2051 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2057 else if (i < dd->comm->nat[ddnatVSITE])
2059 b = dd->comm->zones.n;
2063 b = dd->comm->zones.n + 1;
2065 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
2066 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
2068 fprintf(out, "TER\n");
2070 gmx_fio_fclose(out);
2073 real dd_cutoff_mbody(gmx_domdec_t *dd)
2075 gmx_domdec_comm_t *comm;
2082 if (comm->bInterCGBondeds)
2084 if (comm->cutoff_mbody > 0)
2086 r = comm->cutoff_mbody;
2090 /* cutoff_mbody=0 means we do not have DLB */
2091 r = comm->cellsize_min[dd->dim[0]];
2092 for (di = 1; di < dd->ndim; di++)
2094 r = min(r, comm->cellsize_min[dd->dim[di]]);
2096 if (comm->bBondComm)
2098 r = max(r, comm->cutoff_mbody);
2102 r = min(r, comm->cutoff);
2110 real dd_cutoff_twobody(gmx_domdec_t *dd)
2114 r_mb = dd_cutoff_mbody(dd);
2116 return max(dd->comm->cutoff, r_mb);
2120 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2124 nc = dd->nc[dd->comm->cartpmedim];
2125 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2126 copy_ivec(coord, coord_pme);
2127 coord_pme[dd->comm->cartpmedim] =
2128 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
2131 static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
2133 /* Here we assign a PME node to communicate with this DD node
2134 * by assuming that the major index of both is x.
2135 * We add cr->npmenodes/2 to obtain an even distribution.
2137 return (ddindex*npme + npme/2)/ndd;
2140 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2142 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2145 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2147 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
2150 static int *dd_pmenodes(t_commrec *cr)
2155 snew(pmenodes, cr->npmenodes);
2157 for (i = 0; i < cr->dd->nnodes; i++)
2159 p0 = cr_ddindex2pmeindex(cr, i);
2160 p1 = cr_ddindex2pmeindex(cr, i+1);
2161 if (i+1 == cr->dd->nnodes || p1 > p0)
2165 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2167 pmenodes[n] = i + 1 + n;
2175 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2178 ivec coords, coords_pme, nc;
2183 if (dd->comm->bCartesian) {
2184 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2185 dd_coords2pmecoords(dd,coords,coords_pme);
2186 copy_ivec(dd->ntot,nc);
2187 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2188 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2190 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2192 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2198 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
2203 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2205 gmx_domdec_comm_t *comm;
2207 int ddindex, nodeid = -1;
2209 comm = cr->dd->comm;
2214 if (comm->bCartesianPP_PME)
2217 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2222 ddindex = dd_index(cr->dd->nc, coords);
2223 if (comm->bCartesianPP)
2225 nodeid = comm->ddindex2simnodeid[ddindex];
2231 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
2243 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2246 gmx_domdec_comm_t *comm;
2247 ivec coord, coord_pme;
2254 /* This assumes a uniform x domain decomposition grid cell size */
2255 if (comm->bCartesianPP_PME)
2258 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2259 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2261 /* This is a PP node */
2262 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2263 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2267 else if (comm->bCartesianPP)
2269 if (sim_nodeid < dd->nnodes)
2271 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2276 /* This assumes DD cells with identical x coordinates
2277 * are numbered sequentially.
2279 if (dd->comm->pmenodes == NULL)
2281 if (sim_nodeid < dd->nnodes)
2283 /* The DD index equals the nodeid */
2284 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2290 while (sim_nodeid > dd->comm->pmenodes[i])
2294 if (sim_nodeid < dd->comm->pmenodes[i])
2296 pmenode = dd->comm->pmenodes[i];
2304 void get_pme_nnodes(const gmx_domdec_t *dd,
2305 int *npmenodes_x, int *npmenodes_y)
2309 *npmenodes_x = dd->comm->npmenodes_x;
2310 *npmenodes_y = dd->comm->npmenodes_y;
2319 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2321 gmx_bool bPMEOnlyNode;
2323 if (DOMAINDECOMP(cr))
2325 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2329 bPMEOnlyNode = FALSE;
2332 return bPMEOnlyNode;
2335 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2336 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2340 ivec coord, coord_pme;
2344 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2347 for (x = 0; x < dd->nc[XX]; x++)
2349 for (y = 0; y < dd->nc[YY]; y++)
2351 for (z = 0; z < dd->nc[ZZ]; z++)
2353 if (dd->comm->bCartesianPP_PME)
2358 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2359 if (dd->ci[XX] == coord_pme[XX] &&
2360 dd->ci[YY] == coord_pme[YY] &&
2361 dd->ci[ZZ] == coord_pme[ZZ])
2363 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2368 /* The slab corresponds to the nodeid in the PME group */
2369 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2371 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2378 /* The last PP-only node is the peer node */
2379 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2383 fprintf(debug, "Receive coordinates from PP ranks:");
2384 for (x = 0; x < *nmy_ddnodes; x++)
2386 fprintf(debug, " %d", (*my_ddnodes)[x]);
2388 fprintf(debug, "\n");
2392 static gmx_bool receive_vir_ener(t_commrec *cr)
2394 gmx_domdec_comm_t *comm;
2395 int pmenode, coords[DIM], rank;
2399 if (cr->npmenodes < cr->dd->nnodes)
2401 comm = cr->dd->comm;
2402 if (comm->bCartesianPP_PME)
2404 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2406 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2407 coords[comm->cartpmedim]++;
2408 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2410 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2411 if (dd_simnode2pmenode(cr, rank) == pmenode)
2413 /* This is not the last PP node for pmenode */
2421 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2422 if (cr->sim_nodeid+1 < cr->nnodes &&
2423 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2425 /* This is not the last PP node for pmenode */
2434 static void set_zones_ncg_home(gmx_domdec_t *dd)
2436 gmx_domdec_zones_t *zones;
2439 zones = &dd->comm->zones;
2441 zones->cg_range[0] = 0;
2442 for (i = 1; i < zones->n+1; i++)
2444 zones->cg_range[i] = dd->ncg_home;
2446 /* zone_ncg1[0] should always be equal to ncg_home */
2447 dd->comm->zone_ncg1[0] = dd->ncg_home;
2450 static void rebuild_cgindex(gmx_domdec_t *dd,
2451 const int *gcgs_index, t_state *state)
2453 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2456 dd_cg_gl = dd->index_gl;
2457 cgindex = dd->cgindex;
2460 for (i = 0; i < state->ncg_gl; i++)
2464 dd_cg_gl[i] = cg_gl;
2465 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2469 dd->ncg_home = state->ncg_gl;
2472 set_zones_ncg_home(dd);
2475 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2477 while (cg >= cginfo_mb->cg_end)
2482 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2485 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2486 t_forcerec *fr, char *bLocalCG)
2488 cginfo_mb_t *cginfo_mb;
2494 cginfo_mb = fr->cginfo_mb;
2495 cginfo = fr->cginfo;
2497 for (cg = cg0; cg < cg1; cg++)
2499 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2503 if (bLocalCG != NULL)
2505 for (cg = cg0; cg < cg1; cg++)
2507 bLocalCG[index_gl[cg]] = TRUE;
2512 static void make_dd_indices(gmx_domdec_t *dd,
2513 const int *gcgs_index, int cg_start)
2515 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2516 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2521 bLocalCG = dd->comm->bLocalCG;
2523 if (dd->nat_tot > dd->gatindex_nalloc)
2525 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2526 srenew(dd->gatindex, dd->gatindex_nalloc);
2529 nzone = dd->comm->zones.n;
2530 zone2cg = dd->comm->zones.cg_range;
2531 zone_ncg1 = dd->comm->zone_ncg1;
2532 index_gl = dd->index_gl;
2533 gatindex = dd->gatindex;
2534 bCGs = dd->comm->bCGs;
2536 if (zone2cg[1] != dd->ncg_home)
2538 gmx_incons("dd->ncg_zone is not up to date");
2541 /* Make the local to global and global to local atom index */
2542 a = dd->cgindex[cg_start];
2543 for (zone = 0; zone < nzone; zone++)
2551 cg0 = zone2cg[zone];
2553 cg1 = zone2cg[zone+1];
2554 cg1_p1 = cg0 + zone_ncg1[zone];
2556 for (cg = cg0; cg < cg1; cg++)
2561 /* Signal that this cg is from more than one pulse away */
2564 cg_gl = index_gl[cg];
2567 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2570 ga2la_set(dd->ga2la, a_gl, a, zone1);
2576 gatindex[a] = cg_gl;
2577 ga2la_set(dd->ga2la, cg_gl, a, zone1);
2584 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2587 int ncg, i, ngl, nerr;
2590 if (bLocalCG == NULL)
2594 for (i = 0; i < dd->ncg_tot; i++)
2596 if (!bLocalCG[dd->index_gl[i]])
2599 "DD rank %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
2604 for (i = 0; i < ncg_sys; i++)
2611 if (ngl != dd->ncg_tot)
2613 fprintf(stderr, "DD rank %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
2620 static void check_index_consistency(gmx_domdec_t *dd,
2621 int natoms_sys, int ncg_sys,
2624 int nerr, ngl, i, a, cell;
2629 if (dd->comm->DD_debug > 1)
2631 snew(have, natoms_sys);
2632 for (a = 0; a < dd->nat_tot; a++)
2634 if (have[dd->gatindex[a]] > 0)
2636 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2640 have[dd->gatindex[a]] = a + 1;
2646 snew(have, dd->nat_tot);
2649 for (i = 0; i < natoms_sys; i++)
2651 if (ga2la_get(dd->ga2la, i, &a, &cell))
2653 if (a >= dd->nat_tot)
2655 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2661 if (dd->gatindex[a] != i)
2663 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2670 if (ngl != dd->nat_tot)
2673 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
2674 dd->rank, where, ngl, dd->nat_tot);
2676 for (a = 0; a < dd->nat_tot; a++)
2681 "DD rank %d, %s: local atom %d, global %d has no global index\n",
2682 dd->rank, where, a+1, dd->gatindex[a]+1);
2687 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2691 gmx_fatal(FARGS, "DD rank %d, %s: %d atom/cg index inconsistencies",
2692 dd->rank, where, nerr);
2696 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2703 /* Clear the whole list without searching */
2704 ga2la_clear(dd->ga2la);
2708 for (i = a_start; i < dd->nat_tot; i++)
2710 ga2la_del(dd->ga2la, dd->gatindex[i]);
2714 bLocalCG = dd->comm->bLocalCG;
2717 for (i = cg_start; i < dd->ncg_tot; i++)
2719 bLocalCG[dd->index_gl[i]] = FALSE;
2723 dd_clear_local_vsite_indices(dd);
2725 if (dd->constraints)
2727 dd_clear_local_constraint_indices(dd);
2731 /* This function should be used for moving the domain boudaries during DLB,
2732 * for obtaining the minimum cell size. It checks the initially set limit
2733 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2734 * and, possibly, a longer cut-off limit set for PME load balancing.
2736 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2740 cellsize_min = comm->cellsize_min[dim];
2742 if (!comm->bVacDLBNoLimit)
2744 /* The cut-off might have changed, e.g. by PME load balacning,
2745 * from the value used to set comm->cellsize_min, so check it.
2747 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2749 if (comm->bPMELoadBalDLBLimits)
2751 /* Check for the cut-off limit set by the PME load balancing */
2752 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2756 return cellsize_min;
2759 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2762 real grid_jump_limit;
2764 /* The distance between the boundaries of cells at distance
2765 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2766 * and by the fact that cells should not be shifted by more than
2767 * half their size, such that cg's only shift by one cell
2768 * at redecomposition.
2770 grid_jump_limit = comm->cellsize_limit;
2771 if (!comm->bVacDLBNoLimit)
2773 if (comm->bPMELoadBalDLBLimits)
2775 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2777 grid_jump_limit = max(grid_jump_limit,
2778 cutoff/comm->cd[dim_ind].np);
2781 return grid_jump_limit;
2784 static gmx_bool check_grid_jump(gmx_int64_t step,
2790 gmx_domdec_comm_t *comm;
2799 for (d = 1; d < dd->ndim; d++)
2802 limit = grid_jump_limit(comm, cutoff, d);
2803 bfac = ddbox->box_size[dim];
2804 if (ddbox->tric_dir[dim])
2806 bfac *= ddbox->skew_fac[dim];
2808 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2809 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2817 /* This error should never be triggered under normal
2818 * circumstances, but you never know ...
2820 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
2821 gmx_step_str(step, buf),
2822 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2830 static int dd_load_count(gmx_domdec_comm_t *comm)
2832 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2835 static float dd_force_load(gmx_domdec_comm_t *comm)
2842 if (comm->eFlop > 1)
2844 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2849 load = comm->cycl[ddCyclF];
2850 if (comm->cycl_n[ddCyclF] > 1)
2852 /* Subtract the maximum of the last n cycle counts
2853 * to get rid of possible high counts due to other sources,
2854 * for instance system activity, that would otherwise
2855 * affect the dynamic load balancing.
2857 load -= comm->cycl_max[ddCyclF];
2861 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2863 float gpu_wait, gpu_wait_sum;
2865 gpu_wait = comm->cycl[ddCyclWaitGPU];
2866 if (comm->cycl_n[ddCyclF] > 1)
2868 /* We should remove the WaitGPU time of the same MD step
2869 * as the one with the maximum F time, since the F time
2870 * and the wait time are not independent.
2871 * Furthermore, the step for the max F time should be chosen
2872 * the same on all ranks that share the same GPU.
2873 * But to keep the code simple, we remove the average instead.
2874 * The main reason for artificially long times at some steps
2875 * is spurious CPU activity or MPI time, so we don't expect
2876 * that changes in the GPU wait time matter a lot here.
2878 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2880 /* Sum the wait times over the ranks that share the same GPU */
2881 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2882 comm->mpi_comm_gpu_shared);
2883 /* Replace the wait time by the average over the ranks */
2884 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
2892 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2894 gmx_domdec_comm_t *comm;
2899 snew(*dim_f, dd->nc[dim]+1);
2901 for (i = 1; i < dd->nc[dim]; i++)
2903 if (comm->slb_frac[dim])
2905 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2909 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2912 (*dim_f)[dd->nc[dim]] = 1;
2915 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2917 int pmeindex, slab, nso, i;
2920 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2926 ddpme->dim = dimind;
2928 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2930 ddpme->nslab = (ddpme->dim == 0 ?
2931 dd->comm->npmenodes_x :
2932 dd->comm->npmenodes_y);
2934 if (ddpme->nslab <= 1)
2939 nso = dd->comm->npmenodes/ddpme->nslab;
2940 /* Determine for each PME slab the PP location range for dimension dim */
2941 snew(ddpme->pp_min, ddpme->nslab);
2942 snew(ddpme->pp_max, ddpme->nslab);
2943 for (slab = 0; slab < ddpme->nslab; slab++)
2945 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2946 ddpme->pp_max[slab] = 0;
2948 for (i = 0; i < dd->nnodes; i++)
2950 ddindex2xyz(dd->nc, i, xyz);
2951 /* For y only use our y/z slab.
2952 * This assumes that the PME x grid size matches the DD grid size.
2954 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2956 pmeindex = ddindex2pmeindex(dd, i);
2959 slab = pmeindex/nso;
2963 slab = pmeindex % ddpme->nslab;
2965 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
2966 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
2970 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
2973 int dd_pme_maxshift_x(gmx_domdec_t *dd)
2975 if (dd->comm->ddpme[0].dim == XX)
2977 return dd->comm->ddpme[0].maxshift;
2985 int dd_pme_maxshift_y(gmx_domdec_t *dd)
2987 if (dd->comm->ddpme[0].dim == YY)
2989 return dd->comm->ddpme[0].maxshift;
2991 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2993 return dd->comm->ddpme[1].maxshift;
3001 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
3002 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
3004 gmx_domdec_comm_t *comm;
3007 real range, pme_boundary;
3011 nc = dd->nc[ddpme->dim];
3014 if (!ddpme->dim_match)
3016 /* PP decomposition is not along dim: the worst situation */
3019 else if (ns <= 3 || (bUniform && ns == nc))
3021 /* The optimal situation */
3026 /* We need to check for all pme nodes which nodes they
3027 * could possibly need to communicate with.
3029 xmin = ddpme->pp_min;
3030 xmax = ddpme->pp_max;
3031 /* Allow for atoms to be maximally 2/3 times the cut-off
3032 * out of their DD cell. This is a reasonable balance between
3033 * between performance and support for most charge-group/cut-off
3036 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3037 /* Avoid extra communication when we are exactly at a boundary */
3041 for (s = 0; s < ns; s++)
3043 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3044 pme_boundary = (real)s/ns;
3047 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3049 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3053 pme_boundary = (real)(s+1)/ns;
3056 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3058 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3065 ddpme->maxshift = sh;
3069 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3070 ddpme->dim, ddpme->maxshift);
3074 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3078 for (d = 0; d < dd->ndim; d++)
3081 if (dim < ddbox->nboundeddim &&
3082 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3083 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3085 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3086 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3087 dd->nc[dim], dd->comm->cellsize_limit);
3093 setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY
3096 /* Set the domain boundaries. Use for static (or no) load balancing,
3097 * and also for the starting state for dynamic load balancing.
3098 * setmode determine if and where the boundaries are stored, use enum above.
3099 * Returns the number communication pulses in npulse.
3101 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3102 int setmode, ivec npulse)
3104 gmx_domdec_comm_t *comm;
3107 real *cell_x, cell_dx, cellsize;
3111 for (d = 0; d < DIM; d++)
3113 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
3115 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3118 cell_dx = ddbox->box_size[d]/dd->nc[d];
3121 case setcellsizeslbMASTER:
3122 for (j = 0; j < dd->nc[d]+1; j++)
3124 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3127 case setcellsizeslbLOCAL:
3128 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3129 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
3134 cellsize = cell_dx*ddbox->skew_fac[d];
3135 while (cellsize*npulse[d] < comm->cutoff)
3139 cellsize_min[d] = cellsize;
3143 /* Statically load balanced grid */
3144 /* Also when we are not doing a master distribution we determine
3145 * all cell borders in a loop to obtain identical values
3146 * to the master distribution case and to determine npulse.
3148 if (setmode == setcellsizeslbMASTER)
3150 cell_x = dd->ma->cell_x[d];
3154 snew(cell_x, dd->nc[d]+1);
3156 cell_x[0] = ddbox->box0[d];
3157 for (j = 0; j < dd->nc[d]; j++)
3159 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3160 cell_x[j+1] = cell_x[j] + cell_dx;
3161 cellsize = cell_dx*ddbox->skew_fac[d];
3162 while (cellsize*npulse[d] < comm->cutoff &&
3163 npulse[d] < dd->nc[d]-1)
3167 cellsize_min[d] = min(cellsize_min[d], cellsize);
3169 if (setmode == setcellsizeslbLOCAL)
3171 comm->cell_x0[d] = cell_x[dd->ci[d]];
3172 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3174 if (setmode != setcellsizeslbMASTER)
3179 /* The following limitation is to avoid that a cell would receive
3180 * some of its own home charge groups back over the periodic boundary.
3181 * Double charge groups cause trouble with the global indices.
3183 if (d < ddbox->npbcdim &&
3184 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3186 char error_string[STRLEN];
3188 sprintf(error_string,
3189 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3190 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3192 dd->nc[d], dd->nc[d],
3193 dd->nnodes > dd->nc[d] ? "cells" : "ranks");
3195 if (setmode == setcellsizeslbLOCAL)
3197 gmx_fatal_collective(FARGS, NULL, dd, error_string);
3201 gmx_fatal(FARGS, error_string);
3206 if (!comm->bDynLoadBal)
3208 copy_rvec(cellsize_min, comm->cellsize_min);
3211 for (d = 0; d < comm->npmedecompdim; d++)
3213 set_pme_maxshift(dd, &comm->ddpme[d],
3214 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3215 comm->ddpme[d].slb_dim_f);
3220 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3221 int d, int dim, gmx_domdec_root_t *root,
3223 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3225 gmx_domdec_comm_t *comm;
3226 int ncd, i, j, nmin, nmin_old;
3227 gmx_bool bLimLo, bLimHi;
3229 real fac, halfway, cellsize_limit_f_i, region_size;
3230 gmx_bool bPBC, bLastHi = FALSE;
3231 int nrange[] = {range[0], range[1]};
3233 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
3239 bPBC = (dim < ddbox->npbcdim);
3241 cell_size = root->buf_ncd;
3245 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3248 /* First we need to check if the scaling does not make cells
3249 * smaller than the smallest allowed size.
3250 * We need to do this iteratively, since if a cell is too small,
3251 * it needs to be enlarged, which makes all the other cells smaller,
3252 * which could in turn make another cell smaller than allowed.
3254 for (i = range[0]; i < range[1]; i++)
3256 root->bCellMin[i] = FALSE;
3262 /* We need the total for normalization */
3264 for (i = range[0]; i < range[1]; i++)
3266 if (root->bCellMin[i] == FALSE)
3268 fac += cell_size[i];
3271 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3272 /* Determine the cell boundaries */
3273 for (i = range[0]; i < range[1]; i++)
3275 if (root->bCellMin[i] == FALSE)
3277 cell_size[i] *= fac;
3278 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3280 cellsize_limit_f_i = 0;
3284 cellsize_limit_f_i = cellsize_limit_f;
3286 if (cell_size[i] < cellsize_limit_f_i)
3288 root->bCellMin[i] = TRUE;
3289 cell_size[i] = cellsize_limit_f_i;
3293 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
3296 while (nmin > nmin_old);
3299 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3300 /* For this check we should not use DD_CELL_MARGIN,
3301 * but a slightly smaller factor,
3302 * since rounding could get use below the limit.
3304 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3307 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3308 gmx_step_str(step, buf),
3309 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3310 ncd, comm->cellsize_min[dim]);
3313 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3317 /* Check if the boundary did not displace more than halfway
3318 * each of the cells it bounds, as this could cause problems,
3319 * especially when the differences between cell sizes are large.
3320 * If changes are applied, they will not make cells smaller
3321 * than the cut-off, as we check all the boundaries which
3322 * might be affected by a change and if the old state was ok,
3323 * the cells will at most be shrunk back to their old size.
3325 for (i = range[0]+1; i < range[1]; i++)
3327 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3328 if (root->cell_f[i] < halfway)
3330 root->cell_f[i] = halfway;
3331 /* Check if the change also causes shifts of the next boundaries */
3332 for (j = i+1; j < range[1]; j++)
3334 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3336 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3340 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3341 if (root->cell_f[i] > halfway)
3343 root->cell_f[i] = halfway;
3344 /* Check if the change also causes shifts of the next boundaries */
3345 for (j = i-1; j >= range[0]+1; j--)
3347 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3349 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3356 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3357 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3358 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3359 * for a and b nrange is used */
3362 /* Take care of the staggering of the cell boundaries */
3365 for (i = range[0]; i < range[1]; i++)
3367 root->cell_f_max0[i] = root->cell_f[i];
3368 root->cell_f_min1[i] = root->cell_f[i+1];
3373 for (i = range[0]+1; i < range[1]; i++)
3375 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3376 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3377 if (bLimLo && bLimHi)
3379 /* Both limits violated, try the best we can */
3380 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3381 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3382 nrange[0] = range[0];
3384 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3387 nrange[1] = range[1];
3388 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3394 /* root->cell_f[i] = root->bound_min[i]; */
3395 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3398 else if (bLimHi && !bLastHi)
3401 if (nrange[1] < range[1]) /* found a LimLo before */
3403 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3404 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3405 nrange[0] = nrange[1];
3407 root->cell_f[i] = root->bound_max[i];
3409 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3411 nrange[1] = range[1];
3414 if (nrange[1] < range[1]) /* found last a LimLo */
3416 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3417 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3418 nrange[0] = nrange[1];
3419 nrange[1] = range[1];
3420 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3422 else if (nrange[0] > range[0]) /* found at least one LimHi */
3424 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3431 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3432 int d, int dim, gmx_domdec_root_t *root,
3433 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3434 gmx_bool bUniform, gmx_int64_t step)
3436 gmx_domdec_comm_t *comm;
3437 int ncd, d1, i, j, pos;
3439 real load_aver, load_i, imbalance, change, change_max, sc;
3440 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3444 int range[] = { 0, 0 };
3448 /* Convert the maximum change from the input percentage to a fraction */
3449 change_limit = comm->dlb_scale_lim*0.01;
3453 bPBC = (dim < ddbox->npbcdim);
3455 cell_size = root->buf_ncd;
3457 /* Store the original boundaries */
3458 for (i = 0; i < ncd+1; i++)
3460 root->old_cell_f[i] = root->cell_f[i];
3464 for (i = 0; i < ncd; i++)
3466 cell_size[i] = 1.0/ncd;
3469 else if (dd_load_count(comm))
3471 load_aver = comm->load[d].sum_m/ncd;
3473 for (i = 0; i < ncd; i++)
3475 /* Determine the relative imbalance of cell i */
3476 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3477 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3478 /* Determine the change of the cell size using underrelaxation */
3479 change = -relax*imbalance;
3480 change_max = max(change_max, max(change, -change));
3482 /* Limit the amount of scaling.
3483 * We need to use the same rescaling for all cells in one row,
3484 * otherwise the load balancing might not converge.
3487 if (change_max > change_limit)
3489 sc *= change_limit/change_max;
3491 for (i = 0; i < ncd; i++)
3493 /* Determine the relative imbalance of cell i */
3494 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3495 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3496 /* Determine the change of the cell size using underrelaxation */
3497 change = -sc*imbalance;
3498 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3502 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3503 cellsize_limit_f *= DD_CELL_MARGIN;
3504 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3505 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3506 if (ddbox->tric_dir[dim])
3508 cellsize_limit_f /= ddbox->skew_fac[dim];
3509 dist_min_f /= ddbox->skew_fac[dim];
3511 if (bDynamicBox && d > 0)
3513 dist_min_f *= DD_PRES_SCALE_MARGIN;
3515 if (d > 0 && !bUniform)
3517 /* Make sure that the grid is not shifted too much */
3518 for (i = 1; i < ncd; i++)
3520 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3522 gmx_incons("Inconsistent DD boundary staggering limits!");
3524 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3525 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3528 root->bound_min[i] += 0.5*space;
3530 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3531 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3534 root->bound_max[i] += 0.5*space;
3539 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3541 root->cell_f_max0[i-1] + dist_min_f,
3542 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3543 root->cell_f_min1[i] - dist_min_f);
3548 root->cell_f[0] = 0;
3549 root->cell_f[ncd] = 1;
3550 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3553 /* After the checks above, the cells should obey the cut-off
3554 * restrictions, but it does not hurt to check.
3556 for (i = 0; i < ncd; i++)
3560 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3561 dim, i, root->cell_f[i], root->cell_f[i+1]);
3564 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3565 root->cell_f[i+1] - root->cell_f[i] <
3566 cellsize_limit_f/DD_CELL_MARGIN)
3570 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3571 gmx_step_str(step, buf), dim2char(dim), i,
3572 (root->cell_f[i+1] - root->cell_f[i])
3573 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3578 /* Store the cell boundaries of the lower dimensions at the end */
3579 for (d1 = 0; d1 < d; d1++)
3581 root->cell_f[pos++] = comm->cell_f0[d1];
3582 root->cell_f[pos++] = comm->cell_f1[d1];
3585 if (d < comm->npmedecompdim)
3587 /* The master determines the maximum shift for
3588 * the coordinate communication between separate PME nodes.
3590 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
3592 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3595 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3599 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3600 gmx_ddbox_t *ddbox, int dimind)
3602 gmx_domdec_comm_t *comm;
3607 /* Set the cell dimensions */
3608 dim = dd->dim[dimind];
3609 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3610 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3611 if (dim >= ddbox->nboundeddim)
3613 comm->cell_x0[dim] += ddbox->box0[dim];
3614 comm->cell_x1[dim] += ddbox->box0[dim];
3618 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3619 int d, int dim, real *cell_f_row,
3622 gmx_domdec_comm_t *comm;
3628 /* Each node would only need to know two fractions,
3629 * but it is probably cheaper to broadcast the whole array.
3631 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3632 0, comm->mpi_comm_load[d]);
3634 /* Copy the fractions for this dimension from the buffer */
3635 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3636 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3637 /* The whole array was communicated, so set the buffer position */
3638 pos = dd->nc[dim] + 1;
3639 for (d1 = 0; d1 <= d; d1++)
3643 /* Copy the cell fractions of the lower dimensions */
3644 comm->cell_f0[d1] = cell_f_row[pos++];
3645 comm->cell_f1[d1] = cell_f_row[pos++];
3647 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3649 /* Convert the communicated shift from float to int */
3650 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3653 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3657 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3658 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3659 gmx_bool bUniform, gmx_int64_t step)
3661 gmx_domdec_comm_t *comm;
3663 gmx_bool bRowMember, bRowRoot;
3668 for (d = 0; d < dd->ndim; d++)
3673 for (d1 = d; d1 < dd->ndim; d1++)
3675 if (dd->ci[dd->dim[d1]] > 0)
3688 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3689 ddbox, bDynamicBox, bUniform, step);
3690 cell_f_row = comm->root[d]->cell_f;
3694 cell_f_row = comm->cell_f_row;
3696 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
3701 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3705 /* This function assumes the box is static and should therefore
3706 * not be called when the box has changed since the last
3707 * call to dd_partition_system.
3709 for (d = 0; d < dd->ndim; d++)
3711 relative_to_absolute_cell_bounds(dd, ddbox, d);
3717 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3718 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3719 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3720 gmx_wallcycle_t wcycle)
3722 gmx_domdec_comm_t *comm;
3729 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3730 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3731 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3733 else if (bDynamicBox)
3735 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3738 /* Set the dimensions for which no DD is used */
3739 for (dim = 0; dim < DIM; dim++)
3741 if (dd->nc[dim] == 1)
3743 comm->cell_x0[dim] = 0;
3744 comm->cell_x1[dim] = ddbox->box_size[dim];
3745 if (dim >= ddbox->nboundeddim)
3747 comm->cell_x0[dim] += ddbox->box0[dim];
3748 comm->cell_x1[dim] += ddbox->box0[dim];
3754 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3757 gmx_domdec_comm_dim_t *cd;
3759 for (d = 0; d < dd->ndim; d++)
3761 cd = &dd->comm->cd[d];
3762 np = npulse[dd->dim[d]];
3763 if (np > cd->np_nalloc)
3767 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3768 dim2char(dd->dim[d]), np);
3770 if (DDMASTER(dd) && cd->np_nalloc > 0)
3772 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3774 srenew(cd->ind, np);
3775 for (i = cd->np_nalloc; i < np; i++)
3777 cd->ind[i].index = NULL;
3778 cd->ind[i].nalloc = 0;
3787 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3788 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3789 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3790 gmx_wallcycle_t wcycle)
3792 gmx_domdec_comm_t *comm;
3798 /* Copy the old cell boundaries for the cg displacement check */
3799 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3800 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3802 if (comm->bDynLoadBal)
3806 check_box_size(dd, ddbox);
3808 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3812 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
3813 realloc_comm_ind(dd, npulse);
3818 for (d = 0; d < DIM; d++)
3820 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3821 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
3826 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3828 rvec cell_ns_x0, rvec cell_ns_x1,
3831 gmx_domdec_comm_t *comm;
3836 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3838 dim = dd->dim[dim_ind];
3840 /* Without PBC we don't have restrictions on the outer cells */
3841 if (!(dim >= ddbox->npbcdim &&
3842 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3843 comm->bDynLoadBal &&
3844 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3845 comm->cellsize_min[dim])
3848 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3849 gmx_step_str(step, buf), dim2char(dim),
3850 comm->cell_x1[dim] - comm->cell_x0[dim],
3851 ddbox->skew_fac[dim],
3852 dd->comm->cellsize_min[dim],
3853 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3857 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3859 /* Communicate the boundaries and update cell_ns_x0/1 */
3860 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3861 if (dd->bGridJump && dd->ndim > 1)
3863 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
3868 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3872 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3880 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3881 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3890 static void check_screw_box(matrix box)
3892 /* Mathematical limitation */
3893 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3895 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3898 /* Limitation due to the asymmetry of the eighth shell method */
3899 if (box[ZZ][YY] != 0)
3901 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
3905 static void distribute_cg(FILE *fplog, gmx_int64_t step,
3906 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3909 gmx_domdec_master_t *ma;
3910 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3911 int i, icg, j, k, k0, k1, d, npbcdim;
3913 rvec box_size, cg_cm;
3915 real nrcg, inv_ncg, pos_d;
3917 gmx_bool bUnbounded, bScrew;
3921 if (tmp_ind == NULL)
3923 snew(tmp_nalloc, dd->nnodes);
3924 snew(tmp_ind, dd->nnodes);
3925 for (i = 0; i < dd->nnodes; i++)
3927 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3928 snew(tmp_ind[i], tmp_nalloc[i]);
3932 /* Clear the count */
3933 for (i = 0; i < dd->nnodes; i++)
3939 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3941 cgindex = cgs->index;
3943 /* Compute the center of geometry for all charge groups */
3944 for (icg = 0; icg < cgs->nr; icg++)
3947 k1 = cgindex[icg+1];
3951 copy_rvec(pos[k0], cg_cm);
3958 for (k = k0; (k < k1); k++)
3960 rvec_inc(cg_cm, pos[k]);
3962 for (d = 0; (d < DIM); d++)
3964 cg_cm[d] *= inv_ncg;
3967 /* Put the charge group in the box and determine the cell index */
3968 for (d = DIM-1; d >= 0; d--)
3971 if (d < dd->npbcdim)
3973 bScrew = (dd->bScrewPBC && d == XX);
3974 if (tric_dir[d] && dd->nc[d] > 1)
3976 /* Use triclinic coordintates for this dimension */
3977 for (j = d+1; j < DIM; j++)
3979 pos_d += cg_cm[j]*tcm[j][d];
3982 while (pos_d >= box[d][d])
3985 rvec_dec(cg_cm, box[d]);
3988 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3989 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3991 for (k = k0; (k < k1); k++)
3993 rvec_dec(pos[k], box[d]);
3996 pos[k][YY] = box[YY][YY] - pos[k][YY];
3997 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4004 rvec_inc(cg_cm, box[d]);
4007 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
4008 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
4010 for (k = k0; (k < k1); k++)
4012 rvec_inc(pos[k], box[d]);
4015 pos[k][YY] = box[YY][YY] - pos[k][YY];
4016 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4021 /* This could be done more efficiently */
4023 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
4028 i = dd_index(dd->nc, ind);
4029 if (ma->ncg[i] == tmp_nalloc[i])
4031 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
4032 srenew(tmp_ind[i], tmp_nalloc[i]);
4034 tmp_ind[i][ma->ncg[i]] = icg;
4036 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
4040 for (i = 0; i < dd->nnodes; i++)
4043 for (k = 0; k < ma->ncg[i]; k++)
4045 ma->cg[k1++] = tmp_ind[i][k];
4048 ma->index[dd->nnodes] = k1;
4050 for (i = 0; i < dd->nnodes; i++)
4060 fprintf(fplog, "Charge group distribution at step %s:",
4061 gmx_step_str(step, buf));
4062 for (i = 0; i < dd->nnodes; i++)
4064 fprintf(fplog, " %d", ma->ncg[i]);
4066 fprintf(fplog, "\n");
4070 static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4071 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4074 gmx_domdec_master_t *ma = NULL;
4077 int *ibuf, buf2[2] = { 0, 0 };
4078 gmx_bool bMaster = DDMASTER(dd);
4086 check_screw_box(box);
4089 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
4091 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
4092 for (i = 0; i < dd->nnodes; i++)
4094 ma->ibuf[2*i] = ma->ncg[i];
4095 ma->ibuf[2*i+1] = ma->nat[i];
4103 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4105 dd->ncg_home = buf2[0];
4106 dd->nat_home = buf2[1];
4107 dd->ncg_tot = dd->ncg_home;
4108 dd->nat_tot = dd->nat_home;
4109 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4111 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4112 srenew(dd->index_gl, dd->cg_nalloc);
4113 srenew(dd->cgindex, dd->cg_nalloc+1);
4117 for (i = 0; i < dd->nnodes; i++)
4119 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4120 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4125 DDMASTER(dd) ? ma->ibuf : NULL,
4126 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4127 DDMASTER(dd) ? ma->cg : NULL,
4128 dd->ncg_home*sizeof(int), dd->index_gl);
4130 /* Determine the home charge group sizes */
4132 for (i = 0; i < dd->ncg_home; i++)
4134 cg_gl = dd->index_gl[i];
4136 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4141 fprintf(debug, "Home charge groups:\n");
4142 for (i = 0; i < dd->ncg_home; i++)
4144 fprintf(debug, " %d", dd->index_gl[i]);
4147 fprintf(debug, "\n");
4150 fprintf(debug, "\n");
4154 static int compact_and_copy_vec_at(int ncg, int *move,
4157 rvec *src, gmx_domdec_comm_t *comm,
4160 int m, icg, i, i0, i1, nrcg;
4166 for (m = 0; m < DIM*2; m++)
4172 for (icg = 0; icg < ncg; icg++)
4174 i1 = cgindex[icg+1];
4180 /* Compact the home array in place */
4181 for (i = i0; i < i1; i++)
4183 copy_rvec(src[i], src[home_pos++]);
4189 /* Copy to the communication buffer */
4191 pos_vec[m] += 1 + vec*nrcg;
4192 for (i = i0; i < i1; i++)
4194 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4196 pos_vec[m] += (nvec - vec - 1)*nrcg;
4200 home_pos += i1 - i0;
4208 static int compact_and_copy_vec_cg(int ncg, int *move,
4210 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4213 int m, icg, i0, i1, nrcg;
4219 for (m = 0; m < DIM*2; m++)
4225 for (icg = 0; icg < ncg; icg++)
4227 i1 = cgindex[icg+1];
4233 /* Compact the home array in place */
4234 copy_rvec(src[icg], src[home_pos++]);
4240 /* Copy to the communication buffer */
4241 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
4242 pos_vec[m] += 1 + nrcg*nvec;
4254 static int compact_ind(int ncg, int *move,
4255 int *index_gl, int *cgindex,
4257 gmx_ga2la_t ga2la, char *bLocalCG,
4260 int cg, nat, a0, a1, a, a_gl;
4265 for (cg = 0; cg < ncg; cg++)
4271 /* Compact the home arrays in place.
4272 * Anything that can be done here avoids access to global arrays.
4274 cgindex[home_pos] = nat;
4275 for (a = a0; a < a1; a++)
4278 gatindex[nat] = a_gl;
4279 /* The cell number stays 0, so we don't need to set it */
4280 ga2la_change_la(ga2la, a_gl, nat);
4283 index_gl[home_pos] = index_gl[cg];
4284 cginfo[home_pos] = cginfo[cg];
4285 /* The charge group remains local, so bLocalCG does not change */
4290 /* Clear the global indices */
4291 for (a = a0; a < a1; a++)
4293 ga2la_del(ga2la, gatindex[a]);
4297 bLocalCG[index_gl[cg]] = FALSE;
4301 cgindex[home_pos] = nat;
4306 static void clear_and_mark_ind(int ncg, int *move,
4307 int *index_gl, int *cgindex, int *gatindex,
4308 gmx_ga2la_t ga2la, char *bLocalCG,
4313 for (cg = 0; cg < ncg; cg++)
4319 /* Clear the global indices */
4320 for (a = a0; a < a1; a++)
4322 ga2la_del(ga2la, gatindex[a]);
4326 bLocalCG[index_gl[cg]] = FALSE;
4328 /* Signal that this cg has moved using the ns cell index.
4329 * Here we set it to -1. fill_grid will change it
4330 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4332 cell_index[cg] = -1;
4337 static void print_cg_move(FILE *fplog,
4339 gmx_int64_t step, int cg, int dim, int dir,
4340 gmx_bool bHaveLimitdAndCMOld, real limitd,
4341 rvec cm_old, rvec cm_new, real pos_d)
4343 gmx_domdec_comm_t *comm;
4348 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4349 if (bHaveLimitdAndCMOld)
4351 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4352 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4356 fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4357 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4359 fprintf(fplog, "distance out of cell %f\n",
4360 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4361 if (bHaveLimitdAndCMOld)
4363 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4364 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4366 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4367 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4368 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4370 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4371 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4373 comm->cell_x0[dim], comm->cell_x1[dim]);
4376 static void cg_move_error(FILE *fplog,
4378 gmx_int64_t step, int cg, int dim, int dir,
4379 gmx_bool bHaveLimitdAndCMOld, real limitd,
4380 rvec cm_old, rvec cm_new, real pos_d)
4384 print_cg_move(fplog, dd, step, cg, dim, dir,
4385 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4387 print_cg_move(stderr, dd, step, cg, dim, dir,
4388 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4390 "A charge group moved too far between two domain decomposition steps\n"
4391 "This usually means that your system is not well equilibrated");
4394 static void rotate_state_atom(t_state *state, int a)
4398 for (est = 0; est < estNR; est++)
4400 if (EST_DISTR(est) && (state->flags & (1<<est)))
4405 /* Rotate the complete state; for a rectangular box only */
4406 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4407 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4410 state->v[a][YY] = -state->v[a][YY];
4411 state->v[a][ZZ] = -state->v[a][ZZ];
4414 state->sd_X[a][YY] = -state->sd_X[a][YY];
4415 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4418 state->cg_p[a][YY] = -state->cg_p[a][YY];
4419 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4421 case estDISRE_INITF:
4422 case estDISRE_RM3TAV:
4423 case estORIRE_INITF:
4425 /* These are distances, so not affected by rotation */
4428 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4434 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4436 if (natoms > comm->moved_nalloc)
4438 /* Contents should be preserved here */
4439 comm->moved_nalloc = over_alloc_dd(natoms);
4440 srenew(comm->moved, comm->moved_nalloc);
4446 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4449 ivec tric_dir, matrix tcm,
4450 rvec cell_x0, rvec cell_x1,
4451 rvec limitd, rvec limit0, rvec limit1,
4453 int cg_start, int cg_end,
4458 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4459 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4463 real inv_ncg, pos_d;
4466 npbcdim = dd->npbcdim;
4468 for (cg = cg_start; cg < cg_end; cg++)
4475 copy_rvec(state->x[k0], cm_new);
4482 for (k = k0; (k < k1); k++)
4484 rvec_inc(cm_new, state->x[k]);
4486 for (d = 0; (d < DIM); d++)
4488 cm_new[d] = inv_ncg*cm_new[d];
4493 /* Do pbc and check DD cell boundary crossings */
4494 for (d = DIM-1; d >= 0; d--)
4498 bScrew = (dd->bScrewPBC && d == XX);
4499 /* Determine the location of this cg in lattice coordinates */
4503 for (d2 = d+1; d2 < DIM; d2++)
4505 pos_d += cm_new[d2]*tcm[d2][d];
4508 /* Put the charge group in the triclinic unit-cell */
4509 if (pos_d >= cell_x1[d])
4511 if (pos_d >= limit1[d])
4513 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4514 cg_cm[cg], cm_new, pos_d);
4517 if (dd->ci[d] == dd->nc[d] - 1)
4519 rvec_dec(cm_new, state->box[d]);
4522 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4523 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4525 for (k = k0; (k < k1); k++)
4527 rvec_dec(state->x[k], state->box[d]);
4530 rotate_state_atom(state, k);
4535 else if (pos_d < cell_x0[d])
4537 if (pos_d < limit0[d])
4539 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4540 cg_cm[cg], cm_new, pos_d);
4545 rvec_inc(cm_new, state->box[d]);
4548 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4549 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4551 for (k = k0; (k < k1); k++)
4553 rvec_inc(state->x[k], state->box[d]);
4556 rotate_state_atom(state, k);
4562 else if (d < npbcdim)
4564 /* Put the charge group in the rectangular unit-cell */
4565 while (cm_new[d] >= state->box[d][d])
4567 rvec_dec(cm_new, state->box[d]);
4568 for (k = k0; (k < k1); k++)
4570 rvec_dec(state->x[k], state->box[d]);
4573 while (cm_new[d] < 0)
4575 rvec_inc(cm_new, state->box[d]);
4576 for (k = k0; (k < k1); k++)
4578 rvec_inc(state->x[k], state->box[d]);
4584 copy_rvec(cm_new, cg_cm[cg]);
4586 /* Determine where this cg should go */
4589 for (d = 0; d < dd->ndim; d++)
4594 flag |= DD_FLAG_FW(d);
4600 else if (dev[dim] == -1)
4602 flag |= DD_FLAG_BW(d);
4605 if (dd->nc[dim] > 2)
4616 /* Temporarily store the flag in move */
4617 move[cg] = mc + flag;
4621 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4622 gmx_domdec_t *dd, ivec tric_dir,
4623 t_state *state, rvec **f,
4632 int ncg[DIM*2], nat[DIM*2];
4633 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4634 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4635 int sbuf[2], rbuf[2];
4636 int home_pos_cg, home_pos_at, buf_pos;
4638 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4641 real inv_ncg, pos_d;
4643 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4645 cginfo_mb_t *cginfo_mb;
4646 gmx_domdec_comm_t *comm;
4648 int nthread, thread;
4652 check_screw_box(state->box);
4656 if (fr->cutoff_scheme == ecutsGROUP)
4661 for (i = 0; i < estNR; i++)
4667 case estX: /* Always present */ break;
4668 case estV: bV = (state->flags & (1<<i)); break;
4669 case estSDX: bSDX = (state->flags & (1<<i)); break;
4670 case estCGP: bCGP = (state->flags & (1<<i)); break;
4673 case estDISRE_INITF:
4674 case estDISRE_RM3TAV:
4675 case estORIRE_INITF:
4677 /* No processing required */
4680 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4685 if (dd->ncg_tot > comm->nalloc_int)
4687 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4688 srenew(comm->buf_int, comm->nalloc_int);
4690 move = comm->buf_int;
4692 /* Clear the count */
4693 for (c = 0; c < dd->ndim*2; c++)
4699 npbcdim = dd->npbcdim;
4701 for (d = 0; (d < DIM); d++)
4703 limitd[d] = dd->comm->cellsize_min[d];
4704 if (d >= npbcdim && dd->ci[d] == 0)
4706 cell_x0[d] = -GMX_FLOAT_MAX;
4710 cell_x0[d] = comm->cell_x0[d];
4712 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4714 cell_x1[d] = GMX_FLOAT_MAX;
4718 cell_x1[d] = comm->cell_x1[d];
4722 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4723 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4727 /* We check after communication if a charge group moved
4728 * more than one cell. Set the pre-comm check limit to float_max.
4730 limit0[d] = -GMX_FLOAT_MAX;
4731 limit1[d] = GMX_FLOAT_MAX;
4735 make_tric_corr_matrix(npbcdim, state->box, tcm);
4737 cgindex = dd->cgindex;
4739 nthread = gmx_omp_nthreads_get(emntDomdec);
4741 /* Compute the center of geometry for all home charge groups
4742 * and put them in the box and determine where they should go.
4744 #pragma omp parallel for num_threads(nthread) schedule(static)
4745 for (thread = 0; thread < nthread; thread++)
4747 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4748 cell_x0, cell_x1, limitd, limit0, limit1,
4750 ( thread *dd->ncg_home)/nthread,
4751 ((thread+1)*dd->ncg_home)/nthread,
4752 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4756 for (cg = 0; cg < dd->ncg_home; cg++)
4761 flag = mc & ~DD_FLAG_NRCG;
4762 mc = mc & DD_FLAG_NRCG;
4765 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4767 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4768 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4770 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4771 /* We store the cg size in the lower 16 bits
4772 * and the place where the charge group should go
4773 * in the next 6 bits. This saves some communication volume.
4775 nrcg = cgindex[cg+1] - cgindex[cg];
4776 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4782 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4783 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4786 for (i = 0; i < dd->ndim*2; i++)
4788 *ncg_moved += ncg[i];
4805 /* Make sure the communication buffers are large enough */
4806 for (mc = 0; mc < dd->ndim*2; mc++)
4808 nvr = ncg[mc] + nat[mc]*nvec;
4809 if (nvr > comm->cgcm_state_nalloc[mc])
4811 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4812 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4816 switch (fr->cutoff_scheme)
4819 /* Recalculating cg_cm might be cheaper than communicating,
4820 * but that could give rise to rounding issues.
4823 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4824 nvec, cg_cm, comm, bCompact);
4827 /* Without charge groups we send the moved atom coordinates
4828 * over twice. This is so the code below can be used without
4829 * many conditionals for both for with and without charge groups.
4832 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4833 nvec, state->x, comm, FALSE);
4836 home_pos_cg -= *ncg_moved;
4840 gmx_incons("unimplemented");
4846 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4847 nvec, vec++, state->x, comm, bCompact);
4850 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4851 nvec, vec++, state->v, comm, bCompact);
4855 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4856 nvec, vec++, state->sd_X, comm, bCompact);
4860 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4861 nvec, vec++, state->cg_p, comm, bCompact);
4866 compact_ind(dd->ncg_home, move,
4867 dd->index_gl, dd->cgindex, dd->gatindex,
4868 dd->ga2la, comm->bLocalCG,
4873 if (fr->cutoff_scheme == ecutsVERLET)
4875 moved = get_moved(comm, dd->ncg_home);
4877 for (k = 0; k < dd->ncg_home; k++)
4884 moved = fr->ns.grid->cell_index;
4887 clear_and_mark_ind(dd->ncg_home, move,
4888 dd->index_gl, dd->cgindex, dd->gatindex,
4889 dd->ga2la, comm->bLocalCG,
4893 cginfo_mb = fr->cginfo_mb;
4895 *ncg_stay_home = home_pos_cg;
4896 for (d = 0; d < dd->ndim; d++)
4902 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4905 /* Communicate the cg and atom counts */
4910 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4911 d, dir, sbuf[0], sbuf[1]);
4913 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4915 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4917 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4918 srenew(comm->buf_int, comm->nalloc_int);
4921 /* Communicate the charge group indices, sizes and flags */
4922 dd_sendrecv_int(dd, d, dir,
4923 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4924 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4926 nvs = ncg[cdd] + nat[cdd]*nvec;
4927 i = rbuf[0] + rbuf[1] *nvec;
4928 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4930 /* Communicate cgcm and state */
4931 dd_sendrecv_rvec(dd, d, dir,
4932 comm->cgcm_state[cdd], nvs,
4933 comm->vbuf.v+nvr, i);
4934 ncg_recv += rbuf[0];
4935 nat_recv += rbuf[1];
4939 /* Process the received charge groups */
4941 for (cg = 0; cg < ncg_recv; cg++)
4943 flag = comm->buf_int[cg*DD_CGIBS+1];
4945 if (dim >= npbcdim && dd->nc[dim] > 2)
4947 /* No pbc in this dim and more than one domain boundary.
4948 * We do a separate check if a charge group didn't move too far.
4950 if (((flag & DD_FLAG_FW(d)) &&
4951 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4952 ((flag & DD_FLAG_BW(d)) &&
4953 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4955 cg_move_error(fplog, dd, step, cg, dim,
4956 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4958 comm->vbuf.v[buf_pos],
4959 comm->vbuf.v[buf_pos],
4960 comm->vbuf.v[buf_pos][dim]);
4967 /* Check which direction this cg should go */
4968 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4972 /* The cell boundaries for dimension d2 are not equal
4973 * for each cell row of the lower dimension(s),
4974 * therefore we might need to redetermine where
4975 * this cg should go.
4978 /* If this cg crosses the box boundary in dimension d2
4979 * we can use the communicated flag, so we do not
4980 * have to worry about pbc.
4982 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4983 (flag & DD_FLAG_FW(d2))) ||
4984 (dd->ci[dim2] == 0 &&
4985 (flag & DD_FLAG_BW(d2)))))
4987 /* Clear the two flags for this dimension */
4988 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4989 /* Determine the location of this cg
4990 * in lattice coordinates
4992 pos_d = comm->vbuf.v[buf_pos][dim2];
4995 for (d3 = dim2+1; d3 < DIM; d3++)
4998 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
5001 /* Check of we are not at the box edge.
5002 * pbc is only handled in the first step above,
5003 * but this check could move over pbc while
5004 * the first step did not due to different rounding.
5006 if (pos_d >= cell_x1[dim2] &&
5007 dd->ci[dim2] != dd->nc[dim2]-1)
5009 flag |= DD_FLAG_FW(d2);
5011 else if (pos_d < cell_x0[dim2] &&
5014 flag |= DD_FLAG_BW(d2);
5016 comm->buf_int[cg*DD_CGIBS+1] = flag;
5019 /* Set to which neighboring cell this cg should go */
5020 if (flag & DD_FLAG_FW(d2))
5024 else if (flag & DD_FLAG_BW(d2))
5026 if (dd->nc[dd->dim[d2]] > 2)
5038 nrcg = flag & DD_FLAG_NRCG;
5041 if (home_pos_cg+1 > dd->cg_nalloc)
5043 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5044 srenew(dd->index_gl, dd->cg_nalloc);
5045 srenew(dd->cgindex, dd->cg_nalloc+1);
5047 /* Set the global charge group index and size */
5048 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5049 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5050 /* Copy the state from the buffer */
5051 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5052 if (fr->cutoff_scheme == ecutsGROUP)
5055 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5059 /* Set the cginfo */
5060 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5061 dd->index_gl[home_pos_cg]);
5064 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5067 if (home_pos_at+nrcg > state->nalloc)
5069 dd_realloc_state(state, f, home_pos_at+nrcg);
5071 for (i = 0; i < nrcg; i++)
5073 copy_rvec(comm->vbuf.v[buf_pos++],
5074 state->x[home_pos_at+i]);
5078 for (i = 0; i < nrcg; i++)
5080 copy_rvec(comm->vbuf.v[buf_pos++],
5081 state->v[home_pos_at+i]);
5086 for (i = 0; i < nrcg; i++)
5088 copy_rvec(comm->vbuf.v[buf_pos++],
5089 state->sd_X[home_pos_at+i]);
5094 for (i = 0; i < nrcg; i++)
5096 copy_rvec(comm->vbuf.v[buf_pos++],
5097 state->cg_p[home_pos_at+i]);
5101 home_pos_at += nrcg;
5105 /* Reallocate the buffers if necessary */
5106 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5108 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5109 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5111 nvr = ncg[mc] + nat[mc]*nvec;
5112 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5114 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5115 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5117 /* Copy from the receive to the send buffers */
5118 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5119 comm->buf_int + cg*DD_CGIBS,
5120 DD_CGIBS*sizeof(int));
5121 memcpy(comm->cgcm_state[mc][nvr],
5122 comm->vbuf.v[buf_pos],
5123 (1+nrcg*nvec)*sizeof(rvec));
5124 buf_pos += 1 + nrcg*nvec;
5131 /* With sorting (!bCompact) the indices are now only partially up to date
5132 * and ncg_home and nat_home are not the real count, since there are
5133 * "holes" in the arrays for the charge groups that moved to neighbors.
5135 if (fr->cutoff_scheme == ecutsVERLET)
5137 moved = get_moved(comm, home_pos_cg);
5139 for (i = dd->ncg_home; i < home_pos_cg; i++)
5144 dd->ncg_home = home_pos_cg;
5145 dd->nat_home = home_pos_at;
5150 "Finished repartitioning: cgs moved out %d, new home %d\n",
5151 *ncg_moved, dd->ncg_home-*ncg_moved);
5156 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5158 dd->comm->cycl[ddCycl] += cycles;
5159 dd->comm->cycl_n[ddCycl]++;
5160 if (cycles > dd->comm->cycl_max[ddCycl])
5162 dd->comm->cycl_max[ddCycl] = cycles;
5166 static double force_flop_count(t_nrnb *nrnb)
5173 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5175 /* To get closer to the real timings, we half the count
5176 * for the normal loops and again half it for water loops.
5179 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5181 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5185 sum += nrnb->n[i]*0.50*cost_nrnb(i);
5188 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5191 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5193 sum += nrnb->n[i]*cost_nrnb(i);
5196 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5198 sum += nrnb->n[i]*cost_nrnb(i);
5204 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5206 if (dd->comm->eFlop)
5208 dd->comm->flop -= force_flop_count(nrnb);
5211 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5213 if (dd->comm->eFlop)
5215 dd->comm->flop += force_flop_count(nrnb);
5220 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5224 for (i = 0; i < ddCyclNr; i++)
5226 dd->comm->cycl[i] = 0;
5227 dd->comm->cycl_n[i] = 0;
5228 dd->comm->cycl_max[i] = 0;
5231 dd->comm->flop_n = 0;
5234 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5236 gmx_domdec_comm_t *comm;
5237 gmx_domdec_load_t *load;
5238 gmx_domdec_root_t *root = NULL;
5239 int d, dim, cid, i, pos;
5240 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5245 fprintf(debug, "get_load_distribution start\n");
5248 wallcycle_start(wcycle, ewcDDCOMMLOAD);
5252 bSepPME = (dd->pme_nodeid >= 0);
5254 for (d = dd->ndim-1; d >= 0; d--)
5257 /* Check if we participate in the communication in this dimension */
5258 if (d == dd->ndim-1 ||
5259 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5261 load = &comm->load[d];
5264 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
5267 if (d == dd->ndim-1)
5269 sbuf[pos++] = dd_force_load(comm);
5270 sbuf[pos++] = sbuf[0];
5273 sbuf[pos++] = sbuf[0];
5274 sbuf[pos++] = cell_frac;
5277 sbuf[pos++] = comm->cell_f_max0[d];
5278 sbuf[pos++] = comm->cell_f_min1[d];
5283 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5284 sbuf[pos++] = comm->cycl[ddCyclPME];
5289 sbuf[pos++] = comm->load[d+1].sum;
5290 sbuf[pos++] = comm->load[d+1].max;
5293 sbuf[pos++] = comm->load[d+1].sum_m;
5294 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5295 sbuf[pos++] = comm->load[d+1].flags;
5298 sbuf[pos++] = comm->cell_f_max0[d];
5299 sbuf[pos++] = comm->cell_f_min1[d];
5304 sbuf[pos++] = comm->load[d+1].mdf;
5305 sbuf[pos++] = comm->load[d+1].pme;
5309 /* Communicate a row in DD direction d.
5310 * The communicators are setup such that the root always has rank 0.
5313 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5314 load->load, load->nload*sizeof(float), MPI_BYTE,
5315 0, comm->mpi_comm_load[d]);
5317 if (dd->ci[dim] == dd->master_ci[dim])
5319 /* We are the root, process this row */
5320 if (comm->bDynLoadBal)
5322 root = comm->root[d];
5332 for (i = 0; i < dd->nc[dim]; i++)
5334 load->sum += load->load[pos++];
5335 load->max = max(load->max, load->load[pos]);
5341 /* This direction could not be load balanced properly,
5342 * therefore we need to use the maximum iso the average load.
5344 load->sum_m = max(load->sum_m, load->load[pos]);
5348 load->sum_m += load->load[pos];
5351 load->cvol_min = min(load->cvol_min, load->load[pos]);
5355 load->flags = (int)(load->load[pos++] + 0.5);
5359 root->cell_f_max0[i] = load->load[pos++];
5360 root->cell_f_min1[i] = load->load[pos++];
5365 load->mdf = max(load->mdf, load->load[pos]);
5367 load->pme = max(load->pme, load->load[pos]);
5371 if (comm->bDynLoadBal && root->bLimited)
5373 load->sum_m *= dd->nc[dim];
5374 load->flags |= (1<<d);
5382 comm->nload += dd_load_count(comm);
5383 comm->load_step += comm->cycl[ddCyclStep];
5384 comm->load_sum += comm->load[0].sum;
5385 comm->load_max += comm->load[0].max;
5386 if (comm->bDynLoadBal)
5388 for (d = 0; d < dd->ndim; d++)
5390 if (comm->load[0].flags & (1<<d))
5392 comm->load_lim[d]++;
5398 comm->load_mdf += comm->load[0].mdf;
5399 comm->load_pme += comm->load[0].pme;
5403 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5407 fprintf(debug, "get_load_distribution finished\n");
5411 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5413 /* Return the relative performance loss on the total run time
5414 * due to the force calculation load imbalance.
5416 if (dd->comm->nload > 0)
5419 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5420 (dd->comm->load_step*dd->nnodes);
5428 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5431 int npp, npme, nnodes, d, limp;
5432 float imbal, pme_f_ratio, lossf, lossp = 0;
5434 gmx_domdec_comm_t *comm;
5437 if (DDMASTER(dd) && comm->nload > 0)
5440 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5441 nnodes = npp + npme;
5442 imbal = comm->load_max*npp/comm->load_sum - 1;
5443 lossf = dd_force_imb_perf_loss(dd);
5444 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5445 fprintf(fplog, "%s", buf);
5446 fprintf(stderr, "\n");
5447 fprintf(stderr, "%s", buf);
5448 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5449 fprintf(fplog, "%s", buf);
5450 fprintf(stderr, "%s", buf);
5452 if (comm->bDynLoadBal)
5454 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5455 for (d = 0; d < dd->ndim; d++)
5457 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5458 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5464 sprintf(buf+strlen(buf), "\n");
5465 fprintf(fplog, "%s", buf);
5466 fprintf(stderr, "%s", buf);
5470 pme_f_ratio = comm->load_pme/comm->load_mdf;
5471 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5474 lossp *= (float)npme/(float)nnodes;
5478 lossp *= (float)npp/(float)nnodes;
5480 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5481 fprintf(fplog, "%s", buf);
5482 fprintf(stderr, "%s", buf);
5483 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5484 fprintf(fplog, "%s", buf);
5485 fprintf(stderr, "%s", buf);
5487 fprintf(fplog, "\n");
5488 fprintf(stderr, "\n");
5490 if (lossf >= DD_PERF_LOSS_WARN)
5493 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5494 " in the domain decomposition.\n", lossf*100);
5495 if (!comm->bDynLoadBal)
5497 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5501 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5503 fprintf(fplog, "%s\n", buf);
5504 fprintf(stderr, "%s\n", buf);
5506 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS_WARN)
5509 "NOTE: %.1f %% performance was lost because the PME ranks\n"
5510 " had %s work to do than the PP ranks.\n"
5511 " You might want to %s the number of PME ranks\n"
5512 " or %s the cut-off and the grid spacing.\n",
5514 (lossp < 0) ? "less" : "more",
5515 (lossp < 0) ? "decrease" : "increase",
5516 (lossp < 0) ? "decrease" : "increase");
5517 fprintf(fplog, "%s\n", buf);
5518 fprintf(stderr, "%s\n", buf);
5523 static float dd_vol_min(gmx_domdec_t *dd)
5525 return dd->comm->load[0].cvol_min*dd->nnodes;
5528 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5530 return dd->comm->load[0].flags;
5533 static float dd_f_imbal(gmx_domdec_t *dd)
5535 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5538 float dd_pme_f_ratio(gmx_domdec_t *dd)
5540 if (dd->comm->cycl_n[ddCyclPME] > 0)
5542 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5550 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5555 flags = dd_load_flags(dd);
5559 "DD load balancing is limited by minimum cell size in dimension");
5560 for (d = 0; d < dd->ndim; d++)
5564 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5567 fprintf(fplog, "\n");
5569 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5570 if (dd->comm->bDynLoadBal)
5572 fprintf(fplog, " vol min/aver %5.3f%c",
5573 dd_vol_min(dd), flags ? '!' : ' ');
5575 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5576 if (dd->comm->cycl_n[ddCyclPME])
5578 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5580 fprintf(fplog, "\n\n");
5583 static void dd_print_load_verbose(gmx_domdec_t *dd)
5585 if (dd->comm->bDynLoadBal)
5587 fprintf(stderr, "vol %4.2f%c ",
5588 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5590 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5591 if (dd->comm->cycl_n[ddCyclPME])
5593 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
5598 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5603 gmx_domdec_root_t *root;
5604 gmx_bool bPartOfGroup = FALSE;
5606 dim = dd->dim[dim_ind];
5607 copy_ivec(loc, loc_c);
5608 for (i = 0; i < dd->nc[dim]; i++)
5611 rank = dd_index(dd->nc, loc_c);
5612 if (rank == dd->rank)
5614 /* This process is part of the group */
5615 bPartOfGroup = TRUE;
5618 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5622 dd->comm->mpi_comm_load[dim_ind] = c_row;
5623 if (dd->comm->eDLB != edlbNO)
5625 if (dd->ci[dim] == dd->master_ci[dim])
5627 /* This is the root process of this row */
5628 snew(dd->comm->root[dim_ind], 1);
5629 root = dd->comm->root[dim_ind];
5630 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5631 snew(root->old_cell_f, dd->nc[dim]+1);
5632 snew(root->bCellMin, dd->nc[dim]);
5635 snew(root->cell_f_max0, dd->nc[dim]);
5636 snew(root->cell_f_min1, dd->nc[dim]);
5637 snew(root->bound_min, dd->nc[dim]);
5638 snew(root->bound_max, dd->nc[dim]);
5640 snew(root->buf_ncd, dd->nc[dim]);
5644 /* This is not a root process, we only need to receive cell_f */
5645 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5648 if (dd->ci[dim] == dd->master_ci[dim])
5650 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
5656 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5657 const gmx_hw_info_t gmx_unused *hwinfo,
5658 const gmx_hw_opt_t gmx_unused *hw_opt)
5661 int physicalnode_id_hash;
5664 MPI_Comm mpi_comm_pp_physicalnode;
5666 if (!(cr->duty & DUTY_PP) ||
5667 hw_opt->gpu_opt.ncuda_dev_use == 0)
5669 /* Only PP nodes (currently) use GPUs.
5670 * If we don't have GPUs, there are no resources to share.
5675 physicalnode_id_hash = gmx_physicalnode_id_hash();
5677 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5683 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5684 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5685 dd->rank, physicalnode_id_hash, gpu_id);
5687 /* Split the PP communicator over the physical nodes */
5688 /* TODO: See if we should store this (before), as it's also used for
5689 * for the nodecomm summution.
5691 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5692 &mpi_comm_pp_physicalnode);
5693 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5694 &dd->comm->mpi_comm_gpu_shared);
5695 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5696 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5700 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5703 /* Note that some ranks could share a GPU, while others don't */
5705 if (dd->comm->nrank_gpu_shared == 1)
5707 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
5712 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5715 int dim0, dim1, i, j;
5720 fprintf(debug, "Making load communicators\n");
5723 snew(dd->comm->load, dd->ndim);
5724 snew(dd->comm->mpi_comm_load, dd->ndim);
5727 make_load_communicator(dd, 0, loc);
5731 for (i = 0; i < dd->nc[dim0]; i++)
5734 make_load_communicator(dd, 1, loc);
5740 for (i = 0; i < dd->nc[dim0]; i++)
5744 for (j = 0; j < dd->nc[dim1]; j++)
5747 make_load_communicator(dd, 2, loc);
5754 fprintf(debug, "Finished making load communicators\n");
5759 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5762 int d, dim, i, j, m;
5765 ivec dd_zp[DD_MAXIZONE];
5766 gmx_domdec_zones_t *zones;
5767 gmx_domdec_ns_ranges_t *izone;
5769 for (d = 0; d < dd->ndim; d++)
5772 copy_ivec(dd->ci, tmp);
5773 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5774 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5775 copy_ivec(dd->ci, tmp);
5776 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5777 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5780 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5783 dd->neighbor[d][1]);
5789 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5791 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5792 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5799 for (i = 0; i < nzonep; i++)
5801 copy_ivec(dd_zp3[i], dd_zp[i]);
5807 for (i = 0; i < nzonep; i++)
5809 copy_ivec(dd_zp2[i], dd_zp[i]);
5815 for (i = 0; i < nzonep; i++)
5817 copy_ivec(dd_zp1[i], dd_zp[i]);
5821 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5826 zones = &dd->comm->zones;
5828 for (i = 0; i < nzone; i++)
5831 clear_ivec(zones->shift[i]);
5832 for (d = 0; d < dd->ndim; d++)
5834 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5839 for (i = 0; i < nzone; i++)
5841 for (d = 0; d < DIM; d++)
5843 s[d] = dd->ci[d] - zones->shift[i][d];
5848 else if (s[d] >= dd->nc[d])
5854 zones->nizone = nzonep;
5855 for (i = 0; i < zones->nizone; i++)
5857 if (dd_zp[i][0] != i)
5859 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5861 izone = &zones->izone[i];
5862 izone->j0 = dd_zp[i][1];
5863 izone->j1 = dd_zp[i][2];
5864 for (dim = 0; dim < DIM; dim++)
5866 if (dd->nc[dim] == 1)
5868 /* All shifts should be allowed */
5869 izone->shift0[dim] = -1;
5870 izone->shift1[dim] = 1;
5875 izone->shift0[d] = 0;
5876 izone->shift1[d] = 0;
5877 for(j=izone->j0; j<izone->j1; j++) {
5878 if (dd->shift[j][d] > dd->shift[i][d])
5879 izone->shift0[d] = -1;
5880 if (dd->shift[j][d] < dd->shift[i][d])
5881 izone->shift1[d] = 1;
5887 /* Assume the shift are not more than 1 cell */
5888 izone->shift0[dim] = 1;
5889 izone->shift1[dim] = -1;
5890 for (j = izone->j0; j < izone->j1; j++)
5892 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5893 if (shift_diff < izone->shift0[dim])
5895 izone->shift0[dim] = shift_diff;
5897 if (shift_diff > izone->shift1[dim])
5899 izone->shift1[dim] = shift_diff;
5906 if (dd->comm->eDLB != edlbNO)
5908 snew(dd->comm->root, dd->ndim);
5911 if (dd->comm->bRecordLoad)
5913 make_load_communicators(dd);
5917 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
5920 gmx_domdec_comm_t *comm;
5931 if (comm->bCartesianPP)
5933 /* Set up cartesian communication for the particle-particle part */
5936 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5937 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5940 for (i = 0; i < DIM; i++)
5944 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5946 /* We overwrite the old communicator with the new cartesian one */
5947 cr->mpi_comm_mygroup = comm_cart;
5950 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5951 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5953 if (comm->bCartesianPP_PME)
5955 /* Since we want to use the original cartesian setup for sim,
5956 * and not the one after split, we need to make an index.
5958 snew(comm->ddindex2ddnodeid, dd->nnodes);
5959 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5960 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5961 /* Get the rank of the DD master,
5962 * above we made sure that the master node is a PP node.
5972 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5974 else if (comm->bCartesianPP)
5976 if (cr->npmenodes == 0)
5978 /* The PP communicator is also
5979 * the communicator for this simulation
5981 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5983 cr->nodeid = dd->rank;
5985 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5987 /* We need to make an index to go from the coordinates
5988 * to the nodeid of this simulation.
5990 snew(comm->ddindex2simnodeid, dd->nnodes);
5991 snew(buf, dd->nnodes);
5992 if (cr->duty & DUTY_PP)
5994 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5996 /* Communicate the ddindex to simulation nodeid index */
5997 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5998 cr->mpi_comm_mysim);
6001 /* Determine the master coordinates and rank.
6002 * The DD master should be the same node as the master of this sim.
6004 for (i = 0; i < dd->nnodes; i++)
6006 if (comm->ddindex2simnodeid[i] == 0)
6008 ddindex2xyz(dd->nc, i, dd->master_ci);
6009 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
6014 fprintf(debug, "The master rank is %d\n", dd->masterrank);
6019 /* No Cartesian communicators */
6020 /* We use the rank in dd->comm->all as DD index */
6021 ddindex2xyz(dd->nc, dd->rank, dd->ci);
6022 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
6024 clear_ivec(dd->master_ci);
6031 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
6032 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6037 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
6038 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6042 static void receive_ddindex2simnodeid(t_commrec *cr)
6046 gmx_domdec_comm_t *comm;
6053 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6055 snew(comm->ddindex2simnodeid, dd->nnodes);
6056 snew(buf, dd->nnodes);
6057 if (cr->duty & DUTY_PP)
6059 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6062 /* Communicate the ddindex to simulation nodeid index */
6063 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6064 cr->mpi_comm_mysim);
6071 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6072 int ncg, int natoms)
6074 gmx_domdec_master_t *ma;
6079 snew(ma->ncg, dd->nnodes);
6080 snew(ma->index, dd->nnodes+1);
6082 snew(ma->nat, dd->nnodes);
6083 snew(ma->ibuf, dd->nnodes*2);
6084 snew(ma->cell_x, DIM);
6085 for (i = 0; i < DIM; i++)
6087 snew(ma->cell_x[i], dd->nc[i]+1);
6090 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6096 snew(ma->vbuf, natoms);
6102 static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
6103 int gmx_unused reorder)
6106 gmx_domdec_comm_t *comm;
6117 if (comm->bCartesianPP)
6119 for (i = 1; i < DIM; i++)
6121 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6123 if (bDiv[YY] || bDiv[ZZ])
6125 comm->bCartesianPP_PME = TRUE;
6126 /* If we have 2D PME decomposition, which is always in x+y,
6127 * we stack the PME only nodes in z.
6128 * Otherwise we choose the direction that provides the thinnest slab
6129 * of PME only nodes as this will have the least effect
6130 * on the PP communication.
6131 * But for the PME communication the opposite might be better.
6133 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6135 dd->nc[YY] > dd->nc[ZZ]))
6137 comm->cartpmedim = ZZ;
6141 comm->cartpmedim = YY;
6143 comm->ntot[comm->cartpmedim]
6144 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6148 fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6150 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6155 if (comm->bCartesianPP_PME)
6159 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6162 for (i = 0; i < DIM; i++)
6166 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6169 MPI_Comm_rank(comm_cart, &rank);
6170 if (MASTERNODE(cr) && rank != 0)
6172 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6175 /* With this assigment we loose the link to the original communicator
6176 * which will usually be MPI_COMM_WORLD, unless have multisim.
6178 cr->mpi_comm_mysim = comm_cart;
6179 cr->sim_nodeid = rank;
6181 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6185 fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
6186 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6189 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6193 if (cr->npmenodes == 0 ||
6194 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6196 cr->duty = DUTY_PME;
6199 /* Split the sim communicator into PP and PME only nodes */
6200 MPI_Comm_split(cr->mpi_comm_mysim,
6202 dd_index(comm->ntot, dd->ci),
6203 &cr->mpi_comm_mygroup);
6207 switch (dd_node_order)
6212 fprintf(fplog, "Order of the ranks: PP first, PME last\n");
6215 case ddnoINTERLEAVE:
6216 /* Interleave the PP-only and PME-only nodes,
6217 * as on clusters with dual-core machines this will double
6218 * the communication bandwidth of the PME processes
6219 * and thus speed up the PP <-> PME and inter PME communication.
6223 fprintf(fplog, "Interleaving PP and PME ranks\n");
6225 comm->pmenodes = dd_pmenodes(cr);
6230 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6233 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6235 cr->duty = DUTY_PME;
6242 /* Split the sim communicator into PP and PME only nodes */
6243 MPI_Comm_split(cr->mpi_comm_mysim,
6246 &cr->mpi_comm_mygroup);
6247 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6253 fprintf(fplog, "This rank does only %s work.\n\n",
6254 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
6258 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6261 gmx_domdec_comm_t *comm;
6267 copy_ivec(dd->nc, comm->ntot);
6269 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6270 comm->bCartesianPP_PME = FALSE;
6272 /* Reorder the nodes by default. This might change the MPI ranks.
6273 * Real reordering is only supported on very few architectures,
6274 * Blue Gene is one of them.
6276 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6278 if (cr->npmenodes > 0)
6280 /* Split the communicator into a PP and PME part */
6281 split_communicator(fplog, cr, dd_node_order, CartReorder);
6282 if (comm->bCartesianPP_PME)
6284 /* We (possibly) reordered the nodes in split_communicator,
6285 * so it is no longer required in make_pp_communicator.
6287 CartReorder = FALSE;
6292 /* All nodes do PP and PME */
6294 /* We do not require separate communicators */
6295 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6299 if (cr->duty & DUTY_PP)
6301 /* Copy or make a new PP communicator */
6302 make_pp_communicator(fplog, cr, CartReorder);
6306 receive_ddindex2simnodeid(cr);
6309 if (!(cr->duty & DUTY_PME))
6311 /* Set up the commnuication to our PME node */
6312 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6313 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6316 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6317 dd->pme_nodeid, dd->pme_receive_vir_ener);
6322 dd->pme_nodeid = -1;
6327 dd->ma = init_gmx_domdec_master_t(dd,
6329 comm->cgs_gl.index[comm->cgs_gl.nr]);
6333 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6335 real *slb_frac, tot;
6340 if (nc > 1 && size_string != NULL)
6344 fprintf(fplog, "Using static load balancing for the %s direction\n",
6349 for (i = 0; i < nc; i++)
6352 sscanf(size_string, "%lf%n", &dbl, &n);
6355 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6364 fprintf(fplog, "Relative cell sizes:");
6366 for (i = 0; i < nc; i++)
6371 fprintf(fplog, " %5.3f", slb_frac[i]);
6376 fprintf(fplog, "\n");
6383 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6386 gmx_mtop_ilistloop_t iloop;
6390 iloop = gmx_mtop_ilistloop_init(mtop);
6391 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6393 for (ftype = 0; ftype < F_NRE; ftype++)
6395 if ((interaction_function[ftype].flags & IF_BOND) &&
6398 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
6406 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6412 val = getenv(env_var);
6415 if (sscanf(val, "%d", &nst) <= 0)
6421 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6429 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6433 fprintf(stderr, "\n%s\n", warn_string);
6437 fprintf(fplog, "\n%s\n", warn_string);
6441 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6442 t_inputrec *ir, FILE *fplog)
6444 if (ir->ePBC == epbcSCREW &&
6445 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6447 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6450 if (ir->ns_type == ensSIMPLE)
6452 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6455 if (ir->nstlist == 0)
6457 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6460 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6462 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6466 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6471 r = ddbox->box_size[XX];
6472 for (di = 0; di < dd->ndim; di++)
6475 /* Check using the initial average cell size */
6476 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6482 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6483 const char *dlb_opt, gmx_bool bRecordLoad,
6484 unsigned long Flags, t_inputrec *ir)
6492 case 'a': eDLB = edlbAUTO; break;
6493 case 'n': eDLB = edlbNO; break;
6494 case 'y': eDLB = edlbYES; break;
6495 default: gmx_incons("Unknown dlb_opt");
6498 if (Flags & MD_RERUN)
6503 if (!EI_DYNAMICS(ir->eI))
6505 if (eDLB == edlbYES)
6507 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6508 dd_warning(cr, fplog, buf);
6516 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6521 if (Flags & MD_REPRODUCIBLE)
6528 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6532 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6535 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
6543 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6548 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6550 /* Decomposition order z,y,x */
6553 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6555 for (dim = DIM-1; dim >= 0; dim--)
6557 if (dd->nc[dim] > 1)
6559 dd->dim[dd->ndim++] = dim;
6565 /* Decomposition order x,y,z */
6566 for (dim = 0; dim < DIM; dim++)
6568 if (dd->nc[dim] > 1)
6570 dd->dim[dd->ndim++] = dim;
6576 static gmx_domdec_comm_t *init_dd_comm()
6578 gmx_domdec_comm_t *comm;
6582 snew(comm->cggl_flag, DIM*2);
6583 snew(comm->cgcm_state, DIM*2);
6584 for (i = 0; i < DIM*2; i++)
6586 comm->cggl_flag_nalloc[i] = 0;
6587 comm->cgcm_state_nalloc[i] = 0;
6590 comm->nalloc_int = 0;
6591 comm->buf_int = NULL;
6593 vec_rvec_init(&comm->vbuf);
6595 comm->n_load_have = 0;
6596 comm->n_load_collect = 0;
6598 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6600 comm->sum_nat[i] = 0;
6604 comm->load_step = 0;
6607 clear_ivec(comm->load_lim);
6614 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6615 unsigned long Flags,
6617 real comm_distance_min, real rconstr,
6618 const char *dlb_opt, real dlb_scale,
6619 const char *sizex, const char *sizey, const char *sizez,
6620 gmx_mtop_t *mtop, t_inputrec *ir,
6621 matrix box, rvec *x,
6623 int *npme_x, int *npme_y)
6626 gmx_domdec_comm_t *comm;
6629 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6636 "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
6641 dd->comm = init_dd_comm();
6643 snew(comm->cggl_flag, DIM*2);
6644 snew(comm->cgcm_state, DIM*2);
6646 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6647 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6649 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
6650 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
6651 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
6652 recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
6653 comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
6654 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
6655 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
6656 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
6658 dd->pme_recv_f_alloc = 0;
6659 dd->pme_recv_f_buf = NULL;
6661 if (dd->bSendRecv2 && fplog)
6663 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6669 fprintf(fplog, "Will load balance based on FLOP count\n");
6671 if (comm->eFlop > 1)
6673 srand(1+cr->nodeid);
6675 comm->bRecordLoad = TRUE;
6679 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6683 /* Initialize to GPU share count to 0, might change later */
6684 comm->nrank_gpu_shared = 0;
6686 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6688 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6691 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6693 dd->bGridJump = comm->bDynLoadBal;
6694 comm->bPMELoadBalDLBLimits = FALSE;
6696 if (comm->nstSortCG)
6700 if (comm->nstSortCG == 1)
6702 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6706 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6710 snew(comm->sort, 1);
6716 fprintf(fplog, "Will not sort the charge groups\n");
6720 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6722 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6723 if (comm->bInterCGBondeds)
6725 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6729 comm->bInterCGMultiBody = FALSE;
6732 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6733 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6735 if (ir->rlistlong == 0)
6737 /* Set the cut-off to some very large value,
6738 * so we don't need if statements everywhere in the code.
6739 * We use sqrt, since the cut-off is squared in some places.
6741 comm->cutoff = GMX_CUTOFF_INF;
6745 comm->cutoff = ir->rlistlong;
6747 comm->cutoff_mbody = 0;
6749 comm->cellsize_limit = 0;
6750 comm->bBondComm = FALSE;
6752 if (comm->bInterCGBondeds)
6754 if (comm_distance_min > 0)
6756 comm->cutoff_mbody = comm_distance_min;
6757 if (Flags & MD_DDBONDCOMM)
6759 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6763 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6765 r_bonded_limit = comm->cutoff_mbody;
6767 else if (ir->bPeriodicMols)
6769 /* Can not easily determine the required cut-off */
6770 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6771 comm->cutoff_mbody = comm->cutoff/2;
6772 r_bonded_limit = comm->cutoff_mbody;
6778 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6779 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6781 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6782 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6784 /* We use an initial margin of 10% for the minimum cell size,
6785 * except when we are just below the non-bonded cut-off.
6787 if (Flags & MD_DDBONDCOMM)
6789 if (max(r_2b, r_mb) > comm->cutoff)
6791 r_bonded = max(r_2b, r_mb);
6792 r_bonded_limit = 1.1*r_bonded;
6793 comm->bBondComm = TRUE;
6798 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6800 /* We determine cutoff_mbody later */
6804 /* No special bonded communication,
6805 * simply increase the DD cut-off.
6807 r_bonded_limit = 1.1*max(r_2b, r_mb);
6808 comm->cutoff_mbody = r_bonded_limit;
6809 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6812 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6816 "Minimum cell size due to bonded interactions: %.3f nm\n",
6817 comm->cellsize_limit);
6821 if (dd->bInterCGcons && rconstr <= 0)
6823 /* There is a cell size limit due to the constraints (P-LINCS) */
6824 rconstr = constr_r_max(fplog, mtop, ir);
6828 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6830 if (rconstr > comm->cellsize_limit)
6832 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6836 else if (rconstr > 0 && fplog)
6838 /* Here we do not check for dd->bInterCGcons,
6839 * because one can also set a cell size limit for virtual sites only
6840 * and at this point we don't know yet if there are intercg v-sites.
6843 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6846 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6848 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6852 copy_ivec(nc, dd->nc);
6853 set_dd_dim(fplog, dd);
6854 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6856 if (cr->npmenodes == -1)
6860 acs = average_cellsize_min(dd, ddbox);
6861 if (acs < comm->cellsize_limit)
6865 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6867 gmx_fatal_collective(FARGS, cr, NULL,
6868 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6869 acs, comm->cellsize_limit);
6874 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6876 /* We need to choose the optimal DD grid and possibly PME nodes */
6877 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6878 comm->eDLB != edlbNO, dlb_scale,
6879 comm->cellsize_limit, comm->cutoff,
6880 comm->bInterCGBondeds);
6882 if (dd->nc[XX] == 0)
6884 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6885 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
6886 !bC ? "-rdd" : "-rcon",
6887 comm->eDLB != edlbNO ? " or -dds" : "",
6888 bC ? " or your LINCS settings" : "");
6890 gmx_fatal_collective(FARGS, cr, NULL,
6891 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
6893 "Look in the log file for details on the domain decomposition",
6894 cr->nnodes-cr->npmenodes, limit, buf);
6896 set_dd_dim(fplog, dd);
6902 "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
6903 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6906 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6907 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6909 gmx_fatal_collective(FARGS, cr, NULL,
6910 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
6911 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6913 if (cr->npmenodes > dd->nnodes)
6915 gmx_fatal_collective(FARGS, cr, NULL,
6916 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6918 if (cr->npmenodes > 0)
6920 comm->npmenodes = cr->npmenodes;
6924 comm->npmenodes = dd->nnodes;
6927 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6929 /* The following choices should match those
6930 * in comm_cost_est in domdec_setup.c.
6931 * Note that here the checks have to take into account
6932 * that the decomposition might occur in a different order than xyz
6933 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6934 * in which case they will not match those in comm_cost_est,
6935 * but since that is mainly for testing purposes that's fine.
6937 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6938 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6939 getenv("GMX_PMEONEDD") == NULL)
6941 comm->npmedecompdim = 2;
6942 comm->npmenodes_x = dd->nc[XX];
6943 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6947 /* In case nc is 1 in both x and y we could still choose to
6948 * decompose pme in y instead of x, but we use x for simplicity.
6950 comm->npmedecompdim = 1;
6951 if (dd->dim[0] == YY)
6953 comm->npmenodes_x = 1;
6954 comm->npmenodes_y = comm->npmenodes;
6958 comm->npmenodes_x = comm->npmenodes;
6959 comm->npmenodes_y = 1;
6964 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6965 comm->npmenodes_x, comm->npmenodes_y, 1);
6970 comm->npmedecompdim = 0;
6971 comm->npmenodes_x = 0;
6972 comm->npmenodes_y = 0;
6975 /* Technically we don't need both of these,
6976 * but it simplifies code not having to recalculate it.
6978 *npme_x = comm->npmenodes_x;
6979 *npme_y = comm->npmenodes_y;
6981 snew(comm->slb_frac, DIM);
6982 if (comm->eDLB == edlbNO)
6984 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6985 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6986 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
6989 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6991 if (comm->bBondComm || comm->eDLB != edlbNO)
6993 /* Set the bonded communication distance to halfway
6994 * the minimum and the maximum,
6995 * since the extra communication cost is nearly zero.
6997 acs = average_cellsize_min(dd, ddbox);
6998 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6999 if (comm->eDLB != edlbNO)
7001 /* Check if this does not limit the scaling */
7002 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
7004 if (!comm->bBondComm)
7006 /* Without bBondComm do not go beyond the n.b. cut-off */
7007 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
7008 if (comm->cellsize_limit >= comm->cutoff)
7010 /* We don't loose a lot of efficieny
7011 * when increasing it to the n.b. cut-off.
7012 * It can even be slightly faster, because we need
7013 * less checks for the communication setup.
7015 comm->cutoff_mbody = comm->cutoff;
7018 /* Check if we did not end up below our original limit */
7019 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
7021 if (comm->cutoff_mbody > comm->cellsize_limit)
7023 comm->cellsize_limit = comm->cutoff_mbody;
7026 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
7031 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
7032 "cellsize limit %f\n",
7033 comm->bBondComm, comm->cellsize_limit);
7038 check_dd_restrictions(cr, dd, ir, fplog);
7041 comm->partition_step = INT_MIN;
7044 clear_dd_cycle_counts(dd);
7049 static void set_dlb_limits(gmx_domdec_t *dd)
7054 for (d = 0; d < dd->ndim; d++)
7056 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7057 dd->comm->cellsize_min[dd->dim[d]] =
7058 dd->comm->cellsize_min_dlb[dd->dim[d]];
7063 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7066 gmx_domdec_comm_t *comm;
7076 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
7079 cellsize_min = comm->cellsize_min[dd->dim[0]];
7080 for (d = 1; d < dd->ndim; d++)
7082 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7085 if (cellsize_min < comm->cellsize_limit*1.05)
7087 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7089 /* Change DLB from "auto" to "no". */
7090 comm->eDLB = edlbNO;
7095 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7096 comm->bDynLoadBal = TRUE;
7097 dd->bGridJump = TRUE;
7101 /* We can set the required cell size info here,
7102 * so we do not need to communicate this.
7103 * The grid is completely uniform.
7105 for (d = 0; d < dd->ndim; d++)
7109 comm->load[d].sum_m = comm->load[d].sum;
7111 nc = dd->nc[dd->dim[d]];
7112 for (i = 0; i < nc; i++)
7114 comm->root[d]->cell_f[i] = i/(real)nc;
7117 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7118 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7121 comm->root[d]->cell_f[nc] = 1.0;
7126 static char *init_bLocalCG(gmx_mtop_t *mtop)
7131 ncg = ncg_mtop(mtop);
7132 snew(bLocalCG, ncg);
7133 for (cg = 0; cg < ncg; cg++)
7135 bLocalCG[cg] = FALSE;
7141 void dd_init_bondeds(FILE *fplog,
7142 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7144 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7146 gmx_domdec_comm_t *comm;
7150 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7154 if (comm->bBondComm)
7156 /* Communicate atoms beyond the cut-off for bonded interactions */
7159 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7161 comm->bLocalCG = init_bLocalCG(mtop);
7165 /* Only communicate atoms based on cut-off */
7166 comm->cglink = NULL;
7167 comm->bLocalCG = NULL;
7171 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7173 gmx_bool bDynLoadBal, real dlb_scale,
7176 gmx_domdec_comm_t *comm;
7191 fprintf(fplog, "The maximum number of communication pulses is:");
7192 for (d = 0; d < dd->ndim; d++)
7194 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7196 fprintf(fplog, "\n");
7197 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7198 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7199 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7200 for (d = 0; d < DIM; d++)
7204 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7211 comm->cellsize_min_dlb[d]/
7212 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7214 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7217 fprintf(fplog, "\n");
7221 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
7222 fprintf(fplog, "The initial number of communication pulses is:");
7223 for (d = 0; d < dd->ndim; d++)
7225 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7227 fprintf(fplog, "\n");
7228 fprintf(fplog, "The initial domain decomposition cell size is:");
7229 for (d = 0; d < DIM; d++)
7233 fprintf(fplog, " %c %.2f nm",
7234 dim2char(d), dd->comm->cellsize_min[d]);
7237 fprintf(fplog, "\n\n");
7240 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7242 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7243 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7244 "non-bonded interactions", "", comm->cutoff);
7248 limit = dd->comm->cellsize_limit;
7252 if (dynamic_dd_box(ddbox, ir))
7254 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7256 limit = dd->comm->cellsize_min[XX];
7257 for (d = 1; d < DIM; d++)
7259 limit = min(limit, dd->comm->cellsize_min[d]);
7263 if (comm->bInterCGBondeds)
7265 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7266 "two-body bonded interactions", "(-rdd)",
7267 max(comm->cutoff, comm->cutoff_mbody));
7268 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7269 "multi-body bonded interactions", "(-rdd)",
7270 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7274 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7275 "virtual site constructions", "(-rcon)", limit);
7277 if (dd->constraint_comm)
7279 sprintf(buf, "atoms separated by up to %d constraints",
7281 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7282 buf, "(-rcon)", limit);
7284 fprintf(fplog, "\n");
7290 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7292 const t_inputrec *ir,
7293 const gmx_ddbox_t *ddbox)
7295 gmx_domdec_comm_t *comm;
7296 int d, dim, npulse, npulse_d_max, npulse_d;
7301 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7303 /* Determine the maximum number of comm. pulses in one dimension */
7305 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7307 /* Determine the maximum required number of grid pulses */
7308 if (comm->cellsize_limit >= comm->cutoff)
7310 /* Only a single pulse is required */
7313 else if (!bNoCutOff && comm->cellsize_limit > 0)
7315 /* We round down slightly here to avoid overhead due to the latency
7316 * of extra communication calls when the cut-off
7317 * would be only slightly longer than the cell size.
7318 * Later cellsize_limit is redetermined,
7319 * so we can not miss interactions due to this rounding.
7321 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7325 /* There is no cell size limit */
7326 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7329 if (!bNoCutOff && npulse > 1)
7331 /* See if we can do with less pulses, based on dlb_scale */
7333 for (d = 0; d < dd->ndim; d++)
7336 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7337 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7338 npulse_d_max = max(npulse_d_max, npulse_d);
7340 npulse = min(npulse, npulse_d_max);
7343 /* This env var can override npulse */
7344 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
7351 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7352 for (d = 0; d < dd->ndim; d++)
7354 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7355 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7356 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7357 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7358 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7360 comm->bVacDLBNoLimit = FALSE;
7364 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7365 if (!comm->bVacDLBNoLimit)
7367 comm->cellsize_limit = max(comm->cellsize_limit,
7368 comm->cutoff/comm->maxpulse);
7370 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7371 /* Set the minimum cell size for each DD dimension */
7372 for (d = 0; d < dd->ndim; d++)
7374 if (comm->bVacDLBNoLimit ||
7375 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7377 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7381 comm->cellsize_min_dlb[dd->dim[d]] =
7382 comm->cutoff/comm->cd[d].np_dlb;
7385 if (comm->cutoff_mbody <= 0)
7387 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7389 if (comm->bDynLoadBal)
7395 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7397 /* If each molecule is a single charge group
7398 * or we use domain decomposition for each periodic dimension,
7399 * we do not need to take pbc into account for the bonded interactions.
7401 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7404 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
7407 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7408 t_inputrec *ir, gmx_ddbox_t *ddbox)
7410 gmx_domdec_comm_t *comm;
7416 /* Initialize the thread data.
7417 * This can not be done in init_domain_decomposition,
7418 * as the numbers of threads is determined later.
7420 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7423 snew(comm->dth, comm->nth);
7426 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7428 init_ddpme(dd, &comm->ddpme[0], 0);
7429 if (comm->npmedecompdim >= 2)
7431 init_ddpme(dd, &comm->ddpme[1], 1);
7436 comm->npmenodes = 0;
7437 if (dd->pme_nodeid >= 0)
7439 gmx_fatal_collective(FARGS, NULL, dd,
7440 "Can not have separate PME ranks without PME electrostatics");
7446 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7448 if (comm->eDLB != edlbNO)
7450 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7453 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7454 if (comm->eDLB == edlbAUTO)
7458 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7460 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
7463 if (ir->ePBC == epbcNONE)
7465 vol_frac = 1 - 1/(double)dd->nnodes;
7470 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7474 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7476 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7478 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
7481 static gmx_bool test_dd_cutoff(t_commrec *cr,
7482 t_state *state, t_inputrec *ir,
7493 set_ddbox(dd, FALSE, cr, ir, state->box,
7494 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7498 for (d = 0; d < dd->ndim; d++)
7502 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7503 if (dynamic_dd_box(&ddbox, ir))
7505 inv_cell_size *= DD_PRES_SCALE_MARGIN;
7508 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7510 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7511 dd->comm->cd[d].np_dlb > 0)
7513 if (np > dd->comm->cd[d].np_dlb)
7518 /* If a current local cell size is smaller than the requested
7519 * cut-off, we could still fix it, but this gets very complicated.
7520 * Without fixing here, we might actually need more checks.
7522 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7529 if (dd->comm->eDLB != edlbNO)
7531 /* If DLB is not active yet, we don't need to check the grid jumps.
7532 * Actually we shouldn't, because then the grid jump data is not set.
7534 if (dd->comm->bDynLoadBal &&
7535 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7540 gmx_sumi(1, &LocallyLimited, cr);
7542 if (LocallyLimited > 0)
7551 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7554 gmx_bool bCutoffAllowed;
7556 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7560 cr->dd->comm->cutoff = cutoff_req;
7563 return bCutoffAllowed;
7566 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7568 gmx_domdec_comm_t *comm;
7570 comm = cr->dd->comm;
7572 /* Turn on the DLB limiting (might have been on already) */
7573 comm->bPMELoadBalDLBLimits = TRUE;
7575 /* Change the cut-off limit */
7576 comm->PMELoadBal_max_cutoff = comm->cutoff;
7579 static void merge_cg_buffers(int ncell,
7580 gmx_domdec_comm_dim_t *cd, int pulse,
7582 int *index_gl, int *recv_i,
7583 rvec *cg_cm, rvec *recv_vr,
7585 cginfo_mb_t *cginfo_mb, int *cginfo)
7587 gmx_domdec_ind_t *ind, *ind_p;
7588 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7589 int shift, shift_at;
7591 ind = &cd->ind[pulse];
7593 /* First correct the already stored data */
7594 shift = ind->nrecv[ncell];
7595 for (cell = ncell-1; cell >= 0; cell--)
7597 shift -= ind->nrecv[cell];
7600 /* Move the cg's present from previous grid pulses */
7601 cg0 = ncg_cell[ncell+cell];
7602 cg1 = ncg_cell[ncell+cell+1];
7603 cgindex[cg1+shift] = cgindex[cg1];
7604 for (cg = cg1-1; cg >= cg0; cg--)
7606 index_gl[cg+shift] = index_gl[cg];
7607 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7608 cgindex[cg+shift] = cgindex[cg];
7609 cginfo[cg+shift] = cginfo[cg];
7611 /* Correct the already stored send indices for the shift */
7612 for (p = 1; p <= pulse; p++)
7614 ind_p = &cd->ind[p];
7616 for (c = 0; c < cell; c++)
7618 cg0 += ind_p->nsend[c];
7620 cg1 = cg0 + ind_p->nsend[cell];
7621 for (cg = cg0; cg < cg1; cg++)
7623 ind_p->index[cg] += shift;
7629 /* Merge in the communicated buffers */
7633 for (cell = 0; cell < ncell; cell++)
7635 cg1 = ncg_cell[ncell+cell+1] + shift;
7638 /* Correct the old cg indices */
7639 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7641 cgindex[cg+1] += shift_at;
7644 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7646 /* Copy this charge group from the buffer */
7647 index_gl[cg1] = recv_i[cg0];
7648 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7649 /* Add it to the cgindex */
7650 cg_gl = index_gl[cg1];
7651 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7652 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7653 cgindex[cg1+1] = cgindex[cg1] + nat;
7658 shift += ind->nrecv[cell];
7659 ncg_cell[ncell+cell+1] = cg1;
7663 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7664 int nzone, int cg0, const int *cgindex)
7668 /* Store the atom block boundaries for easy copying of communication buffers
7671 for (zone = 0; zone < nzone; zone++)
7673 for (p = 0; p < cd->np; p++)
7675 cd->ind[p].cell2at0[zone] = cgindex[cg];
7676 cg += cd->ind[p].nrecv[zone];
7677 cd->ind[p].cell2at1[zone] = cgindex[cg];
7682 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7688 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7690 if (!bLocalCG[link->a[i]])
7699 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7701 real c[DIM][4]; /* the corners for the non-bonded communication */
7702 real cr0; /* corner for rounding */
7703 real cr1[4]; /* corners for rounding */
7704 real bc[DIM]; /* corners for bounded communication */
7705 real bcr1; /* corner for rounding for bonded communication */
7708 /* Determine the corners of the domain(s) we are communicating with */
7710 set_dd_corners(const gmx_domdec_t *dd,
7711 int dim0, int dim1, int dim2,
7715 const gmx_domdec_comm_t *comm;
7716 const gmx_domdec_zones_t *zones;
7721 zones = &comm->zones;
7723 /* Keep the compiler happy */
7727 /* The first dimension is equal for all cells */
7728 c->c[0][0] = comm->cell_x0[dim0];
7731 c->bc[0] = c->c[0][0];
7736 /* This cell row is only seen from the first row */
7737 c->c[1][0] = comm->cell_x0[dim1];
7738 /* All rows can see this row */
7739 c->c[1][1] = comm->cell_x0[dim1];
7742 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7745 /* For the multi-body distance we need the maximum */
7746 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7749 /* Set the upper-right corner for rounding */
7750 c->cr0 = comm->cell_x1[dim0];
7755 for (j = 0; j < 4; j++)
7757 c->c[2][j] = comm->cell_x0[dim2];
7761 /* Use the maximum of the i-cells that see a j-cell */
7762 for (i = 0; i < zones->nizone; i++)
7764 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7770 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7776 /* For the multi-body distance we need the maximum */
7777 c->bc[2] = comm->cell_x0[dim2];
7778 for (i = 0; i < 2; i++)
7780 for (j = 0; j < 2; j++)
7782 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7788 /* Set the upper-right corner for rounding */
7789 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7790 * Only cell (0,0,0) can see cell 7 (1,1,1)
7792 c->cr1[0] = comm->cell_x1[dim1];
7793 c->cr1[3] = comm->cell_x1[dim1];
7796 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7799 /* For the multi-body distance we need the maximum */
7800 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7807 /* Determine which cg's we need to send in this pulse from this zone */
7809 get_zone_pulse_cgs(gmx_domdec_t *dd,
7810 int zonei, int zone,
7812 const int *index_gl,
7814 int dim, int dim_ind,
7815 int dim0, int dim1, int dim2,
7816 real r_comm2, real r_bcomm2,
7820 real skew_fac2_d, real skew_fac_01,
7821 rvec *v_d, rvec *v_0, rvec *v_1,
7822 const dd_corners_t *c,
7824 gmx_bool bDistBonded,
7830 gmx_domdec_ind_t *ind,
7831 int **ibuf, int *ibuf_nalloc,
7837 gmx_domdec_comm_t *comm;
7839 gmx_bool bDistMB_pulse;
7841 real r2, rb2, r, tric_sh;
7844 int nsend_z, nsend, nat;
7848 bScrew = (dd->bScrewPBC && dim == XX);
7850 bDistMB_pulse = (bDistMB && bDistBonded);
7856 for (cg = cg0; cg < cg1; cg++)
7860 if (tric_dist[dim_ind] == 0)
7862 /* Rectangular direction, easy */
7863 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7870 r = cg_cm[cg][dim] - c->bc[dim_ind];
7876 /* Rounding gives at most a 16% reduction
7877 * in communicated atoms
7879 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7881 r = cg_cm[cg][dim0] - c->cr0;
7882 /* This is the first dimension, so always r >= 0 */
7889 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7891 r = cg_cm[cg][dim1] - c->cr1[zone];
7898 r = cg_cm[cg][dim1] - c->bcr1;
7908 /* Triclinic direction, more complicated */
7911 /* Rounding, conservative as the skew_fac multiplication
7912 * will slightly underestimate the distance.
7914 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7916 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7917 for (i = dim0+1; i < DIM; i++)
7919 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7921 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7924 rb[dim0] = rn[dim0];
7927 /* Take care that the cell planes along dim0 might not
7928 * be orthogonal to those along dim1 and dim2.
7930 for (i = 1; i <= dim_ind; i++)
7933 if (normal[dim0][dimd] > 0)
7935 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7938 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7943 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7945 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7947 for (i = dim1+1; i < DIM; i++)
7949 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7951 rn[dim1] += tric_sh;
7954 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7955 /* Take care of coupling of the distances
7956 * to the planes along dim0 and dim1 through dim2.
7958 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7959 /* Take care that the cell planes along dim1
7960 * might not be orthogonal to that along dim2.
7962 if (normal[dim1][dim2] > 0)
7964 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7970 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7973 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7974 /* Take care of coupling of the distances
7975 * to the planes along dim0 and dim1 through dim2.
7977 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7978 /* Take care that the cell planes along dim1
7979 * might not be orthogonal to that along dim2.
7981 if (normal[dim1][dim2] > 0)
7983 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7988 /* The distance along the communication direction */
7989 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7991 for (i = dim+1; i < DIM; i++)
7993 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7998 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7999 /* Take care of coupling of the distances
8000 * to the planes along dim0 and dim1 through dim2.
8002 if (dim_ind == 1 && zonei == 1)
8004 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
8010 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
8013 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
8014 /* Take care of coupling of the distances
8015 * to the planes along dim0 and dim1 through dim2.
8017 if (dim_ind == 1 && zonei == 1)
8019 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
8027 ((bDistMB && rb2 < r_bcomm2) ||
8028 (bDist2B && r2 < r_bcomm2)) &&
8030 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
8031 missing_link(comm->cglink, index_gl[cg],
8034 /* Make an index to the local charge groups */
8035 if (nsend+1 > ind->nalloc)
8037 ind->nalloc = over_alloc_large(nsend+1);
8038 srenew(ind->index, ind->nalloc);
8040 if (nsend+1 > *ibuf_nalloc)
8042 *ibuf_nalloc = over_alloc_large(nsend+1);
8043 srenew(*ibuf, *ibuf_nalloc);
8045 ind->index[nsend] = cg;
8046 (*ibuf)[nsend] = index_gl[cg];
8048 vec_rvec_check_alloc(vbuf, nsend+1);
8050 if (dd->ci[dim] == 0)
8052 /* Correct cg_cm for pbc */
8053 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
8056 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8057 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8062 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8065 nat += cgindex[cg+1] - cgindex[cg];
8071 *nsend_z_ptr = nsend_z;
8074 static void setup_dd_communication(gmx_domdec_t *dd,
8075 matrix box, gmx_ddbox_t *ddbox,
8076 t_forcerec *fr, t_state *state, rvec **f)
8078 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8079 int nzone, nzone_send, zone, zonei, cg0, cg1;
8080 int c, i, j, cg, cg_gl, nrcg;
8081 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8082 gmx_domdec_comm_t *comm;
8083 gmx_domdec_zones_t *zones;
8084 gmx_domdec_comm_dim_t *cd;
8085 gmx_domdec_ind_t *ind;
8086 cginfo_mb_t *cginfo_mb;
8087 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8088 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8089 dd_corners_t corners;
8091 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8092 real skew_fac2_d, skew_fac_01;
8099 fprintf(debug, "Setting up DD communication\n");
8104 switch (fr->cutoff_scheme)
8113 gmx_incons("unimplemented");
8117 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8119 dim = dd->dim[dim_ind];
8121 /* Check if we need to use triclinic distances */
8122 tric_dist[dim_ind] = 0;
8123 for (i = 0; i <= dim_ind; i++)
8125 if (ddbox->tric_dir[dd->dim[i]])
8127 tric_dist[dim_ind] = 1;
8132 bBondComm = comm->bBondComm;
8134 /* Do we need to determine extra distances for multi-body bondeds? */
8135 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8137 /* Do we need to determine extra distances for only two-body bondeds? */
8138 bDist2B = (bBondComm && !bDistMB);
8140 r_comm2 = sqr(comm->cutoff);
8141 r_bcomm2 = sqr(comm->cutoff_mbody);
8145 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8148 zones = &comm->zones;
8151 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8152 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8154 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8156 /* Triclinic stuff */
8157 normal = ddbox->normal;
8161 v_0 = ddbox->v[dim0];
8162 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8164 /* Determine the coupling coefficient for the distances
8165 * to the cell planes along dim0 and dim1 through dim2.
8166 * This is required for correct rounding.
8169 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8172 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8178 v_1 = ddbox->v[dim1];
8181 zone_cg_range = zones->cg_range;
8182 index_gl = dd->index_gl;
8183 cgindex = dd->cgindex;
8184 cginfo_mb = fr->cginfo_mb;
8186 zone_cg_range[0] = 0;
8187 zone_cg_range[1] = dd->ncg_home;
8188 comm->zone_ncg1[0] = dd->ncg_home;
8189 pos_cg = dd->ncg_home;
8191 nat_tot = dd->nat_home;
8193 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8195 dim = dd->dim[dim_ind];
8196 cd = &comm->cd[dim_ind];
8198 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8200 /* No pbc in this dimension, the first node should not comm. */
8208 v_d = ddbox->v[dim];
8209 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8211 cd->bInPlace = TRUE;
8212 for (p = 0; p < cd->np; p++)
8214 /* Only atoms communicated in the first pulse are used
8215 * for multi-body bonded interactions or for bBondComm.
8217 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8222 for (zone = 0; zone < nzone_send; zone++)
8224 if (tric_dist[dim_ind] && dim_ind > 0)
8226 /* Determine slightly more optimized skew_fac's
8228 * This reduces the number of communicated atoms
8229 * by about 10% for 3D DD of rhombic dodecahedra.
8231 for (dimd = 0; dimd < dim; dimd++)
8233 sf2_round[dimd] = 1;
8234 if (ddbox->tric_dir[dimd])
8236 for (i = dd->dim[dimd]+1; i < DIM; i++)
8238 /* If we are shifted in dimension i
8239 * and the cell plane is tilted forward
8240 * in dimension i, skip this coupling.
8242 if (!(zones->shift[nzone+zone][i] &&
8243 ddbox->v[dimd][i][dimd] >= 0))
8246 sqr(ddbox->v[dimd][i][dimd]);
8249 sf2_round[dimd] = 1/sf2_round[dimd];
8254 zonei = zone_perm[dim_ind][zone];
8257 /* Here we permutate the zones to obtain a convenient order
8258 * for neighbor searching
8260 cg0 = zone_cg_range[zonei];
8261 cg1 = zone_cg_range[zonei+1];
8265 /* Look only at the cg's received in the previous grid pulse
8267 cg1 = zone_cg_range[nzone+zone+1];
8268 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
8271 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8272 for (th = 0; th < comm->nth; th++)
8274 gmx_domdec_ind_t *ind_p;
8275 int **ibuf_p, *ibuf_nalloc_p;
8277 int *nsend_p, *nat_p;
8283 /* Thread 0 writes in the comm buffers */
8285 ibuf_p = &comm->buf_int;
8286 ibuf_nalloc_p = &comm->nalloc_int;
8287 vbuf_p = &comm->vbuf;
8290 nsend_zone_p = &ind->nsend[zone];
8294 /* Other threads write into temp buffers */
8295 ind_p = &comm->dth[th].ind;
8296 ibuf_p = &comm->dth[th].ibuf;
8297 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8298 vbuf_p = &comm->dth[th].vbuf;
8299 nsend_p = &comm->dth[th].nsend;
8300 nat_p = &comm->dth[th].nat;
8301 nsend_zone_p = &comm->dth[th].nsend_zone;
8303 comm->dth[th].nsend = 0;
8304 comm->dth[th].nat = 0;
8305 comm->dth[th].nsend_zone = 0;
8315 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8316 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8319 /* Get the cg's for this pulse in this zone */
8320 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8322 dim, dim_ind, dim0, dim1, dim2,
8325 normal, skew_fac2_d, skew_fac_01,
8326 v_d, v_0, v_1, &corners, sf2_round,
8327 bDistBonded, bBondComm,
8331 ibuf_p, ibuf_nalloc_p,
8337 /* Append data of threads>=1 to the communication buffers */
8338 for (th = 1; th < comm->nth; th++)
8340 dd_comm_setup_work_t *dth;
8343 dth = &comm->dth[th];
8345 ns1 = nsend + dth->nsend_zone;
8346 if (ns1 > ind->nalloc)
8348 ind->nalloc = over_alloc_dd(ns1);
8349 srenew(ind->index, ind->nalloc);
8351 if (ns1 > comm->nalloc_int)
8353 comm->nalloc_int = over_alloc_dd(ns1);
8354 srenew(comm->buf_int, comm->nalloc_int);
8356 if (ns1 > comm->vbuf.nalloc)
8358 comm->vbuf.nalloc = over_alloc_dd(ns1);
8359 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8362 for (i = 0; i < dth->nsend_zone; i++)
8364 ind->index[nsend] = dth->ind.index[i];
8365 comm->buf_int[nsend] = dth->ibuf[i];
8366 copy_rvec(dth->vbuf.v[i],
8367 comm->vbuf.v[nsend]);
8371 ind->nsend[zone] += dth->nsend_zone;
8374 /* Clear the counts in case we do not have pbc */
8375 for (zone = nzone_send; zone < nzone; zone++)
8377 ind->nsend[zone] = 0;
8379 ind->nsend[nzone] = nsend;
8380 ind->nsend[nzone+1] = nat;
8381 /* Communicate the number of cg's and atoms to receive */
8382 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8383 ind->nsend, nzone+2,
8384 ind->nrecv, nzone+2);
8386 /* The rvec buffer is also required for atom buffers of size nsend
8387 * in dd_move_x and dd_move_f.
8389 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8393 /* We can receive in place if only the last zone is not empty */
8394 for (zone = 0; zone < nzone-1; zone++)
8396 if (ind->nrecv[zone] > 0)
8398 cd->bInPlace = FALSE;
8403 /* The int buffer is only required here for the cg indices */
8404 if (ind->nrecv[nzone] > comm->nalloc_int2)
8406 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8407 srenew(comm->buf_int2, comm->nalloc_int2);
8409 /* The rvec buffer is also required for atom buffers
8410 * of size nrecv in dd_move_x and dd_move_f.
8412 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8413 vec_rvec_check_alloc(&comm->vbuf2, i);
8417 /* Make space for the global cg indices */
8418 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8419 || dd->cg_nalloc == 0)
8421 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8422 srenew(index_gl, dd->cg_nalloc);
8423 srenew(cgindex, dd->cg_nalloc+1);
8425 /* Communicate the global cg indices */
8428 recv_i = index_gl + pos_cg;
8432 recv_i = comm->buf_int2;
8434 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8435 comm->buf_int, nsend,
8436 recv_i, ind->nrecv[nzone]);
8438 /* Make space for cg_cm */
8439 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8440 if (fr->cutoff_scheme == ecutsGROUP)
8448 /* Communicate cg_cm */
8451 recv_vr = cg_cm + pos_cg;
8455 recv_vr = comm->vbuf2.v;
8457 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8458 comm->vbuf.v, nsend,
8459 recv_vr, ind->nrecv[nzone]);
8461 /* Make the charge group index */
8464 zone = (p == 0 ? 0 : nzone - 1);
8465 while (zone < nzone)
8467 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8469 cg_gl = index_gl[pos_cg];
8470 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8471 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8472 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8475 /* Update the charge group presence,
8476 * so we can use it in the next pass of the loop.
8478 comm->bLocalCG[cg_gl] = TRUE;
8484 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8487 zone_cg_range[nzone+zone] = pos_cg;
8492 /* This part of the code is never executed with bBondComm. */
8493 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8494 index_gl, recv_i, cg_cm, recv_vr,
8495 cgindex, fr->cginfo_mb, fr->cginfo);
8496 pos_cg += ind->nrecv[nzone];
8498 nat_tot += ind->nrecv[nzone+1];
8502 /* Store the atom block for easy copying of communication buffers */
8503 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
8507 dd->index_gl = index_gl;
8508 dd->cgindex = cgindex;
8510 dd->ncg_tot = zone_cg_range[zones->n];
8511 dd->nat_tot = nat_tot;
8512 comm->nat[ddnatHOME] = dd->nat_home;
8513 for (i = ddnatZONE; i < ddnatNR; i++)
8515 comm->nat[i] = dd->nat_tot;
8520 /* We don't need to update cginfo, since that was alrady done above.
8521 * So we pass NULL for the forcerec.
8523 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8524 NULL, comm->bLocalCG);
8529 fprintf(debug, "Finished setting up DD communication, zones:");
8530 for (c = 0; c < zones->n; c++)
8532 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8534 fprintf(debug, "\n");
8538 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8542 for (c = 0; c < zones->nizone; c++)
8544 zones->izone[c].cg1 = zones->cg_range[c+1];
8545 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8546 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
8550 static void set_zones_size(gmx_domdec_t *dd,
8551 matrix box, const gmx_ddbox_t *ddbox,
8552 int zone_start, int zone_end)
8554 gmx_domdec_comm_t *comm;
8555 gmx_domdec_zones_t *zones;
8557 int z, zi, zj0, zj1, d, dim;
8560 real size_j, add_tric;
8565 zones = &comm->zones;
8567 /* Do we need to determine extra distances for multi-body bondeds? */
8568 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8570 for (z = zone_start; z < zone_end; z++)
8572 /* Copy cell limits to zone limits.
8573 * Valid for non-DD dims and non-shifted dims.
8575 copy_rvec(comm->cell_x0, zones->size[z].x0);
8576 copy_rvec(comm->cell_x1, zones->size[z].x1);
8579 for (d = 0; d < dd->ndim; d++)
8583 for (z = 0; z < zones->n; z++)
8585 /* With a staggered grid we have different sizes
8586 * for non-shifted dimensions.
8588 if (dd->bGridJump && zones->shift[z][dim] == 0)
8592 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8593 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8597 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8598 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
8604 rcmbs = comm->cutoff_mbody;
8605 if (ddbox->tric_dir[dim])
8607 rcs /= ddbox->skew_fac[dim];
8608 rcmbs /= ddbox->skew_fac[dim];
8611 /* Set the lower limit for the shifted zone dimensions */
8612 for (z = zone_start; z < zone_end; z++)
8614 if (zones->shift[z][dim] > 0)
8617 if (!dd->bGridJump || d == 0)
8619 zones->size[z].x0[dim] = comm->cell_x1[dim];
8620 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8624 /* Here we take the lower limit of the zone from
8625 * the lowest domain of the zone below.
8629 zones->size[z].x0[dim] =
8630 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8636 zones->size[z].x0[dim] =
8637 zones->size[zone_perm[2][z-4]].x0[dim];
8641 zones->size[z].x0[dim] =
8642 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8645 /* A temporary limit, is updated below */
8646 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8650 for (zi = 0; zi < zones->nizone; zi++)
8652 if (zones->shift[zi][dim] == 0)
8654 /* This takes the whole zone into account.
8655 * With multiple pulses this will lead
8656 * to a larger zone then strictly necessary.
8658 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8659 zones->size[zi].x1[dim]+rcmbs);
8667 /* Loop over the i-zones to set the upper limit of each
8670 for (zi = 0; zi < zones->nizone; zi++)
8672 if (zones->shift[zi][dim] == 0)
8674 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8676 if (zones->shift[z][dim] > 0)
8678 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8679 zones->size[zi].x1[dim]+rcs);
8686 for (z = zone_start; z < zone_end; z++)
8688 /* Initialization only required to keep the compiler happy */
8689 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8692 /* To determine the bounding box for a zone we need to find
8693 * the extreme corners of 4, 2 or 1 corners.
8695 nc = 1 << (ddbox->npbcdim - 1);
8697 for (c = 0; c < nc; c++)
8699 /* Set up a zone corner at x=0, ignoring trilinic couplings */
8703 corner[YY] = zones->size[z].x0[YY];
8707 corner[YY] = zones->size[z].x1[YY];
8711 corner[ZZ] = zones->size[z].x0[ZZ];
8715 corner[ZZ] = zones->size[z].x1[ZZ];
8717 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8719 /* With 1D domain decomposition the cg's are not in
8720 * the triclinic box, but triclinic x-y and rectangular y-z.
8721 * Shift y back, so it will later end up at 0.
8723 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8725 /* Apply the triclinic couplings */
8726 assert(ddbox->npbcdim <= DIM);
8727 for (i = YY; i < ddbox->npbcdim; i++)
8729 for (j = XX; j < i; j++)
8731 corner[j] += corner[i]*box[i][j]/box[i][i];
8736 copy_rvec(corner, corner_min);
8737 copy_rvec(corner, corner_max);
8741 for (i = 0; i < DIM; i++)
8743 corner_min[i] = min(corner_min[i], corner[i]);
8744 corner_max[i] = max(corner_max[i], corner[i]);
8748 /* Copy the extreme cornes without offset along x */
8749 for (i = 0; i < DIM; i++)
8751 zones->size[z].bb_x0[i] = corner_min[i];
8752 zones->size[z].bb_x1[i] = corner_max[i];
8754 /* Add the offset along x */
8755 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8756 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
8759 if (zone_start == 0)
8762 for (dim = 0; dim < DIM; dim++)
8764 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8766 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8771 for (z = zone_start; z < zone_end; z++)
8773 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8775 zones->size[z].x0[XX], zones->size[z].x1[XX],
8776 zones->size[z].x0[YY], zones->size[z].x1[YY],
8777 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8778 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8780 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8781 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8782 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
8787 static int comp_cgsort(const void *a, const void *b)
8791 gmx_cgsort_t *cga, *cgb;
8792 cga = (gmx_cgsort_t *)a;
8793 cgb = (gmx_cgsort_t *)b;
8795 comp = cga->nsc - cgb->nsc;
8798 comp = cga->ind_gl - cgb->ind_gl;
8804 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8809 /* Order the data */
8810 for (i = 0; i < n; i++)
8812 buf[i] = a[sort[i].ind];
8815 /* Copy back to the original array */
8816 for (i = 0; i < n; i++)
8822 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8827 /* Order the data */
8828 for (i = 0; i < n; i++)
8830 copy_rvec(v[sort[i].ind], buf[i]);
8833 /* Copy back to the original array */
8834 for (i = 0; i < n; i++)
8836 copy_rvec(buf[i], v[i]);
8840 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8843 int a, atot, cg, cg0, cg1, i;
8845 if (cgindex == NULL)
8847 /* Avoid the useless loop of the atoms within a cg */
8848 order_vec_cg(ncg, sort, v, buf);
8853 /* Order the data */
8855 for (cg = 0; cg < ncg; cg++)
8857 cg0 = cgindex[sort[cg].ind];
8858 cg1 = cgindex[sort[cg].ind+1];
8859 for (i = cg0; i < cg1; i++)
8861 copy_rvec(v[i], buf[a]);
8867 /* Copy back to the original array */
8868 for (a = 0; a < atot; a++)
8870 copy_rvec(buf[a], v[a]);
8874 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8875 int nsort_new, gmx_cgsort_t *sort_new,
8876 gmx_cgsort_t *sort1)
8880 /* The new indices are not very ordered, so we qsort them */
8881 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8883 /* sort2 is already ordered, so now we can merge the two arrays */
8887 while (i2 < nsort2 || i_new < nsort_new)
8891 sort1[i1++] = sort_new[i_new++];
8893 else if (i_new == nsort_new)
8895 sort1[i1++] = sort2[i2++];
8897 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8898 (sort2[i2].nsc == sort_new[i_new].nsc &&
8899 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8901 sort1[i1++] = sort2[i2++];
8905 sort1[i1++] = sort_new[i_new++];
8910 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8912 gmx_domdec_sort_t *sort;
8913 gmx_cgsort_t *cgsort, *sort_i;
8914 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8915 int sort_last, sort_skip;
8917 sort = dd->comm->sort;
8919 a = fr->ns.grid->cell_index;
8921 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8923 if (ncg_home_old >= 0)
8925 /* The charge groups that remained in the same ns grid cell
8926 * are completely ordered. So we can sort efficiently by sorting
8927 * the charge groups that did move into the stationary list.
8932 for (i = 0; i < dd->ncg_home; i++)
8934 /* Check if this cg did not move to another node */
8937 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8939 /* This cg is new on this node or moved ns grid cell */
8940 if (nsort_new >= sort->sort_new_nalloc)
8942 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8943 srenew(sort->sort_new, sort->sort_new_nalloc);
8945 sort_i = &(sort->sort_new[nsort_new++]);
8949 /* This cg did not move */
8950 sort_i = &(sort->sort2[nsort2++]);
8952 /* Sort on the ns grid cell indices
8953 * and the global topology index.
8954 * index_gl is irrelevant with cell ns,
8955 * but we set it here anyhow to avoid a conditional.
8958 sort_i->ind_gl = dd->index_gl[i];
8965 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8968 /* Sort efficiently */
8969 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8974 cgsort = sort->sort;
8976 for (i = 0; i < dd->ncg_home; i++)
8978 /* Sort on the ns grid cell indices
8979 * and the global topology index
8981 cgsort[i].nsc = a[i];
8982 cgsort[i].ind_gl = dd->index_gl[i];
8984 if (cgsort[i].nsc < moved)
8991 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8993 /* Determine the order of the charge groups using qsort */
8994 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
9000 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
9003 int ncg_new, i, *a, na;
9005 sort = dd->comm->sort->sort;
9007 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
9010 for (i = 0; i < na; i++)
9014 sort[ncg_new].ind = a[i];
9022 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
9025 gmx_domdec_sort_t *sort;
9026 gmx_cgsort_t *cgsort, *sort_i;
9028 int ncg_new, i, *ibuf, cgsize;
9031 sort = dd->comm->sort;
9033 if (dd->ncg_home > sort->sort_nalloc)
9035 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9036 srenew(sort->sort, sort->sort_nalloc);
9037 srenew(sort->sort2, sort->sort_nalloc);
9039 cgsort = sort->sort;
9041 switch (fr->cutoff_scheme)
9044 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9047 ncg_new = dd_sort_order_nbnxn(dd, fr);
9050 gmx_incons("unimplemented");
9054 /* We alloc with the old size, since cgindex is still old */
9055 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9056 vbuf = dd->comm->vbuf.v;
9060 cgindex = dd->cgindex;
9067 /* Remove the charge groups which are no longer at home here */
9068 dd->ncg_home = ncg_new;
9071 fprintf(debug, "Set the new home charge group count to %d\n",
9075 /* Reorder the state */
9076 for (i = 0; i < estNR; i++)
9078 if (EST_DISTR(i) && (state->flags & (1<<i)))
9083 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9086 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9089 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9092 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9096 case estDISRE_INITF:
9097 case estDISRE_RM3TAV:
9098 case estORIRE_INITF:
9100 /* No ordering required */
9103 gmx_incons("Unknown state entry encountered in dd_sort_state");
9108 if (fr->cutoff_scheme == ecutsGROUP)
9111 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9114 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9116 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9117 srenew(sort->ibuf, sort->ibuf_nalloc);
9120 /* Reorder the global cg index */
9121 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9122 /* Reorder the cginfo */
9123 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9124 /* Rebuild the local cg index */
9128 for (i = 0; i < dd->ncg_home; i++)
9130 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9131 ibuf[i+1] = ibuf[i] + cgsize;
9133 for (i = 0; i < dd->ncg_home+1; i++)
9135 dd->cgindex[i] = ibuf[i];
9140 for (i = 0; i < dd->ncg_home+1; i++)
9145 /* Set the home atom number */
9146 dd->nat_home = dd->cgindex[dd->ncg_home];
9148 if (fr->cutoff_scheme == ecutsVERLET)
9150 /* The atoms are now exactly in grid order, update the grid order */
9151 nbnxn_set_atomorder(fr->nbv->nbs);
9155 /* Copy the sorted ns cell indices back to the ns grid struct */
9156 for (i = 0; i < dd->ncg_home; i++)
9158 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9160 fr->ns.grid->nr = dd->ncg_home;
9164 static void add_dd_statistics(gmx_domdec_t *dd)
9166 gmx_domdec_comm_t *comm;
9171 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9173 comm->sum_nat[ddnat-ddnatZONE] +=
9174 comm->nat[ddnat] - comm->nat[ddnat-1];
9179 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9181 gmx_domdec_comm_t *comm;
9186 /* Reset all the statistics and counters for total run counting */
9187 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9189 comm->sum_nat[ddnat-ddnatZONE] = 0;
9193 comm->load_step = 0;
9196 clear_ivec(comm->load_lim);
9201 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9203 gmx_domdec_comm_t *comm;
9207 comm = cr->dd->comm;
9209 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9216 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9218 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9220 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9225 " av. #atoms communicated per step for force: %d x %.1f\n",
9229 if (cr->dd->vsite_comm)
9232 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9233 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9238 if (cr->dd->constraint_comm)
9241 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9242 1 + ir->nLincsIter, av);
9246 gmx_incons(" Unknown type for DD statistics");
9249 fprintf(fplog, "\n");
9251 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9253 print_dd_load_av(fplog, cr->dd);
9257 void dd_partition_system(FILE *fplog,
9260 gmx_bool bMasterState,
9262 t_state *state_global,
9263 gmx_mtop_t *top_global,
9265 t_state *state_local,
9268 gmx_localtop_t *top_local,
9271 gmx_shellfc_t shellfc,
9272 gmx_constr_t constr,
9274 gmx_wallcycle_t wcycle,
9278 gmx_domdec_comm_t *comm;
9279 gmx_ddbox_t ddbox = {0};
9281 gmx_int64_t step_pcoupl;
9282 rvec cell_ns_x0, cell_ns_x1;
9283 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9284 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9285 gmx_bool bRedist, bSortCG, bResortAll;
9286 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9293 bBoxChanged = (bMasterState || DEFORM(*ir));
9294 if (ir->epc != epcNO)
9296 /* With nstpcouple > 1 pressure coupling happens.
9297 * one step after calculating the pressure.
9298 * Box scaling happens at the end of the MD step,
9299 * after the DD partitioning.
9300 * We therefore have to do DLB in the first partitioning
9301 * after an MD step where P-coupling occured.
9302 * We need to determine the last step in which p-coupling occurred.
9303 * MRS -- need to validate this for vv?
9308 step_pcoupl = step - 1;
9312 step_pcoupl = ((step - 1)/n)*n + 1;
9314 if (step_pcoupl >= comm->partition_step)
9320 bNStGlobalComm = (step % nstglobalcomm == 0);
9322 if (!comm->bDynLoadBal)
9328 /* Should we do dynamic load balacing this step?
9329 * Since it requires (possibly expensive) global communication,
9330 * we might want to do DLB less frequently.
9332 if (bBoxChanged || ir->epc != epcNO)
9334 bDoDLB = bBoxChanged;
9338 bDoDLB = bNStGlobalComm;
9342 /* Check if we have recorded loads on the nodes */
9343 if (comm->bRecordLoad && dd_load_count(comm))
9345 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9347 /* Check if we should use DLB at the second partitioning
9348 * and every 100 partitionings,
9349 * so the extra communication cost is negligible.
9351 n = max(100, nstglobalcomm);
9352 bCheckDLB = (comm->n_load_collect == 0 ||
9353 comm->n_load_have % n == n-1);
9360 /* Print load every nstlog, first and last step to the log file */
9361 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9362 comm->n_load_collect == 0 ||
9364 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9366 /* Avoid extra communication due to verbose screen output
9367 * when nstglobalcomm is set.
9369 if (bDoDLB || bLogLoad || bCheckDLB ||
9370 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9372 get_load_distribution(dd, wcycle);
9377 dd_print_load(fplog, dd, step-1);
9381 dd_print_load_verbose(dd);
9384 comm->n_load_collect++;
9388 /* Since the timings are node dependent, the master decides */
9392 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
9395 fprintf(debug, "step %s, imb loss %f\n",
9396 gmx_step_str(step, sbuf),
9397 dd_force_imb_perf_loss(dd));
9400 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9403 turn_on_dlb(fplog, cr, step);
9408 comm->n_load_have++;
9411 cgs_gl = &comm->cgs_gl;
9416 /* Clear the old state */
9417 clear_dd_indices(dd, 0, 0);
9420 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9421 TRUE, cgs_gl, state_global->x, &ddbox);
9423 get_cg_distribution(fplog, step, dd, cgs_gl,
9424 state_global->box, &ddbox, state_global->x);
9426 dd_distribute_state(dd, cgs_gl,
9427 state_global, state_local, f);
9429 dd_make_local_cgs(dd, &top_local->cgs);
9431 /* Ensure that we have space for the new distribution */
9432 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9434 if (fr->cutoff_scheme == ecutsGROUP)
9436 calc_cgcm(fplog, 0, dd->ncg_home,
9437 &top_local->cgs, state_local->x, fr->cg_cm);
9440 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9442 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9444 else if (state_local->ddp_count != dd->ddp_count)
9446 if (state_local->ddp_count > dd->ddp_count)
9448 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9451 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9453 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9456 /* Clear the old state */
9457 clear_dd_indices(dd, 0, 0);
9459 /* Build the new indices */
9460 rebuild_cgindex(dd, cgs_gl->index, state_local);
9461 make_dd_indices(dd, cgs_gl->index, 0);
9462 ncgindex_set = dd->ncg_home;
9464 if (fr->cutoff_scheme == ecutsGROUP)
9466 /* Redetermine the cg COMs */
9467 calc_cgcm(fplog, 0, dd->ncg_home,
9468 &top_local->cgs, state_local->x, fr->cg_cm);
9471 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9473 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9475 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9476 TRUE, &top_local->cgs, state_local->x, &ddbox);
9478 bRedist = comm->bDynLoadBal;
9482 /* We have the full state, only redistribute the cgs */
9484 /* Clear the non-home indices */
9485 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9488 /* Avoid global communication for dim's without pbc and -gcom */
9489 if (!bNStGlobalComm)
9491 copy_rvec(comm->box0, ddbox.box0 );
9492 copy_rvec(comm->box_size, ddbox.box_size);
9494 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9495 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9500 /* For dim's without pbc and -gcom */
9501 copy_rvec(ddbox.box0, comm->box0 );
9502 copy_rvec(ddbox.box_size, comm->box_size);
9504 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9507 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9509 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9512 /* Check if we should sort the charge groups */
9513 if (comm->nstSortCG > 0)
9515 bSortCG = (bMasterState ||
9516 (bRedist && (step % comm->nstSortCG == 0)));
9523 ncg_home_old = dd->ncg_home;
9528 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9530 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9532 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9534 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9537 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9539 &comm->cell_x0, &comm->cell_x1,
9540 dd->ncg_home, fr->cg_cm,
9541 cell_ns_x0, cell_ns_x1, &grid_density);
9545 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9548 switch (fr->cutoff_scheme)
9551 copy_ivec(fr->ns.grid->n, ncells_old);
9552 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9553 state_local->box, cell_ns_x0, cell_ns_x1,
9554 fr->rlistlong, grid_density);
9557 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9560 gmx_incons("unimplemented");
9562 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9563 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9567 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9569 /* Sort the state on charge group position.
9570 * This enables exact restarts from this step.
9571 * It also improves performance by about 15% with larger numbers
9572 * of atoms per node.
9575 /* Fill the ns grid with the home cell,
9576 * so we can sort with the indices.
9578 set_zones_ncg_home(dd);
9580 switch (fr->cutoff_scheme)
9583 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9585 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9587 comm->zones.size[0].bb_x0,
9588 comm->zones.size[0].bb_x1,
9590 comm->zones.dens_zone0,
9593 ncg_moved, bRedist ? comm->moved : NULL,
9594 fr->nbv->grp[eintLocal].kernel_type,
9595 fr->nbv->grp[eintLocal].nbat);
9597 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9600 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9601 0, dd->ncg_home, fr->cg_cm);
9603 copy_ivec(fr->ns.grid->n, ncells_new);
9606 gmx_incons("unimplemented");
9609 bResortAll = bMasterState;
9611 /* Check if we can user the old order and ns grid cell indices
9612 * of the charge groups to sort the charge groups efficiently.
9614 if (ncells_new[XX] != ncells_old[XX] ||
9615 ncells_new[YY] != ncells_old[YY] ||
9616 ncells_new[ZZ] != ncells_old[ZZ])
9623 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9624 gmx_step_str(step, sbuf), dd->ncg_home);
9626 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9627 bResortAll ? -1 : ncg_home_old);
9628 /* Rebuild all the indices */
9629 ga2la_clear(dd->ga2la);
9632 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9635 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9637 /* Setup up the communication and communicate the coordinates */
9638 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9640 /* Set the indices */
9641 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9643 /* Set the charge group boundaries for neighbor searching */
9644 set_cg_boundaries(&comm->zones);
9646 if (fr->cutoff_scheme == ecutsVERLET)
9648 set_zones_size(dd, state_local->box, &ddbox,
9649 bSortCG ? 1 : 0, comm->zones.n);
9652 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9655 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9656 -1,state_local->x,state_local->box);
9659 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9661 /* Extract a local topology from the global topology */
9662 for (i = 0; i < dd->ndim; i++)
9664 np[dd->dim[i]] = comm->cd[i].np;
9666 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9667 comm->cellsize_min, np,
9669 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9670 vsite, top_global, top_local);
9672 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9674 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9676 /* Set up the special atom communication */
9677 n = comm->nat[ddnatZONE];
9678 for (i = ddnatZONE+1; i < ddnatNR; i++)
9683 if (vsite && vsite->n_intercg_vsite)
9685 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9689 if (dd->bInterCGcons || dd->bInterCGsettles)
9691 /* Only for inter-cg constraints we need special code */
9692 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9693 constr, ir->nProjOrder,
9694 top_local->idef.il);
9698 gmx_incons("Unknown special atom type setup");
9703 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9705 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9707 /* Make space for the extra coordinates for virtual site
9708 * or constraint communication.
9710 state_local->natoms = comm->nat[ddnatNR-1];
9711 if (state_local->natoms > state_local->nalloc)
9713 dd_realloc_state(state_local, f, state_local->natoms);
9716 if (fr->bF_NoVirSum)
9718 if (vsite && vsite->n_intercg_vsite)
9720 nat_f_novirsum = comm->nat[ddnatVSITE];
9724 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9726 nat_f_novirsum = dd->nat_tot;
9730 nat_f_novirsum = dd->nat_home;
9739 /* Set the number of atoms required for the force calculation.
9740 * Forces need to be constrained when using a twin-range setup
9741 * or with energy minimization. For simple simulations we could
9742 * avoid some allocation, zeroing and copying, but this is
9743 * probably not worth the complications ande checking.
9745 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9746 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9748 /* We make the all mdatoms up to nat_tot_con.
9749 * We could save some work by only setting invmass
9750 * between nat_tot and nat_tot_con.
9752 /* This call also sets the new number of home particles to dd->nat_home */
9753 atoms2md(top_global, ir,
9754 comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
9756 /* Now we have the charges we can sort the FE interactions */
9757 dd_sort_local_top(dd, mdatoms, top_local);
9761 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9762 split_vsites_over_threads(top_local->idef.il, top_local->idef.iparams,
9763 mdatoms, FALSE, vsite);
9768 /* Make the local shell stuff, currently no communication is done */
9769 make_local_shells(cr, mdatoms, shellfc);
9772 if (ir->implicit_solvent)
9774 make_local_gb(cr, fr->born, ir->gb_algorithm);
9777 setup_bonded_threading(fr, &top_local->idef);
9779 if (!(cr->duty & DUTY_PME))
9781 /* Send the charges and/or c6/sigmas to our PME only node */
9782 gmx_pme_send_parameters(cr,
9784 mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9785 mdatoms->chargeA, mdatoms->chargeB,
9786 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9787 mdatoms->sigmaA, mdatoms->sigmaB,
9788 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9793 set_constraints(constr, top_local, ir, mdatoms, cr);
9796 if (ir->ePull != epullNO)
9798 /* Update the local pull groups */
9799 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9804 /* Update the local rotation groups */
9805 dd_make_local_rotation_groups(dd, ir->rot);
9808 if (ir->eSwapCoords != eswapNO)
9810 /* Update the local groups needed for ion swapping */
9811 dd_make_local_swap_groups(dd, ir->swap);
9814 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9815 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9817 add_dd_statistics(dd);
9819 /* Make sure we only count the cycles for this DD partitioning */
9820 clear_dd_cycle_counts(dd);
9822 /* Because the order of the atoms might have changed since
9823 * the last vsite construction, we need to communicate the constructing
9824 * atom coordinates again (for spreading the forces this MD step).
9826 dd_move_x_vsites(dd, state_local->box, state_local->x);
9828 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9830 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9832 dd_move_x(dd, state_local->box, state_local->x);
9833 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9834 -1, state_local->x, state_local->box);
9837 /* Store the partitioning step */
9838 comm->partition_step = step;
9840 /* Increase the DD partitioning counter */
9842 /* The state currently matches this DD partitioning count, store it */
9843 state_local->ddp_count = dd->ddp_count;
9846 /* The DD master node knows the complete cg distribution,
9847 * store the count so we can possibly skip the cg info communication.
9849 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9852 if (comm->DD_debug > 0)
9854 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9855 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9856 "after partitioning");