2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gromacs/utility/smalloc.h"
49 #include "gromacs/utility/fatalerror.h"
51 #include "gromacs/math/vec.h"
53 #include "domdec_network.h"
55 #include "gromacs/pbcutil/pbc.h"
56 #include "chargegroup.h"
65 #include "mtop_util.h"
66 #include "gmx_ga2la.h"
68 #include "nbnxn_search.h"
70 #include "gmx_omp_nthreads.h"
71 #include "gpu_utils.h"
73 #include "gromacs/utility/futil.h"
74 #include "gromacs/fileio/gmxfio.h"
75 #include "gromacs/fileio/pdbio.h"
76 #include "gromacs/imd/imd.h"
77 #include "gromacs/pulling/pull.h"
78 #include "gromacs/pulling/pull_rotation.h"
79 #include "gromacs/swap/swapcoords.h"
80 #include "gromacs/timing/wallcycle.h"
81 #include "gromacs/utility/basenetwork.h"
82 #include "gromacs/utility/gmxmpi.h"
83 #include "gromacs/utility/qsort_threadsafe.h"
85 #define DDRANK(dd, rank) (rank)
86 #define DDMASTERRANK(dd) (dd->masterrank)
88 typedef struct gmx_domdec_master
90 /* The cell boundaries */
92 /* The global charge group division */
93 int *ncg; /* Number of home charge groups for each node */
94 int *index; /* Index of nnodes+1 into cg */
95 int *cg; /* Global charge group index */
96 int *nat; /* Number of home atoms for each node. */
97 int *ibuf; /* Buffer for communication */
98 rvec *vbuf; /* Buffer for state scattering and gathering */
99 } gmx_domdec_master_t;
103 /* The numbers of charge groups to send and receive for each cell
104 * that requires communication, the last entry contains the total
105 * number of atoms that needs to be communicated.
107 int nsend[DD_MAXIZONE+2];
108 int nrecv[DD_MAXIZONE+2];
109 /* The charge groups to send */
112 /* The atom range for non-in-place communication */
113 int cell2at0[DD_MAXIZONE];
114 int cell2at1[DD_MAXIZONE];
119 int np; /* Number of grid pulses in this dimension */
120 int np_dlb; /* For dlb, for use with edlbAUTO */
121 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
123 gmx_bool bInPlace; /* Can we communicate in place? */
124 } gmx_domdec_comm_dim_t;
128 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
129 real *cell_f; /* State var.: cell boundaries, box relative */
130 real *old_cell_f; /* Temp. var.: old cell size */
131 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
132 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
133 real *bound_min; /* Temp. var.: lower limit for cell boundary */
134 real *bound_max; /* Temp. var.: upper limit for cell boundary */
135 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
136 real *buf_ncd; /* Temp. var. */
139 #define DD_NLOAD_MAX 9
141 /* Here floats are accurate enough, since these variables
142 * only influence the load balancing, not the actual MD results.
169 gmx_cgsort_t *sort_new;
181 /* This enum determines the order of the coordinates.
182 * ddnatHOME and ddnatZONE should be first and second,
183 * the others can be ordered as wanted.
186 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
190 edlbAUTO, edlbNO, edlbYES, edlbNR
192 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
196 int dim; /* The dimension */
197 gmx_bool dim_match; /* Tells if DD and PME dims match */
198 int nslab; /* The number of PME slabs in this dimension */
199 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
200 int *pp_min; /* The minimum pp node location, size nslab */
201 int *pp_max; /* The maximum pp node location,size nslab */
202 int maxshift; /* The maximum shift for coordinate redistribution in PME */
207 real min0; /* The minimum bottom of this zone */
208 real max1; /* The maximum top of this zone */
209 real min1; /* The minimum top of this zone */
210 real mch0; /* The maximum bottom communicaton height for this zone */
211 real mch1; /* The maximum top communicaton height for this zone */
212 real p1_0; /* The bottom value of the first cell in this zone */
213 real p1_1; /* The top value of the first cell in this zone */
218 gmx_domdec_ind_t ind;
225 } dd_comm_setup_work_t;
227 typedef struct gmx_domdec_comm
229 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
230 * unless stated otherwise.
233 /* The number of decomposition dimensions for PME, 0: no PME */
235 /* The number of nodes doing PME (PP/PME or only PME) */
239 /* The communication setup including the PME only nodes */
240 gmx_bool bCartesianPP_PME;
243 int *pmenodes; /* size npmenodes */
244 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
245 * but with bCartesianPP_PME */
246 gmx_ddpme_t ddpme[2];
248 /* The DD particle-particle nodes only */
249 gmx_bool bCartesianPP;
250 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
252 /* The global charge groups */
255 /* Should we sort the cgs */
257 gmx_domdec_sort_t *sort;
259 /* Are there charge groups? */
262 /* Are there bonded and multi-body interactions between charge groups? */
263 gmx_bool bInterCGBondeds;
264 gmx_bool bInterCGMultiBody;
266 /* Data for the optional bonded interaction atom communication range */
273 /* Are we actually using DLB? */
274 gmx_bool bDynLoadBal;
276 /* Cell sizes for static load balancing, first index cartesian */
279 /* The width of the communicated boundaries */
282 /* The minimum cell size (including triclinic correction) */
284 /* For dlb, for use with edlbAUTO */
285 rvec cellsize_min_dlb;
286 /* The lower limit for the DD cell size with DLB */
288 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
289 gmx_bool bVacDLBNoLimit;
291 /* With PME load balancing we set limits on DLB */
292 gmx_bool bPMELoadBalDLBLimits;
293 /* DLB needs to take into account that we want to allow this maximum
294 * cut-off (for PME load balancing), this could limit cell boundaries.
296 real PMELoadBal_max_cutoff;
298 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
300 /* box0 and box_size are required with dim's without pbc and -gcom */
304 /* The cell boundaries */
308 /* The old location of the cell boundaries, to check cg displacements */
312 /* The communication setup and charge group boundaries for the zones */
313 gmx_domdec_zones_t zones;
315 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
316 * cell boundaries of neighboring cells for dynamic load balancing.
318 gmx_ddzone_t zone_d1[2];
319 gmx_ddzone_t zone_d2[2][2];
321 /* The coordinate/force communication setup and indices */
322 gmx_domdec_comm_dim_t cd[DIM];
323 /* The maximum number of cells to communicate with in one dimension */
326 /* Which cg distribution is stored on the master node */
327 int master_cg_ddp_count;
329 /* The number of cg's received from the direct neighbors */
330 int zone_ncg1[DD_MAXZONE];
332 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
335 /* Array for signalling if atoms have moved to another domain */
339 /* Communication buffer for general use */
343 /* Communication buffer for general use */
346 /* Temporary storage for thread parallel communication setup */
348 dd_comm_setup_work_t *dth;
350 /* Communication buffers only used with multiple grid pulses */
355 /* Communication buffers for local redistribution */
357 int cggl_flag_nalloc[DIM*2];
359 int cgcm_state_nalloc[DIM*2];
361 /* Cell sizes for dynamic load balancing */
362 gmx_domdec_root_t **root;
366 real cell_f_max0[DIM];
367 real cell_f_min1[DIM];
369 /* Stuff for load communication */
370 gmx_bool bRecordLoad;
371 gmx_domdec_load_t *load;
372 int nrank_gpu_shared;
374 MPI_Comm *mpi_comm_load;
375 MPI_Comm mpi_comm_gpu_shared;
378 /* Maximum DLB scaling per load balancing step in percent */
382 float cycl[ddCyclNr];
383 int cycl_n[ddCyclNr];
384 float cycl_max[ddCyclNr];
385 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
389 /* Have often have did we have load measurements */
391 /* Have often have we collected the load measurements */
395 double sum_nat[ddnatNR-ddnatZONE];
405 /* The last partition step */
406 gmx_int64_t partition_step;
414 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
417 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
418 #define DD_FLAG_NRCG 65535
419 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
420 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
422 /* Zone permutation required to obtain consecutive charge groups
423 * for neighbor searching.
425 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
427 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
428 * components see only j zones with that component 0.
431 /* The DD zone order */
432 static const ivec dd_zo[DD_MAXZONE] =
433 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
438 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
443 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
448 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
450 /* Factors used to avoid problems due to rounding issues */
451 #define DD_CELL_MARGIN 1.0001
452 #define DD_CELL_MARGIN2 1.00005
453 /* Factor to account for pressure scaling during nstlist steps */
454 #define DD_PRES_SCALE_MARGIN 1.02
456 /* Turn on DLB when the load imbalance causes this amount of total loss.
457 * There is a bit of overhead with DLB and it's difficult to achieve
458 * a load imbalance of less than 2% with DLB.
460 #define DD_PERF_LOSS_DLB_ON 0.02
462 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
463 #define DD_PERF_LOSS_WARN 0.05
465 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
467 /* Use separate MPI send and receive commands
468 * when nnodes <= GMX_DD_NNODES_SENDRECV.
469 * This saves memory (and some copying for small nnodes).
470 * For high parallelization scatter and gather calls are used.
472 #define GMX_DD_NNODES_SENDRECV 4
476 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
478 static void index2xyz(ivec nc,int ind,ivec xyz)
480 xyz[XX] = ind % nc[XX];
481 xyz[YY] = (ind / nc[XX]) % nc[YY];
482 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
486 /* This order is required to minimize the coordinate communication in PME
487 * which uses decomposition in the x direction.
489 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
491 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
493 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
494 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
495 xyz[ZZ] = ind % nc[ZZ];
498 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
503 ddindex = dd_index(dd->nc, c);
504 if (dd->comm->bCartesianPP_PME)
506 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
508 else if (dd->comm->bCartesianPP)
511 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
522 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
524 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
527 int ddglatnr(gmx_domdec_t *dd, int i)
537 if (i >= dd->comm->nat[ddnatNR-1])
539 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
541 atnr = dd->gatindex[i] + 1;
547 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
549 return &dd->comm->cgs_gl;
552 static void vec_rvec_init(vec_rvec_t *v)
558 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
562 v->nalloc = over_alloc_dd(n);
563 srenew(v->v, v->nalloc);
567 void dd_store_state(gmx_domdec_t *dd, t_state *state)
571 if (state->ddp_count != dd->ddp_count)
573 gmx_incons("The state does not the domain decomposition state");
576 state->ncg_gl = dd->ncg_home;
577 if (state->ncg_gl > state->cg_gl_nalloc)
579 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
580 srenew(state->cg_gl, state->cg_gl_nalloc);
582 for (i = 0; i < state->ncg_gl; i++)
584 state->cg_gl[i] = dd->index_gl[i];
587 state->ddp_count_cg_gl = dd->ddp_count;
590 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
592 return &dd->comm->zones;
595 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
596 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
598 gmx_domdec_zones_t *zones;
601 zones = &dd->comm->zones;
604 while (icg >= zones->izone[izone].cg1)
613 else if (izone < zones->nizone)
615 *jcg0 = zones->izone[izone].jcg0;
619 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
620 icg, izone, zones->nizone);
623 *jcg1 = zones->izone[izone].jcg1;
625 for (d = 0; d < dd->ndim; d++)
628 shift0[dim] = zones->izone[izone].shift0[dim];
629 shift1[dim] = zones->izone[izone].shift1[dim];
630 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
632 /* A conservative approach, this can be optimized */
639 int dd_natoms_vsite(gmx_domdec_t *dd)
641 return dd->comm->nat[ddnatVSITE];
644 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
646 *at_start = dd->comm->nat[ddnatCON-1];
647 *at_end = dd->comm->nat[ddnatCON];
650 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
652 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
653 int *index, *cgindex;
654 gmx_domdec_comm_t *comm;
655 gmx_domdec_comm_dim_t *cd;
656 gmx_domdec_ind_t *ind;
657 rvec shift = {0, 0, 0}, *buf, *rbuf;
658 gmx_bool bPBC, bScrew;
662 cgindex = dd->cgindex;
667 nat_tot = dd->nat_home;
668 for (d = 0; d < dd->ndim; d++)
670 bPBC = (dd->ci[dd->dim[d]] == 0);
671 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
674 copy_rvec(box[dd->dim[d]], shift);
677 for (p = 0; p < cd->np; p++)
684 for (i = 0; i < ind->nsend[nzone]; i++)
686 at0 = cgindex[index[i]];
687 at1 = cgindex[index[i]+1];
688 for (j = at0; j < at1; j++)
690 copy_rvec(x[j], buf[n]);
697 for (i = 0; i < ind->nsend[nzone]; i++)
699 at0 = cgindex[index[i]];
700 at1 = cgindex[index[i]+1];
701 for (j = at0; j < at1; j++)
703 /* We need to shift the coordinates */
704 rvec_add(x[j], shift, buf[n]);
711 for (i = 0; i < ind->nsend[nzone]; i++)
713 at0 = cgindex[index[i]];
714 at1 = cgindex[index[i]+1];
715 for (j = at0; j < at1; j++)
718 buf[n][XX] = x[j][XX] + shift[XX];
720 * This operation requires a special shift force
721 * treatment, which is performed in calc_vir.
723 buf[n][YY] = box[YY][YY] - x[j][YY];
724 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
736 rbuf = comm->vbuf2.v;
738 /* Send and receive the coordinates */
739 dd_sendrecv_rvec(dd, d, dddirBackward,
740 buf, ind->nsend[nzone+1],
741 rbuf, ind->nrecv[nzone+1]);
745 for (zone = 0; zone < nzone; zone++)
747 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
749 copy_rvec(rbuf[j], x[i]);
754 nat_tot += ind->nrecv[nzone+1];
760 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
762 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
763 int *index, *cgindex;
764 gmx_domdec_comm_t *comm;
765 gmx_domdec_comm_dim_t *cd;
766 gmx_domdec_ind_t *ind;
770 gmx_bool bPBC, bScrew;
774 cgindex = dd->cgindex;
779 nzone = comm->zones.n/2;
780 nat_tot = dd->nat_tot;
781 for (d = dd->ndim-1; d >= 0; d--)
783 bPBC = (dd->ci[dd->dim[d]] == 0);
784 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
785 if (fshift == NULL && !bScrew)
789 /* Determine which shift vector we need */
795 for (p = cd->np-1; p >= 0; p--)
798 nat_tot -= ind->nrecv[nzone+1];
805 sbuf = comm->vbuf2.v;
807 for (zone = 0; zone < nzone; zone++)
809 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
811 copy_rvec(f[i], sbuf[j]);
816 /* Communicate the forces */
817 dd_sendrecv_rvec(dd, d, dddirForward,
818 sbuf, ind->nrecv[nzone+1],
819 buf, ind->nsend[nzone+1]);
821 /* Add the received forces */
825 for (i = 0; i < ind->nsend[nzone]; i++)
827 at0 = cgindex[index[i]];
828 at1 = cgindex[index[i]+1];
829 for (j = at0; j < at1; j++)
831 rvec_inc(f[j], buf[n]);
838 for (i = 0; i < ind->nsend[nzone]; i++)
840 at0 = cgindex[index[i]];
841 at1 = cgindex[index[i]+1];
842 for (j = at0; j < at1; j++)
844 rvec_inc(f[j], buf[n]);
845 /* Add this force to the shift force */
846 rvec_inc(fshift[is], buf[n]);
853 for (i = 0; i < ind->nsend[nzone]; i++)
855 at0 = cgindex[index[i]];
856 at1 = cgindex[index[i]+1];
857 for (j = at0; j < at1; j++)
859 /* Rotate the force */
860 f[j][XX] += buf[n][XX];
861 f[j][YY] -= buf[n][YY];
862 f[j][ZZ] -= buf[n][ZZ];
865 /* Add this force to the shift force */
866 rvec_inc(fshift[is], buf[n]);
877 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
879 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
880 int *index, *cgindex;
881 gmx_domdec_comm_t *comm;
882 gmx_domdec_comm_dim_t *cd;
883 gmx_domdec_ind_t *ind;
888 cgindex = dd->cgindex;
890 buf = &comm->vbuf.v[0][0];
893 nat_tot = dd->nat_home;
894 for (d = 0; d < dd->ndim; d++)
897 for (p = 0; p < cd->np; p++)
902 for (i = 0; i < ind->nsend[nzone]; i++)
904 at0 = cgindex[index[i]];
905 at1 = cgindex[index[i]+1];
906 for (j = at0; j < at1; j++)
919 rbuf = &comm->vbuf2.v[0][0];
921 /* Send and receive the coordinates */
922 dd_sendrecv_real(dd, d, dddirBackward,
923 buf, ind->nsend[nzone+1],
924 rbuf, ind->nrecv[nzone+1]);
928 for (zone = 0; zone < nzone; zone++)
930 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
937 nat_tot += ind->nrecv[nzone+1];
943 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
945 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
946 int *index, *cgindex;
947 gmx_domdec_comm_t *comm;
948 gmx_domdec_comm_dim_t *cd;
949 gmx_domdec_ind_t *ind;
954 cgindex = dd->cgindex;
956 buf = &comm->vbuf.v[0][0];
959 nzone = comm->zones.n/2;
960 nat_tot = dd->nat_tot;
961 for (d = dd->ndim-1; d >= 0; d--)
964 for (p = cd->np-1; p >= 0; p--)
967 nat_tot -= ind->nrecv[nzone+1];
974 sbuf = &comm->vbuf2.v[0][0];
976 for (zone = 0; zone < nzone; zone++)
978 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
985 /* Communicate the forces */
986 dd_sendrecv_real(dd, d, dddirForward,
987 sbuf, ind->nrecv[nzone+1],
988 buf, ind->nsend[nzone+1]);
990 /* Add the received forces */
992 for (i = 0; i < ind->nsend[nzone]; i++)
994 at0 = cgindex[index[i]];
995 at1 = cgindex[index[i]+1];
996 for (j = at0; j < at1; j++)
1007 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
1009 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1011 zone->min0, zone->max1,
1012 zone->mch0, zone->mch0,
1013 zone->p1_0, zone->p1_1);
1017 #define DDZONECOMM_MAXZONE 5
1018 #define DDZONECOMM_BUFSIZE 3
1020 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1021 int ddimind, int direction,
1022 gmx_ddzone_t *buf_s, int n_s,
1023 gmx_ddzone_t *buf_r, int n_r)
1025 #define ZBS DDZONECOMM_BUFSIZE
1026 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1027 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
1030 for (i = 0; i < n_s; i++)
1032 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1033 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1034 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1035 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1036 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1037 vbuf_s[i*ZBS+1][2] = 0;
1038 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1039 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1040 vbuf_s[i*ZBS+2][2] = 0;
1043 dd_sendrecv_rvec(dd, ddimind, direction,
1047 for (i = 0; i < n_r; i++)
1049 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1050 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1051 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1052 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1053 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1054 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1055 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
1061 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1062 rvec cell_ns_x0, rvec cell_ns_x1)
1064 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1066 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1067 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1068 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1069 rvec extr_s[2], extr_r[2];
1071 real dist_d, c = 0, det;
1072 gmx_domdec_comm_t *comm;
1073 gmx_bool bPBC, bUse;
1077 for (d = 1; d < dd->ndim; d++)
1080 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1081 zp->min0 = cell_ns_x0[dim];
1082 zp->max1 = cell_ns_x1[dim];
1083 zp->min1 = cell_ns_x1[dim];
1084 zp->mch0 = cell_ns_x0[dim];
1085 zp->mch1 = cell_ns_x1[dim];
1086 zp->p1_0 = cell_ns_x0[dim];
1087 zp->p1_1 = cell_ns_x1[dim];
1090 for (d = dd->ndim-2; d >= 0; d--)
1093 bPBC = (dim < ddbox->npbcdim);
1095 /* Use an rvec to store two reals */
1096 extr_s[d][0] = comm->cell_f0[d+1];
1097 extr_s[d][1] = comm->cell_f1[d+1];
1098 extr_s[d][2] = comm->cell_f1[d+1];
1101 /* Store the extremes in the backward sending buffer,
1102 * so the get updated separately from the forward communication.
1104 for (d1 = d; d1 < dd->ndim-1; d1++)
1106 /* We invert the order to be able to use the same loop for buf_e */
1107 buf_s[pos].min0 = extr_s[d1][1];
1108 buf_s[pos].max1 = extr_s[d1][0];
1109 buf_s[pos].min1 = extr_s[d1][2];
1110 buf_s[pos].mch0 = 0;
1111 buf_s[pos].mch1 = 0;
1112 /* Store the cell corner of the dimension we communicate along */
1113 buf_s[pos].p1_0 = comm->cell_x0[dim];
1114 buf_s[pos].p1_1 = 0;
1118 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1121 if (dd->ndim == 3 && d == 0)
1123 buf_s[pos] = comm->zone_d2[0][1];
1125 buf_s[pos] = comm->zone_d1[0];
1129 /* We only need to communicate the extremes
1130 * in the forward direction
1132 npulse = comm->cd[d].np;
1135 /* Take the minimum to avoid double communication */
1136 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1140 /* Without PBC we should really not communicate over
1141 * the boundaries, but implementing that complicates
1142 * the communication setup and therefore we simply
1143 * do all communication, but ignore some data.
1145 npulse_min = npulse;
1147 for (p = 0; p < npulse_min; p++)
1149 /* Communicate the extremes forward */
1150 bUse = (bPBC || dd->ci[dim] > 0);
1152 dd_sendrecv_rvec(dd, d, dddirForward,
1153 extr_s+d, dd->ndim-d-1,
1154 extr_r+d, dd->ndim-d-1);
1158 for (d1 = d; d1 < dd->ndim-1; d1++)
1160 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1161 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1162 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1168 for (p = 0; p < npulse; p++)
1170 /* Communicate all the zone information backward */
1171 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1173 dd_sendrecv_ddzone(dd, d, dddirBackward,
1180 for (d1 = d+1; d1 < dd->ndim; d1++)
1182 /* Determine the decrease of maximum required
1183 * communication height along d1 due to the distance along d,
1184 * this avoids a lot of useless atom communication.
1186 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1188 if (ddbox->tric_dir[dim])
1190 /* c is the off-diagonal coupling between the cell planes
1191 * along directions d and d1.
1193 c = ddbox->v[dim][dd->dim[d1]][dim];
1199 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1202 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1206 /* A negative value signals out of range */
1212 /* Accumulate the extremes over all pulses */
1213 for (i = 0; i < buf_size; i++)
1217 buf_e[i] = buf_r[i];
1223 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1224 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1225 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1228 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1236 if (bUse && dh[d1] >= 0)
1238 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1239 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1242 /* Copy the received buffer to the send buffer,
1243 * to pass the data through with the next pulse.
1245 buf_s[i] = buf_r[i];
1247 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1248 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1250 /* Store the extremes */
1253 for (d1 = d; d1 < dd->ndim-1; d1++)
1255 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1256 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1257 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1261 if (d == 1 || (d == 0 && dd->ndim == 3))
1263 for (i = d; i < 2; i++)
1265 comm->zone_d2[1-d][i] = buf_e[pos];
1271 comm->zone_d1[1] = buf_e[pos];
1281 for (i = 0; i < 2; i++)
1285 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1287 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1288 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1294 for (i = 0; i < 2; i++)
1296 for (j = 0; j < 2; j++)
1300 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1302 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1303 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1307 for (d = 1; d < dd->ndim; d++)
1309 comm->cell_f_max0[d] = extr_s[d-1][0];
1310 comm->cell_f_min1[d] = extr_s[d-1][1];
1313 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1314 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1319 static void dd_collect_cg(gmx_domdec_t *dd,
1320 t_state *state_local)
1322 gmx_domdec_master_t *ma = NULL;
1323 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1326 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1328 /* The master has the correct distribution */
1332 if (state_local->ddp_count == dd->ddp_count)
1334 ncg_home = dd->ncg_home;
1336 nat_home = dd->nat_home;
1338 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1340 cgs_gl = &dd->comm->cgs_gl;
1342 ncg_home = state_local->ncg_gl;
1343 cg = state_local->cg_gl;
1345 for (i = 0; i < ncg_home; i++)
1347 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1352 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1355 buf2[0] = dd->ncg_home;
1356 buf2[1] = dd->nat_home;
1366 /* Collect the charge group and atom counts on the master */
1367 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1372 for (i = 0; i < dd->nnodes; i++)
1374 ma->ncg[i] = ma->ibuf[2*i];
1375 ma->nat[i] = ma->ibuf[2*i+1];
1376 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1379 /* Make byte counts and indices */
1380 for (i = 0; i < dd->nnodes; i++)
1382 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1383 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1387 fprintf(debug, "Initial charge group distribution: ");
1388 for (i = 0; i < dd->nnodes; i++)
1390 fprintf(debug, " %d", ma->ncg[i]);
1392 fprintf(debug, "\n");
1396 /* Collect the charge group indices on the master */
1398 dd->ncg_home*sizeof(int), dd->index_gl,
1399 DDMASTER(dd) ? ma->ibuf : NULL,
1400 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1401 DDMASTER(dd) ? ma->cg : NULL);
1403 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1406 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1409 gmx_domdec_master_t *ma;
1410 int n, i, c, a, nalloc = 0;
1419 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1420 dd->rank, dd->mpi_comm_all);
1425 /* Copy the master coordinates to the global array */
1426 cgs_gl = &dd->comm->cgs_gl;
1428 n = DDMASTERRANK(dd);
1430 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1432 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1434 copy_rvec(lv[a++], v[c]);
1438 for (n = 0; n < dd->nnodes; n++)
1442 if (ma->nat[n] > nalloc)
1444 nalloc = over_alloc_dd(ma->nat[n]);
1445 srenew(buf, nalloc);
1448 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1449 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1452 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1454 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1456 copy_rvec(buf[a++], v[c]);
1465 static void get_commbuffer_counts(gmx_domdec_t *dd,
1466 int **counts, int **disps)
1468 gmx_domdec_master_t *ma;
1473 /* Make the rvec count and displacment arrays */
1475 *disps = ma->ibuf + dd->nnodes;
1476 for (n = 0; n < dd->nnodes; n++)
1478 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1479 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1483 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1486 gmx_domdec_master_t *ma;
1487 int *rcounts = NULL, *disps = NULL;
1496 get_commbuffer_counts(dd, &rcounts, &disps);
1501 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1505 cgs_gl = &dd->comm->cgs_gl;
1508 for (n = 0; n < dd->nnodes; n++)
1510 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1512 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1514 copy_rvec(buf[a++], v[c]);
1521 void dd_collect_vec(gmx_domdec_t *dd,
1522 t_state *state_local, rvec *lv, rvec *v)
1524 gmx_domdec_master_t *ma;
1525 int n, i, c, a, nalloc = 0;
1528 dd_collect_cg(dd, state_local);
1530 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1532 dd_collect_vec_sendrecv(dd, lv, v);
1536 dd_collect_vec_gatherv(dd, lv, v);
1541 void dd_collect_state(gmx_domdec_t *dd,
1542 t_state *state_local, t_state *state)
1546 nh = state->nhchainlength;
1550 for (i = 0; i < efptNR; i++)
1552 state->lambda[i] = state_local->lambda[i];
1554 state->fep_state = state_local->fep_state;
1555 state->veta = state_local->veta;
1556 state->vol0 = state_local->vol0;
1557 copy_mat(state_local->box, state->box);
1558 copy_mat(state_local->boxv, state->boxv);
1559 copy_mat(state_local->svir_prev, state->svir_prev);
1560 copy_mat(state_local->fvir_prev, state->fvir_prev);
1561 copy_mat(state_local->pres_prev, state->pres_prev);
1563 for (i = 0; i < state_local->ngtc; i++)
1565 for (j = 0; j < nh; j++)
1567 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1568 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1570 state->therm_integral[i] = state_local->therm_integral[i];
1572 for (i = 0; i < state_local->nnhpres; i++)
1574 for (j = 0; j < nh; j++)
1576 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1577 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1581 for (est = 0; est < estNR; est++)
1583 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1588 dd_collect_vec(dd, state_local, state_local->x, state->x);
1591 dd_collect_vec(dd, state_local, state_local->v, state->v);
1594 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1597 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1599 case estDISRE_INITF:
1600 case estDISRE_RM3TAV:
1601 case estORIRE_INITF:
1605 gmx_incons("Unknown state entry encountered in dd_collect_state");
1611 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1617 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1620 state->nalloc = over_alloc_dd(nalloc);
1622 for (est = 0; est < estNR; est++)
1624 if (EST_DISTR(est) && (state->flags & (1<<est)))
1629 srenew(state->x, state->nalloc);
1632 srenew(state->v, state->nalloc);
1635 srenew(state->sd_X, state->nalloc);
1638 srenew(state->cg_p, state->nalloc);
1640 case estDISRE_INITF:
1641 case estDISRE_RM3TAV:
1642 case estORIRE_INITF:
1644 /* No reallocation required */
1647 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1654 srenew(*f, state->nalloc);
1658 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1661 if (nalloc > fr->cg_nalloc)
1665 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1667 fr->cg_nalloc = over_alloc_dd(nalloc);
1668 srenew(fr->cginfo, fr->cg_nalloc);
1669 if (fr->cutoff_scheme == ecutsGROUP)
1671 srenew(fr->cg_cm, fr->cg_nalloc);
1674 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1676 /* We don't use charge groups, we use x in state to set up
1677 * the atom communication.
1679 dd_realloc_state(state, f, nalloc);
1683 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1686 gmx_domdec_master_t *ma;
1687 int n, i, c, a, nalloc = 0;
1694 for (n = 0; n < dd->nnodes; n++)
1698 if (ma->nat[n] > nalloc)
1700 nalloc = over_alloc_dd(ma->nat[n]);
1701 srenew(buf, nalloc);
1703 /* Use lv as a temporary buffer */
1705 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1707 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1709 copy_rvec(v[c], buf[a++]);
1712 if (a != ma->nat[n])
1714 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1719 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1720 DDRANK(dd, n), n, dd->mpi_comm_all);
1725 n = DDMASTERRANK(dd);
1727 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1729 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1731 copy_rvec(v[c], lv[a++]);
1738 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1739 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1744 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1747 gmx_domdec_master_t *ma;
1748 int *scounts = NULL, *disps = NULL;
1749 int n, i, c, a, nalloc = 0;
1756 get_commbuffer_counts(dd, &scounts, &disps);
1760 for (n = 0; n < dd->nnodes; n++)
1762 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1764 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1766 copy_rvec(v[c], buf[a++]);
1772 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1775 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1777 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1779 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1783 dd_distribute_vec_scatterv(dd, cgs, v, lv);
1787 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1790 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1791 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1792 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1794 if (dfhist->nlambda > 0)
1796 int nlam = dfhist->nlambda;
1797 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1798 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1799 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1800 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1801 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1802 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
1804 for (i = 0; i < nlam; i++)
1806 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1807 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1808 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1809 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1810 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1811 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
1816 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1817 t_state *state, t_state *state_local,
1822 nh = state->nhchainlength;
1826 for (i = 0; i < efptNR; i++)
1828 state_local->lambda[i] = state->lambda[i];
1830 state_local->fep_state = state->fep_state;
1831 state_local->veta = state->veta;
1832 state_local->vol0 = state->vol0;
1833 copy_mat(state->box, state_local->box);
1834 copy_mat(state->box_rel, state_local->box_rel);
1835 copy_mat(state->boxv, state_local->boxv);
1836 copy_mat(state->svir_prev, state_local->svir_prev);
1837 copy_mat(state->fvir_prev, state_local->fvir_prev);
1838 copy_df_history(&state_local->dfhist, &state->dfhist);
1839 for (i = 0; i < state_local->ngtc; i++)
1841 for (j = 0; j < nh; j++)
1843 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1844 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1846 state_local->therm_integral[i] = state->therm_integral[i];
1848 for (i = 0; i < state_local->nnhpres; i++)
1850 for (j = 0; j < nh; j++)
1852 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1853 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1857 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1858 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1859 dd_bcast(dd, sizeof(real), &state_local->veta);
1860 dd_bcast(dd, sizeof(real), &state_local->vol0);
1861 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1862 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1863 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1864 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1865 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1866 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1867 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1868 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1869 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1870 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1872 /* communicate df_history -- required for restarting from checkpoint */
1873 dd_distribute_dfhist(dd, &state_local->dfhist);
1875 if (dd->nat_home > state_local->nalloc)
1877 dd_realloc_state(state_local, f, dd->nat_home);
1879 for (i = 0; i < estNR; i++)
1881 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1886 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1889 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1892 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1895 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1897 case estDISRE_INITF:
1898 case estDISRE_RM3TAV:
1899 case estORIRE_INITF:
1901 /* Not implemented yet */
1904 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1910 static char dim2char(int dim)
1916 case XX: c = 'X'; break;
1917 case YY: c = 'Y'; break;
1918 case ZZ: c = 'Z'; break;
1919 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
1925 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1926 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1928 rvec grid_s[2], *grid_r = NULL, cx, r;
1929 char fname[STRLEN], format[STRLEN], buf[22];
1931 int a, i, d, z, y, x;
1935 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1936 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1940 snew(grid_r, 2*dd->nnodes);
1943 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : NULL);
1947 for (d = 0; d < DIM; d++)
1949 for (i = 0; i < DIM; i++)
1957 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1959 tric[d][i] = box[i][d]/box[i][i];
1968 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1969 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
1970 out = gmx_fio_fopen(fname, "w");
1971 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1973 for (i = 0; i < dd->nnodes; i++)
1975 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1976 for (d = 0; d < DIM; d++)
1978 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1980 for (z = 0; z < 2; z++)
1982 for (y = 0; y < 2; y++)
1984 for (x = 0; x < 2; x++)
1986 cx[XX] = grid_r[i*2+x][XX];
1987 cx[YY] = grid_r[i*2+y][YY];
1988 cx[ZZ] = grid_r[i*2+z][ZZ];
1990 fprintf(out, format, "ATOM", a++, "CA", "GLY", ' ', 1+i,
1991 ' ', 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol);
1995 for (d = 0; d < DIM; d++)
1997 for (x = 0; x < 4; x++)
2001 case 0: y = 1 + i*8 + 2*x; break;
2002 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
2003 case 2: y = 1 + i*8 + x; break;
2005 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2009 gmx_fio_fclose(out);
2014 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
2015 gmx_mtop_t *mtop, t_commrec *cr,
2016 int natoms, rvec x[], matrix box)
2018 char fname[STRLEN], format[STRLEN], format4[STRLEN], buf[22];
2020 int i, ii, resnr, c;
2021 char *atomname, *resname;
2028 natoms = dd->comm->nat[ddnatVSITE];
2031 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2033 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
2034 sprintf(format4, "%s%s\n", get_pdbformat4(), "%6.2f%6.2f");
2036 out = gmx_fio_fopen(fname, "w");
2038 fprintf(out, "TITLE %s\n", title);
2039 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2040 for (i = 0; i < natoms; i++)
2042 ii = dd->gatindex[i];
2043 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
2044 if (i < dd->comm->nat[ddnatZONE])
2047 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2053 else if (i < dd->comm->nat[ddnatVSITE])
2055 b = dd->comm->zones.n;
2059 b = dd->comm->zones.n + 1;
2061 fprintf(out, strlen(atomname) < 4 ? format : format4,
2062 "ATOM", (ii+1)%100000,
2063 atomname, resname, ' ', resnr%10000, ' ',
2064 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b);
2066 fprintf(out, "TER\n");
2068 gmx_fio_fclose(out);
2071 real dd_cutoff_mbody(gmx_domdec_t *dd)
2073 gmx_domdec_comm_t *comm;
2080 if (comm->bInterCGBondeds)
2082 if (comm->cutoff_mbody > 0)
2084 r = comm->cutoff_mbody;
2088 /* cutoff_mbody=0 means we do not have DLB */
2089 r = comm->cellsize_min[dd->dim[0]];
2090 for (di = 1; di < dd->ndim; di++)
2092 r = min(r, comm->cellsize_min[dd->dim[di]]);
2094 if (comm->bBondComm)
2096 r = max(r, comm->cutoff_mbody);
2100 r = min(r, comm->cutoff);
2108 real dd_cutoff_twobody(gmx_domdec_t *dd)
2112 r_mb = dd_cutoff_mbody(dd);
2114 return max(dd->comm->cutoff, r_mb);
2118 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2122 nc = dd->nc[dd->comm->cartpmedim];
2123 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2124 copy_ivec(coord, coord_pme);
2125 coord_pme[dd->comm->cartpmedim] =
2126 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
2129 static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
2131 /* Here we assign a PME node to communicate with this DD node
2132 * by assuming that the major index of both is x.
2133 * We add cr->npmenodes/2 to obtain an even distribution.
2135 return (ddindex*npme + npme/2)/ndd;
2138 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2140 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2143 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2145 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
2148 static int *dd_pmenodes(t_commrec *cr)
2153 snew(pmenodes, cr->npmenodes);
2155 for (i = 0; i < cr->dd->nnodes; i++)
2157 p0 = cr_ddindex2pmeindex(cr, i);
2158 p1 = cr_ddindex2pmeindex(cr, i+1);
2159 if (i+1 == cr->dd->nnodes || p1 > p0)
2163 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2165 pmenodes[n] = i + 1 + n;
2173 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2176 ivec coords, coords_pme, nc;
2181 if (dd->comm->bCartesian) {
2182 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2183 dd_coords2pmecoords(dd,coords,coords_pme);
2184 copy_ivec(dd->ntot,nc);
2185 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2186 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2188 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2190 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2196 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
2201 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2203 gmx_domdec_comm_t *comm;
2205 int ddindex, nodeid = -1;
2207 comm = cr->dd->comm;
2212 if (comm->bCartesianPP_PME)
2215 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2220 ddindex = dd_index(cr->dd->nc, coords);
2221 if (comm->bCartesianPP)
2223 nodeid = comm->ddindex2simnodeid[ddindex];
2229 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
2241 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2244 gmx_domdec_comm_t *comm;
2245 ivec coord, coord_pme;
2252 /* This assumes a uniform x domain decomposition grid cell size */
2253 if (comm->bCartesianPP_PME)
2256 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2257 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2259 /* This is a PP node */
2260 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2261 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2265 else if (comm->bCartesianPP)
2267 if (sim_nodeid < dd->nnodes)
2269 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2274 /* This assumes DD cells with identical x coordinates
2275 * are numbered sequentially.
2277 if (dd->comm->pmenodes == NULL)
2279 if (sim_nodeid < dd->nnodes)
2281 /* The DD index equals the nodeid */
2282 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2288 while (sim_nodeid > dd->comm->pmenodes[i])
2292 if (sim_nodeid < dd->comm->pmenodes[i])
2294 pmenode = dd->comm->pmenodes[i];
2302 void get_pme_nnodes(const gmx_domdec_t *dd,
2303 int *npmenodes_x, int *npmenodes_y)
2307 *npmenodes_x = dd->comm->npmenodes_x;
2308 *npmenodes_y = dd->comm->npmenodes_y;
2317 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2319 gmx_bool bPMEOnlyNode;
2321 if (DOMAINDECOMP(cr))
2323 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2327 bPMEOnlyNode = FALSE;
2330 return bPMEOnlyNode;
2333 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2334 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2338 ivec coord, coord_pme;
2342 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2345 for (x = 0; x < dd->nc[XX]; x++)
2347 for (y = 0; y < dd->nc[YY]; y++)
2349 for (z = 0; z < dd->nc[ZZ]; z++)
2351 if (dd->comm->bCartesianPP_PME)
2356 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2357 if (dd->ci[XX] == coord_pme[XX] &&
2358 dd->ci[YY] == coord_pme[YY] &&
2359 dd->ci[ZZ] == coord_pme[ZZ])
2361 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2366 /* The slab corresponds to the nodeid in the PME group */
2367 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2369 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2376 /* The last PP-only node is the peer node */
2377 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2381 fprintf(debug, "Receive coordinates from PP nodes:");
2382 for (x = 0; x < *nmy_ddnodes; x++)
2384 fprintf(debug, " %d", (*my_ddnodes)[x]);
2386 fprintf(debug, "\n");
2390 static gmx_bool receive_vir_ener(t_commrec *cr)
2392 gmx_domdec_comm_t *comm;
2393 int pmenode, coords[DIM], rank;
2397 if (cr->npmenodes < cr->dd->nnodes)
2399 comm = cr->dd->comm;
2400 if (comm->bCartesianPP_PME)
2402 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2404 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2405 coords[comm->cartpmedim]++;
2406 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2408 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2409 if (dd_simnode2pmenode(cr, rank) == pmenode)
2411 /* This is not the last PP node for pmenode */
2419 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2420 if (cr->sim_nodeid+1 < cr->nnodes &&
2421 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2423 /* This is not the last PP node for pmenode */
2432 static void set_zones_ncg_home(gmx_domdec_t *dd)
2434 gmx_domdec_zones_t *zones;
2437 zones = &dd->comm->zones;
2439 zones->cg_range[0] = 0;
2440 for (i = 1; i < zones->n+1; i++)
2442 zones->cg_range[i] = dd->ncg_home;
2444 /* zone_ncg1[0] should always be equal to ncg_home */
2445 dd->comm->zone_ncg1[0] = dd->ncg_home;
2448 static void rebuild_cgindex(gmx_domdec_t *dd,
2449 const int *gcgs_index, t_state *state)
2451 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2454 dd_cg_gl = dd->index_gl;
2455 cgindex = dd->cgindex;
2458 for (i = 0; i < state->ncg_gl; i++)
2462 dd_cg_gl[i] = cg_gl;
2463 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2467 dd->ncg_home = state->ncg_gl;
2470 set_zones_ncg_home(dd);
2473 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2475 while (cg >= cginfo_mb->cg_end)
2480 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2483 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2484 t_forcerec *fr, char *bLocalCG)
2486 cginfo_mb_t *cginfo_mb;
2492 cginfo_mb = fr->cginfo_mb;
2493 cginfo = fr->cginfo;
2495 for (cg = cg0; cg < cg1; cg++)
2497 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2501 if (bLocalCG != NULL)
2503 for (cg = cg0; cg < cg1; cg++)
2505 bLocalCG[index_gl[cg]] = TRUE;
2510 static void make_dd_indices(gmx_domdec_t *dd,
2511 const int *gcgs_index, int cg_start)
2513 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2514 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2519 bLocalCG = dd->comm->bLocalCG;
2521 if (dd->nat_tot > dd->gatindex_nalloc)
2523 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2524 srenew(dd->gatindex, dd->gatindex_nalloc);
2527 nzone = dd->comm->zones.n;
2528 zone2cg = dd->comm->zones.cg_range;
2529 zone_ncg1 = dd->comm->zone_ncg1;
2530 index_gl = dd->index_gl;
2531 gatindex = dd->gatindex;
2532 bCGs = dd->comm->bCGs;
2534 if (zone2cg[1] != dd->ncg_home)
2536 gmx_incons("dd->ncg_zone is not up to date");
2539 /* Make the local to global and global to local atom index */
2540 a = dd->cgindex[cg_start];
2541 for (zone = 0; zone < nzone; zone++)
2549 cg0 = zone2cg[zone];
2551 cg1 = zone2cg[zone+1];
2552 cg1_p1 = cg0 + zone_ncg1[zone];
2554 for (cg = cg0; cg < cg1; cg++)
2559 /* Signal that this cg is from more than one pulse away */
2562 cg_gl = index_gl[cg];
2565 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2568 ga2la_set(dd->ga2la, a_gl, a, zone1);
2574 gatindex[a] = cg_gl;
2575 ga2la_set(dd->ga2la, cg_gl, a, zone1);
2582 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2585 int ncg, i, ngl, nerr;
2588 if (bLocalCG == NULL)
2592 for (i = 0; i < dd->ncg_tot; i++)
2594 if (!bLocalCG[dd->index_gl[i]])
2597 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
2602 for (i = 0; i < ncg_sys; i++)
2609 if (ngl != dd->ncg_tot)
2611 fprintf(stderr, "DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
2618 static void check_index_consistency(gmx_domdec_t *dd,
2619 int natoms_sys, int ncg_sys,
2622 int nerr, ngl, i, a, cell;
2627 if (dd->comm->DD_debug > 1)
2629 snew(have, natoms_sys);
2630 for (a = 0; a < dd->nat_tot; a++)
2632 if (have[dd->gatindex[a]] > 0)
2634 fprintf(stderr, "DD node %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2638 have[dd->gatindex[a]] = a + 1;
2644 snew(have, dd->nat_tot);
2647 for (i = 0; i < natoms_sys; i++)
2649 if (ga2la_get(dd->ga2la, i, &a, &cell))
2651 if (a >= dd->nat_tot)
2653 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2659 if (dd->gatindex[a] != i)
2661 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2668 if (ngl != dd->nat_tot)
2671 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2672 dd->rank, where, ngl, dd->nat_tot);
2674 for (a = 0; a < dd->nat_tot; a++)
2679 "DD node %d, %s: local atom %d, global %d has no global index\n",
2680 dd->rank, where, a+1, dd->gatindex[a]+1);
2685 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2689 gmx_fatal(FARGS, "DD node %d, %s: %d atom/cg index inconsistencies",
2690 dd->rank, where, nerr);
2694 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2701 /* Clear the whole list without searching */
2702 ga2la_clear(dd->ga2la);
2706 for (i = a_start; i < dd->nat_tot; i++)
2708 ga2la_del(dd->ga2la, dd->gatindex[i]);
2712 bLocalCG = dd->comm->bLocalCG;
2715 for (i = cg_start; i < dd->ncg_tot; i++)
2717 bLocalCG[dd->index_gl[i]] = FALSE;
2721 dd_clear_local_vsite_indices(dd);
2723 if (dd->constraints)
2725 dd_clear_local_constraint_indices(dd);
2729 /* This function should be used for moving the domain boudaries during DLB,
2730 * for obtaining the minimum cell size. It checks the initially set limit
2731 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2732 * and, possibly, a longer cut-off limit set for PME load balancing.
2734 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2738 cellsize_min = comm->cellsize_min[dim];
2740 if (!comm->bVacDLBNoLimit)
2742 /* The cut-off might have changed, e.g. by PME load balacning,
2743 * from the value used to set comm->cellsize_min, so check it.
2745 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2747 if (comm->bPMELoadBalDLBLimits)
2749 /* Check for the cut-off limit set by the PME load balancing */
2750 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2754 return cellsize_min;
2757 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2760 real grid_jump_limit;
2762 /* The distance between the boundaries of cells at distance
2763 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2764 * and by the fact that cells should not be shifted by more than
2765 * half their size, such that cg's only shift by one cell
2766 * at redecomposition.
2768 grid_jump_limit = comm->cellsize_limit;
2769 if (!comm->bVacDLBNoLimit)
2771 if (comm->bPMELoadBalDLBLimits)
2773 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2775 grid_jump_limit = max(grid_jump_limit,
2776 cutoff/comm->cd[dim_ind].np);
2779 return grid_jump_limit;
2782 static gmx_bool check_grid_jump(gmx_int64_t step,
2788 gmx_domdec_comm_t *comm;
2797 for (d = 1; d < dd->ndim; d++)
2800 limit = grid_jump_limit(comm, cutoff, d);
2801 bfac = ddbox->box_size[dim];
2802 if (ddbox->tric_dir[dim])
2804 bfac *= ddbox->skew_fac[dim];
2806 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2807 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2815 /* This error should never be triggered under normal
2816 * circumstances, but you never know ...
2818 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
2819 gmx_step_str(step, buf),
2820 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2828 static int dd_load_count(gmx_domdec_comm_t *comm)
2830 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2833 static float dd_force_load(gmx_domdec_comm_t *comm)
2840 if (comm->eFlop > 1)
2842 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2847 load = comm->cycl[ddCyclF];
2848 if (comm->cycl_n[ddCyclF] > 1)
2850 /* Subtract the maximum of the last n cycle counts
2851 * to get rid of possible high counts due to other sources,
2852 * for instance system activity, that would otherwise
2853 * affect the dynamic load balancing.
2855 load -= comm->cycl_max[ddCyclF];
2859 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2861 float gpu_wait, gpu_wait_sum;
2863 gpu_wait = comm->cycl[ddCyclWaitGPU];
2864 if (comm->cycl_n[ddCyclF] > 1)
2866 /* We should remove the WaitGPU time of the same MD step
2867 * as the one with the maximum F time, since the F time
2868 * and the wait time are not independent.
2869 * Furthermore, the step for the max F time should be chosen
2870 * the same on all ranks that share the same GPU.
2871 * But to keep the code simple, we remove the average instead.
2872 * The main reason for artificially long times at some steps
2873 * is spurious CPU activity or MPI time, so we don't expect
2874 * that changes in the GPU wait time matter a lot here.
2876 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2878 /* Sum the wait times over the ranks that share the same GPU */
2879 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2880 comm->mpi_comm_gpu_shared);
2881 /* Replace the wait time by the average over the ranks */
2882 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
2890 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2892 gmx_domdec_comm_t *comm;
2897 snew(*dim_f, dd->nc[dim]+1);
2899 for (i = 1; i < dd->nc[dim]; i++)
2901 if (comm->slb_frac[dim])
2903 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2907 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2910 (*dim_f)[dd->nc[dim]] = 1;
2913 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2915 int pmeindex, slab, nso, i;
2918 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2924 ddpme->dim = dimind;
2926 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2928 ddpme->nslab = (ddpme->dim == 0 ?
2929 dd->comm->npmenodes_x :
2930 dd->comm->npmenodes_y);
2932 if (ddpme->nslab <= 1)
2937 nso = dd->comm->npmenodes/ddpme->nslab;
2938 /* Determine for each PME slab the PP location range for dimension dim */
2939 snew(ddpme->pp_min, ddpme->nslab);
2940 snew(ddpme->pp_max, ddpme->nslab);
2941 for (slab = 0; slab < ddpme->nslab; slab++)
2943 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2944 ddpme->pp_max[slab] = 0;
2946 for (i = 0; i < dd->nnodes; i++)
2948 ddindex2xyz(dd->nc, i, xyz);
2949 /* For y only use our y/z slab.
2950 * This assumes that the PME x grid size matches the DD grid size.
2952 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2954 pmeindex = ddindex2pmeindex(dd, i);
2957 slab = pmeindex/nso;
2961 slab = pmeindex % ddpme->nslab;
2963 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
2964 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
2968 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
2971 int dd_pme_maxshift_x(gmx_domdec_t *dd)
2973 if (dd->comm->ddpme[0].dim == XX)
2975 return dd->comm->ddpme[0].maxshift;
2983 int dd_pme_maxshift_y(gmx_domdec_t *dd)
2985 if (dd->comm->ddpme[0].dim == YY)
2987 return dd->comm->ddpme[0].maxshift;
2989 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2991 return dd->comm->ddpme[1].maxshift;
2999 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
3000 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
3002 gmx_domdec_comm_t *comm;
3005 real range, pme_boundary;
3009 nc = dd->nc[ddpme->dim];
3012 if (!ddpme->dim_match)
3014 /* PP decomposition is not along dim: the worst situation */
3017 else if (ns <= 3 || (bUniform && ns == nc))
3019 /* The optimal situation */
3024 /* We need to check for all pme nodes which nodes they
3025 * could possibly need to communicate with.
3027 xmin = ddpme->pp_min;
3028 xmax = ddpme->pp_max;
3029 /* Allow for atoms to be maximally 2/3 times the cut-off
3030 * out of their DD cell. This is a reasonable balance between
3031 * between performance and support for most charge-group/cut-off
3034 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3035 /* Avoid extra communication when we are exactly at a boundary */
3039 for (s = 0; s < ns; s++)
3041 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3042 pme_boundary = (real)s/ns;
3045 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3047 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3051 pme_boundary = (real)(s+1)/ns;
3054 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3056 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3063 ddpme->maxshift = sh;
3067 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3068 ddpme->dim, ddpme->maxshift);
3072 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3076 for (d = 0; d < dd->ndim; d++)
3079 if (dim < ddbox->nboundeddim &&
3080 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3081 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3083 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3084 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3085 dd->nc[dim], dd->comm->cellsize_limit);
3091 setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY
3094 /* Set the domain boundaries. Use for static (or no) load balancing,
3095 * and also for the starting state for dynamic load balancing.
3096 * setmode determine if and where the boundaries are stored, use enum above.
3097 * Returns the number communication pulses in npulse.
3099 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3100 int setmode, ivec npulse)
3102 gmx_domdec_comm_t *comm;
3105 real *cell_x, cell_dx, cellsize;
3109 for (d = 0; d < DIM; d++)
3111 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
3113 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3116 cell_dx = ddbox->box_size[d]/dd->nc[d];
3119 case setcellsizeslbMASTER:
3120 for (j = 0; j < dd->nc[d]+1; j++)
3122 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3125 case setcellsizeslbLOCAL:
3126 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3127 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
3132 cellsize = cell_dx*ddbox->skew_fac[d];
3133 while (cellsize*npulse[d] < comm->cutoff)
3137 cellsize_min[d] = cellsize;
3141 /* Statically load balanced grid */
3142 /* Also when we are not doing a master distribution we determine
3143 * all cell borders in a loop to obtain identical values
3144 * to the master distribution case and to determine npulse.
3146 if (setmode == setcellsizeslbMASTER)
3148 cell_x = dd->ma->cell_x[d];
3152 snew(cell_x, dd->nc[d]+1);
3154 cell_x[0] = ddbox->box0[d];
3155 for (j = 0; j < dd->nc[d]; j++)
3157 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3158 cell_x[j+1] = cell_x[j] + cell_dx;
3159 cellsize = cell_dx*ddbox->skew_fac[d];
3160 while (cellsize*npulse[d] < comm->cutoff &&
3161 npulse[d] < dd->nc[d]-1)
3165 cellsize_min[d] = min(cellsize_min[d], cellsize);
3167 if (setmode == setcellsizeslbLOCAL)
3169 comm->cell_x0[d] = cell_x[dd->ci[d]];
3170 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3172 if (setmode != setcellsizeslbMASTER)
3177 /* The following limitation is to avoid that a cell would receive
3178 * some of its own home charge groups back over the periodic boundary.
3179 * Double charge groups cause trouble with the global indices.
3181 if (d < ddbox->npbcdim &&
3182 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3184 char error_string[STRLEN];
3186 sprintf(error_string,
3187 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3188 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3190 dd->nc[d], dd->nc[d],
3191 dd->nnodes > dd->nc[d] ? "cells" : "processors");
3193 if (setmode == setcellsizeslbLOCAL)
3195 gmx_fatal_collective(FARGS, NULL, dd, error_string);
3199 gmx_fatal(FARGS, error_string);
3204 if (!comm->bDynLoadBal)
3206 copy_rvec(cellsize_min, comm->cellsize_min);
3209 for (d = 0; d < comm->npmedecompdim; d++)
3211 set_pme_maxshift(dd, &comm->ddpme[d],
3212 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3213 comm->ddpme[d].slb_dim_f);
3218 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3219 int d, int dim, gmx_domdec_root_t *root,
3221 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3223 gmx_domdec_comm_t *comm;
3224 int ncd, i, j, nmin, nmin_old;
3225 gmx_bool bLimLo, bLimHi;
3227 real fac, halfway, cellsize_limit_f_i, region_size;
3228 gmx_bool bPBC, bLastHi = FALSE;
3229 int nrange[] = {range[0], range[1]};
3231 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
3237 bPBC = (dim < ddbox->npbcdim);
3239 cell_size = root->buf_ncd;
3243 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3246 /* First we need to check if the scaling does not make cells
3247 * smaller than the smallest allowed size.
3248 * We need to do this iteratively, since if a cell is too small,
3249 * it needs to be enlarged, which makes all the other cells smaller,
3250 * which could in turn make another cell smaller than allowed.
3252 for (i = range[0]; i < range[1]; i++)
3254 root->bCellMin[i] = FALSE;
3260 /* We need the total for normalization */
3262 for (i = range[0]; i < range[1]; i++)
3264 if (root->bCellMin[i] == FALSE)
3266 fac += cell_size[i];
3269 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3270 /* Determine the cell boundaries */
3271 for (i = range[0]; i < range[1]; i++)
3273 if (root->bCellMin[i] == FALSE)
3275 cell_size[i] *= fac;
3276 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3278 cellsize_limit_f_i = 0;
3282 cellsize_limit_f_i = cellsize_limit_f;
3284 if (cell_size[i] < cellsize_limit_f_i)
3286 root->bCellMin[i] = TRUE;
3287 cell_size[i] = cellsize_limit_f_i;
3291 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
3294 while (nmin > nmin_old);
3297 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3298 /* For this check we should not use DD_CELL_MARGIN,
3299 * but a slightly smaller factor,
3300 * since rounding could get use below the limit.
3302 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3305 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3306 gmx_step_str(step, buf),
3307 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3308 ncd, comm->cellsize_min[dim]);
3311 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3315 /* Check if the boundary did not displace more than halfway
3316 * each of the cells it bounds, as this could cause problems,
3317 * especially when the differences between cell sizes are large.
3318 * If changes are applied, they will not make cells smaller
3319 * than the cut-off, as we check all the boundaries which
3320 * might be affected by a change and if the old state was ok,
3321 * the cells will at most be shrunk back to their old size.
3323 for (i = range[0]+1; i < range[1]; i++)
3325 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3326 if (root->cell_f[i] < halfway)
3328 root->cell_f[i] = halfway;
3329 /* Check if the change also causes shifts of the next boundaries */
3330 for (j = i+1; j < range[1]; j++)
3332 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3334 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3338 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3339 if (root->cell_f[i] > halfway)
3341 root->cell_f[i] = halfway;
3342 /* Check if the change also causes shifts of the next boundaries */
3343 for (j = i-1; j >= range[0]+1; j--)
3345 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3347 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3354 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3355 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3356 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3357 * for a and b nrange is used */
3360 /* Take care of the staggering of the cell boundaries */
3363 for (i = range[0]; i < range[1]; i++)
3365 root->cell_f_max0[i] = root->cell_f[i];
3366 root->cell_f_min1[i] = root->cell_f[i+1];
3371 for (i = range[0]+1; i < range[1]; i++)
3373 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3374 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3375 if (bLimLo && bLimHi)
3377 /* Both limits violated, try the best we can */
3378 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3379 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3380 nrange[0] = range[0];
3382 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3385 nrange[1] = range[1];
3386 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3392 /* root->cell_f[i] = root->bound_min[i]; */
3393 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3396 else if (bLimHi && !bLastHi)
3399 if (nrange[1] < range[1]) /* found a LimLo before */
3401 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3402 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3403 nrange[0] = nrange[1];
3405 root->cell_f[i] = root->bound_max[i];
3407 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3409 nrange[1] = range[1];
3412 if (nrange[1] < range[1]) /* found last a LimLo */
3414 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3415 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3416 nrange[0] = nrange[1];
3417 nrange[1] = range[1];
3418 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3420 else if (nrange[0] > range[0]) /* found at least one LimHi */
3422 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3429 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3430 int d, int dim, gmx_domdec_root_t *root,
3431 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3432 gmx_bool bUniform, gmx_int64_t step)
3434 gmx_domdec_comm_t *comm;
3435 int ncd, d1, i, j, pos;
3437 real load_aver, load_i, imbalance, change, change_max, sc;
3438 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3442 int range[] = { 0, 0 };
3446 /* Convert the maximum change from the input percentage to a fraction */
3447 change_limit = comm->dlb_scale_lim*0.01;
3451 bPBC = (dim < ddbox->npbcdim);
3453 cell_size = root->buf_ncd;
3455 /* Store the original boundaries */
3456 for (i = 0; i < ncd+1; i++)
3458 root->old_cell_f[i] = root->cell_f[i];
3462 for (i = 0; i < ncd; i++)
3464 cell_size[i] = 1.0/ncd;
3467 else if (dd_load_count(comm))
3469 load_aver = comm->load[d].sum_m/ncd;
3471 for (i = 0; i < ncd; i++)
3473 /* Determine the relative imbalance of cell i */
3474 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3475 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3476 /* Determine the change of the cell size using underrelaxation */
3477 change = -relax*imbalance;
3478 change_max = max(change_max, max(change, -change));
3480 /* Limit the amount of scaling.
3481 * We need to use the same rescaling for all cells in one row,
3482 * otherwise the load balancing might not converge.
3485 if (change_max > change_limit)
3487 sc *= change_limit/change_max;
3489 for (i = 0; i < ncd; i++)
3491 /* Determine the relative imbalance of cell i */
3492 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3493 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3494 /* Determine the change of the cell size using underrelaxation */
3495 change = -sc*imbalance;
3496 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3500 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3501 cellsize_limit_f *= DD_CELL_MARGIN;
3502 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3503 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3504 if (ddbox->tric_dir[dim])
3506 cellsize_limit_f /= ddbox->skew_fac[dim];
3507 dist_min_f /= ddbox->skew_fac[dim];
3509 if (bDynamicBox && d > 0)
3511 dist_min_f *= DD_PRES_SCALE_MARGIN;
3513 if (d > 0 && !bUniform)
3515 /* Make sure that the grid is not shifted too much */
3516 for (i = 1; i < ncd; i++)
3518 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3520 gmx_incons("Inconsistent DD boundary staggering limits!");
3522 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3523 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3526 root->bound_min[i] += 0.5*space;
3528 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3529 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3532 root->bound_max[i] += 0.5*space;
3537 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3539 root->cell_f_max0[i-1] + dist_min_f,
3540 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3541 root->cell_f_min1[i] - dist_min_f);
3546 root->cell_f[0] = 0;
3547 root->cell_f[ncd] = 1;
3548 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3551 /* After the checks above, the cells should obey the cut-off
3552 * restrictions, but it does not hurt to check.
3554 for (i = 0; i < ncd; i++)
3558 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3559 dim, i, root->cell_f[i], root->cell_f[i+1]);
3562 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3563 root->cell_f[i+1] - root->cell_f[i] <
3564 cellsize_limit_f/DD_CELL_MARGIN)
3568 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3569 gmx_step_str(step, buf), dim2char(dim), i,
3570 (root->cell_f[i+1] - root->cell_f[i])
3571 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3576 /* Store the cell boundaries of the lower dimensions at the end */
3577 for (d1 = 0; d1 < d; d1++)
3579 root->cell_f[pos++] = comm->cell_f0[d1];
3580 root->cell_f[pos++] = comm->cell_f1[d1];
3583 if (d < comm->npmedecompdim)
3585 /* The master determines the maximum shift for
3586 * the coordinate communication between separate PME nodes.
3588 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
3590 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3593 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3597 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3598 gmx_ddbox_t *ddbox, int dimind)
3600 gmx_domdec_comm_t *comm;
3605 /* Set the cell dimensions */
3606 dim = dd->dim[dimind];
3607 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3608 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3609 if (dim >= ddbox->nboundeddim)
3611 comm->cell_x0[dim] += ddbox->box0[dim];
3612 comm->cell_x1[dim] += ddbox->box0[dim];
3616 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3617 int d, int dim, real *cell_f_row,
3620 gmx_domdec_comm_t *comm;
3626 /* Each node would only need to know two fractions,
3627 * but it is probably cheaper to broadcast the whole array.
3629 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3630 0, comm->mpi_comm_load[d]);
3632 /* Copy the fractions for this dimension from the buffer */
3633 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3634 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3635 /* The whole array was communicated, so set the buffer position */
3636 pos = dd->nc[dim] + 1;
3637 for (d1 = 0; d1 <= d; d1++)
3641 /* Copy the cell fractions of the lower dimensions */
3642 comm->cell_f0[d1] = cell_f_row[pos++];
3643 comm->cell_f1[d1] = cell_f_row[pos++];
3645 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3647 /* Convert the communicated shift from float to int */
3648 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3651 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3655 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3656 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3657 gmx_bool bUniform, gmx_int64_t step)
3659 gmx_domdec_comm_t *comm;
3661 gmx_bool bRowMember, bRowRoot;
3666 for (d = 0; d < dd->ndim; d++)
3671 for (d1 = d; d1 < dd->ndim; d1++)
3673 if (dd->ci[dd->dim[d1]] > 0)
3686 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3687 ddbox, bDynamicBox, bUniform, step);
3688 cell_f_row = comm->root[d]->cell_f;
3692 cell_f_row = comm->cell_f_row;
3694 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
3699 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3703 /* This function assumes the box is static and should therefore
3704 * not be called when the box has changed since the last
3705 * call to dd_partition_system.
3707 for (d = 0; d < dd->ndim; d++)
3709 relative_to_absolute_cell_bounds(dd, ddbox, d);
3715 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3716 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3717 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3718 gmx_wallcycle_t wcycle)
3720 gmx_domdec_comm_t *comm;
3727 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3728 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3729 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3731 else if (bDynamicBox)
3733 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3736 /* Set the dimensions for which no DD is used */
3737 for (dim = 0; dim < DIM; dim++)
3739 if (dd->nc[dim] == 1)
3741 comm->cell_x0[dim] = 0;
3742 comm->cell_x1[dim] = ddbox->box_size[dim];
3743 if (dim >= ddbox->nboundeddim)
3745 comm->cell_x0[dim] += ddbox->box0[dim];
3746 comm->cell_x1[dim] += ddbox->box0[dim];
3752 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3755 gmx_domdec_comm_dim_t *cd;
3757 for (d = 0; d < dd->ndim; d++)
3759 cd = &dd->comm->cd[d];
3760 np = npulse[dd->dim[d]];
3761 if (np > cd->np_nalloc)
3765 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3766 dim2char(dd->dim[d]), np);
3768 if (DDMASTER(dd) && cd->np_nalloc > 0)
3770 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3772 srenew(cd->ind, np);
3773 for (i = cd->np_nalloc; i < np; i++)
3775 cd->ind[i].index = NULL;
3776 cd->ind[i].nalloc = 0;
3785 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3786 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3787 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3788 gmx_wallcycle_t wcycle)
3790 gmx_domdec_comm_t *comm;
3796 /* Copy the old cell boundaries for the cg displacement check */
3797 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3798 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3800 if (comm->bDynLoadBal)
3804 check_box_size(dd, ddbox);
3806 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3810 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
3811 realloc_comm_ind(dd, npulse);
3816 for (d = 0; d < DIM; d++)
3818 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3819 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
3824 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3826 rvec cell_ns_x0, rvec cell_ns_x1,
3829 gmx_domdec_comm_t *comm;
3834 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3836 dim = dd->dim[dim_ind];
3838 /* Without PBC we don't have restrictions on the outer cells */
3839 if (!(dim >= ddbox->npbcdim &&
3840 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3841 comm->bDynLoadBal &&
3842 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3843 comm->cellsize_min[dim])
3846 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3847 gmx_step_str(step, buf), dim2char(dim),
3848 comm->cell_x1[dim] - comm->cell_x0[dim],
3849 ddbox->skew_fac[dim],
3850 dd->comm->cellsize_min[dim],
3851 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3855 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3857 /* Communicate the boundaries and update cell_ns_x0/1 */
3858 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3859 if (dd->bGridJump && dd->ndim > 1)
3861 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
3866 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3870 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3878 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3879 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3888 static void check_screw_box(matrix box)
3890 /* Mathematical limitation */
3891 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3893 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3896 /* Limitation due to the asymmetry of the eighth shell method */
3897 if (box[ZZ][YY] != 0)
3899 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
3903 static void distribute_cg(FILE *fplog, gmx_int64_t step,
3904 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3907 gmx_domdec_master_t *ma;
3908 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3909 int i, icg, j, k, k0, k1, d, npbcdim;
3911 rvec box_size, cg_cm;
3913 real nrcg, inv_ncg, pos_d;
3915 gmx_bool bUnbounded, bScrew;
3919 if (tmp_ind == NULL)
3921 snew(tmp_nalloc, dd->nnodes);
3922 snew(tmp_ind, dd->nnodes);
3923 for (i = 0; i < dd->nnodes; i++)
3925 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3926 snew(tmp_ind[i], tmp_nalloc[i]);
3930 /* Clear the count */
3931 for (i = 0; i < dd->nnodes; i++)
3937 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3939 cgindex = cgs->index;
3941 /* Compute the center of geometry for all charge groups */
3942 for (icg = 0; icg < cgs->nr; icg++)
3945 k1 = cgindex[icg+1];
3949 copy_rvec(pos[k0], cg_cm);
3956 for (k = k0; (k < k1); k++)
3958 rvec_inc(cg_cm, pos[k]);
3960 for (d = 0; (d < DIM); d++)
3962 cg_cm[d] *= inv_ncg;
3965 /* Put the charge group in the box and determine the cell index */
3966 for (d = DIM-1; d >= 0; d--)
3969 if (d < dd->npbcdim)
3971 bScrew = (dd->bScrewPBC && d == XX);
3972 if (tric_dir[d] && dd->nc[d] > 1)
3974 /* Use triclinic coordintates for this dimension */
3975 for (j = d+1; j < DIM; j++)
3977 pos_d += cg_cm[j]*tcm[j][d];
3980 while (pos_d >= box[d][d])
3983 rvec_dec(cg_cm, box[d]);
3986 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3987 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3989 for (k = k0; (k < k1); k++)
3991 rvec_dec(pos[k], box[d]);
3994 pos[k][YY] = box[YY][YY] - pos[k][YY];
3995 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4002 rvec_inc(cg_cm, box[d]);
4005 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
4006 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
4008 for (k = k0; (k < k1); k++)
4010 rvec_inc(pos[k], box[d]);
4013 pos[k][YY] = box[YY][YY] - pos[k][YY];
4014 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4019 /* This could be done more efficiently */
4021 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
4026 i = dd_index(dd->nc, ind);
4027 if (ma->ncg[i] == tmp_nalloc[i])
4029 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
4030 srenew(tmp_ind[i], tmp_nalloc[i]);
4032 tmp_ind[i][ma->ncg[i]] = icg;
4034 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
4038 for (i = 0; i < dd->nnodes; i++)
4041 for (k = 0; k < ma->ncg[i]; k++)
4043 ma->cg[k1++] = tmp_ind[i][k];
4046 ma->index[dd->nnodes] = k1;
4048 for (i = 0; i < dd->nnodes; i++)
4058 fprintf(fplog, "Charge group distribution at step %s:",
4059 gmx_step_str(step, buf));
4060 for (i = 0; i < dd->nnodes; i++)
4062 fprintf(fplog, " %d", ma->ncg[i]);
4064 fprintf(fplog, "\n");
4068 static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4069 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4072 gmx_domdec_master_t *ma = NULL;
4075 int *ibuf, buf2[2] = { 0, 0 };
4076 gmx_bool bMaster = DDMASTER(dd);
4084 check_screw_box(box);
4087 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
4089 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
4090 for (i = 0; i < dd->nnodes; i++)
4092 ma->ibuf[2*i] = ma->ncg[i];
4093 ma->ibuf[2*i+1] = ma->nat[i];
4101 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4103 dd->ncg_home = buf2[0];
4104 dd->nat_home = buf2[1];
4105 dd->ncg_tot = dd->ncg_home;
4106 dd->nat_tot = dd->nat_home;
4107 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4109 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4110 srenew(dd->index_gl, dd->cg_nalloc);
4111 srenew(dd->cgindex, dd->cg_nalloc+1);
4115 for (i = 0; i < dd->nnodes; i++)
4117 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4118 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4123 DDMASTER(dd) ? ma->ibuf : NULL,
4124 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4125 DDMASTER(dd) ? ma->cg : NULL,
4126 dd->ncg_home*sizeof(int), dd->index_gl);
4128 /* Determine the home charge group sizes */
4130 for (i = 0; i < dd->ncg_home; i++)
4132 cg_gl = dd->index_gl[i];
4134 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4139 fprintf(debug, "Home charge groups:\n");
4140 for (i = 0; i < dd->ncg_home; i++)
4142 fprintf(debug, " %d", dd->index_gl[i]);
4145 fprintf(debug, "\n");
4148 fprintf(debug, "\n");
4152 static int compact_and_copy_vec_at(int ncg, int *move,
4155 rvec *src, gmx_domdec_comm_t *comm,
4158 int m, icg, i, i0, i1, nrcg;
4164 for (m = 0; m < DIM*2; m++)
4170 for (icg = 0; icg < ncg; icg++)
4172 i1 = cgindex[icg+1];
4178 /* Compact the home array in place */
4179 for (i = i0; i < i1; i++)
4181 copy_rvec(src[i], src[home_pos++]);
4187 /* Copy to the communication buffer */
4189 pos_vec[m] += 1 + vec*nrcg;
4190 for (i = i0; i < i1; i++)
4192 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4194 pos_vec[m] += (nvec - vec - 1)*nrcg;
4198 home_pos += i1 - i0;
4206 static int compact_and_copy_vec_cg(int ncg, int *move,
4208 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4211 int m, icg, i0, i1, nrcg;
4217 for (m = 0; m < DIM*2; m++)
4223 for (icg = 0; icg < ncg; icg++)
4225 i1 = cgindex[icg+1];
4231 /* Compact the home array in place */
4232 copy_rvec(src[icg], src[home_pos++]);
4238 /* Copy to the communication buffer */
4239 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
4240 pos_vec[m] += 1 + nrcg*nvec;
4252 static int compact_ind(int ncg, int *move,
4253 int *index_gl, int *cgindex,
4255 gmx_ga2la_t ga2la, char *bLocalCG,
4258 int cg, nat, a0, a1, a, a_gl;
4263 for (cg = 0; cg < ncg; cg++)
4269 /* Compact the home arrays in place.
4270 * Anything that can be done here avoids access to global arrays.
4272 cgindex[home_pos] = nat;
4273 for (a = a0; a < a1; a++)
4276 gatindex[nat] = a_gl;
4277 /* The cell number stays 0, so we don't need to set it */
4278 ga2la_change_la(ga2la, a_gl, nat);
4281 index_gl[home_pos] = index_gl[cg];
4282 cginfo[home_pos] = cginfo[cg];
4283 /* The charge group remains local, so bLocalCG does not change */
4288 /* Clear the global indices */
4289 for (a = a0; a < a1; a++)
4291 ga2la_del(ga2la, gatindex[a]);
4295 bLocalCG[index_gl[cg]] = FALSE;
4299 cgindex[home_pos] = nat;
4304 static void clear_and_mark_ind(int ncg, int *move,
4305 int *index_gl, int *cgindex, int *gatindex,
4306 gmx_ga2la_t ga2la, char *bLocalCG,
4311 for (cg = 0; cg < ncg; cg++)
4317 /* Clear the global indices */
4318 for (a = a0; a < a1; a++)
4320 ga2la_del(ga2la, gatindex[a]);
4324 bLocalCG[index_gl[cg]] = FALSE;
4326 /* Signal that this cg has moved using the ns cell index.
4327 * Here we set it to -1. fill_grid will change it
4328 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4330 cell_index[cg] = -1;
4335 static void print_cg_move(FILE *fplog,
4337 gmx_int64_t step, int cg, int dim, int dir,
4338 gmx_bool bHaveLimitdAndCMOld, real limitd,
4339 rvec cm_old, rvec cm_new, real pos_d)
4341 gmx_domdec_comm_t *comm;
4346 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4347 if (bHaveLimitdAndCMOld)
4349 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4350 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4354 fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4355 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4357 fprintf(fplog, "distance out of cell %f\n",
4358 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4359 if (bHaveLimitdAndCMOld)
4361 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4362 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4364 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4365 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4366 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4368 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4369 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4371 comm->cell_x0[dim], comm->cell_x1[dim]);
4374 static void cg_move_error(FILE *fplog,
4376 gmx_int64_t step, int cg, int dim, int dir,
4377 gmx_bool bHaveLimitdAndCMOld, real limitd,
4378 rvec cm_old, rvec cm_new, real pos_d)
4382 print_cg_move(fplog, dd, step, cg, dim, dir,
4383 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4385 print_cg_move(stderr, dd, step, cg, dim, dir,
4386 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4388 "A charge group moved too far between two domain decomposition steps\n"
4389 "This usually means that your system is not well equilibrated");
4392 static void rotate_state_atom(t_state *state, int a)
4396 for (est = 0; est < estNR; est++)
4398 if (EST_DISTR(est) && (state->flags & (1<<est)))
4403 /* Rotate the complete state; for a rectangular box only */
4404 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4405 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4408 state->v[a][YY] = -state->v[a][YY];
4409 state->v[a][ZZ] = -state->v[a][ZZ];
4412 state->sd_X[a][YY] = -state->sd_X[a][YY];
4413 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4416 state->cg_p[a][YY] = -state->cg_p[a][YY];
4417 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4419 case estDISRE_INITF:
4420 case estDISRE_RM3TAV:
4421 case estORIRE_INITF:
4423 /* These are distances, so not affected by rotation */
4426 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4432 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4434 if (natoms > comm->moved_nalloc)
4436 /* Contents should be preserved here */
4437 comm->moved_nalloc = over_alloc_dd(natoms);
4438 srenew(comm->moved, comm->moved_nalloc);
4444 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4447 ivec tric_dir, matrix tcm,
4448 rvec cell_x0, rvec cell_x1,
4449 rvec limitd, rvec limit0, rvec limit1,
4451 int cg_start, int cg_end,
4456 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4457 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4461 real inv_ncg, pos_d;
4464 npbcdim = dd->npbcdim;
4466 for (cg = cg_start; cg < cg_end; cg++)
4473 copy_rvec(state->x[k0], cm_new);
4480 for (k = k0; (k < k1); k++)
4482 rvec_inc(cm_new, state->x[k]);
4484 for (d = 0; (d < DIM); d++)
4486 cm_new[d] = inv_ncg*cm_new[d];
4491 /* Do pbc and check DD cell boundary crossings */
4492 for (d = DIM-1; d >= 0; d--)
4496 bScrew = (dd->bScrewPBC && d == XX);
4497 /* Determine the location of this cg in lattice coordinates */
4501 for (d2 = d+1; d2 < DIM; d2++)
4503 pos_d += cm_new[d2]*tcm[d2][d];
4506 /* Put the charge group in the triclinic unit-cell */
4507 if (pos_d >= cell_x1[d])
4509 if (pos_d >= limit1[d])
4511 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4512 cg_cm[cg], cm_new, pos_d);
4515 if (dd->ci[d] == dd->nc[d] - 1)
4517 rvec_dec(cm_new, state->box[d]);
4520 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4521 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4523 for (k = k0; (k < k1); k++)
4525 rvec_dec(state->x[k], state->box[d]);
4528 rotate_state_atom(state, k);
4533 else if (pos_d < cell_x0[d])
4535 if (pos_d < limit0[d])
4537 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4538 cg_cm[cg], cm_new, pos_d);
4543 rvec_inc(cm_new, state->box[d]);
4546 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4547 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4549 for (k = k0; (k < k1); k++)
4551 rvec_inc(state->x[k], state->box[d]);
4554 rotate_state_atom(state, k);
4560 else if (d < npbcdim)
4562 /* Put the charge group in the rectangular unit-cell */
4563 while (cm_new[d] >= state->box[d][d])
4565 rvec_dec(cm_new, state->box[d]);
4566 for (k = k0; (k < k1); k++)
4568 rvec_dec(state->x[k], state->box[d]);
4571 while (cm_new[d] < 0)
4573 rvec_inc(cm_new, state->box[d]);
4574 for (k = k0; (k < k1); k++)
4576 rvec_inc(state->x[k], state->box[d]);
4582 copy_rvec(cm_new, cg_cm[cg]);
4584 /* Determine where this cg should go */
4587 for (d = 0; d < dd->ndim; d++)
4592 flag |= DD_FLAG_FW(d);
4598 else if (dev[dim] == -1)
4600 flag |= DD_FLAG_BW(d);
4603 if (dd->nc[dim] > 2)
4614 /* Temporarily store the flag in move */
4615 move[cg] = mc + flag;
4619 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4620 gmx_domdec_t *dd, ivec tric_dir,
4621 t_state *state, rvec **f,
4630 int ncg[DIM*2], nat[DIM*2];
4631 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4632 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4633 int sbuf[2], rbuf[2];
4634 int home_pos_cg, home_pos_at, buf_pos;
4636 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4639 real inv_ncg, pos_d;
4641 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4643 cginfo_mb_t *cginfo_mb;
4644 gmx_domdec_comm_t *comm;
4646 int nthread, thread;
4650 check_screw_box(state->box);
4654 if (fr->cutoff_scheme == ecutsGROUP)
4659 for (i = 0; i < estNR; i++)
4665 case estX: /* Always present */ break;
4666 case estV: bV = (state->flags & (1<<i)); break;
4667 case estSDX: bSDX = (state->flags & (1<<i)); break;
4668 case estCGP: bCGP = (state->flags & (1<<i)); break;
4671 case estDISRE_INITF:
4672 case estDISRE_RM3TAV:
4673 case estORIRE_INITF:
4675 /* No processing required */
4678 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4683 if (dd->ncg_tot > comm->nalloc_int)
4685 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4686 srenew(comm->buf_int, comm->nalloc_int);
4688 move = comm->buf_int;
4690 /* Clear the count */
4691 for (c = 0; c < dd->ndim*2; c++)
4697 npbcdim = dd->npbcdim;
4699 for (d = 0; (d < DIM); d++)
4701 limitd[d] = dd->comm->cellsize_min[d];
4702 if (d >= npbcdim && dd->ci[d] == 0)
4704 cell_x0[d] = -GMX_FLOAT_MAX;
4708 cell_x0[d] = comm->cell_x0[d];
4710 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4712 cell_x1[d] = GMX_FLOAT_MAX;
4716 cell_x1[d] = comm->cell_x1[d];
4720 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4721 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4725 /* We check after communication if a charge group moved
4726 * more than one cell. Set the pre-comm check limit to float_max.
4728 limit0[d] = -GMX_FLOAT_MAX;
4729 limit1[d] = GMX_FLOAT_MAX;
4733 make_tric_corr_matrix(npbcdim, state->box, tcm);
4735 cgindex = dd->cgindex;
4737 nthread = gmx_omp_nthreads_get(emntDomdec);
4739 /* Compute the center of geometry for all home charge groups
4740 * and put them in the box and determine where they should go.
4742 #pragma omp parallel for num_threads(nthread) schedule(static)
4743 for (thread = 0; thread < nthread; thread++)
4745 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4746 cell_x0, cell_x1, limitd, limit0, limit1,
4748 ( thread *dd->ncg_home)/nthread,
4749 ((thread+1)*dd->ncg_home)/nthread,
4750 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4754 for (cg = 0; cg < dd->ncg_home; cg++)
4759 flag = mc & ~DD_FLAG_NRCG;
4760 mc = mc & DD_FLAG_NRCG;
4763 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4765 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4766 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4768 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4769 /* We store the cg size in the lower 16 bits
4770 * and the place where the charge group should go
4771 * in the next 6 bits. This saves some communication volume.
4773 nrcg = cgindex[cg+1] - cgindex[cg];
4774 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4780 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4781 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4784 for (i = 0; i < dd->ndim*2; i++)
4786 *ncg_moved += ncg[i];
4803 /* Make sure the communication buffers are large enough */
4804 for (mc = 0; mc < dd->ndim*2; mc++)
4806 nvr = ncg[mc] + nat[mc]*nvec;
4807 if (nvr > comm->cgcm_state_nalloc[mc])
4809 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4810 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4814 switch (fr->cutoff_scheme)
4817 /* Recalculating cg_cm might be cheaper than communicating,
4818 * but that could give rise to rounding issues.
4821 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4822 nvec, cg_cm, comm, bCompact);
4825 /* Without charge groups we send the moved atom coordinates
4826 * over twice. This is so the code below can be used without
4827 * many conditionals for both for with and without charge groups.
4830 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4831 nvec, state->x, comm, FALSE);
4834 home_pos_cg -= *ncg_moved;
4838 gmx_incons("unimplemented");
4844 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4845 nvec, vec++, state->x, comm, bCompact);
4848 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4849 nvec, vec++, state->v, comm, bCompact);
4853 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4854 nvec, vec++, state->sd_X, comm, bCompact);
4858 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4859 nvec, vec++, state->cg_p, comm, bCompact);
4864 compact_ind(dd->ncg_home, move,
4865 dd->index_gl, dd->cgindex, dd->gatindex,
4866 dd->ga2la, comm->bLocalCG,
4871 if (fr->cutoff_scheme == ecutsVERLET)
4873 moved = get_moved(comm, dd->ncg_home);
4875 for (k = 0; k < dd->ncg_home; k++)
4882 moved = fr->ns.grid->cell_index;
4885 clear_and_mark_ind(dd->ncg_home, move,
4886 dd->index_gl, dd->cgindex, dd->gatindex,
4887 dd->ga2la, comm->bLocalCG,
4891 cginfo_mb = fr->cginfo_mb;
4893 *ncg_stay_home = home_pos_cg;
4894 for (d = 0; d < dd->ndim; d++)
4900 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4903 /* Communicate the cg and atom counts */
4908 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4909 d, dir, sbuf[0], sbuf[1]);
4911 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4913 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4915 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4916 srenew(comm->buf_int, comm->nalloc_int);
4919 /* Communicate the charge group indices, sizes and flags */
4920 dd_sendrecv_int(dd, d, dir,
4921 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4922 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4924 nvs = ncg[cdd] + nat[cdd]*nvec;
4925 i = rbuf[0] + rbuf[1] *nvec;
4926 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4928 /* Communicate cgcm and state */
4929 dd_sendrecv_rvec(dd, d, dir,
4930 comm->cgcm_state[cdd], nvs,
4931 comm->vbuf.v+nvr, i);
4932 ncg_recv += rbuf[0];
4933 nat_recv += rbuf[1];
4937 /* Process the received charge groups */
4939 for (cg = 0; cg < ncg_recv; cg++)
4941 flag = comm->buf_int[cg*DD_CGIBS+1];
4943 if (dim >= npbcdim && dd->nc[dim] > 2)
4945 /* No pbc in this dim and more than one domain boundary.
4946 * We do a separate check if a charge group didn't move too far.
4948 if (((flag & DD_FLAG_FW(d)) &&
4949 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4950 ((flag & DD_FLAG_BW(d)) &&
4951 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4953 cg_move_error(fplog, dd, step, cg, dim,
4954 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4956 comm->vbuf.v[buf_pos],
4957 comm->vbuf.v[buf_pos],
4958 comm->vbuf.v[buf_pos][dim]);
4965 /* Check which direction this cg should go */
4966 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4970 /* The cell boundaries for dimension d2 are not equal
4971 * for each cell row of the lower dimension(s),
4972 * therefore we might need to redetermine where
4973 * this cg should go.
4976 /* If this cg crosses the box boundary in dimension d2
4977 * we can use the communicated flag, so we do not
4978 * have to worry about pbc.
4980 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4981 (flag & DD_FLAG_FW(d2))) ||
4982 (dd->ci[dim2] == 0 &&
4983 (flag & DD_FLAG_BW(d2)))))
4985 /* Clear the two flags for this dimension */
4986 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4987 /* Determine the location of this cg
4988 * in lattice coordinates
4990 pos_d = comm->vbuf.v[buf_pos][dim2];
4993 for (d3 = dim2+1; d3 < DIM; d3++)
4996 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4999 /* Check of we are not at the box edge.
5000 * pbc is only handled in the first step above,
5001 * but this check could move over pbc while
5002 * the first step did not due to different rounding.
5004 if (pos_d >= cell_x1[dim2] &&
5005 dd->ci[dim2] != dd->nc[dim2]-1)
5007 flag |= DD_FLAG_FW(d2);
5009 else if (pos_d < cell_x0[dim2] &&
5012 flag |= DD_FLAG_BW(d2);
5014 comm->buf_int[cg*DD_CGIBS+1] = flag;
5017 /* Set to which neighboring cell this cg should go */
5018 if (flag & DD_FLAG_FW(d2))
5022 else if (flag & DD_FLAG_BW(d2))
5024 if (dd->nc[dd->dim[d2]] > 2)
5036 nrcg = flag & DD_FLAG_NRCG;
5039 if (home_pos_cg+1 > dd->cg_nalloc)
5041 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5042 srenew(dd->index_gl, dd->cg_nalloc);
5043 srenew(dd->cgindex, dd->cg_nalloc+1);
5045 /* Set the global charge group index and size */
5046 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5047 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5048 /* Copy the state from the buffer */
5049 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5050 if (fr->cutoff_scheme == ecutsGROUP)
5053 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5057 /* Set the cginfo */
5058 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5059 dd->index_gl[home_pos_cg]);
5062 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5065 if (home_pos_at+nrcg > state->nalloc)
5067 dd_realloc_state(state, f, home_pos_at+nrcg);
5069 for (i = 0; i < nrcg; i++)
5071 copy_rvec(comm->vbuf.v[buf_pos++],
5072 state->x[home_pos_at+i]);
5076 for (i = 0; i < nrcg; i++)
5078 copy_rvec(comm->vbuf.v[buf_pos++],
5079 state->v[home_pos_at+i]);
5084 for (i = 0; i < nrcg; i++)
5086 copy_rvec(comm->vbuf.v[buf_pos++],
5087 state->sd_X[home_pos_at+i]);
5092 for (i = 0; i < nrcg; i++)
5094 copy_rvec(comm->vbuf.v[buf_pos++],
5095 state->cg_p[home_pos_at+i]);
5099 home_pos_at += nrcg;
5103 /* Reallocate the buffers if necessary */
5104 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5106 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5107 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5109 nvr = ncg[mc] + nat[mc]*nvec;
5110 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5112 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5113 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5115 /* Copy from the receive to the send buffers */
5116 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5117 comm->buf_int + cg*DD_CGIBS,
5118 DD_CGIBS*sizeof(int));
5119 memcpy(comm->cgcm_state[mc][nvr],
5120 comm->vbuf.v[buf_pos],
5121 (1+nrcg*nvec)*sizeof(rvec));
5122 buf_pos += 1 + nrcg*nvec;
5129 /* With sorting (!bCompact) the indices are now only partially up to date
5130 * and ncg_home and nat_home are not the real count, since there are
5131 * "holes" in the arrays for the charge groups that moved to neighbors.
5133 if (fr->cutoff_scheme == ecutsVERLET)
5135 moved = get_moved(comm, home_pos_cg);
5137 for (i = dd->ncg_home; i < home_pos_cg; i++)
5142 dd->ncg_home = home_pos_cg;
5143 dd->nat_home = home_pos_at;
5148 "Finished repartitioning: cgs moved out %d, new home %d\n",
5149 *ncg_moved, dd->ncg_home-*ncg_moved);
5154 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5156 dd->comm->cycl[ddCycl] += cycles;
5157 dd->comm->cycl_n[ddCycl]++;
5158 if (cycles > dd->comm->cycl_max[ddCycl])
5160 dd->comm->cycl_max[ddCycl] = cycles;
5164 static double force_flop_count(t_nrnb *nrnb)
5171 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5173 /* To get closer to the real timings, we half the count
5174 * for the normal loops and again half it for water loops.
5177 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5179 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5183 sum += nrnb->n[i]*0.50*cost_nrnb(i);
5186 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5189 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5191 sum += nrnb->n[i]*cost_nrnb(i);
5194 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5196 sum += nrnb->n[i]*cost_nrnb(i);
5202 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5204 if (dd->comm->eFlop)
5206 dd->comm->flop -= force_flop_count(nrnb);
5209 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5211 if (dd->comm->eFlop)
5213 dd->comm->flop += force_flop_count(nrnb);
5218 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5222 for (i = 0; i < ddCyclNr; i++)
5224 dd->comm->cycl[i] = 0;
5225 dd->comm->cycl_n[i] = 0;
5226 dd->comm->cycl_max[i] = 0;
5229 dd->comm->flop_n = 0;
5232 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5234 gmx_domdec_comm_t *comm;
5235 gmx_domdec_load_t *load;
5236 gmx_domdec_root_t *root = NULL;
5237 int d, dim, cid, i, pos;
5238 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5243 fprintf(debug, "get_load_distribution start\n");
5246 wallcycle_start(wcycle, ewcDDCOMMLOAD);
5250 bSepPME = (dd->pme_nodeid >= 0);
5252 for (d = dd->ndim-1; d >= 0; d--)
5255 /* Check if we participate in the communication in this dimension */
5256 if (d == dd->ndim-1 ||
5257 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5259 load = &comm->load[d];
5262 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
5265 if (d == dd->ndim-1)
5267 sbuf[pos++] = dd_force_load(comm);
5268 sbuf[pos++] = sbuf[0];
5271 sbuf[pos++] = sbuf[0];
5272 sbuf[pos++] = cell_frac;
5275 sbuf[pos++] = comm->cell_f_max0[d];
5276 sbuf[pos++] = comm->cell_f_min1[d];
5281 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5282 sbuf[pos++] = comm->cycl[ddCyclPME];
5287 sbuf[pos++] = comm->load[d+1].sum;
5288 sbuf[pos++] = comm->load[d+1].max;
5291 sbuf[pos++] = comm->load[d+1].sum_m;
5292 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5293 sbuf[pos++] = comm->load[d+1].flags;
5296 sbuf[pos++] = comm->cell_f_max0[d];
5297 sbuf[pos++] = comm->cell_f_min1[d];
5302 sbuf[pos++] = comm->load[d+1].mdf;
5303 sbuf[pos++] = comm->load[d+1].pme;
5307 /* Communicate a row in DD direction d.
5308 * The communicators are setup such that the root always has rank 0.
5311 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5312 load->load, load->nload*sizeof(float), MPI_BYTE,
5313 0, comm->mpi_comm_load[d]);
5315 if (dd->ci[dim] == dd->master_ci[dim])
5317 /* We are the root, process this row */
5318 if (comm->bDynLoadBal)
5320 root = comm->root[d];
5330 for (i = 0; i < dd->nc[dim]; i++)
5332 load->sum += load->load[pos++];
5333 load->max = max(load->max, load->load[pos]);
5339 /* This direction could not be load balanced properly,
5340 * therefore we need to use the maximum iso the average load.
5342 load->sum_m = max(load->sum_m, load->load[pos]);
5346 load->sum_m += load->load[pos];
5349 load->cvol_min = min(load->cvol_min, load->load[pos]);
5353 load->flags = (int)(load->load[pos++] + 0.5);
5357 root->cell_f_max0[i] = load->load[pos++];
5358 root->cell_f_min1[i] = load->load[pos++];
5363 load->mdf = max(load->mdf, load->load[pos]);
5365 load->pme = max(load->pme, load->load[pos]);
5369 if (comm->bDynLoadBal && root->bLimited)
5371 load->sum_m *= dd->nc[dim];
5372 load->flags |= (1<<d);
5380 comm->nload += dd_load_count(comm);
5381 comm->load_step += comm->cycl[ddCyclStep];
5382 comm->load_sum += comm->load[0].sum;
5383 comm->load_max += comm->load[0].max;
5384 if (comm->bDynLoadBal)
5386 for (d = 0; d < dd->ndim; d++)
5388 if (comm->load[0].flags & (1<<d))
5390 comm->load_lim[d]++;
5396 comm->load_mdf += comm->load[0].mdf;
5397 comm->load_pme += comm->load[0].pme;
5401 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5405 fprintf(debug, "get_load_distribution finished\n");
5409 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5411 /* Return the relative performance loss on the total run time
5412 * due to the force calculation load imbalance.
5414 if (dd->comm->nload > 0)
5417 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5418 (dd->comm->load_step*dd->nnodes);
5426 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5429 int npp, npme, nnodes, d, limp;
5430 float imbal, pme_f_ratio, lossf, lossp = 0;
5432 gmx_domdec_comm_t *comm;
5435 if (DDMASTER(dd) && comm->nload > 0)
5438 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5439 nnodes = npp + npme;
5440 imbal = comm->load_max*npp/comm->load_sum - 1;
5441 lossf = dd_force_imb_perf_loss(dd);
5442 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5443 fprintf(fplog, "%s", buf);
5444 fprintf(stderr, "\n");
5445 fprintf(stderr, "%s", buf);
5446 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5447 fprintf(fplog, "%s", buf);
5448 fprintf(stderr, "%s", buf);
5450 if (comm->bDynLoadBal)
5452 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5453 for (d = 0; d < dd->ndim; d++)
5455 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5456 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5462 sprintf(buf+strlen(buf), "\n");
5463 fprintf(fplog, "%s", buf);
5464 fprintf(stderr, "%s", buf);
5468 pme_f_ratio = comm->load_pme/comm->load_mdf;
5469 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5472 lossp *= (float)npme/(float)nnodes;
5476 lossp *= (float)npp/(float)nnodes;
5478 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5479 fprintf(fplog, "%s", buf);
5480 fprintf(stderr, "%s", buf);
5481 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5482 fprintf(fplog, "%s", buf);
5483 fprintf(stderr, "%s", buf);
5485 fprintf(fplog, "\n");
5486 fprintf(stderr, "\n");
5488 if (lossf >= DD_PERF_LOSS_WARN)
5491 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5492 " in the domain decomposition.\n", lossf*100);
5493 if (!comm->bDynLoadBal)
5495 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5499 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5501 fprintf(fplog, "%s\n", buf);
5502 fprintf(stderr, "%s\n", buf);
5504 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS_WARN)
5507 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5508 " had %s work to do than the PP nodes.\n"
5509 " You might want to %s the number of PME nodes\n"
5510 " or %s the cut-off and the grid spacing.\n",
5512 (lossp < 0) ? "less" : "more",
5513 (lossp < 0) ? "decrease" : "increase",
5514 (lossp < 0) ? "decrease" : "increase");
5515 fprintf(fplog, "%s\n", buf);
5516 fprintf(stderr, "%s\n", buf);
5521 static float dd_vol_min(gmx_domdec_t *dd)
5523 return dd->comm->load[0].cvol_min*dd->nnodes;
5526 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5528 return dd->comm->load[0].flags;
5531 static float dd_f_imbal(gmx_domdec_t *dd)
5533 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5536 float dd_pme_f_ratio(gmx_domdec_t *dd)
5538 if (dd->comm->cycl_n[ddCyclPME] > 0)
5540 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5548 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5553 flags = dd_load_flags(dd);
5557 "DD load balancing is limited by minimum cell size in dimension");
5558 for (d = 0; d < dd->ndim; d++)
5562 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5565 fprintf(fplog, "\n");
5567 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5568 if (dd->comm->bDynLoadBal)
5570 fprintf(fplog, " vol min/aver %5.3f%c",
5571 dd_vol_min(dd), flags ? '!' : ' ');
5573 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5574 if (dd->comm->cycl_n[ddCyclPME])
5576 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5578 fprintf(fplog, "\n\n");
5581 static void dd_print_load_verbose(gmx_domdec_t *dd)
5583 if (dd->comm->bDynLoadBal)
5585 fprintf(stderr, "vol %4.2f%c ",
5586 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5588 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5589 if (dd->comm->cycl_n[ddCyclPME])
5591 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
5596 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5601 gmx_domdec_root_t *root;
5602 gmx_bool bPartOfGroup = FALSE;
5604 dim = dd->dim[dim_ind];
5605 copy_ivec(loc, loc_c);
5606 for (i = 0; i < dd->nc[dim]; i++)
5609 rank = dd_index(dd->nc, loc_c);
5610 if (rank == dd->rank)
5612 /* This process is part of the group */
5613 bPartOfGroup = TRUE;
5616 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5620 dd->comm->mpi_comm_load[dim_ind] = c_row;
5621 if (dd->comm->eDLB != edlbNO)
5623 if (dd->ci[dim] == dd->master_ci[dim])
5625 /* This is the root process of this row */
5626 snew(dd->comm->root[dim_ind], 1);
5627 root = dd->comm->root[dim_ind];
5628 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5629 snew(root->old_cell_f, dd->nc[dim]+1);
5630 snew(root->bCellMin, dd->nc[dim]);
5633 snew(root->cell_f_max0, dd->nc[dim]);
5634 snew(root->cell_f_min1, dd->nc[dim]);
5635 snew(root->bound_min, dd->nc[dim]);
5636 snew(root->bound_max, dd->nc[dim]);
5638 snew(root->buf_ncd, dd->nc[dim]);
5642 /* This is not a root process, we only need to receive cell_f */
5643 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5646 if (dd->ci[dim] == dd->master_ci[dim])
5648 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
5654 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5655 const gmx_hw_info_t gmx_unused *hwinfo,
5656 const gmx_hw_opt_t gmx_unused *hw_opt)
5659 int physicalnode_id_hash;
5662 MPI_Comm mpi_comm_pp_physicalnode;
5664 if (!(cr->duty & DUTY_PP) ||
5665 hw_opt->gpu_opt.ncuda_dev_use == 0)
5667 /* Only PP nodes (currently) use GPUs.
5668 * If we don't have GPUs, there are no resources to share.
5673 physicalnode_id_hash = gmx_physicalnode_id_hash();
5675 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5681 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5682 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5683 dd->rank, physicalnode_id_hash, gpu_id);
5685 /* Split the PP communicator over the physical nodes */
5686 /* TODO: See if we should store this (before), as it's also used for
5687 * for the nodecomm summution.
5689 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5690 &mpi_comm_pp_physicalnode);
5691 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5692 &dd->comm->mpi_comm_gpu_shared);
5693 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5694 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5698 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5701 /* Note that some ranks could share a GPU, while others don't */
5703 if (dd->comm->nrank_gpu_shared == 1)
5705 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
5710 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5713 int dim0, dim1, i, j;
5718 fprintf(debug, "Making load communicators\n");
5721 snew(dd->comm->load, dd->ndim);
5722 snew(dd->comm->mpi_comm_load, dd->ndim);
5725 make_load_communicator(dd, 0, loc);
5729 for (i = 0; i < dd->nc[dim0]; i++)
5732 make_load_communicator(dd, 1, loc);
5738 for (i = 0; i < dd->nc[dim0]; i++)
5742 for (j = 0; j < dd->nc[dim1]; j++)
5745 make_load_communicator(dd, 2, loc);
5752 fprintf(debug, "Finished making load communicators\n");
5757 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5760 int d, dim, i, j, m;
5763 ivec dd_zp[DD_MAXIZONE];
5764 gmx_domdec_zones_t *zones;
5765 gmx_domdec_ns_ranges_t *izone;
5767 for (d = 0; d < dd->ndim; d++)
5770 copy_ivec(dd->ci, tmp);
5771 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5772 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5773 copy_ivec(dd->ci, tmp);
5774 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5775 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5778 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5781 dd->neighbor[d][1]);
5787 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5789 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5790 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5797 for (i = 0; i < nzonep; i++)
5799 copy_ivec(dd_zp3[i], dd_zp[i]);
5805 for (i = 0; i < nzonep; i++)
5807 copy_ivec(dd_zp2[i], dd_zp[i]);
5813 for (i = 0; i < nzonep; i++)
5815 copy_ivec(dd_zp1[i], dd_zp[i]);
5819 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5824 zones = &dd->comm->zones;
5826 for (i = 0; i < nzone; i++)
5829 clear_ivec(zones->shift[i]);
5830 for (d = 0; d < dd->ndim; d++)
5832 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5837 for (i = 0; i < nzone; i++)
5839 for (d = 0; d < DIM; d++)
5841 s[d] = dd->ci[d] - zones->shift[i][d];
5846 else if (s[d] >= dd->nc[d])
5852 zones->nizone = nzonep;
5853 for (i = 0; i < zones->nizone; i++)
5855 if (dd_zp[i][0] != i)
5857 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5859 izone = &zones->izone[i];
5860 izone->j0 = dd_zp[i][1];
5861 izone->j1 = dd_zp[i][2];
5862 for (dim = 0; dim < DIM; dim++)
5864 if (dd->nc[dim] == 1)
5866 /* All shifts should be allowed */
5867 izone->shift0[dim] = -1;
5868 izone->shift1[dim] = 1;
5873 izone->shift0[d] = 0;
5874 izone->shift1[d] = 0;
5875 for(j=izone->j0; j<izone->j1; j++) {
5876 if (dd->shift[j][d] > dd->shift[i][d])
5877 izone->shift0[d] = -1;
5878 if (dd->shift[j][d] < dd->shift[i][d])
5879 izone->shift1[d] = 1;
5885 /* Assume the shift are not more than 1 cell */
5886 izone->shift0[dim] = 1;
5887 izone->shift1[dim] = -1;
5888 for (j = izone->j0; j < izone->j1; j++)
5890 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5891 if (shift_diff < izone->shift0[dim])
5893 izone->shift0[dim] = shift_diff;
5895 if (shift_diff > izone->shift1[dim])
5897 izone->shift1[dim] = shift_diff;
5904 if (dd->comm->eDLB != edlbNO)
5906 snew(dd->comm->root, dd->ndim);
5909 if (dd->comm->bRecordLoad)
5911 make_load_communicators(dd);
5915 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
5918 gmx_domdec_comm_t *comm;
5929 if (comm->bCartesianPP)
5931 /* Set up cartesian communication for the particle-particle part */
5934 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5935 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5938 for (i = 0; i < DIM; i++)
5942 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5944 /* We overwrite the old communicator with the new cartesian one */
5945 cr->mpi_comm_mygroup = comm_cart;
5948 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5949 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5951 if (comm->bCartesianPP_PME)
5953 /* Since we want to use the original cartesian setup for sim,
5954 * and not the one after split, we need to make an index.
5956 snew(comm->ddindex2ddnodeid, dd->nnodes);
5957 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5958 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5959 /* Get the rank of the DD master,
5960 * above we made sure that the master node is a PP node.
5970 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5972 else if (comm->bCartesianPP)
5974 if (cr->npmenodes == 0)
5976 /* The PP communicator is also
5977 * the communicator for this simulation
5979 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5981 cr->nodeid = dd->rank;
5983 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5985 /* We need to make an index to go from the coordinates
5986 * to the nodeid of this simulation.
5988 snew(comm->ddindex2simnodeid, dd->nnodes);
5989 snew(buf, dd->nnodes);
5990 if (cr->duty & DUTY_PP)
5992 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5994 /* Communicate the ddindex to simulation nodeid index */
5995 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5996 cr->mpi_comm_mysim);
5999 /* Determine the master coordinates and rank.
6000 * The DD master should be the same node as the master of this sim.
6002 for (i = 0; i < dd->nnodes; i++)
6004 if (comm->ddindex2simnodeid[i] == 0)
6006 ddindex2xyz(dd->nc, i, dd->master_ci);
6007 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
6012 fprintf(debug, "The master rank is %d\n", dd->masterrank);
6017 /* No Cartesian communicators */
6018 /* We use the rank in dd->comm->all as DD index */
6019 ddindex2xyz(dd->nc, dd->rank, dd->ci);
6020 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
6022 clear_ivec(dd->master_ci);
6029 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6030 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6035 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6036 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6040 static void receive_ddindex2simnodeid(t_commrec *cr)
6044 gmx_domdec_comm_t *comm;
6051 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6053 snew(comm->ddindex2simnodeid, dd->nnodes);
6054 snew(buf, dd->nnodes);
6055 if (cr->duty & DUTY_PP)
6057 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6060 /* Communicate the ddindex to simulation nodeid index */
6061 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6062 cr->mpi_comm_mysim);
6069 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6070 int ncg, int natoms)
6072 gmx_domdec_master_t *ma;
6077 snew(ma->ncg, dd->nnodes);
6078 snew(ma->index, dd->nnodes+1);
6080 snew(ma->nat, dd->nnodes);
6081 snew(ma->ibuf, dd->nnodes*2);
6082 snew(ma->cell_x, DIM);
6083 for (i = 0; i < DIM; i++)
6085 snew(ma->cell_x[i], dd->nc[i]+1);
6088 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6094 snew(ma->vbuf, natoms);
6100 static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
6101 int gmx_unused reorder)
6104 gmx_domdec_comm_t *comm;
6115 if (comm->bCartesianPP)
6117 for (i = 1; i < DIM; i++)
6119 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6121 if (bDiv[YY] || bDiv[ZZ])
6123 comm->bCartesianPP_PME = TRUE;
6124 /* If we have 2D PME decomposition, which is always in x+y,
6125 * we stack the PME only nodes in z.
6126 * Otherwise we choose the direction that provides the thinnest slab
6127 * of PME only nodes as this will have the least effect
6128 * on the PP communication.
6129 * But for the PME communication the opposite might be better.
6131 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6133 dd->nc[YY] > dd->nc[ZZ]))
6135 comm->cartpmedim = ZZ;
6139 comm->cartpmedim = YY;
6141 comm->ntot[comm->cartpmedim]
6142 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6146 fprintf(fplog, "#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6148 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6153 if (comm->bCartesianPP_PME)
6157 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6160 for (i = 0; i < DIM; i++)
6164 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6167 MPI_Comm_rank(comm_cart, &rank);
6168 if (MASTERNODE(cr) && rank != 0)
6170 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6173 /* With this assigment we loose the link to the original communicator
6174 * which will usually be MPI_COMM_WORLD, unless have multisim.
6176 cr->mpi_comm_mysim = comm_cart;
6177 cr->sim_nodeid = rank;
6179 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6183 fprintf(fplog, "Cartesian nodeid %d, coordinates %d %d %d\n\n",
6184 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6187 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6191 if (cr->npmenodes == 0 ||
6192 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6194 cr->duty = DUTY_PME;
6197 /* Split the sim communicator into PP and PME only nodes */
6198 MPI_Comm_split(cr->mpi_comm_mysim,
6200 dd_index(comm->ntot, dd->ci),
6201 &cr->mpi_comm_mygroup);
6205 switch (dd_node_order)
6210 fprintf(fplog, "Order of the nodes: PP first, PME last\n");
6213 case ddnoINTERLEAVE:
6214 /* Interleave the PP-only and PME-only nodes,
6215 * as on clusters with dual-core machines this will double
6216 * the communication bandwidth of the PME processes
6217 * and thus speed up the PP <-> PME and inter PME communication.
6221 fprintf(fplog, "Interleaving PP and PME nodes\n");
6223 comm->pmenodes = dd_pmenodes(cr);
6228 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6231 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6233 cr->duty = DUTY_PME;
6240 /* Split the sim communicator into PP and PME only nodes */
6241 MPI_Comm_split(cr->mpi_comm_mysim,
6244 &cr->mpi_comm_mygroup);
6245 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6251 fprintf(fplog, "This is a %s only node\n\n",
6252 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
6256 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6259 gmx_domdec_comm_t *comm;
6265 copy_ivec(dd->nc, comm->ntot);
6267 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6268 comm->bCartesianPP_PME = FALSE;
6270 /* Reorder the nodes by default. This might change the MPI ranks.
6271 * Real reordering is only supported on very few architectures,
6272 * Blue Gene is one of them.
6274 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6276 if (cr->npmenodes > 0)
6278 /* Split the communicator into a PP and PME part */
6279 split_communicator(fplog, cr, dd_node_order, CartReorder);
6280 if (comm->bCartesianPP_PME)
6282 /* We (possibly) reordered the nodes in split_communicator,
6283 * so it is no longer required in make_pp_communicator.
6285 CartReorder = FALSE;
6290 /* All nodes do PP and PME */
6292 /* We do not require separate communicators */
6293 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6297 if (cr->duty & DUTY_PP)
6299 /* Copy or make a new PP communicator */
6300 make_pp_communicator(fplog, cr, CartReorder);
6304 receive_ddindex2simnodeid(cr);
6307 if (!(cr->duty & DUTY_PME))
6309 /* Set up the commnuication to our PME node */
6310 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6311 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6314 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6315 dd->pme_nodeid, dd->pme_receive_vir_ener);
6320 dd->pme_nodeid = -1;
6325 dd->ma = init_gmx_domdec_master_t(dd,
6327 comm->cgs_gl.index[comm->cgs_gl.nr]);
6331 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6333 real *slb_frac, tot;
6338 if (nc > 1 && size_string != NULL)
6342 fprintf(fplog, "Using static load balancing for the %s direction\n",
6347 for (i = 0; i < nc; i++)
6350 sscanf(size_string, "%lf%n", &dbl, &n);
6353 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6362 fprintf(fplog, "Relative cell sizes:");
6364 for (i = 0; i < nc; i++)
6369 fprintf(fplog, " %5.3f", slb_frac[i]);
6374 fprintf(fplog, "\n");
6381 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6384 gmx_mtop_ilistloop_t iloop;
6388 iloop = gmx_mtop_ilistloop_init(mtop);
6389 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6391 for (ftype = 0; ftype < F_NRE; ftype++)
6393 if ((interaction_function[ftype].flags & IF_BOND) &&
6396 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
6404 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6410 val = getenv(env_var);
6413 if (sscanf(val, "%d", &nst) <= 0)
6419 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6427 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6431 fprintf(stderr, "\n%s\n", warn_string);
6435 fprintf(fplog, "\n%s\n", warn_string);
6439 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6440 t_inputrec *ir, FILE *fplog)
6442 if (ir->ePBC == epbcSCREW &&
6443 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6445 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6448 if (ir->ns_type == ensSIMPLE)
6450 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6453 if (ir->nstlist == 0)
6455 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6458 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6460 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6464 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6469 r = ddbox->box_size[XX];
6470 for (di = 0; di < dd->ndim; di++)
6473 /* Check using the initial average cell size */
6474 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6480 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6481 const char *dlb_opt, gmx_bool bRecordLoad,
6482 unsigned long Flags, t_inputrec *ir)
6490 case 'a': eDLB = edlbAUTO; break;
6491 case 'n': eDLB = edlbNO; break;
6492 case 'y': eDLB = edlbYES; break;
6493 default: gmx_incons("Unknown dlb_opt");
6496 if (Flags & MD_RERUN)
6501 if (!EI_DYNAMICS(ir->eI))
6503 if (eDLB == edlbYES)
6505 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6506 dd_warning(cr, fplog, buf);
6514 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6519 if (Flags & MD_REPRODUCIBLE)
6526 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6530 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6533 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
6541 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6546 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6548 /* Decomposition order z,y,x */
6551 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6553 for (dim = DIM-1; dim >= 0; dim--)
6555 if (dd->nc[dim] > 1)
6557 dd->dim[dd->ndim++] = dim;
6563 /* Decomposition order x,y,z */
6564 for (dim = 0; dim < DIM; dim++)
6566 if (dd->nc[dim] > 1)
6568 dd->dim[dd->ndim++] = dim;
6574 static gmx_domdec_comm_t *init_dd_comm()
6576 gmx_domdec_comm_t *comm;
6580 snew(comm->cggl_flag, DIM*2);
6581 snew(comm->cgcm_state, DIM*2);
6582 for (i = 0; i < DIM*2; i++)
6584 comm->cggl_flag_nalloc[i] = 0;
6585 comm->cgcm_state_nalloc[i] = 0;
6588 comm->nalloc_int = 0;
6589 comm->buf_int = NULL;
6591 vec_rvec_init(&comm->vbuf);
6593 comm->n_load_have = 0;
6594 comm->n_load_collect = 0;
6596 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6598 comm->sum_nat[i] = 0;
6602 comm->load_step = 0;
6605 clear_ivec(comm->load_lim);
6612 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6613 unsigned long Flags,
6615 real comm_distance_min, real rconstr,
6616 const char *dlb_opt, real dlb_scale,
6617 const char *sizex, const char *sizey, const char *sizez,
6618 gmx_mtop_t *mtop, t_inputrec *ir,
6619 matrix box, rvec *x,
6621 int *npme_x, int *npme_y)
6624 gmx_domdec_comm_t *comm;
6627 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6634 "\nInitializing Domain Decomposition on %d nodes\n", cr->nnodes);
6639 dd->comm = init_dd_comm();
6641 snew(comm->cggl_flag, DIM*2);
6642 snew(comm->cgcm_state, DIM*2);
6644 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6645 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6647 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
6648 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
6649 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
6650 recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
6651 comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
6652 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
6653 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
6654 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
6656 dd->pme_recv_f_alloc = 0;
6657 dd->pme_recv_f_buf = NULL;
6659 if (dd->bSendRecv2 && fplog)
6661 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6667 fprintf(fplog, "Will load balance based on FLOP count\n");
6669 if (comm->eFlop > 1)
6671 srand(1+cr->nodeid);
6673 comm->bRecordLoad = TRUE;
6677 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6681 /* Initialize to GPU share count to 0, might change later */
6682 comm->nrank_gpu_shared = 0;
6684 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6686 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6689 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6691 dd->bGridJump = comm->bDynLoadBal;
6692 comm->bPMELoadBalDLBLimits = FALSE;
6694 if (comm->nstSortCG)
6698 if (comm->nstSortCG == 1)
6700 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6704 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6708 snew(comm->sort, 1);
6714 fprintf(fplog, "Will not sort the charge groups\n");
6718 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6720 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6721 if (comm->bInterCGBondeds)
6723 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6727 comm->bInterCGMultiBody = FALSE;
6730 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6731 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6733 if (ir->rlistlong == 0)
6735 /* Set the cut-off to some very large value,
6736 * so we don't need if statements everywhere in the code.
6737 * We use sqrt, since the cut-off is squared in some places.
6739 comm->cutoff = GMX_CUTOFF_INF;
6743 comm->cutoff = ir->rlistlong;
6745 comm->cutoff_mbody = 0;
6747 comm->cellsize_limit = 0;
6748 comm->bBondComm = FALSE;
6750 if (comm->bInterCGBondeds)
6752 if (comm_distance_min > 0)
6754 comm->cutoff_mbody = comm_distance_min;
6755 if (Flags & MD_DDBONDCOMM)
6757 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6761 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6763 r_bonded_limit = comm->cutoff_mbody;
6765 else if (ir->bPeriodicMols)
6767 /* Can not easily determine the required cut-off */
6768 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6769 comm->cutoff_mbody = comm->cutoff/2;
6770 r_bonded_limit = comm->cutoff_mbody;
6776 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6777 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6779 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6780 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6782 /* We use an initial margin of 10% for the minimum cell size,
6783 * except when we are just below the non-bonded cut-off.
6785 if (Flags & MD_DDBONDCOMM)
6787 if (max(r_2b, r_mb) > comm->cutoff)
6789 r_bonded = max(r_2b, r_mb);
6790 r_bonded_limit = 1.1*r_bonded;
6791 comm->bBondComm = TRUE;
6796 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6798 /* We determine cutoff_mbody later */
6802 /* No special bonded communication,
6803 * simply increase the DD cut-off.
6805 r_bonded_limit = 1.1*max(r_2b, r_mb);
6806 comm->cutoff_mbody = r_bonded_limit;
6807 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6810 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6814 "Minimum cell size due to bonded interactions: %.3f nm\n",
6815 comm->cellsize_limit);
6819 if (dd->bInterCGcons && rconstr <= 0)
6821 /* There is a cell size limit due to the constraints (P-LINCS) */
6822 rconstr = constr_r_max(fplog, mtop, ir);
6826 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6828 if (rconstr > comm->cellsize_limit)
6830 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6834 else if (rconstr > 0 && fplog)
6836 /* Here we do not check for dd->bInterCGcons,
6837 * because one can also set a cell size limit for virtual sites only
6838 * and at this point we don't know yet if there are intercg v-sites.
6841 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6844 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6846 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6850 copy_ivec(nc, dd->nc);
6851 set_dd_dim(fplog, dd);
6852 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6854 if (cr->npmenodes == -1)
6858 acs = average_cellsize_min(dd, ddbox);
6859 if (acs < comm->cellsize_limit)
6863 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6865 gmx_fatal_collective(FARGS, cr, NULL,
6866 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6867 acs, comm->cellsize_limit);
6872 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6874 /* We need to choose the optimal DD grid and possibly PME nodes */
6875 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6876 comm->eDLB != edlbNO, dlb_scale,
6877 comm->cellsize_limit, comm->cutoff,
6878 comm->bInterCGBondeds);
6880 if (dd->nc[XX] == 0)
6882 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6883 sprintf(buf, "Change the number of nodes or mdrun option %s%s%s",
6884 !bC ? "-rdd" : "-rcon",
6885 comm->eDLB != edlbNO ? " or -dds" : "",
6886 bC ? " or your LINCS settings" : "");
6888 gmx_fatal_collective(FARGS, cr, NULL,
6889 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6891 "Look in the log file for details on the domain decomposition",
6892 cr->nnodes-cr->npmenodes, limit, buf);
6894 set_dd_dim(fplog, dd);
6900 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6901 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6904 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6905 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6907 gmx_fatal_collective(FARGS, cr, NULL,
6908 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6909 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6911 if (cr->npmenodes > dd->nnodes)
6913 gmx_fatal_collective(FARGS, cr, NULL,
6914 "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6916 if (cr->npmenodes > 0)
6918 comm->npmenodes = cr->npmenodes;
6922 comm->npmenodes = dd->nnodes;
6925 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6927 /* The following choices should match those
6928 * in comm_cost_est in domdec_setup.c.
6929 * Note that here the checks have to take into account
6930 * that the decomposition might occur in a different order than xyz
6931 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6932 * in which case they will not match those in comm_cost_est,
6933 * but since that is mainly for testing purposes that's fine.
6935 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6936 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6937 getenv("GMX_PMEONEDD") == NULL)
6939 comm->npmedecompdim = 2;
6940 comm->npmenodes_x = dd->nc[XX];
6941 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6945 /* In case nc is 1 in both x and y we could still choose to
6946 * decompose pme in y instead of x, but we use x for simplicity.
6948 comm->npmedecompdim = 1;
6949 if (dd->dim[0] == YY)
6951 comm->npmenodes_x = 1;
6952 comm->npmenodes_y = comm->npmenodes;
6956 comm->npmenodes_x = comm->npmenodes;
6957 comm->npmenodes_y = 1;
6962 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6963 comm->npmenodes_x, comm->npmenodes_y, 1);
6968 comm->npmedecompdim = 0;
6969 comm->npmenodes_x = 0;
6970 comm->npmenodes_y = 0;
6973 /* Technically we don't need both of these,
6974 * but it simplifies code not having to recalculate it.
6976 *npme_x = comm->npmenodes_x;
6977 *npme_y = comm->npmenodes_y;
6979 snew(comm->slb_frac, DIM);
6980 if (comm->eDLB == edlbNO)
6982 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6983 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6984 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
6987 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6989 if (comm->bBondComm || comm->eDLB != edlbNO)
6991 /* Set the bonded communication distance to halfway
6992 * the minimum and the maximum,
6993 * since the extra communication cost is nearly zero.
6995 acs = average_cellsize_min(dd, ddbox);
6996 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6997 if (comm->eDLB != edlbNO)
6999 /* Check if this does not limit the scaling */
7000 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
7002 if (!comm->bBondComm)
7004 /* Without bBondComm do not go beyond the n.b. cut-off */
7005 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
7006 if (comm->cellsize_limit >= comm->cutoff)
7008 /* We don't loose a lot of efficieny
7009 * when increasing it to the n.b. cut-off.
7010 * It can even be slightly faster, because we need
7011 * less checks for the communication setup.
7013 comm->cutoff_mbody = comm->cutoff;
7016 /* Check if we did not end up below our original limit */
7017 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
7019 if (comm->cutoff_mbody > comm->cellsize_limit)
7021 comm->cellsize_limit = comm->cutoff_mbody;
7024 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
7029 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
7030 "cellsize limit %f\n",
7031 comm->bBondComm, comm->cellsize_limit);
7036 check_dd_restrictions(cr, dd, ir, fplog);
7039 comm->partition_step = INT_MIN;
7042 clear_dd_cycle_counts(dd);
7047 static void set_dlb_limits(gmx_domdec_t *dd)
7052 for (d = 0; d < dd->ndim; d++)
7054 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7055 dd->comm->cellsize_min[dd->dim[d]] =
7056 dd->comm->cellsize_min_dlb[dd->dim[d]];
7061 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7064 gmx_domdec_comm_t *comm;
7074 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
7077 cellsize_min = comm->cellsize_min[dd->dim[0]];
7078 for (d = 1; d < dd->ndim; d++)
7080 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7083 if (cellsize_min < comm->cellsize_limit*1.05)
7085 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7087 /* Change DLB from "auto" to "no". */
7088 comm->eDLB = edlbNO;
7093 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7094 comm->bDynLoadBal = TRUE;
7095 dd->bGridJump = TRUE;
7099 /* We can set the required cell size info here,
7100 * so we do not need to communicate this.
7101 * The grid is completely uniform.
7103 for (d = 0; d < dd->ndim; d++)
7107 comm->load[d].sum_m = comm->load[d].sum;
7109 nc = dd->nc[dd->dim[d]];
7110 for (i = 0; i < nc; i++)
7112 comm->root[d]->cell_f[i] = i/(real)nc;
7115 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7116 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7119 comm->root[d]->cell_f[nc] = 1.0;
7124 static char *init_bLocalCG(gmx_mtop_t *mtop)
7129 ncg = ncg_mtop(mtop);
7130 snew(bLocalCG, ncg);
7131 for (cg = 0; cg < ncg; cg++)
7133 bLocalCG[cg] = FALSE;
7139 void dd_init_bondeds(FILE *fplog,
7140 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7142 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7144 gmx_domdec_comm_t *comm;
7148 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7152 if (comm->bBondComm)
7154 /* Communicate atoms beyond the cut-off for bonded interactions */
7157 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7159 comm->bLocalCG = init_bLocalCG(mtop);
7163 /* Only communicate atoms based on cut-off */
7164 comm->cglink = NULL;
7165 comm->bLocalCG = NULL;
7169 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7171 gmx_bool bDynLoadBal, real dlb_scale,
7174 gmx_domdec_comm_t *comm;
7189 fprintf(fplog, "The maximum number of communication pulses is:");
7190 for (d = 0; d < dd->ndim; d++)
7192 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7194 fprintf(fplog, "\n");
7195 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7196 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7197 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7198 for (d = 0; d < DIM; d++)
7202 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7209 comm->cellsize_min_dlb[d]/
7210 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7212 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7215 fprintf(fplog, "\n");
7219 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
7220 fprintf(fplog, "The initial number of communication pulses is:");
7221 for (d = 0; d < dd->ndim; d++)
7223 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7225 fprintf(fplog, "\n");
7226 fprintf(fplog, "The initial domain decomposition cell size is:");
7227 for (d = 0; d < DIM; d++)
7231 fprintf(fplog, " %c %.2f nm",
7232 dim2char(d), dd->comm->cellsize_min[d]);
7235 fprintf(fplog, "\n\n");
7238 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7240 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7241 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7242 "non-bonded interactions", "", comm->cutoff);
7246 limit = dd->comm->cellsize_limit;
7250 if (dynamic_dd_box(ddbox, ir))
7252 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7254 limit = dd->comm->cellsize_min[XX];
7255 for (d = 1; d < DIM; d++)
7257 limit = min(limit, dd->comm->cellsize_min[d]);
7261 if (comm->bInterCGBondeds)
7263 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7264 "two-body bonded interactions", "(-rdd)",
7265 max(comm->cutoff, comm->cutoff_mbody));
7266 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7267 "multi-body bonded interactions", "(-rdd)",
7268 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7272 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7273 "virtual site constructions", "(-rcon)", limit);
7275 if (dd->constraint_comm)
7277 sprintf(buf, "atoms separated by up to %d constraints",
7279 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7280 buf, "(-rcon)", limit);
7282 fprintf(fplog, "\n");
7288 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7290 const t_inputrec *ir,
7291 const gmx_ddbox_t *ddbox)
7293 gmx_domdec_comm_t *comm;
7294 int d, dim, npulse, npulse_d_max, npulse_d;
7299 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7301 /* Determine the maximum number of comm. pulses in one dimension */
7303 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7305 /* Determine the maximum required number of grid pulses */
7306 if (comm->cellsize_limit >= comm->cutoff)
7308 /* Only a single pulse is required */
7311 else if (!bNoCutOff && comm->cellsize_limit > 0)
7313 /* We round down slightly here to avoid overhead due to the latency
7314 * of extra communication calls when the cut-off
7315 * would be only slightly longer than the cell size.
7316 * Later cellsize_limit is redetermined,
7317 * so we can not miss interactions due to this rounding.
7319 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7323 /* There is no cell size limit */
7324 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7327 if (!bNoCutOff && npulse > 1)
7329 /* See if we can do with less pulses, based on dlb_scale */
7331 for (d = 0; d < dd->ndim; d++)
7334 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7335 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7336 npulse_d_max = max(npulse_d_max, npulse_d);
7338 npulse = min(npulse, npulse_d_max);
7341 /* This env var can override npulse */
7342 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
7349 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7350 for (d = 0; d < dd->ndim; d++)
7352 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7353 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7354 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7355 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7356 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7358 comm->bVacDLBNoLimit = FALSE;
7362 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7363 if (!comm->bVacDLBNoLimit)
7365 comm->cellsize_limit = max(comm->cellsize_limit,
7366 comm->cutoff/comm->maxpulse);
7368 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7369 /* Set the minimum cell size for each DD dimension */
7370 for (d = 0; d < dd->ndim; d++)
7372 if (comm->bVacDLBNoLimit ||
7373 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7375 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7379 comm->cellsize_min_dlb[dd->dim[d]] =
7380 comm->cutoff/comm->cd[d].np_dlb;
7383 if (comm->cutoff_mbody <= 0)
7385 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7387 if (comm->bDynLoadBal)
7393 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7395 /* If each molecule is a single charge group
7396 * or we use domain decomposition for each periodic dimension,
7397 * we do not need to take pbc into account for the bonded interactions.
7399 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7402 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
7405 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7406 t_inputrec *ir, gmx_ddbox_t *ddbox)
7408 gmx_domdec_comm_t *comm;
7414 /* Initialize the thread data.
7415 * This can not be done in init_domain_decomposition,
7416 * as the numbers of threads is determined later.
7418 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7421 snew(comm->dth, comm->nth);
7424 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7426 init_ddpme(dd, &comm->ddpme[0], 0);
7427 if (comm->npmedecompdim >= 2)
7429 init_ddpme(dd, &comm->ddpme[1], 1);
7434 comm->npmenodes = 0;
7435 if (dd->pme_nodeid >= 0)
7437 gmx_fatal_collective(FARGS, NULL, dd,
7438 "Can not have separate PME nodes without PME electrostatics");
7444 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7446 if (comm->eDLB != edlbNO)
7448 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7451 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7452 if (comm->eDLB == edlbAUTO)
7456 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7458 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
7461 if (ir->ePBC == epbcNONE)
7463 vol_frac = 1 - 1/(double)dd->nnodes;
7468 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7472 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7474 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7476 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
7479 static gmx_bool test_dd_cutoff(t_commrec *cr,
7480 t_state *state, t_inputrec *ir,
7491 set_ddbox(dd, FALSE, cr, ir, state->box,
7492 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7496 for (d = 0; d < dd->ndim; d++)
7500 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7501 if (dynamic_dd_box(&ddbox, ir))
7503 inv_cell_size *= DD_PRES_SCALE_MARGIN;
7506 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7508 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7509 dd->comm->cd[d].np_dlb > 0)
7511 if (np > dd->comm->cd[d].np_dlb)
7516 /* If a current local cell size is smaller than the requested
7517 * cut-off, we could still fix it, but this gets very complicated.
7518 * Without fixing here, we might actually need more checks.
7520 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7527 if (dd->comm->eDLB != edlbNO)
7529 /* If DLB is not active yet, we don't need to check the grid jumps.
7530 * Actually we shouldn't, because then the grid jump data is not set.
7532 if (dd->comm->bDynLoadBal &&
7533 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7538 gmx_sumi(1, &LocallyLimited, cr);
7540 if (LocallyLimited > 0)
7549 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7552 gmx_bool bCutoffAllowed;
7554 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7558 cr->dd->comm->cutoff = cutoff_req;
7561 return bCutoffAllowed;
7564 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7566 gmx_domdec_comm_t *comm;
7568 comm = cr->dd->comm;
7570 /* Turn on the DLB limiting (might have been on already) */
7571 comm->bPMELoadBalDLBLimits = TRUE;
7573 /* Change the cut-off limit */
7574 comm->PMELoadBal_max_cutoff = comm->cutoff;
7577 static void merge_cg_buffers(int ncell,
7578 gmx_domdec_comm_dim_t *cd, int pulse,
7580 int *index_gl, int *recv_i,
7581 rvec *cg_cm, rvec *recv_vr,
7583 cginfo_mb_t *cginfo_mb, int *cginfo)
7585 gmx_domdec_ind_t *ind, *ind_p;
7586 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7587 int shift, shift_at;
7589 ind = &cd->ind[pulse];
7591 /* First correct the already stored data */
7592 shift = ind->nrecv[ncell];
7593 for (cell = ncell-1; cell >= 0; cell--)
7595 shift -= ind->nrecv[cell];
7598 /* Move the cg's present from previous grid pulses */
7599 cg0 = ncg_cell[ncell+cell];
7600 cg1 = ncg_cell[ncell+cell+1];
7601 cgindex[cg1+shift] = cgindex[cg1];
7602 for (cg = cg1-1; cg >= cg0; cg--)
7604 index_gl[cg+shift] = index_gl[cg];
7605 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7606 cgindex[cg+shift] = cgindex[cg];
7607 cginfo[cg+shift] = cginfo[cg];
7609 /* Correct the already stored send indices for the shift */
7610 for (p = 1; p <= pulse; p++)
7612 ind_p = &cd->ind[p];
7614 for (c = 0; c < cell; c++)
7616 cg0 += ind_p->nsend[c];
7618 cg1 = cg0 + ind_p->nsend[cell];
7619 for (cg = cg0; cg < cg1; cg++)
7621 ind_p->index[cg] += shift;
7627 /* Merge in the communicated buffers */
7631 for (cell = 0; cell < ncell; cell++)
7633 cg1 = ncg_cell[ncell+cell+1] + shift;
7636 /* Correct the old cg indices */
7637 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7639 cgindex[cg+1] += shift_at;
7642 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7644 /* Copy this charge group from the buffer */
7645 index_gl[cg1] = recv_i[cg0];
7646 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7647 /* Add it to the cgindex */
7648 cg_gl = index_gl[cg1];
7649 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7650 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7651 cgindex[cg1+1] = cgindex[cg1] + nat;
7656 shift += ind->nrecv[cell];
7657 ncg_cell[ncell+cell+1] = cg1;
7661 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7662 int nzone, int cg0, const int *cgindex)
7666 /* Store the atom block boundaries for easy copying of communication buffers
7669 for (zone = 0; zone < nzone; zone++)
7671 for (p = 0; p < cd->np; p++)
7673 cd->ind[p].cell2at0[zone] = cgindex[cg];
7674 cg += cd->ind[p].nrecv[zone];
7675 cd->ind[p].cell2at1[zone] = cgindex[cg];
7680 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7686 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7688 if (!bLocalCG[link->a[i]])
7697 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7699 real c[DIM][4]; /* the corners for the non-bonded communication */
7700 real cr0; /* corner for rounding */
7701 real cr1[4]; /* corners for rounding */
7702 real bc[DIM]; /* corners for bounded communication */
7703 real bcr1; /* corner for rounding for bonded communication */
7706 /* Determine the corners of the domain(s) we are communicating with */
7708 set_dd_corners(const gmx_domdec_t *dd,
7709 int dim0, int dim1, int dim2,
7713 const gmx_domdec_comm_t *comm;
7714 const gmx_domdec_zones_t *zones;
7719 zones = &comm->zones;
7721 /* Keep the compiler happy */
7725 /* The first dimension is equal for all cells */
7726 c->c[0][0] = comm->cell_x0[dim0];
7729 c->bc[0] = c->c[0][0];
7734 /* This cell row is only seen from the first row */
7735 c->c[1][0] = comm->cell_x0[dim1];
7736 /* All rows can see this row */
7737 c->c[1][1] = comm->cell_x0[dim1];
7740 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7743 /* For the multi-body distance we need the maximum */
7744 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7747 /* Set the upper-right corner for rounding */
7748 c->cr0 = comm->cell_x1[dim0];
7753 for (j = 0; j < 4; j++)
7755 c->c[2][j] = comm->cell_x0[dim2];
7759 /* Use the maximum of the i-cells that see a j-cell */
7760 for (i = 0; i < zones->nizone; i++)
7762 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7768 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7774 /* For the multi-body distance we need the maximum */
7775 c->bc[2] = comm->cell_x0[dim2];
7776 for (i = 0; i < 2; i++)
7778 for (j = 0; j < 2; j++)
7780 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7786 /* Set the upper-right corner for rounding */
7787 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7788 * Only cell (0,0,0) can see cell 7 (1,1,1)
7790 c->cr1[0] = comm->cell_x1[dim1];
7791 c->cr1[3] = comm->cell_x1[dim1];
7794 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7797 /* For the multi-body distance we need the maximum */
7798 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7805 /* Determine which cg's we need to send in this pulse from this zone */
7807 get_zone_pulse_cgs(gmx_domdec_t *dd,
7808 int zonei, int zone,
7810 const int *index_gl,
7812 int dim, int dim_ind,
7813 int dim0, int dim1, int dim2,
7814 real r_comm2, real r_bcomm2,
7818 real skew_fac2_d, real skew_fac_01,
7819 rvec *v_d, rvec *v_0, rvec *v_1,
7820 const dd_corners_t *c,
7822 gmx_bool bDistBonded,
7828 gmx_domdec_ind_t *ind,
7829 int **ibuf, int *ibuf_nalloc,
7835 gmx_domdec_comm_t *comm;
7837 gmx_bool bDistMB_pulse;
7839 real r2, rb2, r, tric_sh;
7842 int nsend_z, nsend, nat;
7846 bScrew = (dd->bScrewPBC && dim == XX);
7848 bDistMB_pulse = (bDistMB && bDistBonded);
7854 for (cg = cg0; cg < cg1; cg++)
7858 if (tric_dist[dim_ind] == 0)
7860 /* Rectangular direction, easy */
7861 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7868 r = cg_cm[cg][dim] - c->bc[dim_ind];
7874 /* Rounding gives at most a 16% reduction
7875 * in communicated atoms
7877 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7879 r = cg_cm[cg][dim0] - c->cr0;
7880 /* This is the first dimension, so always r >= 0 */
7887 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7889 r = cg_cm[cg][dim1] - c->cr1[zone];
7896 r = cg_cm[cg][dim1] - c->bcr1;
7906 /* Triclinic direction, more complicated */
7909 /* Rounding, conservative as the skew_fac multiplication
7910 * will slightly underestimate the distance.
7912 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7914 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7915 for (i = dim0+1; i < DIM; i++)
7917 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7919 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7922 rb[dim0] = rn[dim0];
7925 /* Take care that the cell planes along dim0 might not
7926 * be orthogonal to those along dim1 and dim2.
7928 for (i = 1; i <= dim_ind; i++)
7931 if (normal[dim0][dimd] > 0)
7933 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7936 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7941 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7943 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7945 for (i = dim1+1; i < DIM; i++)
7947 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7949 rn[dim1] += tric_sh;
7952 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7953 /* Take care of coupling of the distances
7954 * to the planes along dim0 and dim1 through dim2.
7956 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7957 /* Take care that the cell planes along dim1
7958 * might not be orthogonal to that along dim2.
7960 if (normal[dim1][dim2] > 0)
7962 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7968 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7971 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7972 /* Take care of coupling of the distances
7973 * to the planes along dim0 and dim1 through dim2.
7975 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7976 /* Take care that the cell planes along dim1
7977 * might not be orthogonal to that along dim2.
7979 if (normal[dim1][dim2] > 0)
7981 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7986 /* The distance along the communication direction */
7987 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7989 for (i = dim+1; i < DIM; i++)
7991 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7996 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7997 /* Take care of coupling of the distances
7998 * to the planes along dim0 and dim1 through dim2.
8000 if (dim_ind == 1 && zonei == 1)
8002 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
8008 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
8011 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
8012 /* Take care of coupling of the distances
8013 * to the planes along dim0 and dim1 through dim2.
8015 if (dim_ind == 1 && zonei == 1)
8017 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
8025 ((bDistMB && rb2 < r_bcomm2) ||
8026 (bDist2B && r2 < r_bcomm2)) &&
8028 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
8029 missing_link(comm->cglink, index_gl[cg],
8032 /* Make an index to the local charge groups */
8033 if (nsend+1 > ind->nalloc)
8035 ind->nalloc = over_alloc_large(nsend+1);
8036 srenew(ind->index, ind->nalloc);
8038 if (nsend+1 > *ibuf_nalloc)
8040 *ibuf_nalloc = over_alloc_large(nsend+1);
8041 srenew(*ibuf, *ibuf_nalloc);
8043 ind->index[nsend] = cg;
8044 (*ibuf)[nsend] = index_gl[cg];
8046 vec_rvec_check_alloc(vbuf, nsend+1);
8048 if (dd->ci[dim] == 0)
8050 /* Correct cg_cm for pbc */
8051 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
8054 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8055 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8060 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8063 nat += cgindex[cg+1] - cgindex[cg];
8069 *nsend_z_ptr = nsend_z;
8072 static void setup_dd_communication(gmx_domdec_t *dd,
8073 matrix box, gmx_ddbox_t *ddbox,
8074 t_forcerec *fr, t_state *state, rvec **f)
8076 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8077 int nzone, nzone_send, zone, zonei, cg0, cg1;
8078 int c, i, j, cg, cg_gl, nrcg;
8079 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8080 gmx_domdec_comm_t *comm;
8081 gmx_domdec_zones_t *zones;
8082 gmx_domdec_comm_dim_t *cd;
8083 gmx_domdec_ind_t *ind;
8084 cginfo_mb_t *cginfo_mb;
8085 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8086 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8087 dd_corners_t corners;
8089 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8090 real skew_fac2_d, skew_fac_01;
8097 fprintf(debug, "Setting up DD communication\n");
8102 switch (fr->cutoff_scheme)
8111 gmx_incons("unimplemented");
8115 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8117 dim = dd->dim[dim_ind];
8119 /* Check if we need to use triclinic distances */
8120 tric_dist[dim_ind] = 0;
8121 for (i = 0; i <= dim_ind; i++)
8123 if (ddbox->tric_dir[dd->dim[i]])
8125 tric_dist[dim_ind] = 1;
8130 bBondComm = comm->bBondComm;
8132 /* Do we need to determine extra distances for multi-body bondeds? */
8133 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8135 /* Do we need to determine extra distances for only two-body bondeds? */
8136 bDist2B = (bBondComm && !bDistMB);
8138 r_comm2 = sqr(comm->cutoff);
8139 r_bcomm2 = sqr(comm->cutoff_mbody);
8143 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8146 zones = &comm->zones;
8149 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8150 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8152 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8154 /* Triclinic stuff */
8155 normal = ddbox->normal;
8159 v_0 = ddbox->v[dim0];
8160 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8162 /* Determine the coupling coefficient for the distances
8163 * to the cell planes along dim0 and dim1 through dim2.
8164 * This is required for correct rounding.
8167 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8170 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8176 v_1 = ddbox->v[dim1];
8179 zone_cg_range = zones->cg_range;
8180 index_gl = dd->index_gl;
8181 cgindex = dd->cgindex;
8182 cginfo_mb = fr->cginfo_mb;
8184 zone_cg_range[0] = 0;
8185 zone_cg_range[1] = dd->ncg_home;
8186 comm->zone_ncg1[0] = dd->ncg_home;
8187 pos_cg = dd->ncg_home;
8189 nat_tot = dd->nat_home;
8191 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8193 dim = dd->dim[dim_ind];
8194 cd = &comm->cd[dim_ind];
8196 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8198 /* No pbc in this dimension, the first node should not comm. */
8206 v_d = ddbox->v[dim];
8207 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8209 cd->bInPlace = TRUE;
8210 for (p = 0; p < cd->np; p++)
8212 /* Only atoms communicated in the first pulse are used
8213 * for multi-body bonded interactions or for bBondComm.
8215 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8220 for (zone = 0; zone < nzone_send; zone++)
8222 if (tric_dist[dim_ind] && dim_ind > 0)
8224 /* Determine slightly more optimized skew_fac's
8226 * This reduces the number of communicated atoms
8227 * by about 10% for 3D DD of rhombic dodecahedra.
8229 for (dimd = 0; dimd < dim; dimd++)
8231 sf2_round[dimd] = 1;
8232 if (ddbox->tric_dir[dimd])
8234 for (i = dd->dim[dimd]+1; i < DIM; i++)
8236 /* If we are shifted in dimension i
8237 * and the cell plane is tilted forward
8238 * in dimension i, skip this coupling.
8240 if (!(zones->shift[nzone+zone][i] &&
8241 ddbox->v[dimd][i][dimd] >= 0))
8244 sqr(ddbox->v[dimd][i][dimd]);
8247 sf2_round[dimd] = 1/sf2_round[dimd];
8252 zonei = zone_perm[dim_ind][zone];
8255 /* Here we permutate the zones to obtain a convenient order
8256 * for neighbor searching
8258 cg0 = zone_cg_range[zonei];
8259 cg1 = zone_cg_range[zonei+1];
8263 /* Look only at the cg's received in the previous grid pulse
8265 cg1 = zone_cg_range[nzone+zone+1];
8266 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
8269 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8270 for (th = 0; th < comm->nth; th++)
8272 gmx_domdec_ind_t *ind_p;
8273 int **ibuf_p, *ibuf_nalloc_p;
8275 int *nsend_p, *nat_p;
8281 /* Thread 0 writes in the comm buffers */
8283 ibuf_p = &comm->buf_int;
8284 ibuf_nalloc_p = &comm->nalloc_int;
8285 vbuf_p = &comm->vbuf;
8288 nsend_zone_p = &ind->nsend[zone];
8292 /* Other threads write into temp buffers */
8293 ind_p = &comm->dth[th].ind;
8294 ibuf_p = &comm->dth[th].ibuf;
8295 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8296 vbuf_p = &comm->dth[th].vbuf;
8297 nsend_p = &comm->dth[th].nsend;
8298 nat_p = &comm->dth[th].nat;
8299 nsend_zone_p = &comm->dth[th].nsend_zone;
8301 comm->dth[th].nsend = 0;
8302 comm->dth[th].nat = 0;
8303 comm->dth[th].nsend_zone = 0;
8313 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8314 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8317 /* Get the cg's for this pulse in this zone */
8318 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8320 dim, dim_ind, dim0, dim1, dim2,
8323 normal, skew_fac2_d, skew_fac_01,
8324 v_d, v_0, v_1, &corners, sf2_round,
8325 bDistBonded, bBondComm,
8329 ibuf_p, ibuf_nalloc_p,
8335 /* Append data of threads>=1 to the communication buffers */
8336 for (th = 1; th < comm->nth; th++)
8338 dd_comm_setup_work_t *dth;
8341 dth = &comm->dth[th];
8343 ns1 = nsend + dth->nsend_zone;
8344 if (ns1 > ind->nalloc)
8346 ind->nalloc = over_alloc_dd(ns1);
8347 srenew(ind->index, ind->nalloc);
8349 if (ns1 > comm->nalloc_int)
8351 comm->nalloc_int = over_alloc_dd(ns1);
8352 srenew(comm->buf_int, comm->nalloc_int);
8354 if (ns1 > comm->vbuf.nalloc)
8356 comm->vbuf.nalloc = over_alloc_dd(ns1);
8357 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8360 for (i = 0; i < dth->nsend_zone; i++)
8362 ind->index[nsend] = dth->ind.index[i];
8363 comm->buf_int[nsend] = dth->ibuf[i];
8364 copy_rvec(dth->vbuf.v[i],
8365 comm->vbuf.v[nsend]);
8369 ind->nsend[zone] += dth->nsend_zone;
8372 /* Clear the counts in case we do not have pbc */
8373 for (zone = nzone_send; zone < nzone; zone++)
8375 ind->nsend[zone] = 0;
8377 ind->nsend[nzone] = nsend;
8378 ind->nsend[nzone+1] = nat;
8379 /* Communicate the number of cg's and atoms to receive */
8380 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8381 ind->nsend, nzone+2,
8382 ind->nrecv, nzone+2);
8384 /* The rvec buffer is also required for atom buffers of size nsend
8385 * in dd_move_x and dd_move_f.
8387 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8391 /* We can receive in place if only the last zone is not empty */
8392 for (zone = 0; zone < nzone-1; zone++)
8394 if (ind->nrecv[zone] > 0)
8396 cd->bInPlace = FALSE;
8401 /* The int buffer is only required here for the cg indices */
8402 if (ind->nrecv[nzone] > comm->nalloc_int2)
8404 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8405 srenew(comm->buf_int2, comm->nalloc_int2);
8407 /* The rvec buffer is also required for atom buffers
8408 * of size nrecv in dd_move_x and dd_move_f.
8410 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8411 vec_rvec_check_alloc(&comm->vbuf2, i);
8415 /* Make space for the global cg indices */
8416 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8417 || dd->cg_nalloc == 0)
8419 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8420 srenew(index_gl, dd->cg_nalloc);
8421 srenew(cgindex, dd->cg_nalloc+1);
8423 /* Communicate the global cg indices */
8426 recv_i = index_gl + pos_cg;
8430 recv_i = comm->buf_int2;
8432 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8433 comm->buf_int, nsend,
8434 recv_i, ind->nrecv[nzone]);
8436 /* Make space for cg_cm */
8437 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8438 if (fr->cutoff_scheme == ecutsGROUP)
8446 /* Communicate cg_cm */
8449 recv_vr = cg_cm + pos_cg;
8453 recv_vr = comm->vbuf2.v;
8455 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8456 comm->vbuf.v, nsend,
8457 recv_vr, ind->nrecv[nzone]);
8459 /* Make the charge group index */
8462 zone = (p == 0 ? 0 : nzone - 1);
8463 while (zone < nzone)
8465 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8467 cg_gl = index_gl[pos_cg];
8468 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8469 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8470 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8473 /* Update the charge group presence,
8474 * so we can use it in the next pass of the loop.
8476 comm->bLocalCG[cg_gl] = TRUE;
8482 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8485 zone_cg_range[nzone+zone] = pos_cg;
8490 /* This part of the code is never executed with bBondComm. */
8491 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8492 index_gl, recv_i, cg_cm, recv_vr,
8493 cgindex, fr->cginfo_mb, fr->cginfo);
8494 pos_cg += ind->nrecv[nzone];
8496 nat_tot += ind->nrecv[nzone+1];
8500 /* Store the atom block for easy copying of communication buffers */
8501 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
8505 dd->index_gl = index_gl;
8506 dd->cgindex = cgindex;
8508 dd->ncg_tot = zone_cg_range[zones->n];
8509 dd->nat_tot = nat_tot;
8510 comm->nat[ddnatHOME] = dd->nat_home;
8511 for (i = ddnatZONE; i < ddnatNR; i++)
8513 comm->nat[i] = dd->nat_tot;
8518 /* We don't need to update cginfo, since that was alrady done above.
8519 * So we pass NULL for the forcerec.
8521 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8522 NULL, comm->bLocalCG);
8527 fprintf(debug, "Finished setting up DD communication, zones:");
8528 for (c = 0; c < zones->n; c++)
8530 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8532 fprintf(debug, "\n");
8536 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8540 for (c = 0; c < zones->nizone; c++)
8542 zones->izone[c].cg1 = zones->cg_range[c+1];
8543 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8544 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
8548 static void set_zones_size(gmx_domdec_t *dd,
8549 matrix box, const gmx_ddbox_t *ddbox,
8550 int zone_start, int zone_end)
8552 gmx_domdec_comm_t *comm;
8553 gmx_domdec_zones_t *zones;
8555 int z, zi, zj0, zj1, d, dim;
8558 real size_j, add_tric;
8563 zones = &comm->zones;
8565 /* Do we need to determine extra distances for multi-body bondeds? */
8566 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8568 for (z = zone_start; z < zone_end; z++)
8570 /* Copy cell limits to zone limits.
8571 * Valid for non-DD dims and non-shifted dims.
8573 copy_rvec(comm->cell_x0, zones->size[z].x0);
8574 copy_rvec(comm->cell_x1, zones->size[z].x1);
8577 for (d = 0; d < dd->ndim; d++)
8581 for (z = 0; z < zones->n; z++)
8583 /* With a staggered grid we have different sizes
8584 * for non-shifted dimensions.
8586 if (dd->bGridJump && zones->shift[z][dim] == 0)
8590 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8591 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8595 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8596 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
8602 rcmbs = comm->cutoff_mbody;
8603 if (ddbox->tric_dir[dim])
8605 rcs /= ddbox->skew_fac[dim];
8606 rcmbs /= ddbox->skew_fac[dim];
8609 /* Set the lower limit for the shifted zone dimensions */
8610 for (z = zone_start; z < zone_end; z++)
8612 if (zones->shift[z][dim] > 0)
8615 if (!dd->bGridJump || d == 0)
8617 zones->size[z].x0[dim] = comm->cell_x1[dim];
8618 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8622 /* Here we take the lower limit of the zone from
8623 * the lowest domain of the zone below.
8627 zones->size[z].x0[dim] =
8628 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8634 zones->size[z].x0[dim] =
8635 zones->size[zone_perm[2][z-4]].x0[dim];
8639 zones->size[z].x0[dim] =
8640 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8643 /* A temporary limit, is updated below */
8644 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8648 for (zi = 0; zi < zones->nizone; zi++)
8650 if (zones->shift[zi][dim] == 0)
8652 /* This takes the whole zone into account.
8653 * With multiple pulses this will lead
8654 * to a larger zone then strictly necessary.
8656 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8657 zones->size[zi].x1[dim]+rcmbs);
8665 /* Loop over the i-zones to set the upper limit of each
8668 for (zi = 0; zi < zones->nizone; zi++)
8670 if (zones->shift[zi][dim] == 0)
8672 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8674 if (zones->shift[z][dim] > 0)
8676 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8677 zones->size[zi].x1[dim]+rcs);
8684 for (z = zone_start; z < zone_end; z++)
8686 /* Initialization only required to keep the compiler happy */
8687 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8690 /* To determine the bounding box for a zone we need to find
8691 * the extreme corners of 4, 2 or 1 corners.
8693 nc = 1 << (ddbox->npbcdim - 1);
8695 for (c = 0; c < nc; c++)
8697 /* Set up a zone corner at x=0, ignoring trilinic couplings */
8701 corner[YY] = zones->size[z].x0[YY];
8705 corner[YY] = zones->size[z].x1[YY];
8709 corner[ZZ] = zones->size[z].x0[ZZ];
8713 corner[ZZ] = zones->size[z].x1[ZZ];
8715 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8717 /* With 1D domain decomposition the cg's are not in
8718 * the triclinic box, but triclinic x-y and rectangular y-z.
8719 * Shift y back, so it will later end up at 0.
8721 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8723 /* Apply the triclinic couplings */
8724 assert(ddbox->npbcdim <= DIM);
8725 for (i = YY; i < ddbox->npbcdim; i++)
8727 for (j = XX; j < i; j++)
8729 corner[j] += corner[i]*box[i][j]/box[i][i];
8734 copy_rvec(corner, corner_min);
8735 copy_rvec(corner, corner_max);
8739 for (i = 0; i < DIM; i++)
8741 corner_min[i] = min(corner_min[i], corner[i]);
8742 corner_max[i] = max(corner_max[i], corner[i]);
8746 /* Copy the extreme cornes without offset along x */
8747 for (i = 0; i < DIM; i++)
8749 zones->size[z].bb_x0[i] = corner_min[i];
8750 zones->size[z].bb_x1[i] = corner_max[i];
8752 /* Add the offset along x */
8753 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8754 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
8757 if (zone_start == 0)
8760 for (dim = 0; dim < DIM; dim++)
8762 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8764 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8769 for (z = zone_start; z < zone_end; z++)
8771 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8773 zones->size[z].x0[XX], zones->size[z].x1[XX],
8774 zones->size[z].x0[YY], zones->size[z].x1[YY],
8775 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8776 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8778 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8779 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8780 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
8785 static int comp_cgsort(const void *a, const void *b)
8789 gmx_cgsort_t *cga, *cgb;
8790 cga = (gmx_cgsort_t *)a;
8791 cgb = (gmx_cgsort_t *)b;
8793 comp = cga->nsc - cgb->nsc;
8796 comp = cga->ind_gl - cgb->ind_gl;
8802 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8807 /* Order the data */
8808 for (i = 0; i < n; i++)
8810 buf[i] = a[sort[i].ind];
8813 /* Copy back to the original array */
8814 for (i = 0; i < n; i++)
8820 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8825 /* Order the data */
8826 for (i = 0; i < n; i++)
8828 copy_rvec(v[sort[i].ind], buf[i]);
8831 /* Copy back to the original array */
8832 for (i = 0; i < n; i++)
8834 copy_rvec(buf[i], v[i]);
8838 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8841 int a, atot, cg, cg0, cg1, i;
8843 if (cgindex == NULL)
8845 /* Avoid the useless loop of the atoms within a cg */
8846 order_vec_cg(ncg, sort, v, buf);
8851 /* Order the data */
8853 for (cg = 0; cg < ncg; cg++)
8855 cg0 = cgindex[sort[cg].ind];
8856 cg1 = cgindex[sort[cg].ind+1];
8857 for (i = cg0; i < cg1; i++)
8859 copy_rvec(v[i], buf[a]);
8865 /* Copy back to the original array */
8866 for (a = 0; a < atot; a++)
8868 copy_rvec(buf[a], v[a]);
8872 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8873 int nsort_new, gmx_cgsort_t *sort_new,
8874 gmx_cgsort_t *sort1)
8878 /* The new indices are not very ordered, so we qsort them */
8879 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8881 /* sort2 is already ordered, so now we can merge the two arrays */
8885 while (i2 < nsort2 || i_new < nsort_new)
8889 sort1[i1++] = sort_new[i_new++];
8891 else if (i_new == nsort_new)
8893 sort1[i1++] = sort2[i2++];
8895 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8896 (sort2[i2].nsc == sort_new[i_new].nsc &&
8897 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8899 sort1[i1++] = sort2[i2++];
8903 sort1[i1++] = sort_new[i_new++];
8908 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8910 gmx_domdec_sort_t *sort;
8911 gmx_cgsort_t *cgsort, *sort_i;
8912 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8913 int sort_last, sort_skip;
8915 sort = dd->comm->sort;
8917 a = fr->ns.grid->cell_index;
8919 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8921 if (ncg_home_old >= 0)
8923 /* The charge groups that remained in the same ns grid cell
8924 * are completely ordered. So we can sort efficiently by sorting
8925 * the charge groups that did move into the stationary list.
8930 for (i = 0; i < dd->ncg_home; i++)
8932 /* Check if this cg did not move to another node */
8935 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8937 /* This cg is new on this node or moved ns grid cell */
8938 if (nsort_new >= sort->sort_new_nalloc)
8940 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8941 srenew(sort->sort_new, sort->sort_new_nalloc);
8943 sort_i = &(sort->sort_new[nsort_new++]);
8947 /* This cg did not move */
8948 sort_i = &(sort->sort2[nsort2++]);
8950 /* Sort on the ns grid cell indices
8951 * and the global topology index.
8952 * index_gl is irrelevant with cell ns,
8953 * but we set it here anyhow to avoid a conditional.
8956 sort_i->ind_gl = dd->index_gl[i];
8963 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8966 /* Sort efficiently */
8967 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8972 cgsort = sort->sort;
8974 for (i = 0; i < dd->ncg_home; i++)
8976 /* Sort on the ns grid cell indices
8977 * and the global topology index
8979 cgsort[i].nsc = a[i];
8980 cgsort[i].ind_gl = dd->index_gl[i];
8982 if (cgsort[i].nsc < moved)
8989 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8991 /* Determine the order of the charge groups using qsort */
8992 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
8998 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
9001 int ncg_new, i, *a, na;
9003 sort = dd->comm->sort->sort;
9005 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
9008 for (i = 0; i < na; i++)
9012 sort[ncg_new].ind = a[i];
9020 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
9023 gmx_domdec_sort_t *sort;
9024 gmx_cgsort_t *cgsort, *sort_i;
9026 int ncg_new, i, *ibuf, cgsize;
9029 sort = dd->comm->sort;
9031 if (dd->ncg_home > sort->sort_nalloc)
9033 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9034 srenew(sort->sort, sort->sort_nalloc);
9035 srenew(sort->sort2, sort->sort_nalloc);
9037 cgsort = sort->sort;
9039 switch (fr->cutoff_scheme)
9042 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9045 ncg_new = dd_sort_order_nbnxn(dd, fr);
9048 gmx_incons("unimplemented");
9052 /* We alloc with the old size, since cgindex is still old */
9053 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9054 vbuf = dd->comm->vbuf.v;
9058 cgindex = dd->cgindex;
9065 /* Remove the charge groups which are no longer at home here */
9066 dd->ncg_home = ncg_new;
9069 fprintf(debug, "Set the new home charge group count to %d\n",
9073 /* Reorder the state */
9074 for (i = 0; i < estNR; i++)
9076 if (EST_DISTR(i) && (state->flags & (1<<i)))
9081 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9084 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9087 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9090 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9094 case estDISRE_INITF:
9095 case estDISRE_RM3TAV:
9096 case estORIRE_INITF:
9098 /* No ordering required */
9101 gmx_incons("Unknown state entry encountered in dd_sort_state");
9106 if (fr->cutoff_scheme == ecutsGROUP)
9109 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9112 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9114 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9115 srenew(sort->ibuf, sort->ibuf_nalloc);
9118 /* Reorder the global cg index */
9119 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9120 /* Reorder the cginfo */
9121 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9122 /* Rebuild the local cg index */
9126 for (i = 0; i < dd->ncg_home; i++)
9128 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9129 ibuf[i+1] = ibuf[i] + cgsize;
9131 for (i = 0; i < dd->ncg_home+1; i++)
9133 dd->cgindex[i] = ibuf[i];
9138 for (i = 0; i < dd->ncg_home+1; i++)
9143 /* Set the home atom number */
9144 dd->nat_home = dd->cgindex[dd->ncg_home];
9146 if (fr->cutoff_scheme == ecutsVERLET)
9148 /* The atoms are now exactly in grid order, update the grid order */
9149 nbnxn_set_atomorder(fr->nbv->nbs);
9153 /* Copy the sorted ns cell indices back to the ns grid struct */
9154 for (i = 0; i < dd->ncg_home; i++)
9156 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9158 fr->ns.grid->nr = dd->ncg_home;
9162 static void add_dd_statistics(gmx_domdec_t *dd)
9164 gmx_domdec_comm_t *comm;
9169 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9171 comm->sum_nat[ddnat-ddnatZONE] +=
9172 comm->nat[ddnat] - comm->nat[ddnat-1];
9177 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9179 gmx_domdec_comm_t *comm;
9184 /* Reset all the statistics and counters for total run counting */
9185 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9187 comm->sum_nat[ddnat-ddnatZONE] = 0;
9191 comm->load_step = 0;
9194 clear_ivec(comm->load_lim);
9199 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9201 gmx_domdec_comm_t *comm;
9205 comm = cr->dd->comm;
9207 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9214 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9216 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9218 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9223 " av. #atoms communicated per step for force: %d x %.1f\n",
9227 if (cr->dd->vsite_comm)
9230 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9231 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9236 if (cr->dd->constraint_comm)
9239 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9240 1 + ir->nLincsIter, av);
9244 gmx_incons(" Unknown type for DD statistics");
9247 fprintf(fplog, "\n");
9249 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9251 print_dd_load_av(fplog, cr->dd);
9255 void dd_partition_system(FILE *fplog,
9258 gmx_bool bMasterState,
9260 t_state *state_global,
9261 gmx_mtop_t *top_global,
9263 t_state *state_local,
9266 gmx_localtop_t *top_local,
9269 gmx_shellfc_t shellfc,
9270 gmx_constr_t constr,
9272 gmx_wallcycle_t wcycle,
9276 gmx_domdec_comm_t *comm;
9277 gmx_ddbox_t ddbox = {0};
9279 gmx_int64_t step_pcoupl;
9280 rvec cell_ns_x0, cell_ns_x1;
9281 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9282 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9283 gmx_bool bRedist, bSortCG, bResortAll;
9284 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9291 bBoxChanged = (bMasterState || DEFORM(*ir));
9292 if (ir->epc != epcNO)
9294 /* With nstpcouple > 1 pressure coupling happens.
9295 * one step after calculating the pressure.
9296 * Box scaling happens at the end of the MD step,
9297 * after the DD partitioning.
9298 * We therefore have to do DLB in the first partitioning
9299 * after an MD step where P-coupling occured.
9300 * We need to determine the last step in which p-coupling occurred.
9301 * MRS -- need to validate this for vv?
9306 step_pcoupl = step - 1;
9310 step_pcoupl = ((step - 1)/n)*n + 1;
9312 if (step_pcoupl >= comm->partition_step)
9318 bNStGlobalComm = (step % nstglobalcomm == 0);
9320 if (!comm->bDynLoadBal)
9326 /* Should we do dynamic load balacing this step?
9327 * Since it requires (possibly expensive) global communication,
9328 * we might want to do DLB less frequently.
9330 if (bBoxChanged || ir->epc != epcNO)
9332 bDoDLB = bBoxChanged;
9336 bDoDLB = bNStGlobalComm;
9340 /* Check if we have recorded loads on the nodes */
9341 if (comm->bRecordLoad && dd_load_count(comm))
9343 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9345 /* Check if we should use DLB at the second partitioning
9346 * and every 100 partitionings,
9347 * so the extra communication cost is negligible.
9349 n = max(100, nstglobalcomm);
9350 bCheckDLB = (comm->n_load_collect == 0 ||
9351 comm->n_load_have % n == n-1);
9358 /* Print load every nstlog, first and last step to the log file */
9359 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9360 comm->n_load_collect == 0 ||
9362 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9364 /* Avoid extra communication due to verbose screen output
9365 * when nstglobalcomm is set.
9367 if (bDoDLB || bLogLoad || bCheckDLB ||
9368 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9370 get_load_distribution(dd, wcycle);
9375 dd_print_load(fplog, dd, step-1);
9379 dd_print_load_verbose(dd);
9382 comm->n_load_collect++;
9386 /* Since the timings are node dependent, the master decides */
9390 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
9393 fprintf(debug, "step %s, imb loss %f\n",
9394 gmx_step_str(step, sbuf),
9395 dd_force_imb_perf_loss(dd));
9398 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9401 turn_on_dlb(fplog, cr, step);
9406 comm->n_load_have++;
9409 cgs_gl = &comm->cgs_gl;
9414 /* Clear the old state */
9415 clear_dd_indices(dd, 0, 0);
9418 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9419 TRUE, cgs_gl, state_global->x, &ddbox);
9421 get_cg_distribution(fplog, step, dd, cgs_gl,
9422 state_global->box, &ddbox, state_global->x);
9424 dd_distribute_state(dd, cgs_gl,
9425 state_global, state_local, f);
9427 dd_make_local_cgs(dd, &top_local->cgs);
9429 /* Ensure that we have space for the new distribution */
9430 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9432 if (fr->cutoff_scheme == ecutsGROUP)
9434 calc_cgcm(fplog, 0, dd->ncg_home,
9435 &top_local->cgs, state_local->x, fr->cg_cm);
9438 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9440 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9442 else if (state_local->ddp_count != dd->ddp_count)
9444 if (state_local->ddp_count > dd->ddp_count)
9446 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9449 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9451 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9454 /* Clear the old state */
9455 clear_dd_indices(dd, 0, 0);
9457 /* Build the new indices */
9458 rebuild_cgindex(dd, cgs_gl->index, state_local);
9459 make_dd_indices(dd, cgs_gl->index, 0);
9460 ncgindex_set = dd->ncg_home;
9462 if (fr->cutoff_scheme == ecutsGROUP)
9464 /* Redetermine the cg COMs */
9465 calc_cgcm(fplog, 0, dd->ncg_home,
9466 &top_local->cgs, state_local->x, fr->cg_cm);
9469 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9471 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9473 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9474 TRUE, &top_local->cgs, state_local->x, &ddbox);
9476 bRedist = comm->bDynLoadBal;
9480 /* We have the full state, only redistribute the cgs */
9482 /* Clear the non-home indices */
9483 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9486 /* Avoid global communication for dim's without pbc and -gcom */
9487 if (!bNStGlobalComm)
9489 copy_rvec(comm->box0, ddbox.box0 );
9490 copy_rvec(comm->box_size, ddbox.box_size);
9492 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9493 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9498 /* For dim's without pbc and -gcom */
9499 copy_rvec(ddbox.box0, comm->box0 );
9500 copy_rvec(ddbox.box_size, comm->box_size);
9502 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9505 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9507 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9510 /* Check if we should sort the charge groups */
9511 if (comm->nstSortCG > 0)
9513 bSortCG = (bMasterState ||
9514 (bRedist && (step % comm->nstSortCG == 0)));
9521 ncg_home_old = dd->ncg_home;
9526 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9528 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9530 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9532 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9535 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9537 &comm->cell_x0, &comm->cell_x1,
9538 dd->ncg_home, fr->cg_cm,
9539 cell_ns_x0, cell_ns_x1, &grid_density);
9543 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9546 switch (fr->cutoff_scheme)
9549 copy_ivec(fr->ns.grid->n, ncells_old);
9550 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9551 state_local->box, cell_ns_x0, cell_ns_x1,
9552 fr->rlistlong, grid_density);
9555 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9558 gmx_incons("unimplemented");
9560 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9561 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9565 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9567 /* Sort the state on charge group position.
9568 * This enables exact restarts from this step.
9569 * It also improves performance by about 15% with larger numbers
9570 * of atoms per node.
9573 /* Fill the ns grid with the home cell,
9574 * so we can sort with the indices.
9576 set_zones_ncg_home(dd);
9578 switch (fr->cutoff_scheme)
9581 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9583 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9585 comm->zones.size[0].bb_x0,
9586 comm->zones.size[0].bb_x1,
9588 comm->zones.dens_zone0,
9591 ncg_moved, bRedist ? comm->moved : NULL,
9592 fr->nbv->grp[eintLocal].kernel_type,
9593 fr->nbv->grp[eintLocal].nbat);
9595 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9598 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9599 0, dd->ncg_home, fr->cg_cm);
9601 copy_ivec(fr->ns.grid->n, ncells_new);
9604 gmx_incons("unimplemented");
9607 bResortAll = bMasterState;
9609 /* Check if we can user the old order and ns grid cell indices
9610 * of the charge groups to sort the charge groups efficiently.
9612 if (ncells_new[XX] != ncells_old[XX] ||
9613 ncells_new[YY] != ncells_old[YY] ||
9614 ncells_new[ZZ] != ncells_old[ZZ])
9621 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9622 gmx_step_str(step, sbuf), dd->ncg_home);
9624 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9625 bResortAll ? -1 : ncg_home_old);
9626 /* Rebuild all the indices */
9627 ga2la_clear(dd->ga2la);
9630 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9633 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9635 /* Setup up the communication and communicate the coordinates */
9636 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9638 /* Set the indices */
9639 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9641 /* Set the charge group boundaries for neighbor searching */
9642 set_cg_boundaries(&comm->zones);
9644 if (fr->cutoff_scheme == ecutsVERLET)
9646 set_zones_size(dd, state_local->box, &ddbox,
9647 bSortCG ? 1 : 0, comm->zones.n);
9650 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9653 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9654 -1,state_local->x,state_local->box);
9657 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9659 /* Extract a local topology from the global topology */
9660 for (i = 0; i < dd->ndim; i++)
9662 np[dd->dim[i]] = comm->cd[i].np;
9664 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9665 comm->cellsize_min, np,
9667 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9668 vsite, top_global, top_local);
9670 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9672 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9674 /* Set up the special atom communication */
9675 n = comm->nat[ddnatZONE];
9676 for (i = ddnatZONE+1; i < ddnatNR; i++)
9681 if (vsite && vsite->n_intercg_vsite)
9683 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9687 if (dd->bInterCGcons || dd->bInterCGsettles)
9689 /* Only for inter-cg constraints we need special code */
9690 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9691 constr, ir->nProjOrder,
9692 top_local->idef.il);
9696 gmx_incons("Unknown special atom type setup");
9701 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9703 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9705 /* Make space for the extra coordinates for virtual site
9706 * or constraint communication.
9708 state_local->natoms = comm->nat[ddnatNR-1];
9709 if (state_local->natoms > state_local->nalloc)
9711 dd_realloc_state(state_local, f, state_local->natoms);
9714 if (fr->bF_NoVirSum)
9716 if (vsite && vsite->n_intercg_vsite)
9718 nat_f_novirsum = comm->nat[ddnatVSITE];
9722 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9724 nat_f_novirsum = dd->nat_tot;
9728 nat_f_novirsum = dd->nat_home;
9737 /* Set the number of atoms required for the force calculation.
9738 * Forces need to be constrained when using a twin-range setup
9739 * or with energy minimization. For simple simulations we could
9740 * avoid some allocation, zeroing and copying, but this is
9741 * probably not worth the complications ande checking.
9743 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9744 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9746 /* We make the all mdatoms up to nat_tot_con.
9747 * We could save some work by only setting invmass
9748 * between nat_tot and nat_tot_con.
9750 /* This call also sets the new number of home particles to dd->nat_home */
9751 atoms2md(top_global, ir,
9752 comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
9754 /* Now we have the charges we can sort the FE interactions */
9755 dd_sort_local_top(dd, mdatoms, top_local);
9759 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9760 split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
9765 /* Make the local shell stuff, currently no communication is done */
9766 make_local_shells(cr, mdatoms, shellfc);
9769 if (ir->implicit_solvent)
9771 make_local_gb(cr, fr->born, ir->gb_algorithm);
9774 setup_bonded_threading(fr, &top_local->idef);
9776 if (!(cr->duty & DUTY_PME))
9778 /* Send the charges and/or c6/sigmas to our PME only node */
9779 gmx_pme_send_parameters(cr, mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9780 mdatoms->chargeA, mdatoms->chargeB,
9781 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9782 mdatoms->sigmaA, mdatoms->sigmaB,
9783 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9788 set_constraints(constr, top_local, ir, mdatoms, cr);
9791 if (ir->ePull != epullNO)
9793 /* Update the local pull groups */
9794 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9799 /* Update the local rotation groups */
9800 dd_make_local_rotation_groups(dd, ir->rot);
9803 if (ir->eSwapCoords != eswapNO)
9805 /* Update the local groups needed for ion swapping */
9806 dd_make_local_swap_groups(dd, ir->swap);
9809 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9810 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9812 add_dd_statistics(dd);
9814 /* Make sure we only count the cycles for this DD partitioning */
9815 clear_dd_cycle_counts(dd);
9817 /* Because the order of the atoms might have changed since
9818 * the last vsite construction, we need to communicate the constructing
9819 * atom coordinates again (for spreading the forces this MD step).
9821 dd_move_x_vsites(dd, state_local->box, state_local->x);
9823 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9825 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9827 dd_move_x(dd, state_local->box, state_local->x);
9828 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9829 -1, state_local->x, state_local->box);
9832 /* Store the partitioning step */
9833 comm->partition_step = step;
9835 /* Increase the DD partitioning counter */
9837 /* The state currently matches this DD partitioning count, store it */
9838 state_local->ddp_count = dd->ddp_count;
9841 /* The DD master node knows the complete cg distribution,
9842 * store the count so we can possibly skip the cg info communication.
9844 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9847 if (comm->DD_debug > 0)
9849 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9850 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9851 "after partitioning");