2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "gromacs/math/vec.h"
49 #include "domdec_network.h"
51 #include "chargegroup.h"
60 #include "gromacs/topology/mtop_util.h"
61 #include "gmx_ga2la.h"
63 #include "nbnxn_search.h"
65 #include "gmx_omp_nthreads.h"
66 #include "gpu_utils.h"
68 #include "gromacs/utility/futil.h"
69 #include "gromacs/fileio/gmxfio.h"
70 #include "gromacs/fileio/pdbio.h"
71 #include "gromacs/imd/imd.h"
72 #include "gromacs/mdlib/nb_verlet.h"
73 #include "gromacs/pbcutil/ishift.h"
74 #include "gromacs/pbcutil/pbc.h"
75 #include "gromacs/pulling/pull.h"
76 #include "gromacs/pulling/pull_rotation.h"
77 #include "gromacs/swap/swapcoords.h"
78 #include "gromacs/timing/wallcycle.h"
79 #include "gromacs/utility/basenetwork.h"
80 #include "gromacs/utility/fatalerror.h"
81 #include "gromacs/utility/gmxmpi.h"
82 #include "gromacs/utility/qsort_threadsafe.h"
83 #include "gromacs/utility/smalloc.h"
85 #define DDRANK(dd, rank) (rank)
86 #define DDMASTERRANK(dd) (dd->masterrank)
88 typedef struct gmx_domdec_master
90 /* The cell boundaries */
92 /* The global charge group division */
93 int *ncg; /* Number of home charge groups for each node */
94 int *index; /* Index of nnodes+1 into cg */
95 int *cg; /* Global charge group index */
96 int *nat; /* Number of home atoms for each node. */
97 int *ibuf; /* Buffer for communication */
98 rvec *vbuf; /* Buffer for state scattering and gathering */
99 } gmx_domdec_master_t;
103 /* The numbers of charge groups to send and receive for each cell
104 * that requires communication, the last entry contains the total
105 * number of atoms that needs to be communicated.
107 int nsend[DD_MAXIZONE+2];
108 int nrecv[DD_MAXIZONE+2];
109 /* The charge groups to send */
112 /* The atom range for non-in-place communication */
113 int cell2at0[DD_MAXIZONE];
114 int cell2at1[DD_MAXIZONE];
119 int np; /* Number of grid pulses in this dimension */
120 int np_dlb; /* For dlb, for use with edlbAUTO */
121 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
123 gmx_bool bInPlace; /* Can we communicate in place? */
124 } gmx_domdec_comm_dim_t;
128 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
129 real *cell_f; /* State var.: cell boundaries, box relative */
130 real *old_cell_f; /* Temp. var.: old cell size */
131 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
132 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
133 real *bound_min; /* Temp. var.: lower limit for cell boundary */
134 real *bound_max; /* Temp. var.: upper limit for cell boundary */
135 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
136 real *buf_ncd; /* Temp. var. */
139 #define DD_NLOAD_MAX 9
141 /* Here floats are accurate enough, since these variables
142 * only influence the load balancing, not the actual MD results.
169 gmx_cgsort_t *sort_new;
181 /* This enum determines the order of the coordinates.
182 * ddnatHOME and ddnatZONE should be first and second,
183 * the others can be ordered as wanted.
186 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
190 edlbAUTO, edlbNO, edlbYES, edlbNR
192 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
196 int dim; /* The dimension */
197 gmx_bool dim_match; /* Tells if DD and PME dims match */
198 int nslab; /* The number of PME slabs in this dimension */
199 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
200 int *pp_min; /* The minimum pp node location, size nslab */
201 int *pp_max; /* The maximum pp node location,size nslab */
202 int maxshift; /* The maximum shift for coordinate redistribution in PME */
207 real min0; /* The minimum bottom of this zone */
208 real max1; /* The maximum top of this zone */
209 real min1; /* The minimum top of this zone */
210 real mch0; /* The maximum bottom communication height for this zone */
211 real mch1; /* The maximum top communication height for this zone */
212 real p1_0; /* The bottom value of the first cell in this zone */
213 real p1_1; /* The top value of the first cell in this zone */
218 gmx_domdec_ind_t ind;
225 } dd_comm_setup_work_t;
227 typedef struct gmx_domdec_comm
229 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
230 * unless stated otherwise.
233 /* The number of decomposition dimensions for PME, 0: no PME */
235 /* The number of nodes doing PME (PP/PME or only PME) */
239 /* The communication setup including the PME only nodes */
240 gmx_bool bCartesianPP_PME;
243 int *pmenodes; /* size npmenodes */
244 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
245 * but with bCartesianPP_PME */
246 gmx_ddpme_t ddpme[2];
248 /* The DD particle-particle nodes only */
249 gmx_bool bCartesianPP;
250 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
252 /* The global charge groups */
255 /* Should we sort the cgs */
257 gmx_domdec_sort_t *sort;
259 /* Are there charge groups? */
262 /* Are there bonded and multi-body interactions between charge groups? */
263 gmx_bool bInterCGBondeds;
264 gmx_bool bInterCGMultiBody;
266 /* Data for the optional bonded interaction atom communication range */
273 /* Are we actually using DLB? */
274 gmx_bool bDynLoadBal;
276 /* Cell sizes for static load balancing, first index cartesian */
279 /* The width of the communicated boundaries */
282 /* The minimum cell size (including triclinic correction) */
284 /* For dlb, for use with edlbAUTO */
285 rvec cellsize_min_dlb;
286 /* The lower limit for the DD cell size with DLB */
288 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
289 gmx_bool bVacDLBNoLimit;
291 /* With PME load balancing we set limits on DLB */
292 gmx_bool bPMELoadBalDLBLimits;
293 /* DLB needs to take into account that we want to allow this maximum
294 * cut-off (for PME load balancing), this could limit cell boundaries.
296 real PMELoadBal_max_cutoff;
298 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
300 /* box0 and box_size are required with dim's without pbc and -gcom */
304 /* The cell boundaries */
308 /* The old location of the cell boundaries, to check cg displacements */
312 /* The communication setup and charge group boundaries for the zones */
313 gmx_domdec_zones_t zones;
315 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
316 * cell boundaries of neighboring cells for dynamic load balancing.
318 gmx_ddzone_t zone_d1[2];
319 gmx_ddzone_t zone_d2[2][2];
321 /* The coordinate/force communication setup and indices */
322 gmx_domdec_comm_dim_t cd[DIM];
323 /* The maximum number of cells to communicate with in one dimension */
326 /* Which cg distribution is stored on the master node */
327 int master_cg_ddp_count;
329 /* The number of cg's received from the direct neighbors */
330 int zone_ncg1[DD_MAXZONE];
332 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
335 /* Array for signalling if atoms have moved to another domain */
339 /* Communication buffer for general use */
343 /* Communication buffer for general use */
346 /* Temporary storage for thread parallel communication setup */
348 dd_comm_setup_work_t *dth;
350 /* Communication buffers only used with multiple grid pulses */
355 /* Communication buffers for local redistribution */
357 int cggl_flag_nalloc[DIM*2];
359 int cgcm_state_nalloc[DIM*2];
361 /* Cell sizes for dynamic load balancing */
362 gmx_domdec_root_t **root;
366 real cell_f_max0[DIM];
367 real cell_f_min1[DIM];
369 /* Stuff for load communication */
370 gmx_bool bRecordLoad;
371 gmx_domdec_load_t *load;
372 int nrank_gpu_shared;
374 MPI_Comm *mpi_comm_load;
375 MPI_Comm mpi_comm_gpu_shared;
378 /* Maximum DLB scaling per load balancing step in percent */
382 float cycl[ddCyclNr];
383 int cycl_n[ddCyclNr];
384 float cycl_max[ddCyclNr];
385 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
389 /* How often did we have load measurements */
391 /* How often have we collected the load measurements */
395 double sum_nat[ddnatNR-ddnatZONE];
405 /* The last partition step */
406 gmx_int64_t partition_step;
414 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
417 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
418 #define DD_FLAG_NRCG 65535
419 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
420 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
422 /* Zone permutation required to obtain consecutive charge groups
423 * for neighbor searching.
425 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
427 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
428 * components see only j zones with that component 0.
431 /* The DD zone order */
432 static const ivec dd_zo[DD_MAXZONE] =
433 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
438 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
443 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
448 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
450 /* Factors used to avoid problems due to rounding issues */
451 #define DD_CELL_MARGIN 1.0001
452 #define DD_CELL_MARGIN2 1.00005
453 /* Factor to account for pressure scaling during nstlist steps */
454 #define DD_PRES_SCALE_MARGIN 1.02
456 /* Turn on DLB when the load imbalance causes this amount of total loss.
457 * There is a bit of overhead with DLB and it's difficult to achieve
458 * a load imbalance of less than 2% with DLB.
460 #define DD_PERF_LOSS_DLB_ON 0.02
462 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
463 #define DD_PERF_LOSS_WARN 0.05
465 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
467 /* Use separate MPI send and receive commands
468 * when nnodes <= GMX_DD_NNODES_SENDRECV.
469 * This saves memory (and some copying for small nnodes).
470 * For high parallelization scatter and gather calls are used.
472 #define GMX_DD_NNODES_SENDRECV 4
476 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
478 static void index2xyz(ivec nc,int ind,ivec xyz)
480 xyz[XX] = ind % nc[XX];
481 xyz[YY] = (ind / nc[XX]) % nc[YY];
482 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
486 /* This order is required to minimize the coordinate communication in PME
487 * which uses decomposition in the x direction.
489 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
491 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
493 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
494 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
495 xyz[ZZ] = ind % nc[ZZ];
498 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
503 ddindex = dd_index(dd->nc, c);
504 if (dd->comm->bCartesianPP_PME)
506 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
508 else if (dd->comm->bCartesianPP)
511 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
522 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
524 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
527 int ddglatnr(gmx_domdec_t *dd, int i)
537 if (i >= dd->comm->nat[ddnatNR-1])
539 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
541 atnr = dd->gatindex[i] + 1;
547 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
549 return &dd->comm->cgs_gl;
552 static void vec_rvec_init(vec_rvec_t *v)
558 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
562 v->nalloc = over_alloc_dd(n);
563 srenew(v->v, v->nalloc);
567 void dd_store_state(gmx_domdec_t *dd, t_state *state)
571 if (state->ddp_count != dd->ddp_count)
573 gmx_incons("The state does not the domain decomposition state");
576 state->ncg_gl = dd->ncg_home;
577 if (state->ncg_gl > state->cg_gl_nalloc)
579 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
580 srenew(state->cg_gl, state->cg_gl_nalloc);
582 for (i = 0; i < state->ncg_gl; i++)
584 state->cg_gl[i] = dd->index_gl[i];
587 state->ddp_count_cg_gl = dd->ddp_count;
590 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
592 return &dd->comm->zones;
595 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
596 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
598 gmx_domdec_zones_t *zones;
601 zones = &dd->comm->zones;
604 while (icg >= zones->izone[izone].cg1)
613 else if (izone < zones->nizone)
615 *jcg0 = zones->izone[izone].jcg0;
619 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
620 icg, izone, zones->nizone);
623 *jcg1 = zones->izone[izone].jcg1;
625 for (d = 0; d < dd->ndim; d++)
628 shift0[dim] = zones->izone[izone].shift0[dim];
629 shift1[dim] = zones->izone[izone].shift1[dim];
630 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
632 /* A conservative approach, this can be optimized */
639 int dd_natoms_vsite(gmx_domdec_t *dd)
641 return dd->comm->nat[ddnatVSITE];
644 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
646 *at_start = dd->comm->nat[ddnatCON-1];
647 *at_end = dd->comm->nat[ddnatCON];
650 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
652 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
653 int *index, *cgindex;
654 gmx_domdec_comm_t *comm;
655 gmx_domdec_comm_dim_t *cd;
656 gmx_domdec_ind_t *ind;
657 rvec shift = {0, 0, 0}, *buf, *rbuf;
658 gmx_bool bPBC, bScrew;
662 cgindex = dd->cgindex;
667 nat_tot = dd->nat_home;
668 for (d = 0; d < dd->ndim; d++)
670 bPBC = (dd->ci[dd->dim[d]] == 0);
671 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
674 copy_rvec(box[dd->dim[d]], shift);
677 for (p = 0; p < cd->np; p++)
684 for (i = 0; i < ind->nsend[nzone]; i++)
686 at0 = cgindex[index[i]];
687 at1 = cgindex[index[i]+1];
688 for (j = at0; j < at1; j++)
690 copy_rvec(x[j], buf[n]);
697 for (i = 0; i < ind->nsend[nzone]; i++)
699 at0 = cgindex[index[i]];
700 at1 = cgindex[index[i]+1];
701 for (j = at0; j < at1; j++)
703 /* We need to shift the coordinates */
704 rvec_add(x[j], shift, buf[n]);
711 for (i = 0; i < ind->nsend[nzone]; i++)
713 at0 = cgindex[index[i]];
714 at1 = cgindex[index[i]+1];
715 for (j = at0; j < at1; j++)
718 buf[n][XX] = x[j][XX] + shift[XX];
720 * This operation requires a special shift force
721 * treatment, which is performed in calc_vir.
723 buf[n][YY] = box[YY][YY] - x[j][YY];
724 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
736 rbuf = comm->vbuf2.v;
738 /* Send and receive the coordinates */
739 dd_sendrecv_rvec(dd, d, dddirBackward,
740 buf, ind->nsend[nzone+1],
741 rbuf, ind->nrecv[nzone+1]);
745 for (zone = 0; zone < nzone; zone++)
747 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
749 copy_rvec(rbuf[j], x[i]);
754 nat_tot += ind->nrecv[nzone+1];
760 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
762 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
763 int *index, *cgindex;
764 gmx_domdec_comm_t *comm;
765 gmx_domdec_comm_dim_t *cd;
766 gmx_domdec_ind_t *ind;
770 gmx_bool bPBC, bScrew;
774 cgindex = dd->cgindex;
779 nzone = comm->zones.n/2;
780 nat_tot = dd->nat_tot;
781 for (d = dd->ndim-1; d >= 0; d--)
783 bPBC = (dd->ci[dd->dim[d]] == 0);
784 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
785 if (fshift == NULL && !bScrew)
789 /* Determine which shift vector we need */
795 for (p = cd->np-1; p >= 0; p--)
798 nat_tot -= ind->nrecv[nzone+1];
805 sbuf = comm->vbuf2.v;
807 for (zone = 0; zone < nzone; zone++)
809 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
811 copy_rvec(f[i], sbuf[j]);
816 /* Communicate the forces */
817 dd_sendrecv_rvec(dd, d, dddirForward,
818 sbuf, ind->nrecv[nzone+1],
819 buf, ind->nsend[nzone+1]);
821 /* Add the received forces */
825 for (i = 0; i < ind->nsend[nzone]; i++)
827 at0 = cgindex[index[i]];
828 at1 = cgindex[index[i]+1];
829 for (j = at0; j < at1; j++)
831 rvec_inc(f[j], buf[n]);
838 for (i = 0; i < ind->nsend[nzone]; i++)
840 at0 = cgindex[index[i]];
841 at1 = cgindex[index[i]+1];
842 for (j = at0; j < at1; j++)
844 rvec_inc(f[j], buf[n]);
845 /* Add this force to the shift force */
846 rvec_inc(fshift[is], buf[n]);
853 for (i = 0; i < ind->nsend[nzone]; i++)
855 at0 = cgindex[index[i]];
856 at1 = cgindex[index[i]+1];
857 for (j = at0; j < at1; j++)
859 /* Rotate the force */
860 f[j][XX] += buf[n][XX];
861 f[j][YY] -= buf[n][YY];
862 f[j][ZZ] -= buf[n][ZZ];
865 /* Add this force to the shift force */
866 rvec_inc(fshift[is], buf[n]);
877 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
879 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
880 int *index, *cgindex;
881 gmx_domdec_comm_t *comm;
882 gmx_domdec_comm_dim_t *cd;
883 gmx_domdec_ind_t *ind;
888 cgindex = dd->cgindex;
890 buf = &comm->vbuf.v[0][0];
893 nat_tot = dd->nat_home;
894 for (d = 0; d < dd->ndim; d++)
897 for (p = 0; p < cd->np; p++)
902 for (i = 0; i < ind->nsend[nzone]; i++)
904 at0 = cgindex[index[i]];
905 at1 = cgindex[index[i]+1];
906 for (j = at0; j < at1; j++)
919 rbuf = &comm->vbuf2.v[0][0];
921 /* Send and receive the coordinates */
922 dd_sendrecv_real(dd, d, dddirBackward,
923 buf, ind->nsend[nzone+1],
924 rbuf, ind->nrecv[nzone+1]);
928 for (zone = 0; zone < nzone; zone++)
930 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
937 nat_tot += ind->nrecv[nzone+1];
943 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
945 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
946 int *index, *cgindex;
947 gmx_domdec_comm_t *comm;
948 gmx_domdec_comm_dim_t *cd;
949 gmx_domdec_ind_t *ind;
954 cgindex = dd->cgindex;
956 buf = &comm->vbuf.v[0][0];
959 nzone = comm->zones.n/2;
960 nat_tot = dd->nat_tot;
961 for (d = dd->ndim-1; d >= 0; d--)
964 for (p = cd->np-1; p >= 0; p--)
967 nat_tot -= ind->nrecv[nzone+1];
974 sbuf = &comm->vbuf2.v[0][0];
976 for (zone = 0; zone < nzone; zone++)
978 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
985 /* Communicate the forces */
986 dd_sendrecv_real(dd, d, dddirForward,
987 sbuf, ind->nrecv[nzone+1],
988 buf, ind->nsend[nzone+1]);
990 /* Add the received forces */
992 for (i = 0; i < ind->nsend[nzone]; i++)
994 at0 = cgindex[index[i]];
995 at1 = cgindex[index[i]+1];
996 for (j = at0; j < at1; j++)
1007 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
1009 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1011 zone->min0, zone->max1,
1012 zone->mch0, zone->mch0,
1013 zone->p1_0, zone->p1_1);
1017 #define DDZONECOMM_MAXZONE 5
1018 #define DDZONECOMM_BUFSIZE 3
1020 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1021 int ddimind, int direction,
1022 gmx_ddzone_t *buf_s, int n_s,
1023 gmx_ddzone_t *buf_r, int n_r)
1025 #define ZBS DDZONECOMM_BUFSIZE
1026 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1027 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
1030 for (i = 0; i < n_s; i++)
1032 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1033 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1034 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1035 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1036 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1037 vbuf_s[i*ZBS+1][2] = 0;
1038 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1039 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1040 vbuf_s[i*ZBS+2][2] = 0;
1043 dd_sendrecv_rvec(dd, ddimind, direction,
1047 for (i = 0; i < n_r; i++)
1049 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1050 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1051 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1052 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1053 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1054 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1055 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
1061 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1062 rvec cell_ns_x0, rvec cell_ns_x1)
1064 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1066 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1067 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1068 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1069 rvec extr_s[2], extr_r[2];
1071 real dist_d, c = 0, det;
1072 gmx_domdec_comm_t *comm;
1073 gmx_bool bPBC, bUse;
1077 for (d = 1; d < dd->ndim; d++)
1080 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1081 zp->min0 = cell_ns_x0[dim];
1082 zp->max1 = cell_ns_x1[dim];
1083 zp->min1 = cell_ns_x1[dim];
1084 zp->mch0 = cell_ns_x0[dim];
1085 zp->mch1 = cell_ns_x1[dim];
1086 zp->p1_0 = cell_ns_x0[dim];
1087 zp->p1_1 = cell_ns_x1[dim];
1090 for (d = dd->ndim-2; d >= 0; d--)
1093 bPBC = (dim < ddbox->npbcdim);
1095 /* Use an rvec to store two reals */
1096 extr_s[d][0] = comm->cell_f0[d+1];
1097 extr_s[d][1] = comm->cell_f1[d+1];
1098 extr_s[d][2] = comm->cell_f1[d+1];
1101 /* Store the extremes in the backward sending buffer,
1102 * so they get updated separately from the forward communication.
1104 for (d1 = d; d1 < dd->ndim-1; d1++)
1106 /* We invert the order to be able to use the same loop for buf_e */
1107 buf_s[pos].min0 = extr_s[d1][1];
1108 buf_s[pos].max1 = extr_s[d1][0];
1109 buf_s[pos].min1 = extr_s[d1][2];
1110 buf_s[pos].mch0 = 0;
1111 buf_s[pos].mch1 = 0;
1112 /* Store the cell corner of the dimension we communicate along */
1113 buf_s[pos].p1_0 = comm->cell_x0[dim];
1114 buf_s[pos].p1_1 = 0;
1118 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1121 if (dd->ndim == 3 && d == 0)
1123 buf_s[pos] = comm->zone_d2[0][1];
1125 buf_s[pos] = comm->zone_d1[0];
1129 /* We only need to communicate the extremes
1130 * in the forward direction
1132 npulse = comm->cd[d].np;
1135 /* Take the minimum to avoid double communication */
1136 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1140 /* Without PBC we should really not communicate over
1141 * the boundaries, but implementing that complicates
1142 * the communication setup and therefore we simply
1143 * do all communication, but ignore some data.
1145 npulse_min = npulse;
1147 for (p = 0; p < npulse_min; p++)
1149 /* Communicate the extremes forward */
1150 bUse = (bPBC || dd->ci[dim] > 0);
1152 dd_sendrecv_rvec(dd, d, dddirForward,
1153 extr_s+d, dd->ndim-d-1,
1154 extr_r+d, dd->ndim-d-1);
1158 for (d1 = d; d1 < dd->ndim-1; d1++)
1160 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1161 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1162 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1168 for (p = 0; p < npulse; p++)
1170 /* Communicate all the zone information backward */
1171 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1173 dd_sendrecv_ddzone(dd, d, dddirBackward,
1180 for (d1 = d+1; d1 < dd->ndim; d1++)
1182 /* Determine the decrease of maximum required
1183 * communication height along d1 due to the distance along d,
1184 * this avoids a lot of useless atom communication.
1186 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1188 if (ddbox->tric_dir[dim])
1190 /* c is the off-diagonal coupling between the cell planes
1191 * along directions d and d1.
1193 c = ddbox->v[dim][dd->dim[d1]][dim];
1199 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1202 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1206 /* A negative value signals out of range */
1212 /* Accumulate the extremes over all pulses */
1213 for (i = 0; i < buf_size; i++)
1217 buf_e[i] = buf_r[i];
1223 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1224 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1225 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1228 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1236 if (bUse && dh[d1] >= 0)
1238 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1239 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1242 /* Copy the received buffer to the send buffer,
1243 * to pass the data through with the next pulse.
1245 buf_s[i] = buf_r[i];
1247 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1248 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1250 /* Store the extremes */
1253 for (d1 = d; d1 < dd->ndim-1; d1++)
1255 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1256 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1257 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1261 if (d == 1 || (d == 0 && dd->ndim == 3))
1263 for (i = d; i < 2; i++)
1265 comm->zone_d2[1-d][i] = buf_e[pos];
1271 comm->zone_d1[1] = buf_e[pos];
1281 for (i = 0; i < 2; i++)
1285 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1287 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1288 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1294 for (i = 0; i < 2; i++)
1296 for (j = 0; j < 2; j++)
1300 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1302 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1303 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1307 for (d = 1; d < dd->ndim; d++)
1309 comm->cell_f_max0[d] = extr_s[d-1][0];
1310 comm->cell_f_min1[d] = extr_s[d-1][1];
1313 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1314 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1319 static void dd_collect_cg(gmx_domdec_t *dd,
1320 t_state *state_local)
1322 gmx_domdec_master_t *ma = NULL;
1323 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1326 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1328 /* The master has the correct distribution */
1332 if (state_local->ddp_count == dd->ddp_count)
1334 ncg_home = dd->ncg_home;
1336 nat_home = dd->nat_home;
1338 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1340 cgs_gl = &dd->comm->cgs_gl;
1342 ncg_home = state_local->ncg_gl;
1343 cg = state_local->cg_gl;
1345 for (i = 0; i < ncg_home; i++)
1347 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1352 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1355 buf2[0] = dd->ncg_home;
1356 buf2[1] = dd->nat_home;
1366 /* Collect the charge group and atom counts on the master */
1367 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1372 for (i = 0; i < dd->nnodes; i++)
1374 ma->ncg[i] = ma->ibuf[2*i];
1375 ma->nat[i] = ma->ibuf[2*i+1];
1376 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1379 /* Make byte counts and indices */
1380 for (i = 0; i < dd->nnodes; i++)
1382 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1383 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1387 fprintf(debug, "Initial charge group distribution: ");
1388 for (i = 0; i < dd->nnodes; i++)
1390 fprintf(debug, " %d", ma->ncg[i]);
1392 fprintf(debug, "\n");
1396 /* Collect the charge group indices on the master */
1398 dd->ncg_home*sizeof(int), dd->index_gl,
1399 DDMASTER(dd) ? ma->ibuf : NULL,
1400 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1401 DDMASTER(dd) ? ma->cg : NULL);
1403 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1406 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1409 gmx_domdec_master_t *ma;
1410 int n, i, c, a, nalloc = 0;
1419 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1420 dd->rank, dd->mpi_comm_all);
1425 /* Copy the master coordinates to the global array */
1426 cgs_gl = &dd->comm->cgs_gl;
1428 n = DDMASTERRANK(dd);
1430 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1432 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1434 copy_rvec(lv[a++], v[c]);
1438 for (n = 0; n < dd->nnodes; n++)
1442 if (ma->nat[n] > nalloc)
1444 nalloc = over_alloc_dd(ma->nat[n]);
1445 srenew(buf, nalloc);
1448 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1449 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1452 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1454 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1456 copy_rvec(buf[a++], v[c]);
1465 static void get_commbuffer_counts(gmx_domdec_t *dd,
1466 int **counts, int **disps)
1468 gmx_domdec_master_t *ma;
1473 /* Make the rvec count and displacment arrays */
1475 *disps = ma->ibuf + dd->nnodes;
1476 for (n = 0; n < dd->nnodes; n++)
1478 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1479 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1483 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1486 gmx_domdec_master_t *ma;
1487 int *rcounts = NULL, *disps = NULL;
1496 get_commbuffer_counts(dd, &rcounts, &disps);
1501 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1505 cgs_gl = &dd->comm->cgs_gl;
1508 for (n = 0; n < dd->nnodes; n++)
1510 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1512 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1514 copy_rvec(buf[a++], v[c]);
1521 void dd_collect_vec(gmx_domdec_t *dd,
1522 t_state *state_local, rvec *lv, rvec *v)
1524 gmx_domdec_master_t *ma;
1525 int n, i, c, a, nalloc = 0;
1528 dd_collect_cg(dd, state_local);
1530 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1532 dd_collect_vec_sendrecv(dd, lv, v);
1536 dd_collect_vec_gatherv(dd, lv, v);
1541 void dd_collect_state(gmx_domdec_t *dd,
1542 t_state *state_local, t_state *state)
1546 nh = state->nhchainlength;
1550 for (i = 0; i < efptNR; i++)
1552 state->lambda[i] = state_local->lambda[i];
1554 state->fep_state = state_local->fep_state;
1555 state->veta = state_local->veta;
1556 state->vol0 = state_local->vol0;
1557 copy_mat(state_local->box, state->box);
1558 copy_mat(state_local->boxv, state->boxv);
1559 copy_mat(state_local->svir_prev, state->svir_prev);
1560 copy_mat(state_local->fvir_prev, state->fvir_prev);
1561 copy_mat(state_local->pres_prev, state->pres_prev);
1563 for (i = 0; i < state_local->ngtc; i++)
1565 for (j = 0; j < nh; j++)
1567 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1568 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1570 state->therm_integral[i] = state_local->therm_integral[i];
1572 for (i = 0; i < state_local->nnhpres; i++)
1574 for (j = 0; j < nh; j++)
1576 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1577 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1581 for (est = 0; est < estNR; est++)
1583 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1588 dd_collect_vec(dd, state_local, state_local->x, state->x);
1591 dd_collect_vec(dd, state_local, state_local->v, state->v);
1594 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1597 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1599 case estDISRE_INITF:
1600 case estDISRE_RM3TAV:
1601 case estORIRE_INITF:
1605 gmx_incons("Unknown state entry encountered in dd_collect_state");
/* Grow the per-atom arrays of a local t_state (and the force buffer *f) to
 * hold at least nalloc atoms, over-allocating via over_alloc_dd() to limit
 * reallocation frequency. Only distributed entries present in state->flags
 * are resized. */
1611 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1617 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1620 state->nalloc = over_alloc_dd(nalloc);
1622 for (est = 0; est < estNR; est++)
1624 if (EST_DISTR(est) && (state->flags & (1<<est)))
1629 srenew(state->x, state->nalloc);
1632 srenew(state->v, state->nalloc);
1635 srenew(state->sd_X, state->nalloc);
1638 srenew(state->cg_p, state->nalloc);
1640 case estDISRE_INITF:
1641 case estDISRE_RM3TAV:
1642 case estORIRE_INITF:
1644 /* No reallocation required */
1647 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1654 srenew(*f, state->nalloc);
/* Ensure the forcerec charge-group arrays (cginfo, and cg_cm for the group
 * cutoff scheme) can hold nalloc entries; with the Verlet scheme also grow
 * the state arrays, since x in state is used for atom communication. */
1658 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1661 if (nalloc > fr->cg_nalloc)
1665 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1667 fr->cg_nalloc = over_alloc_dd(nalloc);
1668 srenew(fr->cginfo, fr->cg_nalloc);
1669 if (fr->cutoff_scheme == ecutsGROUP)
1671 srenew(fr->cg_cm, fr->cg_nalloc);
1674 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1676 /* We don't use charge groups, we use x in state to set up
1677 * the atom communication.
1679 dd_realloc_state(state, f, nalloc);
/* Master-driven distribution of the global vector v to the local vectors lv
 * on all DD ranks using point-to-point MPI_Send/MPI_Recv: for each non-master
 * rank the master packs that rank's charge-group atoms into buf and sends it;
 * the master's own atoms are packed directly into lv. Non-master ranks just
 * receive into lv. */
1683 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1686 gmx_domdec_master_t *ma;
1687 int n, i, c, a, nalloc = 0;
1694 for (n = 0; n < dd->nnodes; n++)
1698 if (ma->nat[n] > nalloc)
1700 nalloc = over_alloc_dd(ma->nat[n]);
1701 srenew(buf, nalloc);
1703 /* Use lv as a temporary buffer */
/* Pack atoms of all charge groups assigned to rank n, in cg order */
1705 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1707 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1709 copy_rvec(v[c], buf[a++]);
1712 if (a != ma->nat[n])
1714 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1719 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1720 DDRANK(dd, n), n, dd->mpi_comm_all);
/* Master rank: copy its own home atoms straight into lv */
1725 n = DDMASTERRANK(dd);
1727 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1729 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1731 copy_rvec(v[c], lv[a++]);
1738 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1739 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
/* Collective variant of vector distribution: the master packs all ranks'
 * atoms contiguously into buf (counts/displacements from
 * get_commbuffer_counts) and a single dd_scatterv delivers each rank's
 * dd->nat_home atoms into lv. Preferred over send/recv for many ranks. */
1744 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1747 gmx_domdec_master_t *ma;
1748 int *scounts = NULL, *disps = NULL;
1749 int n, i, c, a, nalloc = 0;
1756 get_commbuffer_counts(dd, &scounts, &disps);
1760 for (n = 0; n < dd->nnodes; n++)
1762 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1764 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1766 copy_rvec(v[c], buf[a++]);
1772 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
/* Dispatch: use point-to-point send/recv for small rank counts
 * (<= GMX_DD_NNODES_SENDRECV), otherwise the collective scatterv path. */
1775 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1777 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1779 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1783 dd_distribute_vec_scatterv(dd, cgs, v, lv);
/* Broadcast the free-energy (expanded-ensemble) history dfhist from the
 * master to all DD ranks: first the scalars, then, if nlambda > 0, the
 * per-lambda arrays and the nlambda x nlambda matrices row by row. */
1787 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1790 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1791 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1792 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1794 if (dfhist->nlambda > 0)
1796 int nlam = dfhist->nlambda;
1797 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1798 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1799 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1800 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1801 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1802 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
/* Matrix members: broadcast one row (nlam reals) per lambda state */
1804 for (i = 0; i < nlam; i++)
1806 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1807 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1808 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1809 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1810 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1811 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
/* Inverse of dd_collect_state: push the global state from the master out to
 * the local states on all DD ranks. The master first copies the replicated
 * members into its own state_local; they are then dd_bcast to everyone;
 * finally the distributed per-atom vectors are scattered with
 * dd_distribute_vec. NOTE(review): braces/switch scaffolding are missing
 * from this sampled view. */
1816 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1817 t_state *state, t_state *state_local,
1822 nh = state->nhchainlength;
/* Master-only: seed state_local with the replicated (non-distributed) members */
1826 for (i = 0; i < efptNR; i++)
1828 state_local->lambda[i] = state->lambda[i];
1830 state_local->fep_state = state->fep_state;
1831 state_local->veta = state->veta;
1832 state_local->vol0 = state->vol0;
1833 copy_mat(state->box, state_local->box);
1834 copy_mat(state->box_rel, state_local->box_rel);
1835 copy_mat(state->boxv, state_local->boxv);
1836 copy_mat(state->svir_prev, state_local->svir_prev);
1837 copy_mat(state->fvir_prev, state_local->fvir_prev);
1838 copy_df_history(&state_local->dfhist, &state->dfhist);
1839 for (i = 0; i < state_local->ngtc; i++)
1841 for (j = 0; j < nh; j++)
1843 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1844 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1846 state_local->therm_integral[i] = state->therm_integral[i];
1848 for (i = 0; i < state_local->nnhpres; i++)
1850 for (j = 0; j < nh; j++)
1852 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1853 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
/* Broadcast the replicated members from the master to all ranks */
1857 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1858 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1859 dd_bcast(dd, sizeof(real), &state_local->veta);
1860 dd_bcast(dd, sizeof(real), &state_local->vol0);
1861 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1862 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1863 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1864 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1865 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1866 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1867 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1868 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1869 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1870 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1872 /* communicate df_history -- required for restarting from checkpoint */
1873 dd_distribute_dfhist(dd, &state_local->dfhist);
/* Make room for this rank's home atoms before scattering the vectors */
1875 if (dd->nat_home > state_local->nalloc)
1877 dd_realloc_state(state_local, f, dd->nat_home)
1879 for (i = 0; i < estNR; i++)
1881 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1886 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1889 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1892 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1895 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1897 case estDISRE_INITF:
1898 case estDISRE_RM3TAV:
1899 case estORIRE_INITF:
1901 /* Not implemented yet */
1904 gmx_incons("Unknown state entry encountered in dd_distribute_state");
/* Map a dimension index (XX/YY/ZZ) to its character 'X'/'Y'/'Z';
 * fatal error for any other value. */
1910 static char dim2char(int dim)
1916 case XX: c = 'X'; break;
1917 case YY: c = 'Y'; break;
1918 case ZZ: c = 'Z'; break;
1919 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
/* Debug output: gather every rank's cell corners (cell_x0/cell_x1) on the
 * master and write them to "<fn>_<step>.pdb" as 8 CA pseudo-atoms per DD
 * cell plus CONECT records for the cell edges. The B-factor column carries
 * the relative cell volume. */
1925 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1926 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1928 rvec grid_s[2], *grid_r = NULL, cx, r;
1929 char fname[STRLEN], buf[22];
1931 int a, i, d, z, y, x;
1935 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1936 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1940 snew(grid_r, 2*dd->nnodes);
/* Master receives two corner vectors per rank */
1943 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : NULL);
/* Build the triclinic transformation for the decomposed pbc dimensions */
1947 for (d = 0; d < DIM; d++)
1949 for (i = 0; i < DIM; i++)
1957 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1959 tric[d][i] = box[i][d]/box[i][i];
1968 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1969 out = gmx_fio_fopen(fname, "w");
1970 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1972 for (i = 0; i < dd->nnodes; i++)
1974 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1975 for (d = 0; d < DIM; d++)
1977 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
/* Emit the 8 corners of cell i (coordinates in nm, PDB wants Angstrom: x10) */
1979 for (z = 0; z < 2; z++)
1981 for (y = 0; y < 2; y++)
1983 for (x = 0; x < 2; x++)
1985 cx[XX] = grid_r[i*2+x][XX];
1986 cx[YY] = grid_r[i*2+y][YY];
1987 cx[ZZ] = grid_r[i*2+z][ZZ];
1989 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1990 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
/* Connect the corners along each dimension to draw the cell edges */
1994 for (d = 0; d < DIM; d++)
1996 for (x = 0; x < 4; x++)
2000 case 0: y = 1 + i*8 + 2*x; break;
2001 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
2002 case 2: y = 1 + i*8 + x; break;
2004 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2008 gmx_fio_fclose(out);
/* Debug output: write the local atoms of this rank to
 * "<fn>_<step>_n<rank>.pdb". The B-factor column encodes the DD zone the
 * atom belongs to (zones.n for vsites, zones.n+1 for constraint atoms). */
2013 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
2014 gmx_mtop_t *mtop, t_commrec *cr,
2015 int natoms, rvec x[], matrix box)
2017 char fname[STRLEN], buf[22];
2019 int i, ii, resnr, c;
2020 char *atomname, *resname;
2027 natoms = dd->comm->nat[ddnatVSITE];
2030 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2032 out = gmx_fio_fopen(fname, "w");
2034 fprintf(out, "TITLE %s\n", title);
2035 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2036 for (i = 0; i < natoms; i++)
2038 ii = dd->gatindex[i];
2039 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
/* Determine which zone (or special class) local atom i belongs to */
2040 if (i < dd->comm->nat[ddnatZONE])
2043 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2049 else if (i < dd->comm->nat[ddnatVSITE])
2051 b = dd->comm->zones.n;
2055 b = dd->comm->zones.n + 1;
2057 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
2058 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
2060 fprintf(out, "TER\n");
2062 gmx_fio_fclose(out);
/* Return the effective cut-off for multi-body (bonded) interactions:
 * either the explicit comm->cutoff_mbody, or, without DLB, the minimum
 * cell size over the decomposed dimensions (optionally raised for
 * bBondComm), capped by the non-bonded cut-off. */
2065 real dd_cutoff_mbody(gmx_domdec_t *dd)
2067 gmx_domdec_comm_t *comm;
2074 if (comm->bInterCGBondeds)
2076 if (comm->cutoff_mbody > 0)
2078 r = comm->cutoff_mbody;
2082 /* cutoff_mbody=0 means we do not have DLB */
2083 r = comm->cellsize_min[dd->dim[0]];
2084 for (di = 1; di < dd->ndim; di++)
2086 r = min(r, comm->cellsize_min[dd->dim[di]]);
2088 if (comm->bBondComm)
2090 r = max(r, comm->cutoff_mbody);
2094 r = min(r, comm->cutoff);
/* Return the two-body communication cut-off: the larger of the non-bonded
 * cut-off and the multi-body cut-off from dd_cutoff_mbody. */
2102 real dd_cutoff_twobody(gmx_domdec_t *dd)
2106 r_mb = dd_cutoff_mbody(dd);
2108 return max(dd->comm->cutoff, r_mb);
/* Map a PP Cartesian coordinate to the coordinate of its PME rank along
 * the PME-decomposed dimension (cartpmedim); the other dimensions are
 * copied unchanged. The PME ranks occupy the range [nc, ntot) along that
 * dimension and PP cells are spread evenly over them. */
2112 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2116 nc = dd->nc[dd->comm->cartpmedim];
2117 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2118 copy_ivec(coord, coord_pme);
2119 coord_pme[dd->comm->cartpmedim] =
2120 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
/* Evenly map DD rank index ddindex (of ndd) onto a PME rank index (of npme);
 * the +npme/2 term centers the distribution. */
2123 static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
2125 /* Here we assign a PME node to communicate with this DD node
2126 * by assuming that the major index of both is x.
2127 * We add cr->npmenodes/2 to obtain an even distribution.
2129 return (ddindex*npme + npme/2)/ndd;
/* Convenience wrapper: PME rank index for DD index, using dd's rank counts. */
2132 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2134 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
/* Same mapping as ddindex2pmeindex, but taking the counts from t_commrec. */
2137 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2139 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
/* Build and return the list of simulation rank ids that are PME-only ranks,
 * assuming PME ranks are interleaved after the PP ranks they serve.
 * Caller owns the returned snew'd array. */
2142 static int *dd_pmenodes(t_commrec *cr)
2147 snew(pmenodes, cr->npmenodes);
2149 for (i = 0; i < cr->dd->nnodes; i++)
2151 p0 = cr_ddindex2pmeindex(cr, i);
2152 p1 = cr_ddindex2pmeindex(cr, i+1);
/* A PME rank is inserted after DD rank i when the PME index advances */
2153 if (i+1 == cr->dd->nnodes || p1 > p0)
2157 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2159 pmenodes[n] = i + 1 + n;
/* Return the PME slab/rank index responsible for the DD cell at (x,y,z).
 * With a Cartesian PP/PME setup the PME coordinate is computed and folded
 * into a slab index; otherwise the index follows from the even
 * ddindex->pmeindex mapping. NOTE(review): this sampled view mixes what
 * look like two alternative branches; confirm against the full source. */
2167 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2170 ivec coords, coords_pme, nc;
2175 if (dd->comm->bCartesian) {
2176 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2177 dd_coords2pmecoords(dd,coords,coords_pme);
2178 copy_ivec(dd->ntot,nc);
2179 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2180 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2182 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2184 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2190 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
/* Translate a DD cell coordinate (x,y,z) into a simulation node id,
 * depending on the rank layout: MPI Cartesian lookup, the
 * ddindex2simnodeid table, or DD index plus interleaved PME offset. */
2195 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2197 gmx_domdec_comm_t *comm;
2199 int ddindex, nodeid = -1;
2201 comm = cr->dd->comm;
2206 if (comm->bCartesianPP_PME)
2209 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2214 ddindex = dd_index(cr->dd->nc, coords);
2215 if (comm->bCartesianPP)
2217 nodeid = comm->ddindex2simnodeid[ddindex];
/* Interleaved layout: PME ranks inserted between PP ranks shift the id */
2223 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
/* Return the PME-only rank that serves simulation rank sim_nodeid
 * (or, implicitly, -1 when sim_nodeid is itself a PME rank — the value
 * pmenode appears to be initialized outside this sampled view).
 * Handles the three layouts: Cartesian PP+PME, Cartesian PP only, and
 * interleaved ranks via the comm->pmenodes list. */
2235 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2238 gmx_domdec_comm_t *comm;
2239 ivec coord, coord_pme;
2246 /* This assumes a uniform x domain decomposition grid cell size */
2247 if (comm->bCartesianPP_PME)
2250 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2251 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2253 /* This is a PP node */
2254 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2255 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2259 else if (comm->bCartesianPP)
2261 if (sim_nodeid < dd->nnodes)
2263 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2268 /* This assumes DD cells with identical x coordinates
2269 * are numbered sequentially.
2271 if (dd->comm->pmenodes == NULL)
2273 if (sim_nodeid < dd->nnodes)
2275 /* The DD index equals the nodeid */
2276 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
/* Interleaved case: scan the pmenodes list for the next PME rank */
2282 while (sim_nodeid > dd->comm->pmenodes[i])
2286 if (sim_nodeid < dd->comm->pmenodes[i])
2288 pmenode = dd->comm->pmenodes[i];
/* Report the PME grid decomposition (#ranks in x and y) via out-params. */
2296 void get_pme_nnodes(const gmx_domdec_t *dd,
2297 int *npmenodes_x, int *npmenodes_y)
2301 *npmenodes_x = dd->comm->npmenodes_x;
2302 *npmenodes_y = dd->comm->npmenodes_y;
/* TRUE iff sim_nodeid is a PME-only rank: with DD, a rank is PME-only when
 * dd_simnode2pmenode cannot assign it a serving PME rank (-1); without DD
 * there are no PME-only ranks. */
2311 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2313 gmx_bool bPMEOnlyNode;
2315 if (DOMAINDECOMP(cr))
2317 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2321 bPMEOnlyNode = FALSE;
2324 return bPMEOnlyNode;
/* For PME rank pmenodeid, build the list of PP simulation ranks it receives
 * coordinates from (out-params: count, snew'd list owned by the caller, and
 * the peer rank = the last PP rank in the list). Scans all DD cells and
 * keeps those mapped to this PME rank. */
2327 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2328 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2332 ivec coord, coord_pme;
2336 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2339 for (x = 0; x < dd->nc[XX]; x++)
2341 for (y = 0; y < dd->nc[YY]; y++)
2343 for (z = 0; z < dd->nc[ZZ]; z++)
2345 if (dd->comm->bCartesianPP_PME)
2350 dd_cart_coord2pmecoord(dd, coord, coord_pme);
/* Cartesian layout: this cell is ours if its PME coordinate equals our own */
2351 if (dd->ci[XX] == coord_pme[XX] &&
2352 dd->ci[YY] == coord_pme[YY] &&
2353 dd->ci[ZZ] == coord_pme[ZZ])
2355 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2360 /* The slab corresponds to the nodeid in the PME group */
2361 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2363 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2370 /* The last PP-only node is the peer node */
2371 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2375 fprintf(debug, "Receive coordinates from PP ranks:");
2376 for (x = 0; x < *nmy_ddnodes; x++)
2378 fprintf(debug, " %d", (*my_ddnodes)[x]);
2380 fprintf(debug, "\n");
/* Decide whether this PP rank is the one that receives virial/energy from
 * its PME rank: only the LAST PP rank served by a given PME rank does.
 * Checked via Cartesian neighbor coordinates or, in the interleaved layout,
 * by comparing the PME rank of sim_nodeid+1. */
2384 static gmx_bool receive_vir_ener(t_commrec *cr)
2386 gmx_domdec_comm_t *comm;
2387 int pmenode, coords[DIM], rank;
2391 if (cr->npmenodes < cr->dd->nnodes)
2393 comm = cr->dd->comm;
2394 if (comm->bCartesianPP_PME)
2396 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2398 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2399 coords[comm->cartpmedim]++;
2400 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2402 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2403 if (dd_simnode2pmenode(cr, rank) == pmenode)
2405 /* This is not the last PP node for pmenode */
2413 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2414 if (cr->sim_nodeid+1 < cr->nnodes &&
2415 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2417 /* This is not the last PP node for pmenode */
/* Reset the zone charge-group ranges so that zone 0 holds all home charge
 * groups ([0, ncg_home)) and all non-home zone boundaries collapse to
 * ncg_home. */
2426 static void set_zones_ncg_home(gmx_domdec_t *dd)
2428 gmx_domdec_zones_t *zones;
2431 zones = &dd->comm->zones;
2433 zones->cg_range[0] = 0;
2434 for (i = 1; i < zones->n+1; i++)
2436 zones->cg_range[i] = dd->ncg_home;
2438 /* zone_ncg1[0] should always be equal to ncg_home */
2439 dd->comm->zone_ncg1[0] = dd->ncg_home;
/* Rebuild the local charge-group index arrays (index_gl, cgindex) from the
 * global cg list stored in state (e.g. after reading a checkpoint), counting
 * home atoms from the global cg size table gcgs_index, then reset the zone
 * ranges via set_zones_ncg_home. */
2442 static void rebuild_cgindex(gmx_domdec_t *dd,
2443 const int *gcgs_index, t_state *state)
2445 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2448 dd_cg_gl = dd->index_gl;
2449 cgindex = dd->cgindex;
2452 for (i = 0; i < state->ncg_gl; i++)
2456 dd_cg_gl[i] = cg_gl;
2457 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2461 dd->ncg_home = state->ncg_gl;
2464 set_zones_ncg_home(dd);
/* Look up the cginfo entry for global charge group cg by walking the
 * per-molecule-block list; the modulo handles repeated molecules sharing
 * one cginfo template. */
2467 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2469 while (cg >= cginfo_mb->cg_end)
2474 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
/* Fill fr->cginfo for local charge groups [cg0, cg1) from the global
 * cginfo blocks, and, when tracked, mark them as local in bLocalCG. */
2477 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2478 t_forcerec *fr, char *bLocalCG)
2480 cginfo_mb_t *cginfo_mb;
2486 cginfo_mb = fr->cginfo_mb;
2487 cginfo = fr->cginfo;
2489 for (cg = cg0; cg < cg1; cg++)
2491 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2495 if (bLocalCG != NULL)
2497 for (cg = cg0; cg < cg1; cg++)
2499 bLocalCG[index_gl[cg]] = TRUE;
/* (Re)build the local<->global atom index maps from charge group cg_start
 * on: fills dd->gatindex (local -> global) and the ga2la hash
 * (global -> local, with zone), per zone. With charge groups (bCGs) each
 * group's atoms are expanded via gcgs_index; otherwise cg index == atom
 * index. NOTE(review): sampled view — the zone1 adjustment for multi-pulse
 * groups is only partially visible. */
2504 static void make_dd_indices(gmx_domdec_t *dd,
2505 const int *gcgs_index, int cg_start)
2507 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2508 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2513 bLocalCG = dd->comm->bLocalCG;
2515 if (dd->nat_tot > dd->gatindex_nalloc)
2517 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2518 srenew(dd->gatindex, dd->gatindex_nalloc);
2521 nzone = dd->comm->zones.n;
2522 zone2cg = dd->comm->zones.cg_range;
2523 zone_ncg1 = dd->comm->zone_ncg1;
2524 index_gl = dd->index_gl;
2525 gatindex = dd->gatindex;
2526 bCGs = dd->comm->bCGs;
2528 if (zone2cg[1] != dd->ncg_home)
2530 gmx_incons("dd->ncg_zone is not up to date");
2533 /* Make the local to global and global to local atom index */
2534 a = dd->cgindex[cg_start];
2535 for (zone = 0; zone < nzone; zone++)
2543 cg0 = zone2cg[zone];
2545 cg1 = zone2cg[zone+1];
2546 cg1_p1 = cg0 + zone_ncg1[zone];
2548 for (cg = cg0; cg < cg1; cg++)
2553 /* Signal that this cg is from more than one pulse away */
2556 cg_gl = index_gl[cg];
/* Charge-group path: register every atom of the group */
2559 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2562 ga2la_set(dd->ga2la, a_gl, a, zone1);
/* Atom path (no charge groups): the cg index is the global atom index */
2568 gatindex[a] = cg_gl;
2569 ga2la_set(dd->ga2la, cg_gl, a, zone1);
/* Consistency check (debug): verify every local cg is marked in bLocalCG
 * and that the number of marked cgs matches dd->ncg_tot; returns the error
 * count. No-op (returns early) when bLocalCG is not allocated. */
2576 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2579 int ncg, i, ngl, nerr;
2582 if (bLocalCG == NULL)
2586 for (i = 0; i < dd->ncg_tot; i++)
2588 if (!bLocalCG[dd->index_gl[i]])
2591 "DD rank %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
2596 for (i = 0; i < ncg_sys; i++)
2603 if (ngl != dd->ncg_tot)
2605 fprintf(stderr, "DD rank %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
/* Debug-level consistency check of the local<->global index structures:
 * detects duplicate global atoms in gatindex, ga2la entries pointing past
 * nat_tot or at the wrong gatindex, count mismatches, and (via
 * check_bLocalCG) cg marking errors. Fatal when any inconsistency found. */
2612 static void check_index_consistency(gmx_domdec_t *dd,
2613 int natoms_sys, int ncg_sys,
2616 int nerr, ngl, i, a, cell;
2621 if (dd->comm->DD_debug > 1)
2623 snew(have, natoms_sys);
2624 for (a = 0; a < dd->nat_tot; a++)
2626 if (have[dd->gatindex[a]] > 0)
2628 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2632 have[dd->gatindex[a]] = a + 1;
2638 snew(have, dd->nat_tot);
/* Cross-check every ga2la entry against gatindex */
2641 for (i = 0; i < natoms_sys; i++)
2643 if (ga2la_get(dd->ga2la, i, &a, &cell))
2645 if (a >= dd->nat_tot)
2647 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2653 if (dd->gatindex[a] != i)
2655 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2662 if (ngl != dd->nat_tot)
2665 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
2666 dd->rank, where, ngl, dd->nat_tot);
2668 for (a = 0; a < dd->nat_tot; a++)
2673 "DD rank %d, %s: local atom %d, global %d has no global index\n",
2674 dd->rank, where, a+1, dd->gatindex[a]+1);
2679 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2683 gmx_fatal(FARGS, "DD rank %d, %s: %d atom/cg index inconsistencies",
2684 dd->rank, where, nerr);
/* Invalidate local index data from cg_start/a_start on: clear (or
 * selectively delete from) the ga2la map, unmark bLocalCG entries, and
 * clear local vsite and (if present) constraint indices. */
2688 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2695 /* Clear the whole list without searching */
2696 ga2la_clear(dd->ga2la);
2700 for (i = a_start; i < dd->nat_tot; i++)
2702 ga2la_del(dd->ga2la, dd->gatindex[i]);
2706 bLocalCG = dd->comm->bLocalCG;
2709 for (i = cg_start; i < dd->ncg_tot; i++)
2711 bLocalCG[dd->index_gl[i]] = FALSE;
2715 dd_clear_local_vsite_indices(dd);
2717 if (dd->constraints)
2719 dd_clear_local_constraint_indices(dd);
2723 /* This function should be used for moving the domain boudaries during DLB,
2724 * for obtaining the minimum cell size. It checks the initially set limit
2725 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2726 * and, possibly, a longer cut-off limit set for PME load balancing.
2728 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2732 cellsize_min = comm->cellsize_min[dim];
2734 if (!comm->bVacDLBNoLimit)
2736 /* The cut-off might have changed, e.g. by PME load balacning,
2737 * from the value used to set comm->cellsize_min, so check it.
2739 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2741 if (comm->bPMELoadBalDLBLimits)
2743 /* Check for the cut-off limit set by the PME load balancing */
2744 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
/* Result: the tightest of the static limit and the cut-off based limits */
2748 return cellsize_min;
/* Minimum distance allowed between staggered cell boundaries of diagonal
 * neighbor cells during DLB; derived from the cell size limit and, unless
 * in the vacuum no-limit case, from the cut-off divided by the number of
 * communication pulses along this dimension. */
2751 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2754 real grid_jump_limit;
2756 /* The distance between the boundaries of cells at distance
2757 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2758 * and by the fact that cells should not be shifted by more than
2759 * half their size, such that cg's only shift by one cell
2760 * at redecomposition.
2762 grid_jump_limit = comm->cellsize_limit;
2763 if (!comm->bVacDLBNoLimit)
2765 if (comm->bPMELoadBalDLBLimits)
2767 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2769 grid_jump_limit = max(grid_jump_limit,
2770 cutoff/comm->cd[dim_ind].np);
2773 return grid_jump_limit;
/* Verify that the DLB cell boundaries of this cell do not violate the grid
 * jump limit relative to the extreme boundaries of neighboring rows
 * (cell_f_max0/cell_f_min1) in each decomposed dimension d >= 1; fatal
 * error when they do. NOTE(review): the error/return plumbing (bFatal?) is
 * not fully visible in this sampled view. */
2776 static gmx_bool check_grid_jump(gmx_int64_t step,
2782 gmx_domdec_comm_t *comm;
2791 for (d = 1; d < dd->ndim; d++)
2794 limit = grid_jump_limit(comm, cutoff, d);
2795 bfac = ddbox->box_size[dim];
2796 if (ddbox->tric_dir[dim])
2798 bfac *= ddbox->skew_fac[dim];
2800 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2801 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2809 /* This error should never be triggered under normal
2810 * circumstances, but you never know ...
2812 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
2813 gmx_step_str(step, buf),
2814 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Number of load measurements accumulated since the last balancing:
 * flop counts when eFlop is enabled, otherwise force cycle counts. */
2822 static int dd_load_count(gmx_domdec_comm_t *comm)
2824 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
/* Compute this rank's force-computation load metric for DLB. Based on flop
 * counts (with optional random perturbation for eFlop > 1 testing) or on
 * force cycle counts with the per-window maximum subtracted to suppress
 * outliers; when ranks share a GPU, the GPU wait time is replaced by the
 * average over the sharing ranks so that balancing converges consistently. */
2827 static float dd_force_load(gmx_domdec_comm_t *comm)
2834 if (comm->eFlop > 1)
2836 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2841 load = comm->cycl[ddCyclF];
2842 if (comm->cycl_n[ddCyclF] > 1)
2844 /* Subtract the maximum of the last n cycle counts
2845 * to get rid of possible high counts due to other sources,
2846 * for instance system activity, that would otherwise
2847 * affect the dynamic load balancing.
2849 load -= comm->cycl_max[ddCyclF];
2853 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2855 float gpu_wait, gpu_wait_sum;
2857 gpu_wait = comm->cycl[ddCyclWaitGPU];
2858 if (comm->cycl_n[ddCyclF] > 1)
2860 /* We should remove the WaitGPU time of the same MD step
2861 * as the one with the maximum F time, since the F time
2862 * and the wait time are not independent.
2863 * Furthermore, the step for the max F time should be chosen
2864 * the same on all ranks that share the same GPU.
2865 * But to keep the code simple, we remove the average instead.
2866 * The main reason for artificially long times at some steps
2867 * is spurious CPU activity or MPI time, so we don't expect
2868 * that changes in the GPU wait time matter a lot here.
2870 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2872 /* Sum the wait times over the ranks that share the same GPU */
2873 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2874 comm->mpi_comm_gpu_shared);
2875 /* Replace the wait time by the average over the ranks */
2876 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
/* Build the cumulative cell-boundary fractions along dim for static load
 * balancing: from the per-cell fractions in comm->slb_frac when set,
 * otherwise uniform i/nc; allocates *dim_f (nc+1 entries, last = 1). */
2884 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2886 gmx_domdec_comm_t *comm;
2891 snew(*dim_f, dd->nc[dim]+1);
2893 for (i = 1; i < dd->nc[dim]; i++)
2895 if (comm->slb_frac[dim])
2897 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2901 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2904 (*dim_f)[dd->nc[dim]] = 1;
2907 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2909 int pmeindex, slab, nso, i;
2912 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2918 ddpme->dim = dimind;
2920 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2922 ddpme->nslab = (ddpme->dim == 0 ?
2923 dd->comm->npmenodes_x :
2924 dd->comm->npmenodes_y);
2926 if (ddpme->nslab <= 1)
2931 nso = dd->comm->npmenodes/ddpme->nslab;
2932 /* Determine for each PME slab the PP location range for dimension dim */
2933 snew(ddpme->pp_min, ddpme->nslab);
2934 snew(ddpme->pp_max, ddpme->nslab);
2935 for (slab = 0; slab < ddpme->nslab; slab++)
2937 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2938 ddpme->pp_max[slab] = 0;
2940 for (i = 0; i < dd->nnodes; i++)
2942 ddindex2xyz(dd->nc, i, xyz);
2943 /* For y only use our y/z slab.
2944 * This assumes that the PME x grid size matches the DD grid size.
2946 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2948 pmeindex = ddindex2pmeindex(dd, i);
2951 slab = pmeindex/nso;
2955 slab = pmeindex % ddpme->nslab;
2957 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
2958 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
2962 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
/* Maximum PME slab communication shift in x; the fallback return for a
 * non-x first PME dimension lies outside this sampled view. */
2965 int dd_pme_maxshift_x(gmx_domdec_t *dd)
2967 if (dd->comm->ddpme[0].dim == XX)
2969 return dd->comm->ddpme[0].maxshift;
/* Maximum PME slab communication shift in y: taken from whichever PME
 * decomposition dimension (0 or 1) maps to YY; the no-match fallback lies
 * outside this sampled view. */
2977 int dd_pme_maxshift_y(gmx_domdec_t *dd)
2979 if (dd->comm->ddpme[0].dim == YY)
2981 return dd->comm->ddpme[0].maxshift;
2983 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2985 return dd->comm->ddpme[1].maxshift;
/* Determine how many neighboring PME slabs (maxshift, in each direction)
 * a PME rank may need to communicate with, given the current cell
 * boundaries cell_f: atoms may sit up to 2/3 cut-off outside their cell,
 * so the search widens sh until no slab boundary can be crossed.
 * NOTE(review): the loop that increments sh is only partially visible. */
2993 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
2994 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
2996 gmx_domdec_comm_t *comm;
2999 real range, pme_boundary;
3003 nc = dd->nc[ddpme->dim];
3006 if (!ddpme->dim_match)
3008 /* PP decomposition is not along dim: the worst situation */
3011 else if (ns <= 3 || (bUniform && ns == nc))
3013 /* The optimal situation */
3018 /* We need to check for all pme nodes which nodes they
3019 * could possibly need to communicate with.
3021 xmin = ddpme->pp_min;
3022 xmax = ddpme->pp_max;
3023 /* Allow for atoms to be maximally 2/3 times the cut-off
3024 * out of their DD cell. This is a reasonable balance between
3025 * between performance and support for most charge-group/cut-off
3028 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3029 /* Avoid extra communication when we are exactly at a boundary */
3033 for (s = 0; s < ns; s++)
3035 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3036 pme_boundary = (real)s/ns;
/* Lower boundary: does any PP cell sh+1 slabs below reach past it?
 * The +ns/-1 variants handle periodic wrap-around. */
3039 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3041 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3045 pme_boundary = (real)(s+1)/ns;
/* Upper boundary: symmetric check for cells sh+1 slabs above */
3048 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3050 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3057 ddpme->maxshift = sh;
3061 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3062 ddpme->dim, ddpme->maxshift);
/* Fatal error when, in any bounded decomposed dimension, the (skew-
 * corrected) box size is too small for nc cells of the minimum allowed
 * cell size (with DD_CELL_MARGIN slack). */
3066 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3070 for (d = 0; d < dd->ndim; d++)
3073 if (dim < ddbox->nboundeddim &&
3074 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3075 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3077 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3078 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3079 dd->nc[dim], dd->comm->cellsize_limit);
3085 setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY
3088 /* Set the domain boundaries. Use for static (or no) load balancing,
3089 * and also for the starting state for dynamic load balancing.
3090 * setmode determine if and where the boundaries are stored, use enum above.
3091 * Returns the number communication pulses in npulse.
3093 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3094 int setmode, ivec npulse)
3096 gmx_domdec_comm_t *comm;
3099 real *cell_x, cell_dx, cellsize;
3103 for (d = 0; d < DIM; d++)
3105 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
/* Uniform case: all cells along d are the same size, boundaries by formula */
3107 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3110 cell_dx = ddbox->box_size[d]/dd->nc[d];
3113 case setcellsizeslbMASTER:
3114 for (j = 0; j < dd->nc[d]+1; j++)
3116 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3119 case setcellsizeslbLOCAL:
3120 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3121 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
/* One pulse is not always enough: widen until npulse*cellsize covers cutoff */
3126 cellsize = cell_dx*ddbox->skew_fac[d];
3127 while (cellsize*npulse[d] < comm->cutoff)
3131 cellsize_min[d] = cellsize;
3135 /* Statically load balanced grid */
3136 /* Also when we are not doing a master distribution we determine
3137 * all cell borders in a loop to obtain identical values
3138 * to the master distribution case and to determine npulse.
3140 if (setmode == setcellsizeslbMASTER)
3142 cell_x = dd->ma->cell_x[d];
3146 snew(cell_x, dd->nc[d]+1);
3148 cell_x[0] = ddbox->box0[d];
3149 for (j = 0; j < dd->nc[d]; j++)
3151 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3152 cell_x[j+1] = cell_x[j] + cell_dx;
3153 cellsize = cell_dx*ddbox->skew_fac[d];
3154 while (cellsize*npulse[d] < comm->cutoff &&
3155 npulse[d] < dd->nc[d]-1)
3159 cellsize_min[d] = min(cellsize_min[d], cellsize);
3161 if (setmode == setcellsizeslbLOCAL)
3163 comm->cell_x0[d] = cell_x[dd->ci[d]];
3164 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3166 if (setmode != setcellsizeslbMASTER)
3171 /* The following limitation is to avoid that a cell would receive
3172 * some of its own home charge groups back over the periodic boundary.
3173 * Double charge groups cause trouble with the global indices.
3175 if (d < ddbox->npbcdim &&
3176 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3178 char error_string[STRLEN];
3180 sprintf(error_string,
3181 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3182 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3184 dd->nc[d], dd->nc[d],
3185 dd->nnodes > dd->nc[d] ? "cells" : "ranks");
3187 if (setmode == setcellsizeslbLOCAL)
3189 gmx_fatal_collective(FARGS, NULL, dd, error_string);
3193 gmx_fatal(FARGS, error_string);
3198 if (!comm->bDynLoadBal)
3200 copy_rvec(cellsize_min, comm->cellsize_min);
/* Update the PME communication shift for the new boundaries */
3203 for (d = 0; d < comm->npmedecompdim; d++)
3205 set_pme_maxshift(dd, &comm->ddpme[d],
3206 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3207 comm->ddpme[d].slb_dim_f);
/* Enforce minimum cell-size and staggering limits on the relative DD cell
 * boundaries root->cell_f over the half-open boundary range [range[0], range[1]]
 * along decomposition index d (lattice dimension dim). Cells that would fall
 * below cellsize_limit_f are pinned to the limit and the remaining region is
 * rescaled iteratively; where staggering bounds (bound_min/bound_max) are
 * violated the function recurses on sub-ranges.
 * NOTE(review): this chunk has lines elided (braces and some statements,
 * e.g. the initialization of nmin/nmin_old and cell_size, are not visible);
 * comments below describe only what the visible code shows.
 */
3212 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3213 int d, int dim, gmx_domdec_root_t *root,
3215 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3217 gmx_domdec_comm_t *comm;
3218 int ncd, i, j, nmin, nmin_old;
3219 gmx_bool bLimLo, bLimHi;
3221 real fac, halfway, cellsize_limit_f_i, region_size;
3222 gmx_bool bPBC, bLastHi = FALSE;
/* Working copy of the range; updated as sub-ranges are carved out below */
3223 int nrange[] = {range[0], range[1]};
3225 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
3231 bPBC = (dim < ddbox->npbcdim);
3233 cell_size = root->buf_ncd;
3237 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3240 /* First we need to check if the scaling does not make cells
3241 * smaller than the smallest allowed size.
3242 * We need to do this iteratively, since if a cell is too small,
3243 * it needs to be enlarged, which makes all the other cells smaller,
3244 * which could in turn make another cell smaller than allowed.
3246 for (i = range[0]; i < range[1]; i++)
3248 root->bCellMin[i] = FALSE;
3254 /* We need the total for normalization */
3256 for (i = range[0]; i < range[1]; i++)
3258 if (root->bCellMin[i] == FALSE)
3260 fac += cell_size[i];
3263 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3264 /* Determine the cell boundaries */
3265 for (i = range[0]; i < range[1]; i++)
3267 if (root->bCellMin[i] == FALSE)
3269 cell_size[i] *= fac;
/* Without PBC the outermost cells along this dimension have no lower size limit */
3270 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3272 cellsize_limit_f_i = 0;
3276 cellsize_limit_f_i = cellsize_limit_f;
3278 if (cell_size[i] < cellsize_limit_f_i)
3280 root->bCellMin[i] = TRUE;
3281 cell_size[i] = cellsize_limit_f_i;
3285 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
/* Iterate until no additional cells got pinned to the minimum size */
3288 while (nmin > nmin_old);
3291 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3292 /* For this check we should not use DD_CELL_MARGIN,
3293 * but a slightly smaller factor,
3294 * since rounding could get use below the limit.
3296 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3299 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3300 gmx_step_str(step, buf),
3301 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3302 ncd, comm->cellsize_min[dim]);
/* Record whether any limit restricted the balancing in this call */
3305 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3309 /* Check if the boundary did not displace more than halfway
3310 * each of the cells it bounds, as this could cause problems,
3311 * especially when the differences between cell sizes are large.
3312 * If changes are applied, they will not make cells smaller
3313 * than the cut-off, as we check all the boundaries which
3314 * might be affected by a change and if the old state was ok,
3315 * the cells will at most be shrunk back to their old size.
3317 for (i = range[0]+1; i < range[1]; i++)
3319 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3320 if (root->cell_f[i] < halfway)
3322 root->cell_f[i] = halfway;
3323 /* Check if the change also causes shifts of the next boundaries */
3324 for (j = i+1; j < range[1]; j++)
3326 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3328 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3332 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3333 if (root->cell_f[i] > halfway)
3335 root->cell_f[i] = halfway;
3336 /* Check if the change also causes shifts of the next boundaries */
3337 for (j = i-1; j >= range[0]+1; j--)
3339 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3341 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3348 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3349 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3350 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3351 * for a and b nrange is used */
3354 /* Take care of the staggering of the cell boundaries */
3357 for (i = range[0]; i < range[1]; i++)
3359 root->cell_f_max0[i] = root->cell_f[i];
3360 root->cell_f_min1[i] = root->cell_f[i+1];
3365 for (i = range[0]+1; i < range[1]; i++)
3367 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3368 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3369 if (bLimLo && bLimHi)
3371 /* Both limits violated, try the best we can */
3372 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3373 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3374 nrange[0] = range[0];
3376 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3379 nrange[1] = range[1];
3380 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3386 /* root->cell_f[i] = root->bound_min[i]; */
3387 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3390 else if (bLimHi && !bLastHi)
3393 if (nrange[1] < range[1]) /* found a LimLo before */
3395 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3396 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3397 nrange[0] = nrange[1];
3399 root->cell_f[i] = root->bound_max[i];
3401 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3403 nrange[1] = range[1];
/* After the scan, handle any trailing LimLo / leading LimHi sub-range */
3406 if (nrange[1] < range[1]) /* found last a LimLo */
3408 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3409 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3410 nrange[0] = nrange[1];
3411 nrange[1] = range[1];
3412 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3414 else if (nrange[0] > range[0]) /* found at least one LimHi */
3416 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
/* Master (row-root) side of dynamic load balancing for one decomposition
 * dimension: computes new relative cell boundaries root->cell_f from the
 * measured loads, limits the change per step, enforces cut-off and
 * staggering limits via dd_cell_sizes_dlb_root_enforce_limits(), and
 * appends lower-dimension fractions plus PME maxshift values to the
 * cell_f buffer for broadcasting.
 * NOTE(review): lines are elided in this chunk (e.g. the assignments of
 * comm, ncd, relax and pos are not visible); comments describe only the
 * visible code.
 */
3423 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3424 int d, int dim, gmx_domdec_root_t *root,
3425 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3426 gmx_bool bUniform, gmx_int64_t step)
3428 gmx_domdec_comm_t *comm;
3429 int ncd, d1, i, j, pos;
3431 real load_aver, load_i, imbalance, change, change_max, sc;
3432 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3436 int range[] = { 0, 0 };
3440 /* Convert the maximum change from the input percentage to a fraction */
3441 change_limit = comm->dlb_scale_lim*0.01;
3445 bPBC = (dim < ddbox->npbcdim);
3447 cell_size = root->buf_ncd;
3449 /* Store the original boundaries */
3450 for (i = 0; i < ncd+1; i++)
3452 root->old_cell_f[i] = root->cell_f[i];
/* Uniform grid requested: all cells get equal relative size */
3456 for (i = 0; i < ncd; i++)
3458 cell_size[i] = 1.0/ncd;
3461 else if (dd_load_count(comm))
3463 load_aver = comm->load[d].sum_m/ncd;
3465 for (i = 0; i < ncd; i++)
3467 /* Determine the relative imbalance of cell i */
3468 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3469 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3470 /* Determine the change of the cell size using underrelaxation */
3471 change = -relax*imbalance;
3472 change_max = max(change_max, max(change, -change));
3474 /* Limit the amount of scaling.
3475 * We need to use the same rescaling for all cells in one row,
3476 * otherwise the load balancing might not converge.
3479 if (change_max > change_limit)
3481 sc *= change_limit/change_max;
3483 for (i = 0; i < ncd; i++)
3485 /* Determine the relative imbalance of cell i */
3486 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3487 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3488 /* Determine the change of the cell size using underrelaxation */
3489 change = -sc*imbalance;
3490 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
/* Convert the absolute minimum cell size to a box fraction, with margin */
3494 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3495 cellsize_limit_f *= DD_CELL_MARGIN;
3496 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3497 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3498 if (ddbox->tric_dir[dim])
3500 cellsize_limit_f /= ddbox->skew_fac[dim];
3501 dist_min_f /= ddbox->skew_fac[dim];
3503 if (bDynamicBox && d > 0)
3505 dist_min_f *= DD_PRES_SCALE_MARGIN;
3507 if (d > 0 && !bUniform)
3509 /* Make sure that the grid is not shifted too much */
3510 for (i = 1; i < ncd; i++)
3512 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3514 gmx_incons("Inconsistent DD boundary staggering limits!");
3516 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3517 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3520 root->bound_min[i] += 0.5*space;
3522 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3523 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3526 root->bound_max[i] += 0.5*space;
3531 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3533 root->cell_f_max0[i-1] + dist_min_f,
3534 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3535 root->cell_f_min1[i] - dist_min_f);
/* The outermost boundaries are always 0 and 1 in relative coordinates */
3540 root->cell_f[0] = 0;
3541 root->cell_f[ncd] = 1;
3542 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3545 /* After the checks above, the cells should obey the cut-off
3546 * restrictions, but it does not hurt to check.
3548 for (i = 0; i < ncd; i++)
3552 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3553 dim, i, root->cell_f[i], root->cell_f[i+1]);
3556 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3557 root->cell_f[i+1] - root->cell_f[i] <
3558 cellsize_limit_f/DD_CELL_MARGIN)
3562 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3563 gmx_step_str(step, buf), dim2char(dim), i,
3564 (root->cell_f[i+1] - root->cell_f[i])
3565 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3570 /* Store the cell boundaries of the lower dimensions at the end */
3571 for (d1 = 0; d1 < d; d1++)
3573 root->cell_f[pos++] = comm->cell_f0[d1];
3574 root->cell_f[pos++] = comm->cell_f1[d1];
3577 if (d < comm->npmedecompdim)
3579 /* The master determines the maximum shift for
3580 * the coordinate communication between separate PME nodes.
3582 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
/* maxshift values are packed into the real-valued cell_f buffer;
 * distribute_dd_cell_sizes_dlb() converts them back with a +0.5 round. */
3584 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3587 root->cell_f[pos++] = comm->ddpme[1].maxshift;
/* Convert the relative cell boundary fractions cell_f0/cell_f1 for
 * decomposition index dimind into absolute coordinates cell_x0/cell_x1,
 * offsetting by the box origin for unbounded dimensions.
 * NOTE(review): lines elided here (e.g. where comm and dim are assigned);
 * dim is presumably dd->dim[dimind] per the visible line below.
 */
3591 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3592 gmx_ddbox_t *ddbox, int dimind)
3594 gmx_domdec_comm_t *comm;
3599 /* Set the cell dimensions */
3600 dim = dd->dim[dimind];
3601 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3602 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
/* For dimensions not bounded by the box, shift by the actual origin */
3603 if (dim >= ddbox->nboundeddim)
3605 comm->cell_x0[dim] += ddbox->box0[dim];
3606 comm->cell_x1[dim] += ddbox->box0[dim];
/* Broadcast the DLB cell boundary fractions for decomposition index d
 * from the row root to all ranks in the row, then unpack this rank's own
 * fractions, the lower-dimension fractions, and the PME maxshift values.
 * NOTE(review): lines elided (e.g. declarations of pos/d1 and the
 * surrounding #ifdef GMX_MPI guards are not visible).
 */
3610 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3611 int d, int dim, real *cell_f_row,
3614 gmx_domdec_comm_t *comm;
3620 /* Each node would only need to know two fractions,
3621 * but it is probably cheaper to broadcast the whole array.
3623 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3624 0, comm->mpi_comm_load[d]);
3626 /* Copy the fractions for this dimension from the buffer */
3627 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3628 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3629 /* The whole array was communicated, so set the buffer position */
3630 pos = dd->nc[dim] + 1;
3631 for (d1 = 0; d1 <= d; d1++)
3635 /* Copy the cell fractions of the lower dimensions */
3636 comm->cell_f0[d1] = cell_f_row[pos++];
3637 comm->cell_f1[d1] = cell_f_row[pos++];
3639 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3641 /* Convert the communicated shift from float to int */
3642 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3645 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
/* Recompute DLB cell sizes for all decomposition dimensions: the row-root
 * rank computes new boundaries via set_dd_cell_sizes_dlb_root(), then the
 * result is distributed to the row members.
 * NOTE(review): lines elided (e.g. declarations of d/dim/d1/cell_f_row
 * and the bRowMember/bRowRoot setup are incomplete here).
 */
3649 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3650 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3651 gmx_bool bUniform, gmx_int64_t step)
3653 gmx_domdec_comm_t *comm;
3655 gmx_bool bRowMember, bRowRoot;
3660 for (d = 0; d < dd->ndim; d++)
/* Scan the higher decomposition indices to classify this rank's row role */
3665 for (d1 = d; d1 < dd->ndim; d1++)
3667 if (dd->ci[dd->dim[d1]] > 0)
3680 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3681 ddbox, bDynamicBox, bUniform, step);
3682 cell_f_row = comm->root[d]->cell_f;
/* Non-root row members receive into the communication buffer instead */
3686 cell_f_row = comm->cell_f_row;
3688 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
/* Refresh the absolute cell bounds from the unchanged relative fractions
 * for every decomposition dimension; used when DLB boundaries themselves
 * are not being recomputed this step.
 */
3693 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3697 /* This function assumes the box is static and should therefore
3698 * not be called when the box has changed since the last
3699 * call to dd_partition_system.
3701 for (d = 0; d < dd->ndim; d++)
3703 relative_to_absolute_cell_bounds(dd, ddbox, d);
/* Top-level DLB cell-size update: recompute boundaries (timed in the
 * DDCOMMBOUND wallcycle counter) when load balancing is performed this
 * step, otherwise just rescale for a dynamic box; finally set trivial
 * bounds for dimensions without domain decomposition (nc == 1).
 * NOTE(review): lines elided (e.g. the bDoDLB branch condition and the
 * assignment of comm are not visible).
 */
3709 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3710 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3711 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3712 gmx_wallcycle_t wcycle)
3714 gmx_domdec_comm_t *comm;
3721 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3722 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3723 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3725 else if (bDynamicBox)
3727 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3730 /* Set the dimensions for which no DD is used */
3731 for (dim = 0; dim < DIM; dim++)
3733 if (dd->nc[dim] == 1)
/* Single cell spans the whole box along this dimension */
3735 comm->cell_x0[dim] = 0;
3736 comm->cell_x1[dim] = ddbox->box_size[dim];
3737 if (dim >= ddbox->nboundeddim)
3739 comm->cell_x0[dim] += ddbox->box0[dim];
3740 comm->cell_x1[dim] += ddbox->box0[dim];
/* Grow the per-dimension communication index storage (cd->ind) when the
 * required number of communication pulses npulse exceeds the current
 * allocation; newly added pulse entries get NULL/0 index arrays.
 * NOTE(review): lines elided (e.g. declarations of d/np/i and the update
 * of cd->np_nalloc after the loop are not visible).
 */
3746 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3749 gmx_domdec_comm_dim_t *cd;
3751 for (d = 0; d < dd->ndim; d++)
3753 cd = &dd->comm->cd[d];
3754 np = npulse[dd->dim[d]];
3755 if (np > cd->np_nalloc)
3759 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3760 dim2char(dd->dim[d]), np);
/* Only report to stderr on reallocations, not the initial allocation */
3762 if (DDMASTER(dd) && cd->np_nalloc > 0)
3764 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3766 srenew(cd->ind, np);
3767 for (i = cd->np_nalloc; i < np; i++)
3769 cd->ind[i].index = NULL;
3770 cd->ind[i].nalloc = 0;
/* Update the local DD cell boundaries for this step: save the old bounds
 * (needed later for the charge-group displacement check), then use either
 * the dynamic load balancing path or the static (SLB) path, reallocating
 * communication buffers for the required pulse counts in the latter case.
 * NOTE(review): lines elided (e.g. the assignment of comm and the npulse
 * declaration are not visible).
 */
3779 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3780 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3781 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3782 gmx_wallcycle_t wcycle)
3784 gmx_domdec_comm_t *comm;
3790 /* Copy the old cell boundaries for the cg displacement check */
3791 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3792 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3794 if (comm->bDynLoadBal)
3798 check_box_size(dd, ddbox);
3800 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
/* Static load balancing: equal fractions, then size the comm pulses */
3804 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
3805 realloc_comm_ind(dd, npulse);
3810 for (d = 0; d < DIM; d++)
3812 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3813 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
/* Sanity-check the local cell sizes against the minimum allowed size
 * under DLB, then communicate the cell boundaries needed for
 * neighbor searching (cell_ns_x0/cell_ns_x1) and verify the staggered
 * grid did not jump too far.
 * NOTE(review): lines elided (e.g. the assignment of comm and the step
 * parameter declaration are not fully visible).
 */
3818 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3820 rvec cell_ns_x0, rvec cell_ns_x1,
3823 gmx_domdec_comm_t *comm;
3828 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3830 dim = dd->dim[dim_ind];
3832 /* Without PBC we don't have restrictions on the outer cells */
3833 if (!(dim >= ddbox->npbcdim &&
3834 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3835 comm->bDynLoadBal &&
/* Compare the skew-corrected cell extent against the minimum cell size */
3836 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3837 comm->cellsize_min[dim])
3840 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3841 gmx_step_str(step, buf), dim2char(dim),
3842 comm->cell_x1[dim] - comm->cell_x0[dim],
3843 ddbox->skew_fac[dim],
3844 dd->comm->cellsize_min[dim],
3845 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3849 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3851 /* Communicate the boundaries and update cell_ns_x0/1 */
3852 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3853 if (dd->bGridJump && dd->ndim > 1)
3855 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
/* Build the triclinic correction matrix tcm used to transform Cartesian
 * coordinates into the box's lattice coordinates; off-diagonal entries
 * are the negated box skew ratios.
 * NOTE(review): most of this function's body is elided in this chunk
 * (e.g. the handling of npbcdim and the YY row setup are not visible).
 */
3860 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3864 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3872 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3873 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
/* Verify that the unit cell is compatible with screw PBC; issues a fatal
 * error for box shapes that screw PBC (and the eighth-shell method built
 * on it) cannot handle.
 */
3882 static void check_screw_box(matrix box)
3884 /* Mathematical limitation */
3885 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3887 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3890 /* Limitation due to the asymmetry of the eighth shell method */
3891 if (box[ZZ][YY] != 0)
3893 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
/* Master-only initial distribution of charge groups over the DD cells:
 * computes each charge group's center of geometry, puts it (and its
 * atoms) into the box with PBC (including screw-PBC rotation of the
 * y/z components), locates the owning cell via the master cell
 * boundaries ma->cell_x, and fills the master index arrays
 * (ma->ncg, ma->nat, ma->cg, ma->index). Logs the resulting counts.
 * NOTE(review): many lines are elided in this chunk (closing braces,
 * the dd parameter, declarations of ind/tcm/cgindex, and several
 * assignments such as k0/nrcg/pos_d are not visible).
 */
3897 static void distribute_cg(FILE *fplog, gmx_int64_t step,
3898 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3901 gmx_domdec_master_t *ma;
3902 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3903 int i, icg, j, k, k0, k1, d, npbcdim;
3905 rvec box_size, cg_cm;
3907 real nrcg, inv_ncg, pos_d;
3909 gmx_bool bUnbounded, bScrew;
/* Allocate per-node temporary index lists on first use */
3913 if (tmp_ind == NULL)
3915 snew(tmp_nalloc, dd->nnodes);
3916 snew(tmp_ind, dd->nnodes);
3917 for (i = 0; i < dd->nnodes; i++)
3919 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3920 snew(tmp_ind[i], tmp_nalloc[i]);
3924 /* Clear the count */
3925 for (i = 0; i < dd->nnodes; i++)
3931 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3933 cgindex = cgs->index;
3935 /* Compute the center of geometry for all charge groups */
3936 for (icg = 0; icg < cgs->nr; icg++)
3939 k1 = cgindex[icg+1];
/* Single-atom charge group: center is just the atom position */
3943 copy_rvec(pos[k0], cg_cm);
3950 for (k = k0; (k < k1); k++)
3952 rvec_inc(cg_cm, pos[k]);
3954 for (d = 0; (d < DIM); d++)
3956 cg_cm[d] *= inv_ncg;
3959 /* Put the charge group in the box and determine the cell index */
3960 for (d = DIM-1; d >= 0; d--)
3963 if (d < dd->npbcdim)
3965 bScrew = (dd->bScrewPBC && d == XX);
3966 if (tric_dir[d] && dd->nc[d] > 1)
3968 /* Use triclinic coordintates for this dimension */
3969 for (j = d+1; j < DIM; j++)
3971 pos_d += cg_cm[j]*tcm[j][d];
/* Wrap down across the upper box face */
3974 while (pos_d >= box[d][d])
3977 rvec_dec(cg_cm, box[d]);
/* Screw PBC: shifting over x rotates the y/z components */
3980 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3981 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3983 for (k = k0; (k < k1); k++)
3985 rvec_dec(pos[k], box[d]);
3988 pos[k][YY] = box[YY][YY] - pos[k][YY];
3989 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
/* Wrap up across the lower box face (mirror of the branch above) */
3996 rvec_inc(cg_cm, box[d]);
3999 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
4000 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
4002 for (k = k0; (k < k1); k++)
4004 rvec_inc(pos[k], box[d]);
4007 pos[k][YY] = box[YY][YY] - pos[k][YY];
4008 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4013 /* This could be done more efficiently */
/* Linear scan over cell boundaries to find the owning cell index */
4015 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
4020 i = dd_index(dd->nc, ind);
4021 if (ma->ncg[i] == tmp_nalloc[i])
4023 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
4024 srenew(tmp_ind[i], tmp_nalloc[i]);
4026 tmp_ind[i][ma->ncg[i]] = icg;
4028 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
/* Concatenate the per-node lists into the flat master cg array */
4032 for (i = 0; i < dd->nnodes; i++)
4035 for (k = 0; k < ma->ncg[i]; k++)
4037 ma->cg[k1++] = tmp_ind[i][k];
4040 ma->index[dd->nnodes] = k1;
4042 for (i = 0; i < dd->nnodes; i++)
4052 fprintf(fplog, "Charge group distribution at step %s:",
4053 gmx_step_str(step, buf));
4054 for (i = 0; i < dd->nnodes; i++)
4056 fprintf(fplog, " %d", ma->ncg[i]);
4058 fprintf(fplog, "\n");
/* Perform the initial charge-group distribution: the master computes the
 * static cell sizes and assigns charge groups via distribute_cg(), then
 * the per-rank cg/atom counts and the global cg indices are scattered to
 * all ranks; finally each rank builds its local cgindex array.
 * NOTE(review): lines elided (e.g. the pos parameter, npulse declaration,
 * and the dd_scatterv call opening are not fully visible).
 */
4062 static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4063 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4066 gmx_domdec_master_t *ma = NULL;
4069 int *ibuf, buf2[2] = { 0, 0 };
4070 gmx_bool bMaster = DDMASTER(dd);
4078 check_screw_box(box);
4081 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
4083 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
/* Pack per-rank (ncg, nat) pairs for the scatter below */
4084 for (i = 0; i < dd->nnodes; i++)
4086 ma->ibuf[2*i] = ma->ncg[i];
4087 ma->ibuf[2*i+1] = ma->nat[i];
4095 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4097 dd->ncg_home = buf2[0];
4098 dd->nat_home = buf2[1];
4099 dd->ncg_tot = dd->ncg_home;
4100 dd->nat_tot = dd->nat_home;
4101 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4103 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4104 srenew(dd->index_gl, dd->cg_nalloc);
4105 srenew(dd->cgindex, dd->cg_nalloc+1);
/* Master prepares byte counts and displacements for the scatterv */
4109 for (i = 0; i < dd->nnodes; i++)
4111 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4112 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4117 DDMASTER(dd) ? ma->ibuf : NULL,
4118 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4119 DDMASTER(dd) ? ma->cg : NULL,
4120 dd->ncg_home*sizeof(int), dd->index_gl);
4122 /* Determine the home charge group sizes */
4124 for (i = 0; i < dd->ncg_home; i++)
4126 cg_gl = dd->index_gl[i];
4128 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4133 fprintf(debug, "Home charge groups:\n");
4134 for (i = 0; i < dd->ncg_home; i++)
4136 fprintf(debug, " %d", dd->index_gl[i]);
4139 fprintf(debug, "\n");
4142 fprintf(debug, "\n");
/* Per-atom variant of the move/compact pass: for each charge group either
 * compacts its atom vectors in place (group stays home) or copies them
 * into the per-direction communication buffer comm->cgcm_state, at the
 * slot for vector index vec out of nvec interleaved vectors.
 * Returns the new home position count (visible from the signature's int
 * return type; the return statement itself is elided in this chunk).
 * NOTE(review): lines elided (e.g. the cgindex/nvec/vec parameters, the
 * pos_vec initialization, and the branch structure are incomplete).
 */
4146 static int compact_and_copy_vec_at(int ncg, int *move,
4149 rvec *src, gmx_domdec_comm_t *comm,
4152 int m, icg, i, i0, i1, nrcg;
/* Reset the per-direction buffer write positions */
4158 for (m = 0; m < DIM*2; m++)
4164 for (icg = 0; icg < ncg; icg++)
4166 i1 = cgindex[icg+1];
4172 /* Compact the home array in place */
4173 for (i = i0; i < i1; i++)
4175 copy_rvec(src[i], src[home_pos++]);
4181 /* Copy to the communication buffer */
/* Skip the header slot plus the vectors written before this one */
4183 pos_vec[m] += 1 + vec*nrcg;
4184 for (i = i0; i < i1; i++)
4186 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4188 pos_vec[m] += (nvec - vec - 1)*nrcg;
4192 home_pos += i1 - i0;
/* Per-charge-group variant of the move/compact pass: compacts one rvec
 * per charge group (e.g. the cg centers of mass) in place for groups that
 * stay home, or copies it into the per-direction communication buffer,
 * reserving room for nvec per-atom vectors after it.
 * NOTE(review): lines elided (the cgindex parameter, pos_vec
 * initialization, bCompact handling and the return statement are not
 * visible in this chunk).
 */
4200 static int compact_and_copy_vec_cg(int ncg, int *move,
4202 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4205 int m, icg, i0, i1, nrcg;
4211 for (m = 0; m < DIM*2; m++)
4217 for (icg = 0; icg < ncg; icg++)
4219 i1 = cgindex[icg+1];
4225 /* Compact the home array in place */
4226 copy_rvec(src[icg], src[home_pos++]);
4232 /* Copy to the communication buffer */
4233 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
/* Advance past this cg's slot plus the room for its nvec atom vectors */
4234 pos_vec[m] += 1 + nrcg*nvec;
/* Compact the home index arrays after charge groups have been flagged for
 * moving: groups that stay keep their global indices and have their
 * global-to-local atom lookups updated; moved groups are deleted from the
 * ga2la table and unmarked in bLocalCG.
 * Returns the compacted home count (int return type; the return statement
 * is elided in this chunk).
 * NOTE(review): lines elided (e.g. the gatindex/cginfo parameters, the
 * home_pos/nat initialization, and a0/a1 assignments are not visible).
 */
4246 static int compact_ind(int ncg, int *move,
4247 int *index_gl, int *cgindex,
4249 gmx_ga2la_t ga2la, char *bLocalCG,
4252 int cg, nat, a0, a1, a, a_gl;
4257 for (cg = 0; cg < ncg; cg++)
4263 /* Compact the home arrays in place.
4264 * Anything that can be done here avoids access to global arrays.
4266 cgindex[home_pos] = nat;
4267 for (a = a0; a < a1; a++)
4270 gatindex[nat] = a_gl;
4271 /* The cell number stays 0, so we don't need to set it */
4272 ga2la_change_la(ga2la, a_gl, nat);
4275 index_gl[home_pos] = index_gl[cg];
4276 cginfo[home_pos] = cginfo[cg];
4277 /* The charge group remains local, so bLocalCG does not change */
4282 /* Clear the global indices */
4283 for (a = a0; a < a1; a++)
4285 ga2la_del(ga2la, gatindex[a]);
4289 bLocalCG[index_gl[cg]] = FALSE;
/* Terminate the compacted cgindex array */
4293 cgindex[home_pos] = nat;
/* Non-compacting counterpart of compact_ind(): for charge groups flagged
 * to move, delete their atoms from the global-to-local table, unmark them
 * in bLocalCG, and flag them in the neighbor-search cell index so the
 * grid code knows they left.
 * NOTE(review): lines elided (e.g. the cell_index parameter, loop-local
 * declarations and the a0/a1 assignments are not visible).
 */
4298 static void clear_and_mark_ind(int ncg, int *move,
4299 int *index_gl, int *cgindex, int *gatindex,
4300 gmx_ga2la_t ga2la, char *bLocalCG,
4305 for (cg = 0; cg < ncg; cg++)
4311 /* Clear the global indices */
4312 for (a = a0; a < a1; a++)
4314 ga2la_del(ga2la, gatindex[a]);
4318 bLocalCG[index_gl[cg]] = FALSE;
4320 /* Signal that this cg has moved using the ns cell index.
4321 * Here we set it to -1. fill_grid will change it
4322 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4324 cell_index[cg] = -1;
/* Print a diagnostic report about a charge group that moved too far
 * between partitioning steps: which atom range, direction, distance out
 * of the cell, and (when available) the old/new centers and the old/new
 * cell boundaries. Used by cg_move_error() for both log file and stderr.
 * NOTE(review): lines elided (e.g. the dd parameter line, the buf
 * declaration and the assignment of comm are not visible).
 */
4329 static void print_cg_move(FILE *fplog,
4331 gmx_int64_t step, int cg, int dim, int dir,
4332 gmx_bool bHaveLimitdAndCMOld, real limitd,
4333 rvec cm_old, rvec cm_new, real pos_d)
4335 gmx_domdec_comm_t *comm;
4340 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4341 if (bHaveLimitdAndCMOld)
4343 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4344 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4348 fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4349 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
/* dir indicates which cell face was crossed: +1 upper, otherwise lower */
4351 fprintf(fplog, "distance out of cell %f\n",
4352 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4353 if (bHaveLimitdAndCMOld)
4355 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4356 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4358 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4359 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4360 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4362 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4363 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4365 comm->cell_x0[dim], comm->cell_x1[dim]);
/* Report a too-large charge-group displacement to the log file (when
 * available) and stderr via print_cg_move(), then abort with a fatal
 * error explaining the likely cause (an unequilibrated system).
 * NOTE(review): lines elided (e.g. the dd parameter line and the
 * gmx_fatal(FARGS, ...) call opening are not visible).
 */
4368 static void cg_move_error(FILE *fplog,
4370 gmx_int64_t step, int cg, int dim, int dir,
4371 gmx_bool bHaveLimitdAndCMOld, real limitd,
4372 rvec cm_old, rvec cm_new, real pos_d)
4376 print_cg_move(fplog, dd, step, cg, dim, dir,
4377 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4379 print_cg_move(stderr, dd, step, cg, dim, dir,
4380 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4382 "A charge group moved too far between two domain decomposition steps\n"
4383 "This usually means that your system is not well equilibrated");
/* Apply the screw-PBC rotation to all distributed state vectors of atom
 * a: positions are mirrored in y/z within the (rectangular) box, while
 * velocity-like vectors (v, sd_X, cg_p) simply have their y/z components
 * negated. Distance-based state entries are unaffected.
 * NOTE(review): lines elided (the case labels for estX/estV/estSDX/estCGP
 * and several break statements are not visible in this chunk).
 */
4386 static void rotate_state_atom(t_state *state, int a)
4390 for (est = 0; est < estNR; est++)
/* Only state entries that are distributed over atoms and present */
4392 if (EST_DISTR(est) && (state->flags & (1<<est)))
4397 /* Rotate the complete state; for a rectangular box only */
4398 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4399 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4402 state->v[a][YY] = -state->v[a][YY];
4403 state->v[a][ZZ] = -state->v[a][ZZ];
4406 state->sd_X[a][YY] = -state->sd_X[a][YY];
4407 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4410 state->cg_p[a][YY] = -state->cg_p[a][YY];
4411 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4413 case estDISRE_INITF:
4414 case estDISRE_RM3TAV:
4415 case estORIRE_INITF:
4417 /* These are distances, so not affected by rotation */
4420 gmx_incons("Unknown state entry encountered in rotate_state_atom");
/* Return the "moved" flag array for the Verlet scheme, growing it to hold
 * at least natoms entries while preserving existing contents.
 * NOTE(review): the return statement (presumably returning comm->moved)
 * is elided in this chunk.
 */
4426 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4428 if (natoms > comm->moved_nalloc)
4430 /* Contents should be preserved here */
4431 comm->moved_nalloc = over_alloc_dd(natoms);
4432 srenew(comm->moved, comm->moved_nalloc);
/* Thread worker for dd_redistribute_cg(): for charge groups in
 * [cg_start, cg_end) compute the center of geometry, apply PBC (with
 * screw-PBC state rotation), detect DD cell boundary crossings against
 * cell_x0/cell_x1 (fatal via cg_move_error() when past limit0/limit1),
 * and encode the target direction and flags into move[cg].
 * NOTE(review): lines elided throughout (e.g. the dd/state/move
 * parameters, dev[] updates, and parts of the flag/mc encoding are not
 * visible); comments describe only the visible code.
 */
4438 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4441 ivec tric_dir, matrix tcm,
4442 rvec cell_x0, rvec cell_x1,
4443 rvec limitd, rvec limit0, rvec limit1,
4445 int cg_start, int cg_end,
4450 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4451 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4455 real inv_ncg, pos_d;
4458 npbcdim = dd->npbcdim;
4460 for (cg = cg_start; cg < cg_end; cg++)
/* Single-atom group: center is the atom position itself */
4467 copy_rvec(state->x[k0], cm_new);
4474 for (k = k0; (k < k1); k++)
4476 rvec_inc(cm_new, state->x[k]);
4478 for (d = 0; (d < DIM); d++)
4480 cm_new[d] = inv_ncg*cm_new[d];
4485 /* Do pbc and check DD cell boundary crossings */
4486 for (d = DIM-1; d >= 0; d--)
4490 bScrew = (dd->bScrewPBC && d == XX);
4491 /* Determine the location of this cg in lattice coordinates */
4495 for (d2 = d+1; d2 < DIM; d2++)
4497 pos_d += cm_new[d2]*tcm[d2][d];
4500 /* Put the charge group in the triclinic unit-cell */
4501 if (pos_d >= cell_x1[d])
/* Beyond the allowed displacement limit: abort with diagnostics */
4503 if (pos_d >= limit1[d])
4505 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4506 cg_cm[cg], cm_new, pos_d);
/* Wrap around the upper periodic boundary for the last cell */
4509 if (dd->ci[d] == dd->nc[d] - 1)
4511 rvec_dec(cm_new, state->box[d]);
4514 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4515 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4517 for (k = k0; (k < k1); k++)
4519 rvec_dec(state->x[k], state->box[d]);
4522 rotate_state_atom(state, k);
4527 else if (pos_d < cell_x0[d])
4529 if (pos_d < limit0[d])
4531 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4532 cg_cm[cg], cm_new, pos_d);
4537 rvec_inc(cm_new, state->box[d]);
4540 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4541 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4543 for (k = k0; (k < k1); k++)
4545 rvec_inc(state->x[k], state->box[d]);
4548 rotate_state_atom(state, k);
4554 else if (d < npbcdim)
4556 /* Put the charge group in the rectangular unit-cell */
4557 while (cm_new[d] >= state->box[d][d])
4559 rvec_dec(cm_new, state->box[d]);
4560 for (k = k0; (k < k1); k++)
4562 rvec_dec(state->x[k], state->box[d]);
4565 while (cm_new[d] < 0)
4567 rvec_inc(cm_new, state->box[d]);
4568 for (k = k0; (k < k1); k++)
4570 rvec_inc(state->x[k], state->box[d]);
4576 copy_rvec(cm_new, cg_cm[cg]);
4578 /* Determine where this cg should go */
4581 for (d = 0; d < dd->ndim; d++)
4586 flag |= DD_FLAG_FW(d);
4592 else if (dev[dim] == -1)
4594 flag |= DD_FLAG_BW(d);
4597 if (dd->nc[dim] > 2)
4608 /* Temporarily store the flag in move */
4609 move[cg] = mc + flag;
4613 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4614 gmx_domdec_t *dd, ivec tric_dir,
4615 t_state *state, rvec **f,
4624 int ncg[DIM*2], nat[DIM*2];
4625 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4626 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4627 int sbuf[2], rbuf[2];
4628 int home_pos_cg, home_pos_at, buf_pos;
4630 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4633 real inv_ncg, pos_d;
4635 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4637 cginfo_mb_t *cginfo_mb;
4638 gmx_domdec_comm_t *comm;
4640 int nthread, thread;
4644 check_screw_box(state->box);
4648 if (fr->cutoff_scheme == ecutsGROUP)
4653 for (i = 0; i < estNR; i++)
4659 case estX: /* Always present */ break;
4660 case estV: bV = (state->flags & (1<<i)); break;
4661 case estSDX: bSDX = (state->flags & (1<<i)); break;
4662 case estCGP: bCGP = (state->flags & (1<<i)); break;
4665 case estDISRE_INITF:
4666 case estDISRE_RM3TAV:
4667 case estORIRE_INITF:
4669 /* No processing required */
4672 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4677 if (dd->ncg_tot > comm->nalloc_int)
4679 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4680 srenew(comm->buf_int, comm->nalloc_int);
4682 move = comm->buf_int;
4684 /* Clear the count */
4685 for (c = 0; c < dd->ndim*2; c++)
4691 npbcdim = dd->npbcdim;
4693 for (d = 0; (d < DIM); d++)
4695 limitd[d] = dd->comm->cellsize_min[d];
4696 if (d >= npbcdim && dd->ci[d] == 0)
4698 cell_x0[d] = -GMX_FLOAT_MAX;
4702 cell_x0[d] = comm->cell_x0[d];
4704 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4706 cell_x1[d] = GMX_FLOAT_MAX;
4710 cell_x1[d] = comm->cell_x1[d];
4714 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4715 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4719 /* We check after communication if a charge group moved
4720 * more than one cell. Set the pre-comm check limit to float_max.
4722 limit0[d] = -GMX_FLOAT_MAX;
4723 limit1[d] = GMX_FLOAT_MAX;
4727 make_tric_corr_matrix(npbcdim, state->box, tcm);
4729 cgindex = dd->cgindex;
4731 nthread = gmx_omp_nthreads_get(emntDomdec);
4733 /* Compute the center of geometry for all home charge groups
4734 * and put them in the box and determine where they should go.
4736 #pragma omp parallel for num_threads(nthread) schedule(static)
4737 for (thread = 0; thread < nthread; thread++)
4739 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4740 cell_x0, cell_x1, limitd, limit0, limit1,
4742 ( thread *dd->ncg_home)/nthread,
4743 ((thread+1)*dd->ncg_home)/nthread,
4744 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4748 for (cg = 0; cg < dd->ncg_home; cg++)
4753 flag = mc & ~DD_FLAG_NRCG;
4754 mc = mc & DD_FLAG_NRCG;
4757 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4759 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4760 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4762 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4763 /* We store the cg size in the lower 16 bits
4764 * and the place where the charge group should go
4765 * in the next 6 bits. This saves some communication volume.
4767 nrcg = cgindex[cg+1] - cgindex[cg];
4768 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4774 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4775 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4778 for (i = 0; i < dd->ndim*2; i++)
4780 *ncg_moved += ncg[i];
4797 /* Make sure the communication buffers are large enough */
4798 for (mc = 0; mc < dd->ndim*2; mc++)
4800 nvr = ncg[mc] + nat[mc]*nvec;
4801 if (nvr > comm->cgcm_state_nalloc[mc])
4803 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4804 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4808 switch (fr->cutoff_scheme)
4811 /* Recalculating cg_cm might be cheaper than communicating,
4812 * but that could give rise to rounding issues.
4815 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4816 nvec, cg_cm, comm, bCompact);
4819 /* Without charge groups we send the moved atom coordinates
4820 * over twice. This is so the code below can be used without
4821 * many conditionals for both for with and without charge groups.
4824 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4825 nvec, state->x, comm, FALSE);
4828 home_pos_cg -= *ncg_moved;
4832 gmx_incons("unimplemented");
4838 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4839 nvec, vec++, state->x, comm, bCompact);
4842 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4843 nvec, vec++, state->v, comm, bCompact);
4847 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4848 nvec, vec++, state->sd_X, comm, bCompact);
4852 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4853 nvec, vec++, state->cg_p, comm, bCompact);
4858 compact_ind(dd->ncg_home, move,
4859 dd->index_gl, dd->cgindex, dd->gatindex,
4860 dd->ga2la, comm->bLocalCG,
4865 if (fr->cutoff_scheme == ecutsVERLET)
4867 moved = get_moved(comm, dd->ncg_home);
4869 for (k = 0; k < dd->ncg_home; k++)
4876 moved = fr->ns.grid->cell_index;
4879 clear_and_mark_ind(dd->ncg_home, move,
4880 dd->index_gl, dd->cgindex, dd->gatindex,
4881 dd->ga2la, comm->bLocalCG,
4885 cginfo_mb = fr->cginfo_mb;
4887 *ncg_stay_home = home_pos_cg;
4888 for (d = 0; d < dd->ndim; d++)
4894 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4897 /* Communicate the cg and atom counts */
4902 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4903 d, dir, sbuf[0], sbuf[1]);
4905 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4907 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4909 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4910 srenew(comm->buf_int, comm->nalloc_int);
4913 /* Communicate the charge group indices, sizes and flags */
4914 dd_sendrecv_int(dd, d, dir,
4915 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4916 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4918 nvs = ncg[cdd] + nat[cdd]*nvec;
4919 i = rbuf[0] + rbuf[1] *nvec;
4920 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4922 /* Communicate cgcm and state */
4923 dd_sendrecv_rvec(dd, d, dir,
4924 comm->cgcm_state[cdd], nvs,
4925 comm->vbuf.v+nvr, i);
4926 ncg_recv += rbuf[0];
4927 nat_recv += rbuf[1];
4931 /* Process the received charge groups */
4933 for (cg = 0; cg < ncg_recv; cg++)
4935 flag = comm->buf_int[cg*DD_CGIBS+1];
4937 if (dim >= npbcdim && dd->nc[dim] > 2)
4939 /* No pbc in this dim and more than one domain boundary.
4940 * We do a separate check if a charge group didn't move too far.
4942 if (((flag & DD_FLAG_FW(d)) &&
4943 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4944 ((flag & DD_FLAG_BW(d)) &&
4945 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4947 cg_move_error(fplog, dd, step, cg, dim,
4948 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4950 comm->vbuf.v[buf_pos],
4951 comm->vbuf.v[buf_pos],
4952 comm->vbuf.v[buf_pos][dim]);
4959 /* Check which direction this cg should go */
4960 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4964 /* The cell boundaries for dimension d2 are not equal
4965 * for each cell row of the lower dimension(s),
4966 * therefore we might need to redetermine where
4967 * this cg should go.
4970 /* If this cg crosses the box boundary in dimension d2
4971 * we can use the communicated flag, so we do not
4972 * have to worry about pbc.
4974 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4975 (flag & DD_FLAG_FW(d2))) ||
4976 (dd->ci[dim2] == 0 &&
4977 (flag & DD_FLAG_BW(d2)))))
4979 /* Clear the two flags for this dimension */
4980 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4981 /* Determine the location of this cg
4982 * in lattice coordinates
4984 pos_d = comm->vbuf.v[buf_pos][dim2];
4987 for (d3 = dim2+1; d3 < DIM; d3++)
4990 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4993 /* Check of we are not at the box edge.
4994 * pbc is only handled in the first step above,
4995 * but this check could move over pbc while
4996 * the first step did not due to different rounding.
4998 if (pos_d >= cell_x1[dim2] &&
4999 dd->ci[dim2] != dd->nc[dim2]-1)
5001 flag |= DD_FLAG_FW(d2);
5003 else if (pos_d < cell_x0[dim2] &&
5006 flag |= DD_FLAG_BW(d2);
5008 comm->buf_int[cg*DD_CGIBS+1] = flag;
5011 /* Set to which neighboring cell this cg should go */
5012 if (flag & DD_FLAG_FW(d2))
5016 else if (flag & DD_FLAG_BW(d2))
5018 if (dd->nc[dd->dim[d2]] > 2)
5030 nrcg = flag & DD_FLAG_NRCG;
5033 if (home_pos_cg+1 > dd->cg_nalloc)
5035 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5036 srenew(dd->index_gl, dd->cg_nalloc);
5037 srenew(dd->cgindex, dd->cg_nalloc+1);
5039 /* Set the global charge group index and size */
5040 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5041 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5042 /* Copy the state from the buffer */
5043 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5044 if (fr->cutoff_scheme == ecutsGROUP)
5047 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5051 /* Set the cginfo */
5052 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5053 dd->index_gl[home_pos_cg]);
5056 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5059 if (home_pos_at+nrcg > state->nalloc)
5061 dd_realloc_state(state, f, home_pos_at+nrcg);
5063 for (i = 0; i < nrcg; i++)
5065 copy_rvec(comm->vbuf.v[buf_pos++],
5066 state->x[home_pos_at+i]);
5070 for (i = 0; i < nrcg; i++)
5072 copy_rvec(comm->vbuf.v[buf_pos++],
5073 state->v[home_pos_at+i]);
5078 for (i = 0; i < nrcg; i++)
5080 copy_rvec(comm->vbuf.v[buf_pos++],
5081 state->sd_X[home_pos_at+i]);
5086 for (i = 0; i < nrcg; i++)
5088 copy_rvec(comm->vbuf.v[buf_pos++],
5089 state->cg_p[home_pos_at+i]);
5093 home_pos_at += nrcg;
5097 /* Reallocate the buffers if necessary */
5098 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5100 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5101 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5103 nvr = ncg[mc] + nat[mc]*nvec;
5104 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5106 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5107 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5109 /* Copy from the receive to the send buffers */
5110 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5111 comm->buf_int + cg*DD_CGIBS,
5112 DD_CGIBS*sizeof(int));
5113 memcpy(comm->cgcm_state[mc][nvr],
5114 comm->vbuf.v[buf_pos],
5115 (1+nrcg*nvec)*sizeof(rvec));
5116 buf_pos += 1 + nrcg*nvec;
5123 /* With sorting (!bCompact) the indices are now only partially up to date
5124 * and ncg_home and nat_home are not the real count, since there are
5125 * "holes" in the arrays for the charge groups that moved to neighbors.
5127 if (fr->cutoff_scheme == ecutsVERLET)
5129 moved = get_moved(comm, home_pos_cg);
5131 for (i = dd->ncg_home; i < home_pos_cg; i++)
5136 dd->ncg_home = home_pos_cg;
5137 dd->nat_home = home_pos_at;
5142 "Finished repartitioning: cgs moved out %d, new home %d\n",
5143 *ncg_moved, dd->ncg_home-*ncg_moved);
5148 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5150 dd->comm->cycl[ddCycl] += cycles;
5151 dd->comm->cycl_n[ddCycl]++;
5152 if (cycles > dd->comm->cycl_max[ddCycl])
5154 dd->comm->cycl_max[ddCycl] = cycles;
/* Estimate the accumulated force-calculation flop count in nrnb, as a
 * cost-weighted sum over interaction kernels.  Water-specific kernels
 * (names containing "W3"/"W4") are down-weighted further, to get closer
 * to real timings.
 * NOTE(review): several lines of the original (local declarations, the
 * kernel-name lookup and the final return) are missing from this extract.
 */
5158 static double force_flop_count(t_nrnb *nrnb)
/* Plain non-bonded kernels: half cost, and half again for water loops */
5165 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5167 /* To get closer to the real timings, we half the count
5168 * for the normal loops and again half it for water loops.
5171 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5173 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5177 sum += nrnb->n[i]*0.50*cost_nrnb(i);
/* Free-energy kernels through 1-4 interactions count at full cost */
5180 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5183 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5185 sum += nrnb->n[i]*cost_nrnb(i);
/* Bonded interactions up to walls count at full cost */
5188 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5190 sum += nrnb->n[i]*cost_nrnb(i);
5196 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5198 if (dd->comm->eFlop)
5200 dd->comm->flop -= force_flop_count(nrnb);
/* End a force flop measurement interval: adds the current flop count,
 * completing the difference started by dd_force_flop_start().
 * A no-op unless flop counting (eFlop) is enabled.
 * NOTE(review): lines after 5207 (presumably a sample-count increment
 * and closing braces) are missing from this extract.
 */
5203 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5205 if (dd->comm->eFlop)
5207 dd->comm->flop += force_flop_count(nrnb);
/* Reset all per-dimension DD cycle counters (sum, sample count and
 * maximum) and the flop counter state, ready for a new load-measurement
 * interval.
 * NOTE(review): lines 5221-5222 are missing from this extract —
 * presumably the closing brace and a flop reset; confirm against the
 * full source.
 */
5212 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5216 for (i = 0; i < ddCyclNr; i++)
5218 dd->comm->cycl[i] = 0;
5219 dd->comm->cycl_n[i] = 0;
5220 dd->comm->cycl_max[i] = 0;
5223 dd->comm->flop_n = 0;
/* Gather the load measurements of all ranks onto the DD master.
 *
 * Walks the DD dimensions from the last to the first; in each dimension
 * the participating ranks pack their local load data into sbuf and
 * MPI_Gather it to the row root, which reduces it (sum, max, limit
 * flags, cell boundaries) into comm->load[d].  Finally the master
 * accumulates the run totals (load_step/load_sum/load_max etc.).
 * Timed under the ewcDDCOMMLOAD wallcycle counter.
 * NOTE(review): many lines (braces, some assignments and conditionals)
 * are missing from this extract.
 */
5226 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5228 gmx_domdec_comm_t *comm;
5229 gmx_domdec_load_t *load;
5230 gmx_domdec_root_t *root = NULL;
5231 int d, dim, cid, i, pos;
5232 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5237 fprintf(debug, "get_load_distribution start\n");
5240 wallcycle_start(wcycle, ewcDDCOMMLOAD);
/* bSepPME: whether separate PME ranks are in use */
5244 bSepPME = (dd->pme_nodeid >= 0);
5246 for (d = dd->ndim-1; d >= 0; d--)
5249 /* Check if we participate in the communication in this dimension */
5250 if (d == dd->ndim-1 ||
5251 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5253 load = &comm->load[d];
5256 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
/* Pack the send buffer; layout differs for the innermost dimension */
5259 if (d == dd->ndim-1)
5261 sbuf[pos++] = dd_force_load(comm);
5262 sbuf[pos++] = sbuf[0];
5265 sbuf[pos++] = sbuf[0];
5266 sbuf[pos++] = cell_frac;
5269 sbuf[pos++] = comm->cell_f_max0[d];
5270 sbuf[pos++] = comm->cell_f_min1[d];
5275 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5276 sbuf[pos++] = comm->cycl[ddCyclPME];
/* Outer dimensions forward the already-reduced load of dimension d+1 */
5281 sbuf[pos++] = comm->load[d+1].sum;
5282 sbuf[pos++] = comm->load[d+1].max;
5285 sbuf[pos++] = comm->load[d+1].sum_m;
5286 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5287 sbuf[pos++] = comm->load[d+1].flags;
5290 sbuf[pos++] = comm->cell_f_max0[d];
5291 sbuf[pos++] = comm->cell_f_min1[d];
5296 sbuf[pos++] = comm->load[d+1].mdf;
5297 sbuf[pos++] = comm->load[d+1].pme;
5301 /* Communicate a row in DD direction d.
5302 * The communicators are setup such that the root always has rank 0.
5305 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5306 load->load, load->nload*sizeof(float), MPI_BYTE,
5307 0, comm->mpi_comm_load[d]);
5309 if (dd->ci[dim] == dd->master_ci[dim])
5311 /* We are the root, process this row */
5312 if (comm->bDynLoadBal)
5314 root = comm->root[d];
/* Reduce the gathered per-cell loads for this row */
5324 for (i = 0; i < dd->nc[dim]; i++)
5326 load->sum += load->load[pos++];
5327 load->max = max(load->max, load->load[pos]);
5333 /* This direction could not be load balanced properly,
5334 * therefore we need to use the maximum iso the average load.
5336 load->sum_m = max(load->sum_m, load->load[pos]);
5340 load->sum_m += load->load[pos];
5343 load->cvol_min = min(load->cvol_min, load->load[pos]);
/* flags were transported as float; round back to int */
5347 load->flags = (int)(load->load[pos++] + 0.5);
5351 root->cell_f_max0[i] = load->load[pos++];
5352 root->cell_f_min1[i] = load->load[pos++];
5357 load->mdf = max(load->mdf, load->load[pos]);
5359 load->pme = max(load->pme, load->load[pos]);
5363 if (comm->bDynLoadBal && root->bLimited)
5365 load->sum_m *= dd->nc[dim];
5366 load->flags |= (1<<d);
/* Master accumulates the running totals used for final reporting */
5374 comm->nload += dd_load_count(comm);
5375 comm->load_step += comm->cycl[ddCyclStep];
5376 comm->load_sum += comm->load[0].sum;
5377 comm->load_max += comm->load[0].max;
5378 if (comm->bDynLoadBal)
5380 for (d = 0; d < dd->ndim; d++)
5382 if (comm->load[0].flags & (1<<d))
5384 comm->load_lim[d]++;
5390 comm->load_mdf += comm->load[0].mdf;
5391 comm->load_pme += comm->load[0].pme;
5395 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5399 fprintf(debug, "get_load_distribution finished\n");
/* Relative run-time fraction lost to force-load imbalance:
 * (max - average load) over the total step cycles, per rank.
 * NOTE(review): the return keyword and the no-data fallback branch are
 * missing from this extract.
 */
5403 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5405 /* Return the relative performance loss on the total run time
5406 * due to the force calculation load imbalance.
5408 if (dd->comm->nload > 0)
5411 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5412 (dd->comm->load_step*dd->nnodes);
/* Print the end-of-run DD load-balance report to the log file and
 * stderr (master rank only, and only when load data was collected):
 * average force imbalance, run-time fraction lost to it, DLB limiting
 * statistics, the PME mesh/force load ratio, and NOTE warnings when
 * losses exceed DD_PERF_LOSS_WARN.
 * NOTE(review): braces and several assignments (e.g. npp, buf
 * declaration) are missing from this extract.
 */
5420 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5423 int npp, npme, nnodes, d, limp;
5424 float imbal, pme_f_ratio, lossf, lossp = 0;
5426 gmx_domdec_comm_t *comm;
5429 if (DDMASTER(dd) && comm->nload > 0)
5432 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5433 nnodes = npp + npme;
/* Relative imbalance: max load over average PP load minus one */
5434 imbal = comm->load_max*npp/comm->load_sum - 1;
5435 lossf = dd_force_imb_perf_loss(dd);
5436 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5437 fprintf(fplog, "%s", buf);
5438 fprintf(stderr, "\n");
5439 fprintf(stderr, "%s", buf);
5440 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5441 fprintf(fplog, "%s", buf);
5442 fprintf(stderr, "%s", buf);
5444 if (comm->bDynLoadBal)
5446 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5447 for (d = 0; d < dd->ndim; d++)
/* Percentage of load-measurement steps where DLB was limited, rounded */
5449 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5450 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5456 sprintf(buf+strlen(buf), "\n");
5457 fprintf(fplog, "%s", buf);
5458 fprintf(stderr, "%s", buf);
/* PP/PME balance: positive lossp means PME ranks had more work */
5462 pme_f_ratio = comm->load_pme/comm->load_mdf;
5463 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5466 lossp *= (float)npme/(float)nnodes;
5470 lossp *= (float)npp/(float)nnodes;
5472 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5473 fprintf(fplog, "%s", buf);
5474 fprintf(stderr, "%s", buf);
5475 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5476 fprintf(fplog, "%s", buf);
5477 fprintf(stderr, "%s", buf);
5479 fprintf(fplog, "\n");
5480 fprintf(stderr, "\n");
5482 if (lossf >= DD_PERF_LOSS_WARN)
5485 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5486 " in the domain decomposition.\n", lossf*100);
5487 if (!comm->bDynLoadBal)
5489 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5493 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5495 fprintf(fplog, "%s\n", buf);
5496 fprintf(stderr, "%s\n", buf);
5498 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS_WARN)
5501 "NOTE: %.1f %% performance was lost because the PME ranks\n"
5502 " had %s work to do than the PP ranks.\n"
5503 " You might want to %s the number of PME ranks\n"
5504 " or %s the cut-off and the grid spacing.\n",
5506 (lossp < 0) ? "less" : "more",
5507 (lossp < 0) ? "decrease" : "increase",
5508 (lossp < 0) ? "decrease" : "increase");
5509 fprintf(fplog, "%s\n", buf);
5510 fprintf(stderr, "%s\n", buf);
5515 static float dd_vol_min(gmx_domdec_t *dd)
5517 return dd->comm->load[0].cvol_min*dd->nnodes;
5520 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5522 return dd->comm->load[0].flags;
5525 static float dd_f_imbal(gmx_domdec_t *dd)
5527 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
/* Ratio of PME mesh load to PP force (mdf) load; only meaningful when
 * PME cycle samples were recorded.
 * NOTE(review): the branch for cycl_n[ddCyclPME] == 0 (presumably a
 * sentinel return) is missing from this extract.
 */
5530 float dd_pme_f_ratio(gmx_domdec_t *dd)
5532 if (dd->comm->cycl_n[ddCyclPME] > 0)
5534 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
/* Write a per-step DD load report to the log file: which dimensions
 * limited DLB, the min/average cell volume ratio, the force imbalance
 * percentage and (when measured) the PME mesh/force ratio.
 * NOTE(review): declarations, braces and the flags-conditional around
 * line 5551 are missing from this extract.
 */
5542 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5547 flags = dd_load_flags(dd);
5551 "DD load balancing is limited by minimum cell size in dimension");
5552 for (d = 0; d < dd->ndim; d++)
5556 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5559 fprintf(fplog, "\n");
5561 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5562 if (dd->comm->bDynLoadBal)
/* '!' marks a step where DLB hit a limit */
5564 fprintf(fplog, " vol min/aver %5.3f%c",
5565 dd_vol_min(dd), flags ? '!' : ' ');
5567 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5568 if (dd->comm->cycl_n[ddCyclPME])
5570 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5572 fprintf(fplog, "\n\n");
/* Short per-step load summary on stderr for mdrun -v: cell volume ratio
 * (with '!' when DLB was limited), force imbalance percentage and,
 * when PME cycles were measured, the PME/force ratio.
 * NOTE(review): braces are missing from this extract.
 */
5575 static void dd_print_load_verbose(gmx_domdec_t *dd)
5577 if (dd->comm->bDynLoadBal)
5579 fprintf(stderr, "vol %4.2f%c ",
5580 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5582 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5583 if (dd->comm->cycl_n[ddCyclPME])
5585 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
/* Create the MPI communicator for one row of DD cells along dimension
 * index dim_ind, starting from location loc.  Ranks along that row get
 * a shared communicator (via MPI_Comm_split); the row root additionally
 * allocates the DLB bookkeeping arrays, non-roots only the cell_f
 * receive buffer.
 * NOTE(review): loop-variable declarations, braces and the loc_c
 * increment are missing from this extract.
 */
5590 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5595 gmx_domdec_root_t *root;
5596 gmx_bool bPartOfGroup = FALSE;
5598 dim = dd->dim[dim_ind];
5599 copy_ivec(loc, loc_c);
/* Walk the row and check whether this rank lies on it */
5600 for (i = 0; i < dd->nc[dim]; i++)
5603 rank = dd_index(dd->nc, loc_c);
5604 if (rank == dd->rank)
5606 /* This process is part of the group */
5607 bPartOfGroup = TRUE;
/* Ranks not in the row pass MPI_UNDEFINED and get no communicator */
5610 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5614 dd->comm->mpi_comm_load[dim_ind] = c_row;
5615 if (dd->comm->eDLB != edlbNO)
5617 if (dd->ci[dim] == dd->master_ci[dim])
5619 /* This is the root process of this row */
5620 snew(dd->comm->root[dim_ind], 1);
5621 root = dd->comm->root[dim_ind];
5622 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5623 snew(root->old_cell_f, dd->nc[dim]+1);
5624 snew(root->bCellMin, dd->nc[dim]);
5627 snew(root->cell_f_max0, dd->nc[dim]);
5628 snew(root->cell_f_min1, dd->nc[dim]);
5629 snew(root->bound_min, dd->nc[dim]);
5630 snew(root->bound_max, dd->nc[dim]);
5632 snew(root->buf_ncd, dd->nc[dim]);
5636 /* This is not a root process, we only need to receive cell_f */
5637 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
/* The row root also allocates the load-gather buffer */
5640 if (dd->ci[dim] == dd->master_ci[dim])
5642 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
/* Set up the communicator of PP ranks that share a GPU on the same
 * physical node, so DLB can account for shared-resource timing.
 * First splits the PP communicator by physical-node hash, then by GPU
 * id within the node; a rank that ends up alone on its GPU frees the
 * communicator again.  Compiled to a no-op without MPI/GPU support.
 * NOTE(review): braces and some declarations are missing from this
 * extract.
 */
5648 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5649 const gmx_hw_info_t gmx_unused *hwinfo,
5650 const gmx_hw_opt_t gmx_unused *hw_opt)
5653 int physicalnode_id_hash;
5656 MPI_Comm mpi_comm_pp_physicalnode;
5658 if (!(cr->duty & DUTY_PP) ||
5659 hw_opt->gpu_opt.ncuda_dev_use == 0)
5661 /* Only PP nodes (currently) use GPUs.
5662 * If we don't have GPUs, there are no resources to share.
5667 physicalnode_id_hash = gmx_physicalnode_id_hash();
5669 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5675 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5676 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5677 dd->rank, physicalnode_id_hash, gpu_id);
5679 /* Split the PP communicator over the physical nodes */
5680 /* TODO: See if we should store this (before), as it's also used for
5681 * for the nodecomm summution.
5683 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5684 &mpi_comm_pp_physicalnode);
/* Second split: ranks with the same gpu_id on the node share a comm */
5685 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5686 &dd->comm->mpi_comm_gpu_shared);
5687 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5688 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5692 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5695 /* Note that some ranks could share a GPU, while others don't */
/* A rank alone on its GPU does not need the sharing communicator */
5697 if (dd->comm->nrank_gpu_shared == 1)
5699 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
/* Create the load communicators for all DD dimensions: one row
 * communicator per cell row in dimension 0, then (nested over rows)
 * dimensions 1 and 2 as present.
 * NOTE(review): loop braces, loc initialization and the dim0/dim1
 * assignments are missing from this extract.
 */
5704 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5707 int dim0, dim1, i, j;
5712 fprintf(debug, "Making load communicators\n");
5715 snew(dd->comm->load, dd->ndim);
5716 snew(dd->comm->mpi_comm_load, dd->ndim);
/* Dimension 0: a single row communicator */
5719 make_load_communicator(dd, 0, loc);
/* Dimension 1: one row per cell index along dimension 0 */
5723 for (i = 0; i < dd->nc[dim0]; i++)
5726 make_load_communicator(dd, 1, loc);
/* Dimension 2: one row per (dim0, dim1) cell pair */
5732 for (i = 0; i < dd->nc[dim0]; i++)
5736 for (j = 0; j < dd->nc[dim1]; j++)
5739 make_load_communicator(dd, 2, loc);
5746 fprintf(debug, "Finished making load communicators\n");
/* Set up the DD grid topology for this rank: neighbor ranks in each DD
 * dimension (with periodic wrap), the communication zones and their
 * shifts, the i-zone (pair-search) ranges, and finally the DLB root
 * structures and load communicators when needed.
 * NOTE(review): many lines (zone tables dd_zp1/2/3, nzone/nzonep
 * assignments, braces) are missing from this extract.
 */
5751 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5754 int d, dim, i, j, m;
5757 ivec dd_zp[DD_MAXIZONE];
5758 gmx_domdec_zones_t *zones;
5759 gmx_domdec_ns_ranges_t *izone;
/* Forward/backward neighbor ranks in each decomposed dimension */
5761 for (d = 0; d < dd->ndim; d++)
5764 copy_ivec(dd->ci, tmp);
5765 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5766 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5767 copy_ivec(dd->ci, tmp);
5768 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5769 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5772 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5775 dd->neighbor[d][1]);
5781 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5783 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5784 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Select the zone pair table matching the decomposition dimensionality */
5791 for (i = 0; i < nzonep; i++)
5793 copy_ivec(dd_zp3[i], dd_zp[i]);
5799 for (i = 0; i < nzonep; i++)
5801 copy_ivec(dd_zp2[i], dd_zp[i]);
5807 for (i = 0; i < nzonep; i++)
5809 copy_ivec(dd_zp1[i], dd_zp[i]);
5813 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5818 zones = &dd->comm->zones;
/* Zone shifts relative to the home zone */
5820 for (i = 0; i < nzone; i++)
5823 clear_ivec(zones->shift[i]);
5824 for (d = 0; d < dd->ndim; d++)
5826 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5831 for (i = 0; i < nzone; i++)
5833 for (d = 0; d < DIM; d++)
5835 s[d] = dd->ci[d] - zones->shift[i][d];
5840 else if (s[d] >= dd->nc[d])
/* i-zones: which j-zones each i-zone interacts with, plus shift limits */
5846 zones->nizone = nzonep;
5847 for (i = 0; i < zones->nizone; i++)
5849 if (dd_zp[i][0] != i)
5851 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5853 izone = &zones->izone[i];
5854 izone->j0 = dd_zp[i][1];
5855 izone->j1 = dd_zp[i][2];
5856 for (dim = 0; dim < DIM; dim++)
5858 if (dd->nc[dim] == 1)
5860 /* All shifts should be allowed */
5861 izone->shift0[dim] = -1;
5862 izone->shift1[dim] = 1;
5867 izone->shift0[d] = 0;
5868 izone->shift1[d] = 0;
5869 for(j=izone->j0; j<izone->j1; j++) {
5870 if (dd->shift[j][d] > dd->shift[i][d])
5871 izone->shift0[d] = -1;
5872 if (dd->shift[j][d] < dd->shift[i][d])
5873 izone->shift1[d] = 1;
5879 /* Assume the shift are not more than 1 cell */
5880 izone->shift0[dim] = 1;
5881 izone->shift1[dim] = -1;
5882 for (j = izone->j0; j < izone->j1; j++)
5884 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5885 if (shift_diff < izone->shift0[dim])
5887 izone->shift0[dim] = shift_diff;
5889 if (shift_diff > izone->shift1[dim])
5891 izone->shift1[dim] = shift_diff;
/* Allocate DLB roots and create the load communicators when needed */
5898 if (dd->comm->eDLB != edlbNO)
5900 snew(dd->comm->root, dd->ndim);
5903 if (dd->comm->bRecordLoad)
5905 make_load_communicators(dd);
/* Create (or adopt) the particle-particle communicator and determine
 * this rank's DD coordinates and the master rank.  With Cartesian PP
 * communication an MPI Cartesian communicator replaces the group
 * communicator; otherwise DD indices map directly to ranks.
 * NOTE(review): declarations (buf, rank, periods, comm_cart), braces
 * and some assignments are missing from this extract.
 */
5909 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
5912 gmx_domdec_comm_t *comm;
5923 if (comm->bCartesianPP)
5925 /* Set up cartesian communication for the particle-particle part */
5928 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5929 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5932 for (i = 0; i < DIM; i++)
5936 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5938 /* We overwrite the old communicator with the new cartesian one */
5939 cr->mpi_comm_mygroup = comm_cart;
5942 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5943 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5945 if (comm->bCartesianPP_PME)
5947 /* Since we want to use the original cartesian setup for sim,
5948 * and not the one after split, we need to make an index.
5950 snew(comm->ddindex2ddnodeid, dd->nnodes);
5951 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5952 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5953 /* Get the rank of the DD master,
5954 * above we made sure that the master node is a PP node.
5964 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5966 else if (comm->bCartesianPP)
5968 if (cr->npmenodes == 0)
5970 /* The PP communicator is also
5971 * the communicator for this simulation
5973 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5975 cr->nodeid = dd->rank;
5977 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5979 /* We need to make an index to go from the coordinates
5980 * to the nodeid of this simulation.
5982 snew(comm->ddindex2simnodeid, dd->nnodes);
5983 snew(buf, dd->nnodes);
5984 if (cr->duty & DUTY_PP)
5986 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5988 /* Communicate the ddindex to simulation nodeid index */
5989 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5990 cr->mpi_comm_mysim);
5993 /* Determine the master coordinates and rank.
5994 * The DD master should be the same node as the master of this sim.
5996 for (i = 0; i < dd->nnodes; i++)
5998 if (comm->ddindex2simnodeid[i] == 0)
6000 ddindex2xyz(dd->nc, i, dd->master_ci);
6001 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
6006 fprintf(debug, "The master rank is %d\n", dd->masterrank);
6011 /* No Cartesian communicators */
6012 /* We use the rank in dd->comm->all as DD index */
6013 ddindex2xyz(dd->nc, dd->rank, dd->ci);
6014 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
6016 clear_ivec(dd->master_ci);
6023 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
6024 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6029 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
6030 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Build the DD-index -> simulation-nodeid map on ranks that did not
 * create it in make_pp_communicator (only needed for Cartesian PP
 * without a Cartesian PP-PME split).  Each PP rank contributes its own
 * entry and an all-reduce sums the per-rank contributions.
 * NOTE(review): braces and the freeing of buf are missing from this
 * extract.
 */
6034 static void receive_ddindex2simnodeid(t_commrec *cr)
6038 gmx_domdec_comm_t *comm;
6045 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6047 snew(comm->ddindex2simnodeid, dd->nnodes);
6048 snew(buf, dd->nnodes);
6049 if (cr->duty & DUTY_PP)
6051 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6054 /* Communicate the ddindex to simulation nodeid index */
6055 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6056 cr->mpi_comm_mysim);
/* Allocate the master-rank bookkeeping structure for global state
 * scatter/gather: per-rank charge-group counts and indices, cell
 * boundaries and, for small rank counts, a whole-system rvec buffer.
 * NOTE(review): snew(ma, 1), some allocations and the return statement
 * are missing from this extract.
 */
6063 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6064 int ncg, int natoms)
6066 gmx_domdec_master_t *ma;
6071 snew(ma->ncg, dd->nnodes);
6072 snew(ma->index, dd->nnodes+1);
6074 snew(ma->nat, dd->nnodes);
6075 snew(ma->ibuf, dd->nnodes*2);
6076 snew(ma->cell_x, DIM);
6077 for (i = 0; i < DIM; i++)
6079 snew(ma->cell_x[i], dd->nc[i]+1);
/* With few ranks sendrecv is used, which needs a full-size buffer */
6082 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6088 snew(ma->vbuf, natoms);
/* Split the simulation communicator into PP and PME-only parts.
 * With Cartesian node order and a divisible PME count, a Cartesian
 * PP+PME grid is created (PME ranks stacked along the thinnest of y/z);
 * otherwise ranks are assigned PP/PME duty by interleaving or PP-first
 * order and MPI_Comm_split produces the group communicators.
 * NOTE(review): declarations (bDiv, periods, comm_cart, rank), braces
 * and several duty assignments are missing from this extract.
 */
6094 static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
6095 int gmx_unused reorder)
6098 gmx_domdec_comm_t *comm;
6109 if (comm->bCartesianPP)
6111 for (i = 1; i < DIM; i++)
6113 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6115 if (bDiv[YY] || bDiv[ZZ])
6117 comm->bCartesianPP_PME = TRUE;
6118 /* If we have 2D PME decomposition, which is always in x+y,
6119 * we stack the PME only nodes in z.
6120 * Otherwise we choose the direction that provides the thinnest slab
6121 * of PME only nodes as this will have the least effect
6122 * on the PP communication.
6123 * But for the PME communication the opposite might be better.
6125 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6127 dd->nc[YY] > dd->nc[ZZ]))
6129 comm->cartpmedim = ZZ;
6133 comm->cartpmedim = YY;
6135 comm->ntot[comm->cartpmedim]
6136 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6140 fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6142 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6147 if (comm->bCartesianPP_PME)
6151 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6154 for (i = 0; i < DIM; i++)
6158 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6161 MPI_Comm_rank(comm_cart, &rank);
6162 if (MASTERNODE(cr) && rank != 0)
6164 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6167 /* With this assigment we loose the link to the original communicator
6168 * which will usually be MPI_COMM_WORLD, unless have multisim.
6170 cr->mpi_comm_mysim = comm_cart;
6171 cr->sim_nodeid = rank;
6173 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6177 fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
6178 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Ranks beyond the PP part of the Cartesian grid do PME only */
6181 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6185 if (cr->npmenodes == 0 ||
6186 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6188 cr->duty = DUTY_PME;
6191 /* Split the sim communicator into PP and PME only nodes */
6192 MPI_Comm_split(cr->mpi_comm_mysim,
6194 dd_index(comm->ntot, dd->ci),
6195 &cr->mpi_comm_mygroup);
/* Non-Cartesian PP <-> PME: assign duty by the requested node order */
6199 switch (dd_node_order)
6204 fprintf(fplog, "Order of the ranks: PP first, PME last\n");
6207 case ddnoINTERLEAVE:
6208 /* Interleave the PP-only and PME-only nodes,
6209 * as on clusters with dual-core machines this will double
6210 * the communication bandwidth of the PME processes
6211 * and thus speed up the PP <-> PME and inter PME communication.
6215 fprintf(fplog, "Interleaving PP and PME ranks\n");
6217 comm->pmenodes = dd_pmenodes(cr);
6222 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6225 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6227 cr->duty = DUTY_PME;
6234 /* Split the sim communicator into PP and PME only nodes */
6235 MPI_Comm_split(cr->mpi_comm_mysim,
6238 &cr->mpi_comm_mygroup);
6239 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6245 fprintf(fplog, "This rank does only %s work.\n\n",
6246 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
/* Top-level communicator setup: optionally split the simulation
 * communicator into PP and PME parts, create the PP communicator,
 * establish each PP rank's PME peer, and allocate the master-rank
 * bookkeeping structure on the DD master.
 * NOTE(review): declarations (CartReorder), braces and the DDMASTER
 * condition near line 6319 are missing from this extract.
 */
6250 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6253 gmx_domdec_comm_t *comm;
6259 copy_ivec(dd->nc, comm->ntot);
6261 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6262 comm->bCartesianPP_PME = FALSE;
6264 /* Reorder the nodes by default. This might change the MPI ranks.
6265 * Real reordering is only supported on very few architectures,
6266 * Blue Gene is one of them.
6268 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6270 if (cr->npmenodes > 0)
6272 /* Split the communicator into a PP and PME part */
6273 split_communicator(fplog, cr, dd_node_order, CartReorder);
6274 if (comm->bCartesianPP_PME)
6276 /* We (possibly) reordered the nodes in split_communicator,
6277 * so it is no longer required in make_pp_communicator.
6279 CartReorder = FALSE;
6284 /* All nodes do PP and PME */
6286 /* We do not require separate communicators */
6287 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6291 if (cr->duty & DUTY_PP)
6293 /* Copy or make a new PP communicator */
6294 make_pp_communicator(fplog, cr, CartReorder);
6298 receive_ddindex2simnodeid(cr);
6301 if (!(cr->duty & DUTY_PME))
6303 /* Set up the commnuication to our PME node */
6304 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6305 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6308 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6309 dd->pme_nodeid, dd->pme_receive_vir_ener);
6314 dd->pme_nodeid = -1;
/* The DD master keeps the global charge-group/atom bookkeeping */
6319 dd->ma = init_gmx_domdec_master_t(dd,
6321 comm->cgs_gl.index[comm->cgs_gl.nr]);
/* Parse the static load balancing fractions for one direction from the
 * user-supplied size string (e.g. mdrun -ddcsx).  Returns an array of
 * nc relative cell sizes, or NULL when no string was given or nc <= 1.
 * NOTE(review): allocation of slb_frac, the normalization by tot and
 * the return are missing from this extract.
 */
6325 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6327 real *slb_frac, tot;
6332 if (nc > 1 && size_string != NULL)
6336 fprintf(fplog, "Using static load balancing for the %s direction\n",
6341 for (i = 0; i < nc; i++)
/* %n records how many characters were consumed so parsing can advance */
6344 sscanf(size_string, "%lf%n", &dbl, &n);
6347 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6356 fprintf(fplog, "Relative cell sizes:");
6358 for (i = 0; i < nc; i++)
6363 fprintf(fplog, " %5.3f", slb_frac[i]);
6368 fprintf(fplog, "\n");
/* Count the bonded interactions in the topology that involve more than
 * two atoms (these constrain how DD domains can be split).  Iterates
 * over all molecule-type interaction lists, weighting by molecule count.
 * NOTE(review): the multi-atom condition on the if at line 6387 and the
 * return are missing from this extract.
 */
6375 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6378 gmx_mtop_ilistloop_t iloop;
6382 iloop = gmx_mtop_ilistloop_init(mtop);
6383 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6385 for (ftype = 0; ftype < F_NRE; ftype++)
6387 if ((interaction_function[ftype].flags & IF_BOND) &&
/* nr entries per interaction = 1 (type index) + NRAL(ftype) atoms */
6390 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
/* Read an integer from environment variable env_var; returns def when
 * the variable is unset.  Logs the value found to fplog.
 * NOTE(review): the nst/def handling between the sscanf failure and the
 * log message, and the return, are missing from this extract.
 */
6398 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6404 val = getenv(env_var);
6407 if (sscanf(val, "%d", &nst) <= 0)
6413 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
/* Emit a DD warning string to stderr and the log file.
 * NOTE(review): the guards between the two fprintf calls (presumably
 * master-rank and fplog != NULL checks) are missing from this extract.
 */
6421 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6425 fprintf(stderr, "\n%s\n", warn_string);
6429 fprintf(fplog, "\n%s\n", warn_string);
/* Validate that the input record is compatible with domain
 * decomposition: screw pbc only decomposed in x, no simple neighbor
 * search, no nstlist=0; warns about angular comm removal with pbc.
 */
6433 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6434 t_inputrec *ir, FILE *fplog)
6436 if (ir->ePBC == epbcSCREW &&
6437 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6439 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6442 if (ir->ns_type == ensSIMPLE)
6444 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6447 if (ir->nstlist == 0)
6449 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6452 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6454 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6458 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6463 r = ddbox->box_size[XX];
6464 for (di = 0; di < dd->ndim; di++)
6467 /* Check using the initial average cell size */
6468 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6474 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6475 const char *dlb_opt, gmx_bool bRecordLoad,
6476 unsigned long Flags, t_inputrec *ir)
6484 case 'a': eDLB = edlbAUTO; break;
6485 case 'n': eDLB = edlbNO; break;
6486 case 'y': eDLB = edlbYES; break;
6487 default: gmx_incons("Unknown dlb_opt");
6490 if (Flags & MD_RERUN)
6495 if (!EI_DYNAMICS(ir->eI))
6497 if (eDLB == edlbYES)
6499 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6500 dd_warning(cr, fplog, buf);
6508 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6513 if (Flags & MD_REPRODUCIBLE)
6520 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6524 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6527 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
6535 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6540 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6542 /* Decomposition order z,y,x */
6545 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6547 for (dim = DIM-1; dim >= 0; dim--)
6549 if (dd->nc[dim] > 1)
6551 dd->dim[dd->ndim++] = dim;
6557 /* Decomposition order x,y,z */
6558 for (dim = 0; dim < DIM; dim++)
6560 if (dd->nc[dim] > 1)
6562 dd->dim[dd->ndim++] = dim;
6568 static gmx_domdec_comm_t *init_dd_comm()
6570 gmx_domdec_comm_t *comm;
6574 snew(comm->cggl_flag, DIM*2);
6575 snew(comm->cgcm_state, DIM*2);
6576 for (i = 0; i < DIM*2; i++)
6578 comm->cggl_flag_nalloc[i] = 0;
6579 comm->cgcm_state_nalloc[i] = 0;
6582 comm->nalloc_int = 0;
6583 comm->buf_int = NULL;
6585 vec_rvec_init(&comm->vbuf);
6587 comm->n_load_have = 0;
6588 comm->n_load_collect = 0;
6590 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6592 comm->sum_nat[i] = 0;
6596 comm->load_step = 0;
6599 clear_ivec(comm->load_lim);
/* init_domain_decomposition():
 * Top-level constructor of the gmx_domdec_t structure.  Reads the
 * GMX_DD_* / GMX_DLB_* environment overrides, decides on dynamic load
 * balancing support, derives the minimum cell size from the non-bonded
 * cut-off, the bonded interaction distances and P-LINCS constraints,
 * chooses (or validates) the DD grid and separate-PME rank layout, and
 * fills in the static load balancing fractions.
 *
 * NOTE(review): this extraction dropped many intermediate lines (braces,
 * declarations, else-branches); the comments below describe only the
 * visible statements and should be re-checked against the full file.
 */
6606 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6607 unsigned long Flags,
6609 real comm_distance_min, real rconstr,
6610 const char *dlb_opt, real dlb_scale,
6611 const char *sizex, const char *sizey, const char *sizez,
6612 gmx_mtop_t *mtop, t_inputrec *ir,
6613 matrix box, rvec *x,
6615 int *npme_x, int *npme_y)
6618 gmx_domdec_comm_t *comm;
6621 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6628 "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
6633 dd->comm = init_dd_comm();
/* NOTE(review): cggl_flag and cgcm_state are already snew'ed inside
 * init_dd_comm() above (lines 6574/6575); the two snew calls below look
 * like a duplicate allocation that leaks the first arrays -- confirm
 * against the full source and remove one pair if so.
 */
6635 snew(comm->cggl_flag, DIM*2);
6636 snew(comm->cgcm_state, DIM*2);
6638 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6639 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
/* Developer tuning knobs, all taken from environment variables */
6641 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
6642 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
6643 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
6644 recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
6645 comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
6646 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
6647 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
6648 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
6650 dd->pme_recv_f_alloc = 0;
6651 dd->pme_recv_f_buf = NULL;
6653 if (dd->bSendRecv2 && fplog)
6655 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6661 fprintf(fplog, "Will load balance based on FLOP count\n");
/* eFlop > 1 adds rank-dependent noise to the FLOP count for testing;
 * seed the generator differently on every rank.
 */
6663 if (comm->eFlop > 1)
6665 srand(1+cr->nodeid);
6667 comm->bRecordLoad = TRUE;
6671 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6675 /* Initialize to GPU share count to 0, might change later */
6676 comm->nrank_gpu_shared = 0;
6678 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6680 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6683 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6685 dd->bGridJump = comm->bDynLoadBal;
6686 comm->bPMELoadBalDLBLimits = FALSE;
6688 if (comm->nstSortCG)
6692 if (comm->nstSortCG == 1)
6694 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6698 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6702 snew(comm->sort, 1);
6708 fprintf(fplog, "Will not sort the charge groups\n");
/* bCGs: true when real (multi-atom) charge groups are present */
6712 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6714 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6715 if (comm->bInterCGBondeds)
6717 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6721 comm->bInterCGMultiBody = FALSE;
6724 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6725 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6727 if (ir->rlistlong == 0)
6729 /* Set the cut-off to some very large value,
6730 * so we don't need if statements everywhere in the code.
6731 * We use sqrt, since the cut-off is squared in some places.
6733 comm->cutoff = GMX_CUTOFF_INF;
6737 comm->cutoff = ir->rlistlong;
6739 comm->cutoff_mbody = 0;
6741 comm->cellsize_limit = 0;
6742 comm->bBondComm = FALSE;
/* Determine the minimum cell size required by bonded interactions */
6744 if (comm->bInterCGBondeds)
6746 if (comm_distance_min > 0)
6748 comm->cutoff_mbody = comm_distance_min;
6749 if (Flags & MD_DDBONDCOMM)
6751 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6755 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6757 r_bonded_limit = comm->cutoff_mbody;
6759 else if (ir->bPeriodicMols)
6761 /* Can not easily determine the required cut-off */
6762 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6763 comm->cutoff_mbody = comm->cutoff/2;
6764 r_bonded_limit = comm->cutoff_mbody;
/* Measure the actual two-body (r_2b) and multi-body (r_mb) bonded
 * distances in the starting conformation (master), then broadcast.
 */
6770 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6771 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6773 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6774 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6776 /* We use an initial margin of 10% for the minimum cell size,
6777 * except when we are just below the non-bonded cut-off.
6779 if (Flags & MD_DDBONDCOMM)
6781 if (max(r_2b, r_mb) > comm->cutoff)
6783 r_bonded = max(r_2b, r_mb);
6784 r_bonded_limit = 1.1*r_bonded;
6785 comm->bBondComm = TRUE;
6790 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6792 /* We determine cutoff_mbody later */
6796 /* No special bonded communication,
6797 * simply increase the DD cut-off.
6799 r_bonded_limit = 1.1*max(r_2b, r_mb);
6800 comm->cutoff_mbody = r_bonded_limit;
6801 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6804 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6808 "Minimum cell size due to bonded interactions: %.3f nm\n",
6809 comm->cellsize_limit);
/* Cell size limit from P-LINCS constraint coupling */
6813 if (dd->bInterCGcons && rconstr <= 0)
6815 /* There is a cell size limit due to the constraints (P-LINCS) */
6816 rconstr = constr_r_max(fplog, mtop, ir);
6820 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6822 if (rconstr > comm->cellsize_limit)
6824 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6828 else if (rconstr > 0 && fplog)
6830 /* Here we do not check for dd->bInterCGcons,
6831 * because one can also set a cell size limit for virtual sites only
6832 * and at this point we don't know yet if there are intercg v-sites.
6835 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6838 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6840 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
/* A user-supplied grid (-dd) is validated; otherwise one is chosen */
6844 copy_ivec(nc, dd->nc);
6845 set_dd_dim(fplog, dd);
6846 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6848 if (cr->npmenodes == -1)
6852 acs = average_cellsize_min(dd, ddbox);
6853 if (acs < comm->cellsize_limit)
6857 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6859 gmx_fatal_collective(FARGS, cr, NULL,
6860 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6861 acs, comm->cellsize_limit);
6866 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6868 /* We need to choose the optimal DD grid and possibly PME nodes */
6869 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6870 comm->eDLB != edlbNO, dlb_scale,
6871 comm->cellsize_limit, comm->cutoff,
6872 comm->bInterCGBondeds);
/* nc[XX] == 0 signals that no compatible grid was found */
6874 if (dd->nc[XX] == 0)
6876 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6877 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
6878 !bC ? "-rdd" : "-rcon",
6879 comm->eDLB != edlbNO ? " or -dds" : "",
6880 bC ? " or your LINCS settings" : "");
6882 gmx_fatal_collective(FARGS, cr, NULL,
6883 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
6885 "Look in the log file for details on the domain decomposition",
6886 cr->nnodes-cr->npmenodes, limit, buf);
6888 set_dd_dim(fplog, dd);
6894 "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
6895 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6898 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6899 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6901 gmx_fatal_collective(FARGS, cr, NULL,
6902 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
6903 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6905 if (cr->npmenodes > dd->nnodes)
6907 gmx_fatal_collective(FARGS, cr, NULL,
6908 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6910 if (cr->npmenodes > 0)
6912 comm->npmenodes = cr->npmenodes;
6916 comm->npmenodes = dd->nnodes;
/* Choose a 1D or 2D decomposition of the PME grid over the PME ranks */
6919 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6921 /* The following choices should match those
6922 * in comm_cost_est in domdec_setup.c.
6923 * Note that here the checks have to take into account
6924 * that the decomposition might occur in a different order than xyz
6925 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6926 * in which case they will not match those in comm_cost_est,
6927 * but since that is mainly for testing purposes that's fine.
6929 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6930 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6931 getenv("GMX_PMEONEDD") == NULL)
6933 comm->npmedecompdim = 2;
6934 comm->npmenodes_x = dd->nc[XX];
6935 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6939 /* In case nc is 1 in both x and y we could still choose to
6940 * decompose pme in y instead of x, but we use x for simplicity.
6942 comm->npmedecompdim = 1;
6943 if (dd->dim[0] == YY)
6945 comm->npmenodes_x = 1;
6946 comm->npmenodes_y = comm->npmenodes;
6950 comm->npmenodes_x = comm->npmenodes;
6951 comm->npmenodes_y = 1;
6956 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6957 comm->npmenodes_x, comm->npmenodes_y, 1);
6962 comm->npmedecompdim = 0;
6963 comm->npmenodes_x = 0;
6964 comm->npmenodes_y = 0;
6967 /* Technically we don't need both of these,
6968 * but it simplifies code not having to recalculate it.
6970 *npme_x = comm->npmenodes_x;
6971 *npme_y = comm->npmenodes_y;
/* Static (user-supplied) relative cell sizes, only without DLB */
6973 snew(comm->slb_frac, DIM);
6974 if (comm->eDLB == edlbNO)
6976 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6977 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6978 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
/* If not set yet, pick cutoff_mbody between the measured bonded
 * distance and the average cell size.
 */
6981 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6983 if (comm->bBondComm || comm->eDLB != edlbNO)
6985 /* Set the bonded communication distance to halfway
6986 * the minimum and the maximum,
6987 * since the extra communication cost is nearly zero.
6989 acs = average_cellsize_min(dd, ddbox);
6990 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6991 if (comm->eDLB != edlbNO)
6993 /* Check if this does not limit the scaling */
6994 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
6996 if (!comm->bBondComm)
6998 /* Without bBondComm do not go beyond the n.b. cut-off */
6999 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
7000 if (comm->cellsize_limit >= comm->cutoff)
7002 /* We don't loose a lot of efficieny
7003 * when increasing it to the n.b. cut-off.
7004 * It can even be slightly faster, because we need
7005 * less checks for the communication setup.
7007 comm->cutoff_mbody = comm->cutoff;
7010 /* Check if we did not end up below our original limit */
7011 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
7013 if (comm->cutoff_mbody > comm->cellsize_limit)
7015 comm->cellsize_limit = comm->cutoff_mbody;
7018 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
7023 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
7024 "cellsize limit %f\n",
7025 comm->bBondComm, comm->cellsize_limit);
7030 check_dd_restrictions(cr, dd, ir, fplog);
/* INT_MIN marks "never partitioned yet" */
7033 comm->partition_step = INT_MIN;
7036 clear_dd_cycle_counts(dd);
7041 static void set_dlb_limits(gmx_domdec_t *dd)
7046 for (d = 0; d < dd->ndim; d++)
7048 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7049 dd->comm->cellsize_min[dd->dim[d]] =
7050 dd->comm->cellsize_min_dlb[dd->dim[d]];
/* turn_on_dlb(): switch dynamic load balancing on at run time, after the
 * measured force-load imbalance exceeds the threshold.  If the current
 * minimum cell size leaves no room to shrink (< 1.05 * limit), DLB is
 * permanently disabled instead.  When enabling, the DLB cell boundaries
 * are initialized to the current uniform grid so nothing needs to be
 * communicated.
 *
 * NOTE(review): lines are missing from this extraction (declarations,
 * braces, the guard around the root-rank cell_f initialization).
 */
7055 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7058 gmx_domdec_comm_t *comm;
7068 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
/* Find the smallest current cell size over all decomposed dimensions */
7071 cellsize_min = comm->cellsize_min[dd->dim[0]];
7072 for (d = 1; d < dd->ndim; d++)
7074 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7077 if (cellsize_min < comm->cellsize_limit*1.05)
7079 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7081 /* Change DLB from "auto" to "no". */
7082 comm->eDLB = edlbNO;
7087 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7088 comm->bDynLoadBal = TRUE;
7089 dd->bGridJump = TRUE;
7093 /* We can set the required cell size info here,
7094 * so we do not need to communicate this.
7095 * The grid is completely uniform.
7097 for (d = 0; d < dd->ndim; d++)
7101 comm->load[d].sum_m = comm->load[d].sum;
7103 nc = dd->nc[dd->dim[d]];
/* Uniform cell boundaries: cell i spans [i/nc, (i+1)/nc) */
7104 for (i = 0; i < nc; i++)
7106 comm->root[d]->cell_f[i] = i/(real)nc;
7109 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7110 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7113 comm->root[d]->cell_f[nc] = 1.0;
7118 static char *init_bLocalCG(gmx_mtop_t *mtop)
7123 ncg = ncg_mtop(mtop);
7124 snew(bLocalCG, ncg);
7125 for (cg = 0; cg < ncg; cg++)
7127 bLocalCG[cg] = FALSE;
7133 void dd_init_bondeds(FILE *fplog,
7134 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7136 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7138 gmx_domdec_comm_t *comm;
7142 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7146 if (comm->bBondComm)
7148 /* Communicate atoms beyond the cut-off for bonded interactions */
7151 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7153 comm->bLocalCG = init_bLocalCG(mtop);
7157 /* Only communicate atoms based on cut-off */
7158 comm->cglink = NULL;
7159 comm->bLocalCG = NULL;
/* print_dd_settings(): write a summary of the DD setup to the log file:
 * communication pulse counts, minimum cell sizes, allowed DLB shrink per
 * dimension, and the maximum allowed interaction distances for bondeds,
 * virtual sites and constraints (the -rdd/-rcon limits).
 *
 * NOTE(review): lines are missing from this extraction (declarations,
 * braces, early-return and several fprintf continuations).
 */
7163 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7165 gmx_bool bDynLoadBal, real dlb_scale,
7168 gmx_domdec_comm_t *comm;
7183 fprintf(fplog, "The maximum number of communication pulses is:");
7184 for (d = 0; d < dd->ndim; d++)
7186 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7188 fprintf(fplog, "\n");
7189 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7190 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7191 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7192 for (d = 0; d < DIM; d++)
/* A non-pbc dimension with only 2 cells cannot shrink freely */
7196 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7203 comm->cellsize_min_dlb[d]/
7204 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7206 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7209 fprintf(fplog, "\n");
/* Static load balancing branch: report initial pulses and cell sizes */
7213 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
7214 fprintf(fplog, "The initial number of communication pulses is:");
7215 for (d = 0; d < dd->ndim; d++)
7217 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7219 fprintf(fplog, "\n");
7220 fprintf(fplog, "The initial domain decomposition cell size is:");
7221 for (d = 0; d < DIM; d++)
7225 fprintf(fplog, " %c %.2f nm",
7226 dim2char(d), dd->comm->cellsize_min[d]);
7229 fprintf(fplog, "\n\n");
7232 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7234 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7235 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7236 "non-bonded interactions", "", comm->cutoff);
7240 limit = dd->comm->cellsize_limit;
7244 if (dynamic_dd_box(ddbox, ir))
7246 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7248 limit = dd->comm->cellsize_min[XX];
7249 for (d = 1; d < DIM; d++)
7251 limit = min(limit, dd->comm->cellsize_min[d]);
7255 if (comm->bInterCGBondeds)
7257 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7258 "two-body bonded interactions", "(-rdd)",
7259 max(comm->cutoff, comm->cutoff_mbody));
7260 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7261 "multi-body bonded interactions", "(-rdd)",
7262 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7266 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7267 "virtual site constructions", "(-rcon)", limit);
7269 if (dd->constraint_comm)
7271 sprintf(buf, "atoms separated by up to %d constraints",
7273 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7274 buf, "(-rcon)", limit);
7276 fprintf(fplog, "\n");
/* set_cell_limits_dlb(): determine, for dynamic load balancing, the
 * maximum number of communication pulses per dimension (np_dlb), the
 * resulting overall cell size limit and the per-dimension minimum DLB
 * cell sizes.  More pulses allow smaller cells at the cost of extra
 * communication latency.
 *
 * NOTE(review): lines are missing from this extraction (braces, a few
 * assignments such as npulse clamping from the env var).
 */
7282 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7284 const t_inputrec *ir,
7285 const gmx_ddbox_t *ddbox)
7287 gmx_domdec_comm_t *comm;
7288 int d, dim, npulse, npulse_d_max, npulse_d;
/* rvdw/rcoulomb == 0 means "infinite cut-off" (e.g. vacuum) */
7293 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7295 /* Determine the maximum number of comm. pulses in one dimension */
7297 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7299 /* Determine the maximum required number of grid pulses */
7300 if (comm->cellsize_limit >= comm->cutoff)
7302 /* Only a single pulse is required */
7305 else if (!bNoCutOff && comm->cellsize_limit > 0)
7307 /* We round down slightly here to avoid overhead due to the latency
7308 * of extra communication calls when the cut-off
7309 * would be only slightly longer than the cell size.
7310 * Later cellsize_limit is redetermined,
7311 * so we can not miss interactions due to this rounding.
7313 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7317 /* There is no cell size limit */
7318 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7321 if (!bNoCutOff && npulse > 1)
7323 /* See if we can do with less pulses, based on dlb_scale */
7325 for (d = 0; d < dd->ndim; d++)
7328 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7329 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7330 npulse_d_max = max(npulse_d_max, npulse_d);
7332 npulse = min(npulse, npulse_d_max);
7335 /* This env var can override npulse */
7336 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
/* In vacuum (no PBC) cells can in principle shrink without limit,
 * unless the pulse count caps them below.
 */
7343 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7344 for (d = 0; d < dd->ndim; d++)
7346 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7347 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7348 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7349 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7350 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7352 comm->bVacDLBNoLimit = FALSE;
7356 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7357 if (!comm->bVacDLBNoLimit)
7359 comm->cellsize_limit = max(comm->cellsize_limit,
7360 comm->cutoff/comm->maxpulse);
7362 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7363 /* Set the minimum cell size for each DD dimension */
7364 for (d = 0; d < dd->ndim; d++)
7366 if (comm->bVacDLBNoLimit ||
7367 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7369 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7373 comm->cellsize_min_dlb[dd->dim[d]] =
7374 comm->cutoff/comm->cd[d].np_dlb;
7377 if (comm->cutoff_mbody <= 0)
7379 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7381 if (comm->bDynLoadBal)
7387 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7389 /* If each molecule is a single charge group
7390 * or we use domain decomposition for each periodic dimension,
7391 * we do not need to take pbc into account for the bonded interactions.
7393 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7396 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
/* set_dd_parameters(): finish the DD setup once information that is only
 * available after init_domain_decomposition is known (OpenMP thread
 * count, PME decomposition): allocate per-thread data, initialize the
 * PME redistribution, set DLB cell limits, print the settings summary
 * and size the global-to-local atom lookup table.
 *
 * NOTE(review): lines are missing from this extraction (declarations,
 * braces and else-branches).
 */
7399 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7400 t_inputrec *ir, gmx_ddbox_t *ddbox)
7402 gmx_domdec_comm_t *comm;
7408 /* Initialize the thread data.
7409 * This can not be done in init_domain_decomposition,
7410 * as the numbers of threads is determined later.
7412 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7415 snew(comm->dth, comm->nth);
7418 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7420 init_ddpme(dd, &comm->ddpme[0], 0);
7421 if (comm->npmedecompdim >= 2)
7423 init_ddpme(dd, &comm->ddpme[1], 1);
7428 comm->npmenodes = 0;
7429 if (dd->pme_nodeid >= 0)
7431 gmx_fatal_collective(FARGS, NULL, dd,
7432 "Can not have separate PME ranks without PME electrostatics");
7438 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7440 if (comm->eDLB != edlbNO)
7442 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7445 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
/* With -dlb auto also show what the settings would become with DLB on */
7446 if (comm->eDLB == edlbAUTO)
7450 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7452 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
/* Estimate the fraction of all atoms a rank will hold (home + halo),
 * used to size the ga2la hash table.
 */
7455 if (ir->ePBC == epbcNONE)
7457 vol_frac = 1 - 1/(double)dd->nnodes;
7462 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7466 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7468 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7470 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
/* test_dd_cutoff(): check whether the requested (larger) cut-off
 * cutoff_req is compatible with the current DD grid: for each decomposed
 * dimension the required number of communication pulses may not exceed
 * the DLB pulse allocation, and the current local cell sizes must still
 * cover the cut-off.  The per-rank result is reduced over all ranks.
 *
 * NOTE(review): lines are missing from this extraction (declarations,
 * braces, return statements and the LocallyLimited updates).
 */
7473 static gmx_bool test_dd_cutoff(t_commrec *cr,
7474 t_state *state, t_inputrec *ir,
7485 set_ddbox(dd, FALSE, cr, ir, state->box,
7486 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7490 for (d = 0; d < dd->ndim; d++)
7494 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7495 if (dynamic_dd_box(&ddbox, ir))
7497 inv_cell_size *= DD_PRES_SCALE_MARGIN;
/* np: number of pulses needed along this dimension for cutoff_req */
7500 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7502 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7503 dd->comm->cd[d].np_dlb > 0)
7505 if (np > dd->comm->cd[d].np_dlb)
7510 /* If a current local cell size is smaller than the requested
7511 * cut-off, we could still fix it, but this gets very complicated.
7512 * Without fixing here, we might actually need more checks.
7514 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7521 if (dd->comm->eDLB != edlbNO)
7523 /* If DLB is not active yet, we don't need to check the grid jumps.
7524 * Actually we shouldn't, because then the grid jump data is not set.
7526 if (dd->comm->bDynLoadBal &&
7527 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
/* Reduce the local verdicts over all ranks */
7532 gmx_sumi(1, &LocallyLimited, cr);
7534 if (LocallyLimited > 0)
7543 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7546 gmx_bool bCutoffAllowed;
7548 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7552 cr->dd->comm->cutoff = cutoff_req;
7555 return bCutoffAllowed;
7558 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7560 gmx_domdec_comm_t *comm;
7562 comm = cr->dd->comm;
7564 /* Turn on the DLB limiting (might have been on already) */
7565 comm->bPMELoadBalDLBLimits = TRUE;
7567 /* Change the cut-off limit */
7568 comm->PMELoadBal_max_cutoff = comm->cutoff;
/* merge_cg_buffers(): merge newly received charge groups (recv_i global
 * indices, recv_vr centers) of communication pulse 'pulse' into the
 * per-zone arrays.  First shifts the already stored data of later zones
 * to make room (and fixes the send indices of earlier pulses for the
 * shift), then appends the received charge groups zone by zone while
 * rebuilding cgindex and cginfo.
 *
 * NOTE(review): lines are missing from this extraction (braces and the
 * initialization of cg0/shift_at in the merge loop).
 */
7571 static void merge_cg_buffers(int ncell,
7572 gmx_domdec_comm_dim_t *cd, int pulse,
7574 int *index_gl, int *recv_i,
7575 rvec *cg_cm, rvec *recv_vr,
7577 cginfo_mb_t *cginfo_mb, int *cginfo)
7579 gmx_domdec_ind_t *ind, *ind_p;
7580 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7581 int shift, shift_at;
7583 ind = &cd->ind[pulse];
7585 /* First correct the already stored data */
7586 shift = ind->nrecv[ncell];
/* Walk zones backwards so the data is moved before being overwritten */
7587 for (cell = ncell-1; cell >= 0; cell--)
7589 shift -= ind->nrecv[cell];
7592 /* Move the cg's present from previous grid pulses */
7593 cg0 = ncg_cell[ncell+cell];
7594 cg1 = ncg_cell[ncell+cell+1];
7595 cgindex[cg1+shift] = cgindex[cg1];
7596 for (cg = cg1-1; cg >= cg0; cg--)
7598 index_gl[cg+shift] = index_gl[cg];
7599 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7600 cgindex[cg+shift] = cgindex[cg];
7601 cginfo[cg+shift] = cginfo[cg];
7603 /* Correct the already stored send indices for the shift */
7604 for (p = 1; p <= pulse; p++)
7606 ind_p = &cd->ind[p];
7608 for (c = 0; c < cell; c++)
7610 cg0 += ind_p->nsend[c];
7612 cg1 = cg0 + ind_p->nsend[cell];
7613 for (cg = cg0; cg < cg1; cg++)
7615 ind_p->index[cg] += shift;
7621 /* Merge in the communicated buffers */
7625 for (cell = 0; cell < ncell; cell++)
7627 cg1 = ncg_cell[ncell+cell+1] + shift;
7630 /* Correct the old cg indices */
7631 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7633 cgindex[cg+1] += shift_at;
7636 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7638 /* Copy this charge group from the buffer */
7639 index_gl[cg1] = recv_i[cg0];
7640 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7641 /* Add it to the cgindex */
7642 cg_gl = index_gl[cg1];
7643 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7644 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7645 cgindex[cg1+1] = cgindex[cg1] + nat;
7650 shift += ind->nrecv[cell];
7651 ncg_cell[ncell+cell+1] = cg1;
7655 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7656 int nzone, int cg0, const int *cgindex)
7660 /* Store the atom block boundaries for easy copying of communication buffers
7663 for (zone = 0; zone < nzone; zone++)
7665 for (p = 0; p < cd->np; p++)
7667 cd->ind[p].cell2at0[zone] = cgindex[cg];
7668 cg += cd->ind[p].nrecv[zone];
7669 cd->ind[p].cell2at1[zone] = cgindex[cg];
7674 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7680 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7682 if (!bLocalCG[link->a[i]])
7691 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
/* NOTE(review): the typedef struct { ... } dd_corners_t wrapper lines are
 * missing from this extraction; the name dd_corners_t is used below in
 * set_dd_corners()/get_zone_pulse_cgs().
 */
7693 real c[DIM][4]; /* the corners for the non-bonded communication */
7694 real cr0; /* corner for rounding */
7695 real cr1[4]; /* corners for rounding */
7696 real bc[DIM]; /* corners for bonded communication */
7697 real bcr1; /* corner for rounding for bonded communication */
7700 /* Determine the corners of the domain(s) we are communicating with */
/* set_dd_corners(): fill a dd_corners_t with the lower cell boundaries
 * of the zones this rank communicates with along the (up to three)
 * decomposed dimensions dim0/dim1/dim2, plus the "rounding" corners used
 * to trim the communicated volume and the maximum corners needed for
 * multi-body bonded communication under DLB (staggered grids).
 *
 * NOTE(review): lines are missing from this extraction (braces and some
 * assignments, e.g. c->c[1][1] under DLB and parts of the dim2 loops).
 */
7702 set_dd_corners(const gmx_domdec_t *dd,
7703 int dim0, int dim1, int dim2,
7707 const gmx_domdec_comm_t *comm;
7708 const gmx_domdec_zones_t *zones;
7713 zones = &comm->zones;
7715 /* Keep the compiler happy */
7719 /* The first dimension is equal for all cells */
7720 c->c[0][0] = comm->cell_x0[dim0];
7723 c->bc[0] = c->c[0][0];
7728 /* This cell row is only seen from the first row */
7729 c->c[1][0] = comm->cell_x0[dim1];
7730 /* All rows can see this row */
7731 c->c[1][1] = comm->cell_x0[dim1];
/* Under DLB neighbor rows can be staggered: take the maximum over
 * the communicated boundaries (zone_d1 holds the neighbor info).
 */
7734 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7737 /* For the multi-body distance we need the maximum */
7738 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7741 /* Set the upper-right corner for rounding */
7742 c->cr0 = comm->cell_x1[dim0];
7747 for (j = 0; j < 4; j++)
7749 c->c[2][j] = comm->cell_x0[dim2];
7753 /* Use the maximum of the i-cells that see a j-cell */
7754 for (i = 0; i < zones->nizone; i++)
7756 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7762 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7768 /* For the multi-body distance we need the maximum */
7769 c->bc[2] = comm->cell_x0[dim2];
7770 for (i = 0; i < 2; i++)
7772 for (j = 0; j < 2; j++)
7774 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7780 /* Set the upper-right corner for rounding */
7781 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7782 * Only cell (0,0,0) can see cell 7 (1,1,1)
7784 c->cr1[0] = comm->cell_x1[dim1];
7785 c->cr1[3] = comm->cell_x1[dim1];
7788 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7791 /* For the multi-body distance we need the maximum */
7792 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7799 /* Determine which cg's we need to send in this pulse from this zone */
/* get_zone_pulse_cgs(): scan charge groups [cg0,cg1) of zone 'zonei' and
 * select those whose distance to the receiving zone's corner planes is
 * within the non-bonded (r_comm2) or bonded (r_bcomm2) communication
 * distance (both squared).  Handles rectangular and triclinic cells,
 * corner "rounding" to trim the communicated volume, and screw PBC.
 * Selected groups are appended to ind->index, their global indices to
 * *ibuf, and their (pbc-corrected) centers to vbuf.
 *
 * NOTE(review): lines are missing from this extraction (declarations,
 * braces, several else-branches and the nsend/nat counters' updates).
 */
7801 get_zone_pulse_cgs(gmx_domdec_t *dd,
7802 int zonei, int zone,
7804 const int *index_gl,
7806 int dim, int dim_ind,
7807 int dim0, int dim1, int dim2,
7808 real r_comm2, real r_bcomm2,
7812 real skew_fac2_d, real skew_fac_01,
7813 rvec *v_d, rvec *v_0, rvec *v_1,
7814 const dd_corners_t *c,
7816 gmx_bool bDistBonded,
7822 gmx_domdec_ind_t *ind,
7823 int **ibuf, int *ibuf_nalloc,
7829 gmx_domdec_comm_t *comm;
7831 gmx_bool bDistMB_pulse;
7833 real r2, rb2, r, tric_sh;
7836 int nsend_z, nsend, nat;
7840 bScrew = (dd->bScrewPBC && dim == XX);
7842 bDistMB_pulse = (bDistMB && bDistBonded);
7848 for (cg = cg0; cg < cg1; cg++)
7852 if (tric_dist[dim_ind] == 0)
7854 /* Rectangular direction, easy */
7855 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7862 r = cg_cm[cg][dim] - c->bc[dim_ind];
7868 /* Rounding gives at most a 16% reduction
7869 * in communicated atoms
7871 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7873 r = cg_cm[cg][dim0] - c->cr0;
7874 /* This is the first dimension, so always r >= 0 */
7881 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7883 r = cg_cm[cg][dim1] - c->cr1[zone];
7890 r = cg_cm[cg][dim1] - c->bcr1;
7900 /* Triclinic direction, more complicated */
7903 /* Rounding, conservative as the skew_fac multiplication
7904 * will slightly underestimate the distance.
7906 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7908 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7909 for (i = dim0+1; i < DIM; i++)
7911 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7913 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7916 rb[dim0] = rn[dim0];
7919 /* Take care that the cell planes along dim0 might not
7920 * be orthogonal to those along dim1 and dim2.
7922 for (i = 1; i <= dim_ind; i++)
7925 if (normal[dim0][dimd] > 0)
7927 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7930 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7935 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7937 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7939 for (i = dim1+1; i < DIM; i++)
7941 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7943 rn[dim1] += tric_sh;
7946 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7947 /* Take care of coupling of the distances
7948 * to the planes along dim0 and dim1 through dim2.
7950 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7951 /* Take care that the cell planes along dim1
7952 * might not be orthogonal to that along dim2.
7954 if (normal[dim1][dim2] > 0)
7956 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7962 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7965 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7966 /* Take care of coupling of the distances
7967 * to the planes along dim0 and dim1 through dim2.
7969 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7970 /* Take care that the cell planes along dim1
7971 * might not be orthogonal to that along dim2.
7973 if (normal[dim1][dim2] > 0)
7975 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7980 /* The distance along the communication direction */
7981 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7983 for (i = dim+1; i < DIM; i++)
7985 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7990 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7991 /* Take care of coupling of the distances
7992 * to the planes along dim0 and dim1 through dim2.
7994 if (dim_ind == 1 && zonei == 1)
7996 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
8002 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
8005 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
8006 /* Take care of coupling of the distances
8007 * to the planes along dim0 and dim1 through dim2.
8009 if (dim_ind == 1 && zonei == 1)
8011 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
/* Selection: within the non-bonded distance, or within the bonded
 * distance and linked to a missing (non-local) charge group.
 */
8019 ((bDistMB && rb2 < r_bcomm2) ||
8020 (bDist2B && r2 < r_bcomm2)) &&
8022 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
8023 missing_link(comm->cglink, index_gl[cg],
8026 /* Make an index to the local charge groups */
8027 if (nsend+1 > ind->nalloc)
8029 ind->nalloc = over_alloc_large(nsend+1);
8030 srenew(ind->index, ind->nalloc);
8032 if (nsend+1 > *ibuf_nalloc)
8034 *ibuf_nalloc = over_alloc_large(nsend+1);
8035 srenew(*ibuf, *ibuf_nalloc);
8037 ind->index[nsend] = cg;
8038 (*ibuf)[nsend] = index_gl[cg];
8040 vec_rvec_check_alloc(vbuf, nsend+1);
8042 if (dd->ci[dim] == 0)
8044 /* Correct cg_cm for pbc */
8045 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
/* Screw PBC: shifting over x also mirrors y and z */
8048 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8049 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8054 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8057 nat += cgindex[cg+1] - cgindex[cg];
8063 *nsend_z_ptr = nsend_z;
/* Set up the halo communication for one domain-decomposition step.
 *
 * For each decomposed dimension and each communication pulse this routine:
 *  1) selects the charge groups (cgs) to send to the neighbor rank
 *     (via get_zone_pulse_cgs, parallelized over OpenMP threads),
 *  2) exchanges the send/receive counts and the global cg indices,
 *  3) exchanges the cg centers of mass (group cutoff scheme),
 *  4) extends the zone cg ranges and per-atom bookkeeping (cgindex,
 *     cginfo, bLocalCG) with the received cgs.
 *
 * NOTE(review): this excerpt is line-sampled; braces and some statements
 * between the shown original line numbers are not visible here.
 *
 * dd    - domain decomposition data (zones, indices, comm buffers)
 * box   - the simulation box
 * ddbox - DD box info incl. triclinic skew factors and normals
 * fr    - force record (cutoff scheme, cg_cm, cginfo)
 * state - local state, resized when more cgs arrive
 * f     - force array pointer, resized along with the state
 */
8066 static void setup_dd_communication(gmx_domdec_t *dd,
8067 matrix box, gmx_ddbox_t *ddbox,
8068 t_forcerec *fr, t_state *state, rvec **f)
8070 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8071 int nzone, nzone_send, zone, zonei, cg0, cg1;
8072 int c, i, j, cg, cg_gl, nrcg;
8073 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8074 gmx_domdec_comm_t *comm;
8075 gmx_domdec_zones_t *zones;
8076 gmx_domdec_comm_dim_t *cd;
8077 gmx_domdec_ind_t *ind;
8078 cginfo_mb_t *cginfo_mb;
8079 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8080 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8081 dd_corners_t corners;
8083 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8084 real skew_fac2_d, skew_fac_01;
8091 fprintf(debug, "Setting up DD communication\n");
/* cg_cm is scheme dependent; only the group scheme has per-cg COMs
 * (the Verlet branch is not visible in this excerpt). */
8096 switch (fr->cutoff_scheme)
8105 gmx_incons("unimplemented");
/* Decide per dimension whether triclinic distance formulas are needed:
 * any triclinic dir among dims 0..dim_ind forces the triclinic path. */
8109 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8111 dim = dd->dim[dim_ind];
8113 /* Check if we need to use triclinic distances */
8114 tric_dist[dim_ind] = 0;
8115 for (i = 0; i <= dim_ind; i++)
8117 if (ddbox->tric_dir[dd->dim[i]])
8119 tric_dist[dim_ind] = 1;
8124 bBondComm = comm->bBondComm;
8126 /* Do we need to determine extra distances for multi-body bondeds? */
8127 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8129 /* Do we need to determine extra distances for only two-body bondeds? */
8130 bDist2B = (bBondComm && !bDistMB);
/* Squared cutoffs: non-bonded comm distance and bonded (mbody) distance */
8132 r_comm2 = sqr(comm->cutoff);
8133 r_bcomm2 = sqr(comm->cutoff_mbody);
8137 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8140 zones = &comm->zones;
/* dim1/dim2 are -1 when fewer than 2/3 dimensions are decomposed */
8143 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8144 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8146 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8148 /* Triclinic stuff */
8149 normal = ddbox->normal;
8153 v_0 = ddbox->v[dim0];
8154 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8156 /* Determine the coupling coefficient for the distances
8157 * to the cell planes along dim0 and dim1 through dim2.
8158 * This is required for correct rounding.
8161 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8164 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8170 v_1 = ddbox->v[dim1];
/* Zone 0 is the home zone; seed the cg ranges with the home cgs */
8173 zone_cg_range = zones->cg_range;
8174 index_gl = dd->index_gl;
8175 cgindex = dd->cgindex;
8176 cginfo_mb = fr->cginfo_mb;
8178 zone_cg_range[0] = 0;
8179 zone_cg_range[1] = dd->ncg_home;
8180 comm->zone_ncg1[0] = dd->ncg_home;
8181 pos_cg = dd->ncg_home;
8183 nat_tot = dd->nat_home;
/* Main loop: one backward send/receive per pulse per DD dimension */
8185 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8187 dim = dd->dim[dim_ind];
8188 cd = &comm->cd[dim_ind];
8190 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8192 /* No pbc in this dimension, the first node should not comm. */
8200 v_d = ddbox->v[dim];
8201 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8203 cd->bInPlace = TRUE;
8204 for (p = 0; p < cd->np; p++)
8206 /* Only atoms communicated in the first pulse are used
8207 * for multi-body bonded interactions or for bBondComm.
8209 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8214 for (zone = 0; zone < nzone_send; zone++)
8216 if (tric_dist[dim_ind] && dim_ind > 0)
8218 /* Determine slightly more optimized skew_fac's
8220 * This reduces the number of communicated atoms
8221 * by about 10% for 3D DD of rhombic dodecahedra.
8223 for (dimd = 0; dimd < dim; dimd++)
8225 sf2_round[dimd] = 1;
8226 if (ddbox->tric_dir[dimd])
8228 for (i = dd->dim[dimd]+1; i < DIM; i++)
8230 /* If we are shifted in dimension i
8231 * and the cell plane is tilted forward
8232 * in dimension i, skip this coupling.
8234 if (!(zones->shift[nzone+zone][i] &&
8235 ddbox->v[dimd][i][dimd] >= 0))
8238 sqr(ddbox->v[dimd][i][dimd]);
8241 sf2_round[dimd] = 1/sf2_round[dimd];
8246 zonei = zone_perm[dim_ind][zone];
8249 /* Here we permute the zones to obtain a convenient order
8250 * for neighbor searching
8252 cg0 = zone_cg_range[zonei];
8253 cg1 = zone_cg_range[zonei+1];
8257 /* Look only at the cg's received in the previous grid pulse
8259 cg1 = zone_cg_range[nzone+zone+1];
8260 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
/* Thread-parallel cg selection; each thread works on an equal-size
 * slice [cg0_th, cg1_th) of the zone's cg range. */
8263 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8264 for (th = 0; th < comm->nth; th++)
8266 gmx_domdec_ind_t *ind_p;
8267 int **ibuf_p, *ibuf_nalloc_p;
8269 int *nsend_p, *nat_p;
8275 /* Thread 0 writes in the comm buffers */
8277 ibuf_p = &comm->buf_int;
8278 ibuf_nalloc_p = &comm->nalloc_int;
8279 vbuf_p = &comm->vbuf;
8282 nsend_zone_p = &ind->nsend[zone];
8286 /* Other threads write into temp buffers */
8287 ind_p = &comm->dth[th].ind;
8288 ibuf_p = &comm->dth[th].ibuf;
8289 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8290 vbuf_p = &comm->dth[th].vbuf;
8291 nsend_p = &comm->dth[th].nsend;
8292 nat_p = &comm->dth[th].nat;
8293 nsend_zone_p = &comm->dth[th].nsend_zone;
8295 comm->dth[th].nsend = 0;
8296 comm->dth[th].nat = 0;
8297 comm->dth[th].nsend_zone = 0;
8307 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8308 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8311 /* Get the cg's for this pulse in this zone */
8312 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8314 dim, dim_ind, dim0, dim1, dim2,
8317 normal, skew_fac2_d, skew_fac_01,
8318 v_d, v_0, v_1, &corners, sf2_round,
8319 bDistBonded, bBondComm,
8323 ibuf_p, ibuf_nalloc_p,
8329 /* Append data of threads>=1 to the communication buffers */
8330 for (th = 1; th < comm->nth; th++)
8332 dd_comm_setup_work_t *dth;
8335 dth = &comm->dth[th];
8337 ns1 = nsend + dth->nsend_zone;
8338 if (ns1 > ind->nalloc)
8340 ind->nalloc = over_alloc_dd(ns1);
8341 srenew(ind->index, ind->nalloc);
8343 if (ns1 > comm->nalloc_int)
8345 comm->nalloc_int = over_alloc_dd(ns1);
8346 srenew(comm->buf_int, comm->nalloc_int);
8348 if (ns1 > comm->vbuf.nalloc)
8350 comm->vbuf.nalloc = over_alloc_dd(ns1);
8351 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8354 for (i = 0; i < dth->nsend_zone; i++)
8356 ind->index[nsend] = dth->ind.index[i];
8357 comm->buf_int[nsend] = dth->ibuf[i];
8358 copy_rvec(dth->vbuf.v[i],
8359 comm->vbuf.v[nsend]);
8363 ind->nsend[zone] += dth->nsend_zone;
8366 /* Clear the counts in case we do not have pbc */
8367 for (zone = nzone_send; zone < nzone; zone++)
8369 ind->nsend[zone] = 0;
/* Slots nzone and nzone+1 carry the total cg and atom send counts */
8371 ind->nsend[nzone] = nsend;
8372 ind->nsend[nzone+1] = nat;
8373 /* Communicate the number of cg's and atoms to receive */
8374 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8375 ind->nsend, nzone+2,
8376 ind->nrecv, nzone+2);
8378 /* The rvec buffer is also required for atom buffers of size nsend
8379 * in dd_move_x and dd_move_f.
8381 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8385 /* We can receive in place if only the last zone is not empty */
8386 for (zone = 0; zone < nzone-1; zone++)
8388 if (ind->nrecv[zone] > 0)
8390 cd->bInPlace = FALSE;
8395 /* The int buffer is only required here for the cg indices */
8396 if (ind->nrecv[nzone] > comm->nalloc_int2)
8398 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8399 srenew(comm->buf_int2, comm->nalloc_int2);
8401 /* The rvec buffer is also required for atom buffers
8402 * of size nrecv in dd_move_x and dd_move_f.
8404 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8405 vec_rvec_check_alloc(&comm->vbuf2, i);
8409 /* Make space for the global cg indices */
8410 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8411 || dd->cg_nalloc == 0)
8413 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8414 srenew(index_gl, dd->cg_nalloc);
8415 srenew(cgindex, dd->cg_nalloc+1);
8417 /* Communicate the global cg indices */
/* In-place: receive directly at the end of index_gl; otherwise via
 * the intermediate buffer and merge later (merge_cg_buffers). */
8420 recv_i = index_gl + pos_cg;
8424 recv_i = comm->buf_int2;
8426 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8427 comm->buf_int, nsend,
8428 recv_i, ind->nrecv[nzone]);
8430 /* Make space for cg_cm */
8431 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8432 if (fr->cutoff_scheme == ecutsGROUP)
8440 /* Communicate cg_cm */
8443 recv_vr = cg_cm + pos_cg;
8447 recv_vr = comm->vbuf2.v;
8449 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8450 comm->vbuf.v, nsend,
8451 recv_vr, ind->nrecv[nzone]);
8453 /* Make the charge group index */
8456 zone = (p == 0 ? 0 : nzone - 1);
8457 while (zone < nzone)
8459 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8461 cg_gl = index_gl[pos_cg];
8462 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8463 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8464 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8467 /* Update the charge group presence,
8468 * so we can use it in the next pass of the loop.
8470 comm->bLocalCG[cg_gl] = TRUE;
8476 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8479 zone_cg_range[nzone+zone] = pos_cg;
8484 /* This part of the code is never executed with bBondComm. */
8485 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8486 index_gl, recv_i, cg_cm, recv_vr,
8487 cgindex, fr->cginfo_mb, fr->cginfo)
8488 pos_cg += ind->nrecv[nzone];
8490 nat_tot += ind->nrecv[nzone+1];
8494 /* Store the atom block for easy copying of communication buffers */
8495 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
/* srenew may have moved the arrays; store the (possibly new) pointers */
8499 dd->index_gl = index_gl;
8500 dd->cgindex = cgindex;
8502 dd->ncg_tot = zone_cg_range[zones->n];
8503 dd->nat_tot = nat_tot;
8504 comm->nat[ddnatHOME] = dd->nat_home;
8505 for (i = ddnatZONE; i < ddnatNR; i++)
8507 comm->nat[i] = dd->nat_tot;
8512 /* We don't need to update cginfo, since that was already done above.
8513 * So we pass NULL for the forcerec.
8515 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8516 NULL, comm->bLocalCG);
8521 fprintf(debug, "Finished setting up DD communication, zones:");
8522 for (c = 0; c < zones->n; c++)
8524 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8526 fprintf(debug, "\n");
/* Set the charge-group boundaries used for neighbor searching:
 * for each i-zone, derive the cg end index (cg1) and the j-cg range
 * [jcg0, jcg1) from the per-zone cg_range array filled in by
 * setup_dd_communication. */
8530 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8534 for (c = 0; c < zones->nizone; c++)
8536 zones->izone[c].cg1 = zones->cg_range[c+1];
8537 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8538 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
/* Determine the spatial extent of the DD zones [zone_start, zone_end)
 * and their triclinic-corrected bounding boxes.
 *
 * Starts from the local cell limits, widens shifted dimensions by the
 * communication cutoff (and the multi-body cutoff when bDistMB), takes
 * staggered-grid (bGridJump) neighbor limits into account, and finally
 * computes bb_x0/bb_x1 by taking extreme corners under the triclinic
 * box couplings. Also sets the home-zone cg density when zone_start==0.
 *
 * NOTE(review): this excerpt is line-sampled; some statements between
 * the shown original line numbers are not visible here.
 */
8542 static void set_zones_size(gmx_domdec_t *dd,
8543 matrix box, const gmx_ddbox_t *ddbox,
8544 int zone_start, int zone_end)
8546 gmx_domdec_comm_t *comm;
8547 gmx_domdec_zones_t *zones;
8549 int z, zi, zj0, zj1, d, dim;
8552 real size_j, add_tric;
8557 zones = &comm->zones;
8559 /* Do we need to determine extra distances for multi-body bondeds? */
8560 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8562 for (z = zone_start; z < zone_end; z++)
8564 /* Copy cell limits to zone limits.
8565 * Valid for non-DD dims and non-shifted dims.
8567 copy_rvec(comm->cell_x0, zones->size[z].x0);
8568 copy_rvec(comm->cell_x1, zones->size[z].x1);
8571 for (d = 0; d < dd->ndim; d++)
8575 for (z = 0; z < zones->n; z++)
8577 /* With a staggered grid we have different sizes
8578 * for non-shifted dimensions.
8580 if (dd->bGridJump && zones->shift[z][dim] == 0)
/* zone_d1/zone_d2 hold the staggered limits exchanged with the
 * neighbor domains in 1 resp. 2 decomposed dimensions */
8584 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8585 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8589 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8590 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
/* Scale the cutoffs by the skew factor for triclinic dims, so the
 * comparison below works with plane-to-plane distances */
8596 rcmbs = comm->cutoff_mbody;
8597 if (ddbox->tric_dir[dim])
8599 rcs /= ddbox->skew_fac[dim];
8600 rcmbs /= ddbox->skew_fac[dim];
8603 /* Set the lower limit for the shifted zone dimensions */
8604 for (z = zone_start; z < zone_end; z++)
8606 if (zones->shift[z][dim] > 0)
8609 if (!dd->bGridJump || d == 0)
8611 zones->size[z].x0[dim] = comm->cell_x1[dim];
8612 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8616 /* Here we take the lower limit of the zone from
8617 * the lowest domain of the zone below.
8621 zones->size[z].x0[dim] =
8622 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8628 zones->size[z].x0[dim] =
8629 zones->size[zone_perm[2][z-4]].x0[dim];
8633 zones->size[z].x0[dim] =
8634 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8637 /* A temporary limit, is updated below */
8638 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8642 for (zi = 0; zi < zones->nizone; zi++)
8644 if (zones->shift[zi][dim] == 0)
8646 /* This takes the whole zone into account.
8647 * With multiple pulses this will lead
8648 * to a larger zone than strictly necessary.
8650 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8651 zones->size[zi].x1[dim]+rcmbs);
8659 /* Loop over the i-zones to set the upper limit of each
8662 for (zi = 0; zi < zones->nizone; zi++)
8664 if (zones->shift[zi][dim] == 0)
8666 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8668 if (zones->shift[z][dim] > 0)
8670 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8671 zones->size[zi].x1[dim]+rcs);
/* Second pass: turn the axis-aligned zone limits into bounding
 * boxes that account for the triclinic box couplings */
8678 for (z = zone_start; z < zone_end; z++)
8680 /* Initialization only required to keep the compiler happy */
8681 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8684 /* To determine the bounding box for a zone we need to find
8685 * the extreme corners of 4, 2 or 1 corners.
8687 nc = 1 << (ddbox->npbcdim - 1);
8689 for (c = 0; c < nc; c++)
8691 /* Set up a zone corner at x=0, ignoring triclinic couplings */
8695 corner[YY] = zones->size[z].x0[YY];
8699 corner[YY] = zones->size[z].x1[YY];
8703 corner[ZZ] = zones->size[z].x0[ZZ];
8707 corner[ZZ] = zones->size[z].x1[ZZ];
8709 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8711 /* With 1D domain decomposition the cg's are not in
8712 * the triclinic box, but triclinic x-y and rectangular y-z.
8713 * Shift y back, so it will later end up at 0.
8715 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8717 /* Apply the triclinic couplings */
8718 assert(ddbox->npbcdim <= DIM);
8719 for (i = YY; i < ddbox->npbcdim; i++)
8721 for (j = XX; j < i; j++)
8723 corner[j] += corner[i]*box[i][j]/box[i][i];
8728 copy_rvec(corner, corner_min);
8729 copy_rvec(corner, corner_max);
8733 for (i = 0; i < DIM; i++)
8735 corner_min[i] = min(corner_min[i], corner[i]);
8736 corner_max[i] = max(corner_max[i], corner[i]);
8740 /* Copy the extreme corners without offset along x */
8741 for (i = 0; i < DIM; i++)
8743 zones->size[z].bb_x0[i] = corner_min[i];
8744 zones->size[z].bb_x1[i] = corner_max[i];
8746 /* Add the offset along x */
8747 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8748 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
/* Home-zone cg number density, used by the Verlet-scheme gridding */
8751 if (zone_start == 0)
8754 for (dim = 0; dim < DIM; dim++)
8756 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8758 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8763 for (z = zone_start; z < zone_end; z++)
8765 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8767 zones->size[z].x0[XX], zones->size[z].x1[XX],
8768 zones->size[z].x0[YY], zones->size[z].x1[YY],
8769 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8770 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8772 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8773 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8774 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
/* qsort comparison function for gmx_cgsort_t:
 * primary key is the ns grid cell index (nsc), secondary key is the
 * global cg index (ind_gl), giving a deterministic total order. */
8779 static int comp_cgsort(const void *a, const void *b)
8783 gmx_cgsort_t *cga, *cgb;
8784 cga = (gmx_cgsort_t *)a;
8785 cgb = (gmx_cgsort_t *)b;
8787 comp = cga->nsc - cgb->nsc;
/* Tie-break on the global index (applied when nsc values are equal) */
8790 comp = cga->ind_gl - cgb->ind_gl;
/* Permute the int array a[0..n) into the order given by sort[i].ind,
 * using buf (size >= n) as scratch, then copy the result back into a. */
8796 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8801 /* Order the data */
8802 for (i = 0; i < n; i++)
8804 buf[i] = a[sort[i].ind];
8807 /* Copy back to the original array */
8808 for (i = 0; i < n; i++)
/* Permute the rvec array v[0..n) into the order given by sort[i].ind,
 * using buf (size >= n) as scratch; the in-place analogue of
 * order_int_cg for vectors. */
8814 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8819 /* Order the data */
8820 for (i = 0; i < n; i++)
8822 copy_rvec(v[sort[i].ind], buf[i]);
8825 /* Copy back to the original array */
8826 for (i = 0; i < n; i++)
8828 copy_rvec(buf[i], v[i]);
/* Permute a per-atom rvec array v according to the charge-group order in
 * sort, expanding each cg to its atom range via cgindex.
 * With cgindex == NULL every cg is a single atom and the simpler
 * order_vec_cg path is used instead. buf is scratch of at least the
 * total atom count. */
8832 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8835 int a, atot, cg, cg0, cg1, i;
8837 if (cgindex == NULL)
8839 /* Avoid the useless loop of the atoms within a cg */
8840 order_vec_cg(ncg, sort, v, buf);
8845 /* Order the data */
8847 for (cg = 0; cg < ncg; cg++)
8849 cg0 = cgindex[sort[cg].ind];
8850 cg1 = cgindex[sort[cg].ind+1];
8851 for (i = cg0; i < cg1; i++)
8853 copy_rvec(v[i], buf[a]);
8859 /* Copy back to the original array */
8860 for (a = 0; a < atot; a++)
8862 copy_rvec(buf[a], v[a]);
/* Merge-sort two cg lists into sort1: sort2 is already ordered, sort_new
 * (the cgs that moved) is qsorted first, then the two are merged with
 * the same (nsc, ind_gl) ordering that comp_cgsort defines. */
8866 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8867 int nsort_new, gmx_cgsort_t *sort_new,
8868 gmx_cgsort_t *sort1)
8872 /* The new indices are not very ordered, so we qsort them */
8873 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8875 /* sort2 is already ordered, so now we can merge the two arrays */
8879 while (i2 < nsort2 || i_new < nsort_new)
/* One input exhausted: drain the other */
8883 sort1[i1++] = sort_new[i_new++];
8885 else if (i_new == nsort_new)
8887 sort1[i1++] = sort2[i2++];
/* Both non-empty: take the smaller element per (nsc, ind_gl) */
8889 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8890 (sort2[i2].nsc == sort_new[i_new].nsc &&
8891 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8893 sort1[i1++] = sort2[i2++];
8897 sort1[i1++] = sort_new[i_new++];
/* Determine the charge-group sort order for the group cutoff scheme,
 * based on the ns grid cell indices.
 *
 * When ncg_home_old >= 0 the cgs that stayed in their grid cell are
 * already ordered, so only the moved/new cgs are qsorted and merged
 * (ordered_sort); otherwise a full qsort over all home cgs is done.
 * Returns the new number of home cgs (cgs flagged as moved to another
 * node are excluded from the count).
 *
 * NOTE(review): line-sampled excerpt; some statements are not visible.
 */
8902 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8904 gmx_domdec_sort_t *sort;
8905 gmx_cgsort_t *cgsort, *sort_i;
8906 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8907 int sort_last, sort_skip;
8909 sort = dd->comm->sort;
8911 a = fr->ns.grid->cell_index;
/* Cell indices >= moved signal cgs that left this node */
8913 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8915 if (ncg_home_old >= 0)
8917 /* The charge groups that remained in the same ns grid cell
8918 * are completely ordered. So we can sort efficiently by sorting
8919 * the charge groups that did move into the stationary list.
8924 for (i = 0; i < dd->ncg_home; i++)
8926 /* Check if this cg did not move to another node */
8929 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8931 /* This cg is new on this node or moved ns grid cell */
8932 if (nsort_new >= sort->sort_new_nalloc)
8934 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8935 srenew(sort->sort_new, sort->sort_new_nalloc);
8937 sort_i = &(sort->sort_new[nsort_new++]);
8941 /* This cg did not move */
8942 sort_i = &(sort->sort2[nsort2++]);
8944 /* Sort on the ns grid cell indices
8945 * and the global topology index.
8946 * index_gl is irrelevant with cell ns,
8947 * but we set it here anyhow to avoid a conditional.
8950 sort_i->ind_gl = dd->index_gl[i];
8957 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8960 /* Sort efficiently */
8961 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
/* Fallback: build the full key list and qsort everything */
8966 cgsort = sort->sort;
8968 for (i = 0; i < dd->ncg_home; i++)
8970 /* Sort on the ns grid cell indices
8971 * and the global topology index
8973 cgsort[i].nsc = a[i];
8974 cgsort[i].ind_gl = dd->index_gl[i];
8976 if (cgsort[i].nsc < moved)
8983 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8985 /* Determine the order of the charge groups using qsort */
8986 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
/* Determine the cg sort order for the Verlet (nbnxn) cutoff scheme:
 * take the atom order straight from the nbnxn search grid and store it
 * in the sort array. Returns the resulting number of entries. */
8992 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
8995 int ncg_new, i, *a, na;
8997 sort = dd->comm->sort->sort;
8999 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
9002 for (i = 0; i < na; i++)
9006 sort[ncg_new].ind = a[i];
/* Sort the local state on charge-group position (ns grid order).
 *
 * Computes the sort order for the active cutoff scheme, then reorders
 * every distributed state array (x, v, sd_X, cg_p), cg_cm (group scheme
 * only), the global cg index, cginfo, and rebuilds the local cg index.
 * Also drops cgs that moved to another node (dd->ncg_home shrinks to
 * ncg_new) and writes the sorted cell indices back to the ns grid.
 * Pass ncg_home_old = -1 to force a full resort.
 *
 * NOTE(review): line-sampled excerpt; some statements are not visible.
 */
9014 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
9017 gmx_domdec_sort_t *sort;
9018 gmx_cgsort_t *cgsort, *sort_i;
9020 int ncg_new, i, *ibuf, cgsize;
9023 sort = dd->comm->sort;
9025 if (dd->ncg_home > sort->sort_nalloc)
9027 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9028 srenew(sort->sort, sort->sort_nalloc);
9029 srenew(sort->sort2, sort->sort_nalloc);
9031 cgsort = sort->sort;
/* Scheme-specific ordering: grid cells (group) or nbnxn atom order */
9033 switch (fr->cutoff_scheme)
9036 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9039 ncg_new = dd_sort_order_nbnxn(dd, fr);
9042 gmx_incons("unimplemented");
9046 /* We alloc with the old size, since cgindex is still old */
9047 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9048 vbuf = dd->comm->vbuf.v;
9052 cgindex = dd->cgindex;
9059 /* Remove the charge groups which are no longer at home here */
9060 dd->ncg_home = ncg_new;
9063 fprintf(debug, "Set the new home charge group count to %d\n",
9067 /* Reorder the state */
9068 for (i = 0; i < estNR; i++)
9070 if (EST_DISTR(i) && (state->flags & (1<<i)))
9075 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9078 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9081 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9084 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9088 case estDISRE_INITF:
9089 case estDISRE_RM3TAV:
9090 case estORIRE_INITF:
9092 /* No ordering required */
9095 gmx_incons("Unknown state entry encountered in dd_sort_state")
9100 if (fr->cutoff_scheme == ecutsGROUP)
9103 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9106 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9108 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9109 srenew(sort->ibuf, sort->ibuf_nalloc);
9112 /* Reorder the global cg index */
9113 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9114 /* Reorder the cginfo */
9115 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9116 /* Rebuild the local cg index */
9120 for (i = 0; i < dd->ncg_home; i++)
9122 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9123 ibuf[i+1] = ibuf[i] + cgsize;
9125 for (i = 0; i < dd->ncg_home+1; i++)
9127 dd->cgindex[i] = ibuf[i];
9132 for (i = 0; i < dd->ncg_home+1; i++)
9137 /* Set the home atom number */
9138 dd->nat_home = dd->cgindex[dd->ncg_home];
9140 if (fr->cutoff_scheme == ecutsVERLET)
9142 /* The atoms are now exactly in grid order, update the grid order */
9143 nbnxn_set_atomorder(fr->nbv->nbs);
9147 /* Copy the sorted ns cell indices back to the ns grid struct */
9148 for (i = 0; i < dd->ncg_home; i++)
9150 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9152 fr->ns.grid->nr = dd->ncg_home;
/* Accumulate per-category communicated-atom counts into the running
 * DD statistics: for each ddnat category add the count of atoms in that
 * category (difference of the cumulative comm->nat entries). */
9156 static void add_dd_statistics(gmx_domdec_t *dd)
9158 gmx_domdec_comm_t *comm;
9163 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9165 comm->sum_nat[ddnat-ddnatZONE] +=
9166 comm->nat[ddnat] - comm->nat[ddnat-1];
/* Reset all accumulated DD statistics and load-balancing counters,
 * so that subsequent statistics cover only the production part of
 * the run (public API, hence non-static). */
9171 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9173 gmx_domdec_comm_t *comm;
9178 /* Reset all the statistics and counters for total run counting */
9179 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9181 comm->sum_nat[ddnat-ddnatZONE] = 0;
9185 comm->load_step = 0;
9188 clear_ivec(comm->load_lim);
/* Print the domain-decomposition communication statistics to the log:
 * sums the per-category atom counts over all ranks (gmx_sumd), then
 * reports the average number of atoms communicated per step for forces,
 * vsites and LINCS constraints, and finally the DD load balance summary
 * when load was recorded for a dynamics run. */
9193 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9195 gmx_domdec_comm_t *comm;
9199 comm = cr->dd->comm;
/* Global reduction of the per-category sums over all DD ranks */
9201 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9208 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9210 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9212 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9217 " av. #atoms communicated per step for force: %d x %.1f\n",
9221 if (cr->dd->vsite_comm)
9224 " av. #atoms communicated per step for vsites: %d x %.1f\n",
/* x (and optionally shifted x) forth plus f back: 3 with PME/Ewald */
9225 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9230 if (cr->dd->constraint_comm)
9233 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9234 1 + ir->nLincsIter, av);
9238 gmx_incons(" Unknown type for DD statistics");
9241 fprintf(fplog, "\n");
9243 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9245 print_dd_load_av(fplog, cr->dd);
9249 void dd_partition_system(FILE *fplog,
9252 gmx_bool bMasterState,
9254 t_state *state_global,
9255 gmx_mtop_t *top_global,
9257 t_state *state_local,
9260 gmx_localtop_t *top_local,
9263 gmx_shellfc_t shellfc,
9264 gmx_constr_t constr,
9266 gmx_wallcycle_t wcycle,
9270 gmx_domdec_comm_t *comm;
9271 gmx_ddbox_t ddbox = {0};
9273 gmx_int64_t step_pcoupl;
9274 rvec cell_ns_x0, cell_ns_x1;
9275 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9276 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9277 gmx_bool bRedist, bSortCG, bResortAll;
9278 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9285 bBoxChanged = (bMasterState || DEFORM(*ir));
9286 if (ir->epc != epcNO)
9288 /* With nstpcouple > 1 pressure coupling happens.
9289 * one step after calculating the pressure.
9290 * Box scaling happens at the end of the MD step,
9291 * after the DD partitioning.
9292 * We therefore have to do DLB in the first partitioning
9293 * after an MD step where P-coupling occured.
9294 * We need to determine the last step in which p-coupling occurred.
9295 * MRS -- need to validate this for vv?
9300 step_pcoupl = step - 1;
9304 step_pcoupl = ((step - 1)/n)*n + 1;
9306 if (step_pcoupl >= comm->partition_step)
9312 bNStGlobalComm = (step % nstglobalcomm == 0);
9314 if (!comm->bDynLoadBal)
9320 /* Should we do dynamic load balacing this step?
9321 * Since it requires (possibly expensive) global communication,
9322 * we might want to do DLB less frequently.
9324 if (bBoxChanged || ir->epc != epcNO)
9326 bDoDLB = bBoxChanged;
9330 bDoDLB = bNStGlobalComm;
9334 /* Check if we have recorded loads on the nodes */
9335 if (comm->bRecordLoad && dd_load_count(comm))
9337 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9339 /* Check if we should use DLB at the second partitioning
9340 * and every 100 partitionings,
9341 * so the extra communication cost is negligible.
9343 n = max(100, nstglobalcomm);
9344 bCheckDLB = (comm->n_load_collect == 0 ||
9345 comm->n_load_have % n == n-1);
9352 /* Print load every nstlog, first and last step to the log file */
9353 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9354 comm->n_load_collect == 0 ||
9356 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9358 /* Avoid extra communication due to verbose screen output
9359 * when nstglobalcomm is set.
9361 if (bDoDLB || bLogLoad || bCheckDLB ||
9362 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9364 get_load_distribution(dd, wcycle);
9369 dd_print_load(fplog, dd, step-1);
9373 dd_print_load_verbose(dd);
9376 comm->n_load_collect++;
9380 /* Since the timings are node dependent, the master decides */
9384 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
9387 fprintf(debug, "step %s, imb loss %f\n",
9388 gmx_step_str(step, sbuf),
9389 dd_force_imb_perf_loss(dd));
9392 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9395 turn_on_dlb(fplog, cr, step);
9400 comm->n_load_have++;
9403 cgs_gl = &comm->cgs_gl;
9408 /* Clear the old state */
9409 clear_dd_indices(dd, 0, 0);
9412 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9413 TRUE, cgs_gl, state_global->x, &ddbox);
9415 get_cg_distribution(fplog, step, dd, cgs_gl,
9416 state_global->box, &ddbox, state_global->x);
9418 dd_distribute_state(dd, cgs_gl,
9419 state_global, state_local, f);
9421 dd_make_local_cgs(dd, &top_local->cgs);
9423 /* Ensure that we have space for the new distribution */
9424 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9426 if (fr->cutoff_scheme == ecutsGROUP)
9428 calc_cgcm(fplog, 0, dd->ncg_home,
9429 &top_local->cgs, state_local->x, fr->cg_cm);
9432 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9434 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9436 else if (state_local->ddp_count != dd->ddp_count)
9438 if (state_local->ddp_count > dd->ddp_count)
9440 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9443 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9445 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9448 /* Clear the old state */
9449 clear_dd_indices(dd, 0, 0);
9451 /* Build the new indices */
9452 rebuild_cgindex(dd, cgs_gl->index, state_local);
9453 make_dd_indices(dd, cgs_gl->index, 0);
9454 ncgindex_set = dd->ncg_home;
9456 if (fr->cutoff_scheme == ecutsGROUP)
9458 /* Redetermine the cg COMs */
9459 calc_cgcm(fplog, 0, dd->ncg_home,
9460 &top_local->cgs, state_local->x, fr->cg_cm);
9463 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9465 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9467 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9468 TRUE, &top_local->cgs, state_local->x, &ddbox);
9470 bRedist = comm->bDynLoadBal;
9474 /* We have the full state, only redistribute the cgs */
9476 /* Clear the non-home indices */
9477 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9480 /* Avoid global communication for dim's without pbc and -gcom */
9481 if (!bNStGlobalComm)
9483 copy_rvec(comm->box0, ddbox.box0 );
9484 copy_rvec(comm->box_size, ddbox.box_size);
9486 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9487 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9492 /* For dim's without pbc and -gcom */
9493 copy_rvec(ddbox.box0, comm->box0 );
9494 copy_rvec(ddbox.box_size, comm->box_size);
9496 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9499 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9501 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9504 /* Check if we should sort the charge groups */
9505 if (comm->nstSortCG > 0)
9507 bSortCG = (bMasterState ||
9508 (bRedist && (step % comm->nstSortCG == 0)));
9515 ncg_home_old = dd->ncg_home;
9520 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9522 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9524 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9526 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9529 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9531 &comm->cell_x0, &comm->cell_x1,
9532 dd->ncg_home, fr->cg_cm,
9533 cell_ns_x0, cell_ns_x1, &grid_density);
9537 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9540 switch (fr->cutoff_scheme)
9543 copy_ivec(fr->ns.grid->n, ncells_old);
9544 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9545 state_local->box, cell_ns_x0, cell_ns_x1,
9546 fr->rlistlong, grid_density);
9549 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9552 gmx_incons("unimplemented");
9554 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9555 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9559 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9561 /* Sort the state on charge group position.
9562 * This enables exact restarts from this step.
9563 * It also improves performance by about 15% with larger numbers
9564 * of atoms per node.
9567 /* Fill the ns grid with the home cell,
9568 * so we can sort with the indices.
9570 set_zones_ncg_home(dd);
9572 switch (fr->cutoff_scheme)
9575 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9577 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9579 comm->zones.size[0].bb_x0,
9580 comm->zones.size[0].bb_x1,
9582 comm->zones.dens_zone0,
9585 ncg_moved, bRedist ? comm->moved : NULL,
9586 fr->nbv->grp[eintLocal].kernel_type,
9587 fr->nbv->grp[eintLocal].nbat);
9589 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9592 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9593 0, dd->ncg_home, fr->cg_cm);
9595 copy_ivec(fr->ns.grid->n, ncells_new);
9598 gmx_incons("unimplemented");
9601 bResortAll = bMasterState;
9603 /* Check if we can use the old order and ns grid cell indices
9604 * of the charge groups to sort the charge groups efficiently.
9606 if (ncells_new[XX] != ncells_old[XX] ||
9607 ncells_new[YY] != ncells_old[YY] ||
9608 ncells_new[ZZ] != ncells_old[ZZ])
9615 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9616 gmx_step_str(step, sbuf), dd->ncg_home);
9618 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9619 bResortAll ? -1 : ncg_home_old);
9620 /* Rebuild all the indices */
9621 ga2la_clear(dd->ga2la);
9624 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9627 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9629 /* Set up the communication and communicate the coordinates */
9630 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9632 /* Set the indices */
9633 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9635 /* Set the charge group boundaries for neighbor searching */
9636 set_cg_boundaries(&comm->zones);
9638 if (fr->cutoff_scheme == ecutsVERLET)
9640 set_zones_size(dd, state_local->box, &ddbox,
9641 bSortCG ? 1 : 0, comm->zones.n);
9644 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9647 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9648 -1,state_local->x,state_local->box);
9651 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9653 /* Extract a local topology from the global topology */
9654 for (i = 0; i < dd->ndim; i++)
9656 np[dd->dim[i]] = comm->cd[i].np;
9658 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9659 comm->cellsize_min, np,
9661 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9662 vsite, top_global, top_local);
9664 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9666 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9668 /* Set up the special atom communication */
9669 n = comm->nat[ddnatZONE];
9670 for (i = ddnatZONE+1; i < ddnatNR; i++)
9675 if (vsite && vsite->n_intercg_vsite)
9677 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9681 if (dd->bInterCGcons || dd->bInterCGsettles)
9683 /* Only for inter-cg constraints we need special code */
9684 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9685 constr, ir->nProjOrder,
9686 top_local->idef.il);
9690 gmx_incons("Unknown special atom type setup");
9695 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9697 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9699 /* Make space for the extra coordinates for virtual site
9700 * or constraint communication.
9702 state_local->natoms = comm->nat[ddnatNR-1];
9703 if (state_local->natoms > state_local->nalloc)
9705 dd_realloc_state(state_local, f, state_local->natoms);
9708 if (fr->bF_NoVirSum)
9710 if (vsite && vsite->n_intercg_vsite)
9712 nat_f_novirsum = comm->nat[ddnatVSITE];
9716 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9718 nat_f_novirsum = dd->nat_tot;
9722 nat_f_novirsum = dd->nat_home;
9731 /* Set the number of atoms required for the force calculation.
9732 * Forces need to be constrained when using a twin-range setup
9733 * or with energy minimization. For simple simulations we could
9734 * avoid some allocation, zeroing and copying, but this is
9735 * probably not worth the complications and checking.
9737 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9738 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9740 /* We make all the mdatoms up to nat_tot_con.
9741 * We could save some work by only setting invmass
9742 * between nat_tot and nat_tot_con.
9744 /* This call also sets the new number of home particles to dd->nat_home */
9745 atoms2md(top_global, ir,
9746 comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
9748 /* Now we have the charges we can sort the FE interactions */
9749 dd_sort_local_top(dd, mdatoms, top_local);
9753 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9754 split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
9759 /* Make the local shell stuff, currently no communication is done */
9760 make_local_shells(cr, mdatoms, shellfc);
9763 if (ir->implicit_solvent)
9765 make_local_gb(cr, fr->born, ir->gb_algorithm);
9768 setup_bonded_threading(fr, &top_local->idef);
9770 if (!(cr->duty & DUTY_PME))
9772 /* Send the charges and/or c6/sigmas to our PME only node */
9773 gmx_pme_send_parameters(cr, mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9774 mdatoms->chargeA, mdatoms->chargeB,
9775 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9776 mdatoms->sigmaA, mdatoms->sigmaB,
9777 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9782 set_constraints(constr, top_local, ir, mdatoms, cr);
9785 if (ir->ePull != epullNO)
9787 /* Update the local pull groups */
9788 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9793 /* Update the local rotation groups */
9794 dd_make_local_rotation_groups(dd, ir->rot);
9797 if (ir->eSwapCoords != eswapNO)
9799 /* Update the local groups needed for ion swapping */
9800 dd_make_local_swap_groups(dd, ir->swap);
9803 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9804 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9806 add_dd_statistics(dd);
9808 /* Make sure we only count the cycles for this DD partitioning */
9809 clear_dd_cycle_counts(dd);
9811 /* Because the order of the atoms might have changed since
9812 * the last vsite construction, we need to communicate the constructing
9813 * atom coordinates again (for spreading the forces this MD step).
9815 dd_move_x_vsites(dd, state_local->box, state_local->x);
9817 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9819 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9821 dd_move_x(dd, state_local->box, state_local->x);
9822 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9823 -1, state_local->x, state_local->box);
9826 /* Store the partitioning step */
9827 comm->partition_step = step;
9829 /* Increase the DD partitioning counter */
9831 /* The state currently matches this DD partitioning count, store it */
9832 state_local->ddp_count = dd->ddp_count;
9835 /* The DD master node knows the complete cg distribution,
9836 * store the count so we can possibly skip the cg info communication.
9838 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9841 if (comm->DD_debug > 0)
9843 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9844 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9845 "after partitioning");