2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
49 #include "gromacs/math/vec.h"
51 #include "domdec_network.h"
53 #include "chargegroup.h"
62 #include "gromacs/topology/mtop_util.h"
63 #include "gmx_ga2la.h"
65 #include "nbnxn_search.h"
67 #include "gmx_omp_nthreads.h"
68 #include "gpu_utils.h"
70 #include "gromacs/utility/futil.h"
71 #include "gromacs/fileio/gmxfio.h"
72 #include "gromacs/fileio/pdbio.h"
73 #include "gromacs/imd/imd.h"
74 #include "gromacs/pbcutil/ishift.h"
75 #include "gromacs/pbcutil/pbc.h"
76 #include "gromacs/pulling/pull.h"
77 #include "gromacs/pulling/pull_rotation.h"
78 #include "gromacs/swap/swapcoords.h"
79 #include "gromacs/timing/wallcycle.h"
80 #include "gromacs/utility/basenetwork.h"
81 #include "gromacs/utility/fatalerror.h"
82 #include "gromacs/utility/gmxmpi.h"
83 #include "gromacs/utility/qsort_threadsafe.h"
84 #include "gromacs/utility/smalloc.h"
86 #define DDRANK(dd, rank) (rank)
87 #define DDMASTERRANK(dd) (dd->masterrank)
89 typedef struct gmx_domdec_master
91 /* The cell boundaries */
93 /* The global charge group division */
94 int *ncg; /* Number of home charge groups for each node */
95 int *index; /* Index of nnodes+1 into cg */
96 int *cg; /* Global charge group index */
97 int *nat; /* Number of home atoms for each node. */
98 int *ibuf; /* Buffer for communication */
99 rvec *vbuf; /* Buffer for state scattering and gathering */
100 } gmx_domdec_master_t;
104 /* The numbers of charge groups to send and receive for each cell
105 * that requires communication, the last entry contains the total
106 * number of atoms that needs to be communicated.
108 int nsend[DD_MAXIZONE+2];
109 int nrecv[DD_MAXIZONE+2];
110 /* The charge groups to send */
113 /* The atom range for non-in-place communication */
114 int cell2at0[DD_MAXIZONE];
115 int cell2at1[DD_MAXIZONE];
120 int np; /* Number of grid pulses in this dimension */
121 int np_dlb; /* For dlb, for use with edlbAUTO */
122 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
124 gmx_bool bInPlace; /* Can we communicate in place? */
125 } gmx_domdec_comm_dim_t;
129 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
130 real *cell_f; /* State var.: cell boundaries, box relative */
131 real *old_cell_f; /* Temp. var.: old cell size */
132 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
133 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
134 real *bound_min; /* Temp. var.: lower limit for cell boundary */
135 real *bound_max; /* Temp. var.: upper limit for cell boundary */
136 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
137 real *buf_ncd; /* Temp. var. */
140 #define DD_NLOAD_MAX 9
142 /* Here floats are accurate enough, since these variables
143 * only influence the load balancing, not the actual MD results.
170 gmx_cgsort_t *sort_new;
182 /* This enum determines the order of the coordinates.
183 * ddnatHOME and ddnatZONE should be first and second,
184 * the others can be ordered as wanted.
187 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
191 edlbAUTO, edlbNO, edlbYES, edlbNR
193 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
197 int dim; /* The dimension */
198 gmx_bool dim_match; /* Tells if DD and PME dims match */
199 int nslab; /* The number of PME slabs in this dimension */
200 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
201 int *pp_min; /* The minimum pp node location, size nslab */
202 int *pp_max; /* The maximum pp node location,size nslab */
203 int maxshift; /* The maximum shift for coordinate redistribution in PME */
208 real min0; /* The minimum bottom of this zone */
209 real max1; /* The maximum top of this zone */
210 real min1; /* The minimum top of this zone */
211 real mch0; /* The maximum bottom communicaton height for this zone */
212 real mch1; /* The maximum top communicaton height for this zone */
213 real p1_0; /* The bottom value of the first cell in this zone */
214 real p1_1; /* The top value of the first cell in this zone */
219 gmx_domdec_ind_t ind;
226 } dd_comm_setup_work_t;
228 typedef struct gmx_domdec_comm
230 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
231 * unless stated otherwise.
234 /* The number of decomposition dimensions for PME, 0: no PME */
236 /* The number of nodes doing PME (PP/PME or only PME) */
240 /* The communication setup including the PME only nodes */
241 gmx_bool bCartesianPP_PME;
244 int *pmenodes; /* size npmenodes */
245 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
246 * but with bCartesianPP_PME */
247 gmx_ddpme_t ddpme[2];
249 /* The DD particle-particle nodes only */
250 gmx_bool bCartesianPP;
251 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
253 /* The global charge groups */
256 /* Should we sort the cgs */
258 gmx_domdec_sort_t *sort;
260 /* Are there charge groups? */
263 /* Are there bonded and multi-body interactions between charge groups? */
264 gmx_bool bInterCGBondeds;
265 gmx_bool bInterCGMultiBody;
267 /* Data for the optional bonded interaction atom communication range */
274 /* Are we actually using DLB? */
275 gmx_bool bDynLoadBal;
277 /* Cell sizes for static load balancing, first index cartesian */
280 /* The width of the communicated boundaries */
283 /* The minimum cell size (including triclinic correction) */
285 /* For dlb, for use with edlbAUTO */
286 rvec cellsize_min_dlb;
287 /* The lower limit for the DD cell size with DLB */
289 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
290 gmx_bool bVacDLBNoLimit;
292 /* With PME load balancing we set limits on DLB */
293 gmx_bool bPMELoadBalDLBLimits;
294 /* DLB needs to take into account that we want to allow this maximum
295 * cut-off (for PME load balancing), this could limit cell boundaries.
297 real PMELoadBal_max_cutoff;
299 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
301 /* box0 and box_size are required with dim's without pbc and -gcom */
305 /* The cell boundaries */
309 /* The old location of the cell boundaries, to check cg displacements */
313 /* The communication setup and charge group boundaries for the zones */
314 gmx_domdec_zones_t zones;
316 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
317 * cell boundaries of neighboring cells for dynamic load balancing.
319 gmx_ddzone_t zone_d1[2];
320 gmx_ddzone_t zone_d2[2][2];
322 /* The coordinate/force communication setup and indices */
323 gmx_domdec_comm_dim_t cd[DIM];
324 /* The maximum number of cells to communicate with in one dimension */
327 /* Which cg distribution is stored on the master node */
328 int master_cg_ddp_count;
330 /* The number of cg's received from the direct neighbors */
331 int zone_ncg1[DD_MAXZONE];
333 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
336 /* Array for signalling if atoms have moved to another domain */
340 /* Communication buffer for general use */
344 /* Communication buffer for general use */
347 /* Temporary storage for thread parallel communication setup */
349 dd_comm_setup_work_t *dth;
351 /* Communication buffers only used with multiple grid pulses */
356 /* Communication buffers for local redistribution */
358 int cggl_flag_nalloc[DIM*2];
360 int cgcm_state_nalloc[DIM*2];
362 /* Cell sizes for dynamic load balancing */
363 gmx_domdec_root_t **root;
367 real cell_f_max0[DIM];
368 real cell_f_min1[DIM];
370 /* Stuff for load communication */
371 gmx_bool bRecordLoad;
372 gmx_domdec_load_t *load;
373 int nrank_gpu_shared;
375 MPI_Comm *mpi_comm_load;
376 MPI_Comm mpi_comm_gpu_shared;
379 /* Maximum DLB scaling per load balancing step in percent */
383 float cycl[ddCyclNr];
384 int cycl_n[ddCyclNr];
385 float cycl_max[ddCyclNr];
386 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
390 /* Have often have did we have load measurements */
392 /* Have often have we collected the load measurements */
396 double sum_nat[ddnatNR-ddnatZONE];
406 /* The last partition step */
407 gmx_int64_t partition_step;
415 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
418 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
419 #define DD_FLAG_NRCG 65535
420 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
421 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
423 /* Zone permutation required to obtain consecutive charge groups
424 * for neighbor searching.
426 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
428 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
429 * components see only j zones with that component 0.
432 /* The DD zone order */
433 static const ivec dd_zo[DD_MAXZONE] =
434 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
439 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
444 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
449 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
451 /* Factors used to avoid problems due to rounding issues */
452 #define DD_CELL_MARGIN 1.0001
453 #define DD_CELL_MARGIN2 1.00005
454 /* Factor to account for pressure scaling during nstlist steps */
455 #define DD_PRES_SCALE_MARGIN 1.02
457 /* Turn on DLB when the load imbalance causes this amount of total loss.
458 * There is a bit of overhead with DLB and it's difficult to achieve
459 * a load imbalance of less than 2% with DLB.
461 #define DD_PERF_LOSS_DLB_ON 0.02
463 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
464 #define DD_PERF_LOSS_WARN 0.05
466 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
468 /* Use separate MPI send and receive commands
469 * when nnodes <= GMX_DD_NNODES_SENDRECV.
470 * This saves memory (and some copying for small nnodes).
471 * For high parallelization scatter and gather calls are used.
473 #define GMX_DD_NNODES_SENDRECV 4
477 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
479 static void index2xyz(ivec nc,int ind,ivec xyz)
481 xyz[XX] = ind % nc[XX];
482 xyz[YY] = (ind / nc[XX]) % nc[YY];
483 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
487 /* This order is required to minimize the coordinate communication in PME
488 * which uses decomposition in the x direction.
490 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
492 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
494 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
495 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
496 xyz[ZZ] = ind % nc[ZZ];
499 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
504 ddindex = dd_index(dd->nc, c);
505 if (dd->comm->bCartesianPP_PME)
507 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
509 else if (dd->comm->bCartesianPP)
512 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
523 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
525 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
528 int ddglatnr(gmx_domdec_t *dd, int i)
538 if (i >= dd->comm->nat[ddnatNR-1])
540 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
542 atnr = dd->gatindex[i] + 1;
548 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
550 return &dd->comm->cgs_gl;
553 static void vec_rvec_init(vec_rvec_t *v)
559 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
563 v->nalloc = over_alloc_dd(n);
564 srenew(v->v, v->nalloc);
568 void dd_store_state(gmx_domdec_t *dd, t_state *state)
572 if (state->ddp_count != dd->ddp_count)
574 gmx_incons("The state does not the domain decomposition state");
577 state->ncg_gl = dd->ncg_home;
578 if (state->ncg_gl > state->cg_gl_nalloc)
580 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
581 srenew(state->cg_gl, state->cg_gl_nalloc);
583 for (i = 0; i < state->ncg_gl; i++)
585 state->cg_gl[i] = dd->index_gl[i];
588 state->ddp_count_cg_gl = dd->ddp_count;
591 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
593 return &dd->comm->zones;
596 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
597 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
599 gmx_domdec_zones_t *zones;
602 zones = &dd->comm->zones;
605 while (icg >= zones->izone[izone].cg1)
614 else if (izone < zones->nizone)
616 *jcg0 = zones->izone[izone].jcg0;
620 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
621 icg, izone, zones->nizone);
624 *jcg1 = zones->izone[izone].jcg1;
626 for (d = 0; d < dd->ndim; d++)
629 shift0[dim] = zones->izone[izone].shift0[dim];
630 shift1[dim] = zones->izone[izone].shift1[dim];
631 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
633 /* A conservative approach, this can be optimized */
640 int dd_natoms_vsite(gmx_domdec_t *dd)
642 return dd->comm->nat[ddnatVSITE];
645 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
647 *at_start = dd->comm->nat[ddnatCON-1];
648 *at_end = dd->comm->nat[ddnatCON];
651 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
653 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
654 int *index, *cgindex;
655 gmx_domdec_comm_t *comm;
656 gmx_domdec_comm_dim_t *cd;
657 gmx_domdec_ind_t *ind;
658 rvec shift = {0, 0, 0}, *buf, *rbuf;
659 gmx_bool bPBC, bScrew;
663 cgindex = dd->cgindex;
668 nat_tot = dd->nat_home;
669 for (d = 0; d < dd->ndim; d++)
671 bPBC = (dd->ci[dd->dim[d]] == 0);
672 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
675 copy_rvec(box[dd->dim[d]], shift);
678 for (p = 0; p < cd->np; p++)
685 for (i = 0; i < ind->nsend[nzone]; i++)
687 at0 = cgindex[index[i]];
688 at1 = cgindex[index[i]+1];
689 for (j = at0; j < at1; j++)
691 copy_rvec(x[j], buf[n]);
698 for (i = 0; i < ind->nsend[nzone]; i++)
700 at0 = cgindex[index[i]];
701 at1 = cgindex[index[i]+1];
702 for (j = at0; j < at1; j++)
704 /* We need to shift the coordinates */
705 rvec_add(x[j], shift, buf[n]);
712 for (i = 0; i < ind->nsend[nzone]; i++)
714 at0 = cgindex[index[i]];
715 at1 = cgindex[index[i]+1];
716 for (j = at0; j < at1; j++)
719 buf[n][XX] = x[j][XX] + shift[XX];
721 * This operation requires a special shift force
722 * treatment, which is performed in calc_vir.
724 buf[n][YY] = box[YY][YY] - x[j][YY];
725 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
737 rbuf = comm->vbuf2.v;
739 /* Send and receive the coordinates */
740 dd_sendrecv_rvec(dd, d, dddirBackward,
741 buf, ind->nsend[nzone+1],
742 rbuf, ind->nrecv[nzone+1]);
746 for (zone = 0; zone < nzone; zone++)
748 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
750 copy_rvec(rbuf[j], x[i]);
755 nat_tot += ind->nrecv[nzone+1];
761 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
763 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
764 int *index, *cgindex;
765 gmx_domdec_comm_t *comm;
766 gmx_domdec_comm_dim_t *cd;
767 gmx_domdec_ind_t *ind;
771 gmx_bool bPBC, bScrew;
775 cgindex = dd->cgindex;
780 nzone = comm->zones.n/2;
781 nat_tot = dd->nat_tot;
782 for (d = dd->ndim-1; d >= 0; d--)
784 bPBC = (dd->ci[dd->dim[d]] == 0);
785 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
786 if (fshift == NULL && !bScrew)
790 /* Determine which shift vector we need */
796 for (p = cd->np-1; p >= 0; p--)
799 nat_tot -= ind->nrecv[nzone+1];
806 sbuf = comm->vbuf2.v;
808 for (zone = 0; zone < nzone; zone++)
810 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
812 copy_rvec(f[i], sbuf[j]);
817 /* Communicate the forces */
818 dd_sendrecv_rvec(dd, d, dddirForward,
819 sbuf, ind->nrecv[nzone+1],
820 buf, ind->nsend[nzone+1]);
822 /* Add the received forces */
826 for (i = 0; i < ind->nsend[nzone]; i++)
828 at0 = cgindex[index[i]];
829 at1 = cgindex[index[i]+1];
830 for (j = at0; j < at1; j++)
832 rvec_inc(f[j], buf[n]);
839 for (i = 0; i < ind->nsend[nzone]; i++)
841 at0 = cgindex[index[i]];
842 at1 = cgindex[index[i]+1];
843 for (j = at0; j < at1; j++)
845 rvec_inc(f[j], buf[n]);
846 /* Add this force to the shift force */
847 rvec_inc(fshift[is], buf[n]);
854 for (i = 0; i < ind->nsend[nzone]; i++)
856 at0 = cgindex[index[i]];
857 at1 = cgindex[index[i]+1];
858 for (j = at0; j < at1; j++)
860 /* Rotate the force */
861 f[j][XX] += buf[n][XX];
862 f[j][YY] -= buf[n][YY];
863 f[j][ZZ] -= buf[n][ZZ];
866 /* Add this force to the shift force */
867 rvec_inc(fshift[is], buf[n]);
878 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
880 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
881 int *index, *cgindex;
882 gmx_domdec_comm_t *comm;
883 gmx_domdec_comm_dim_t *cd;
884 gmx_domdec_ind_t *ind;
889 cgindex = dd->cgindex;
891 buf = &comm->vbuf.v[0][0];
894 nat_tot = dd->nat_home;
895 for (d = 0; d < dd->ndim; d++)
898 for (p = 0; p < cd->np; p++)
903 for (i = 0; i < ind->nsend[nzone]; i++)
905 at0 = cgindex[index[i]];
906 at1 = cgindex[index[i]+1];
907 for (j = at0; j < at1; j++)
920 rbuf = &comm->vbuf2.v[0][0];
922 /* Send and receive the coordinates */
923 dd_sendrecv_real(dd, d, dddirBackward,
924 buf, ind->nsend[nzone+1],
925 rbuf, ind->nrecv[nzone+1]);
929 for (zone = 0; zone < nzone; zone++)
931 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
938 nat_tot += ind->nrecv[nzone+1];
944 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
946 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
947 int *index, *cgindex;
948 gmx_domdec_comm_t *comm;
949 gmx_domdec_comm_dim_t *cd;
950 gmx_domdec_ind_t *ind;
955 cgindex = dd->cgindex;
957 buf = &comm->vbuf.v[0][0];
960 nzone = comm->zones.n/2;
961 nat_tot = dd->nat_tot;
962 for (d = dd->ndim-1; d >= 0; d--)
965 for (p = cd->np-1; p >= 0; p--)
968 nat_tot -= ind->nrecv[nzone+1];
975 sbuf = &comm->vbuf2.v[0][0];
977 for (zone = 0; zone < nzone; zone++)
979 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
986 /* Communicate the forces */
987 dd_sendrecv_real(dd, d, dddirForward,
988 sbuf, ind->nrecv[nzone+1],
989 buf, ind->nsend[nzone+1]);
991 /* Add the received forces */
993 for (i = 0; i < ind->nsend[nzone]; i++)
995 at0 = cgindex[index[i]];
996 at1 = cgindex[index[i]+1];
997 for (j = at0; j < at1; j++)
1008 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
1010 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1012 zone->min0, zone->max1,
1013 zone->mch0, zone->mch0,
1014 zone->p1_0, zone->p1_1);
1018 #define DDZONECOMM_MAXZONE 5
1019 #define DDZONECOMM_BUFSIZE 3
1021 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1022 int ddimind, int direction,
1023 gmx_ddzone_t *buf_s, int n_s,
1024 gmx_ddzone_t *buf_r, int n_r)
1026 #define ZBS DDZONECOMM_BUFSIZE
1027 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1028 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
1031 for (i = 0; i < n_s; i++)
1033 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1034 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1035 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1036 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1037 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1038 vbuf_s[i*ZBS+1][2] = 0;
1039 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1040 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1041 vbuf_s[i*ZBS+2][2] = 0;
1044 dd_sendrecv_rvec(dd, ddimind, direction,
1048 for (i = 0; i < n_r; i++)
1050 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1051 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1052 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1053 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1054 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1055 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1056 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
1062 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1063 rvec cell_ns_x0, rvec cell_ns_x1)
1065 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1067 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1068 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1069 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1070 rvec extr_s[2], extr_r[2];
1072 real dist_d, c = 0, det;
1073 gmx_domdec_comm_t *comm;
1074 gmx_bool bPBC, bUse;
1078 for (d = 1; d < dd->ndim; d++)
1081 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1082 zp->min0 = cell_ns_x0[dim];
1083 zp->max1 = cell_ns_x1[dim];
1084 zp->min1 = cell_ns_x1[dim];
1085 zp->mch0 = cell_ns_x0[dim];
1086 zp->mch1 = cell_ns_x1[dim];
1087 zp->p1_0 = cell_ns_x0[dim];
1088 zp->p1_1 = cell_ns_x1[dim];
1091 for (d = dd->ndim-2; d >= 0; d--)
1094 bPBC = (dim < ddbox->npbcdim);
1096 /* Use an rvec to store two reals */
1097 extr_s[d][0] = comm->cell_f0[d+1];
1098 extr_s[d][1] = comm->cell_f1[d+1];
1099 extr_s[d][2] = comm->cell_f1[d+1];
1102 /* Store the extremes in the backward sending buffer,
1103 * so the get updated separately from the forward communication.
1105 for (d1 = d; d1 < dd->ndim-1; d1++)
1107 /* We invert the order to be able to use the same loop for buf_e */
1108 buf_s[pos].min0 = extr_s[d1][1];
1109 buf_s[pos].max1 = extr_s[d1][0];
1110 buf_s[pos].min1 = extr_s[d1][2];
1111 buf_s[pos].mch0 = 0;
1112 buf_s[pos].mch1 = 0;
1113 /* Store the cell corner of the dimension we communicate along */
1114 buf_s[pos].p1_0 = comm->cell_x0[dim];
1115 buf_s[pos].p1_1 = 0;
1119 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1122 if (dd->ndim == 3 && d == 0)
1124 buf_s[pos] = comm->zone_d2[0][1];
1126 buf_s[pos] = comm->zone_d1[0];
1130 /* We only need to communicate the extremes
1131 * in the forward direction
1133 npulse = comm->cd[d].np;
1136 /* Take the minimum to avoid double communication */
1137 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1141 /* Without PBC we should really not communicate over
1142 * the boundaries, but implementing that complicates
1143 * the communication setup and therefore we simply
1144 * do all communication, but ignore some data.
1146 npulse_min = npulse;
1148 for (p = 0; p < npulse_min; p++)
1150 /* Communicate the extremes forward */
1151 bUse = (bPBC || dd->ci[dim] > 0);
1153 dd_sendrecv_rvec(dd, d, dddirForward,
1154 extr_s+d, dd->ndim-d-1,
1155 extr_r+d, dd->ndim-d-1);
1159 for (d1 = d; d1 < dd->ndim-1; d1++)
1161 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1162 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1163 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1169 for (p = 0; p < npulse; p++)
1171 /* Communicate all the zone information backward */
1172 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1174 dd_sendrecv_ddzone(dd, d, dddirBackward,
1181 for (d1 = d+1; d1 < dd->ndim; d1++)
1183 /* Determine the decrease of maximum required
1184 * communication height along d1 due to the distance along d,
1185 * this avoids a lot of useless atom communication.
1187 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1189 if (ddbox->tric_dir[dim])
1191 /* c is the off-diagonal coupling between the cell planes
1192 * along directions d and d1.
1194 c = ddbox->v[dim][dd->dim[d1]][dim];
1200 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1203 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1207 /* A negative value signals out of range */
1213 /* Accumulate the extremes over all pulses */
1214 for (i = 0; i < buf_size; i++)
1218 buf_e[i] = buf_r[i];
1224 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1225 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1226 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1229 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1237 if (bUse && dh[d1] >= 0)
1239 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1240 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1243 /* Copy the received buffer to the send buffer,
1244 * to pass the data through with the next pulse.
1246 buf_s[i] = buf_r[i];
1248 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1249 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1251 /* Store the extremes */
1254 for (d1 = d; d1 < dd->ndim-1; d1++)
1256 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1257 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1258 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1262 if (d == 1 || (d == 0 && dd->ndim == 3))
1264 for (i = d; i < 2; i++)
1266 comm->zone_d2[1-d][i] = buf_e[pos];
1272 comm->zone_d1[1] = buf_e[pos];
1282 for (i = 0; i < 2; i++)
1286 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1288 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1289 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1295 for (i = 0; i < 2; i++)
1297 for (j = 0; j < 2; j++)
1301 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1303 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1304 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1308 for (d = 1; d < dd->ndim; d++)
1310 comm->cell_f_max0[d] = extr_s[d-1][0];
1311 comm->cell_f_min1[d] = extr_s[d-1][1];
1314 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1315 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1320 static void dd_collect_cg(gmx_domdec_t *dd,
1321 t_state *state_local)
1323 gmx_domdec_master_t *ma = NULL;
1324 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1327 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1329 /* The master has the correct distribution */
1333 if (state_local->ddp_count == dd->ddp_count)
1335 ncg_home = dd->ncg_home;
1337 nat_home = dd->nat_home;
1339 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1341 cgs_gl = &dd->comm->cgs_gl;
1343 ncg_home = state_local->ncg_gl;
1344 cg = state_local->cg_gl;
1346 for (i = 0; i < ncg_home; i++)
1348 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1353 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1356 buf2[0] = dd->ncg_home;
1357 buf2[1] = dd->nat_home;
1367 /* Collect the charge group and atom counts on the master */
1368 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1373 for (i = 0; i < dd->nnodes; i++)
1375 ma->ncg[i] = ma->ibuf[2*i];
1376 ma->nat[i] = ma->ibuf[2*i+1];
1377 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1380 /* Make byte counts and indices */
1381 for (i = 0; i < dd->nnodes; i++)
1383 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1384 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1388 fprintf(debug, "Initial charge group distribution: ");
1389 for (i = 0; i < dd->nnodes; i++)
1391 fprintf(debug, " %d", ma->ncg[i]);
1393 fprintf(debug, "\n");
1397 /* Collect the charge group indices on the master */
1399 dd->ncg_home*sizeof(int), dd->index_gl,
1400 DDMASTER(dd) ? ma->ibuf : NULL,
1401 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1402 DDMASTER(dd) ? ma->cg : NULL);
1404 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1407 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1410 gmx_domdec_master_t *ma;
1411 int n, i, c, a, nalloc = 0;
1420 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1421 dd->rank, dd->mpi_comm_all);
1426 /* Copy the master coordinates to the global array */
1427 cgs_gl = &dd->comm->cgs_gl;
1429 n = DDMASTERRANK(dd);
1431 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1433 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1435 copy_rvec(lv[a++], v[c]);
1439 for (n = 0; n < dd->nnodes; n++)
1443 if (ma->nat[n] > nalloc)
1445 nalloc = over_alloc_dd(ma->nat[n]);
1446 srenew(buf, nalloc);
1449 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1450 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1453 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1455 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1457 copy_rvec(buf[a++], v[c]);
1466 static void get_commbuffer_counts(gmx_domdec_t *dd,
1467 int **counts, int **disps)
1469 gmx_domdec_master_t *ma;
1474 /* Make the rvec count and displacment arrays */
1476 *disps = ma->ibuf + dd->nnodes;
1477 for (n = 0; n < dd->nnodes; n++)
1479 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1480 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1484 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1487 gmx_domdec_master_t *ma;
1488 int *rcounts = NULL, *disps = NULL;
1497 get_commbuffer_counts(dd, &rcounts, &disps);
1502 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1506 cgs_gl = &dd->comm->cgs_gl;
1509 for (n = 0; n < dd->nnodes; n++)
1511 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1513 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1515 copy_rvec(buf[a++], v[c]);
1522 void dd_collect_vec(gmx_domdec_t *dd,
1523 t_state *state_local, rvec *lv, rvec *v)
1525 gmx_domdec_master_t *ma;
1526 int n, i, c, a, nalloc = 0;
1529 dd_collect_cg(dd, state_local);
1531 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1533 dd_collect_vec_sendrecv(dd, lv, v);
1537 dd_collect_vec_gatherv(dd, lv, v);
1542 void dd_collect_state(gmx_domdec_t *dd,
1543 t_state *state_local, t_state *state)
1547 nh = state->nhchainlength;
1551 for (i = 0; i < efptNR; i++)
1553 state->lambda[i] = state_local->lambda[i];
1555 state->fep_state = state_local->fep_state;
1556 state->veta = state_local->veta;
1557 state->vol0 = state_local->vol0;
1558 copy_mat(state_local->box, state->box);
1559 copy_mat(state_local->boxv, state->boxv);
1560 copy_mat(state_local->svir_prev, state->svir_prev);
1561 copy_mat(state_local->fvir_prev, state->fvir_prev);
1562 copy_mat(state_local->pres_prev, state->pres_prev);
1564 for (i = 0; i < state_local->ngtc; i++)
1566 for (j = 0; j < nh; j++)
1568 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1569 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1571 state->therm_integral[i] = state_local->therm_integral[i];
1573 for (i = 0; i < state_local->nnhpres; i++)
1575 for (j = 0; j < nh; j++)
1577 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1578 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1582 for (est = 0; est < estNR; est++)
1584 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1589 dd_collect_vec(dd, state_local, state_local->x, state->x);
1592 dd_collect_vec(dd, state_local, state_local->v, state->v);
1595 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1598 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1600 case estDISRE_INITF:
1601 case estDISRE_RM3TAV:
1602 case estORIRE_INITF:
1606 gmx_incons("Unknown state entry encountered in dd_collect_state");
1612 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1618 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1621 state->nalloc = over_alloc_dd(nalloc);
1623 for (est = 0; est < estNR; est++)
1625 if (EST_DISTR(est) && (state->flags & (1<<est)))
1630 srenew(state->x, state->nalloc);
1633 srenew(state->v, state->nalloc);
1636 srenew(state->sd_X, state->nalloc);
1639 srenew(state->cg_p, state->nalloc);
1641 case estDISRE_INITF:
1642 case estDISRE_RM3TAV:
1643 case estORIRE_INITF:
1645 /* No reallocation required */
1648 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1655 srenew(*f, state->nalloc);
1659 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1662 if (nalloc > fr->cg_nalloc)
1666 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1668 fr->cg_nalloc = over_alloc_dd(nalloc);
1669 srenew(fr->cginfo, fr->cg_nalloc);
1670 if (fr->cutoff_scheme == ecutsGROUP)
1672 srenew(fr->cg_cm, fr->cg_nalloc);
1675 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1677 /* We don't use charge groups, we use x in state to set up
1678 * the atom communication.
1680 dd_realloc_state(state, f, nalloc);
1684 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1687 gmx_domdec_master_t *ma;
1688 int n, i, c, a, nalloc = 0;
1695 for (n = 0; n < dd->nnodes; n++)
1699 if (ma->nat[n] > nalloc)
1701 nalloc = over_alloc_dd(ma->nat[n]);
1702 srenew(buf, nalloc);
1704 /* Use lv as a temporary buffer */
1706 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1708 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1710 copy_rvec(v[c], buf[a++]);
1713 if (a != ma->nat[n])
1715 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1720 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1721 DDRANK(dd, n), n, dd->mpi_comm_all);
1726 n = DDMASTERRANK(dd);
1728 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1730 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1732 copy_rvec(v[c], lv[a++]);
1739 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1740 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1745 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1748 gmx_domdec_master_t *ma;
1749 int *scounts = NULL, *disps = NULL;
1750 int n, i, c, a, nalloc = 0;
1757 get_commbuffer_counts(dd, &scounts, &disps);
1761 for (n = 0; n < dd->nnodes; n++)
1763 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1765 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1767 copy_rvec(v[c], buf[a++]);
1773 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1776 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1778 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1780 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1784 dd_distribute_vec_scatterv(dd, cgs, v, lv);
1788 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1791 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1792 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1793 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1795 if (dfhist->nlambda > 0)
1797 int nlam = dfhist->nlambda;
1798 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1799 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1800 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1801 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1802 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1803 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
1805 for (i = 0; i < nlam; i++)
1807 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1808 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1809 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1810 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1811 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1812 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
1817 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1818 t_state *state, t_state *state_local,
1823 nh = state->nhchainlength;
1827 for (i = 0; i < efptNR; i++)
1829 state_local->lambda[i] = state->lambda[i];
1831 state_local->fep_state = state->fep_state;
1832 state_local->veta = state->veta;
1833 state_local->vol0 = state->vol0;
1834 copy_mat(state->box, state_local->box);
1835 copy_mat(state->box_rel, state_local->box_rel);
1836 copy_mat(state->boxv, state_local->boxv);
1837 copy_mat(state->svir_prev, state_local->svir_prev);
1838 copy_mat(state->fvir_prev, state_local->fvir_prev);
1839 copy_df_history(&state_local->dfhist, &state->dfhist);
1840 for (i = 0; i < state_local->ngtc; i++)
1842 for (j = 0; j < nh; j++)
1844 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1845 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1847 state_local->therm_integral[i] = state->therm_integral[i];
1849 for (i = 0; i < state_local->nnhpres; i++)
1851 for (j = 0; j < nh; j++)
1853 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1854 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1858 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1859 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1860 dd_bcast(dd, sizeof(real), &state_local->veta);
1861 dd_bcast(dd, sizeof(real), &state_local->vol0);
1862 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1863 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1864 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1865 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1866 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1867 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1868 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1869 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1870 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1871 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1873 /* communicate df_history -- required for restarting from checkpoint */
1874 dd_distribute_dfhist(dd, &state_local->dfhist);
1876 if (dd->nat_home > state_local->nalloc)
1878 dd_realloc_state(state_local, f, dd->nat_home);
1880 for (i = 0; i < estNR; i++)
1882 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1887 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1890 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1893 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1896 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1898 case estDISRE_INITF:
1899 case estDISRE_RM3TAV:
1900 case estORIRE_INITF:
1902 /* Not implemented yet */
1905 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1911 static char dim2char(int dim)
1917 case XX: c = 'X'; break;
1918 case YY: c = 'Y'; break;
1919 case ZZ: c = 'Z'; break;
1920 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
1926 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1927 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1929 rvec grid_s[2], *grid_r = NULL, cx, r;
1930 char fname[STRLEN], buf[22];
1932 int a, i, d, z, y, x;
1936 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1937 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1941 snew(grid_r, 2*dd->nnodes);
1944 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : NULL);
1948 for (d = 0; d < DIM; d++)
1950 for (i = 0; i < DIM; i++)
1958 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1960 tric[d][i] = box[i][d]/box[i][i];
1969 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1970 out = gmx_fio_fopen(fname, "w");
1971 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1973 for (i = 0; i < dd->nnodes; i++)
1975 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1976 for (d = 0; d < DIM; d++)
1978 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1980 for (z = 0; z < 2; z++)
1982 for (y = 0; y < 2; y++)
1984 for (x = 0; x < 2; x++)
1986 cx[XX] = grid_r[i*2+x][XX];
1987 cx[YY] = grid_r[i*2+y][YY];
1988 cx[ZZ] = grid_r[i*2+z][ZZ];
1990 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1991 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
1995 for (d = 0; d < DIM; d++)
1997 for (x = 0; x < 4; x++)
2001 case 0: y = 1 + i*8 + 2*x; break;
2002 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
2003 case 2: y = 1 + i*8 + x; break;
2005 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2009 gmx_fio_fclose(out);
2014 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
2015 gmx_mtop_t *mtop, t_commrec *cr,
2016 int natoms, rvec x[], matrix box)
2018 char fname[STRLEN], buf[22];
2020 int i, ii, resnr, c;
2021 char *atomname, *resname;
2028 natoms = dd->comm->nat[ddnatVSITE];
2031 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2033 out = gmx_fio_fopen(fname, "w");
2035 fprintf(out, "TITLE %s\n", title);
2036 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2037 for (i = 0; i < natoms; i++)
2039 ii = dd->gatindex[i];
2040 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
2041 if (i < dd->comm->nat[ddnatZONE])
2044 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2050 else if (i < dd->comm->nat[ddnatVSITE])
2052 b = dd->comm->zones.n;
2056 b = dd->comm->zones.n + 1;
2058 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
2059 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
2061 fprintf(out, "TER\n");
2063 gmx_fio_fclose(out);
2066 real dd_cutoff_mbody(gmx_domdec_t *dd)
2068 gmx_domdec_comm_t *comm;
2075 if (comm->bInterCGBondeds)
2077 if (comm->cutoff_mbody > 0)
2079 r = comm->cutoff_mbody;
2083 /* cutoff_mbody=0 means we do not have DLB */
2084 r = comm->cellsize_min[dd->dim[0]];
2085 for (di = 1; di < dd->ndim; di++)
2087 r = min(r, comm->cellsize_min[dd->dim[di]]);
2089 if (comm->bBondComm)
2091 r = max(r, comm->cutoff_mbody);
2095 r = min(r, comm->cutoff);
2103 real dd_cutoff_twobody(gmx_domdec_t *dd)
2107 r_mb = dd_cutoff_mbody(dd);
2109 return max(dd->comm->cutoff, r_mb);
2113 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2117 nc = dd->nc[dd->comm->cartpmedim];
2118 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2119 copy_ivec(coord, coord_pme);
2120 coord_pme[dd->comm->cartpmedim] =
2121 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
2124 static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
2126 /* Here we assign a PME node to communicate with this DD node
2127 * by assuming that the major index of both is x.
2128 * We add cr->npmenodes/2 to obtain an even distribution.
2130 return (ddindex*npme + npme/2)/ndd;
2133 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2135 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2138 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2140 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
2143 static int *dd_pmenodes(t_commrec *cr)
2148 snew(pmenodes, cr->npmenodes);
2150 for (i = 0; i < cr->dd->nnodes; i++)
2152 p0 = cr_ddindex2pmeindex(cr, i);
2153 p1 = cr_ddindex2pmeindex(cr, i+1);
2154 if (i+1 == cr->dd->nnodes || p1 > p0)
2158 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2160 pmenodes[n] = i + 1 + n;
2168 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2171 ivec coords, coords_pme, nc;
2176 if (dd->comm->bCartesian) {
2177 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2178 dd_coords2pmecoords(dd,coords,coords_pme);
2179 copy_ivec(dd->ntot,nc);
2180 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2181 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2183 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2185 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2191 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
2196 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2198 gmx_domdec_comm_t *comm;
2200 int ddindex, nodeid = -1;
2202 comm = cr->dd->comm;
2207 if (comm->bCartesianPP_PME)
2210 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2215 ddindex = dd_index(cr->dd->nc, coords);
2216 if (comm->bCartesianPP)
2218 nodeid = comm->ddindex2simnodeid[ddindex];
2224 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
2236 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2239 gmx_domdec_comm_t *comm;
2240 ivec coord, coord_pme;
2247 /* This assumes a uniform x domain decomposition grid cell size */
2248 if (comm->bCartesianPP_PME)
2251 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2252 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2254 /* This is a PP node */
2255 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2256 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2260 else if (comm->bCartesianPP)
2262 if (sim_nodeid < dd->nnodes)
2264 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2269 /* This assumes DD cells with identical x coordinates
2270 * are numbered sequentially.
2272 if (dd->comm->pmenodes == NULL)
2274 if (sim_nodeid < dd->nnodes)
2276 /* The DD index equals the nodeid */
2277 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2283 while (sim_nodeid > dd->comm->pmenodes[i])
2287 if (sim_nodeid < dd->comm->pmenodes[i])
2289 pmenode = dd->comm->pmenodes[i];
2297 void get_pme_nnodes(const gmx_domdec_t *dd,
2298 int *npmenodes_x, int *npmenodes_y)
2302 *npmenodes_x = dd->comm->npmenodes_x;
2303 *npmenodes_y = dd->comm->npmenodes_y;
2312 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2314 gmx_bool bPMEOnlyNode;
2316 if (DOMAINDECOMP(cr))
2318 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2322 bPMEOnlyNode = FALSE;
2325 return bPMEOnlyNode;
2328 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2329 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2333 ivec coord, coord_pme;
2337 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2340 for (x = 0; x < dd->nc[XX]; x++)
2342 for (y = 0; y < dd->nc[YY]; y++)
2344 for (z = 0; z < dd->nc[ZZ]; z++)
2346 if (dd->comm->bCartesianPP_PME)
2351 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2352 if (dd->ci[XX] == coord_pme[XX] &&
2353 dd->ci[YY] == coord_pme[YY] &&
2354 dd->ci[ZZ] == coord_pme[ZZ])
2356 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2361 /* The slab corresponds to the nodeid in the PME group */
2362 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2364 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2371 /* The last PP-only node is the peer node */
2372 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2376 fprintf(debug, "Receive coordinates from PP nodes:");
2377 for (x = 0; x < *nmy_ddnodes; x++)
2379 fprintf(debug, " %d", (*my_ddnodes)[x]);
2381 fprintf(debug, "\n");
2385 static gmx_bool receive_vir_ener(t_commrec *cr)
2387 gmx_domdec_comm_t *comm;
2388 int pmenode, coords[DIM], rank;
2392 if (cr->npmenodes < cr->dd->nnodes)
2394 comm = cr->dd->comm;
2395 if (comm->bCartesianPP_PME)
2397 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2399 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2400 coords[comm->cartpmedim]++;
2401 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2403 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2404 if (dd_simnode2pmenode(cr, rank) == pmenode)
2406 /* This is not the last PP node for pmenode */
2414 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2415 if (cr->sim_nodeid+1 < cr->nnodes &&
2416 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2418 /* This is not the last PP node for pmenode */
2427 static void set_zones_ncg_home(gmx_domdec_t *dd)
2429 gmx_domdec_zones_t *zones;
2432 zones = &dd->comm->zones;
2434 zones->cg_range[0] = 0;
2435 for (i = 1; i < zones->n+1; i++)
2437 zones->cg_range[i] = dd->ncg_home;
2439 /* zone_ncg1[0] should always be equal to ncg_home */
2440 dd->comm->zone_ncg1[0] = dd->ncg_home;
2443 static void rebuild_cgindex(gmx_domdec_t *dd,
2444 const int *gcgs_index, t_state *state)
2446 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2449 dd_cg_gl = dd->index_gl;
2450 cgindex = dd->cgindex;
2453 for (i = 0; i < state->ncg_gl; i++)
2457 dd_cg_gl[i] = cg_gl;
2458 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2462 dd->ncg_home = state->ncg_gl;
2465 set_zones_ncg_home(dd);
2468 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2470 while (cg >= cginfo_mb->cg_end)
2475 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2478 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2479 t_forcerec *fr, char *bLocalCG)
2481 cginfo_mb_t *cginfo_mb;
2487 cginfo_mb = fr->cginfo_mb;
2488 cginfo = fr->cginfo;
2490 for (cg = cg0; cg < cg1; cg++)
2492 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2496 if (bLocalCG != NULL)
2498 for (cg = cg0; cg < cg1; cg++)
2500 bLocalCG[index_gl[cg]] = TRUE;
2505 static void make_dd_indices(gmx_domdec_t *dd,
2506 const int *gcgs_index, int cg_start)
2508 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2509 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2514 bLocalCG = dd->comm->bLocalCG;
2516 if (dd->nat_tot > dd->gatindex_nalloc)
2518 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2519 srenew(dd->gatindex, dd->gatindex_nalloc);
2522 nzone = dd->comm->zones.n;
2523 zone2cg = dd->comm->zones.cg_range;
2524 zone_ncg1 = dd->comm->zone_ncg1;
2525 index_gl = dd->index_gl;
2526 gatindex = dd->gatindex;
2527 bCGs = dd->comm->bCGs;
2529 if (zone2cg[1] != dd->ncg_home)
2531 gmx_incons("dd->ncg_zone is not up to date");
2534 /* Make the local to global and global to local atom index */
2535 a = dd->cgindex[cg_start];
2536 for (zone = 0; zone < nzone; zone++)
2544 cg0 = zone2cg[zone];
2546 cg1 = zone2cg[zone+1];
2547 cg1_p1 = cg0 + zone_ncg1[zone];
2549 for (cg = cg0; cg < cg1; cg++)
2554 /* Signal that this cg is from more than one pulse away */
2557 cg_gl = index_gl[cg];
2560 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2563 ga2la_set(dd->ga2la, a_gl, a, zone1);
2569 gatindex[a] = cg_gl;
2570 ga2la_set(dd->ga2la, cg_gl, a, zone1);
2577 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2580 int ncg, i, ngl, nerr;
2583 if (bLocalCG == NULL)
2587 for (i = 0; i < dd->ncg_tot; i++)
2589 if (!bLocalCG[dd->index_gl[i]])
2592 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
2597 for (i = 0; i < ncg_sys; i++)
2604 if (ngl != dd->ncg_tot)
2606 fprintf(stderr, "DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
2613 static void check_index_consistency(gmx_domdec_t *dd,
2614 int natoms_sys, int ncg_sys,
2617 int nerr, ngl, i, a, cell;
2622 if (dd->comm->DD_debug > 1)
2624 snew(have, natoms_sys);
2625 for (a = 0; a < dd->nat_tot; a++)
2627 if (have[dd->gatindex[a]] > 0)
2629 fprintf(stderr, "DD node %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2633 have[dd->gatindex[a]] = a + 1;
2639 snew(have, dd->nat_tot);
2642 for (i = 0; i < natoms_sys; i++)
2644 if (ga2la_get(dd->ga2la, i, &a, &cell))
2646 if (a >= dd->nat_tot)
2648 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2654 if (dd->gatindex[a] != i)
2656 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2663 if (ngl != dd->nat_tot)
2666 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2667 dd->rank, where, ngl, dd->nat_tot);
2669 for (a = 0; a < dd->nat_tot; a++)
2674 "DD node %d, %s: local atom %d, global %d has no global index\n",
2675 dd->rank, where, a+1, dd->gatindex[a]+1);
2680 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2684 gmx_fatal(FARGS, "DD node %d, %s: %d atom/cg index inconsistencies",
2685 dd->rank, where, nerr);
2689 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2696 /* Clear the whole list without searching */
2697 ga2la_clear(dd->ga2la);
2701 for (i = a_start; i < dd->nat_tot; i++)
2703 ga2la_del(dd->ga2la, dd->gatindex[i]);
2707 bLocalCG = dd->comm->bLocalCG;
2710 for (i = cg_start; i < dd->ncg_tot; i++)
2712 bLocalCG[dd->index_gl[i]] = FALSE;
2716 dd_clear_local_vsite_indices(dd);
2718 if (dd->constraints)
2720 dd_clear_local_constraint_indices(dd);
2724 /* This function should be used for moving the domain boudaries during DLB,
2725 * for obtaining the minimum cell size. It checks the initially set limit
2726 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2727 * and, possibly, a longer cut-off limit set for PME load balancing.
2729 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2733 cellsize_min = comm->cellsize_min[dim];
2735 if (!comm->bVacDLBNoLimit)
2737 /* The cut-off might have changed, e.g. by PME load balacning,
2738 * from the value used to set comm->cellsize_min, so check it.
2740 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2742 if (comm->bPMELoadBalDLBLimits)
2744 /* Check for the cut-off limit set by the PME load balancing */
2745 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2749 return cellsize_min;
2752 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2755 real grid_jump_limit;
2757 /* The distance between the boundaries of cells at distance
2758 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2759 * and by the fact that cells should not be shifted by more than
2760 * half their size, such that cg's only shift by one cell
2761 * at redecomposition.
2763 grid_jump_limit = comm->cellsize_limit;
2764 if (!comm->bVacDLBNoLimit)
2766 if (comm->bPMELoadBalDLBLimits)
2768 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2770 grid_jump_limit = max(grid_jump_limit,
2771 cutoff/comm->cd[dim_ind].np);
2774 return grid_jump_limit;
2777 static gmx_bool check_grid_jump(gmx_int64_t step,
2783 gmx_domdec_comm_t *comm;
2792 for (d = 1; d < dd->ndim; d++)
2795 limit = grid_jump_limit(comm, cutoff, d);
2796 bfac = ddbox->box_size[dim];
2797 if (ddbox->tric_dir[dim])
2799 bfac *= ddbox->skew_fac[dim];
2801 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2802 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2810 /* This error should never be triggered under normal
2811 * circumstances, but you never know ...
2813 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
2814 gmx_step_str(step, buf),
2815 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2823 static int dd_load_count(gmx_domdec_comm_t *comm)
2825 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2828 static float dd_force_load(gmx_domdec_comm_t *comm)
2835 if (comm->eFlop > 1)
2837 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2842 load = comm->cycl[ddCyclF];
2843 if (comm->cycl_n[ddCyclF] > 1)
2845 /* Subtract the maximum of the last n cycle counts
2846 * to get rid of possible high counts due to other sources,
2847 * for instance system activity, that would otherwise
2848 * affect the dynamic load balancing.
2850 load -= comm->cycl_max[ddCyclF];
2854 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2856 float gpu_wait, gpu_wait_sum;
2858 gpu_wait = comm->cycl[ddCyclWaitGPU];
2859 if (comm->cycl_n[ddCyclF] > 1)
2861 /* We should remove the WaitGPU time of the same MD step
2862 * as the one with the maximum F time, since the F time
2863 * and the wait time are not independent.
2864 * Furthermore, the step for the max F time should be chosen
2865 * the same on all ranks that share the same GPU.
2866 * But to keep the code simple, we remove the average instead.
2867 * The main reason for artificially long times at some steps
2868 * is spurious CPU activity or MPI time, so we don't expect
2869 * that changes in the GPU wait time matter a lot here.
2871 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2873 /* Sum the wait times over the ranks that share the same GPU */
2874 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2875 comm->mpi_comm_gpu_shared);
2876 /* Replace the wait time by the average over the ranks */
2877 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
2885 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2887 gmx_domdec_comm_t *comm;
2892 snew(*dim_f, dd->nc[dim]+1);
2894 for (i = 1; i < dd->nc[dim]; i++)
2896 if (comm->slb_frac[dim])
2898 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2902 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2905 (*dim_f)[dd->nc[dim]] = 1;
2908 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2910 int pmeindex, slab, nso, i;
2913 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2919 ddpme->dim = dimind;
2921 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2923 ddpme->nslab = (ddpme->dim == 0 ?
2924 dd->comm->npmenodes_x :
2925 dd->comm->npmenodes_y);
2927 if (ddpme->nslab <= 1)
2932 nso = dd->comm->npmenodes/ddpme->nslab;
2933 /* Determine for each PME slab the PP location range for dimension dim */
2934 snew(ddpme->pp_min, ddpme->nslab);
2935 snew(ddpme->pp_max, ddpme->nslab);
2936 for (slab = 0; slab < ddpme->nslab; slab++)
2938 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2939 ddpme->pp_max[slab] = 0;
2941 for (i = 0; i < dd->nnodes; i++)
2943 ddindex2xyz(dd->nc, i, xyz);
2944 /* For y only use our y/z slab.
2945 * This assumes that the PME x grid size matches the DD grid size.
2947 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2949 pmeindex = ddindex2pmeindex(dd, i);
2952 slab = pmeindex/nso;
2956 slab = pmeindex % ddpme->nslab;
2958 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
2959 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
2963 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
2966 int dd_pme_maxshift_x(gmx_domdec_t *dd)
2968 if (dd->comm->ddpme[0].dim == XX)
2970 return dd->comm->ddpme[0].maxshift;
2978 int dd_pme_maxshift_y(gmx_domdec_t *dd)
2980 if (dd->comm->ddpme[0].dim == YY)
2982 return dd->comm->ddpme[0].maxshift;
2984 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2986 return dd->comm->ddpme[1].maxshift;
2994 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
2995 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
2997 gmx_domdec_comm_t *comm;
3000 real range, pme_boundary;
3004 nc = dd->nc[ddpme->dim];
3007 if (!ddpme->dim_match)
3009 /* PP decomposition is not along dim: the worst situation */
3012 else if (ns <= 3 || (bUniform && ns == nc))
3014 /* The optimal situation */
3019 /* We need to check for all pme nodes which nodes they
3020 * could possibly need to communicate with.
3022 xmin = ddpme->pp_min;
3023 xmax = ddpme->pp_max;
3024 /* Allow for atoms to be maximally 2/3 times the cut-off
3025 * out of their DD cell. This is a reasonable balance between
3026 * between performance and support for most charge-group/cut-off
3029 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3030 /* Avoid extra communication when we are exactly at a boundary */
3034 for (s = 0; s < ns; s++)
3036 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3037 pme_boundary = (real)s/ns;
3040 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3042 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3046 pme_boundary = (real)(s+1)/ns;
3049 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3051 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3058 ddpme->maxshift = sh;
3062 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3063 ddpme->dim, ddpme->maxshift);
3067 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3071 for (d = 0; d < dd->ndim; d++)
3074 if (dim < ddbox->nboundeddim &&
3075 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3076 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3078 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3079 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3080 dd->nc[dim], dd->comm->cellsize_limit);
3086 setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY
3089 /* Set the domain boundaries. Use for static (or no) load balancing,
3090 * and also for the starting state for dynamic load balancing.
3091 * setmode determine if and where the boundaries are stored, use enum above.
3092 * Returns the number communication pulses in npulse.
3094 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3095 int setmode, ivec npulse)
3097 gmx_domdec_comm_t *comm;
3100 real *cell_x, cell_dx, cellsize;
3104 for (d = 0; d < DIM; d++)
3106 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
3108 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3111 cell_dx = ddbox->box_size[d]/dd->nc[d];
3114 case setcellsizeslbMASTER:
3115 for (j = 0; j < dd->nc[d]+1; j++)
3117 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3120 case setcellsizeslbLOCAL:
3121 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3122 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
3127 cellsize = cell_dx*ddbox->skew_fac[d];
3128 while (cellsize*npulse[d] < comm->cutoff)
3132 cellsize_min[d] = cellsize;
3136 /* Statically load balanced grid */
3137 /* Also when we are not doing a master distribution we determine
3138 * all cell borders in a loop to obtain identical values
3139 * to the master distribution case and to determine npulse.
3141 if (setmode == setcellsizeslbMASTER)
3143 cell_x = dd->ma->cell_x[d];
3147 snew(cell_x, dd->nc[d]+1);
3149 cell_x[0] = ddbox->box0[d];
3150 for (j = 0; j < dd->nc[d]; j++)
3152 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3153 cell_x[j+1] = cell_x[j] + cell_dx;
3154 cellsize = cell_dx*ddbox->skew_fac[d];
3155 while (cellsize*npulse[d] < comm->cutoff &&
3156 npulse[d] < dd->nc[d]-1)
3160 cellsize_min[d] = min(cellsize_min[d], cellsize);
3162 if (setmode == setcellsizeslbLOCAL)
3164 comm->cell_x0[d] = cell_x[dd->ci[d]];
3165 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3167 if (setmode != setcellsizeslbMASTER)
3172 /* The following limitation is to avoid that a cell would receive
3173 * some of its own home charge groups back over the periodic boundary.
3174 * Double charge groups cause trouble with the global indices.
3176 if (d < ddbox->npbcdim &&
3177 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3179 char error_string[STRLEN];
3181 sprintf(error_string,
3182 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3183 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3185 dd->nc[d], dd->nc[d],
3186 dd->nnodes > dd->nc[d] ? "cells" : "processors");
3188 if (setmode == setcellsizeslbLOCAL)
3190 gmx_fatal_collective(FARGS, NULL, dd, error_string);
3194 gmx_fatal(FARGS, error_string);
3199 if (!comm->bDynLoadBal)
3201 copy_rvec(cellsize_min, comm->cellsize_min);
3204 for (d = 0; d < comm->npmedecompdim; d++)
3206 set_pme_maxshift(dd, &comm->ddpme[d],
3207 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3208 comm->ddpme[d].slb_dim_f);
3213 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3214 int d, int dim, gmx_domdec_root_t *root,
3216 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3218 gmx_domdec_comm_t *comm;
3219 int ncd, i, j, nmin, nmin_old;
3220 gmx_bool bLimLo, bLimHi;
3222 real fac, halfway, cellsize_limit_f_i, region_size;
3223 gmx_bool bPBC, bLastHi = FALSE;
3224 int nrange[] = {range[0], range[1]};
3226 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
3232 bPBC = (dim < ddbox->npbcdim);
3234 cell_size = root->buf_ncd;
3238 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3241 /* First we need to check if the scaling does not make cells
3242 * smaller than the smallest allowed size.
3243 * We need to do this iteratively, since if a cell is too small,
3244 * it needs to be enlarged, which makes all the other cells smaller,
3245 * which could in turn make another cell smaller than allowed.
3247 for (i = range[0]; i < range[1]; i++)
3249 root->bCellMin[i] = FALSE;
3255 /* We need the total for normalization */
3257 for (i = range[0]; i < range[1]; i++)
3259 if (root->bCellMin[i] == FALSE)
3261 fac += cell_size[i];
3264 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3265 /* Determine the cell boundaries */
3266 for (i = range[0]; i < range[1]; i++)
3268 if (root->bCellMin[i] == FALSE)
3270 cell_size[i] *= fac;
3271 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3273 cellsize_limit_f_i = 0;
3277 cellsize_limit_f_i = cellsize_limit_f;
3279 if (cell_size[i] < cellsize_limit_f_i)
3281 root->bCellMin[i] = TRUE;
3282 cell_size[i] = cellsize_limit_f_i;
3286 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
3289 while (nmin > nmin_old);
3292 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3293 /* For this check we should not use DD_CELL_MARGIN,
3294 * but a slightly smaller factor,
3295 * since rounding could get use below the limit.
3297 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3300 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3301 gmx_step_str(step, buf),
3302 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3303 ncd, comm->cellsize_min[dim]);
3306 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3310 /* Check if the boundary did not displace more than halfway
3311 * each of the cells it bounds, as this could cause problems,
3312 * especially when the differences between cell sizes are large.
3313 * If changes are applied, they will not make cells smaller
3314 * than the cut-off, as we check all the boundaries which
3315 * might be affected by a change and if the old state was ok,
3316 * the cells will at most be shrunk back to their old size.
3318 for (i = range[0]+1; i < range[1]; i++)
3320 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3321 if (root->cell_f[i] < halfway)
3323 root->cell_f[i] = halfway;
3324 /* Check if the change also causes shifts of the next boundaries */
3325 for (j = i+1; j < range[1]; j++)
3327 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3329 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3333 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3334 if (root->cell_f[i] > halfway)
3336 root->cell_f[i] = halfway;
3337 /* Check if the change also causes shifts of the next boundaries */
3338 for (j = i-1; j >= range[0]+1; j--)
3340 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3342 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3349 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3350 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3351 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3352 * for a and b nrange is used */
3355 /* Take care of the staggering of the cell boundaries */
3358 for (i = range[0]; i < range[1]; i++)
3360 root->cell_f_max0[i] = root->cell_f[i];
3361 root->cell_f_min1[i] = root->cell_f[i+1];
3366 for (i = range[0]+1; i < range[1]; i++)
3368 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3369 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3370 if (bLimLo && bLimHi)
3372 /* Both limits violated, try the best we can */
3373 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3374 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3375 nrange[0] = range[0];
3377 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3380 nrange[1] = range[1];
3381 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3387 /* root->cell_f[i] = root->bound_min[i]; */
3388 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3391 else if (bLimHi && !bLastHi)
3394 if (nrange[1] < range[1]) /* found a LimLo before */
3396 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3397 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3398 nrange[0] = nrange[1];
3400 root->cell_f[i] = root->bound_max[i];
3402 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3404 nrange[1] = range[1];
3407 if (nrange[1] < range[1]) /* found last a LimLo */
3409 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3410 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3411 nrange[0] = nrange[1];
3412 nrange[1] = range[1];
3413 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3415 else if (nrange[0] > range[0]) /* found at least one LimHi */
3417 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3424 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3425 int d, int dim, gmx_domdec_root_t *root,
3426 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3427 gmx_bool bUniform, gmx_int64_t step)
3429 gmx_domdec_comm_t *comm;
3430 int ncd, d1, i, j, pos;
3432 real load_aver, load_i, imbalance, change, change_max, sc;
3433 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3437 int range[] = { 0, 0 };
3441 /* Convert the maximum change from the input percentage to a fraction */
3442 change_limit = comm->dlb_scale_lim*0.01;
3446 bPBC = (dim < ddbox->npbcdim);
3448 cell_size = root->buf_ncd;
3450 /* Store the original boundaries */
3451 for (i = 0; i < ncd+1; i++)
3453 root->old_cell_f[i] = root->cell_f[i];
3457 for (i = 0; i < ncd; i++)
3459 cell_size[i] = 1.0/ncd;
3462 else if (dd_load_count(comm))
3464 load_aver = comm->load[d].sum_m/ncd;
3466 for (i = 0; i < ncd; i++)
3468 /* Determine the relative imbalance of cell i */
3469 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3470 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3471 /* Determine the change of the cell size using underrelaxation */
3472 change = -relax*imbalance;
3473 change_max = max(change_max, max(change, -change));
3475 /* Limit the amount of scaling.
3476 * We need to use the same rescaling for all cells in one row,
3477 * otherwise the load balancing might not converge.
3480 if (change_max > change_limit)
3482 sc *= change_limit/change_max;
3484 for (i = 0; i < ncd; i++)
3486 /* Determine the relative imbalance of cell i */
3487 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3488 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3489 /* Determine the change of the cell size using underrelaxation */
3490 change = -sc*imbalance;
3491 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3495 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3496 cellsize_limit_f *= DD_CELL_MARGIN;
3497 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3498 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3499 if (ddbox->tric_dir[dim])
3501 cellsize_limit_f /= ddbox->skew_fac[dim];
3502 dist_min_f /= ddbox->skew_fac[dim];
3504 if (bDynamicBox && d > 0)
3506 dist_min_f *= DD_PRES_SCALE_MARGIN;
3508 if (d > 0 && !bUniform)
3510 /* Make sure that the grid is not shifted too much */
3511 for (i = 1; i < ncd; i++)
3513 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3515 gmx_incons("Inconsistent DD boundary staggering limits!");
3517 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3518 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3521 root->bound_min[i] += 0.5*space;
3523 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3524 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3527 root->bound_max[i] += 0.5*space;
3532 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3534 root->cell_f_max0[i-1] + dist_min_f,
3535 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3536 root->cell_f_min1[i] - dist_min_f);
3541 root->cell_f[0] = 0;
3542 root->cell_f[ncd] = 1;
3543 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3546 /* After the checks above, the cells should obey the cut-off
3547 * restrictions, but it does not hurt to check.
3549 for (i = 0; i < ncd; i++)
3553 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3554 dim, i, root->cell_f[i], root->cell_f[i+1]);
3557 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3558 root->cell_f[i+1] - root->cell_f[i] <
3559 cellsize_limit_f/DD_CELL_MARGIN)
3563 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3564 gmx_step_str(step, buf), dim2char(dim), i,
3565 (root->cell_f[i+1] - root->cell_f[i])
3566 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3571 /* Store the cell boundaries of the lower dimensions at the end */
3572 for (d1 = 0; d1 < d; d1++)
3574 root->cell_f[pos++] = comm->cell_f0[d1];
3575 root->cell_f[pos++] = comm->cell_f1[d1];
3578 if (d < comm->npmedecompdim)
3580 /* The master determines the maximum shift for
3581 * the coordinate communication between separate PME nodes.
3583 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
3585 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3588 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3592 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3593 gmx_ddbox_t *ddbox, int dimind)
3595 gmx_domdec_comm_t *comm;
3600 /* Set the cell dimensions */
3601 dim = dd->dim[dimind];
3602 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3603 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3604 if (dim >= ddbox->nboundeddim)
3606 comm->cell_x0[dim] += ddbox->box0[dim];
3607 comm->cell_x1[dim] += ddbox->box0[dim];
3611 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3612 int d, int dim, real *cell_f_row,
3615 gmx_domdec_comm_t *comm;
3621 /* Each node would only need to know two fractions,
3622 * but it is probably cheaper to broadcast the whole array.
3624 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3625 0, comm->mpi_comm_load[d]);
3627 /* Copy the fractions for this dimension from the buffer */
3628 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3629 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3630 /* The whole array was communicated, so set the buffer position */
3631 pos = dd->nc[dim] + 1;
3632 for (d1 = 0; d1 <= d; d1++)
3636 /* Copy the cell fractions of the lower dimensions */
3637 comm->cell_f0[d1] = cell_f_row[pos++];
3638 comm->cell_f1[d1] = cell_f_row[pos++];
3640 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3642 /* Convert the communicated shift from float to int */
3643 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3646 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3650 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3651 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3652 gmx_bool bUniform, gmx_int64_t step)
3654 gmx_domdec_comm_t *comm;
3656 gmx_bool bRowMember, bRowRoot;
3661 for (d = 0; d < dd->ndim; d++)
3666 for (d1 = d; d1 < dd->ndim; d1++)
3668 if (dd->ci[dd->dim[d1]] > 0)
3681 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3682 ddbox, bDynamicBox, bUniform, step);
3683 cell_f_row = comm->root[d]->cell_f;
3687 cell_f_row = comm->cell_f_row;
3689 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
3694 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3698 /* This function assumes the box is static and should therefore
3699 * not be called when the box has changed since the last
3700 * call to dd_partition_system.
3702 for (d = 0; d < dd->ndim; d++)
3704 relative_to_absolute_cell_bounds(dd, ddbox, d);
3710 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3711 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3712 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3713 gmx_wallcycle_t wcycle)
3715 gmx_domdec_comm_t *comm;
3722 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3723 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3724 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3726 else if (bDynamicBox)
3728 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3731 /* Set the dimensions for which no DD is used */
3732 for (dim = 0; dim < DIM; dim++)
3734 if (dd->nc[dim] == 1)
3736 comm->cell_x0[dim] = 0;
3737 comm->cell_x1[dim] = ddbox->box_size[dim];
3738 if (dim >= ddbox->nboundeddim)
3740 comm->cell_x0[dim] += ddbox->box0[dim];
3741 comm->cell_x1[dim] += ddbox->box0[dim];
3747 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3750 gmx_domdec_comm_dim_t *cd;
3752 for (d = 0; d < dd->ndim; d++)
3754 cd = &dd->comm->cd[d];
3755 np = npulse[dd->dim[d]];
3756 if (np > cd->np_nalloc)
3760 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3761 dim2char(dd->dim[d]), np);
3763 if (DDMASTER(dd) && cd->np_nalloc > 0)
3765 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3767 srenew(cd->ind, np);
3768 for (i = cd->np_nalloc; i < np; i++)
3770 cd->ind[i].index = NULL;
3771 cd->ind[i].nalloc = 0;
3780 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3781 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3782 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3783 gmx_wallcycle_t wcycle)
3785 gmx_domdec_comm_t *comm;
3791 /* Copy the old cell boundaries for the cg displacement check */
3792 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3793 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3795 if (comm->bDynLoadBal)
3799 check_box_size(dd, ddbox);
3801 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3805 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
3806 realloc_comm_ind(dd, npulse);
3811 for (d = 0; d < DIM; d++)
3813 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3814 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
3819 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3821 rvec cell_ns_x0, rvec cell_ns_x1,
3824 gmx_domdec_comm_t *comm;
3829 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3831 dim = dd->dim[dim_ind];
3833 /* Without PBC we don't have restrictions on the outer cells */
3834 if (!(dim >= ddbox->npbcdim &&
3835 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3836 comm->bDynLoadBal &&
3837 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3838 comm->cellsize_min[dim])
3841 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3842 gmx_step_str(step, buf), dim2char(dim),
3843 comm->cell_x1[dim] - comm->cell_x0[dim],
3844 ddbox->skew_fac[dim],
3845 dd->comm->cellsize_min[dim],
3846 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3850 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3852 /* Communicate the boundaries and update cell_ns_x0/1 */
3853 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3854 if (dd->bGridJump && dd->ndim > 1)
3856 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
3861 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3865 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3873 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3874 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3883 static void check_screw_box(matrix box)
3885 /* Mathematical limitation */
3886 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3888 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3891 /* Limitation due to the asymmetry of the eighth shell method */
3892 if (box[ZZ][YY] != 0)
3894 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
3898 static void distribute_cg(FILE *fplog, gmx_int64_t step,
3899 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3902 gmx_domdec_master_t *ma;
3903 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3904 int i, icg, j, k, k0, k1, d, npbcdim;
3906 rvec box_size, cg_cm;
3908 real nrcg, inv_ncg, pos_d;
3910 gmx_bool bUnbounded, bScrew;
3914 if (tmp_ind == NULL)
3916 snew(tmp_nalloc, dd->nnodes);
3917 snew(tmp_ind, dd->nnodes);
3918 for (i = 0; i < dd->nnodes; i++)
3920 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3921 snew(tmp_ind[i], tmp_nalloc[i]);
3925 /* Clear the count */
3926 for (i = 0; i < dd->nnodes; i++)
3932 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3934 cgindex = cgs->index;
3936 /* Compute the center of geometry for all charge groups */
3937 for (icg = 0; icg < cgs->nr; icg++)
3940 k1 = cgindex[icg+1];
3944 copy_rvec(pos[k0], cg_cm);
3951 for (k = k0; (k < k1); k++)
3953 rvec_inc(cg_cm, pos[k]);
3955 for (d = 0; (d < DIM); d++)
3957 cg_cm[d] *= inv_ncg;
3960 /* Put the charge group in the box and determine the cell index */
3961 for (d = DIM-1; d >= 0; d--)
3964 if (d < dd->npbcdim)
3966 bScrew = (dd->bScrewPBC && d == XX);
3967 if (tric_dir[d] && dd->nc[d] > 1)
3969 /* Use triclinic coordintates for this dimension */
3970 for (j = d+1; j < DIM; j++)
3972 pos_d += cg_cm[j]*tcm[j][d];
3975 while (pos_d >= box[d][d])
3978 rvec_dec(cg_cm, box[d]);
3981 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3982 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3984 for (k = k0; (k < k1); k++)
3986 rvec_dec(pos[k], box[d]);
3989 pos[k][YY] = box[YY][YY] - pos[k][YY];
3990 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3997 rvec_inc(cg_cm, box[d]);
4000 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
4001 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
4003 for (k = k0; (k < k1); k++)
4005 rvec_inc(pos[k], box[d]);
4008 pos[k][YY] = box[YY][YY] - pos[k][YY];
4009 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4014 /* This could be done more efficiently */
4016 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
4021 i = dd_index(dd->nc, ind);
4022 if (ma->ncg[i] == tmp_nalloc[i])
4024 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
4025 srenew(tmp_ind[i], tmp_nalloc[i]);
4027 tmp_ind[i][ma->ncg[i]] = icg;
4029 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
4033 for (i = 0; i < dd->nnodes; i++)
4036 for (k = 0; k < ma->ncg[i]; k++)
4038 ma->cg[k1++] = tmp_ind[i][k];
4041 ma->index[dd->nnodes] = k1;
4043 for (i = 0; i < dd->nnodes; i++)
4053 fprintf(fplog, "Charge group distribution at step %s:",
4054 gmx_step_str(step, buf));
4055 for (i = 0; i < dd->nnodes; i++)
4057 fprintf(fplog, " %d", ma->ncg[i]);
4059 fprintf(fplog, "\n");
4063 static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4064 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4067 gmx_domdec_master_t *ma = NULL;
4070 int *ibuf, buf2[2] = { 0, 0 };
4071 gmx_bool bMaster = DDMASTER(dd);
4079 check_screw_box(box);
4082 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
4084 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
4085 for (i = 0; i < dd->nnodes; i++)
4087 ma->ibuf[2*i] = ma->ncg[i];
4088 ma->ibuf[2*i+1] = ma->nat[i];
4096 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4098 dd->ncg_home = buf2[0];
4099 dd->nat_home = buf2[1];
4100 dd->ncg_tot = dd->ncg_home;
4101 dd->nat_tot = dd->nat_home;
4102 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4104 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4105 srenew(dd->index_gl, dd->cg_nalloc);
4106 srenew(dd->cgindex, dd->cg_nalloc+1);
4110 for (i = 0; i < dd->nnodes; i++)
4112 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4113 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4118 DDMASTER(dd) ? ma->ibuf : NULL,
4119 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4120 DDMASTER(dd) ? ma->cg : NULL,
4121 dd->ncg_home*sizeof(int), dd->index_gl);
4123 /* Determine the home charge group sizes */
4125 for (i = 0; i < dd->ncg_home; i++)
4127 cg_gl = dd->index_gl[i];
4129 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4134 fprintf(debug, "Home charge groups:\n");
4135 for (i = 0; i < dd->ncg_home; i++)
4137 fprintf(debug, " %d", dd->index_gl[i]);
4140 fprintf(debug, "\n");
4143 fprintf(debug, "\n");
4147 static int compact_and_copy_vec_at(int ncg, int *move,
4150 rvec *src, gmx_domdec_comm_t *comm,
4153 int m, icg, i, i0, i1, nrcg;
4159 for (m = 0; m < DIM*2; m++)
4165 for (icg = 0; icg < ncg; icg++)
4167 i1 = cgindex[icg+1];
4173 /* Compact the home array in place */
4174 for (i = i0; i < i1; i++)
4176 copy_rvec(src[i], src[home_pos++]);
4182 /* Copy to the communication buffer */
4184 pos_vec[m] += 1 + vec*nrcg;
4185 for (i = i0; i < i1; i++)
4187 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4189 pos_vec[m] += (nvec - vec - 1)*nrcg;
4193 home_pos += i1 - i0;
4201 static int compact_and_copy_vec_cg(int ncg, int *move,
4203 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4206 int m, icg, i0, i1, nrcg;
4212 for (m = 0; m < DIM*2; m++)
4218 for (icg = 0; icg < ncg; icg++)
4220 i1 = cgindex[icg+1];
4226 /* Compact the home array in place */
4227 copy_rvec(src[icg], src[home_pos++]);
4233 /* Copy to the communication buffer */
4234 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
4235 pos_vec[m] += 1 + nrcg*nvec;
4247 static int compact_ind(int ncg, int *move,
4248 int *index_gl, int *cgindex,
4250 gmx_ga2la_t ga2la, char *bLocalCG,
4253 int cg, nat, a0, a1, a, a_gl;
4258 for (cg = 0; cg < ncg; cg++)
4264 /* Compact the home arrays in place.
4265 * Anything that can be done here avoids access to global arrays.
4267 cgindex[home_pos] = nat;
4268 for (a = a0; a < a1; a++)
4271 gatindex[nat] = a_gl;
4272 /* The cell number stays 0, so we don't need to set it */
4273 ga2la_change_la(ga2la, a_gl, nat);
4276 index_gl[home_pos] = index_gl[cg];
4277 cginfo[home_pos] = cginfo[cg];
4278 /* The charge group remains local, so bLocalCG does not change */
4283 /* Clear the global indices */
4284 for (a = a0; a < a1; a++)
4286 ga2la_del(ga2la, gatindex[a]);
4290 bLocalCG[index_gl[cg]] = FALSE;
4294 cgindex[home_pos] = nat;
4299 static void clear_and_mark_ind(int ncg, int *move,
4300 int *index_gl, int *cgindex, int *gatindex,
4301 gmx_ga2la_t ga2la, char *bLocalCG,
4306 for (cg = 0; cg < ncg; cg++)
4312 /* Clear the global indices */
4313 for (a = a0; a < a1; a++)
4315 ga2la_del(ga2la, gatindex[a]);
4319 bLocalCG[index_gl[cg]] = FALSE;
4321 /* Signal that this cg has moved using the ns cell index.
4322 * Here we set it to -1. fill_grid will change it
4323 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4325 cell_index[cg] = -1;
4330 static void print_cg_move(FILE *fplog,
4332 gmx_int64_t step, int cg, int dim, int dir,
4333 gmx_bool bHaveLimitdAndCMOld, real limitd,
4334 rvec cm_old, rvec cm_new, real pos_d)
4336 gmx_domdec_comm_t *comm;
4341 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4342 if (bHaveLimitdAndCMOld)
4344 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4345 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4349 fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4350 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4352 fprintf(fplog, "distance out of cell %f\n",
4353 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4354 if (bHaveLimitdAndCMOld)
4356 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4357 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4359 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4360 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4361 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4363 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4364 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4366 comm->cell_x0[dim], comm->cell_x1[dim]);
4369 static void cg_move_error(FILE *fplog,
4371 gmx_int64_t step, int cg, int dim, int dir,
4372 gmx_bool bHaveLimitdAndCMOld, real limitd,
4373 rvec cm_old, rvec cm_new, real pos_d)
4377 print_cg_move(fplog, dd, step, cg, dim, dir,
4378 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4380 print_cg_move(stderr, dd, step, cg, dim, dir,
4381 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4383 "A charge group moved too far between two domain decomposition steps\n"
4384 "This usually means that your system is not well equilibrated");
4387 static void rotate_state_atom(t_state *state, int a)
4391 for (est = 0; est < estNR; est++)
4393 if (EST_DISTR(est) && (state->flags & (1<<est)))
4398 /* Rotate the complete state; for a rectangular box only */
4399 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4400 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4403 state->v[a][YY] = -state->v[a][YY];
4404 state->v[a][ZZ] = -state->v[a][ZZ];
4407 state->sd_X[a][YY] = -state->sd_X[a][YY];
4408 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4411 state->cg_p[a][YY] = -state->cg_p[a][YY];
4412 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4414 case estDISRE_INITF:
4415 case estDISRE_RM3TAV:
4416 case estORIRE_INITF:
4418 /* These are distances, so not affected by rotation */
4421 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4427 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4429 if (natoms > comm->moved_nalloc)
4431 /* Contents should be preserved here */
4432 comm->moved_nalloc = over_alloc_dd(natoms);
4433 srenew(comm->moved, comm->moved_nalloc);
4439 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4442 ivec tric_dir, matrix tcm,
4443 rvec cell_x0, rvec cell_x1,
4444 rvec limitd, rvec limit0, rvec limit1,
4446 int cg_start, int cg_end,
4451 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4452 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4456 real inv_ncg, pos_d;
4459 npbcdim = dd->npbcdim;
4461 for (cg = cg_start; cg < cg_end; cg++)
4468 copy_rvec(state->x[k0], cm_new);
4475 for (k = k0; (k < k1); k++)
4477 rvec_inc(cm_new, state->x[k]);
4479 for (d = 0; (d < DIM); d++)
4481 cm_new[d] = inv_ncg*cm_new[d];
4486 /* Do pbc and check DD cell boundary crossings */
4487 for (d = DIM-1; d >= 0; d--)
4491 bScrew = (dd->bScrewPBC && d == XX);
4492 /* Determine the location of this cg in lattice coordinates */
4496 for (d2 = d+1; d2 < DIM; d2++)
4498 pos_d += cm_new[d2]*tcm[d2][d];
4501 /* Put the charge group in the triclinic unit-cell */
4502 if (pos_d >= cell_x1[d])
4504 if (pos_d >= limit1[d])
4506 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4507 cg_cm[cg], cm_new, pos_d);
4510 if (dd->ci[d] == dd->nc[d] - 1)
4512 rvec_dec(cm_new, state->box[d]);
4515 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4516 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4518 for (k = k0; (k < k1); k++)
4520 rvec_dec(state->x[k], state->box[d]);
4523 rotate_state_atom(state, k);
4528 else if (pos_d < cell_x0[d])
4530 if (pos_d < limit0[d])
4532 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4533 cg_cm[cg], cm_new, pos_d);
4538 rvec_inc(cm_new, state->box[d]);
4541 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4542 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4544 for (k = k0; (k < k1); k++)
4546 rvec_inc(state->x[k], state->box[d]);
4549 rotate_state_atom(state, k);
4555 else if (d < npbcdim)
4557 /* Put the charge group in the rectangular unit-cell */
4558 while (cm_new[d] >= state->box[d][d])
4560 rvec_dec(cm_new, state->box[d]);
4561 for (k = k0; (k < k1); k++)
4563 rvec_dec(state->x[k], state->box[d]);
4566 while (cm_new[d] < 0)
4568 rvec_inc(cm_new, state->box[d]);
4569 for (k = k0; (k < k1); k++)
4571 rvec_inc(state->x[k], state->box[d]);
4577 copy_rvec(cm_new, cg_cm[cg]);
4579 /* Determine where this cg should go */
4582 for (d = 0; d < dd->ndim; d++)
4587 flag |= DD_FLAG_FW(d);
4593 else if (dev[dim] == -1)
4595 flag |= DD_FLAG_BW(d);
4598 if (dd->nc[dim] > 2)
4609 /* Temporarily store the flag in move */
4610 move[cg] = mc + flag;
4614 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4615 gmx_domdec_t *dd, ivec tric_dir,
4616 t_state *state, rvec **f,
4625 int ncg[DIM*2], nat[DIM*2];
4626 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4627 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4628 int sbuf[2], rbuf[2];
4629 int home_pos_cg, home_pos_at, buf_pos;
4631 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4634 real inv_ncg, pos_d;
4636 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4638 cginfo_mb_t *cginfo_mb;
4639 gmx_domdec_comm_t *comm;
4641 int nthread, thread;
4645 check_screw_box(state->box);
4649 if (fr->cutoff_scheme == ecutsGROUP)
4654 for (i = 0; i < estNR; i++)
4660 case estX: /* Always present */ break;
4661 case estV: bV = (state->flags & (1<<i)); break;
4662 case estSDX: bSDX = (state->flags & (1<<i)); break;
4663 case estCGP: bCGP = (state->flags & (1<<i)); break;
4666 case estDISRE_INITF:
4667 case estDISRE_RM3TAV:
4668 case estORIRE_INITF:
4670 /* No processing required */
4673 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4678 if (dd->ncg_tot > comm->nalloc_int)
4680 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4681 srenew(comm->buf_int, comm->nalloc_int);
4683 move = comm->buf_int;
4685 /* Clear the count */
4686 for (c = 0; c < dd->ndim*2; c++)
4692 npbcdim = dd->npbcdim;
4694 for (d = 0; (d < DIM); d++)
4696 limitd[d] = dd->comm->cellsize_min[d];
4697 if (d >= npbcdim && dd->ci[d] == 0)
4699 cell_x0[d] = -GMX_FLOAT_MAX;
4703 cell_x0[d] = comm->cell_x0[d];
4705 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4707 cell_x1[d] = GMX_FLOAT_MAX;
4711 cell_x1[d] = comm->cell_x1[d];
4715 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4716 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4720 /* We check after communication if a charge group moved
4721 * more than one cell. Set the pre-comm check limit to float_max.
4723 limit0[d] = -GMX_FLOAT_MAX;
4724 limit1[d] = GMX_FLOAT_MAX;
4728 make_tric_corr_matrix(npbcdim, state->box, tcm);
4730 cgindex = dd->cgindex;
4732 nthread = gmx_omp_nthreads_get(emntDomdec);
4734 /* Compute the center of geometry for all home charge groups
4735 * and put them in the box and determine where they should go.
4737 #pragma omp parallel for num_threads(nthread) schedule(static)
4738 for (thread = 0; thread < nthread; thread++)
4740 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4741 cell_x0, cell_x1, limitd, limit0, limit1,
4743 ( thread *dd->ncg_home)/nthread,
4744 ((thread+1)*dd->ncg_home)/nthread,
4745 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4749 for (cg = 0; cg < dd->ncg_home; cg++)
4754 flag = mc & ~DD_FLAG_NRCG;
4755 mc = mc & DD_FLAG_NRCG;
4758 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4760 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4761 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4763 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4764 /* We store the cg size in the lower 16 bits
4765 * and the place where the charge group should go
4766 * in the next 6 bits. This saves some communication volume.
4768 nrcg = cgindex[cg+1] - cgindex[cg];
4769 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4775 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4776 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4779 for (i = 0; i < dd->ndim*2; i++)
4781 *ncg_moved += ncg[i];
4798 /* Make sure the communication buffers are large enough */
4799 for (mc = 0; mc < dd->ndim*2; mc++)
4801 nvr = ncg[mc] + nat[mc]*nvec;
4802 if (nvr > comm->cgcm_state_nalloc[mc])
4804 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4805 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4809 switch (fr->cutoff_scheme)
4812 /* Recalculating cg_cm might be cheaper than communicating,
4813 * but that could give rise to rounding issues.
4816 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4817 nvec, cg_cm, comm, bCompact);
4820 /* Without charge groups we send the moved atom coordinates
4821 * over twice. This is so the code below can be used without
4822 * many conditionals for both for with and without charge groups.
4825 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4826 nvec, state->x, comm, FALSE);
4829 home_pos_cg -= *ncg_moved;
4833 gmx_incons("unimplemented");
4839 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4840 nvec, vec++, state->x, comm, bCompact);
4843 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4844 nvec, vec++, state->v, comm, bCompact);
4848 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4849 nvec, vec++, state->sd_X, comm, bCompact);
4853 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4854 nvec, vec++, state->cg_p, comm, bCompact);
4859 compact_ind(dd->ncg_home, move,
4860 dd->index_gl, dd->cgindex, dd->gatindex,
4861 dd->ga2la, comm->bLocalCG,
4866 if (fr->cutoff_scheme == ecutsVERLET)
4868 moved = get_moved(comm, dd->ncg_home);
4870 for (k = 0; k < dd->ncg_home; k++)
4877 moved = fr->ns.grid->cell_index;
4880 clear_and_mark_ind(dd->ncg_home, move,
4881 dd->index_gl, dd->cgindex, dd->gatindex,
4882 dd->ga2la, comm->bLocalCG,
4886 cginfo_mb = fr->cginfo_mb;
4888 *ncg_stay_home = home_pos_cg;
4889 for (d = 0; d < dd->ndim; d++)
4895 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4898 /* Communicate the cg and atom counts */
4903 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4904 d, dir, sbuf[0], sbuf[1]);
4906 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4908 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4910 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4911 srenew(comm->buf_int, comm->nalloc_int);
4914 /* Communicate the charge group indices, sizes and flags */
4915 dd_sendrecv_int(dd, d, dir,
4916 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4917 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4919 nvs = ncg[cdd] + nat[cdd]*nvec;
4920 i = rbuf[0] + rbuf[1] *nvec;
4921 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4923 /* Communicate cgcm and state */
4924 dd_sendrecv_rvec(dd, d, dir,
4925 comm->cgcm_state[cdd], nvs,
4926 comm->vbuf.v+nvr, i);
4927 ncg_recv += rbuf[0];
4928 nat_recv += rbuf[1];
4932 /* Process the received charge groups */
4934 for (cg = 0; cg < ncg_recv; cg++)
4936 flag = comm->buf_int[cg*DD_CGIBS+1];
4938 if (dim >= npbcdim && dd->nc[dim] > 2)
4940 /* No pbc in this dim and more than one domain boundary.
4941 * We do a separate check if a charge group didn't move too far.
4943 if (((flag & DD_FLAG_FW(d)) &&
4944 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4945 ((flag & DD_FLAG_BW(d)) &&
4946 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4948 cg_move_error(fplog, dd, step, cg, dim,
4949 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4951 comm->vbuf.v[buf_pos],
4952 comm->vbuf.v[buf_pos],
4953 comm->vbuf.v[buf_pos][dim]);
4960 /* Check which direction this cg should go */
4961 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4965 /* The cell boundaries for dimension d2 are not equal
4966 * for each cell row of the lower dimension(s),
4967 * therefore we might need to redetermine where
4968 * this cg should go.
4971 /* If this cg crosses the box boundary in dimension d2
4972 * we can use the communicated flag, so we do not
4973 * have to worry about pbc.
4975 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4976 (flag & DD_FLAG_FW(d2))) ||
4977 (dd->ci[dim2] == 0 &&
4978 (flag & DD_FLAG_BW(d2)))))
4980 /* Clear the two flags for this dimension */
4981 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4982 /* Determine the location of this cg
4983 * in lattice coordinates
4985 pos_d = comm->vbuf.v[buf_pos][dim2];
4988 for (d3 = dim2+1; d3 < DIM; d3++)
4991 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4994 /* Check of we are not at the box edge.
4995 * pbc is only handled in the first step above,
4996 * but this check could move over pbc while
4997 * the first step did not due to different rounding.
4999 if (pos_d >= cell_x1[dim2] &&
5000 dd->ci[dim2] != dd->nc[dim2]-1)
5002 flag |= DD_FLAG_FW(d2);
5004 else if (pos_d < cell_x0[dim2] &&
5007 flag |= DD_FLAG_BW(d2);
5009 comm->buf_int[cg*DD_CGIBS+1] = flag;
5012 /* Set to which neighboring cell this cg should go */
5013 if (flag & DD_FLAG_FW(d2))
5017 else if (flag & DD_FLAG_BW(d2))
5019 if (dd->nc[dd->dim[d2]] > 2)
5031 nrcg = flag & DD_FLAG_NRCG;
5034 if (home_pos_cg+1 > dd->cg_nalloc)
5036 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5037 srenew(dd->index_gl, dd->cg_nalloc);
5038 srenew(dd->cgindex, dd->cg_nalloc+1);
5040 /* Set the global charge group index and size */
5041 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5042 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5043 /* Copy the state from the buffer */
5044 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5045 if (fr->cutoff_scheme == ecutsGROUP)
5048 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5052 /* Set the cginfo */
5053 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5054 dd->index_gl[home_pos_cg]);
5057 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5060 if (home_pos_at+nrcg > state->nalloc)
5062 dd_realloc_state(state, f, home_pos_at+nrcg);
5064 for (i = 0; i < nrcg; i++)
5066 copy_rvec(comm->vbuf.v[buf_pos++],
5067 state->x[home_pos_at+i]);
5071 for (i = 0; i < nrcg; i++)
5073 copy_rvec(comm->vbuf.v[buf_pos++],
5074 state->v[home_pos_at+i]);
5079 for (i = 0; i < nrcg; i++)
5081 copy_rvec(comm->vbuf.v[buf_pos++],
5082 state->sd_X[home_pos_at+i]);
5087 for (i = 0; i < nrcg; i++)
5089 copy_rvec(comm->vbuf.v[buf_pos++],
5090 state->cg_p[home_pos_at+i]);
5094 home_pos_at += nrcg;
5098 /* Reallocate the buffers if necessary */
5099 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5101 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5102 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5104 nvr = ncg[mc] + nat[mc]*nvec;
5105 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5107 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5108 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5110 /* Copy from the receive to the send buffers */
5111 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5112 comm->buf_int + cg*DD_CGIBS,
5113 DD_CGIBS*sizeof(int));
5114 memcpy(comm->cgcm_state[mc][nvr],
5115 comm->vbuf.v[buf_pos],
5116 (1+nrcg*nvec)*sizeof(rvec));
5117 buf_pos += 1 + nrcg*nvec;
5124 /* With sorting (!bCompact) the indices are now only partially up to date
5125 * and ncg_home and nat_home are not the real count, since there are
5126 * "holes" in the arrays for the charge groups that moved to neighbors.
5128 if (fr->cutoff_scheme == ecutsVERLET)
5130 moved = get_moved(comm, home_pos_cg);
5132 for (i = dd->ncg_home; i < home_pos_cg; i++)
5137 dd->ncg_home = home_pos_cg;
5138 dd->nat_home = home_pos_at;
5143 "Finished repartitioning: cgs moved out %d, new home %d\n",
5144 *ncg_moved, dd->ncg_home-*ncg_moved);
5149 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5151 dd->comm->cycl[ddCycl] += cycles;
5152 dd->comm->cycl_n[ddCycl]++;
5153 if (cycles > dd->comm->cycl_max[ddCycl])
5155 dd->comm->cycl_max[ddCycl] = cycles;
5159 static double force_flop_count(t_nrnb *nrnb)
5166 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5168 /* To get closer to the real timings, we half the count
5169 * for the normal loops and again half it for water loops.
5172 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5174 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5178 sum += nrnb->n[i]*0.50*cost_nrnb(i);
5181 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5184 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5186 sum += nrnb->n[i]*cost_nrnb(i);
5189 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5191 sum += nrnb->n[i]*cost_nrnb(i);
5197 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5199 if (dd->comm->eFlop)
5201 dd->comm->flop -= force_flop_count(nrnb);
5204 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5206 if (dd->comm->eFlop)
5208 dd->comm->flop += force_flop_count(nrnb);
5213 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5217 for (i = 0; i < ddCyclNr; i++)
5219 dd->comm->cycl[i] = 0;
5220 dd->comm->cycl_n[i] = 0;
5221 dd->comm->cycl_max[i] = 0;
5224 dd->comm->flop_n = 0;
5227 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5229 gmx_domdec_comm_t *comm;
5230 gmx_domdec_load_t *load;
5231 gmx_domdec_root_t *root = NULL;
5232 int d, dim, cid, i, pos;
5233 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5238 fprintf(debug, "get_load_distribution start\n");
5241 wallcycle_start(wcycle, ewcDDCOMMLOAD);
5245 bSepPME = (dd->pme_nodeid >= 0);
5247 for (d = dd->ndim-1; d >= 0; d--)
5250 /* Check if we participate in the communication in this dimension */
5251 if (d == dd->ndim-1 ||
5252 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5254 load = &comm->load[d];
5257 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
5260 if (d == dd->ndim-1)
5262 sbuf[pos++] = dd_force_load(comm);
5263 sbuf[pos++] = sbuf[0];
5266 sbuf[pos++] = sbuf[0];
5267 sbuf[pos++] = cell_frac;
5270 sbuf[pos++] = comm->cell_f_max0[d];
5271 sbuf[pos++] = comm->cell_f_min1[d];
5276 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5277 sbuf[pos++] = comm->cycl[ddCyclPME];
5282 sbuf[pos++] = comm->load[d+1].sum;
5283 sbuf[pos++] = comm->load[d+1].max;
5286 sbuf[pos++] = comm->load[d+1].sum_m;
5287 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5288 sbuf[pos++] = comm->load[d+1].flags;
5291 sbuf[pos++] = comm->cell_f_max0[d];
5292 sbuf[pos++] = comm->cell_f_min1[d];
5297 sbuf[pos++] = comm->load[d+1].mdf;
5298 sbuf[pos++] = comm->load[d+1].pme;
5302 /* Communicate a row in DD direction d.
5303 * The communicators are setup such that the root always has rank 0.
5306 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5307 load->load, load->nload*sizeof(float), MPI_BYTE,
5308 0, comm->mpi_comm_load[d]);
5310 if (dd->ci[dim] == dd->master_ci[dim])
5312 /* We are the root, process this row */
5313 if (comm->bDynLoadBal)
5315 root = comm->root[d];
5325 for (i = 0; i < dd->nc[dim]; i++)
5327 load->sum += load->load[pos++];
5328 load->max = max(load->max, load->load[pos]);
5334 /* This direction could not be load balanced properly,
5335 * therefore we need to use the maximum iso the average load.
5337 load->sum_m = max(load->sum_m, load->load[pos]);
5341 load->sum_m += load->load[pos];
5344 load->cvol_min = min(load->cvol_min, load->load[pos]);
5348 load->flags = (int)(load->load[pos++] + 0.5);
5352 root->cell_f_max0[i] = load->load[pos++];
5353 root->cell_f_min1[i] = load->load[pos++];
5358 load->mdf = max(load->mdf, load->load[pos]);
5360 load->pme = max(load->pme, load->load[pos]);
5364 if (comm->bDynLoadBal && root->bLimited)
5366 load->sum_m *= dd->nc[dim];
5367 load->flags |= (1<<d);
5375 comm->nload += dd_load_count(comm);
5376 comm->load_step += comm->cycl[ddCyclStep];
5377 comm->load_sum += comm->load[0].sum;
5378 comm->load_max += comm->load[0].max;
5379 if (comm->bDynLoadBal)
5381 for (d = 0; d < dd->ndim; d++)
5383 if (comm->load[0].flags & (1<<d))
5385 comm->load_lim[d]++;
5391 comm->load_mdf += comm->load[0].mdf;
5392 comm->load_pme += comm->load[0].pme;
5396 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5400 fprintf(debug, "get_load_distribution finished\n");
5404 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5406 /* Return the relative performance loss on the total run time
5407 * due to the force calculation load imbalance.
5409 if (dd->comm->nload > 0)
5412 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5413 (dd->comm->load_step*dd->nnodes);
5421 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5424 int npp, npme, nnodes, d, limp;
5425 float imbal, pme_f_ratio, lossf, lossp = 0;
5427 gmx_domdec_comm_t *comm;
5430 if (DDMASTER(dd) && comm->nload > 0)
5433 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5434 nnodes = npp + npme;
5435 imbal = comm->load_max*npp/comm->load_sum - 1;
5436 lossf = dd_force_imb_perf_loss(dd);
5437 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5438 fprintf(fplog, "%s", buf);
5439 fprintf(stderr, "\n");
5440 fprintf(stderr, "%s", buf);
5441 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5442 fprintf(fplog, "%s", buf);
5443 fprintf(stderr, "%s", buf);
5445 if (comm->bDynLoadBal)
5447 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5448 for (d = 0; d < dd->ndim; d++)
5450 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5451 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5457 sprintf(buf+strlen(buf), "\n");
5458 fprintf(fplog, "%s", buf);
5459 fprintf(stderr, "%s", buf);
5463 pme_f_ratio = comm->load_pme/comm->load_mdf;
5464 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5467 lossp *= (float)npme/(float)nnodes;
5471 lossp *= (float)npp/(float)nnodes;
5473 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5474 fprintf(fplog, "%s", buf);
5475 fprintf(stderr, "%s", buf);
5476 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5477 fprintf(fplog, "%s", buf);
5478 fprintf(stderr, "%s", buf);
5480 fprintf(fplog, "\n");
5481 fprintf(stderr, "\n");
5483 if (lossf >= DD_PERF_LOSS_WARN)
5486 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5487 " in the domain decomposition.\n", lossf*100);
5488 if (!comm->bDynLoadBal)
5490 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5494 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5496 fprintf(fplog, "%s\n", buf);
5497 fprintf(stderr, "%s\n", buf);
5499 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS_WARN)
5502 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5503 " had %s work to do than the PP nodes.\n"
5504 " You might want to %s the number of PME nodes\n"
5505 " or %s the cut-off and the grid spacing.\n",
5507 (lossp < 0) ? "less" : "more",
5508 (lossp < 0) ? "decrease" : "increase",
5509 (lossp < 0) ? "decrease" : "increase");
5510 fprintf(fplog, "%s\n", buf);
5511 fprintf(stderr, "%s\n", buf);
5516 static float dd_vol_min(gmx_domdec_t *dd)
5518 return dd->comm->load[0].cvol_min*dd->nnodes;
5521 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5523 return dd->comm->load[0].flags;
5526 static float dd_f_imbal(gmx_domdec_t *dd)
5528 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5531 float dd_pme_f_ratio(gmx_domdec_t *dd)
5533 if (dd->comm->cycl_n[ddCyclPME] > 0)
5535 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5543 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5548 flags = dd_load_flags(dd);
5552 "DD load balancing is limited by minimum cell size in dimension");
5553 for (d = 0; d < dd->ndim; d++)
5557 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5560 fprintf(fplog, "\n");
5562 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5563 if (dd->comm->bDynLoadBal)
5565 fprintf(fplog, " vol min/aver %5.3f%c",
5566 dd_vol_min(dd), flags ? '!' : ' ');
5568 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5569 if (dd->comm->cycl_n[ddCyclPME])
5571 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5573 fprintf(fplog, "\n\n");
5576 static void dd_print_load_verbose(gmx_domdec_t *dd)
5578 if (dd->comm->bDynLoadBal)
5580 fprintf(stderr, "vol %4.2f%c ",
5581 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5583 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5584 if (dd->comm->cycl_n[ddCyclPME])
5586 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
5591 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5596 gmx_domdec_root_t *root;
5597 gmx_bool bPartOfGroup = FALSE;
5599 dim = dd->dim[dim_ind];
5600 copy_ivec(loc, loc_c);
5601 for (i = 0; i < dd->nc[dim]; i++)
5604 rank = dd_index(dd->nc, loc_c);
5605 if (rank == dd->rank)
5607 /* This process is part of the group */
5608 bPartOfGroup = TRUE;
5611 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5615 dd->comm->mpi_comm_load[dim_ind] = c_row;
5616 if (dd->comm->eDLB != edlbNO)
5618 if (dd->ci[dim] == dd->master_ci[dim])
5620 /* This is the root process of this row */
5621 snew(dd->comm->root[dim_ind], 1);
5622 root = dd->comm->root[dim_ind];
5623 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5624 snew(root->old_cell_f, dd->nc[dim]+1);
5625 snew(root->bCellMin, dd->nc[dim]);
5628 snew(root->cell_f_max0, dd->nc[dim]);
5629 snew(root->cell_f_min1, dd->nc[dim]);
5630 snew(root->bound_min, dd->nc[dim]);
5631 snew(root->bound_max, dd->nc[dim]);
5633 snew(root->buf_ncd, dd->nc[dim]);
5637 /* This is not a root process, we only need to receive cell_f */
5638 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5641 if (dd->ci[dim] == dd->master_ci[dim])
5643 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
5649 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5650 const gmx_hw_info_t gmx_unused *hwinfo,
5651 const gmx_hw_opt_t gmx_unused *hw_opt)
5654 int physicalnode_id_hash;
5657 MPI_Comm mpi_comm_pp_physicalnode;
5659 if (!(cr->duty & DUTY_PP) ||
5660 hw_opt->gpu_opt.ncuda_dev_use == 0)
5662 /* Only PP nodes (currently) use GPUs.
5663 * If we don't have GPUs, there are no resources to share.
5668 physicalnode_id_hash = gmx_physicalnode_id_hash();
5670 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5676 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5677 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5678 dd->rank, physicalnode_id_hash, gpu_id);
5680 /* Split the PP communicator over the physical nodes */
5681 /* TODO: See if we should store this (before), as it's also used for
5682 * for the nodecomm summution.
5684 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5685 &mpi_comm_pp_physicalnode);
5686 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5687 &dd->comm->mpi_comm_gpu_shared);
5688 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5689 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5693 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5696 /* Note that some ranks could share a GPU, while others don't */
5698 if (dd->comm->nrank_gpu_shared == 1)
5700 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
5705 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5708 int dim0, dim1, i, j;
5713 fprintf(debug, "Making load communicators\n");
5716 snew(dd->comm->load, dd->ndim);
5717 snew(dd->comm->mpi_comm_load, dd->ndim);
5720 make_load_communicator(dd, 0, loc);
5724 for (i = 0; i < dd->nc[dim0]; i++)
5727 make_load_communicator(dd, 1, loc);
5733 for (i = 0; i < dd->nc[dim0]; i++)
5737 for (j = 0; j < dd->nc[dim1]; j++)
5740 make_load_communicator(dd, 2, loc);
5747 fprintf(debug, "Finished making load communicators\n");
5752 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5755 int d, dim, i, j, m;
5758 ivec dd_zp[DD_MAXIZONE];
5759 gmx_domdec_zones_t *zones;
5760 gmx_domdec_ns_ranges_t *izone;
5762 for (d = 0; d < dd->ndim; d++)
5765 copy_ivec(dd->ci, tmp);
5766 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5767 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5768 copy_ivec(dd->ci, tmp);
5769 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5770 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5773 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5776 dd->neighbor[d][1]);
5782 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5784 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5785 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5792 for (i = 0; i < nzonep; i++)
5794 copy_ivec(dd_zp3[i], dd_zp[i]);
5800 for (i = 0; i < nzonep; i++)
5802 copy_ivec(dd_zp2[i], dd_zp[i]);
5808 for (i = 0; i < nzonep; i++)
5810 copy_ivec(dd_zp1[i], dd_zp[i]);
5814 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5819 zones = &dd->comm->zones;
5821 for (i = 0; i < nzone; i++)
5824 clear_ivec(zones->shift[i]);
5825 for (d = 0; d < dd->ndim; d++)
5827 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5832 for (i = 0; i < nzone; i++)
5834 for (d = 0; d < DIM; d++)
5836 s[d] = dd->ci[d] - zones->shift[i][d];
5841 else if (s[d] >= dd->nc[d])
5847 zones->nizone = nzonep;
5848 for (i = 0; i < zones->nizone; i++)
5850 if (dd_zp[i][0] != i)
5852 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5854 izone = &zones->izone[i];
5855 izone->j0 = dd_zp[i][1];
5856 izone->j1 = dd_zp[i][2];
5857 for (dim = 0; dim < DIM; dim++)
5859 if (dd->nc[dim] == 1)
5861 /* All shifts should be allowed */
5862 izone->shift0[dim] = -1;
5863 izone->shift1[dim] = 1;
5868 izone->shift0[d] = 0;
5869 izone->shift1[d] = 0;
5870 for(j=izone->j0; j<izone->j1; j++) {
5871 if (dd->shift[j][d] > dd->shift[i][d])
5872 izone->shift0[d] = -1;
5873 if (dd->shift[j][d] < dd->shift[i][d])
5874 izone->shift1[d] = 1;
5880 /* Assume the shift are not more than 1 cell */
5881 izone->shift0[dim] = 1;
5882 izone->shift1[dim] = -1;
5883 for (j = izone->j0; j < izone->j1; j++)
5885 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5886 if (shift_diff < izone->shift0[dim])
5888 izone->shift0[dim] = shift_diff;
5890 if (shift_diff > izone->shift1[dim])
5892 izone->shift1[dim] = shift_diff;
5899 if (dd->comm->eDLB != edlbNO)
5901 snew(dd->comm->root, dd->ndim);
5904 if (dd->comm->bRecordLoad)
5906 make_load_communicators(dd);
5910 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
5913 gmx_domdec_comm_t *comm;
5924 if (comm->bCartesianPP)
5926 /* Set up cartesian communication for the particle-particle part */
5929 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5930 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5933 for (i = 0; i < DIM; i++)
5937 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5939 /* We overwrite the old communicator with the new cartesian one */
5940 cr->mpi_comm_mygroup = comm_cart;
5943 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5944 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5946 if (comm->bCartesianPP_PME)
5948 /* Since we want to use the original cartesian setup for sim,
5949 * and not the one after split, we need to make an index.
5951 snew(comm->ddindex2ddnodeid, dd->nnodes);
5952 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5953 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5954 /* Get the rank of the DD master,
5955 * above we made sure that the master node is a PP node.
5965 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5967 else if (comm->bCartesianPP)
5969 if (cr->npmenodes == 0)
5971 /* The PP communicator is also
5972 * the communicator for this simulation
5974 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5976 cr->nodeid = dd->rank;
5978 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5980 /* We need to make an index to go from the coordinates
5981 * to the nodeid of this simulation.
5983 snew(comm->ddindex2simnodeid, dd->nnodes);
5984 snew(buf, dd->nnodes);
5985 if (cr->duty & DUTY_PP)
5987 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5989 /* Communicate the ddindex to simulation nodeid index */
5990 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5991 cr->mpi_comm_mysim);
5994 /* Determine the master coordinates and rank.
5995 * The DD master should be the same node as the master of this sim.
5997 for (i = 0; i < dd->nnodes; i++)
5999 if (comm->ddindex2simnodeid[i] == 0)
6001 ddindex2xyz(dd->nc, i, dd->master_ci);
6002 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
6007 fprintf(debug, "The master rank is %d\n", dd->masterrank);
6012 /* No Cartesian communicators */
6013 /* We use the rank in dd->comm->all as DD index */
6014 ddindex2xyz(dd->nc, dd->rank, dd->ci);
6015 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
6017 clear_ivec(dd->master_ci);
6024 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6025 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6030 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6031 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6035 static void receive_ddindex2simnodeid(t_commrec *cr)
6039 gmx_domdec_comm_t *comm;
6046 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6048 snew(comm->ddindex2simnodeid, dd->nnodes);
6049 snew(buf, dd->nnodes);
6050 if (cr->duty & DUTY_PP)
6052 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6055 /* Communicate the ddindex to simulation nodeid index */
6056 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6057 cr->mpi_comm_mysim);
6064 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6065 int ncg, int natoms)
6067 gmx_domdec_master_t *ma;
6072 snew(ma->ncg, dd->nnodes);
6073 snew(ma->index, dd->nnodes+1);
6075 snew(ma->nat, dd->nnodes);
6076 snew(ma->ibuf, dd->nnodes*2);
6077 snew(ma->cell_x, DIM);
6078 for (i = 0; i < DIM; i++)
6080 snew(ma->cell_x[i], dd->nc[i]+1);
6083 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6089 snew(ma->vbuf, natoms);
6095 static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
6096 int gmx_unused reorder)
6099 gmx_domdec_comm_t *comm;
6110 if (comm->bCartesianPP)
6112 for (i = 1; i < DIM; i++)
6114 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6116 if (bDiv[YY] || bDiv[ZZ])
6118 comm->bCartesianPP_PME = TRUE;
6119 /* If we have 2D PME decomposition, which is always in x+y,
6120 * we stack the PME only nodes in z.
6121 * Otherwise we choose the direction that provides the thinnest slab
6122 * of PME only nodes as this will have the least effect
6123 * on the PP communication.
6124 * But for the PME communication the opposite might be better.
6126 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6128 dd->nc[YY] > dd->nc[ZZ]))
6130 comm->cartpmedim = ZZ;
6134 comm->cartpmedim = YY;
6136 comm->ntot[comm->cartpmedim]
6137 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6141 fprintf(fplog, "#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6143 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6148 if (comm->bCartesianPP_PME)
6152 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6155 for (i = 0; i < DIM; i++)
6159 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6162 MPI_Comm_rank(comm_cart, &rank);
6163 if (MASTERNODE(cr) && rank != 0)
6165 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6168 /* With this assigment we loose the link to the original communicator
6169 * which will usually be MPI_COMM_WORLD, unless have multisim.
6171 cr->mpi_comm_mysim = comm_cart;
6172 cr->sim_nodeid = rank;
6174 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6178 fprintf(fplog, "Cartesian nodeid %d, coordinates %d %d %d\n\n",
6179 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6182 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6186 if (cr->npmenodes == 0 ||
6187 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6189 cr->duty = DUTY_PME;
6192 /* Split the sim communicator into PP and PME only nodes */
6193 MPI_Comm_split(cr->mpi_comm_mysim,
6195 dd_index(comm->ntot, dd->ci),
6196 &cr->mpi_comm_mygroup);
6200 switch (dd_node_order)
6205 fprintf(fplog, "Order of the nodes: PP first, PME last\n");
6208 case ddnoINTERLEAVE:
6209 /* Interleave the PP-only and PME-only nodes,
6210 * as on clusters with dual-core machines this will double
6211 * the communication bandwidth of the PME processes
6212 * and thus speed up the PP <-> PME and inter PME communication.
6216 fprintf(fplog, "Interleaving PP and PME nodes\n");
6218 comm->pmenodes = dd_pmenodes(cr);
6223 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6226 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6228 cr->duty = DUTY_PME;
6235 /* Split the sim communicator into PP and PME only nodes */
6236 MPI_Comm_split(cr->mpi_comm_mysim,
6239 &cr->mpi_comm_mygroup);
6240 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6246 fprintf(fplog, "This is a %s only node\n\n",
6247 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
6251 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6254 gmx_domdec_comm_t *comm;
6260 copy_ivec(dd->nc, comm->ntot);
6262 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6263 comm->bCartesianPP_PME = FALSE;
6265 /* Reorder the nodes by default. This might change the MPI ranks.
6266 * Real reordering is only supported on very few architectures,
6267 * Blue Gene is one of them.
6269 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6271 if (cr->npmenodes > 0)
6273 /* Split the communicator into a PP and PME part */
6274 split_communicator(fplog, cr, dd_node_order, CartReorder);
6275 if (comm->bCartesianPP_PME)
6277 /* We (possibly) reordered the nodes in split_communicator,
6278 * so it is no longer required in make_pp_communicator.
6280 CartReorder = FALSE;
6285 /* All nodes do PP and PME */
6287 /* We do not require separate communicators */
6288 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6292 if (cr->duty & DUTY_PP)
6294 /* Copy or make a new PP communicator */
6295 make_pp_communicator(fplog, cr, CartReorder);
6299 receive_ddindex2simnodeid(cr);
6302 if (!(cr->duty & DUTY_PME))
6304 /* Set up the commnuication to our PME node */
6305 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6306 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6309 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6310 dd->pme_nodeid, dd->pme_receive_vir_ener);
6315 dd->pme_nodeid = -1;
6320 dd->ma = init_gmx_domdec_master_t(dd,
6322 comm->cgs_gl.index[comm->cgs_gl.nr]);
6326 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6328 real *slb_frac, tot;
6333 if (nc > 1 && size_string != NULL)
6337 fprintf(fplog, "Using static load balancing for the %s direction\n",
6342 for (i = 0; i < nc; i++)
6345 sscanf(size_string, "%lf%n", &dbl, &n);
6348 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6357 fprintf(fplog, "Relative cell sizes:");
6359 for (i = 0; i < nc; i++)
6364 fprintf(fplog, " %5.3f", slb_frac[i]);
6369 fprintf(fplog, "\n");
6376 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6379 gmx_mtop_ilistloop_t iloop;
6383 iloop = gmx_mtop_ilistloop_init(mtop);
6384 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6386 for (ftype = 0; ftype < F_NRE; ftype++)
6388 if ((interaction_function[ftype].flags & IF_BOND) &&
6391 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
6399 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6405 val = getenv(env_var);
6408 if (sscanf(val, "%d", &nst) <= 0)
6414 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6422 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6426 fprintf(stderr, "\n%s\n", warn_string);
6430 fprintf(fplog, "\n%s\n", warn_string);
6434 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6435 t_inputrec *ir, FILE *fplog)
6437 if (ir->ePBC == epbcSCREW &&
6438 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6440 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6443 if (ir->ns_type == ensSIMPLE)
6445 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6448 if (ir->nstlist == 0)
6450 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6453 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6455 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6459 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6464 r = ddbox->box_size[XX];
6465 for (di = 0; di < dd->ndim; di++)
6468 /* Check using the initial average cell size */
6469 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6475 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6476 const char *dlb_opt, gmx_bool bRecordLoad,
6477 unsigned long Flags, t_inputrec *ir)
6485 case 'a': eDLB = edlbAUTO; break;
6486 case 'n': eDLB = edlbNO; break;
6487 case 'y': eDLB = edlbYES; break;
6488 default: gmx_incons("Unknown dlb_opt");
6491 if (Flags & MD_RERUN)
6496 if (!EI_DYNAMICS(ir->eI))
6498 if (eDLB == edlbYES)
6500 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6501 dd_warning(cr, fplog, buf);
6509 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6514 if (Flags & MD_REPRODUCIBLE)
6521 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6525 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6528 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
6536 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6541 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6543 /* Decomposition order z,y,x */
6546 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6548 for (dim = DIM-1; dim >= 0; dim--)
6550 if (dd->nc[dim] > 1)
6552 dd->dim[dd->ndim++] = dim;
6558 /* Decomposition order x,y,z */
6559 for (dim = 0; dim < DIM; dim++)
6561 if (dd->nc[dim] > 1)
6563 dd->dim[dd->ndim++] = dim;
6569 static gmx_domdec_comm_t *init_dd_comm()
6571 gmx_domdec_comm_t *comm;
6575 snew(comm->cggl_flag, DIM*2);
6576 snew(comm->cgcm_state, DIM*2);
6577 for (i = 0; i < DIM*2; i++)
6579 comm->cggl_flag_nalloc[i] = 0;
6580 comm->cgcm_state_nalloc[i] = 0;
6583 comm->nalloc_int = 0;
6584 comm->buf_int = NULL;
6586 vec_rvec_init(&comm->vbuf);
6588 comm->n_load_have = 0;
6589 comm->n_load_collect = 0;
6591 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6593 comm->sum_nat[i] = 0;
6597 comm->load_step = 0;
6600 clear_ivec(comm->load_lim);
6607 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6608 unsigned long Flags,
6610 real comm_distance_min, real rconstr,
6611 const char *dlb_opt, real dlb_scale,
6612 const char *sizex, const char *sizey, const char *sizez,
6613 gmx_mtop_t *mtop, t_inputrec *ir,
6614 matrix box, rvec *x,
6616 int *npme_x, int *npme_y)
6619 gmx_domdec_comm_t *comm;
6622 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6629 "\nInitializing Domain Decomposition on %d nodes\n", cr->nnodes);
6634 dd->comm = init_dd_comm();
6636 snew(comm->cggl_flag, DIM*2);
6637 snew(comm->cgcm_state, DIM*2);
6639 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6640 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6642 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
6643 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
6644 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
6645 recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
6646 comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
6647 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
6648 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
6649 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
6651 dd->pme_recv_f_alloc = 0;
6652 dd->pme_recv_f_buf = NULL;
6654 if (dd->bSendRecv2 && fplog)
6656 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6662 fprintf(fplog, "Will load balance based on FLOP count\n");
6664 if (comm->eFlop > 1)
6666 srand(1+cr->nodeid);
6668 comm->bRecordLoad = TRUE;
6672 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6676 /* Initialize to GPU share count to 0, might change later */
6677 comm->nrank_gpu_shared = 0;
6679 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6681 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6684 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6686 dd->bGridJump = comm->bDynLoadBal;
6687 comm->bPMELoadBalDLBLimits = FALSE;
6689 if (comm->nstSortCG)
6693 if (comm->nstSortCG == 1)
6695 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6699 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6703 snew(comm->sort, 1);
6709 fprintf(fplog, "Will not sort the charge groups\n");
6713 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6715 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6716 if (comm->bInterCGBondeds)
6718 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6722 comm->bInterCGMultiBody = FALSE;
6725 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6726 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6728 if (ir->rlistlong == 0)
6730 /* Set the cut-off to some very large value,
6731 * so we don't need if statements everywhere in the code.
6732 * We use sqrt, since the cut-off is squared in some places.
6734 comm->cutoff = GMX_CUTOFF_INF;
6738 comm->cutoff = ir->rlistlong;
6740 comm->cutoff_mbody = 0;
6742 comm->cellsize_limit = 0;
6743 comm->bBondComm = FALSE;
6745 if (comm->bInterCGBondeds)
6747 if (comm_distance_min > 0)
6749 comm->cutoff_mbody = comm_distance_min;
6750 if (Flags & MD_DDBONDCOMM)
6752 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6756 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6758 r_bonded_limit = comm->cutoff_mbody;
6760 else if (ir->bPeriodicMols)
6762 /* Can not easily determine the required cut-off */
6763 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6764 comm->cutoff_mbody = comm->cutoff/2;
6765 r_bonded_limit = comm->cutoff_mbody;
6771 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6772 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6774 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6775 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6777 /* We use an initial margin of 10% for the minimum cell size,
6778 * except when we are just below the non-bonded cut-off.
6780 if (Flags & MD_DDBONDCOMM)
6782 if (max(r_2b, r_mb) > comm->cutoff)
6784 r_bonded = max(r_2b, r_mb);
6785 r_bonded_limit = 1.1*r_bonded;
6786 comm->bBondComm = TRUE;
6791 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6793 /* We determine cutoff_mbody later */
6797 /* No special bonded communication,
6798 * simply increase the DD cut-off.
6800 r_bonded_limit = 1.1*max(r_2b, r_mb);
6801 comm->cutoff_mbody = r_bonded_limit;
6802 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6805 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6809 "Minimum cell size due to bonded interactions: %.3f nm\n",
6810 comm->cellsize_limit);
6814 if (dd->bInterCGcons && rconstr <= 0)
6816 /* There is a cell size limit due to the constraints (P-LINCS) */
6817 rconstr = constr_r_max(fplog, mtop, ir);
6821 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6823 if (rconstr > comm->cellsize_limit)
6825 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6829 else if (rconstr > 0 && fplog)
6831 /* Here we do not check for dd->bInterCGcons,
6832 * because one can also set a cell size limit for virtual sites only
6833 * and at this point we don't know yet if there are intercg v-sites.
6836 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6839 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6841 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6845 copy_ivec(nc, dd->nc);
6846 set_dd_dim(fplog, dd);
6847 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6849 if (cr->npmenodes == -1)
6853 acs = average_cellsize_min(dd, ddbox);
6854 if (acs < comm->cellsize_limit)
6858 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6860 gmx_fatal_collective(FARGS, cr, NULL,
6861 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6862 acs, comm->cellsize_limit);
6867 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6869 /* We need to choose the optimal DD grid and possibly PME nodes */
6870 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6871 comm->eDLB != edlbNO, dlb_scale,
6872 comm->cellsize_limit, comm->cutoff,
6873 comm->bInterCGBondeds);
6875 if (dd->nc[XX] == 0)
6877 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6878 sprintf(buf, "Change the number of nodes or mdrun option %s%s%s",
6879 !bC ? "-rdd" : "-rcon",
6880 comm->eDLB != edlbNO ? " or -dds" : "",
6881 bC ? " or your LINCS settings" : "");
6883 gmx_fatal_collective(FARGS, cr, NULL,
6884 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6886 "Look in the log file for details on the domain decomposition",
6887 cr->nnodes-cr->npmenodes, limit, buf);
6889 set_dd_dim(fplog, dd);
6895 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6896 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6899 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6900 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6902 gmx_fatal_collective(FARGS, cr, NULL,
6903 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6904 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6906 if (cr->npmenodes > dd->nnodes)
6908 gmx_fatal_collective(FARGS, cr, NULL,
6909 "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6911 if (cr->npmenodes > 0)
6913 comm->npmenodes = cr->npmenodes;
6917 comm->npmenodes = dd->nnodes;
6920 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6922 /* The following choices should match those
6923 * in comm_cost_est in domdec_setup.c.
6924 * Note that here the checks have to take into account
6925 * that the decomposition might occur in a different order than xyz
6926 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6927 * in which case they will not match those in comm_cost_est,
6928 * but since that is mainly for testing purposes that's fine.
6930 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6931 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6932 getenv("GMX_PMEONEDD") == NULL)
6934 comm->npmedecompdim = 2;
6935 comm->npmenodes_x = dd->nc[XX];
6936 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6940 /* In case nc is 1 in both x and y we could still choose to
6941 * decompose pme in y instead of x, but we use x for simplicity.
6943 comm->npmedecompdim = 1;
6944 if (dd->dim[0] == YY)
6946 comm->npmenodes_x = 1;
6947 comm->npmenodes_y = comm->npmenodes;
6951 comm->npmenodes_x = comm->npmenodes;
6952 comm->npmenodes_y = 1;
6957 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6958 comm->npmenodes_x, comm->npmenodes_y, 1);
6963 comm->npmedecompdim = 0;
6964 comm->npmenodes_x = 0;
6965 comm->npmenodes_y = 0;
6968 /* Technically we don't need both of these,
6969 * but it simplifies code not having to recalculate it.
6971 *npme_x = comm->npmenodes_x;
6972 *npme_y = comm->npmenodes_y;
6974 snew(comm->slb_frac, DIM);
6975 if (comm->eDLB == edlbNO)
6977 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6978 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6979 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
6982 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6984 if (comm->bBondComm || comm->eDLB != edlbNO)
6986 /* Set the bonded communication distance to halfway
6987 * the minimum and the maximum,
6988 * since the extra communication cost is nearly zero.
6990 acs = average_cellsize_min(dd, ddbox);
6991 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6992 if (comm->eDLB != edlbNO)
6994 /* Check if this does not limit the scaling */
6995 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
6997 if (!comm->bBondComm)
6999 /* Without bBondComm do not go beyond the n.b. cut-off */
7000 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
7001 if (comm->cellsize_limit >= comm->cutoff)
7003 /* We don't loose a lot of efficieny
7004 * when increasing it to the n.b. cut-off.
7005 * It can even be slightly faster, because we need
7006 * less checks for the communication setup.
7008 comm->cutoff_mbody = comm->cutoff;
7011 /* Check if we did not end up below our original limit */
7012 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
7014 if (comm->cutoff_mbody > comm->cellsize_limit)
7016 comm->cellsize_limit = comm->cutoff_mbody;
7019 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
7024 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
7025 "cellsize limit %f\n",
7026 comm->bBondComm, comm->cellsize_limit);
7031 check_dd_restrictions(cr, dd, ir, fplog);
7034 comm->partition_step = INT_MIN;
7037 clear_dd_cycle_counts(dd);
7042 static void set_dlb_limits(gmx_domdec_t *dd)
7047 for (d = 0; d < dd->ndim; d++)
7049 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7050 dd->comm->cellsize_min[dd->dim[d]] =
7051 dd->comm->cellsize_min_dlb[dd->dim[d]];
7056 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7059 gmx_domdec_comm_t *comm;
7069 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
7072 cellsize_min = comm->cellsize_min[dd->dim[0]];
7073 for (d = 1; d < dd->ndim; d++)
7075 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7078 if (cellsize_min < comm->cellsize_limit*1.05)
7080 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7082 /* Change DLB from "auto" to "no". */
7083 comm->eDLB = edlbNO;
7088 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7089 comm->bDynLoadBal = TRUE;
7090 dd->bGridJump = TRUE;
7094 /* We can set the required cell size info here,
7095 * so we do not need to communicate this.
7096 * The grid is completely uniform.
7098 for (d = 0; d < dd->ndim; d++)
7102 comm->load[d].sum_m = comm->load[d].sum;
7104 nc = dd->nc[dd->dim[d]];
7105 for (i = 0; i < nc; i++)
7107 comm->root[d]->cell_f[i] = i/(real)nc;
7110 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7111 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7114 comm->root[d]->cell_f[nc] = 1.0;
7119 static char *init_bLocalCG(gmx_mtop_t *mtop)
7124 ncg = ncg_mtop(mtop);
7125 snew(bLocalCG, ncg);
7126 for (cg = 0; cg < ncg; cg++)
7128 bLocalCG[cg] = FALSE;
7134 void dd_init_bondeds(FILE *fplog,
7135 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7137 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7139 gmx_domdec_comm_t *comm;
7143 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7147 if (comm->bBondComm)
7149 /* Communicate atoms beyond the cut-off for bonded interactions */
7152 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7154 comm->bLocalCG = init_bLocalCG(mtop);
7158 /* Only communicate atoms based on cut-off */
7159 comm->cglink = NULL;
7160 comm->bLocalCG = NULL;
7164 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7166 gmx_bool bDynLoadBal, real dlb_scale,
7169 gmx_domdec_comm_t *comm;
7184 fprintf(fplog, "The maximum number of communication pulses is:");
7185 for (d = 0; d < dd->ndim; d++)
7187 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7189 fprintf(fplog, "\n");
7190 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7191 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7192 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7193 for (d = 0; d < DIM; d++)
7197 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7204 comm->cellsize_min_dlb[d]/
7205 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7207 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7210 fprintf(fplog, "\n");
7214 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
7215 fprintf(fplog, "The initial number of communication pulses is:");
7216 for (d = 0; d < dd->ndim; d++)
7218 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7220 fprintf(fplog, "\n");
7221 fprintf(fplog, "The initial domain decomposition cell size is:");
7222 for (d = 0; d < DIM; d++)
7226 fprintf(fplog, " %c %.2f nm",
7227 dim2char(d), dd->comm->cellsize_min[d]);
7230 fprintf(fplog, "\n\n");
7233 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7235 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7236 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7237 "non-bonded interactions", "", comm->cutoff);
7241 limit = dd->comm->cellsize_limit;
7245 if (dynamic_dd_box(ddbox, ir))
7247 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7249 limit = dd->comm->cellsize_min[XX];
7250 for (d = 1; d < DIM; d++)
7252 limit = min(limit, dd->comm->cellsize_min[d]);
7256 if (comm->bInterCGBondeds)
7258 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7259 "two-body bonded interactions", "(-rdd)",
7260 max(comm->cutoff, comm->cutoff_mbody));
7261 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7262 "multi-body bonded interactions", "(-rdd)",
7263 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7267 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7268 "virtual site constructions", "(-rcon)", limit);
7270 if (dd->constraint_comm)
7272 sprintf(buf, "atoms separated by up to %d constraints",
7274 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7275 buf, "(-rcon)", limit);
7277 fprintf(fplog, "\n");
7283 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7285 const t_inputrec *ir,
7286 const gmx_ddbox_t *ddbox)
7288 gmx_domdec_comm_t *comm;
7289 int d, dim, npulse, npulse_d_max, npulse_d;
7294 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7296 /* Determine the maximum number of comm. pulses in one dimension */
7298 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7300 /* Determine the maximum required number of grid pulses */
7301 if (comm->cellsize_limit >= comm->cutoff)
7303 /* Only a single pulse is required */
7306 else if (!bNoCutOff && comm->cellsize_limit > 0)
7308 /* We round down slightly here to avoid overhead due to the latency
7309 * of extra communication calls when the cut-off
7310 * would be only slightly longer than the cell size.
7311 * Later cellsize_limit is redetermined,
7312 * so we can not miss interactions due to this rounding.
7314 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7318 /* There is no cell size limit */
7319 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7322 if (!bNoCutOff && npulse > 1)
7324 /* See if we can do with less pulses, based on dlb_scale */
7326 for (d = 0; d < dd->ndim; d++)
7329 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7330 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7331 npulse_d_max = max(npulse_d_max, npulse_d);
7333 npulse = min(npulse, npulse_d_max);
7336 /* This env var can override npulse */
7337 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
7344 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7345 for (d = 0; d < dd->ndim; d++)
7347 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7348 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7349 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7350 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7351 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7353 comm->bVacDLBNoLimit = FALSE;
7357 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7358 if (!comm->bVacDLBNoLimit)
7360 comm->cellsize_limit = max(comm->cellsize_limit,
7361 comm->cutoff/comm->maxpulse);
7363 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7364 /* Set the minimum cell size for each DD dimension */
7365 for (d = 0; d < dd->ndim; d++)
7367 if (comm->bVacDLBNoLimit ||
7368 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7370 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7374 comm->cellsize_min_dlb[dd->dim[d]] =
7375 comm->cutoff/comm->cd[d].np_dlb;
7378 if (comm->cutoff_mbody <= 0)
7380 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7382 if (comm->bDynLoadBal)
7388 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7390 /* If each molecule is a single charge group
7391 * or we use domain decomposition for each periodic dimension,
7392 * we do not need to take pbc into account for the bonded interactions.
7394 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7397 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
7400 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7401 t_inputrec *ir, gmx_ddbox_t *ddbox)
7403 gmx_domdec_comm_t *comm;
7409 /* Initialize the thread data.
7410 * This can not be done in init_domain_decomposition,
7411 * as the numbers of threads is determined later.
7413 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7416 snew(comm->dth, comm->nth);
7419 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7421 init_ddpme(dd, &comm->ddpme[0], 0);
7422 if (comm->npmedecompdim >= 2)
7424 init_ddpme(dd, &comm->ddpme[1], 1);
7429 comm->npmenodes = 0;
7430 if (dd->pme_nodeid >= 0)
7432 gmx_fatal_collective(FARGS, NULL, dd,
7433 "Can not have separate PME nodes without PME electrostatics");
7439 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7441 if (comm->eDLB != edlbNO)
7443 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7446 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7447 if (comm->eDLB == edlbAUTO)
7451 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7453 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
7456 if (ir->ePBC == epbcNONE)
7458 vol_frac = 1 - 1/(double)dd->nnodes;
7463 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7467 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7469 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7471 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
7474 static gmx_bool test_dd_cutoff(t_commrec *cr,
7475 t_state *state, t_inputrec *ir,
7486 set_ddbox(dd, FALSE, cr, ir, state->box,
7487 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7491 for (d = 0; d < dd->ndim; d++)
7495 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7496 if (dynamic_dd_box(&ddbox, ir))
7498 inv_cell_size *= DD_PRES_SCALE_MARGIN;
7501 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7503 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7504 dd->comm->cd[d].np_dlb > 0)
7506 if (np > dd->comm->cd[d].np_dlb)
7511 /* If a current local cell size is smaller than the requested
7512 * cut-off, we could still fix it, but this gets very complicated.
7513 * Without fixing here, we might actually need more checks.
7515 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7522 if (dd->comm->eDLB != edlbNO)
7524 /* If DLB is not active yet, we don't need to check the grid jumps.
7525 * Actually we shouldn't, because then the grid jump data is not set.
7527 if (dd->comm->bDynLoadBal &&
7528 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7533 gmx_sumi(1, &LocallyLimited, cr);
7535 if (LocallyLimited > 0)
7544 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7547 gmx_bool bCutoffAllowed;
7549 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7553 cr->dd->comm->cutoff = cutoff_req;
7556 return bCutoffAllowed;
7559 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7561 gmx_domdec_comm_t *comm;
7563 comm = cr->dd->comm;
7565 /* Turn on the DLB limiting (might have been on already) */
7566 comm->bPMELoadBalDLBLimits = TRUE;
7568 /* Change the cut-off limit */
7569 comm->PMELoadBal_max_cutoff = comm->cutoff;
7572 static void merge_cg_buffers(int ncell,
7573 gmx_domdec_comm_dim_t *cd, int pulse,
7575 int *index_gl, int *recv_i,
7576 rvec *cg_cm, rvec *recv_vr,
7578 cginfo_mb_t *cginfo_mb, int *cginfo)
7580 gmx_domdec_ind_t *ind, *ind_p;
7581 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7582 int shift, shift_at;
7584 ind = &cd->ind[pulse];
7586 /* First correct the already stored data */
7587 shift = ind->nrecv[ncell];
7588 for (cell = ncell-1; cell >= 0; cell--)
7590 shift -= ind->nrecv[cell];
7593 /* Move the cg's present from previous grid pulses */
7594 cg0 = ncg_cell[ncell+cell];
7595 cg1 = ncg_cell[ncell+cell+1];
7596 cgindex[cg1+shift] = cgindex[cg1];
7597 for (cg = cg1-1; cg >= cg0; cg--)
7599 index_gl[cg+shift] = index_gl[cg];
7600 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7601 cgindex[cg+shift] = cgindex[cg];
7602 cginfo[cg+shift] = cginfo[cg];
7604 /* Correct the already stored send indices for the shift */
7605 for (p = 1; p <= pulse; p++)
7607 ind_p = &cd->ind[p];
7609 for (c = 0; c < cell; c++)
7611 cg0 += ind_p->nsend[c];
7613 cg1 = cg0 + ind_p->nsend[cell];
7614 for (cg = cg0; cg < cg1; cg++)
7616 ind_p->index[cg] += shift;
7622 /* Merge in the communicated buffers */
7626 for (cell = 0; cell < ncell; cell++)
7628 cg1 = ncg_cell[ncell+cell+1] + shift;
7631 /* Correct the old cg indices */
7632 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7634 cgindex[cg+1] += shift_at;
7637 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7639 /* Copy this charge group from the buffer */
7640 index_gl[cg1] = recv_i[cg0];
7641 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7642 /* Add it to the cgindex */
7643 cg_gl = index_gl[cg1];
7644 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7645 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7646 cgindex[cg1+1] = cgindex[cg1] + nat;
7651 shift += ind->nrecv[cell];
7652 ncg_cell[ncell+cell+1] = cg1;
7656 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7657 int nzone, int cg0, const int *cgindex)
7661 /* Store the atom block boundaries for easy copying of communication buffers
7664 for (zone = 0; zone < nzone; zone++)
7666 for (p = 0; p < cd->np; p++)
7668 cd->ind[p].cell2at0[zone] = cgindex[cg];
7669 cg += cd->ind[p].nrecv[zone];
7670 cd->ind[p].cell2at1[zone] = cgindex[cg];
7675 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7681 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7683 if (!bLocalCG[link->a[i]])
7692 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7694 real c[DIM][4]; /* the corners for the non-bonded communication */
7695 real cr0; /* corner for rounding */
7696 real cr1[4]; /* corners for rounding */
7697 real bc[DIM]; /* corners for bounded communication */
7698 real bcr1; /* corner for rounding for bonded communication */
7701 /* Determine the corners of the domain(s) we are communicating with */
7703 set_dd_corners(const gmx_domdec_t *dd,
7704 int dim0, int dim1, int dim2,
7708 const gmx_domdec_comm_t *comm;
7709 const gmx_domdec_zones_t *zones;
7714 zones = &comm->zones;
7716 /* Keep the compiler happy */
7720 /* The first dimension is equal for all cells */
7721 c->c[0][0] = comm->cell_x0[dim0];
7724 c->bc[0] = c->c[0][0];
7729 /* This cell row is only seen from the first row */
7730 c->c[1][0] = comm->cell_x0[dim1];
7731 /* All rows can see this row */
7732 c->c[1][1] = comm->cell_x0[dim1];
7735 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7738 /* For the multi-body distance we need the maximum */
7739 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7742 /* Set the upper-right corner for rounding */
7743 c->cr0 = comm->cell_x1[dim0];
7748 for (j = 0; j < 4; j++)
7750 c->c[2][j] = comm->cell_x0[dim2];
7754 /* Use the maximum of the i-cells that see a j-cell */
7755 for (i = 0; i < zones->nizone; i++)
7757 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7763 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7769 /* For the multi-body distance we need the maximum */
7770 c->bc[2] = comm->cell_x0[dim2];
7771 for (i = 0; i < 2; i++)
7773 for (j = 0; j < 2; j++)
7775 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7781 /* Set the upper-right corner for rounding */
7782 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7783 * Only cell (0,0,0) can see cell 7 (1,1,1)
7785 c->cr1[0] = comm->cell_x1[dim1];
7786 c->cr1[3] = comm->cell_x1[dim1];
7789 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7792 /* For the multi-body distance we need the maximum */
7793 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7800 /* Determine which cg's we need to send in this pulse from this zone */
7802 get_zone_pulse_cgs(gmx_domdec_t *dd,
7803 int zonei, int zone,
7805 const int *index_gl,
7807 int dim, int dim_ind,
7808 int dim0, int dim1, int dim2,
7809 real r_comm2, real r_bcomm2,
7813 real skew_fac2_d, real skew_fac_01,
7814 rvec *v_d, rvec *v_0, rvec *v_1,
7815 const dd_corners_t *c,
7817 gmx_bool bDistBonded,
7823 gmx_domdec_ind_t *ind,
7824 int **ibuf, int *ibuf_nalloc,
7830 gmx_domdec_comm_t *comm;
7832 gmx_bool bDistMB_pulse;
7834 real r2, rb2, r, tric_sh;
7837 int nsend_z, nsend, nat;
7841 bScrew = (dd->bScrewPBC && dim == XX);
7843 bDistMB_pulse = (bDistMB && bDistBonded);
7849 for (cg = cg0; cg < cg1; cg++)
7853 if (tric_dist[dim_ind] == 0)
7855 /* Rectangular direction, easy */
7856 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7863 r = cg_cm[cg][dim] - c->bc[dim_ind];
7869 /* Rounding gives at most a 16% reduction
7870 * in communicated atoms
7872 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7874 r = cg_cm[cg][dim0] - c->cr0;
7875 /* This is the first dimension, so always r >= 0 */
7882 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7884 r = cg_cm[cg][dim1] - c->cr1[zone];
7891 r = cg_cm[cg][dim1] - c->bcr1;
7901 /* Triclinic direction, more complicated */
7904 /* Rounding, conservative as the skew_fac multiplication
7905 * will slightly underestimate the distance.
7907 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7909 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7910 for (i = dim0+1; i < DIM; i++)
7912 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7914 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7917 rb[dim0] = rn[dim0];
7920 /* Take care that the cell planes along dim0 might not
7921 * be orthogonal to those along dim1 and dim2.
7923 for (i = 1; i <= dim_ind; i++)
7926 if (normal[dim0][dimd] > 0)
7928 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7931 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7936 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7938 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7940 for (i = dim1+1; i < DIM; i++)
7942 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7944 rn[dim1] += tric_sh;
7947 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7948 /* Take care of coupling of the distances
7949 * to the planes along dim0 and dim1 through dim2.
7951 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7952 /* Take care that the cell planes along dim1
7953 * might not be orthogonal to that along dim2.
7955 if (normal[dim1][dim2] > 0)
7957 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7963 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7966 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7967 /* Take care of coupling of the distances
7968 * to the planes along dim0 and dim1 through dim2.
7970 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7971 /* Take care that the cell planes along dim1
7972 * might not be orthogonal to that along dim2.
7974 if (normal[dim1][dim2] > 0)
7976 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7981 /* The distance along the communication direction */
7982 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7984 for (i = dim+1; i < DIM; i++)
7986 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7991 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7992 /* Take care of coupling of the distances
7993 * to the planes along dim0 and dim1 through dim2.
7995 if (dim_ind == 1 && zonei == 1)
7997 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
8003 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
8006 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
8007 /* Take care of coupling of the distances
8008 * to the planes along dim0 and dim1 through dim2.
8010 if (dim_ind == 1 && zonei == 1)
8012 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
8020 ((bDistMB && rb2 < r_bcomm2) ||
8021 (bDist2B && r2 < r_bcomm2)) &&
8023 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
8024 missing_link(comm->cglink, index_gl[cg],
8027 /* Make an index to the local charge groups */
8028 if (nsend+1 > ind->nalloc)
8030 ind->nalloc = over_alloc_large(nsend+1);
8031 srenew(ind->index, ind->nalloc);
8033 if (nsend+1 > *ibuf_nalloc)
8035 *ibuf_nalloc = over_alloc_large(nsend+1);
8036 srenew(*ibuf, *ibuf_nalloc);
8038 ind->index[nsend] = cg;
8039 (*ibuf)[nsend] = index_gl[cg];
8041 vec_rvec_check_alloc(vbuf, nsend+1);
8043 if (dd->ci[dim] == 0)
8045 /* Correct cg_cm for pbc */
8046 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
8049 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8050 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8055 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8058 nat += cgindex[cg+1] - cgindex[cg];
8064 *nsend_z_ptr = nsend_z;
8067 static void setup_dd_communication(gmx_domdec_t *dd,
8068 matrix box, gmx_ddbox_t *ddbox,
8069 t_forcerec *fr, t_state *state, rvec **f)
8071 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8072 int nzone, nzone_send, zone, zonei, cg0, cg1;
8073 int c, i, j, cg, cg_gl, nrcg;
8074 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8075 gmx_domdec_comm_t *comm;
8076 gmx_domdec_zones_t *zones;
8077 gmx_domdec_comm_dim_t *cd;
8078 gmx_domdec_ind_t *ind;
8079 cginfo_mb_t *cginfo_mb;
8080 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8081 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8082 dd_corners_t corners;
8084 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8085 real skew_fac2_d, skew_fac_01;
8092 fprintf(debug, "Setting up DD communication\n");
8097 switch (fr->cutoff_scheme)
8106 gmx_incons("unimplemented");
8110 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8112 dim = dd->dim[dim_ind];
8114 /* Check if we need to use triclinic distances */
8115 tric_dist[dim_ind] = 0;
8116 for (i = 0; i <= dim_ind; i++)
8118 if (ddbox->tric_dir[dd->dim[i]])
8120 tric_dist[dim_ind] = 1;
8125 bBondComm = comm->bBondComm;
8127 /* Do we need to determine extra distances for multi-body bondeds? */
8128 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8130 /* Do we need to determine extra distances for only two-body bondeds? */
8131 bDist2B = (bBondComm && !bDistMB);
8133 r_comm2 = sqr(comm->cutoff);
8134 r_bcomm2 = sqr(comm->cutoff_mbody);
8138 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8141 zones = &comm->zones;
8144 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8145 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8147 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8149 /* Triclinic stuff */
8150 normal = ddbox->normal;
8154 v_0 = ddbox->v[dim0];
8155 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8157 /* Determine the coupling coefficient for the distances
8158 * to the cell planes along dim0 and dim1 through dim2.
8159 * This is required for correct rounding.
8162 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8165 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8171 v_1 = ddbox->v[dim1];
8174 zone_cg_range = zones->cg_range;
8175 index_gl = dd->index_gl;
8176 cgindex = dd->cgindex;
8177 cginfo_mb = fr->cginfo_mb;
8179 zone_cg_range[0] = 0;
8180 zone_cg_range[1] = dd->ncg_home;
8181 comm->zone_ncg1[0] = dd->ncg_home;
8182 pos_cg = dd->ncg_home;
8184 nat_tot = dd->nat_home;
8186 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8188 dim = dd->dim[dim_ind];
8189 cd = &comm->cd[dim_ind];
8191 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8193 /* No pbc in this dimension, the first node should not comm. */
8201 v_d = ddbox->v[dim];
8202 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8204 cd->bInPlace = TRUE;
8205 for (p = 0; p < cd->np; p++)
8207 /* Only atoms communicated in the first pulse are used
8208 * for multi-body bonded interactions or for bBondComm.
8210 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8215 for (zone = 0; zone < nzone_send; zone++)
8217 if (tric_dist[dim_ind] && dim_ind > 0)
8219 /* Determine slightly more optimized skew_fac's
8221 * This reduces the number of communicated atoms
8222 * by about 10% for 3D DD of rhombic dodecahedra.
8224 for (dimd = 0; dimd < dim; dimd++)
8226 sf2_round[dimd] = 1;
8227 if (ddbox->tric_dir[dimd])
8229 for (i = dd->dim[dimd]+1; i < DIM; i++)
8231 /* If we are shifted in dimension i
8232 * and the cell plane is tilted forward
8233 * in dimension i, skip this coupling.
8235 if (!(zones->shift[nzone+zone][i] &&
8236 ddbox->v[dimd][i][dimd] >= 0))
8239 sqr(ddbox->v[dimd][i][dimd]);
8242 sf2_round[dimd] = 1/sf2_round[dimd];
8247 zonei = zone_perm[dim_ind][zone];
8250 /* Here we permutate the zones to obtain a convenient order
8251 * for neighbor searching
8253 cg0 = zone_cg_range[zonei];
8254 cg1 = zone_cg_range[zonei+1];
8258 /* Look only at the cg's received in the previous grid pulse
8260 cg1 = zone_cg_range[nzone+zone+1];
8261 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
8264 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8265 for (th = 0; th < comm->nth; th++)
8267 gmx_domdec_ind_t *ind_p;
8268 int **ibuf_p, *ibuf_nalloc_p;
8270 int *nsend_p, *nat_p;
8276 /* Thread 0 writes in the comm buffers */
8278 ibuf_p = &comm->buf_int;
8279 ibuf_nalloc_p = &comm->nalloc_int;
8280 vbuf_p = &comm->vbuf;
8283 nsend_zone_p = &ind->nsend[zone];
8287 /* Other threads write into temp buffers */
8288 ind_p = &comm->dth[th].ind;
8289 ibuf_p = &comm->dth[th].ibuf;
8290 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8291 vbuf_p = &comm->dth[th].vbuf;
8292 nsend_p = &comm->dth[th].nsend;
8293 nat_p = &comm->dth[th].nat;
8294 nsend_zone_p = &comm->dth[th].nsend_zone;
8296 comm->dth[th].nsend = 0;
8297 comm->dth[th].nat = 0;
8298 comm->dth[th].nsend_zone = 0;
8308 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8309 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8312 /* Get the cg's for this pulse in this zone */
8313 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8315 dim, dim_ind, dim0, dim1, dim2,
8318 normal, skew_fac2_d, skew_fac_01,
8319 v_d, v_0, v_1, &corners, sf2_round,
8320 bDistBonded, bBondComm,
8324 ibuf_p, ibuf_nalloc_p,
8330 /* Append data of threads>=1 to the communication buffers */
8331 for (th = 1; th < comm->nth; th++)
8333 dd_comm_setup_work_t *dth;
8336 dth = &comm->dth[th];
8338 ns1 = nsend + dth->nsend_zone;
8339 if (ns1 > ind->nalloc)
8341 ind->nalloc = over_alloc_dd(ns1);
8342 srenew(ind->index, ind->nalloc);
8344 if (ns1 > comm->nalloc_int)
8346 comm->nalloc_int = over_alloc_dd(ns1);
8347 srenew(comm->buf_int, comm->nalloc_int);
8349 if (ns1 > comm->vbuf.nalloc)
8351 comm->vbuf.nalloc = over_alloc_dd(ns1);
8352 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8355 for (i = 0; i < dth->nsend_zone; i++)
8357 ind->index[nsend] = dth->ind.index[i];
8358 comm->buf_int[nsend] = dth->ibuf[i];
8359 copy_rvec(dth->vbuf.v[i],
8360 comm->vbuf.v[nsend]);
8364 ind->nsend[zone] += dth->nsend_zone;
8367 /* Clear the counts in case we do not have pbc */
8368 for (zone = nzone_send; zone < nzone; zone++)
8370 ind->nsend[zone] = 0;
8372 ind->nsend[nzone] = nsend;
8373 ind->nsend[nzone+1] = nat;
8374 /* Communicate the number of cg's and atoms to receive */
8375 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8376 ind->nsend, nzone+2,
8377 ind->nrecv, nzone+2);
8379 /* The rvec buffer is also required for atom buffers of size nsend
8380 * in dd_move_x and dd_move_f.
8382 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8386 /* We can receive in place if only the last zone is not empty */
8387 for (zone = 0; zone < nzone-1; zone++)
8389 if (ind->nrecv[zone] > 0)
8391 cd->bInPlace = FALSE;
8396 /* The int buffer is only required here for the cg indices */
8397 if (ind->nrecv[nzone] > comm->nalloc_int2)
8399 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8400 srenew(comm->buf_int2, comm->nalloc_int2);
8402 /* The rvec buffer is also required for atom buffers
8403 * of size nrecv in dd_move_x and dd_move_f.
8405 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8406 vec_rvec_check_alloc(&comm->vbuf2, i);
8410 /* Make space for the global cg indices */
8411 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8412 || dd->cg_nalloc == 0)
8414 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8415 srenew(index_gl, dd->cg_nalloc);
8416 srenew(cgindex, dd->cg_nalloc+1);
8418 /* Communicate the global cg indices */
8421 recv_i = index_gl + pos_cg;
8425 recv_i = comm->buf_int2;
8427 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8428 comm->buf_int, nsend,
8429 recv_i, ind->nrecv[nzone]);
8431 /* Make space for cg_cm */
8432 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8433 if (fr->cutoff_scheme == ecutsGROUP)
8441 /* Communicate cg_cm */
8444 recv_vr = cg_cm + pos_cg;
8448 recv_vr = comm->vbuf2.v;
8450 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8451 comm->vbuf.v, nsend,
8452 recv_vr, ind->nrecv[nzone]);
8454 /* Make the charge group index */
8457 zone = (p == 0 ? 0 : nzone - 1);
8458 while (zone < nzone)
8460 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8462 cg_gl = index_gl[pos_cg];
8463 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8464 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8465 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8468 /* Update the charge group presence,
8469 * so we can use it in the next pass of the loop.
8471 comm->bLocalCG[cg_gl] = TRUE;
8477 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8480 zone_cg_range[nzone+zone] = pos_cg;
8485 /* This part of the code is never executed with bBondComm. */
8486 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8487 index_gl, recv_i, cg_cm, recv_vr,
8488 cgindex, fr->cginfo_mb, fr->cginfo);
8489 pos_cg += ind->nrecv[nzone];
8491 nat_tot += ind->nrecv[nzone+1];
8495 /* Store the atom block for easy copying of communication buffers */
8496 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
8500 dd->index_gl = index_gl;
8501 dd->cgindex = cgindex;
8503 dd->ncg_tot = zone_cg_range[zones->n];
8504 dd->nat_tot = nat_tot;
8505 comm->nat[ddnatHOME] = dd->nat_home;
8506 for (i = ddnatZONE; i < ddnatNR; i++)
8508 comm->nat[i] = dd->nat_tot;
8513 /* We don't need to update cginfo, since that was alrady done above.
8514 * So we pass NULL for the forcerec.
8516 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8517 NULL, comm->bLocalCG);
8522 fprintf(debug, "Finished setting up DD communication, zones:");
8523 for (c = 0; c < zones->n; c++)
8525 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8527 fprintf(debug, "\n");
8531 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8535 for (c = 0; c < zones->nizone; c++)
8537 zones->izone[c].cg1 = zones->cg_range[c+1];
8538 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8539 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
8543 static void set_zones_size(gmx_domdec_t *dd,
8544 matrix box, const gmx_ddbox_t *ddbox,
8545 int zone_start, int zone_end)
8547 gmx_domdec_comm_t *comm;
8548 gmx_domdec_zones_t *zones;
8550 int z, zi, zj0, zj1, d, dim;
8553 real size_j, add_tric;
8558 zones = &comm->zones;
8560 /* Do we need to determine extra distances for multi-body bondeds? */
8561 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8563 for (z = zone_start; z < zone_end; z++)
8565 /* Copy cell limits to zone limits.
8566 * Valid for non-DD dims and non-shifted dims.
8568 copy_rvec(comm->cell_x0, zones->size[z].x0);
8569 copy_rvec(comm->cell_x1, zones->size[z].x1);
8572 for (d = 0; d < dd->ndim; d++)
8576 for (z = 0; z < zones->n; z++)
8578 /* With a staggered grid we have different sizes
8579 * for non-shifted dimensions.
8581 if (dd->bGridJump && zones->shift[z][dim] == 0)
8585 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8586 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8590 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8591 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
8597 rcmbs = comm->cutoff_mbody;
8598 if (ddbox->tric_dir[dim])
8600 rcs /= ddbox->skew_fac[dim];
8601 rcmbs /= ddbox->skew_fac[dim];
8604 /* Set the lower limit for the shifted zone dimensions */
8605 for (z = zone_start; z < zone_end; z++)
8607 if (zones->shift[z][dim] > 0)
8610 if (!dd->bGridJump || d == 0)
8612 zones->size[z].x0[dim] = comm->cell_x1[dim];
8613 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8617 /* Here we take the lower limit of the zone from
8618 * the lowest domain of the zone below.
8622 zones->size[z].x0[dim] =
8623 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8629 zones->size[z].x0[dim] =
8630 zones->size[zone_perm[2][z-4]].x0[dim];
8634 zones->size[z].x0[dim] =
8635 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8638 /* A temporary limit, is updated below */
8639 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8643 for (zi = 0; zi < zones->nizone; zi++)
8645 if (zones->shift[zi][dim] == 0)
8647 /* This takes the whole zone into account.
8648 * With multiple pulses this will lead
8649 * to a larger zone then strictly necessary.
8651 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8652 zones->size[zi].x1[dim]+rcmbs);
8660 /* Loop over the i-zones to set the upper limit of each
8663 for (zi = 0; zi < zones->nizone; zi++)
8665 if (zones->shift[zi][dim] == 0)
8667 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8669 if (zones->shift[z][dim] > 0)
8671 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8672 zones->size[zi].x1[dim]+rcs);
8679 for (z = zone_start; z < zone_end; z++)
8681 /* Initialization only required to keep the compiler happy */
8682 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8685 /* To determine the bounding box for a zone we need to find
8686 * the extreme corners of 4, 2 or 1 corners.
8688 nc = 1 << (ddbox->npbcdim - 1);
8690 for (c = 0; c < nc; c++)
8692 /* Set up a zone corner at x=0, ignoring trilinic couplings */
8696 corner[YY] = zones->size[z].x0[YY];
8700 corner[YY] = zones->size[z].x1[YY];
8704 corner[ZZ] = zones->size[z].x0[ZZ];
8708 corner[ZZ] = zones->size[z].x1[ZZ];
8710 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8712 /* With 1D domain decomposition the cg's are not in
8713 * the triclinic box, but triclinic x-y and rectangular y-z.
8714 * Shift y back, so it will later end up at 0.
8716 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8718 /* Apply the triclinic couplings */
8719 assert(ddbox->npbcdim <= DIM);
8720 for (i = YY; i < ddbox->npbcdim; i++)
8722 for (j = XX; j < i; j++)
8724 corner[j] += corner[i]*box[i][j]/box[i][i];
8729 copy_rvec(corner, corner_min);
8730 copy_rvec(corner, corner_max);
8734 for (i = 0; i < DIM; i++)
8736 corner_min[i] = min(corner_min[i], corner[i]);
8737 corner_max[i] = max(corner_max[i], corner[i]);
8741 /* Copy the extreme cornes without offset along x */
8742 for (i = 0; i < DIM; i++)
8744 zones->size[z].bb_x0[i] = corner_min[i];
8745 zones->size[z].bb_x1[i] = corner_max[i];
8747 /* Add the offset along x */
8748 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8749 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
8752 if (zone_start == 0)
8755 for (dim = 0; dim < DIM; dim++)
8757 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8759 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8764 for (z = zone_start; z < zone_end; z++)
8766 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8768 zones->size[z].x0[XX], zones->size[z].x1[XX],
8769 zones->size[z].x0[YY], zones->size[z].x1[YY],
8770 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8771 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8773 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8774 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8775 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
8780 static int comp_cgsort(const void *a, const void *b)
8784 gmx_cgsort_t *cga, *cgb;
8785 cga = (gmx_cgsort_t *)a;
8786 cgb = (gmx_cgsort_t *)b;
8788 comp = cga->nsc - cgb->nsc;
8791 comp = cga->ind_gl - cgb->ind_gl;
8797 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8802 /* Order the data */
8803 for (i = 0; i < n; i++)
8805 buf[i] = a[sort[i].ind];
8808 /* Copy back to the original array */
8809 for (i = 0; i < n; i++)
8815 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8820 /* Order the data */
8821 for (i = 0; i < n; i++)
8823 copy_rvec(v[sort[i].ind], buf[i]);
8826 /* Copy back to the original array */
8827 for (i = 0; i < n; i++)
8829 copy_rvec(buf[i], v[i]);
8833 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8836 int a, atot, cg, cg0, cg1, i;
8838 if (cgindex == NULL)
8840 /* Avoid the useless loop of the atoms within a cg */
8841 order_vec_cg(ncg, sort, v, buf);
8846 /* Order the data */
8848 for (cg = 0; cg < ncg; cg++)
8850 cg0 = cgindex[sort[cg].ind];
8851 cg1 = cgindex[sort[cg].ind+1];
8852 for (i = cg0; i < cg1; i++)
8854 copy_rvec(v[i], buf[a]);
8860 /* Copy back to the original array */
8861 for (a = 0; a < atot; a++)
8863 copy_rvec(buf[a], v[a]);
8867 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8868 int nsort_new, gmx_cgsort_t *sort_new,
8869 gmx_cgsort_t *sort1)
8873 /* The new indices are not very ordered, so we qsort them */
8874 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8876 /* sort2 is already ordered, so now we can merge the two arrays */
8880 while (i2 < nsort2 || i_new < nsort_new)
8884 sort1[i1++] = sort_new[i_new++];
8886 else if (i_new == nsort_new)
8888 sort1[i1++] = sort2[i2++];
8890 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8891 (sort2[i2].nsc == sort_new[i_new].nsc &&
8892 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8894 sort1[i1++] = sort2[i2++];
8898 sort1[i1++] = sort_new[i_new++];
8903 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8905 gmx_domdec_sort_t *sort;
8906 gmx_cgsort_t *cgsort, *sort_i;
8907 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8908 int sort_last, sort_skip;
8910 sort = dd->comm->sort;
8912 a = fr->ns.grid->cell_index;
8914 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8916 if (ncg_home_old >= 0)
8918 /* The charge groups that remained in the same ns grid cell
8919 * are completely ordered. So we can sort efficiently by sorting
8920 * the charge groups that did move into the stationary list.
8925 for (i = 0; i < dd->ncg_home; i++)
8927 /* Check if this cg did not move to another node */
8930 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8932 /* This cg is new on this node or moved ns grid cell */
8933 if (nsort_new >= sort->sort_new_nalloc)
8935 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8936 srenew(sort->sort_new, sort->sort_new_nalloc);
8938 sort_i = &(sort->sort_new[nsort_new++]);
8942 /* This cg did not move */
8943 sort_i = &(sort->sort2[nsort2++]);
8945 /* Sort on the ns grid cell indices
8946 * and the global topology index.
8947 * index_gl is irrelevant with cell ns,
8948 * but we set it here anyhow to avoid a conditional.
8951 sort_i->ind_gl = dd->index_gl[i];
8958 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8961 /* Sort efficiently */
8962 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8967 cgsort = sort->sort;
8969 for (i = 0; i < dd->ncg_home; i++)
8971 /* Sort on the ns grid cell indices
8972 * and the global topology index
8974 cgsort[i].nsc = a[i];
8975 cgsort[i].ind_gl = dd->index_gl[i];
8977 if (cgsort[i].nsc < moved)
8984 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8986 /* Determine the order of the charge groups using qsort */
8987 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
8993 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
8996 int ncg_new, i, *a, na;
8998 sort = dd->comm->sort->sort;
9000 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
9003 for (i = 0; i < na; i++)
9007 sort[ncg_new].ind = a[i];
9015 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
9018 gmx_domdec_sort_t *sort;
9019 gmx_cgsort_t *cgsort, *sort_i;
9021 int ncg_new, i, *ibuf, cgsize;
9024 sort = dd->comm->sort;
9026 if (dd->ncg_home > sort->sort_nalloc)
9028 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9029 srenew(sort->sort, sort->sort_nalloc);
9030 srenew(sort->sort2, sort->sort_nalloc);
9032 cgsort = sort->sort;
9034 switch (fr->cutoff_scheme)
9037 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9040 ncg_new = dd_sort_order_nbnxn(dd, fr);
9043 gmx_incons("unimplemented");
9047 /* We alloc with the old size, since cgindex is still old */
9048 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9049 vbuf = dd->comm->vbuf.v;
9053 cgindex = dd->cgindex;
9060 /* Remove the charge groups which are no longer at home here */
9061 dd->ncg_home = ncg_new;
9064 fprintf(debug, "Set the new home charge group count to %d\n",
9068 /* Reorder the state */
9069 for (i = 0; i < estNR; i++)
9071 if (EST_DISTR(i) && (state->flags & (1<<i)))
9076 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9079 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9082 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9085 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9089 case estDISRE_INITF:
9090 case estDISRE_RM3TAV:
9091 case estORIRE_INITF:
9093 /* No ordering required */
9096 gmx_incons("Unknown state entry encountered in dd_sort_state");
9101 if (fr->cutoff_scheme == ecutsGROUP)
9104 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9107 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9109 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9110 srenew(sort->ibuf, sort->ibuf_nalloc);
9113 /* Reorder the global cg index */
9114 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9115 /* Reorder the cginfo */
9116 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9117 /* Rebuild the local cg index */
9121 for (i = 0; i < dd->ncg_home; i++)
9123 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9124 ibuf[i+1] = ibuf[i] + cgsize;
9126 for (i = 0; i < dd->ncg_home+1; i++)
9128 dd->cgindex[i] = ibuf[i];
9133 for (i = 0; i < dd->ncg_home+1; i++)
9138 /* Set the home atom number */
9139 dd->nat_home = dd->cgindex[dd->ncg_home];
9141 if (fr->cutoff_scheme == ecutsVERLET)
9143 /* The atoms are now exactly in grid order, update the grid order */
9144 nbnxn_set_atomorder(fr->nbv->nbs);
9148 /* Copy the sorted ns cell indices back to the ns grid struct */
9149 for (i = 0; i < dd->ncg_home; i++)
9151 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9153 fr->ns.grid->nr = dd->ncg_home;
9157 static void add_dd_statistics(gmx_domdec_t *dd)
9159 gmx_domdec_comm_t *comm;
9164 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9166 comm->sum_nat[ddnat-ddnatZONE] +=
9167 comm->nat[ddnat] - comm->nat[ddnat-1];
9172 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9174 gmx_domdec_comm_t *comm;
9179 /* Reset all the statistics and counters for total run counting */
9180 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9182 comm->sum_nat[ddnat-ddnatZONE] = 0;
9186 comm->load_step = 0;
9189 clear_ivec(comm->load_lim);
9194 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9196 gmx_domdec_comm_t *comm;
9200 comm = cr->dd->comm;
9202 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9209 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9211 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9213 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9218 " av. #atoms communicated per step for force: %d x %.1f\n",
9222 if (cr->dd->vsite_comm)
9225 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9226 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9231 if (cr->dd->constraint_comm)
9234 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9235 1 + ir->nLincsIter, av);
9239 gmx_incons(" Unknown type for DD statistics");
9242 fprintf(fplog, "\n");
9244 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9246 print_dd_load_av(fplog, cr->dd);
9250 void dd_partition_system(FILE *fplog,
9253 gmx_bool bMasterState,
9255 t_state *state_global,
9256 gmx_mtop_t *top_global,
9258 t_state *state_local,
9261 gmx_localtop_t *top_local,
9264 gmx_shellfc_t shellfc,
9265 gmx_constr_t constr,
9267 gmx_wallcycle_t wcycle,
9271 gmx_domdec_comm_t *comm;
9272 gmx_ddbox_t ddbox = {0};
9274 gmx_int64_t step_pcoupl;
9275 rvec cell_ns_x0, cell_ns_x1;
9276 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9277 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9278 gmx_bool bRedist, bSortCG, bResortAll;
9279 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9286 bBoxChanged = (bMasterState || DEFORM(*ir));
9287 if (ir->epc != epcNO)
9289 /* With nstpcouple > 1 pressure coupling happens.
9290 * one step after calculating the pressure.
9291 * Box scaling happens at the end of the MD step,
9292 * after the DD partitioning.
9293 * We therefore have to do DLB in the first partitioning
9294 * after an MD step where P-coupling occured.
9295 * We need to determine the last step in which p-coupling occurred.
9296 * MRS -- need to validate this for vv?
9301 step_pcoupl = step - 1;
9305 step_pcoupl = ((step - 1)/n)*n + 1;
9307 if (step_pcoupl >= comm->partition_step)
9313 bNStGlobalComm = (step % nstglobalcomm == 0);
9315 if (!comm->bDynLoadBal)
9321 /* Should we do dynamic load balacing this step?
9322 * Since it requires (possibly expensive) global communication,
9323 * we might want to do DLB less frequently.
9325 if (bBoxChanged || ir->epc != epcNO)
9327 bDoDLB = bBoxChanged;
9331 bDoDLB = bNStGlobalComm;
9335 /* Check if we have recorded loads on the nodes */
9336 if (comm->bRecordLoad && dd_load_count(comm))
9338 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9340 /* Check if we should use DLB at the second partitioning
9341 * and every 100 partitionings,
9342 * so the extra communication cost is negligible.
9344 n = max(100, nstglobalcomm);
9345 bCheckDLB = (comm->n_load_collect == 0 ||
9346 comm->n_load_have % n == n-1);
9353 /* Print load every nstlog, first and last step to the log file */
9354 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9355 comm->n_load_collect == 0 ||
9357 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9359 /* Avoid extra communication due to verbose screen output
9360 * when nstglobalcomm is set.
9362 if (bDoDLB || bLogLoad || bCheckDLB ||
9363 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9365 get_load_distribution(dd, wcycle);
9370 dd_print_load(fplog, dd, step-1);
9374 dd_print_load_verbose(dd);
9377 comm->n_load_collect++;
9381 /* Since the timings are node dependent, the master decides */
9385 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
9388 fprintf(debug, "step %s, imb loss %f\n",
9389 gmx_step_str(step, sbuf),
9390 dd_force_imb_perf_loss(dd));
9393 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9396 turn_on_dlb(fplog, cr, step);
9401 comm->n_load_have++;
9404 cgs_gl = &comm->cgs_gl;
9409 /* Clear the old state */
9410 clear_dd_indices(dd, 0, 0);
9413 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9414 TRUE, cgs_gl, state_global->x, &ddbox);
9416 get_cg_distribution(fplog, step, dd, cgs_gl,
9417 state_global->box, &ddbox, state_global->x);
9419 dd_distribute_state(dd, cgs_gl,
9420 state_global, state_local, f);
9422 dd_make_local_cgs(dd, &top_local->cgs);
9424 /* Ensure that we have space for the new distribution */
9425 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9427 if (fr->cutoff_scheme == ecutsGROUP)
9429 calc_cgcm(fplog, 0, dd->ncg_home,
9430 &top_local->cgs, state_local->x, fr->cg_cm);
9433 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9435 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9437 else if (state_local->ddp_count != dd->ddp_count)
9439 if (state_local->ddp_count > dd->ddp_count)
9441 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9444 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9446 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9449 /* Clear the old state */
9450 clear_dd_indices(dd, 0, 0);
9452 /* Build the new indices */
9453 rebuild_cgindex(dd, cgs_gl->index, state_local);
9454 make_dd_indices(dd, cgs_gl->index, 0);
9455 ncgindex_set = dd->ncg_home;
9457 if (fr->cutoff_scheme == ecutsGROUP)
9459 /* Redetermine the cg COMs */
9460 calc_cgcm(fplog, 0, dd->ncg_home,
9461 &top_local->cgs, state_local->x, fr->cg_cm);
9464 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9466 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9468 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9469 TRUE, &top_local->cgs, state_local->x, &ddbox);
9471 bRedist = comm->bDynLoadBal;
9475 /* We have the full state, only redistribute the cgs */
9477 /* Clear the non-home indices */
9478 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9481 /* Avoid global communication for dim's without pbc and -gcom */
9482 if (!bNStGlobalComm)
9484 copy_rvec(comm->box0, ddbox.box0 );
9485 copy_rvec(comm->box_size, ddbox.box_size);
9487 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9488 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9493 /* For dim's without pbc and -gcom */
9494 copy_rvec(ddbox.box0, comm->box0 );
9495 copy_rvec(ddbox.box_size, comm->box_size);
9497 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9500 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9502 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9505 /* Check if we should sort the charge groups */
9506 if (comm->nstSortCG > 0)
9508 bSortCG = (bMasterState ||
9509 (bRedist && (step % comm->nstSortCG == 0)));
9516 ncg_home_old = dd->ncg_home;
9521 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9523 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9525 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9527 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9530 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9532 &comm->cell_x0, &comm->cell_x1,
9533 dd->ncg_home, fr->cg_cm,
9534 cell_ns_x0, cell_ns_x1, &grid_density);
9538 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9541 switch (fr->cutoff_scheme)
9544 copy_ivec(fr->ns.grid->n, ncells_old);
9545 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9546 state_local->box, cell_ns_x0, cell_ns_x1,
9547 fr->rlistlong, grid_density);
9550 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9553 gmx_incons("unimplemented");
9555 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9556 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9560 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9562 /* Sort the state on charge group position.
9563 * This enables exact restarts from this step.
9564 * It also improves performance by about 15% with larger numbers
9565 * of atoms per node.
9568 /* Fill the ns grid with the home cell,
9569 * so we can sort with the indices.
9571 set_zones_ncg_home(dd);
9573 switch (fr->cutoff_scheme)
9576 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9578 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9580 comm->zones.size[0].bb_x0,
9581 comm->zones.size[0].bb_x1,
9583 comm->zones.dens_zone0,
9586 ncg_moved, bRedist ? comm->moved : NULL,
9587 fr->nbv->grp[eintLocal].kernel_type,
9588 fr->nbv->grp[eintLocal].nbat);
9590 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9593 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9594 0, dd->ncg_home, fr->cg_cm);
9596 copy_ivec(fr->ns.grid->n, ncells_new);
9599 gmx_incons("unimplemented");
9602 bResortAll = bMasterState;
9604 /* Check if we can user the old order and ns grid cell indices
9605 * of the charge groups to sort the charge groups efficiently.
9607 if (ncells_new[XX] != ncells_old[XX] ||
9608 ncells_new[YY] != ncells_old[YY] ||
9609 ncells_new[ZZ] != ncells_old[ZZ])
9616 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9617 gmx_step_str(step, sbuf), dd->ncg_home);
9619 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9620 bResortAll ? -1 : ncg_home_old);
9621 /* Rebuild all the indices */
9622 ga2la_clear(dd->ga2la);
9625 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9628 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9630 /* Setup up the communication and communicate the coordinates */
9631 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9633 /* Set the indices */
9634 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9636 /* Set the charge group boundaries for neighbor searching */
9637 set_cg_boundaries(&comm->zones);
9639 if (fr->cutoff_scheme == ecutsVERLET)
9641 set_zones_size(dd, state_local->box, &ddbox,
9642 bSortCG ? 1 : 0, comm->zones.n);
9645 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9648 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9649 -1,state_local->x,state_local->box);
9652 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9654 /* Extract a local topology from the global topology */
9655 for (i = 0; i < dd->ndim; i++)
9657 np[dd->dim[i]] = comm->cd[i].np;
9659 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9660 comm->cellsize_min, np,
9662 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9663 vsite, top_global, top_local);
9665 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9667 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9669 /* Set up the special atom communication */
9670 n = comm->nat[ddnatZONE];
9671 for (i = ddnatZONE+1; i < ddnatNR; i++)
9676 if (vsite && vsite->n_intercg_vsite)
9678 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9682 if (dd->bInterCGcons || dd->bInterCGsettles)
9684 /* Only for inter-cg constraints we need special code */
9685 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9686 constr, ir->nProjOrder,
9687 top_local->idef.il);
9691 gmx_incons("Unknown special atom type setup");
9696 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9698 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9700 /* Make space for the extra coordinates for virtual site
9701 * or constraint communication.
9703 state_local->natoms = comm->nat[ddnatNR-1];
9704 if (state_local->natoms > state_local->nalloc)
9706 dd_realloc_state(state_local, f, state_local->natoms);
9709 if (fr->bF_NoVirSum)
9711 if (vsite && vsite->n_intercg_vsite)
9713 nat_f_novirsum = comm->nat[ddnatVSITE];
9717 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9719 nat_f_novirsum = dd->nat_tot;
9723 nat_f_novirsum = dd->nat_home;
9732 /* Set the number of atoms required for the force calculation.
9733 * Forces need to be constrained when using a twin-range setup
9734 * or with energy minimization. For simple simulations we could
9735 * avoid some allocation, zeroing and copying, but this is
9736 * probably not worth the complications ande checking.
9738 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9739 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9741 /* We make the all mdatoms up to nat_tot_con.
9742 * We could save some work by only setting invmass
9743 * between nat_tot and nat_tot_con.
9745 /* This call also sets the new number of home particles to dd->nat_home */
9746 atoms2md(top_global, ir,
9747 comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
9749 /* Now we have the charges we can sort the FE interactions */
9750 dd_sort_local_top(dd, mdatoms, top_local);
9754 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9755 split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
9760 /* Make the local shell stuff, currently no communication is done */
9761 make_local_shells(cr, mdatoms, shellfc);
9764 if (ir->implicit_solvent)
9766 make_local_gb(cr, fr->born, ir->gb_algorithm);
9769 setup_bonded_threading(fr, &top_local->idef);
9771 if (!(cr->duty & DUTY_PME))
9773 /* Send the charges and/or c6/sigmas to our PME only node */
9774 gmx_pme_send_parameters(cr, mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9775 mdatoms->chargeA, mdatoms->chargeB,
9776 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9777 mdatoms->sigmaA, mdatoms->sigmaB,
9778 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9783 set_constraints(constr, top_local, ir, mdatoms, cr);
9786 if (ir->ePull != epullNO)
9788 /* Update the local pull groups */
9789 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9794 /* Update the local rotation groups */
9795 dd_make_local_rotation_groups(dd, ir->rot);
9798 if (ir->eSwapCoords != eswapNO)
9800 /* Update the local groups needed for ion swapping */
9801 dd_make_local_swap_groups(dd, ir->swap);
9804 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9805 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9807 add_dd_statistics(dd);
9809 /* Make sure we only count the cycles for this DD partitioning */
9810 clear_dd_cycle_counts(dd);
9812 /* Because the order of the atoms might have changed since
9813 * the last vsite construction, we need to communicate the constructing
9814 * atom coordinates again (for spreading the forces this MD step).
9816 dd_move_x_vsites(dd, state_local->box, state_local->x);
9818 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9820 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9822 dd_move_x(dd, state_local->box, state_local->x);
9823 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9824 -1, state_local->x, state_local->box);
9827 /* Store the partitioning step */
9828 comm->partition_step = step;
9830 /* Increase the DD partitioning counter */
9832 /* The state currently matches this DD partitioning count, store it */
9833 state_local->ddp_count = dd->ddp_count;
9836 /* The DD master node knows the complete cg distribution,
9837 * store the count so we can possibly skip the cg info communication.
9839 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9842 if (comm->DD_debug > 0)
9844 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9845 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9846 "after partitioning");