2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
49 #include "gromacs/math/vec.h"
51 #include "domdec_network.h"
53 #include "chargegroup.h"
62 #include "gromacs/topology/mtop_util.h"
63 #include "gmx_ga2la.h"
65 #include "nbnxn_search.h"
67 #include "gmx_omp_nthreads.h"
68 #include "gpu_utils.h"
70 #include "gromacs/utility/futil.h"
71 #include "gromacs/fileio/gmxfio.h"
72 #include "gromacs/fileio/pdbio.h"
73 #include "gromacs/imd/imd.h"
74 #include "gromacs/pbcutil/ishift.h"
75 #include "gromacs/pbcutil/pbc.h"
76 #include "gromacs/pulling/pull.h"
77 #include "gromacs/pulling/pull_rotation.h"
78 #include "gromacs/swap/swapcoords.h"
79 #include "gromacs/timing/wallcycle.h"
80 #include "gromacs/utility/basenetwork.h"
81 #include "gromacs/utility/fatalerror.h"
82 #include "gromacs/utility/gmxmpi.h"
83 #include "gromacs/utility/qsort_threadsafe.h"
84 #include "gromacs/utility/smalloc.h"
86 #define DDRANK(dd, rank) (rank)
87 #define DDMASTERRANK(dd) (dd->masterrank)
89 typedef struct gmx_domdec_master
91 /* The cell boundaries */
93 /* The global charge group division */
94 int *ncg; /* Number of home charge groups for each node */
95 int *index; /* Index of nnodes+1 into cg */
96 int *cg; /* Global charge group index */
97 int *nat; /* Number of home atoms for each node. */
98 int *ibuf; /* Buffer for communication */
99 rvec *vbuf; /* Buffer for state scattering and gathering */
100 } gmx_domdec_master_t;
104 /* The numbers of charge groups to send and receive for each cell
105 * that requires communication, the last entry contains the total
106 * number of atoms that needs to be communicated.
108 int nsend[DD_MAXIZONE+2];
109 int nrecv[DD_MAXIZONE+2];
110 /* The charge groups to send */
113 /* The atom range for non-in-place communication */
114 int cell2at0[DD_MAXIZONE];
115 int cell2at1[DD_MAXIZONE];
120 int np; /* Number of grid pulses in this dimension */
121 int np_dlb; /* For dlb, for use with edlbAUTO */
122 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
124 gmx_bool bInPlace; /* Can we communicate in place? */
125 } gmx_domdec_comm_dim_t;
129 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
130 real *cell_f; /* State var.: cell boundaries, box relative */
131 real *old_cell_f; /* Temp. var.: old cell size */
132 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
133 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
134 real *bound_min; /* Temp. var.: lower limit for cell boundary */
135 real *bound_max; /* Temp. var.: upper limit for cell boundary */
136 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
137 real *buf_ncd; /* Temp. var. */
140 #define DD_NLOAD_MAX 9
142 /* Here floats are accurate enough, since these variables
143 * only influence the load balancing, not the actual MD results.
170 gmx_cgsort_t *sort_new;
182 /* This enum determines the order of the coordinates.
183 * ddnatHOME and ddnatZONE should be first and second,
184 * the others can be ordered as wanted.
187 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
191 edlbAUTO, edlbNO, edlbYES, edlbNR
193 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
197 int dim; /* The dimension */
198 gmx_bool dim_match; /* Tells if DD and PME dims match */
199 int nslab; /* The number of PME slabs in this dimension */
200 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
201 int *pp_min; /* The minimum pp node location, size nslab */
202 int *pp_max; /* The maximum pp node location, size nslab */
203 int maxshift; /* The maximum shift for coordinate redistribution in PME */
208 real min0; /* The minimum bottom of this zone */
209 real max1; /* The maximum top of this zone */
210 real min1; /* The minimum top of this zone */
211 real mch0; /* The maximum bottom communication height for this zone */
212 real mch1; /* The maximum top communication height for this zone */
213 real p1_0; /* The bottom value of the first cell in this zone */
214 real p1_1; /* The top value of the first cell in this zone */
219 gmx_domdec_ind_t ind;
226 } dd_comm_setup_work_t;
228 typedef struct gmx_domdec_comm
230 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
231 * unless stated otherwise.
234 /* The number of decomposition dimensions for PME, 0: no PME */
236 /* The number of nodes doing PME (PP/PME or only PME) */
240 /* The communication setup including the PME only nodes */
241 gmx_bool bCartesianPP_PME;
244 int *pmenodes; /* size npmenodes */
245 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
246 * but with bCartesianPP_PME */
247 gmx_ddpme_t ddpme[2];
249 /* The DD particle-particle nodes only */
250 gmx_bool bCartesianPP;
251 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
253 /* The global charge groups */
256 /* Should we sort the cgs */
258 gmx_domdec_sort_t *sort;
260 /* Are there charge groups? */
263 /* Are there bonded and multi-body interactions between charge groups? */
264 gmx_bool bInterCGBondeds;
265 gmx_bool bInterCGMultiBody;
267 /* Data for the optional bonded interaction atom communication range */
274 /* Are we actually using DLB? */
275 gmx_bool bDynLoadBal;
277 /* Cell sizes for static load balancing, first index cartesian */
280 /* The width of the communicated boundaries */
283 /* The minimum cell size (including triclinic correction) */
285 /* For dlb, for use with edlbAUTO */
286 rvec cellsize_min_dlb;
287 /* The lower limit for the DD cell size with DLB */
289 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
290 gmx_bool bVacDLBNoLimit;
292 /* With PME load balancing we set limits on DLB */
293 gmx_bool bPMELoadBalDLBLimits;
294 /* DLB needs to take into account that we want to allow this maximum
295 * cut-off (for PME load balancing), this could limit cell boundaries.
297 real PMELoadBal_max_cutoff;
299 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
301 /* box0 and box_size are required with dim's without pbc and -gcom */
305 /* The cell boundaries */
309 /* The old location of the cell boundaries, to check cg displacements */
313 /* The communication setup and charge group boundaries for the zones */
314 gmx_domdec_zones_t zones;
316 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
317 * cell boundaries of neighboring cells for dynamic load balancing.
319 gmx_ddzone_t zone_d1[2];
320 gmx_ddzone_t zone_d2[2][2];
322 /* The coordinate/force communication setup and indices */
323 gmx_domdec_comm_dim_t cd[DIM];
324 /* The maximum number of cells to communicate with in one dimension */
327 /* Which cg distribution is stored on the master node */
328 int master_cg_ddp_count;
330 /* The number of cg's received from the direct neighbors */
331 int zone_ncg1[DD_MAXZONE];
333 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
336 /* Array for signalling if atoms have moved to another domain */
340 /* Communication buffer for general use */
344 /* Communication buffer for general use */
347 /* Temporary storage for thread parallel communication setup */
349 dd_comm_setup_work_t *dth;
351 /* Communication buffers only used with multiple grid pulses */
356 /* Communication buffers for local redistribution */
358 int cggl_flag_nalloc[DIM*2];
360 int cgcm_state_nalloc[DIM*2];
362 /* Cell sizes for dynamic load balancing */
363 gmx_domdec_root_t **root;
367 real cell_f_max0[DIM];
368 real cell_f_min1[DIM];
370 /* Stuff for load communication */
371 gmx_bool bRecordLoad;
372 gmx_domdec_load_t *load;
373 int nrank_gpu_shared;
375 MPI_Comm *mpi_comm_load;
376 MPI_Comm mpi_comm_gpu_shared;
379 /* Maximum DLB scaling per load balancing step in percent */
383 float cycl[ddCyclNr];
384 int cycl_n[ddCyclNr];
385 float cycl_max[ddCyclNr];
386 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
390 /* How often did we have load measurements */
392 /* How often have we collected the load measurements */
396 double sum_nat[ddnatNR-ddnatZONE];
406 /* The last partition step */
407 gmx_int64_t partition_step;
415 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
418 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
419 #define DD_FLAG_NRCG 65535
420 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
421 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
423 /* Zone permutation required to obtain consecutive charge groups
424 * for neighbor searching.
426 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
428 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
429 * components see only j zones with that component 0.
432 /* The DD zone order */
433 static const ivec dd_zo[DD_MAXZONE] =
434 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
439 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
444 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
449 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
451 /* Factors used to avoid problems due to rounding issues */
452 #define DD_CELL_MARGIN 1.0001
453 #define DD_CELL_MARGIN2 1.00005
454 /* Factor to account for pressure scaling during nstlist steps */
455 #define DD_PRES_SCALE_MARGIN 1.02
457 /* Turn on DLB when the load imbalance causes this amount of total loss.
458 * There is a bit of overhead with DLB and it's difficult to achieve
459 * a load imbalance of less than 2% with DLB.
461 #define DD_PERF_LOSS_DLB_ON 0.02
463 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
464 #define DD_PERF_LOSS_WARN 0.05
466 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
468 /* Use separate MPI send and receive commands
469 * when nnodes <= GMX_DD_NNODES_SENDRECV.
470 * This saves memory (and some copying for small nnodes).
471 * For high parallelization scatter and gather calls are used.
473 #define GMX_DD_NNODES_SENDRECV 4
477 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
479 static void index2xyz(ivec nc,int ind,ivec xyz)
481 xyz[XX] = ind % nc[XX];
482 xyz[YY] = (ind / nc[XX]) % nc[YY];
483 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
487 /* This order is required to minimize the coordinate communication in PME
488 * which uses decomposition in the x direction.
490 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
/* Convert the DD node index ind back into Cartesian grid coordinates xyz.
 * Inverse of the dd_index() macro above: x varies slowest, z fastest, which
 * minimizes coordinate communication for PME (decomposed along x).
 * NOTE(review): the function braces are not visible in this sampled view.
 */
492 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
494 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
495 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
496 xyz[ZZ] = ind % nc[ZZ];
/* Map Cartesian DD cell coordinates c to the DD node (MPI rank) id.
 * With a combined PP+PME Cartesian setup a precomputed lookup table is
 * used; with a pure-PP Cartesian communicator MPI supplies the rank.
 * NOTE(review): the fallback branch and return statement are not visible
 * in this sampled view.
 */
499 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
504 ddindex = dd_index(dd->nc, c);
505 if (dd->comm->bCartesianPP_PME)
507 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
509 else if (dd->comm->bCartesianPP)
512 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
/* Returns whether the DD box is dynamic: TRUE when not all dimensions are
 * bounded (fewer than DIM bounded dimensions, i.e. missing PBC in some
 * direction) or when the integrator/pressure coupling can change the box.
 */
523 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
525 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
/* Returns the global atom number (1-based, as used in user-visible output)
 * for local atom index i; fatal error when i is outside the local range.
 * NOTE(review): the non-DD branch and the return statement of this function
 * are not visible in this sampled view.
 */
528 int ddglatnr(gmx_domdec_t *dd, int i)
    /* nat[ddnatNR-1] is the total local atom count (home + communicated) */
538 if (i >= dd->comm->nat[ddnatNR-1])
540 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
    /* gatindex maps local -> global atom index; +1 for 1-based numbering */
542 atnr = dd->gatindex[i] + 1;
/* Returns the global charge group division stored in the DD comm struct. */
548 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
550 return &dd->comm->cgs_gl;
553 static void vec_rvec_init(vec_rvec_t *v)
/* Ensure the rvec buffer v can hold at least n elements, growing with the
 * usual DD over-allocation factor to amortize future reallocations.
 * NOTE(review): the enclosing capacity test (presumably n > v->nalloc) is
 * not visible in this sampled view — confirm before relying on it.
 */
559 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
563 v->nalloc = over_alloc_dd(n);
564 srenew(v->v, v->nalloc);
568 void dd_store_state(gmx_domdec_t *dd, t_state *state)
572 if (state->ddp_count != dd->ddp_count)
574 gmx_incons("The state does not the domain decomposition state");
577 state->ncg_gl = dd->ncg_home;
578 if (state->ncg_gl > state->cg_gl_nalloc)
580 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
581 srenew(state->cg_gl, state->cg_gl_nalloc);
583 for (i = 0; i < state->ncg_gl; i++)
585 state->cg_gl[i] = dd->index_gl[i];
588 state->ddp_count_cg_gl = dd->ddp_count;
/* Returns the DD zones structure of this domain decomposition. */
591 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
593 return &dd->comm->zones;
/* For home charge group icg, return the neighbor-search j-charge-group
 * range [*jcg0, *jcg1) and the zone shift vectors shift0/shift1.
 * NOTE(review): several interior lines (izone initialization, the in-range
 * branch body and parts of the dimension loop) are not visible in this
 * sampled view.
 */
596 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
597 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
599 gmx_domdec_zones_t *zones;
602 zones = &dd->comm->zones;
    /* Locate the i-zone containing charge group icg */
605 while (icg >= zones->izone[izone].cg1)
614 else if (izone < zones->nizone)
616 *jcg0 = zones->izone[izone].jcg0;
620 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
621 icg, izone, zones->nizone);
624 *jcg1 = zones->izone[izone].jcg1;
626 for (d = 0; d < dd->ndim; d++)
629 shift0[dim] = zones->izone[izone].shift0[dim];
630 shift1[dim] = zones->izone[izone].shift1[dim];
    /* Triclinic dims or staggered (bGridJump) cells need wider shifts */
631 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
633 /* A conservative approach, this can be optimized */
/* Returns the local atom count up to and including virtual-site atoms
 * (the ddnatVSITE entry of the cumulative nat[] ranges).
 */
640 int dd_natoms_vsite(gmx_domdec_t *dd)
642 return dd->comm->nat[ddnatVSITE];
/* Returns in [*at_start, *at_end) the local atom range of atoms that were
 * communicated for constraints: nat[] is cumulative, so the ddnatCON slice
 * starts at nat[ddnatCON-1] and ends at nat[ddnatCON].
 */
645 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
647 *at_start = dd->comm->nat[ddnatCON-1];
648 *at_end = dd->comm->nat[ddnatCON];
651 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
653 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
654 int *index, *cgindex;
655 gmx_domdec_comm_t *comm;
656 gmx_domdec_comm_dim_t *cd;
657 gmx_domdec_ind_t *ind;
658 rvec shift = {0, 0, 0}, *buf, *rbuf;
659 gmx_bool bPBC, bScrew;
663 cgindex = dd->cgindex;
668 nat_tot = dd->nat_home;
669 for (d = 0; d < dd->ndim; d++)
671 bPBC = (dd->ci[dd->dim[d]] == 0);
672 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
675 copy_rvec(box[dd->dim[d]], shift);
678 for (p = 0; p < cd->np; p++)
685 for (i = 0; i < ind->nsend[nzone]; i++)
687 at0 = cgindex[index[i]];
688 at1 = cgindex[index[i]+1];
689 for (j = at0; j < at1; j++)
691 copy_rvec(x[j], buf[n]);
698 for (i = 0; i < ind->nsend[nzone]; i++)
700 at0 = cgindex[index[i]];
701 at1 = cgindex[index[i]+1];
702 for (j = at0; j < at1; j++)
704 /* We need to shift the coordinates */
705 rvec_add(x[j], shift, buf[n]);
712 for (i = 0; i < ind->nsend[nzone]; i++)
714 at0 = cgindex[index[i]];
715 at1 = cgindex[index[i]+1];
716 for (j = at0; j < at1; j++)
719 buf[n][XX] = x[j][XX] + shift[XX];
721 * This operation requires a special shift force
722 * treatment, which is performed in calc_vir.
724 buf[n][YY] = box[YY][YY] - x[j][YY];
725 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
737 rbuf = comm->vbuf2.v;
739 /* Send and receive the coordinates */
740 dd_sendrecv_rvec(dd, d, dddirBackward,
741 buf, ind->nsend[nzone+1],
742 rbuf, ind->nrecv[nzone+1]);
746 for (zone = 0; zone < nzone; zone++)
748 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
750 copy_rvec(rbuf[j], x[i]);
755 nat_tot += ind->nrecv[nzone+1];
761 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
763 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
764 int *index, *cgindex;
765 gmx_domdec_comm_t *comm;
766 gmx_domdec_comm_dim_t *cd;
767 gmx_domdec_ind_t *ind;
771 gmx_bool bPBC, bScrew;
775 cgindex = dd->cgindex;
780 nzone = comm->zones.n/2;
781 nat_tot = dd->nat_tot;
782 for (d = dd->ndim-1; d >= 0; d--)
784 bPBC = (dd->ci[dd->dim[d]] == 0);
785 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
786 if (fshift == NULL && !bScrew)
790 /* Determine which shift vector we need */
796 for (p = cd->np-1; p >= 0; p--)
799 nat_tot -= ind->nrecv[nzone+1];
806 sbuf = comm->vbuf2.v;
808 for (zone = 0; zone < nzone; zone++)
810 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
812 copy_rvec(f[i], sbuf[j]);
817 /* Communicate the forces */
818 dd_sendrecv_rvec(dd, d, dddirForward,
819 sbuf, ind->nrecv[nzone+1],
820 buf, ind->nsend[nzone+1]);
822 /* Add the received forces */
826 for (i = 0; i < ind->nsend[nzone]; i++)
828 at0 = cgindex[index[i]];
829 at1 = cgindex[index[i]+1];
830 for (j = at0; j < at1; j++)
832 rvec_inc(f[j], buf[n]);
839 for (i = 0; i < ind->nsend[nzone]; i++)
841 at0 = cgindex[index[i]];
842 at1 = cgindex[index[i]+1];
843 for (j = at0; j < at1; j++)
845 rvec_inc(f[j], buf[n]);
846 /* Add this force to the shift force */
847 rvec_inc(fshift[is], buf[n]);
854 for (i = 0; i < ind->nsend[nzone]; i++)
856 at0 = cgindex[index[i]];
857 at1 = cgindex[index[i]+1];
858 for (j = at0; j < at1; j++)
860 /* Rotate the force */
861 f[j][XX] += buf[n][XX];
862 f[j][YY] -= buf[n][YY];
863 f[j][ZZ] -= buf[n][ZZ];
866 /* Add this force to the shift force */
867 rvec_inc(fshift[is], buf[n]);
878 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
880 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
881 int *index, *cgindex;
882 gmx_domdec_comm_t *comm;
883 gmx_domdec_comm_dim_t *cd;
884 gmx_domdec_ind_t *ind;
889 cgindex = dd->cgindex;
891 buf = &comm->vbuf.v[0][0];
894 nat_tot = dd->nat_home;
895 for (d = 0; d < dd->ndim; d++)
898 for (p = 0; p < cd->np; p++)
903 for (i = 0; i < ind->nsend[nzone]; i++)
905 at0 = cgindex[index[i]];
906 at1 = cgindex[index[i]+1];
907 for (j = at0; j < at1; j++)
920 rbuf = &comm->vbuf2.v[0][0];
922 /* Send and receive the coordinates */
923 dd_sendrecv_real(dd, d, dddirBackward,
924 buf, ind->nsend[nzone+1],
925 rbuf, ind->nrecv[nzone+1]);
929 for (zone = 0; zone < nzone; zone++)
931 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
938 nat_tot += ind->nrecv[nzone+1];
944 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
946 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
947 int *index, *cgindex;
948 gmx_domdec_comm_t *comm;
949 gmx_domdec_comm_dim_t *cd;
950 gmx_domdec_ind_t *ind;
955 cgindex = dd->cgindex;
957 buf = &comm->vbuf.v[0][0];
960 nzone = comm->zones.n/2;
961 nat_tot = dd->nat_tot;
962 for (d = dd->ndim-1; d >= 0; d--)
965 for (p = cd->np-1; p >= 0; p--)
968 nat_tot -= ind->nrecv[nzone+1];
975 sbuf = &comm->vbuf2.v[0][0];
977 for (zone = 0; zone < nzone; zone++)
979 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
986 /* Communicate the forces */
987 dd_sendrecv_real(dd, d, dddirForward,
988 sbuf, ind->nrecv[nzone+1],
989 buf, ind->nsend[nzone+1]);
991 /* Add the received forces */
993 for (i = 0; i < ind->nsend[nzone]; i++)
995 at0 = cgindex[index[i]];
996 at1 = cgindex[index[i]+1];
997 for (j = at0; j < at1; j++)
1008 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
1010 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1012 zone->min0, zone->max1,
1013 zone->mch0, zone->mch0,
1014 zone->p1_0, zone->p1_1);
1018 #define DDZONECOMM_MAXZONE 5
1019 #define DDZONECOMM_BUFSIZE 3
/* Send and receive gmx_ddzone_t structs along DD dimension ddimind by
 * packing each zone into ZBS(=DDZONECOMM_BUFSIZE=3) rvecs, exchanging via
 * dd_sendrecv_rvec, and unpacking on the receiving side.  Two unused
 * buffer slots are transmitted as 0.
 * NOTE(review): the remaining dd_sendrecv_rvec argument lines and the
 * #undef of ZBS are not visible in this sampled view.
 */
1021 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1022 int ddimind, int direction,
1023 gmx_ddzone_t *buf_s, int n_s,
1024 gmx_ddzone_t *buf_r, int n_r)
1026 #define ZBS DDZONECOMM_BUFSIZE
1027 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1028 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
     /* Pack the zone structs into the rvec send buffer */
1031 for (i = 0; i < n_s; i++)
1033 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1034 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1035 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1036 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1037 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1038 vbuf_s[i*ZBS+1][2] = 0;
1039 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1040 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1041 vbuf_s[i*ZBS+2][2] = 0;
1044 dd_sendrecv_rvec(dd, ddimind, direction,
     /* Unpack the received rvecs back into zone structs */
1048 for (i = 0; i < n_r; i++)
1050 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1051 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1052 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1053 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1054 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1055 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1056 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
1062 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1063 rvec cell_ns_x0, rvec cell_ns_x1)
1065 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1067 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1068 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1069 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1070 rvec extr_s[2], extr_r[2];
1072 real dist_d, c = 0, det;
1073 gmx_domdec_comm_t *comm;
1074 gmx_bool bPBC, bUse;
1078 for (d = 1; d < dd->ndim; d++)
1081 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1082 zp->min0 = cell_ns_x0[dim];
1083 zp->max1 = cell_ns_x1[dim];
1084 zp->min1 = cell_ns_x1[dim];
1085 zp->mch0 = cell_ns_x0[dim];
1086 zp->mch1 = cell_ns_x1[dim];
1087 zp->p1_0 = cell_ns_x0[dim];
1088 zp->p1_1 = cell_ns_x1[dim];
1091 for (d = dd->ndim-2; d >= 0; d--)
1094 bPBC = (dim < ddbox->npbcdim);
1096 /* Use an rvec to store two reals */
1097 extr_s[d][0] = comm->cell_f0[d+1];
1098 extr_s[d][1] = comm->cell_f1[d+1];
1099 extr_s[d][2] = comm->cell_f1[d+1];
1102 /* Store the extremes in the backward sending buffer,
1103 * so they get updated separately from the forward communication.
1105 for (d1 = d; d1 < dd->ndim-1; d1++)
1107 /* We invert the order to be able to use the same loop for buf_e */
1108 buf_s[pos].min0 = extr_s[d1][1];
1109 buf_s[pos].max1 = extr_s[d1][0];
1110 buf_s[pos].min1 = extr_s[d1][2];
1111 buf_s[pos].mch0 = 0;
1112 buf_s[pos].mch1 = 0;
1113 /* Store the cell corner of the dimension we communicate along */
1114 buf_s[pos].p1_0 = comm->cell_x0[dim];
1115 buf_s[pos].p1_1 = 0;
1119 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1122 if (dd->ndim == 3 && d == 0)
1124 buf_s[pos] = comm->zone_d2[0][1];
1126 buf_s[pos] = comm->zone_d1[0];
1130 /* We only need to communicate the extremes
1131 * in the forward direction
1133 npulse = comm->cd[d].np;
1136 /* Take the minimum to avoid double communication */
1137 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1141 /* Without PBC we should really not communicate over
1142 * the boundaries, but implementing that complicates
1143 * the communication setup and therefore we simply
1144 * do all communication, but ignore some data.
1146 npulse_min = npulse;
1148 for (p = 0; p < npulse_min; p++)
1150 /* Communicate the extremes forward */
1151 bUse = (bPBC || dd->ci[dim] > 0);
1153 dd_sendrecv_rvec(dd, d, dddirForward,
1154 extr_s+d, dd->ndim-d-1,
1155 extr_r+d, dd->ndim-d-1);
1159 for (d1 = d; d1 < dd->ndim-1; d1++)
1161 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1162 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1163 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1169 for (p = 0; p < npulse; p++)
1171 /* Communicate all the zone information backward */
1172 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1174 dd_sendrecv_ddzone(dd, d, dddirBackward,
1181 for (d1 = d+1; d1 < dd->ndim; d1++)
1183 /* Determine the decrease of maximum required
1184 * communication height along d1 due to the distance along d,
1185 * this avoids a lot of useless atom communication.
1187 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1189 if (ddbox->tric_dir[dim])
1191 /* c is the off-diagonal coupling between the cell planes
1192 * along directions d and d1.
1194 c = ddbox->v[dim][dd->dim[d1]][dim];
1200 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1203 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1207 /* A negative value signals out of range */
1213 /* Accumulate the extremes over all pulses */
1214 for (i = 0; i < buf_size; i++)
1218 buf_e[i] = buf_r[i];
1224 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1225 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1226 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1229 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1237 if (bUse && dh[d1] >= 0)
1239 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1240 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1243 /* Copy the received buffer to the send buffer,
1244 * to pass the data through with the next pulse.
1246 buf_s[i] = buf_r[i];
1248 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1249 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1251 /* Store the extremes */
1254 for (d1 = d; d1 < dd->ndim-1; d1++)
1256 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1257 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1258 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1262 if (d == 1 || (d == 0 && dd->ndim == 3))
1264 for (i = d; i < 2; i++)
1266 comm->zone_d2[1-d][i] = buf_e[pos];
1272 comm->zone_d1[1] = buf_e[pos];
1282 for (i = 0; i < 2; i++)
1286 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1288 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1289 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1295 for (i = 0; i < 2; i++)
1297 for (j = 0; j < 2; j++)
1301 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1303 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1304 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1308 for (d = 1; d < dd->ndim; d++)
1310 comm->cell_f_max0[d] = extr_s[d-1][0];
1311 comm->cell_f_min1[d] = extr_s[d-1][1];
1314 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1315 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1320 static void dd_collect_cg(gmx_domdec_t *dd,
1321 t_state *state_local)
1323 gmx_domdec_master_t *ma = NULL;
1324 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1327 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1329 /* The master has the correct distribution */
1333 if (state_local->ddp_count == dd->ddp_count)
1335 ncg_home = dd->ncg_home;
1337 nat_home = dd->nat_home;
1339 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1341 cgs_gl = &dd->comm->cgs_gl;
1343 ncg_home = state_local->ncg_gl;
1344 cg = state_local->cg_gl;
1346 for (i = 0; i < ncg_home; i++)
1348 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1353 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1356 buf2[0] = dd->ncg_home;
1357 buf2[1] = dd->nat_home;
1367 /* Collect the charge group and atom counts on the master */
1368 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1373 for (i = 0; i < dd->nnodes; i++)
1375 ma->ncg[i] = ma->ibuf[2*i];
1376 ma->nat[i] = ma->ibuf[2*i+1];
1377 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1380 /* Make byte counts and indices */
1381 for (i = 0; i < dd->nnodes; i++)
1383 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1384 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1388 fprintf(debug, "Initial charge group distribution: ");
1389 for (i = 0; i < dd->nnodes; i++)
1391 fprintf(debug, " %d", ma->ncg[i]);
1393 fprintf(debug, "\n");
1397 /* Collect the charge group indices on the master */
1399 dd->ncg_home*sizeof(int), dd->index_gl,
1400 DDMASTER(dd) ? ma->ibuf : NULL,
1401 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1402 DDMASTER(dd) ? ma->cg : NULL);
1404 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1407 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1410 gmx_domdec_master_t *ma;
1411 int n, i, c, a, nalloc = 0;
1420 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1421 dd->rank, dd->mpi_comm_all);
1426 /* Copy the master coordinates to the global array */
1427 cgs_gl = &dd->comm->cgs_gl;
1429 n = DDMASTERRANK(dd);
1431 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1433 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1435 copy_rvec(lv[a++], v[c]);
1439 for (n = 0; n < dd->nnodes; n++)
1443 if (ma->nat[n] > nalloc)
1445 nalloc = over_alloc_dd(ma->nat[n]);
1446 srenew(buf, nalloc);
1449 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1450 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1453 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1455 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1457 copy_rvec(buf[a++], v[c]);
/* Fill *counts and *disps with the per-rank byte counts and displacements
 * needed for gathering rvecs on the master; storage is borrowed from the
 * master's ibuf array (*disps points at its second half).
 * NOTE(review): the assignment of *counts (presumably ma->ibuf) is not
 * visible in this sampled view.
 */
1466 static void get_commbuffer_counts(gmx_domdec_t *dd,
1467 int **counts, int **disps)
1469 gmx_domdec_master_t *ma;
1474 /* Make the rvec count and displacement arrays */
1476 *disps = ma->ibuf + dd->nnodes;
1477 for (n = 0; n < dd->nnodes; n++)
1479 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1480 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1484 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1487 gmx_domdec_master_t *ma;
1488 int *rcounts = NULL, *disps = NULL;
1497 get_commbuffer_counts(dd, &rcounts, &disps);
1502 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1506 cgs_gl = &dd->comm->cgs_gl;
1509 for (n = 0; n < dd->nnodes; n++)
1511 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1513 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1515 copy_rvec(buf[a++], v[c]);
/* Collect the distributed rvec array lv into the global array v on the
 * master.  First synchronizes the master's knowledge of the charge group
 * distribution, then uses point-to-point send/recv for small node counts
 * and a gatherv for large ones (GMX_DD_NNODES_SENDRECV threshold).
 */
1522 void dd_collect_vec(gmx_domdec_t *dd,
1523 t_state *state_local, rvec *lv, rvec *v)
1525 gmx_domdec_master_t *ma;
1526 int n, i, c, a, nalloc = 0;
1529 dd_collect_cg(dd, state_local);
1531 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1533 dd_collect_vec_sendrecv(dd, lv, v);
1537 dd_collect_vec_gatherv(dd, lv, v);
1542 void dd_collect_state(gmx_domdec_t *dd,
1543 t_state *state_local, t_state *state)
1547 nh = state->nhchainlength;
1551 for (i = 0; i < efptNR; i++)
1553 state->lambda[i] = state_local->lambda[i];
1555 state->fep_state = state_local->fep_state;
1556 state->veta = state_local->veta;
1557 state->vol0 = state_local->vol0;
1558 copy_mat(state_local->box, state->box);
1559 copy_mat(state_local->boxv, state->boxv);
1560 copy_mat(state_local->svir_prev, state->svir_prev);
1561 copy_mat(state_local->fvir_prev, state->fvir_prev);
1562 copy_mat(state_local->pres_prev, state->pres_prev);
1564 for (i = 0; i < state_local->ngtc; i++)
1566 for (j = 0; j < nh; j++)
1568 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1569 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1571 state->therm_integral[i] = state_local->therm_integral[i];
1573 for (i = 0; i < state_local->nnhpres; i++)
1575 for (j = 0; j < nh; j++)
1577 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1578 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1582 for (est = 0; est < estNR; est++)
1584 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1589 dd_collect_vec(dd, state_local, state_local->x, state->x);
1592 dd_collect_vec(dd, state_local, state_local->v, state->v);
1595 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1598 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1600 case estDISRE_INITF:
1601 case estDISRE_RM3TAV:
1602 case estORIRE_INITF:
1606 gmx_incons("Unknown state entry encountered in dd_collect_state");
/* Grow the distributed per-atom arrays in state (and the force buffer *f)
 * to hold at least nalloc atoms; over_alloc_dd() pads the size so we do
 * not reallocate on every small growth.
 */
1612 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1618 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1621 state->nalloc = over_alloc_dd(nalloc);
/* Only arrays flagged as distributed in state->flags need resizing */
1623 for (est = 0; est < estNR; est++)
1625 if (EST_DISTR(est) && (state->flags & (1<<est)))
1630 srenew(state->x, state->nalloc);
1633 srenew(state->v, state->nalloc);
1636 srenew(state->sd_X, state->nalloc);
1639 srenew(state->cg_p, state->nalloc);
1641 case estDISRE_INITF:
1642 case estDISRE_RM3TAV:
1643 case estORIRE_INITF:
1645 /* No reallocation required */
1648 gmx_incons("Unknown state entry encountered in dd_realloc_state");
/* The force array is kept the same size as the state arrays */
1655 srenew(*f, state->nalloc);
/* Ensure the forcerec charge-group arrays (and, for the Verlet scheme,
 * the state x array) can hold nalloc charge groups/atoms.
 */
1659 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1662 if (nalloc > fr->cg_nalloc)
1666 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1668 fr->cg_nalloc = over_alloc_dd(nalloc);
1669 srenew(fr->cginfo, fr->cg_nalloc);
/* cg_cm (charge-group centers) only exists for the group cut-off scheme */
1670 if (fr->cutoff_scheme == ecutsGROUP)
1672 srenew(fr->cg_cm, fr->cg_nalloc);
1675 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1677 /* We don't use charge groups, we use x in state to set up
1678 * the atom communication.
1680 dd_realloc_state(state, f, nalloc);
/* Master sends each non-master rank its slice of the global rvec array v
 * with point-to-point MPI_Send; non-masters receive into lv.  Used for
 * small rank counts where scatterv has no advantage.
 */
1684 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1687 gmx_domdec_master_t *ma;
1688 int n, i, c, a, nalloc = 0;
1695 for (n = 0; n < dd->nnodes; n++)
/* Grow the staging buffer to the destination rank's atom count */
1699 if (ma->nat[n] > nalloc)
1701 nalloc = over_alloc_dd(ma->nat[n]);
1702 srenew(buf, nalloc);
1704 /* Use lv as a temporary buffer */
/* Pack the atoms of rank n's charge groups contiguously */
1706 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1708 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1710 copy_rvec(v[c], buf[a++]);
1713 if (a != ma->nat[n])
1715 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1720 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1721 DDRANK(dd, n), n, dd->mpi_comm_all);
/* Master copies its own slice directly, no MPI needed */
1726 n = DDMASTERRANK(dd);
1728 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1730 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1732 copy_rvec(v[c], lv[a++]);
1739 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1740 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
/* Distribute the global rvec array v over the ranks with one collective
 * MPI scatterv: the master packs all atoms per destination rank into buf
 * using the precomputed counts/displacements, each rank receives into lv.
 */
1745 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1748 gmx_domdec_master_t *ma;
1749 int *scounts = NULL, *disps = NULL;
1750 int n, i, c, a, nalloc = 0;
1757 get_commbuffer_counts(dd, &scounts, &disps);
/* Pack: atoms of each rank's charge groups, rank-major order */
1761 for (n = 0; n < dd->nnodes; n++)
1763 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1765 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1767 copy_rvec(v[c], buf[a++]);
1773 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1776 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1778 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1780 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1784 dd_distribute_vec_scatterv(dd, cgs, v, lv);
/* Broadcast the free-energy weight history (df_history_t) from the master
 * to all ranks; needed so that all ranks agree after a checkpoint restart.
 */
1788 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1791 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1792 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1793 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
/* The per-lambda arrays only exist when nlambda > 0 */
1795 if (dfhist->nlambda > 0)
1797 int nlam = dfhist->nlambda;
1798 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1799 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1800 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1801 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1802 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1803 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
/* nlam x nlam matrices, broadcast row by row */
1805 for (i = 0; i < nlam; i++)
1807 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1808 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1809 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1810 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1811 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1812 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
/* Inverse of dd_collect_state: push the global t_state from the master
 * out to all ranks.  The master copies scalars into state_local, then
 * everything is broadcast; distributed per-atom arrays go through
 * dd_distribute_vec().
 */
1817 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1818 t_state *state, t_state *state_local,
1823 nh = state->nhchainlength;
/* Master-only: copy global scalars/matrices into the local state first */
1827 for (i = 0; i < efptNR; i++)
1829 state_local->lambda[i] = state->lambda[i];
1831 state_local->fep_state = state->fep_state;
1832 state_local->veta = state->veta;
1833 state_local->vol0 = state->vol0;
1834 copy_mat(state->box, state_local->box);
1835 copy_mat(state->box_rel, state_local->box_rel);
1836 copy_mat(state->boxv, state_local->boxv);
1837 copy_mat(state->svir_prev, state_local->svir_prev);
1838 copy_mat(state->fvir_prev, state_local->fvir_prev);
1839 copy_df_history(&state_local->dfhist, &state->dfhist);
1840 for (i = 0; i < state_local->ngtc; i++)
1842 for (j = 0; j < nh; j++)
1844 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1845 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1847 state_local->therm_integral[i] = state->therm_integral[i];
1849 for (i = 0; i < state_local->nnhpres; i++)
1851 for (j = 0; j < nh; j++)
1853 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1854 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
/* Broadcast the (master-filled) local scalars/matrices to all ranks */
1858 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1859 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1860 dd_bcast(dd, sizeof(real), &state_local->veta);
1861 dd_bcast(dd, sizeof(real), &state_local->vol0);
1862 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1863 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1864 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1865 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1866 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1867 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1868 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1869 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1870 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1871 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1873 /* communicate df_history -- required for restarting from checkpoint */
1874 dd_distribute_dfhist(dd, &state_local->dfhist);
/* Make sure the local arrays can hold this rank's home atoms */
1876 if (dd->nat_home > state_local->nalloc)
1878 dd_realloc_state(state_local, f, dd->nat_home);
1880 for (i = 0; i < estNR; i++)
1882 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1887 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1890 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1893 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1896 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1898 case estDISRE_INITF:
1899 case estDISRE_RM3TAV:
1900 case estORIRE_INITF:
1902 /* Not implemented yet */
1905 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1911 static char dim2char(int dim)
1917 case XX: c = 'X'; break;
1918 case YY: c = 'Y'; break;
1919 case ZZ: c = 'Z'; break;
1920 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
/* Debug output: gather all DD cell boundaries on the master and write
 * them as a PDB file, one 8-corner "box" of CA atoms per DD cell plus
 * CONECT records drawing the cell edges.  Coordinates are in Angstrom
 * (hence the factor 10), B-factor column holds the cell volume.
 */
1926 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1927 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1929 rvec grid_s[2], *grid_r = NULL, cx, r;
1930 char fname[STRLEN], format[STRLEN], buf[22];
1932 int a, i, d, z, y, x;
1936 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1937 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1941 snew(grid_r, 2*dd->nnodes);
/* Collect each rank's lower/upper cell corners on the master */
1944 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : NULL);
/* Build the triclinic skew matrix for converting corners to Cartesian */
1948 for (d = 0; d < DIM; d++)
1950 for (i = 0; i < DIM; i++)
1958 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1960 tric[d][i] = box[i][d]/box[i][i];
1969 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1970 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
1971 out = gmx_fio_fopen(fname, "w");
1972 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1974 for (i = 0; i < dd->nnodes; i++)
/* Cell volume relative to a uniform decomposition, stored in B-factor */
1976 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1977 for (d = 0; d < DIM; d++)
1979 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
/* Emit the 8 corners of cell i as pseudo-atoms */
1981 for (z = 0; z < 2; z++)
1983 for (y = 0; y < 2; y++)
1985 for (x = 0; x < 2; x++)
1987 cx[XX] = grid_r[i*2+x][XX];
1988 cx[YY] = grid_r[i*2+y][YY];
1989 cx[ZZ] = grid_r[i*2+z][ZZ];
1991 fprintf(out, format, "ATOM", a++, "CA", "GLY", ' ', 1+i,
1992 ' ', 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol);
/* CONECT records joining corner atoms along each dimension's edges */
1996 for (d = 0; d < DIM; d++)
1998 for (x = 0; x < 4; x++)
2002 case 0: y = 1 + i*8 + 2*x; break;
2003 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
2004 case 2: y = 1 + i*8 + x; break;
2006 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2010 gmx_fio_fclose(out);
/* Debug output: write this rank's local atoms (home + communicated) to a
 * per-rank PDB file.  The B-factor column encodes the DD zone of each
 * atom (zones.n for vsites, zones.n+1 for constraint-connected atoms).
 */
2015 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
2016 gmx_mtop_t *mtop, t_commrec *cr,
2017 int natoms, rvec x[], matrix box)
2019 char fname[STRLEN], format[STRLEN], format4[STRLEN], buf[22];
2021 int i, ii, resnr, c;
2022 char *atomname, *resname;
/* Cap output at the atoms known locally (through the vsite range) */
2029 natoms = dd->comm->nat[ddnatVSITE];
2032 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2034 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
2035 sprintf(format4, "%s%s\n", get_pdbformat4(), "%6.2f%6.2f");
2037 out = gmx_fio_fopen(fname, "w");
2039 fprintf(out, "TITLE %s\n", title);
2040 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2041 for (i = 0; i < natoms; i++)
/* Map local atom index to global index and look up its names */
2043 ii = dd->gatindex[i];
2044 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
/* Determine which zone (communicated shell) atom i belongs to */
2045 if (i < dd->comm->nat[ddnatZONE])
2048 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2054 else if (i < dd->comm->nat[ddnatVSITE])
2056 b = dd->comm->zones.n;
2060 b = dd->comm->zones.n + 1;
2062 fprintf(out, strlen(atomname) < 4 ? format : format4,
2063 "ATOM", (ii+1)%100000,
2064 atomname, resname, ' ', resnr%10000, ' ',
2065 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b);
2067 fprintf(out, "TER\n");
2069 gmx_fio_fclose(out);
/* Return the effective multi-body (bonded) communication cut-off.
 * With inter-charge-group bondeds this is either the explicit
 * cutoff_mbody, or (without DLB) the smallest decomposed cell size,
 * optionally raised by bBondComm; always capped by the pair cut-off.
 */
2072 real dd_cutoff_mbody(gmx_domdec_t *dd)
2074 gmx_domdec_comm_t *comm;
2081 if (comm->bInterCGBondeds)
2083 if (comm->cutoff_mbody > 0)
2085 r = comm->cutoff_mbody;
2089 /* cutoff_mbody=0 means we do not have DLB */
2090 r = comm->cellsize_min[dd->dim[0]];
2091 for (di = 1; di < dd->ndim; di++)
2093 r = min(r, comm->cellsize_min[dd->dim[di]]);
2095 if (comm->bBondComm)
2097 r = max(r, comm->cutoff_mbody);
/* Never report more than the non-bonded communication cut-off */
2101 r = min(r, comm->cutoff);
2109 real dd_cutoff_twobody(gmx_domdec_t *dd)
2113 r_mb = dd_cutoff_mbody(dd);
2115 return max(dd->comm->cutoff, r_mb);
/* Map a Cartesian PP-rank coordinate to the coordinate of its PME rank:
 * PME ranks occupy the extra (ntot - nc) slots along cartpmedim, and PP
 * rank coord is scaled proportionally onto that PME range.
 */
2119 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2123 nc = dd->nc[dd->comm->cartpmedim];
2124 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2125 copy_ivec(coord, coord_pme);
2126 coord_pme[dd->comm->cartpmedim] =
2127 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
/* Assign one of npme PME ranks to DD rank ddindex (out of ndd ranks).
 * Here we assign a PME node to communicate with this DD node by
 * assuming that the major index of both is x.
 * We add npme/2 before the integer division to obtain an even
 * distribution of DD ranks over PME ranks.
 */
static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
{
    int centered = ddindex*npme + npme/2;

    return centered/ndd;
}
2139 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2141 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2144 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2146 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
/* Build the list of PME-only rank ids: a PME rank is inserted after the
 * last DD rank that maps to it.  Returns a newly allocated array of
 * length cr->npmenodes; caller owns the memory.
 */
2149 static int *dd_pmenodes(t_commrec *cr)
2154 snew(pmenodes, cr->npmenodes);
2156 for (i = 0; i < cr->dd->nnodes; i++)
2158 p0 = cr_ddindex2pmeindex(cr, i);
2159 p1 = cr_ddindex2pmeindex(cr, i+1);
/* A new PME rank starts after DD rank i when the mapping increments */
2160 if (i+1 == cr->dd->nnodes || p1 > p0)
2164 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2166 pmenodes[n] = i + 1 + n;
/* Return the PME slab/rank index handling DD cell (x,y,z).
 * NOTE(review): the differently-braced section below appears to be a
 * disabled/alternative Cartesian code path in the original source --
 * the live path is the ddindex2pmeindex() call at the end; confirm
 * against the full file before editing.
 */
2174 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2177 ivec coords, coords_pme, nc;
2182 if (dd->comm->bCartesian) {
2183 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2184 dd_coords2pmecoords(dd,coords,coords_pme);
2185 copy_ivec(dd->ntot,nc);
2186 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2187 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2189 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2191 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2197 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
/* Translate a DD cell coordinate (x,y,z) into the simulation node id,
 * taking into account the Cartesian/PP-PME rank layouts.
 */
2202 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2204 gmx_domdec_comm_t *comm;
2206 int ddindex, nodeid = -1;
2208 comm = cr->dd->comm;
/* With a combined PP+PME Cartesian communicator, MPI knows the mapping */
2213 if (comm->bCartesianPP_PME)
2216 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2221 ddindex = dd_index(cr->dd->nc, coords);
2222 if (comm->bCartesianPP)
2224 nodeid = comm->ddindex2simnodeid[ddindex];
/* Otherwise PME ranks are interleaved: shift by the PME ranks before us */
2230 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
/* Return the PME rank id that PP rank sim_nodeid sends to, or -1 when
 * sim_nodeid is itself a PME-only rank.  Handles the three rank layouts:
 * full Cartesian PP+PME, Cartesian PP only, and interleaved.
 */
2242 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2245 gmx_domdec_comm_t *comm;
2246 ivec coord, coord_pme;
2253 /* This assumes a uniform x domain decomposition grid cell size */
2254 if (comm->bCartesianPP_PME)
2257 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2258 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2260 /* This is a PP node */
2261 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2262 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2266 else if (comm->bCartesianPP)
2268 if (sim_nodeid < dd->nnodes)
2270 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2275 /* This assumes DD cells with identical x coordinates
2276 * are numbered sequentially.
2278 if (dd->comm->pmenodes == NULL)
2280 if (sim_nodeid < dd->nnodes)
2282 /* The DD index equals the nodeid */
2283 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid)
2289 while (sim_nodeid > dd->comm->pmenodes[i])
2293 if (sim_nodeid < dd->comm->pmenodes[i])
2295 pmenode = dd->comm->pmenodes[i];
/* Report the PME grid decomposition (x and y rank counts) through the
 * output parameters.  Presumably falls back to 1x1 when dd is NULL --
 * the else branch is outside this view; TODO confirm in the full file.
 */
2303 void get_pme_nnodes(const gmx_domdec_t *dd,
2304 int *npmenodes_x, int *npmenodes_y)
2308 *npmenodes_x = dd->comm->npmenodes_x;
2309 *npmenodes_y = dd->comm->npmenodes_y;
2318 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2320 gmx_bool bPMEOnlyNode;
2322 if (DOMAINDECOMP(cr))
2324 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2328 bPMEOnlyNode = FALSE;
2331 return bPMEOnlyNode;
/* For PME rank pmenodeid, build the list of PP rank ids that send it
 * coordinates (*my_ddnodes, length *nmy_ddnodes, newly allocated) and
 * report the peer PP rank (*node_peer) used for virial/energy transfer.
 */
2334 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2335 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2339 ivec coord, coord_pme;
/* Upper bound: PP ranks are split evenly over the PME ranks */
2343 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2346 for (x = 0; x < dd->nc[XX]; x++)
2348 for (y = 0; y < dd->nc[YY]; y++)
2350 for (z = 0; z < dd->nc[ZZ]; z++)
2352 if (dd->comm->bCartesianPP_PME)
2357 dd_cart_coord2pmecoord(dd, coord, coord_pme)
2358 if (dd->ci[XX] == coord_pme[XX] &&
2359 dd->ci[YY] == coord_pme[YY] &&
2360 dd->ci[ZZ] == coord_pme[ZZ])
2362 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2367 /* The slab corresponds to the nodeid in the PME group */
2368 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2370 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2377 /* The last PP-only node is the peer node */
2378 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2382 fprintf(debug, "Receive coordinates from PP nodes:");
2383 for (x = 0; x < *nmy_ddnodes; x++)
2385 fprintf(debug, " %d", (*my_ddnodes)[x]);
2387 fprintf(debug, "\n");
/* Decide whether this PP rank is the one that receives virial/energy
 * from its PME rank: only the LAST PP rank mapped to a given PME rank
 * does, to avoid duplicate contributions.
 */
2391 static gmx_bool receive_vir_ener(t_commrec *cr)
2393 gmx_domdec_comm_t *comm;
2394 int pmenode, coords[DIM], rank;
/* With fewer PME than PP ranks several PP ranks share one PME rank */
2398 if (cr->npmenodes < cr->dd->nnodes)
2400 comm = cr->dd->comm;
2401 if (comm->bCartesianPP_PME)
2403 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
/* Check whether the next rank along cartpmedim shares our PME rank */
2405 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2406 coords[comm->cartpmedim]++;
2407 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2409 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2410 if (dd_simnode2pmenode(cr, rank) == pmenode)
2412 /* This is not the last PP node for pmenode */
2420 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2421 if (cr->sim_nodeid+1 < cr->nnodes &&
2422 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2424 /* This is not the last PP node for pmenode */
2433 static void set_zones_ncg_home(gmx_domdec_t *dd)
2435 gmx_domdec_zones_t *zones;
2438 zones = &dd->comm->zones;
2440 zones->cg_range[0] = 0;
2441 for (i = 1; i < zones->n+1; i++)
2443 zones->cg_range[i] = dd->ncg_home;
2445 /* zone_ncg1[0] should always be equal to ncg_home */
2446 dd->comm->zone_ncg1[0] = dd->ncg_home;
/* Rebuild the local charge-group bookkeeping (global indices, local
 * cgindex, atom count) from state->cg_gl, e.g. after reading a
 * checkpoint; gcgs_index gives the global cg -> atom ranges.
 */
2449 static void rebuild_cgindex(gmx_domdec_t *dd,
2450 const int *gcgs_index, t_state *state)
2452 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2455 dd_cg_gl = dd->index_gl;
2456 cgindex = dd->cgindex;
2459 for (i = 0; i < state->ncg_gl; i++)
2463 dd_cg_gl[i] = cg_gl;
/* Accumulate atoms: each cg spans gcgs_index[cg]..gcgs_index[cg+1] */
2464 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2468 dd->ncg_home = state->ncg_gl;
2471 set_zones_ncg_home(dd);
2474 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2476 while (cg >= cginfo_mb->cg_end)
2481 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
/* Fill fr->cginfo for local charge groups cg0..cg1 from the compressed
 * per-molblock table, and mark them as local in bLocalCG when present.
 */
2484 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2485 t_forcerec *fr, char *bLocalCG)
2487 cginfo_mb_t *cginfo_mb;
2493 cginfo_mb = fr->cginfo_mb;
2494 cginfo = fr->cginfo;
2496 for (cg = cg0; cg < cg1; cg++)
2498 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
/* bLocalCG is optional (only kept when charge groups are used) */
2502 if (bLocalCG != NULL)
2504 for (cg = cg0; cg < cg1; cg++)
2506 bLocalCG[index_gl[cg]] = TRUE;
/* (Re)build the local<->global atom index tables from cg_start on:
 * fills dd->gatindex (local -> global) and the ga2la hash
 * (global -> local + zone), walking the charge groups zone by zone.
 */
2511 static void make_dd_indices(gmx_domdec_t *dd,
2512 const int *gcgs_index, int cg_start)
2514 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2515 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2520 bLocalCG = dd->comm->bLocalCG;
2522 if (dd->nat_tot > dd->gatindex_nalloc)
2524 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2525 srenew(dd->gatindex, dd->gatindex_nalloc);
2528 nzone = dd->comm->zones.n;
2529 zone2cg = dd->comm->zones.cg_range;
2530 zone_ncg1 = dd->comm->zone_ncg1;
2531 index_gl = dd->index_gl;
2532 gatindex = dd->gatindex;
2533 bCGs = dd->comm->bCGs;
2535 if (zone2cg[1] != dd->ncg_home)
2537 gmx_incons("dd->ncg_zone is not up to date");
2540 /* Make the local to global and global to local atom index */
2541 a = dd->cgindex[cg_start];
2542 for (zone = 0; zone < nzone; zone++)
2550 cg0 = zone2cg[zone];
2552 cg1 = zone2cg[zone+1];
/* cg1_p1: end of the charge groups received with the first pulse */
2553 cg1_p1 = cg0 + zone_ncg1[zone];
2555 for (cg = cg0; cg < cg1; cg++)
2560 /* Signal that this cg is from more than one pulse away */
2563 cg_gl = index_gl[cg];
/* With charge groups, register each atom of the group */
2566 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2569 ga2la_set(dd->ga2la, a_gl, a, zone1);
/* Without charge groups, cg index == atom index */
2575 gatindex[a] = cg_gl;
2576 ga2la_set(dd->ga2la, cg_gl, a, zone1);
/* Debug check: verify bLocalCG is consistent with index_gl (all local
 * cgs marked, and the number of marks matches ncg_tot).  Returns the
 * number of inconsistencies found.
 */
2583 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2586 int ncg, i, ngl, nerr;
2589 if (bLocalCG == NULL)
2593 for (i = 0; i < dd->ncg_tot; i++)
2595 if (!bLocalCG[dd->index_gl[i]])
2598 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
/* Count how many global cgs are marked local in total */
2603 for (i = 0; i < ncg_sys; i++)
2610 if (ngl != dd->ncg_tot)
2612 fprintf(stderr, "DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
/* Debug check: verify that gatindex and ga2la are mutual inverses and
 * that no global atom is registered twice; fatals out on any error.
 */
2619 static void check_index_consistency(gmx_domdec_t *dd,
2620 int natoms_sys, int ncg_sys,
2623 int nerr, ngl, i, a, cell;
/* Expensive duplicate check, only at the highest debug level */
2628 if (dd->comm->DD_debug > 1)
2630 snew(have, natoms_sys);
2631 for (a = 0; a < dd->nat_tot; a++)
2633 if (have[dd->gatindex[a]] > 0)
2635 fprintf(stderr, "DD node %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2639 have[dd->gatindex[a]] = a + 1;
2645 snew(have, dd->nat_tot);
/* Forward check: every ga2la entry must point back to itself */
2648 for (i = 0; i < natoms_sys; i++)
2650 if (ga2la_get(dd->ga2la, i, &a, &cell))
2652 if (a >= dd->nat_tot)
2654 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2660 if (dd->gatindex[a] != i)
2662 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2669 if (ngl != dd->nat_tot)
2672 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2673 dd->rank, where, ngl, dd->nat_tot);
/* Reverse check: every local atom must have been seen via ga2la */
2675 for (a = 0; a < dd->nat_tot; a++)
2680 "DD node %d, %s: local atom %d, global %d has no global index\n",
2681 dd->rank, where, a+1, dd->gatindex[a]+1);
2686 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2690 gmx_fatal(FARGS, "DD node %d, %s: %d atom/cg index inconsistencies",
2691 dd->rank, where, nerr);
/* Invalidate the local atom/cg index tables from (cg_start, a_start) on:
 * clears ga2la entries, bLocalCG marks, and the vsite/constraint
 * global-to-local tables.  cg_start == 0 clears everything at once.
 */
2695 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2702 /* Clear the whole list without searching */
2703 ga2la_clear(dd->ga2la);
/* Partial clear: delete only the entries past a_start */
2707 for (i = a_start; i < dd->nat_tot; i++)
2709 ga2la_del(dd->ga2la, dd->gatindex[i]);
2713 bLocalCG = dd->comm->bLocalCG;
2716 for (i = cg_start; i < dd->ncg_tot; i++)
2718 bLocalCG[dd->index_gl[i]] = FALSE;
2722 dd_clear_local_vsite_indices(dd);
2724 if (dd->constraints)
2726 dd_clear_local_constraint_indices(dd);
2730 /* This function should be used for moving the domain boudaries during DLB,
2731 * for obtaining the minimum cell size. It checks the initially set limit
2732 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2733 * and, possibly, a longer cut-off limit set for PME load balancing.
2735 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2739 cellsize_min = comm->cellsize_min[dim];
2741 if (!comm->bVacDLBNoLimit)
2743 /* The cut-off might have changed, e.g. by PME load balacning,
2744 * from the value used to set comm->cellsize_min, so check it.
2746 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2748 if (comm->bPMELoadBalDLBLimits)
2750 /* Check for the cut-off limit set by the PME load balancing */
2751 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2755 return cellsize_min;
2758 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2761 real grid_jump_limit;
2763 /* The distance between the boundaries of cells at distance
2764 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2765 * and by the fact that cells should not be shifted by more than
2766 * half their size, such that cg's only shift by one cell
2767 * at redecomposition.
2769 grid_jump_limit = comm->cellsize_limit;
2770 if (!comm->bVacDLBNoLimit)
2772 if (comm->bPMELoadBalDLBLimits)
2774 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2776 grid_jump_limit = max(grid_jump_limit,
2777 cutoff/comm->cd[dim_ind].np);
2780 return grid_jump_limit;
/* Check that no staggered DD cell boundary has shifted further than the
 * grid-jump limit along any decomposed dimension; fatals out (or, per
 * the caller's flag outside this view, reports) on violation.
 */
2783 static gmx_bool check_grid_jump(gmx_int64_t step,
2789 gmx_domdec_comm_t *comm;
/* Dimension 0 has no staggering, start at d = 1 */
2798 for (d = 1; d < dd->ndim; d++)
2801 limit = grid_jump_limit(comm, cutoff, d);
2802 bfac = ddbox->box_size[dim];
2803 if (ddbox->tric_dir[dim])
2805 bfac *= ddbox->skew_fac[dim];
/* Compare our boundaries against the neighbor extrema in scaled units */
2807 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2808 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2816 /* This error should never be triggered under normal
2817 * circumstances, but you never know ...
2819 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
2820 gmx_step_str(step, buf),
2821 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2829 static int dd_load_count(gmx_domdec_comm_t *comm)
2831 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
/* Return this rank's load metric for DLB: flop count (optionally with
 * artificial noise for testing) or force cycle count, corrected for
 * outlier cycles and for GPU wait time shared between ranks.
 */
2834 static float dd_force_load(gmx_domdec_comm_t *comm)
2841 if (comm->eFlop > 1)
/* eFlop > 1: add +-5% random noise to the flop load for DLB testing */
2843 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2848 load = comm->cycl[ddCyclF];
2849 if (comm->cycl_n[ddCyclF] > 1)
2851 /* Subtract the maximum of the last n cycle counts
2852 * to get rid of possible high counts due to other sources,
2853 * for instance system activity, that would otherwise
2854 * affect the dynamic load balancing.
2856 load -= comm->cycl_max[ddCyclF];
2860 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2862 float gpu_wait, gpu_wait_sum;
2864 gpu_wait = comm->cycl[ddCyclWaitGPU];
2865 if (comm->cycl_n[ddCyclF] > 1)
2867 /* We should remove the WaitGPU time of the same MD step
2868 * as the one with the maximum F time, since the F time
2869 * and the wait time are not independent.
2870 * Furthermore, the step for the max F time should be chosen
2871 * the same on all ranks that share the same GPU.
2872 * But to keep the code simple, we remove the average instead.
2873 * The main reason for artificially long times at some steps
2874 * is spurious CPU activity or MPI time, so we don't expect
2875 * that changes in the GPU wait time matter a lot here.
2877 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2879 /* Sum the wait times over the ranks that share the same GPU */
2880 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2881 comm->mpi_comm_gpu_shared);
2882 /* Replace the wait time by the average over the ranks */
2883 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
/* Build the cumulative cell-boundary fractions along dim for static
 * load balancing: *dim_f gets nc+1 entries in [0,1], either from the
 * user-supplied slb_frac or uniform spacing.  Allocates *dim_f.
 */
2891 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2893 gmx_domdec_comm_t *comm;
2898 snew(*dim_f, dd->nc[dim]+1);
2900 for (i = 1; i < dd->nc[dim]; i++)
2902 if (comm->slb_frac[dim])
2904 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2908 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2911 (*dim_f)[dd->nc[dim]] = 1;
/* Initialize the PME slab bookkeeping for PME decomposition dimension
 * dimind: determine the slab count and, per slab, the min/max PP cell
 * coordinates it communicates with, plus the static boundary fractions.
 */
2914 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2916 int pmeindex, slab, nso, i;
/* Special case: DD major dim is y but PME is not decomposed along x */
2919 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2925 ddpme->dim = dimind;
2927 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2929 ddpme->nslab = (ddpme->dim == 0 ?
2930 dd->comm->npmenodes_x :
2931 dd->comm->npmenodes_y);
2933 if (ddpme->nslab <= 1)
2938 nso = dd->comm->npmenodes/ddpme->nslab;
2939 /* Determine for each PME slab the PP location range for dimension dim */
2940 snew(ddpme->pp_min, ddpme->nslab);
2941 snew(ddpme->pp_max, ddpme->nslab);
/* Start with an empty (inverted) range, then widen over all DD ranks */
2942 for (slab = 0; slab < ddpme->nslab; slab++)
2944 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2945 ddpme->pp_max[slab] = 0;
2947 for (i = 0; i < dd->nnodes; i++)
2949 ddindex2xyz(dd->nc, i, xyz);
2950 /* For y only use our y/z slab.
2951 * This assumes that the PME x grid size matches the DD grid size.
2953 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2955 pmeindex = ddindex2pmeindex(dd, i);
2958 slab = pmeindex/nso;
2962 slab = pmeindex % ddpme->nslab;
2964 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
2965 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
2969 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
2972 int dd_pme_maxshift_x(gmx_domdec_t *dd)
2974 if (dd->comm->ddpme[0].dim == XX)
2976 return dd->comm->ddpme[0].maxshift;
2984 int dd_pme_maxshift_y(gmx_domdec_t *dd)
2986 if (dd->comm->ddpme[0].dim == YY)
2988 return dd->comm->ddpme[0].maxshift;
2990 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2992 return dd->comm->ddpme[1].maxshift;
/* Determine ddpme->maxshift: how many PME slabs to the left/right each
 * PME rank may need to communicate with, given the current cell
 * boundary fractions cell_f and the cut-off overhang.
 */
3000 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
3001 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
3003 gmx_domdec_comm_t *comm;
3006 real range, pme_boundary;
3010 nc = dd->nc[ddpme->dim];
3013 if (!ddpme->dim_match)
3015 /* PP decomposition is not along dim: the worst situation */
3018 else if (ns <= 3 || (bUniform && ns == nc))
3020 /* The optimal situation */
3025 /* We need to check for all pme nodes which nodes they
3026 * could possibly need to communicate with.
3028 xmin = ddpme->pp_min;
3029 xmax = ddpme->pp_max;
3030 /* Allow for atoms to be maximally 2/3 times the cut-off
3031 * out of their DD cell. This is a reasonable balance between
3032 * between performance and support for most charge-group/cut-off
3035 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3036 /* Avoid extra communication when we are exactly at a boundary */
/* Grow sh until no slab's spread range crosses a further boundary;
 * the +-ns variants handle periodic wrap-around of the slab index. */
3040 for (s = 0; s < ns; s++)
3042 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3043 pme_boundary = (real)s/ns;
3046 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3048 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3052 pme_boundary = (real)(s+1)/ns;
3055 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3057 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3064 ddpme->maxshift = sh;
3068 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3069 ddpme->dim, ddpme->maxshift);
3073 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3077 for (d = 0; d < dd->ndim; d++)
3080 if (dim < ddbox->nboundeddim &&
3081 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3082 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3084 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3085 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3086 dd->nc[dim], dd->comm->cellsize_limit);
3092 setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY
3095 /* Set the domain boundaries. Use for static (or no) load balancing,
3096 * and also for the starting state for dynamic load balancing.
3097 * setmode determine if and where the boundaries are stored, use enum above.
3098 * Returns the number communication pulses in npulse.
3100 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3101 int setmode, ivec npulse)
3103 gmx_domdec_comm_t *comm;
3106 real *cell_x, cell_dx, cellsize;
3110 for (d = 0; d < DIM; d++)
3112 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
/* Uniform case: one cell, or no per-cell fractions given */
3114 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3117 cell_dx = ddbox->box_size[d]/dd->nc[d];
3120 case setcellsizeslbMASTER:
3121 for (j = 0; j < dd->nc[d]+1; j++)
3123 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3126 case setcellsizeslbLOCAL:
3127 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3128 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
/* npulse[d]: how many neighbor shells are needed to cover the cut-off */
3133 cellsize = cell_dx*ddbox->skew_fac[d];
3134 while (cellsize*npulse[d] < comm->cutoff)
3138 cellsize_min[d] = cellsize;
3142 /* Statically load balanced grid */
3143 /* Also when we are not doing a master distribution we determine
3144 * all cell borders in a loop to obtain identical values
3145 * to the master distribution case and to determine npulse.
3147 if (setmode == setcellsizeslbMASTER)
3149 cell_x = dd->ma->cell_x[d];
3153 snew(cell_x, dd->nc[d]+1);
3155 cell_x[0] = ddbox->box0[d];
3156 for (j = 0; j < dd->nc[d]; j++)
3158 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3159 cell_x[j+1] = cell_x[j] + cell_dx;
3160 cellsize = cell_dx*ddbox->skew_fac[d];
3161 while (cellsize*npulse[d] < comm->cutoff &&
3162 npulse[d] < dd->nc[d]-1)
3166 cellsize_min[d] = min(cellsize_min[d], cellsize);
3168 if (setmode == setcellsizeslbLOCAL)
3170 comm->cell_x0[d] = cell_x[dd->ci[d]];
3171 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3173 if (setmode != setcellsizeslbMASTER)
3178 /* The following limitation is to avoid that a cell would receive
3179 * some of its own home charge groups back over the periodic boundary.
3180 * Double charge groups cause trouble with the global indices.
3182 if (d < ddbox->npbcdim &&
3183 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3185 char error_string[STRLEN];
3187 sprintf(error_string,
3188 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3189 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3191 dd->nc[d], dd->nc[d],
3192 dd->nnodes > dd->nc[d] ? "cells" : "processors");
3194 if (setmode == setcellsizeslbLOCAL)
3196 gmx_fatal_collective(FARGS, NULL, dd, error_string);
3200 gmx_fatal(FARGS, error_string);
/* Without DLB this also fixes the minimum cell sizes */
3205 if (!comm->bDynLoadBal)
3207 copy_rvec(cellsize_min, comm->cellsize_min);
/* Update the PME slab communication ranges for the new boundaries */
3210 for (d = 0; d < comm->npmedecompdim; d++)
3212 set_pme_maxshift(dd, &comm->ddpme[d],
3213 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3214 comm->ddpme[d].slb_dim_f);
/* Enforce the minimum relative cell size cellsize_limit_f on the DLB
 * cell boundaries root->cell_f over the half-open index range
 * [range[0], range[1]).  Cells below the limit are pinned to the limit
 * and the remaining cells rescaled, iterating until no new cell drops
 * below the limit.  Boundary moves are then capped at halfway across
 * the neighboring old cells, and the staggering bounds
 * (root->bound_min/bound_max) are enforced by recursing on sub-ranges.
 * Runs on the row-root rank only (root holds the row state).
 */
3219 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3220 int d, int dim, gmx_domdec_root_t *root,
3222 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3224 gmx_domdec_comm_t *comm;
3225 int ncd, i, j, nmin, nmin_old;
3226 gmx_bool bLimLo, bLimHi;
3228 real fac, halfway, cellsize_limit_f_i, region_size;
3229 gmx_bool bPBC, bLastHi = FALSE;
/* nrange starts as a copy of range; it is narrowed for recursive calls */
3230 int nrange[] = {range[0], range[1]};
3232 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
3238 bPBC = (dim < ddbox->npbcdim);
3240 cell_size = root->buf_ncd;
3244 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3247 /* First we need to check if the scaling does not make cells
3248 * smaller than the smallest allowed size.
3249 * We need to do this iteratively, since if a cell is too small,
3250 * it needs to be enlarged, which makes all the other cells smaller,
3251 * which could in turn make another cell smaller than allowed.
3253 for (i = range[0]; i < range[1]; i++)
3255 root->bCellMin[i] = FALSE;
3261 /* We need the total for normalization */
3263 for (i = range[0]; i < range[1]; i++)
3265 if (root->bCellMin[i] == FALSE)
3267 fac += cell_size[i];
3270 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3271 /* Determine the cell boundaries */
3272 for (i = range[0]; i < range[1]; i++)
3274 if (root->bCellMin[i] == FALSE)
3276 cell_size[i] *= fac;
/* Without PBC the outermost cells have no lower size limit */
3277 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3279 cellsize_limit_f_i = 0;
3283 cellsize_limit_f_i = cellsize_limit_f;
3285 if (cell_size[i] < cellsize_limit_f_i)
3287 root->bCellMin[i] = TRUE;
3288 cell_size[i] = cellsize_limit_f_i;
3292 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
/* Repeat until the number of limit-pinned cells (nmin) stops growing */
3295 while (nmin > nmin_old);
3298 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3299 /* For this check we should not use DD_CELL_MARGIN,
3300 * but a slightly smaller factor,
3301 * since rounding could get use below the limit.
3303 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3306 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3307 gmx_step_str(step, buf),
3308 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3309 ncd, comm->cellsize_min[dim]);
/* bLimited records whether any limit was active in this (sub-)range */
3312 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3316 /* Check if the boundary did not displace more than halfway
3317 * each of the cells it bounds, as this could cause problems,
3318 * especially when the differences between cell sizes are large.
3319 * If changes are applied, they will not make cells smaller
3320 * than the cut-off, as we check all the boundaries which
3321 * might be affected by a change and if the old state was ok,
3322 * the cells will at most be shrunk back to their old size.
3324 for (i = range[0]+1; i < range[1]; i++)
3326 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3327 if (root->cell_f[i] < halfway)
3329 root->cell_f[i] = halfway;
3330 /* Check if the change also causes shifts of the next boundaries */
3331 for (j = i+1; j < range[1]; j++)
3333 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3335 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3339 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3340 if (root->cell_f[i] > halfway)
3342 root->cell_f[i] = halfway;
3343 /* Check if the change also causes shifts of the next boundaries */
3344 for (j = i-1; j >= range[0]+1; j--)
3346 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3348 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3355 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3356 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3357 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3358 * for a and b nrange is used */
3361 /* Take care of the staggering of the cell boundaries */
3364 for (i = range[0]; i < range[1]; i++)
3366 root->cell_f_max0[i] = root->cell_f[i];
3367 root->cell_f_min1[i] = root->cell_f[i+1];
3372 for (i = range[0]+1; i < range[1]; i++)
3374 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3375 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3376 if (bLimLo && bLimHi)
3378 /* Both limits violated, try the best we can */
3379 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3380 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3381 nrange[0] = range[0];
3383 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3386 nrange[1] = range[1];
3387 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3393 /* root->cell_f[i] = root->bound_min[i]; */
3394 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3397 else if (bLimHi && !bLastHi)
3400 if (nrange[1] < range[1]) /* found a LimLo before */
3402 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3403 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3404 nrange[0] = nrange[1];
3406 root->cell_f[i] = root->bound_max[i];
3408 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3410 nrange[1] = range[1];
3413 if (nrange[1] < range[1]) /* found last a LimLo */
3415 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3416 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3417 nrange[0] = nrange[1];
3418 nrange[1] = range[1];
3419 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3421 else if (nrange[0] > range[0]) /* found at least one LimHi */
3423 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
/* Row-root side of dynamic load balancing for decomposition index d
 * (Cartesian dimension dim): compute new relative cell boundaries
 * root->cell_f from the measured per-cell loads, limited by the
 * minimum cell size, maximum per-step change (comm->dlb_scale_lim)
 * and, for staggered grids (d > 0, non-uniform), the boundary
 * staggering limits.  The result row root->cell_f also gets the
 * lower-dimension boundaries and PME maxshift values appended for
 * broadcasting to the row.
 */
3430 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3431 int d, int dim, gmx_domdec_root_t *root,
3432 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3433 gmx_bool bUniform, gmx_int64_t step)
3435 gmx_domdec_comm_t *comm;
3436 int ncd, d1, i, j, pos;
3438 real load_aver, load_i, imbalance, change, change_max, sc;
3439 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3443 int range[] = { 0, 0 };
3447 /* Convert the maximum change from the input percentage to a fraction */
3448 change_limit = comm->dlb_scale_lim*0.01;
3452 bPBC = (dim < ddbox->npbcdim);
3454 cell_size = root->buf_ncd;
3456 /* Store the original boundaries */
3457 for (i = 0; i < ncd+1; i++)
3459 root->old_cell_f[i] = root->cell_f[i];
/* Uniform grid requested: equal-sized cells */
3463 for (i = 0; i < ncd; i++)
3465 cell_size[i] = 1.0/ncd;
3468 else if (dd_load_count(comm))
3470 load_aver = comm->load[d].sum_m/ncd;
3472 for (i = 0; i < ncd; i++)
3474 /* Determine the relative imbalance of cell i */
3475 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3476 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3477 /* Determine the change of the cell size using underrelaxation */
3478 change = -relax*imbalance;
3479 change_max = max(change_max, max(change, -change));
3481 /* Limit the amount of scaling.
3482 * We need to use the same rescaling for all cells in one row,
3483 * otherwise the load balancing might not converge.
3486 if (change_max > change_limit)
3488 sc *= change_limit/change_max;
3490 for (i = 0; i < ncd; i++)
3492 /* Determine the relative imbalance of cell i */
3493 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3494 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3495 /* Determine the change of the cell size using underrelaxation */
3496 change = -sc*imbalance;
3497 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
/* Convert the absolute size limits to box-relative fractions */
3501 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3502 cellsize_limit_f *= DD_CELL_MARGIN;
3503 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3504 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3505 if (ddbox->tric_dir[dim])
3507 cellsize_limit_f /= ddbox->skew_fac[dim];
3508 dist_min_f /= ddbox->skew_fac[dim];
/* With pressure coupling the box can shrink; add extra margin */
3510 if (bDynamicBox && d > 0)
3512 dist_min_f *= DD_PRES_SCALE_MARGIN;
3514 if (d > 0 && !bUniform)
3516 /* Make sure that the grid is not shifted too much */
3517 for (i = 1; i < ncd; i++)
3519 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3521 gmx_incons("Inconsistent DD boundary staggering limits!");
3523 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3524 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3527 root->bound_min[i] += 0.5*space;
3529 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3530 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3533 root->bound_max[i] += 0.5*space;
3538 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3540 root->cell_f_max0[i-1] + dist_min_f,
3541 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3542 root->cell_f_min1[i] - dist_min_f);
/* Outer boundaries are fixed; enforce all limits on the interior */
3547 root->cell_f[0] = 0;
3548 root->cell_f[ncd] = 1;
3549 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3552 /* After the checks above, the cells should obey the cut-off
3553 * restrictions, but it does not hurt to check.
3555 for (i = 0; i < ncd; i++)
3559 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3560 dim, i, root->cell_f[i], root->cell_f[i+1]);
3563 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3564 root->cell_f[i+1] - root->cell_f[i] <
3565 cellsize_limit_f/DD_CELL_MARGIN)
3569 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3570 gmx_step_str(step, buf), dim2char(dim), i,
3571 (root->cell_f[i+1] - root->cell_f[i])
3572 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3577 /* Store the cell boundaries of the lower dimensions at the end */
3578 for (d1 = 0; d1 < d; d1++)
3580 root->cell_f[pos++] = comm->cell_f0[d1];
3581 root->cell_f[pos++] = comm->cell_f1[d1];
3584 if (d < comm->npmedecompdim)
3586 /* The master determines the maximum shift for
3587 * the coordinate communication between separate PME nodes.
3589 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
/* maxshift values are appended as reals; converted back to int after bcast */
3591 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3594 root->cell_f[pos++] = comm->ddpme[1].maxshift;
/* Convert the relative boundary fractions cell_f0/cell_f1 for
 * decomposition index dimind into absolute coordinates cell_x0/cell_x1
 * along the corresponding Cartesian dimension; dimensions that are not
 * bounded by the box are additionally offset by the box origin box0.
 */
3598 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3599 gmx_ddbox_t *ddbox, int dimind)
3601 gmx_domdec_comm_t *comm;
3606 /* Set the cell dimensions */
3607 dim = dd->dim[dimind];
3608 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3609 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3610 if (dim >= ddbox->nboundeddim)
3612 comm->cell_x0[dim] += ddbox->box0[dim];
3613 comm->cell_x1[dim] += ddbox->box0[dim];
/* Broadcast the row of cell boundary fractions (computed by the row
 * root in set_dd_cell_sizes_dlb_root) to all ranks in the load row for
 * decomposition index d, then unpack this rank's own fractions, the
 * lower-dimension fractions and the PME maxshift values, converting
 * relative bounds to absolute coordinates per dimension.
 */
3617 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3618 int d, int dim, real *cell_f_row,
3621 gmx_domdec_comm_t *comm;
3627 /* Each node would only need to know two fractions,
3628 * but it is probably cheaper to broadcast the whole array.
/* Byte-count broadcast: DD_CELL_F_SIZE reals from the row root (rank 0) */
3630 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3631 0, comm->mpi_comm_load[d]);
3633 /* Copy the fractions for this dimension from the buffer */
3634 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3635 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3636 /* The whole array was communicated, so set the buffer position */
3637 pos = dd->nc[dim] + 1;
3638 for (d1 = 0; d1 <= d; d1++)
3642 /* Copy the cell fractions of the lower dimensions */
3643 comm->cell_f0[d1] = cell_f_row[pos++];
3644 comm->cell_f1[d1] = cell_f_row[pos++];
3646 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3648 /* Convert the communicated shift from float to int */
/* +0.5 rounds to nearest since the shifts were stored as reals */
3649 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3652 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
/* Recompute all DLB cell sizes: for each decomposition dimension the
 * row root computes new boundaries (set_dd_cell_sizes_dlb_root) and
 * every row member receives/unpacks them via
 * distribute_dd_cell_sizes_dlb.
 */
3656 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3657 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3658 gmx_bool bUniform, gmx_int64_t step)
3660 gmx_domdec_comm_t *comm;
3662 gmx_bool bRowMember, bRowRoot;
3667 for (d = 0; d < dd->ndim; d++)
/* This rank is the row root only if its coordinate is 0 in all
 * higher decomposition dimensions (checked below) */
3672 for (d1 = d; d1 < dd->ndim; d1++)
3674 if (dd->ci[dd->dim[d1]] > 0)
3687 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3688 ddbox, bDynamicBox, bUniform, step);
3689 cell_f_row = comm->root[d]->cell_f;
3693 cell_f_row = comm->cell_f_row;
3695 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
/* Refresh the absolute cell bounds from the unchanged relative
 * fractions for every decomposition dimension.
 */
3700 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3704 /* This function assumes the box is static and should therefore
3705 * not be called when the box has changed since the last
3706 * call to dd_partition_system.
3708 for (d = 0; d < dd->ndim; d++)
3710 relative_to_absolute_cell_bounds(dd, ddbox, d);
/* Top-level DLB cell sizing: either recompute boundaries from the load
 * measurements (bDoDLB, timed under ewcDDCOMMBOUND) or just rescale the
 * existing fractions when only the box changed.  Dimensions without
 * decomposition (nc == 1) span the whole box.
 */
3716 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3717 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3718 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3719 gmx_wallcycle_t wcycle)
3721 gmx_domdec_comm_t *comm;
3728 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3729 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3730 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3732 else if (bDynamicBox)
3734 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3737 /* Set the dimensions for which no DD is used */
3738 for (dim = 0; dim < DIM; dim++)
3740 if (dd->nc[dim] == 1)
3742 comm->cell_x0[dim] = 0;
3743 comm->cell_x1[dim] = ddbox->box_size[dim];
3744 if (dim >= ddbox->nboundeddim)
3746 comm->cell_x0[dim] += ddbox->box0[dim];
3747 comm->cell_x1[dim] += ddbox->box0[dim];
/* Grow the per-dimension communication index arrays (cd->ind) so that
 * each decomposition dimension can hold npulse[] communication pulses;
 * newly added entries are zero-initialized.  Never shrinks.
 */
3756 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3758 gmx_domdec_comm_dim_t *cd;
3758 for (d = 0; d < dd->ndim; d++)
3760 cd = &dd->comm->cd[d];
3761 np = npulse[dd->dim[d]];
3762 if (np > cd->np_nalloc)
3766 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3767 dim2char(dd->dim[d]), np);
/* Warn only on master and only when growing an existing allocation */
3769 if (DDMASTER(dd) && cd->np_nalloc > 0)
3771 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3773 srenew(cd->ind, np);
3774 for (i = cd->np_nalloc; i < np; i++)
3776 cd->ind[i].index = NULL;
3777 cd->ind[i].nalloc = 0;
/* Set this rank's DD cell boundaries for the current step, saving the
 * previous boundaries first (used later by the charge-group
 * displacement check).  Dispatches to the dynamic (DLB) or static (SLB)
 * sizing path; the static path also (re)allocates communication
 * indices for the resulting pulse counts.
 */
3786 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3787 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3788 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3789 gmx_wallcycle_t wcycle)
3791 gmx_domdec_comm_t *comm;
3797 /* Copy the old cell boundaries for the cg displacement check */
3798 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3799 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3801 if (comm->bDynLoadBal)
3805 check_box_size(dd, ddbox);
3807 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3811 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
3812 realloc_comm_ind(dd, npulse);
3817 for (d = 0; d < DIM; d++)
3819 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3820 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
/* Validate the local cell sizes against the minimum allowed size and
 * set up the neighbor-search cell extents cell_ns_x0/cell_ns_x1,
 * communicating zone boundaries between neighbors when staggered grids
 * (bGridJump) or unbounded dimensions require it.
 */
3825 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3827 rvec cell_ns_x0, rvec cell_ns_x1,
3830 gmx_domdec_comm_t *comm;
3835 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3837 dim = dd->dim[dim_ind];
3839 /* Without PBC we don't have restrictions on the outer cells */
3840 if (!(dim >= ddbox->npbcdim &&
3841 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3842 comm->bDynLoadBal &&
/* skew_fac corrects the size for triclinic cell shapes */
3843 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3844 comm->cellsize_min[dim])
3847 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3848 gmx_step_str(step, buf), dim2char(dim),
3849 comm->cell_x1[dim] - comm->cell_x0[dim],
3850 ddbox->skew_fac[dim],
3851 dd->comm->cellsize_min[dim],
3852 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3856 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3858 /* Communicate the boundaries and update cell_ns_x0/1 */
3859 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3860 if (dd->bGridJump && dd->ndim > 1)
3862 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
/* Build the triclinic correction matrix tcm used to convert Cartesian
 * coordinates into lattice (box-relative) coordinates: the YY/ZZ
 * off-diagonal entries cancel the box's off-diagonal components.
 * NOTE(review): this fragment shows only the npbcdim-covered branch;
 * presumably the elided lines zero the entries otherwise — confirm
 * against the full source.
 */
3867 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3871 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3879 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3880 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
/* Abort with a fatal error if the box shape is incompatible with
 * screw periodic boundary conditions: any off-diagonal x-component,
 * or a non-zero zy component, is not supported.
 */
3889 static void check_screw_box(matrix box)
3891 /* Mathematical limitation */
3892 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3894 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3897 /* Limitation due to the asymmetry of the eighth shell method */
3898 if (box[ZZ][YY] != 0)
3900 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
/* Master-only initial distribution of charge groups over the DD cells:
 * compute each charge group's center of geometry, put it (and its
 * atoms) inside the box applying pbc/screw corrections, locate the
 * owning cell by walking the master cell boundaries (ma->cell_x), and
 * collect the per-cell charge-group index lists into ma->cg/ma->index.
 */
3904 static void distribute_cg(FILE *fplog, gmx_int64_t step,
3905 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3908 gmx_domdec_master_t *ma;
3909 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3910 int i, icg, j, k, k0, k1, d, npbcdim;
3912 rvec box_size, cg_cm;
3914 real nrcg, inv_ncg, pos_d;
3916 gmx_bool bUnbounded, bScrew;
/* Per-node temporary index lists, sized with headroom for imbalance */
3920 if (tmp_ind == NULL)
3922 snew(tmp_nalloc, dd->nnodes);
3923 snew(tmp_ind, dd->nnodes);
3924 for (i = 0; i < dd->nnodes; i++)
3926 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3927 snew(tmp_ind[i], tmp_nalloc[i]);
3931 /* Clear the count */
3932 for (i = 0; i < dd->nnodes; i++)
3938 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3940 cgindex = cgs->index;
3942 /* Compute the center of geometry for all charge groups */
3943 for (icg = 0; icg < cgs->nr; icg++)
3946 k1 = cgindex[icg+1];
/* Single-atom charge group: the center is the atom position */
3950 copy_rvec(pos[k0], cg_cm);
3957 for (k = k0; (k < k1); k++)
3959 rvec_inc(cg_cm, pos[k]);
3961 for (d = 0; (d < DIM); d++)
3963 cg_cm[d] *= inv_ncg;
3966 /* Put the charge group in the box and determine the cell index */
3967 for (d = DIM-1; d >= 0; d--)
3970 if (d < dd->npbcdim)
3972 bScrew = (dd->bScrewPBC && d == XX);
3973 if (tric_dir[d] && dd->nc[d] > 1)
3975 /* Use triclinic coordintates for this dimension */
3976 for (j = d+1; j < DIM; j++)
3978 pos_d += cg_cm[j]*tcm[j][d];
/* Wrap down over the upper box edge */
3981 while (pos_d >= box[d][d])
3984 rvec_dec(cg_cm, box[d]);
/* Screw pbc: shifting over x mirrors y and z */
3987 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3988 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3990 for (k = k0; (k < k1); k++)
3992 rvec_dec(pos[k], box[d]);
3995 pos[k][YY] = box[YY][YY] - pos[k][YY];
3996 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
/* Wrap up over the lower box edge (pos_d < 0 case) */
4003 rvec_inc(cg_cm, box[d]);
4006 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
4007 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
4009 for (k = k0; (k < k1); k++)
4011 rvec_inc(pos[k], box[d]);
4014 pos[k][YY] = box[YY][YY] - pos[k][YY];
4015 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4020 /* This could be done more efficiently */
/* Linear walk along the master cell boundaries to find the cell */
4022 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
4027 i = dd_index(dd->nc, ind);
4028 if (ma->ncg[i] == tmp_nalloc[i])
4030 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
4031 srenew(tmp_ind[i], tmp_nalloc[i]);
4033 tmp_ind[i][ma->ncg[i]] = icg;
4035 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
/* Flatten the per-node lists into the contiguous ma->cg array */
4039 for (i = 0; i < dd->nnodes; i++)
4042 for (k = 0; k < ma->ncg[i]; k++)
4044 ma->cg[k1++] = tmp_ind[i][k];
4047 ma->index[dd->nnodes] = k1;
4049 for (i = 0; i < dd->nnodes; i++)
4059 fprintf(fplog, "Charge group distribution at step %s:",
4060 gmx_step_str(step, buf));
4061 for (i = 0; i < dd->nnodes; i++)
4063 fprintf(fplog, " %d", ma->ncg[i]);
4065 fprintf(fplog, "\n");
/* Distribute charge groups from the master to all DD ranks at (re)start:
 * the master computes the static cell sizes and the per-cell assignment
 * (distribute_cg), scatters the per-rank cg/atom counts, then scatters
 * the global charge-group indices; every rank finally builds its local
 * cgindex from the received global indices.
 */
4069 static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4070 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4073 gmx_domdec_master_t *ma = NULL;
4076 int *ibuf, buf2[2] = { 0, 0 };
4077 gmx_bool bMaster = DDMASTER(dd);
4085 check_screw_box(box);
4088 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
4090 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
/* Pack (ncg, nat) pairs per rank for the scatter below */
4091 for (i = 0; i < dd->nnodes; i++)
4093 ma->ibuf[2*i] = ma->ncg[i];
4094 ma->ibuf[2*i+1] = ma->nat[i];
4102 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4104 dd->ncg_home = buf2[0];
4105 dd->nat_home = buf2[1];
/* Right after distribution the home counts are also the totals */
4106 dd->ncg_tot = dd->ncg_home;
4107 dd->nat_tot = dd->nat_home;
4108 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4110 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4111 srenew(dd->index_gl, dd->cg_nalloc);
4112 srenew(dd->cgindex, dd->cg_nalloc+1);
/* Byte counts and displacements for the variable-size scatter */
4116 for (i = 0; i < dd->nnodes; i++)
4118 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4119 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4124 DDMASTER(dd) ? ma->ibuf : NULL,
4125 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4126 DDMASTER(dd) ? ma->cg : NULL,
4127 dd->ncg_home*sizeof(int), dd->index_gl);
4129 /* Determine the home charge group sizes */
4131 for (i = 0; i < dd->ncg_home; i++)
4133 cg_gl = dd->index_gl[i];
4135 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4140 fprintf(debug, "Home charge groups:\n");
4141 for (i = 0; i < dd->ncg_home; i++)
4143 fprintf(debug, " %d", dd->index_gl[i]);
4146 fprintf(debug, "\n");
4149 fprintf(debug, "\n");
/* Per-atom variant of the move/compact pass: for each charge group,
 * either compact its atom vectors of src in place at the home position
 * (group stays) or copy them into the communication buffer of the
 * destination direction m (group moves).  vec selects which of the
 * nvec interleaved vectors is being processed; pos_vec[] tracks the
 * write position per direction.  Returns the new home position.
 */
4153 static int compact_and_copy_vec_at(int ncg, int *move,
4156 rvec *src, gmx_domdec_comm_t *comm,
4159 int m, icg, i, i0, i1, nrcg;
4165 for (m = 0; m < DIM*2; m++)
4171 for (icg = 0; icg < ncg; icg++)
4173 i1 = cgindex[icg+1];
4179 /* Compact the home array in place */
4180 for (i = i0; i < i1; i++)
4182 copy_rvec(src[i], src[home_pos++]);
4188 /* Copy to the communication buffer */
/* Skip the leading cg_cm entry plus the vectors already written */
4190 pos_vec[m] += 1 + vec*nrcg;
4191 for (i = i0; i < i1; i++)
4193 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
/* Leave room for the remaining vectors of this charge group */
4195 pos_vec[m] += (nvec - vec - 1)*nrcg;
4199 home_pos += i1 - i0;
/* Per-charge-group variant of the move/compact pass: compact the
 * one-entry-per-group array src (e.g. cg_cm) in place for groups that
 * stay, or copy the entry to the head of the destination direction's
 * communication buffer and reserve space for the group's nvec atom
 * vectors.  Returns the new home position.
 */
4207 static int compact_and_copy_vec_cg(int ncg, int *move,
4209 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4212 int m, icg, i0, i1, nrcg;
4218 for (m = 0; m < DIM*2; m++)
4224 for (icg = 0; icg < ncg; icg++)
4226 i1 = cgindex[icg+1];
4232 /* Compact the home array in place */
4233 copy_rvec(src[icg], src[home_pos++]);
4239 /* Copy to the communication buffer */
4240 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
/* Reserve 1 slot for the cg entry plus nrcg*nvec atom-vector slots */
4241 pos_vec[m] += 1 + nrcg*nvec;
/* Compact the home index arrays after charge groups were flagged for
 * moving: groups that stay are shifted down in place (updating the
 * global-to-local atom lookup via ga2la_change_la), groups that leave
 * have their global atom indices and local-cg flags cleared.  Returns
 * the number of home charge groups remaining.
 */
4253 static int compact_ind(int ncg, int *move,
4254 int *index_gl, int *cgindex,
4256 gmx_ga2la_t ga2la, char *bLocalCG,
4259 int cg, nat, a0, a1, a, a_gl;
4264 for (cg = 0; cg < ncg; cg++)
4270 /* Compact the home arrays in place.
4271 * Anything that can be done here avoids access to global arrays.
4273 cgindex[home_pos] = nat;
4274 for (a = a0; a < a1; a++)
4277 gatindex[nat] = a_gl;
4278 /* The cell number stays 0, so we don't need to set it */
4279 ga2la_change_la(ga2la, a_gl, nat);
4282 index_gl[home_pos] = index_gl[cg];
4283 cginfo[home_pos] = cginfo[cg];
4284 /* The charge group remains local, so bLocalCG does not change */
4289 /* Clear the global indices */
4290 for (a = a0; a < a1; a++)
4292 ga2la_del(ga2la, gatindex[a]);
4296 bLocalCG[index_gl[cg]] = FALSE;
/* Close the compacted cgindex with the final atom count */
4300 cgindex[home_pos] = nat;
/* Non-compacting counterpart of compact_ind: for every charge group
 * flagged as moving, remove its atoms from the global-to-local lookup,
 * clear its local-cg flag, and mark it as moved in the neighbor-search
 * cell index so grid filling can recognize it.
 */
4305 static void clear_and_mark_ind(int ncg, int *move,
4306 int *index_gl, int *cgindex, int *gatindex,
4307 gmx_ga2la_t ga2la, char *bLocalCG,
4312 for (cg = 0; cg < ncg; cg++)
4318 /* Clear the global indices */
4319 for (a = a0; a < a1; a++)
4321 ga2la_del(ga2la, gatindex[a]);
4325 bLocalCG[index_gl[cg]] = FALSE;
4327 /* Signal that this cg has moved using the ns cell index.
4328 * Here we set it to -1. fill_grid will change it
4329 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4331 cell_index[cg] = -1;
/* Print diagnostics for a charge group that moved too far between two
 * domain decomposition steps: which direction it crossed (dim, dir),
 * how far it is outside the cell, and — when bHaveLimitdAndCMOld is
 * TRUE — the allowed distance limitd, the old/new centers of geometry
 * and the old/new cell boundaries.  limitd and cm_old are only read
 * when bHaveLimitdAndCMOld is TRUE.
 */
4336 static void print_cg_move(FILE *fplog,
4338 gmx_int64_t step, int cg, int dim, int dir,
4339 gmx_bool bHaveLimitdAndCMOld, real limitd,
4340 rvec cm_old, rvec cm_new, real pos_d)
4342 gmx_domdec_comm_t *comm;
4347 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4348 if (bHaveLimitdAndCMOld)
4350 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4351 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
/* Fixed message: was missing the word "further" ("moved than the
 * distance allowed"), making the user-facing error ungrammatical. */
4355 fprintf(fplog, "The charge group starting at atom %d moved further than the distance allowed by the domain decomposition in direction %c\n",
4356 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
/* dir selects which cell boundary was crossed: +1 upper, -1 lower */
4358 fprintf(fplog, "distance out of cell %f\n",
4359 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4360 if (bHaveLimitdAndCMOld)
4362 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4363 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4365 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4366 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4367 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4369 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4370 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4372 comm->cell_x0[dim], comm->cell_x1[dim]);
/* Report a too-large charge group displacement to the log file (when
 * available) and stderr, then abort with a fatal error.  Arguments are
 * forwarded unchanged to print_cg_move.
 */
4375 static void cg_move_error(FILE *fplog,
4377 gmx_int64_t step, int cg, int dim, int dir,
4378 gmx_bool bHaveLimitdAndCMOld, real limitd,
4379 rvec cm_old, rvec cm_new, real pos_d)
4383 print_cg_move(fplog, dd, step, cg, dim, dir,
4384 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4386 print_cg_move(stderr, dd, step, cg, dim, dir,
4387 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4389 "A charge group moved too far between two domain decomposition steps\n"
4390 "This usually means that your system is not well equilibrated");
/* Apply the screw-pbc rotation to atom a of the state: mirror the
 * position in y and z (rectangular box assumed) and negate the y/z
 * components of the velocity-like distributed state vectors.  Scalar
 * distance-based state entries are left untouched.
 */
4393 static void rotate_state_atom(t_state *state, int a)
/* Only state entries that are distributed over atoms and present
 * in state->flags need treatment */
4397 for (est = 0; est < estNR; est++)
4399 if (EST_DISTR(est) && (state->flags & (1<<est)))
4404 /* Rotate the complete state; for a rectangular box only */
4405 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4406 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4409 state->v[a][YY] = -state->v[a][YY];
4410 state->v[a][ZZ] = -state->v[a][ZZ];
4413 state->sd_X[a][YY] = -state->sd_X[a][YY];
4414 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4417 state->cg_p[a][YY] = -state->cg_p[a][YY];
4418 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4420 case estDISRE_INITF:
4421 case estDISRE_RM3TAV:
4422 case estORIRE_INITF:
4424 /* These are distances, so not affected by rotation */
4427 gmx_incons("Unknown state entry encountered in rotate_state_atom");
/* Return the per-atom "moved" flag array, growing it (with contents
 * preserved via srenew) to hold at least natoms entries.
 */
4433 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4435 if (natoms > comm->moved_nalloc)
4437 /* Contents should be preserved here */
4438 comm->moved_nalloc = over_alloc_dd(natoms);
4439 srenew(comm->moved, comm->moved_nalloc);
/* For charge groups [cg_start, cg_end): compute the center of geometry,
 * apply pbc (wrapping the group's atoms and handling screw pbc), detect
 * DD cell boundary crossings against cell_x0/cell_x1 (fatal if a group
 * moved past limit0/limit1, i.e. more than one cell), and encode the
 * destination direction plus FW/BW flags into move[].  Designed to be
 * called per-thread on disjoint cg ranges (see the OpenMP caller).
 */
4445 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4448 ivec tric_dir, matrix tcm,
4449 rvec cell_x0, rvec cell_x1,
4450 rvec limitd, rvec limit0, rvec limit1,
4452 int cg_start, int cg_end,
4457 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4458 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4462 real inv_ncg, pos_d;
4465 npbcdim = dd->npbcdim;
4467 for (cg = cg_start; cg < cg_end; cg++)
/* Single-atom group: center is the atom position itself */
4474 copy_rvec(state->x[k0], cm_new);
4481 for (k = k0; (k < k1); k++)
4483 rvec_inc(cm_new, state->x[k]);
4485 for (d = 0; (d < DIM); d++)
4487 cm_new[d] = inv_ncg*cm_new[d];
4492 /* Do pbc and check DD cell boundary crossings */
4493 for (d = DIM-1; d >= 0; d--)
4497 bScrew = (dd->bScrewPBC && d == XX);
4498 /* Determine the location of this cg in lattice coordinates */
4502 for (d2 = d+1; d2 < DIM; d2++)
4504 pos_d += cm_new[d2]*tcm[d2][d];
4507 /* Put the charge group in the triclinic unit-cell */
4508 if (pos_d >= cell_x1[d])
/* Crossed the upper boundary by more than allowed: abort */
4510 if (pos_d >= limit1[d])
4512 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4513 cg_cm[cg], cm_new, pos_d);
/* Wrap over the periodic boundary only at the last cell */
4516 if (dd->ci[d] == dd->nc[d] - 1)
4518 rvec_dec(cm_new, state->box[d]);
4521 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4522 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4524 for (k = k0; (k < k1); k++)
4526 rvec_dec(state->x[k], state->box[d]);
4529 rotate_state_atom(state, k);
4534 else if (pos_d < cell_x0[d])
4536 if (pos_d < limit0[d])
4538 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4539 cg_cm[cg], cm_new, pos_d);
4544 rvec_inc(cm_new, state->box[d]);
4547 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4548 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4550 for (k = k0; (k < k1); k++)
4552 rvec_inc(state->x[k], state->box[d]);
4555 rotate_state_atom(state, k);
4561 else if (d < npbcdim)
4563 /* Put the charge group in the rectangular unit-cell */
4564 while (cm_new[d] >= state->box[d][d])
4566 rvec_dec(cm_new, state->box[d]);
4567 for (k = k0; (k < k1); k++)
4569 rvec_dec(state->x[k], state->box[d]);
4572 while (cm_new[d] < 0)
4574 rvec_inc(cm_new, state->box[d]);
4575 for (k = k0; (k < k1); k++)
4577 rvec_inc(state->x[k], state->box[d]);
4583 copy_rvec(cm_new, cg_cm[cg]);
4585 /* Determine where this cg should go */
4588 for (d = 0; d < dd->ndim; d++)
/* dev[dim] holds the crossing direction per dimension; encode it
 * as forward/backward flags plus the destination index mc */
4593 flag |= DD_FLAG_FW(d);
4599 else if (dev[dim] == -1)
4601 flag |= DD_FLAG_BW(d);
4604 if (dd->nc[dim] > 2)
4615 /* Temporarily store the flag in move */
4616 move[cg] = mc + flag;
4620 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4621 gmx_domdec_t *dd, ivec tric_dir,
4622 t_state *state, rvec **f,
4631 int ncg[DIM*2], nat[DIM*2];
4632 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4633 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4634 int sbuf[2], rbuf[2];
4635 int home_pos_cg, home_pos_at, buf_pos;
4637 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4640 real inv_ncg, pos_d;
4642 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4644 cginfo_mb_t *cginfo_mb;
4645 gmx_domdec_comm_t *comm;
4647 int nthread, thread;
4651 check_screw_box(state->box);
4655 if (fr->cutoff_scheme == ecutsGROUP)
4660 for (i = 0; i < estNR; i++)
4666 case estX: /* Always present */ break;
4667 case estV: bV = (state->flags & (1<<i)); break;
4668 case estSDX: bSDX = (state->flags & (1<<i)); break;
4669 case estCGP: bCGP = (state->flags & (1<<i)); break;
4672 case estDISRE_INITF:
4673 case estDISRE_RM3TAV:
4674 case estORIRE_INITF:
4676 /* No processing required */
4679 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4684 if (dd->ncg_tot > comm->nalloc_int)
4686 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4687 srenew(comm->buf_int, comm->nalloc_int);
4689 move = comm->buf_int;
4691 /* Clear the count */
4692 for (c = 0; c < dd->ndim*2; c++)
4698 npbcdim = dd->npbcdim;
4700 for (d = 0; (d < DIM); d++)
4702 limitd[d] = dd->comm->cellsize_min[d];
4703 if (d >= npbcdim && dd->ci[d] == 0)
4705 cell_x0[d] = -GMX_FLOAT_MAX;
4709 cell_x0[d] = comm->cell_x0[d];
4711 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4713 cell_x1[d] = GMX_FLOAT_MAX;
4717 cell_x1[d] = comm->cell_x1[d];
4721 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4722 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4726 /* We check after communication if a charge group moved
4727 * more than one cell. Set the pre-comm check limit to float_max.
4729 limit0[d] = -GMX_FLOAT_MAX;
4730 limit1[d] = GMX_FLOAT_MAX;
4734 make_tric_corr_matrix(npbcdim, state->box, tcm);
4736 cgindex = dd->cgindex;
4738 nthread = gmx_omp_nthreads_get(emntDomdec);
4740 /* Compute the center of geometry for all home charge groups
4741 * and put them in the box and determine where they should go.
4743 #pragma omp parallel for num_threads(nthread) schedule(static)
4744 for (thread = 0; thread < nthread; thread++)
4746 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4747 cell_x0, cell_x1, limitd, limit0, limit1,
4749 ( thread *dd->ncg_home)/nthread,
4750 ((thread+1)*dd->ncg_home)/nthread,
4751 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4755 for (cg = 0; cg < dd->ncg_home; cg++)
4760 flag = mc & ~DD_FLAG_NRCG;
4761 mc = mc & DD_FLAG_NRCG;
4764 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4766 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4767 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4769 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4770 /* We store the cg size in the lower 16 bits
4771 * and the place where the charge group should go
4772 * in the next 6 bits. This saves some communication volume.
4774 nrcg = cgindex[cg+1] - cgindex[cg];
4775 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4781 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4782 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4785 for (i = 0; i < dd->ndim*2; i++)
4787 *ncg_moved += ncg[i];
4804 /* Make sure the communication buffers are large enough */
4805 for (mc = 0; mc < dd->ndim*2; mc++)
4807 nvr = ncg[mc] + nat[mc]*nvec;
4808 if (nvr > comm->cgcm_state_nalloc[mc])
4810 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4811 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4815 switch (fr->cutoff_scheme)
4818 /* Recalculating cg_cm might be cheaper than communicating,
4819 * but that could give rise to rounding issues.
4822 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4823 nvec, cg_cm, comm, bCompact);
4826 /* Without charge groups we send the moved atom coordinates
4827 * over twice. This is so the code below can be used without
4828 * many conditionals for both with and without charge groups.
4831 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4832 nvec, state->x, comm, FALSE);
4835 home_pos_cg -= *ncg_moved;
4839 gmx_incons("unimplemented");
4845 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4846 nvec, vec++, state->x, comm, bCompact);
4849 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4850 nvec, vec++, state->v, comm, bCompact);
4854 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4855 nvec, vec++, state->sd_X, comm, bCompact);
4859 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4860 nvec, vec++, state->cg_p, comm, bCompact);
4865 compact_ind(dd->ncg_home, move,
4866 dd->index_gl, dd->cgindex, dd->gatindex,
4867 dd->ga2la, comm->bLocalCG,
4872 if (fr->cutoff_scheme == ecutsVERLET)
4874 moved = get_moved(comm, dd->ncg_home);
4876 for (k = 0; k < dd->ncg_home; k++)
4883 moved = fr->ns.grid->cell_index;
4886 clear_and_mark_ind(dd->ncg_home, move,
4887 dd->index_gl, dd->cgindex, dd->gatindex,
4888 dd->ga2la, comm->bLocalCG,
4892 cginfo_mb = fr->cginfo_mb;
4894 *ncg_stay_home = home_pos_cg;
4895 for (d = 0; d < dd->ndim; d++)
4901 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4904 /* Communicate the cg and atom counts */
4909 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4910 d, dir, sbuf[0], sbuf[1]);
4912 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4914 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4916 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4917 srenew(comm->buf_int, comm->nalloc_int);
4920 /* Communicate the charge group indices, sizes and flags */
4921 dd_sendrecv_int(dd, d, dir,
4922 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4923 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4925 nvs = ncg[cdd] + nat[cdd]*nvec;
4926 i = rbuf[0] + rbuf[1] *nvec;
4927 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4929 /* Communicate cgcm and state */
4930 dd_sendrecv_rvec(dd, d, dir,
4931 comm->cgcm_state[cdd], nvs,
4932 comm->vbuf.v+nvr, i);
4933 ncg_recv += rbuf[0];
4934 nat_recv += rbuf[1];
4938 /* Process the received charge groups */
4940 for (cg = 0; cg < ncg_recv; cg++)
4942 flag = comm->buf_int[cg*DD_CGIBS+1];
4944 if (dim >= npbcdim && dd->nc[dim] > 2)
4946 /* No pbc in this dim and more than one domain boundary.
4947 * We do a separate check if a charge group didn't move too far.
4949 if (((flag & DD_FLAG_FW(d)) &&
4950 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4951 ((flag & DD_FLAG_BW(d)) &&
4952 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4954 cg_move_error(fplog, dd, step, cg, dim,
4955 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4957 comm->vbuf.v[buf_pos],
4958 comm->vbuf.v[buf_pos],
4959 comm->vbuf.v[buf_pos][dim]);
4966 /* Check which direction this cg should go */
4967 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4971 /* The cell boundaries for dimension d2 are not equal
4972 * for each cell row of the lower dimension(s),
4973 * therefore we might need to redetermine where
4974 * this cg should go.
4977 /* If this cg crosses the box boundary in dimension d2
4978 * we can use the communicated flag, so we do not
4979 * have to worry about pbc.
4981 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4982 (flag & DD_FLAG_FW(d2))) ||
4983 (dd->ci[dim2] == 0 &&
4984 (flag & DD_FLAG_BW(d2)))))
4986 /* Clear the two flags for this dimension */
4987 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4988 /* Determine the location of this cg
4989 * in lattice coordinates
4991 pos_d = comm->vbuf.v[buf_pos][dim2];
4994 for (d3 = dim2+1; d3 < DIM; d3++)
4997 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
5000 /* Check of we are not at the box edge.
5001 * pbc is only handled in the first step above,
5002 * but this check could move over pbc while
5003 * the first step did not due to different rounding.
5005 if (pos_d >= cell_x1[dim2] &&
5006 dd->ci[dim2] != dd->nc[dim2]-1)
5008 flag |= DD_FLAG_FW(d2);
5010 else if (pos_d < cell_x0[dim2] &&
5013 flag |= DD_FLAG_BW(d2);
5015 comm->buf_int[cg*DD_CGIBS+1] = flag;
5018 /* Set to which neighboring cell this cg should go */
5019 if (flag & DD_FLAG_FW(d2))
5023 else if (flag & DD_FLAG_BW(d2))
5025 if (dd->nc[dd->dim[d2]] > 2)
5037 nrcg = flag & DD_FLAG_NRCG;
5040 if (home_pos_cg+1 > dd->cg_nalloc)
5042 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5043 srenew(dd->index_gl, dd->cg_nalloc);
5044 srenew(dd->cgindex, dd->cg_nalloc+1);
5046 /* Set the global charge group index and size */
5047 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5048 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5049 /* Copy the state from the buffer */
5050 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5051 if (fr->cutoff_scheme == ecutsGROUP)
5054 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5058 /* Set the cginfo */
5059 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5060 dd->index_gl[home_pos_cg]);
5063 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5066 if (home_pos_at+nrcg > state->nalloc)
5068 dd_realloc_state(state, f, home_pos_at+nrcg);
5070 for (i = 0; i < nrcg; i++)
5072 copy_rvec(comm->vbuf.v[buf_pos++],
5073 state->x[home_pos_at+i]);
5077 for (i = 0; i < nrcg; i++)
5079 copy_rvec(comm->vbuf.v[buf_pos++],
5080 state->v[home_pos_at+i]);
5085 for (i = 0; i < nrcg; i++)
5087 copy_rvec(comm->vbuf.v[buf_pos++],
5088 state->sd_X[home_pos_at+i]);
5093 for (i = 0; i < nrcg; i++)
5095 copy_rvec(comm->vbuf.v[buf_pos++],
5096 state->cg_p[home_pos_at+i]);
5100 home_pos_at += nrcg;
5104 /* Reallocate the buffers if necessary */
5105 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5107 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5108 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5110 nvr = ncg[mc] + nat[mc]*nvec;
5111 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5113 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5114 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5116 /* Copy from the receive to the send buffers */
5117 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5118 comm->buf_int + cg*DD_CGIBS,
5119 DD_CGIBS*sizeof(int));
5120 memcpy(comm->cgcm_state[mc][nvr],
5121 comm->vbuf.v[buf_pos],
5122 (1+nrcg*nvec)*sizeof(rvec));
5123 buf_pos += 1 + nrcg*nvec;
5130 /* With sorting (!bCompact) the indices are now only partially up to date
5131 * and ncg_home and nat_home are not the real count, since there are
5132 * "holes" in the arrays for the charge groups that moved to neighbors.
5134 if (fr->cutoff_scheme == ecutsVERLET)
5136 moved = get_moved(comm, home_pos_cg);
5138 for (i = dd->ncg_home; i < home_pos_cg; i++)
5143 dd->ncg_home = home_pos_cg;
5144 dd->nat_home = home_pos_at;
5149 "Finished repartitioning: cgs moved out %d, new home %d\n",
5150 *ncg_moved, dd->ncg_home-*ncg_moved);
/* Accumulate 'cycles' into DD cycle counter 'ddCycl', bump its sample
 * count, and update the running per-counter maximum.
 * NOTE(review): brace lines are missing from this listing. */
5155 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5157 dd->comm->cycl[ddCycl] += cycles;
5158 dd->comm->cycl_n[ddCycl]++;
5159 if (cycles > dd->comm->cycl_max[ddCycl])
5161 dd->comm->cycl_max[ddCycl] = cycles;
/* Estimate the flop cost of the force calculation from the nrnb counters.
 * Non-bonded kernels are down-weighted (0.50x, and 0.25x for W3/W4 water
 * loops) to better match measured timings; later ranges are counted at
 * full cost_nrnb() weight.
 * NOTE(review): declarations, the 'name' lookup, else-branches and the
 * final return are missing from this listing — verify against the
 * original source before relying on details. */
5165 static double force_flop_count(t_nrnb *nrnb)
5172 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5174 /* To get closer to the real timings, we half the count
5175 * for the normal loops and again half it for water loops.
5178 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5180 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5184 sum += nrnb->n[i]*0.50*cost_nrnb(i);
5187 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5190 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5192 sum += nrnb->n[i]*cost_nrnb(i);
5195 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5197 sum += nrnb->n[i]*cost_nrnb(i);
/* Start flop accounting: subtract the current flop estimate so that a
 * later dd_force_flop_stop() leaves only the flops spent in between.
 * No-op unless flop counting (comm->eFlop) is enabled. */
5203 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5205 if (dd->comm->eFlop)
5207 dd->comm->flop -= force_flop_count(nrnb);
/* Stop flop accounting: add the current flop estimate; paired with
 * dd_force_flop_start() this accumulates the flops of the enclosed
 * force calculation into comm->flop. */
5210 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5212 if (dd->comm->eFlop)
5214 dd->comm->flop += force_flop_count(nrnb);
/* Reset all DD cycle counters (sum, sample count and maximum) and the
 * flop sample counter, ready for a new load-measurement interval. */
5219 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5223 for (i = 0; i < ddCyclNr; i++)
5225 dd->comm->cycl[i] = 0;
5226 dd->comm->cycl_n[i] = 0;
5227 dd->comm->cycl_max[i] = 0;
5230 dd->comm->flop_n = 0;
/* Gather the per-rank load measurements along each DD dimension (from
 * the last dimension up) onto the row-root ranks with MPI_Gather, and
 * reduce them into sum/max/min aggregates used for load reporting and
 * dynamic load balancing.  Timed under ewcDDCOMMLOAD.
 * NOTE(review): this listing is incomplete — many brace, declaration
 * and else-branch lines are missing; treat control flow with care. */
5233 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5235 gmx_domdec_comm_t *comm;
5236 gmx_domdec_load_t *load;
5237 gmx_domdec_root_t *root = NULL;
5238 int d, dim, cid, i, pos;
5239 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5244 fprintf(debug, "get_load_distribution start\n");
5247 wallcycle_start(wcycle, ewcDDCOMMLOAD);
5251 bSepPME = (dd->pme_nodeid >= 0);
5253 for (d = dd->ndim-1; d >= 0; d--)
5256 /* Check if we participate in the communication in this dimension */
5257 if (d == dd->ndim-1 ||
5258 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5260 load = &comm->load[d];
5263 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
5266 if (d == dd->ndim-1)
5268 sbuf[pos++] = dd_force_load(comm);
5269 sbuf[pos++] = sbuf[0];
5272 sbuf[pos++] = sbuf[0];
5273 sbuf[pos++] = cell_frac;
5276 sbuf[pos++] = comm->cell_f_max0[d];
5277 sbuf[pos++] = comm->cell_f_min1[d];
5282 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5283 sbuf[pos++] = comm->cycl[ddCyclPME];
5288 sbuf[pos++] = comm->load[d+1].sum;
5289 sbuf[pos++] = comm->load[d+1].max;
5292 sbuf[pos++] = comm->load[d+1].sum_m;
5293 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5294 sbuf[pos++] = comm->load[d+1].flags;
5297 sbuf[pos++] = comm->cell_f_max0[d];
5298 sbuf[pos++] = comm->cell_f_min1[d];
5303 sbuf[pos++] = comm->load[d+1].mdf;
5304 sbuf[pos++] = comm->load[d+1].pme;
5308 /* Communicate a row in DD direction d.
5309 * The communicators are setup such that the root always has rank 0.
5312 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5313 load->load, load->nload*sizeof(float), MPI_BYTE,
5314 0, comm->mpi_comm_load[d]);
5316 if (dd->ci[dim] == dd->master_ci[dim])
5318 /* We are the root, process this row */
5319 if (comm->bDynLoadBal)
5321 root = comm->root[d];
5331 for (i = 0; i < dd->nc[dim]; i++)
5333 load->sum += load->load[pos++];
5334 load->max = max(load->max, load->load[pos]);
5340 /* This direction could not be load balanced properly,
5341 * therefore we need to use the maximum instead of the average load.
5343 load->sum_m = max(load->sum_m, load->load[pos]);
5347 load->sum_m += load->load[pos];
5350 load->cvol_min = min(load->cvol_min, load->load[pos]);
5354 load->flags = (int)(load->load[pos++] + 0.5);
5358 root->cell_f_max0[i] = load->load[pos++];
5359 root->cell_f_min1[i] = load->load[pos++];
5364 load->mdf = max(load->mdf, load->load[pos]);
5366 load->pme = max(load->pme, load->load[pos]);
5370 if (comm->bDynLoadBal && root->bLimited)
5372 load->sum_m *= dd->nc[dim];
5373 load->flags |= (1<<d);
5381 comm->nload += dd_load_count(comm);
5382 comm->load_step += comm->cycl[ddCyclStep];
5383 comm->load_sum += comm->load[0].sum;
5384 comm->load_max += comm->load[0].max;
5385 if (comm->bDynLoadBal)
5387 for (d = 0; d < dd->ndim; d++)
5389 if (comm->load[0].flags & (1<<d))
5391 comm->load_lim[d]++;
5397 comm->load_mdf += comm->load[0].mdf;
5398 comm->load_pme += comm->load[0].pme;
5402 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5406 fprintf(debug, "get_load_distribution finished\n");
/* Return the relative performance loss of the whole run caused by the
 * force-calculation load imbalance: (max - average load) over total
 * step time, averaged over all samples since the counters were reset.
 * NOTE(review): the no-sample fallback return is missing from this
 * listing. */
5410 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5412 /* Return the relative performance loss on the total run time
5413 * due to the force calculation load imbalance.
5415 if (dd->comm->nload > 0)
5418 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5419 (dd->comm->load_step*dd->nnodes);
/* On the DD master, print the end-of-run load statistics to the log and
 * stderr: average force-load imbalance, time lost waiting, DLB limiting
 * statistics, PME mesh/force load ratio, plus NOTE warnings when the
 * losses exceed DD_PERF_LOSS_WARN.
 * NOTE(review): listing is incomplete (buf declaration, braces and some
 * assignments such as npp are missing). */
5427 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5430 int npp, npme, nnodes, d, limp;
5431 float imbal, pme_f_ratio, lossf, lossp = 0;
5433 gmx_domdec_comm_t *comm;
5436 if (DDMASTER(dd) && comm->nload > 0)
5439 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5440 nnodes = npp + npme;
5441 imbal = comm->load_max*npp/comm->load_sum - 1;
5442 lossf = dd_force_imb_perf_loss(dd);
5443 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5444 fprintf(fplog, "%s", buf);
5445 fprintf(stderr, "\n");
5446 fprintf(stderr, "%s", buf);
5447 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5448 fprintf(fplog, "%s", buf);
5449 fprintf(stderr, "%s", buf);
5451 if (comm->bDynLoadBal)
5453 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5454 for (d = 0; d < dd->ndim; d++)
5456 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5457 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5463 sprintf(buf+strlen(buf), "\n");
5464 fprintf(fplog, "%s", buf);
5465 fprintf(stderr, "%s", buf);
5469 pme_f_ratio = comm->load_pme/comm->load_mdf;
5470 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5473 lossp *= (float)npme/(float)nnodes;
5477 lossp *= (float)npp/(float)nnodes;
5479 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5480 fprintf(fplog, "%s", buf);
5481 fprintf(stderr, "%s", buf);
5482 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5483 fprintf(fplog, "%s", buf);
5484 fprintf(stderr, "%s", buf);
5486 fprintf(fplog, "\n");
5487 fprintf(stderr, "\n");
5489 if (lossf >= DD_PERF_LOSS_WARN)
5492 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5493 " in the domain decomposition.\n", lossf*100);
5494 if (!comm->bDynLoadBal)
5496 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5500 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5502 fprintf(fplog, "%s\n", buf);
5503 fprintf(stderr, "%s\n", buf);
5505 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS_WARN)
5508 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5509 " had %s work to do than the PP nodes.\n"
5510 " You might want to %s the number of PME nodes\n"
5511 " or %s the cut-off and the grid spacing.\n",
5513 (lossp < 0) ? "less" : "more",
5514 (lossp < 0) ? "decrease" : "increase",
5515 (lossp < 0) ? "decrease" : "increase");
5516 fprintf(fplog, "%s\n", buf);
5517 fprintf(stderr, "%s\n", buf);
/* Return the minimum cell volume fraction, scaled by the number of DD
 * ranks (so 1.0 means perfectly uniform volumes). */
5522 static float dd_vol_min(gmx_domdec_t *dd)
5524 return dd->comm->load[0].cvol_min*dd->nnodes;
/* Return the per-dimension DLB "limited" flag bits from the top-level
 * load record (non-zero when load balancing was restricted). */
5527 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5529 return dd->comm->load[0].flags;
/* Return the force-load imbalance: max load over average load, minus 1
 * (0 means perfectly balanced). */
5532 static float dd_f_imbal(gmx_domdec_t *dd)
5534 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
/* Return the ratio of PME mesh load to PP force load when PME cycle
 * samples exist.  NOTE(review): the fallback return for the case of no
 * PME cycle samples is missing from this listing. */
5537 float dd_pme_f_ratio(gmx_domdec_t *dd)
5539 if (dd->comm->cycl_n[ddCyclPME] > 0)
5541 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
/* Write a one-line per-step DD load report to the log file: limiting
 * dimensions (if any), min/average volume when DLB is on, force load
 * imbalance, and the PME mesh/force ratio when available. */
5549 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5554 flags = dd_load_flags(dd);
5558 "DD load balancing is limited by minimum cell size in dimension");
5559 for (d = 0; d < dd->ndim; d++)
5563 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5566 fprintf(fplog, "\n");
5568 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5569 if (dd->comm->bDynLoadBal)
5571 fprintf(fplog, " vol min/aver %5.3f%c",
5572 dd_vol_min(dd), flags ? '!' : ' ');
5574 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5575 if (dd->comm->cycl_n[ddCyclPME])
5577 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5579 fprintf(fplog, "\n\n");
/* Write a compact DD load summary to stderr (used in verbose mode):
 * volume ratio when DLB is on, force imbalance percentage, and PME/force
 * ratio when PME cycles were sampled. */
5582 static void dd_print_load_verbose(gmx_domdec_t *dd)
5584 if (dd->comm->bDynLoadBal)
5586 fprintf(stderr, "vol %4.2f%c ",
5587 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5589 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5590 if (dd->comm->cycl_n[ddCyclPME])
5592 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
/* Create the MPI communicator for a row of cells along DD dimension
 * 'dim_ind' (via MPI_Comm_split; ranks not in the row pass
 * MPI_UNDEFINED), and allocate the DLB root/row buffers on the row
 * master.  NOTE(review): listing is incomplete (declarations, loop body
 * advancing loc_c, and several braces are missing). */
5597 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5602 gmx_domdec_root_t *root;
5603 gmx_bool bPartOfGroup = FALSE;
5605 dim = dd->dim[dim_ind];
5606 copy_ivec(loc, loc_c);
5607 for (i = 0; i < dd->nc[dim]; i++)
5610 rank = dd_index(dd->nc, loc_c);
5611 if (rank == dd->rank)
5613 /* This process is part of the group */
5614 bPartOfGroup = TRUE;
5617 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5621 dd->comm->mpi_comm_load[dim_ind] = c_row;
5622 if (dd->comm->eDLB != edlbNO)
5624 if (dd->ci[dim] == dd->master_ci[dim])
5626 /* This is the root process of this row */
5627 snew(dd->comm->root[dim_ind], 1);
5628 root = dd->comm->root[dim_ind];
5629 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5630 snew(root->old_cell_f, dd->nc[dim]+1);
5631 snew(root->bCellMin, dd->nc[dim]);
5634 snew(root->cell_f_max0, dd->nc[dim]);
5635 snew(root->cell_f_min1, dd->nc[dim]);
5636 snew(root->bound_min, dd->nc[dim]);
5637 snew(root->bound_max, dd->nc[dim]);
5639 snew(root->buf_ncd, dd->nc[dim]);
5643 /* This is not a root process, we only need to receive cell_f */
5644 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5647 if (dd->ci[dim] == dd->master_ci[dim])
5649 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
/* Set up the communicator of PP ranks that share a GPU on the same
 * physical node (mpi_comm_gpu_shared): split the PP communicator first
 * by physical-node hash, then by assigned GPU id.  Returns early when
 * this rank does not use a GPU; frees the communicator again when it
 * turns out no sharing occurs (size 1). */
5655 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5656 const gmx_hw_info_t gmx_unused *hwinfo,
5657 const gmx_hw_opt_t gmx_unused *hw_opt)
5660 int physicalnode_id_hash;
5663 MPI_Comm mpi_comm_pp_physicalnode;
5665 if (!(cr->duty & DUTY_PP) ||
5666 hw_opt->gpu_opt.ncuda_dev_use == 0)
5668 /* Only PP nodes (currently) use GPUs.
5669 * If we don't have GPUs, there are no resources to share.
5674 physicalnode_id_hash = gmx_physicalnode_id_hash();
5676 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5682 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5683 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5684 dd->rank, physicalnode_id_hash, gpu_id);
5686 /* Split the PP communicator over the physical nodes */
5687 /* TODO: See if we should store this (before), as it's also used for
5688 * for the nodecomm summation.
5690 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5691 &mpi_comm_pp_physicalnode);
5692 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5693 &dd->comm->mpi_comm_gpu_shared);
5694 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5695 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5699 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5702 /* Note that some ranks could share a GPU, while others don't */
5704 if (dd->comm->nrank_gpu_shared == 1)
5706 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
/* Create the row communicators for load measurement: one for dimension
 * 0, then one per row for dimensions 1 and 2, iterating over all cell
 * coordinates of the lower dimensions.
 * NOTE(review): the lines setting 'loc' between the loops are missing
 * from this listing. */
5711 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5714 int dim0, dim1, i, j;
5719 fprintf(debug, "Making load communicators\n");
5722 snew(dd->comm->load, dd->ndim);
5723 snew(dd->comm->mpi_comm_load, dd->ndim);
5726 make_load_communicator(dd, 0, loc);
5730 for (i = 0; i < dd->nc[dim0]; i++)
5733 make_load_communicator(dd, 1, loc);
5739 for (i = 0; i < dd->nc[dim0]; i++)
5743 for (j = 0; j < dd->nc[dim1]; j++)
5746 make_load_communicator(dd, 2, loc);
5753 fprintf(debug, "Finished making load communicators\n");
/* Set up the DD grid topology: determine the forward/backward neighbor
 * rank in each DD dimension (periodic wrap via modulo), select the zone
 * table (dd_zp1/2/3) matching the decomposition dimensionality, fill in
 * the zone shifts, and derive the allowed shift ranges per interaction
 * zone (i-zones).  Finally allocate DLB roots and build the load
 * communicators when load recording is on.
 * NOTE(review): this listing is incomplete; the block around original
 * lines 5874-5884 appears to be an alternative/legacy branch with
 * inconsistent style — verify against the original source. */
5758 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5761 int d, dim, i, j, m;
5764 ivec dd_zp[DD_MAXIZONE];
5765 gmx_domdec_zones_t *zones;
5766 gmx_domdec_ns_ranges_t *izone;
5768 for (d = 0; d < dd->ndim; d++)
5771 copy_ivec(dd->ci, tmp);
5772 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5773 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5774 copy_ivec(dd->ci, tmp);
5775 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5776 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5779 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5782 dd->neighbor[d][1]);
5788 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5790 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5791 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5798 for (i = 0; i < nzonep; i++)
5800 copy_ivec(dd_zp3[i], dd_zp[i]);
5806 for (i = 0; i < nzonep; i++)
5808 copy_ivec(dd_zp2[i], dd_zp[i]);
5814 for (i = 0; i < nzonep; i++)
5816 copy_ivec(dd_zp1[i], dd_zp[i]);
5820 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5825 zones = &dd->comm->zones;
5827 for (i = 0; i < nzone; i++)
5830 clear_ivec(zones->shift[i]);
5831 for (d = 0; d < dd->ndim; d++)
5833 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5838 for (i = 0; i < nzone; i++)
5840 for (d = 0; d < DIM; d++)
5842 s[d] = dd->ci[d] - zones->shift[i][d];
5847 else if (s[d] >= dd->nc[d])
5853 zones->nizone = nzonep;
5854 for (i = 0; i < zones->nizone; i++)
5856 if (dd_zp[i][0] != i)
5858 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5860 izone = &zones->izone[i];
5861 izone->j0 = dd_zp[i][1];
5862 izone->j1 = dd_zp[i][2];
5863 for (dim = 0; dim < DIM; dim++)
5865 if (dd->nc[dim] == 1)
5867 /* All shifts should be allowed */
5868 izone->shift0[dim] = -1;
5869 izone->shift1[dim] = 1;
5874 izone->shift0[d] = 0;
5875 izone->shift1[d] = 0;
5876 for(j=izone->j0; j<izone->j1; j++) {
5877 if (dd->shift[j][d] > dd->shift[i][d])
5878 izone->shift0[d] = -1;
5879 if (dd->shift[j][d] < dd->shift[i][d])
5880 izone->shift1[d] = 1;
5886 /* Assume the shift are not more than 1 cell */
5887 izone->shift0[dim] = 1;
5888 izone->shift1[dim] = -1;
5889 for (j = izone->j0; j < izone->j1; j++)
5891 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5892 if (shift_diff < izone->shift0[dim])
5894 izone->shift0[dim] = shift_diff;
5896 if (shift_diff > izone->shift1[dim])
5898 izone->shift1[dim] = shift_diff;
5905 if (dd->comm->eDLB != edlbNO)
5907 snew(dd->comm->root, dd->ndim);
5910 if (dd->comm->bRecordLoad)
5912 make_load_communicators(dd);
/* Create or adopt the particle-particle communicator.  With Cartesian
 * PP a new MPI Cartesian communicator replaces mpi_comm_mygroup; the
 * rank is stored in dd->rank.  Depending on the PP/PME layout this then
 * builds either a ddindex->ddnodeid map or a ddindex->simnodeid map
 * (summed across ranks with MPI_Allreduce) and determines the DD master
 * coordinates/rank.  NOTE(review): listing is incomplete (declarations,
 * several if/else lines and braces are missing). */
5916 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
5919 gmx_domdec_comm_t *comm;
5930 if (comm->bCartesianPP)
5932 /* Set up cartesian communication for the particle-particle part */
5935 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5936 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5939 for (i = 0; i < DIM; i++)
5943 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5945 /* We overwrite the old communicator with the new cartesian one */
5946 cr->mpi_comm_mygroup = comm_cart;
5949 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5950 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5952 if (comm->bCartesianPP_PME)
5954 /* Since we want to use the original cartesian setup for sim,
5955 * and not the one after split, we need to make an index.
5957 snew(comm->ddindex2ddnodeid, dd->nnodes);
5958 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5959 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5960 /* Get the rank of the DD master,
5961 * above we made sure that the master node is a PP node.
5971 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5973 else if (comm->bCartesianPP)
5975 if (cr->npmenodes == 0)
5977 /* The PP communicator is also
5978 * the communicator for this simulation
5980 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5982 cr->nodeid = dd->rank;
5984 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5986 /* We need to make an index to go from the coordinates
5987 * to the nodeid of this simulation.
5989 snew(comm->ddindex2simnodeid, dd->nnodes);
5990 snew(buf, dd->nnodes);
5991 if (cr->duty & DUTY_PP)
5993 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5995 /* Communicate the ddindex to simulation nodeid index */
5996 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5997 cr->mpi_comm_mysim);
6000 /* Determine the master coordinates and rank.
6001 * The DD master should be the same node as the master of this sim.
6003 for (i = 0; i < dd->nnodes; i++)
6005 if (comm->ddindex2simnodeid[i] == 0)
6007 ddindex2xyz(dd->nc, i, dd->master_ci);
6008 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
6013 fprintf(debug, "The master rank is %d\n", dd->masterrank);
6018 /* No Cartesian communicators */
6019 /* We use the rank in dd->comm->all as DD index */
6020 ddindex2xyz(dd->nc, dd->rank, dd->ci);
6021 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
6023 clear_ivec(dd->master_ci);
6030 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6031 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6036 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6037 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* On ranks that did not build it in make_pp_communicator (pure
 * Cartesian-PP layout without PP/PME Cartesian split), construct the
 * ddindex -> simulation-nodeid map by summing each rank's contribution
 * with MPI_Allreduce. */
6041 static void receive_ddindex2simnodeid(t_commrec *cr)
6045 gmx_domdec_comm_t *comm;
6052 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6054 snew(comm->ddindex2simnodeid, dd->nnodes);
6055 snew(buf, dd->nnodes);
6056 if (cr->duty & DUTY_PP)
6058 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6061 /* Communicate the ddindex to simulation nodeid index */
6062 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6063 cr->mpi_comm_mysim);
/* Allocate and initialize the DD master data structure: per-rank charge
 * group counts/indices, communication buffers, and per-dimension cell
 * boundary arrays; a whole-system rvec buffer is allocated only for the
 * small-rank-count send/recv path (<= GMX_DD_NNODES_SENDRECV). */
6070 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6071 int ncg, int natoms)
6073 gmx_domdec_master_t *ma;
6078 snew(ma->ncg, dd->nnodes);
6079 snew(ma->index, dd->nnodes+1);
6081 snew(ma->nat, dd->nnodes);
6082 snew(ma->ibuf, dd->nnodes*2);
6083 snew(ma->cell_x, DIM);
6084 for (i = 0; i < DIM; i++)
6086 snew(ma->cell_x[i], dd->nc[i]+1);
6089 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6095 snew(ma->vbuf, natoms);
/* Split the simulation communicator into PP and PME groups.  When the
 * PME rank count divides evenly into a Cartesian layout, build a joint
 * PP+PME Cartesian communicator (stacking PME-only ranks along the
 * thinnest of y/z) and verify rank 0 stays the master; otherwise assign
 * PME duty by node order (PP-first or interleaved) and split with
 * MPI_Comm_split.  NOTE(review): listing is incomplete (declarations
 * and several braces/else lines are missing). */
6101 static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
6102 int gmx_unused reorder)
6105 gmx_domdec_comm_t *comm;
6116 if (comm->bCartesianPP)
6118 for (i = 1; i < DIM; i++)
6120 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6122 if (bDiv[YY] || bDiv[ZZ])
6124 comm->bCartesianPP_PME = TRUE;
6125 /* If we have 2D PME decomposition, which is always in x+y,
6126 * we stack the PME only nodes in z.
6127 * Otherwise we choose the direction that provides the thinnest slab
6128 * of PME only nodes as this will have the least effect
6129 * on the PP communication.
6130 * But for the PME communication the opposite might be better.
6132 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6134 dd->nc[YY] > dd->nc[ZZ]))
6136 comm->cartpmedim = ZZ;
6140 comm->cartpmedim = YY;
6142 comm->ntot[comm->cartpmedim]
6143 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6147 fprintf(fplog, "#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6149 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6154 if (comm->bCartesianPP_PME)
6158 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6161 for (i = 0; i < DIM; i++)
6165 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6168 MPI_Comm_rank(comm_cart, &rank);
6169 if (MASTERNODE(cr) && rank != 0)
6171 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6174 /* With this assignment we lose the link to the original communicator
6175 * which will usually be MPI_COMM_WORLD, unless have multisim.
6177 cr->mpi_comm_mysim = comm_cart;
6178 cr->sim_nodeid = rank;
6180 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6184 fprintf(fplog, "Cartesian nodeid %d, coordinates %d %d %d\n\n",
6185 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6188 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6192 if (cr->npmenodes == 0 ||
6193 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6195 cr->duty = DUTY_PME;
6198 /* Split the sim communicator into PP and PME only nodes */
6199 MPI_Comm_split(cr->mpi_comm_mysim,
6201 dd_index(comm->ntot, dd->ci),
6202 &cr->mpi_comm_mygroup);
6206 switch (dd_node_order)
6211 fprintf(fplog, "Order of the nodes: PP first, PME last\n");
6214 case ddnoINTERLEAVE:
6215 /* Interleave the PP-only and PME-only nodes,
6216 * as on clusters with dual-core machines this will double
6217 * the communication bandwidth of the PME processes
6218 * and thus speed up the PP <-> PME and inter PME communication.
6222 fprintf(fplog, "Interleaving PP and PME nodes\n");
6224 comm->pmenodes = dd_pmenodes(cr);
6229 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6232 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6234 cr->duty = DUTY_PME;
6241 /* Split the sim communicator into PP and PME only nodes */
6242 MPI_Comm_split(cr->mpi_comm_mysim,
6245 &cr->mpi_comm_mygroup);
6246 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6252 fprintf(fplog, "This is a %s only node\n\n",
6253 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
/* Top-level communicator setup: optionally split the simulation
 * communicator into PP and PME groups, create the PP communicator
 * (with Cartesian reordering unless disabled via GMX_NO_CART_REORDER
 * or already done in split_communicator), connect each PP rank to its
 * PME rank, and allocate the master data structure on the DD master. */
6257 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6260 gmx_domdec_comm_t *comm;
6266 copy_ivec(dd->nc, comm->ntot);
6268 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6269 comm->bCartesianPP_PME = FALSE;
6271 /* Reorder the nodes by default. This might change the MPI ranks.
6272 * Real reordering is only supported on very few architectures,
6273 * Blue Gene is one of them.
6275 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6277 if (cr->npmenodes > 0)
6279 /* Split the communicator into a PP and PME part */
6280 split_communicator(fplog, cr, dd_node_order, CartReorder);
6281 if (comm->bCartesianPP_PME)
6283 /* We (possibly) reordered the nodes in split_communicator,
6284 * so it is no longer required in make_pp_communicator.
6286 CartReorder = FALSE;
6291 /* All nodes do PP and PME */
6293 /* We do not require separate communicators */
6294 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6298 if (cr->duty & DUTY_PP)
6300 /* Copy or make a new PP communicator */
6301 make_pp_communicator(fplog, cr, CartReorder);
6305 receive_ddindex2simnodeid(cr);
6308 if (!(cr->duty & DUTY_PME))
6310 /* Set up the communication to our PME node */
6311 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6312 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6315 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6316 dd->pme_nodeid, dd->pme_receive_vir_ener);
6321 dd->pme_nodeid = -1;
6326 dd->ma = init_gmx_domdec_master_t(dd,
6328 comm->cgs_gl.index[comm->cgs_gl.nr]);
/* Parse the static load balancing (-ddcsx/y/z style) relative cell size
 * string for direction 'dir' into an allocated array of 'nc' fractions;
 * returns NULL when nc <= 1 or no sizes were given.  Aborts with
 * gmx_fatal on malformed or missing entries.
 * NOTE(review): the normalization by 'tot' and the NULL-return path are
 * missing from this listing. */
6332 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6334 real *slb_frac, tot;
6339 if (nc > 1 && size_string != NULL)
6343 fprintf(fplog, "Using static load balancing for the %s direction\n",
6348 for (i = 0; i < nc; i++)
6351 sscanf(size_string, "%lf%n", &dbl, &n);
6354 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6363 fprintf(fplog, "Relative cell sizes:");
6365 for (i = 0; i < nc; i++)
6370 fprintf(fplog, " %5.3f", slb_frac[i]);
6375 fprintf(fplog, "\n");
/* Count the number of multi-body (>2 atoms) bonded interactions in the
 * whole topology by iterating over all molecule-type interaction lists
 * and scaling by the molecule count. */
6382 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6385 gmx_mtop_ilistloop_t iloop;
6389 iloop = gmx_mtop_ilistloop_init(mtop);
6390 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6392 for (ftype = 0; ftype < F_NRE; ftype++)
6394 if ((interaction_function[ftype].flags & IF_BOND) &&
6397 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
/* Read an integer from environment variable 'env_var'; log the value
 * found to fplog.  NOTE(review): the lines returning the parsed value
 * or the default 'def' are missing from this listing. */
6405 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6411 val = getenv(env_var);
6414 if (sscanf(val, "%d", &nst) <= 0)
6420 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
/* Emit a warning string to stderr and, when available, to the log file.
 * NOTE(review): the MASTER(cr)/fplog guards are missing from this
 * listing. */
6428 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6432 fprintf(stderr, "\n%s\n", warn_string);
6436 fprintf(fplog, "\n%s\n", warn_string);
/* Validate input-record settings against domain decomposition
 * restrictions: screw pbc only with x-only decomposition, no simple
 * neighbor search, no nstlist=0; warn for angular comm-mode with pbc. */
6440 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6441 t_inputrec *ir, FILE *fplog)
6443 if (ir->ePBC == epbcSCREW &&
6444 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6446 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6449 if (ir->ns_type == ensSIMPLE)
6451 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6454 if (ir->nstlist == 0)
6456 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6459 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6461 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
/* Return the smallest initial average cell size over all decomposed
 * dimensions, taking the box skew into account; starts from the x
 * box size as upper bound.
 */
6465 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6470 r = ddbox->box_size[XX];
6471 for (di = 0; di < dd->ndim; di++)
6474 /* Check using the initial average cell size */
6475 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
/* Translate the -dlb option character ('a'/'n'/'y') into an edlb
 * enum value and downgrade it when dynamic load balancing cannot be
 * used: rerun mode, non-dynamical integrators, missing cycle
 * counters, or a reproducibility request.
 */
6481 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6482 const char *dlb_opt, gmx_bool bRecordLoad,
6483 unsigned long Flags, t_inputrec *ir)
6491 case 'a': eDLB = edlbAUTO; break;
6492 case 'n': eDLB = edlbNO; break;
6493 case 'y': eDLB = edlbYES; break;
6494 default: gmx_incons("Unknown dlb_opt");
/* Reruns do not balance load, so DLB is pointless there */
6497 if (Flags & MD_RERUN)
6502 if (!EI_DYNAMICS(ir->eI))
6504 if (eDLB == edlbYES)
6506 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6507 dd_warning(cr, fplog, buf);
6515 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
/* DLB changes the decomposition dynamically, which breaks binary
 * reproducibility; auto gets turned off, explicit yes only warns */
6520 if (Flags & MD_REPRODUCIBLE)
6527 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6531 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6534 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
/* Fill dd->dim[] with the decomposed dimensions (nc > 1) in
 * communication order: x,y,z by default, or z,y,x when the
 * GMX_DD_ORDER_ZYX environment variable is set.
 */
6542 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6547 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6549 /* Decomposition order z,y,x */
6552 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6554 for (dim = DIM-1; dim >= 0; dim--)
6556 if (dd->nc[dim] > 1)
6558 dd->dim[dd->ndim++] = dim;
6564 /* Decomposition order x,y,z */
6565 for (dim = 0; dim < DIM; dim++)
6567 if (dd->nc[dim] > 1)
6569 dd->dim[dd->ndim++] = dim;
/* Allocate and zero-initialize a gmx_domdec_comm_t, including the
 * per-direction charge-group communication buffers (two per
 * dimension: forward and backward) and the load/flop counters.
 */
6575 static gmx_domdec_comm_t *init_dd_comm()
6577 gmx_domdec_comm_t *comm;
/* One cggl_flag/cgcm_state buffer pair per dimension and direction */
6581 snew(comm->cggl_flag, DIM*2);
6582 snew(comm->cgcm_state, DIM*2);
6583 for (i = 0; i < DIM*2; i++)
6585 comm->cggl_flag_nalloc[i] = 0;
6586 comm->cgcm_state_nalloc[i] = 0;
6589 comm->nalloc_int = 0;
6590 comm->buf_int = NULL;
6592 vec_rvec_init(&comm->vbuf);
6594 comm->n_load_have = 0;
6595 comm->n_load_collect = 0;
6597 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6599 comm->sum_nat[i] = 0;
6603 comm->load_step = 0;
6606 clear_ivec(comm->load_lim);
/* Build and return the domain decomposition setup for this run:
 * read GMX_DD_* / GMX_DLB_* environment overrides, decide on dynamic
 * load balancing support, determine the cell-size limits imposed by
 * bonded interactions and P-LINCS constraints, choose (or validate)
 * the DD grid and the separate-PME node split, and set up static
 * load-balancing fractions. The returned gmx_domdec_t and the
 * parameter interface are unchanged.
 */
6613 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6614 unsigned long Flags,
6616 real comm_distance_min, real rconstr,
6617 const char *dlb_opt, real dlb_scale,
6618 const char *sizex, const char *sizey, const char *sizez,
6619 gmx_mtop_t *mtop, t_inputrec *ir,
6620 matrix box, rvec *x,
6622 int *npme_x, int *npme_y)
6625 gmx_domdec_comm_t *comm;
6628 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6635 "\nInitializing Domain Decomposition on %d nodes\n", cr->nnodes);
6640 dd->comm = init_dd_comm();
/* init_dd_comm() already allocates comm->cggl_flag and
 * comm->cgcm_state; the duplicate snew() calls that used to follow
 * here leaked those allocations and have been removed. */
/* Environment variable overrides for debugging and tuning */
6645 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6646 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6648 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
6649 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
6650 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
6651 recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
6652 comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
6653 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
6654 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
6655 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
6657 dd->pme_recv_f_alloc = 0;
6658 dd->pme_recv_f_buf = NULL;
6660 if (dd->bSendRecv2 && fplog)
6662 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6668 fprintf(fplog, "Will load balance based on FLOP count\n");
/* eFlop > 1 adds noise to the flop count for load-balance testing */
6670 if (comm->eFlop > 1)
6672 srand(1+cr->nodeid);
6674 comm->bRecordLoad = TRUE;
6678 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6682 /* Initialize to GPU share count to 0, might change later */
6683 comm->nrank_gpu_shared = 0;
6685 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6687 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6690 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6692 dd->bGridJump = comm->bDynLoadBal;
6693 comm->bPMELoadBalDLBLimits = FALSE;
6695 if (comm->nstSortCG)
6699 if (comm->nstSortCG == 1)
6701 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6705 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6709 snew(comm->sort, 1);
6715 fprintf(fplog, "Will not sort the charge groups\n");
6719 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
/* More charge groups than molecules implies intermolecular or
 * intramolecular bonded interactions that cross charge groups */
6721 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6722 if (comm->bInterCGBondeds)
6724 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6728 comm->bInterCGMultiBody = FALSE;
6731 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6732 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6734 if (ir->rlistlong == 0)
6736 /* Set the cut-off to some very large value,
6737 * so we don't need if statements everywhere in the code.
6738 * We use sqrt, since the cut-off is squared in some places.
6740 comm->cutoff = GMX_CUTOFF_INF;
6744 comm->cutoff = ir->rlistlong;
6746 comm->cutoff_mbody = 0;
6748 comm->cellsize_limit = 0;
6749 comm->bBondComm = FALSE;
/* Determine the minimum cell size limit required for bondeds */
6751 if (comm->bInterCGBondeds)
6753 if (comm_distance_min > 0)
6755 comm->cutoff_mbody = comm_distance_min;
6756 if (Flags & MD_DDBONDCOMM)
6758 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6762 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6764 r_bonded_limit = comm->cutoff_mbody;
6766 else if (ir->bPeriodicMols)
6768 /* Can not easily determine the required cut-off */
6769 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6770 comm->cutoff_mbody = comm->cutoff/2;
6771 r_bonded_limit = comm->cutoff_mbody;
6777 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6778 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6780 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6781 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6783 /* We use an initial margin of 10% for the minimum cell size,
6784 * except when we are just below the non-bonded cut-off.
6786 if (Flags & MD_DDBONDCOMM)
6788 if (max(r_2b, r_mb) > comm->cutoff)
6790 r_bonded = max(r_2b, r_mb);
6791 r_bonded_limit = 1.1*r_bonded;
6792 comm->bBondComm = TRUE;
6797 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6799 /* We determine cutoff_mbody later */
6803 /* No special bonded communication,
6804 * simply increase the DD cut-off.
6806 r_bonded_limit = 1.1*max(r_2b, r_mb);
6807 comm->cutoff_mbody = r_bonded_limit;
6808 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6811 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6815 "Minimum cell size due to bonded interactions: %.3f nm\n",
6816 comm->cellsize_limit);
/* Cell size limit from P-LINCS constraints (-rcon) */
6820 if (dd->bInterCGcons && rconstr <= 0)
6822 /* There is a cell size limit due to the constraints (P-LINCS) */
6823 rconstr = constr_r_max(fplog, mtop, ir);
6827 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6829 if (rconstr > comm->cellsize_limit)
6831 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6835 else if (rconstr > 0 && fplog)
6837 /* Here we do not check for dd->bInterCGcons,
6838 * because one can also set a cell size limit for virtual sites only
6839 * and at this point we don't know yet if there are intercg v-sites.
6842 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6845 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6847 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
/* Either validate the user-specified grid or choose one */
6851 copy_ivec(nc, dd->nc);
6852 set_dd_dim(fplog, dd);
6853 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6855 if (cr->npmenodes == -1)
6859 acs = average_cellsize_min(dd, ddbox);
6860 if (acs < comm->cellsize_limit)
6864 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6866 gmx_fatal_collective(FARGS, cr, NULL,
6867 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6868 acs, comm->cellsize_limit);
6873 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6875 /* We need to choose the optimal DD grid and possibly PME nodes */
6876 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6877 comm->eDLB != edlbNO, dlb_scale,
6878 comm->cellsize_limit, comm->cutoff,
6879 comm->bInterCGBondeds);
6881 if (dd->nc[XX] == 0)
6883 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6884 sprintf(buf, "Change the number of nodes or mdrun option %s%s%s",
6885 !bC ? "-rdd" : "-rcon",
6886 comm->eDLB != edlbNO ? " or -dds" : "",
6887 bC ? " or your LINCS settings" : "");
6889 gmx_fatal_collective(FARGS, cr, NULL,
6890 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6892 "Look in the log file for details on the domain decomposition",
6893 cr->nnodes-cr->npmenodes, limit, buf);
6895 set_dd_dim(fplog, dd);
6901 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6902 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
/* Consistency checks between grid, PP nodes and PME nodes */
6905 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6906 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6908 gmx_fatal_collective(FARGS, cr, NULL,
6909 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6910 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6912 if (cr->npmenodes > dd->nnodes)
6914 gmx_fatal_collective(FARGS, cr, NULL,
6915 "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6917 if (cr->npmenodes > 0)
6919 comm->npmenodes = cr->npmenodes;
6923 comm->npmenodes = dd->nnodes;
/* Decide on a 1D or 2D PME decomposition */
6926 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6928 /* The following choices should match those
6929 * in comm_cost_est in domdec_setup.c.
6930 * Note that here the checks have to take into account
6931 * that the decomposition might occur in a different order than xyz
6932 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6933 * in which case they will not match those in comm_cost_est,
6934 * but since that is mainly for testing purposes that's fine.
6936 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6937 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6938 getenv("GMX_PMEONEDD") == NULL)
6940 comm->npmedecompdim = 2;
6941 comm->npmenodes_x = dd->nc[XX];
6942 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6946 /* In case nc is 1 in both x and y we could still choose to
6947 * decompose pme in y instead of x, but we use x for simplicity.
6949 comm->npmedecompdim = 1;
6950 if (dd->dim[0] == YY)
6952 comm->npmenodes_x = 1;
6953 comm->npmenodes_y = comm->npmenodes;
6957 comm->npmenodes_x = comm->npmenodes;
6958 comm->npmenodes_y = 1;
6963 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6964 comm->npmenodes_x, comm->npmenodes_y, 1);
6969 comm->npmedecompdim = 0;
6970 comm->npmenodes_x = 0;
6971 comm->npmenodes_y = 0;
6974 /* Technically we don't need both of these,
6975 * but it simplifies code not having to recalculate it.
6977 *npme_x = comm->npmenodes_x;
6978 *npme_y = comm->npmenodes_y;
/* Static load balancing fractions, only used without DLB */
6980 snew(comm->slb_frac, DIM);
6981 if (comm->eDLB == edlbNO)
6983 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6984 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6985 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
6988 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6990 if (comm->bBondComm || comm->eDLB != edlbNO)
6992 /* Set the bonded communication distance to halfway
6993 * the minimum and the maximum,
6994 * since the extra communication cost is nearly zero.
6996 acs = average_cellsize_min(dd, ddbox);
6997 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6998 if (comm->eDLB != edlbNO)
7000 /* Check if this does not limit the scaling */
7001 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
7003 if (!comm->bBondComm)
7005 /* Without bBondComm do not go beyond the n.b. cut-off */
7006 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
7007 if (comm->cellsize_limit >= comm->cutoff)
7009 /* We don't loose a lot of efficieny
7010 * when increasing it to the n.b. cut-off.
7011 * It can even be slightly faster, because we need
7012 * less checks for the communication setup.
7014 comm->cutoff_mbody = comm->cutoff;
7017 /* Check if we did not end up below our original limit */
7018 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
7020 if (comm->cutoff_mbody > comm->cellsize_limit)
7022 comm->cellsize_limit = comm->cutoff_mbody;
7025 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
7030 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
7031 "cellsize limit %f\n",
7032 comm->bBondComm, comm->cellsize_limit);
7037 check_dd_restrictions(cr, dd, ir, fplog);
7040 comm->partition_step = INT_MIN;
7043 clear_dd_cycle_counts(dd);
/* Activate the DLB limits: copy the DLB pulse counts and minimum
 * cell sizes into the active fields for every decomposed dimension.
 */
7048 static void set_dlb_limits(gmx_domdec_t *dd)
7053 for (d = 0; d < dd->ndim; d++)
7055 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7056 dd->comm->cellsize_min[dd->dim[d]] =
7057 dd->comm->cellsize_min_dlb[dd->dim[d]];
/* Switch dynamic load balancing on at the given step, unless the
 * current minimum cell size is already within 5% of the cell size
 * limit (then DLB is permanently disabled instead). Also seeds the
 * uniform cell boundary fractions so no communication is needed.
 */
7062 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7065 gmx_domdec_comm_t *comm;
7075 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
/* Find the smallest current cell size over the decomposed dims */
7078 cellsize_min = comm->cellsize_min[dd->dim[0]];
7079 for (d = 1; d < dd->ndim; d++)
7081 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7084 if (cellsize_min < comm->cellsize_limit*1.05)
7086 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7088 /* Change DLB from "auto" to "no". */
7089 comm->eDLB = edlbNO;
7094 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7095 comm->bDynLoadBal = TRUE;
7096 dd->bGridJump = TRUE;
7100 /* We can set the required cell size info here,
7101 * so we do not need to communicate this.
7102 * The grid is completely uniform.
7104 for (d = 0; d < dd->ndim; d++)
7108 comm->load[d].sum_m = comm->load[d].sum;
7110 nc = dd->nc[dd->dim[d]];
/* Uniform cell boundaries: fraction i/nc for cell i */
7111 for (i = 0; i < nc; i++)
7113 comm->root[d]->cell_f[i] = i/(real)nc;
7116 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7117 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7120 comm->root[d]->cell_f[nc] = 1.0;
/* Allocate one locality flag per global charge group, all
 * initialized to FALSE (not local); caller owns the returned array.
 */
7125 static char *init_bLocalCG(gmx_mtop_t *mtop)
7130 ncg = ncg_mtop(mtop);
7131 snew(bLocalCG, ncg);
7132 for (cg = 0; cg < ncg; cg++)
7134 bLocalCG[cg] = FALSE;
/* Initialize the bonded-interaction bookkeeping for DD: build the
 * reverse topology and, when bonded communication beyond the cut-off
 * is enabled, the charge-group link table and locality flags.
 */
7140 void dd_init_bondeds(FILE *fplog,
7141 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7143 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7145 gmx_domdec_comm_t *comm;
7149 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7153 if (comm->bBondComm)
7155 /* Communicate atoms beyond the cut-off for bonded interactions */
7158 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7160 comm->bLocalCG = init_bLocalCG(mtop);
7164 /* Only communicate atoms based on cut-off */
7165 comm->cglink = NULL;
7166 comm->bLocalCG = NULL;
/* Write a summary of the DD settings to the log file: pulse counts,
 * cell size limits, allowed DLB shrink factors, and the maximum
 * allowed distances for the various interaction classes.
 */
7170 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7172 gmx_bool bDynLoadBal, real dlb_scale,
7175 gmx_domdec_comm_t *comm;
7190 fprintf(fplog, "The maximum number of communication pulses is:");
7191 for (d = 0; d < dd->ndim; d++)
7193 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7195 fprintf(fplog, "\n");
7196 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7197 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7198 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7199 for (d = 0; d < DIM; d++)
7203 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
/* shrink = minimum DLB cell size / initial average cell size */
7210 comm->cellsize_min_dlb[d]/
7211 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7213 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7216 fprintf(fplog, "\n");
7220 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
7221 fprintf(fplog, "The initial number of communication pulses is:");
7222 for (d = 0; d < dd->ndim; d++)
7224 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7226 fprintf(fplog, "\n");
7227 fprintf(fplog, "The initial domain decomposition cell size is:");
7228 for (d = 0; d < DIM; d++)
7232 fprintf(fplog, " %c %.2f nm",
7233 dim2char(d), dd->comm->cellsize_min[d]);
7236 fprintf(fplog, "\n\n");
7239 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7241 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7242 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7243 "non-bonded interactions", "", comm->cutoff);
7247 limit = dd->comm->cellsize_limit;
7251 if (dynamic_dd_box(ddbox, ir))
7253 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7255 limit = dd->comm->cellsize_min[XX];
7256 for (d = 1; d < DIM; d++)
7258 limit = min(limit, dd->comm->cellsize_min[d]);
7262 if (comm->bInterCGBondeds)
7264 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7265 "two-body bonded interactions", "(-rdd)",
7266 max(comm->cutoff, comm->cutoff_mbody));
7267 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7268 "multi-body bonded interactions", "(-rdd)",
7269 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7273 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7274 "virtual site constructions", "(-rcon)", limit);
7276 if (dd->constraint_comm)
7278 sprintf(buf, "atoms separated by up to %d constraints",
7280 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7281 buf, "(-rcon)", limit);
7283 fprintf(fplog, "\n");
/* Determine the DLB communication pulse counts per dimension and the
 * resulting minimum cell sizes, honoring the GMX_DD_NPULSE override
 * and the special unlimited case for vacuum (no PBC, no cut-off).
 */
7289 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7291 const t_inputrec *ir,
7292 const gmx_ddbox_t *ddbox)
7294 gmx_domdec_comm_t *comm;
7295 int d, dim, npulse, npulse_d_max, npulse_d;
7300 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7302 /* Determine the maximum number of comm. pulses in one dimension */
7304 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7306 /* Determine the maximum required number of grid pulses */
7307 if (comm->cellsize_limit >= comm->cutoff)
7309 /* Only a single pulse is required */
7312 else if (!bNoCutOff && comm->cellsize_limit > 0)
7314 /* We round down slightly here to avoid overhead due to the latency
7315 * of extra communication calls when the cut-off
7316 * would be only slightly longer than the cell size.
7317 * Later cellsize_limit is redetermined,
7318 * so we can not miss interactions due to this rounding.
7320 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7324 /* There is no cell size limit */
7325 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7328 if (!bNoCutOff && npulse > 1)
7330 /* See if we can do with less pulses, based on dlb_scale */
7332 for (d = 0; d < dd->ndim; d++)
7335 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7336 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7337 npulse_d_max = max(npulse_d_max, npulse_d);
7339 npulse = min(npulse, npulse_d_max);
7342 /* This env var can override npulse */
7343 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
7350 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7351 for (d = 0; d < dd->ndim; d++)
/* A pulse count can never exceed nc-1 in a dimension */
7353 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7354 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7355 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7356 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7357 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7359 comm->bVacDLBNoLimit = FALSE;
7363 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7364 if (!comm->bVacDLBNoLimit)
7366 comm->cellsize_limit = max(comm->cellsize_limit,
7367 comm->cutoff/comm->maxpulse);
7369 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7370 /* Set the minimum cell size for each DD dimension */
7371 for (d = 0; d < dd->ndim; d++)
7373 if (comm->bVacDLBNoLimit ||
7374 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7376 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7380 comm->cellsize_min_dlb[dd->dim[d]] =
7381 comm->cutoff/comm->cd[d].np_dlb;
7384 if (comm->cutoff_mbody <= 0)
7386 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7388 if (comm->bDynLoadBal)
/* Return whether bonded interactions need PBC treatment under DD:
 * only when there is PBC, inter-charge-group bondeds exist, and not
 * every periodic dimension is decomposed.
 */
7394 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7396 /* If each molecule is a single charge group
7397 * or we use domain decomposition for each periodic dimension,
7398 * we do not need to take pbc into account for the bonded interactions.
7400 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7403 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
/* Finalize DD parameters that depend on information only available
 * after init_domain_decomposition: OpenMP thread count, PME
 * decomposition data, DLB cell limits, and the global-to-local atom
 * lookup table sized by the estimated local zone volume fraction.
 */
7406 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7407 t_inputrec *ir, gmx_ddbox_t *ddbox)
7409 gmx_domdec_comm_t *comm;
7415 /* Initialize the thread data.
7416 * This can not be done in init_domain_decomposition,
7417 * as the numbers of threads is determined later.
7419 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7422 snew(comm->dth, comm->nth);
7425 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7427 init_ddpme(dd, &comm->ddpme[0], 0);
7428 if (comm->npmedecompdim >= 2)
7430 init_ddpme(dd, &comm->ddpme[1], 1);
7435 comm->npmenodes = 0;
7436 if (dd->pme_nodeid >= 0)
7438 gmx_fatal_collective(FARGS, NULL, dd,
7439 "Can not have separate PME nodes without PME electrostatics");
7445 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7447 if (comm->eDLB != edlbNO)
7449 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7452 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7453 if (comm->eDLB == edlbAUTO)
7457 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7459 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
/* Estimate the fraction of the box volume one node's zones cover,
 * used to size the global-to-local hash table */
7462 if (ir->ePBC == epbcNONE)
7464 vol_frac = 1 - 1/(double)dd->nnodes;
7469 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7473 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7475 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7477 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
/* Check over all decomposed dimensions whether the requested cut-off
 * cutoff_req fits in the current decomposition (pulse counts and,
 * with DLB, current local cell sizes and grid jumps). The local
 * verdict is combined over all ranks via gmx_sumi.
 */
7480 static gmx_bool test_dd_cutoff(t_commrec *cr,
7481 t_state *state, t_inputrec *ir,
7492 set_ddbox(dd, FALSE, cr, ir, state->box,
7493 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7497 for (d = 0; d < dd->ndim; d++)
7501 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7502 if (dynamic_dd_box(&ddbox, ir))
/* Pressure coupling can shrink cells; apply an extra margin */
7504 inv_cell_size *= DD_PRES_SCALE_MARGIN;
/* np = number of pulses needed for the requested cut-off */
7507 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7509 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7510 dd->comm->cd[d].np_dlb > 0)
7512 if (np > dd->comm->cd[d].np_dlb)
7517 /* If a current local cell size is smaller than the requested
7518 * cut-off, we could still fix it, but this gets very complicated.
7519 * Without fixing here, we might actually need more checks.
7521 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7528 if (dd->comm->eDLB != edlbNO)
7530 /* If DLB is not active yet, we don't need to check the grid jumps.
7531 * Actually we shouldn't, because then the grid jump data is not set.
7533 if (dd->comm->bDynLoadBal &&
7534 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7539 gmx_sumi(1, &LocallyLimited, cr);
7541 if (LocallyLimited > 0)
/* Try to change the DD communication cut-off to cutoff_req; only
 * applies the new value when test_dd_cutoff accepts it. Returns
 * whether the change was allowed.
 */
7550 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7553 gmx_bool bCutoffAllowed;
7555 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7559 cr->dd->comm->cutoff = cutoff_req;
7562 return bCutoffAllowed;
/* Record the current cut-off as the upper limit for PME load
 * balancing and enable the corresponding DLB limiting.
 */
7565 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7567 gmx_domdec_comm_t *comm;
7569 comm = cr->dd->comm;
7571 /* Turn on the DLB limiting (might have been on already) */
7572 comm->bPMELoadBalDLBLimits = TRUE;
7574 /* Change the cut-off limit */
7575 comm->PMELoadBal_max_cutoff = comm->cutoff;
/* Merge a newly received charge-group communication buffer (pulse
 * "pulse" of communication dimension cd) into the per-zone arrays:
 * first shifts the charge groups stored by earlier pulses to make
 * room (fixing their send indices), then copies the received global
 * indices and positions in and extends the atom index ranges.
 */
7578 static void merge_cg_buffers(int ncell,
7579 gmx_domdec_comm_dim_t *cd, int pulse,
7581 int *index_gl, int *recv_i,
7582 rvec *cg_cm, rvec *recv_vr,
7584 cginfo_mb_t *cginfo_mb, int *cginfo)
7586 gmx_domdec_ind_t *ind, *ind_p;
7587 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7588 int shift, shift_at;
7590 ind = &cd->ind[pulse];
7592 /* First correct the already stored data */
7593 shift = ind->nrecv[ncell];
/* Walk the zones backwards so data is moved before being overwritten */
7594 for (cell = ncell-1; cell >= 0; cell--)
7596 shift -= ind->nrecv[cell];
7599 /* Move the cg's present from previous grid pulses */
7600 cg0 = ncg_cell[ncell+cell];
7601 cg1 = ncg_cell[ncell+cell+1];
7602 cgindex[cg1+shift] = cgindex[cg1];
7603 for (cg = cg1-1; cg >= cg0; cg--)
7605 index_gl[cg+shift] = index_gl[cg];
7606 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7607 cgindex[cg+shift] = cgindex[cg];
7608 cginfo[cg+shift] = cginfo[cg];
7610 /* Correct the already stored send indices for the shift */
7611 for (p = 1; p <= pulse; p++)
7613 ind_p = &cd->ind[p];
7615 for (c = 0; c < cell; c++)
7617 cg0 += ind_p->nsend[c];
7619 cg1 = cg0 + ind_p->nsend[cell];
7620 for (cg = cg0; cg < cg1; cg++)
7622 ind_p->index[cg] += shift;
7628 /* Merge in the communicated buffers */
7632 for (cell = 0; cell < ncell; cell++)
7634 cg1 = ncg_cell[ncell+cell+1] + shift;
7637 /* Correct the old cg indices */
7638 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7640 cgindex[cg+1] += shift_at;
7643 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7645 /* Copy this charge group from the buffer */
7646 index_gl[cg1] = recv_i[cg0];
7647 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7648 /* Add it to the cgindex */
7649 cg_gl = index_gl[cg1];
7650 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7651 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7652 cgindex[cg1+1] = cgindex[cg1] + nat;
7657 shift += ind->nrecv[cell];
7658 ncg_cell[ncell+cell+1] = cg1;
/* Record, per zone and per communication pulse, the first and
 * one-past-last atom indices (cell2at0/cell2at1) of the received
 * charge groups, for fast copying of communication buffers.
 */
7662 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7663 int nzone, int cg0, const int *cgindex)
7667 /* Store the atom block boundaries for easy copying of communication buffers
7670 for (zone = 0; zone < nzone; zone++)
7672 for (p = 0; p < cd->np; p++)
7674 cd->ind[p].cell2at0[zone] = cgindex[cg];
7675 cg += cd->ind[p].nrecv[zone];
7676 cd->ind[p].cell2at1[zone] = cgindex[cg];
/* Return whether global charge group cg_gl is bonded-linked to at
 * least one charge group that is not local (bLocalCG FALSE), i.e.
 * whether some of its link partners still need to be communicated.
 */
7681 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7687 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7689 if (!bLocalCG[link->a[i]])
7698 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7700 real c[DIM][4]; /* the corners for the non-bonded communication */
7701 real cr0; /* corner for rounding */
7702 real cr1[4]; /* corners for rounding */
7703 real bc[DIM]; /* corners for bonded communication */
7704 real bcr1; /* corner for rounding for bonded communication */
7707 /* Determine the corners of the domain(s) we are communicating with */
/* Fills the dd_corners_t for communication dimensions dim0/dim1/dim2:
 * lower corners per zone for the non-bonded distance checks, bc/bcr1
 * maxima for the multi-body (bonded) distance checks, and cr0/cr1
 * upper corners used for distance rounding.
 */
7709 set_dd_corners(const gmx_domdec_t *dd,
7710 int dim0, int dim1, int dim2,
7714 const gmx_domdec_comm_t *comm;
7715 const gmx_domdec_zones_t *zones;
7720 zones = &comm->zones;
7722 /* Keep the compiler happy */
7726 /* The first dimension is equal for all cells */
7727 c->c[0][0] = comm->cell_x0[dim0];
7730 c->bc[0] = c->c[0][0];
7735 /* This cell row is only seen from the first row */
7736 c->c[1][0] = comm->cell_x0[dim1];
7737 /* All rows can see this row */
7738 c->c[1][1] = comm->cell_x0[dim1];
/* With DLB, staggered neighbor cells can extend further; take the max */
7741 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7744 /* For the multi-body distance we need the maximum */
7745 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7748 /* Set the upper-right corner for rounding */
7749 c->cr0 = comm->cell_x1[dim0];
7754 for (j = 0; j < 4; j++)
7756 c->c[2][j] = comm->cell_x0[dim2];
7760 /* Use the maximum of the i-cells that see a j-cell */
7761 for (i = 0; i < zones->nizone; i++)
7763 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7769 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7775 /* For the multi-body distance we need the maximum */
7776 c->bc[2] = comm->cell_x0[dim2];
7777 for (i = 0; i < 2; i++)
7779 for (j = 0; j < 2; j++)
7781 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7787 /* Set the upper-right corner for rounding */
7788 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7789 * Only cell (0,0,0) can see cell 7 (1,1,1)
7791 c->cr1[0] = comm->cell_x1[dim1];
7792 c->cr1[3] = comm->cell_x1[dim1];
7795 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7798 /* For the multi-body distance we need the maximum */
7799 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7806 /* Determine which cg's we need to send in this pulse from this zone */
7808 get_zone_pulse_cgs(gmx_domdec_t *dd,
7809 int zonei, int zone,
7811 const int *index_gl,
7813 int dim, int dim_ind,
7814 int dim0, int dim1, int dim2,
7815 real r_comm2, real r_bcomm2,
7819 real skew_fac2_d, real skew_fac_01,
7820 rvec *v_d, rvec *v_0, rvec *v_1,
7821 const dd_corners_t *c,
7823 gmx_bool bDistBonded,
7829 gmx_domdec_ind_t *ind,
7830 int **ibuf, int *ibuf_nalloc,
7836 gmx_domdec_comm_t *comm;
7838 gmx_bool bDistMB_pulse;
7840 real r2, rb2, r, tric_sh;
7843 int nsend_z, nsend, nat;
7847 bScrew = (dd->bScrewPBC && dim == XX);
7849 bDistMB_pulse = (bDistMB && bDistBonded);
7855 for (cg = cg0; cg < cg1; cg++)
7859 if (tric_dist[dim_ind] == 0)
7861 /* Rectangular direction, easy */
7862 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7869 r = cg_cm[cg][dim] - c->bc[dim_ind];
7875 /* Rounding gives at most a 16% reduction
7876 * in communicated atoms
7878 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7880 r = cg_cm[cg][dim0] - c->cr0;
7881 /* This is the first dimension, so always r >= 0 */
7888 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7890 r = cg_cm[cg][dim1] - c->cr1[zone];
7897 r = cg_cm[cg][dim1] - c->bcr1;
7907 /* Triclinic direction, more complicated */
7910 /* Rounding, conservative as the skew_fac multiplication
7911 * will slightly underestimate the distance.
7913 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7915 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7916 for (i = dim0+1; i < DIM; i++)
7918 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7920 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7923 rb[dim0] = rn[dim0];
7926 /* Take care that the cell planes along dim0 might not
7927 * be orthogonal to those along dim1 and dim2.
7929 for (i = 1; i <= dim_ind; i++)
7932 if (normal[dim0][dimd] > 0)
7934 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7937 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7942 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7944 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7946 for (i = dim1+1; i < DIM; i++)
7948 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7950 rn[dim1] += tric_sh;
7953 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7954 /* Take care of coupling of the distances
7955 * to the planes along dim0 and dim1 through dim2.
7957 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7958 /* Take care that the cell planes along dim1
7959 * might not be orthogonal to that along dim2.
7961 if (normal[dim1][dim2] > 0)
7963 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7969 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7972 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7973 /* Take care of coupling of the distances
7974 * to the planes along dim0 and dim1 through dim2.
7976 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7977 /* Take care that the cell planes along dim1
7978 * might not be orthogonal to that along dim2.
7980 if (normal[dim1][dim2] > 0)
7982 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7987 /* The distance along the communication direction */
7988 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7990 for (i = dim+1; i < DIM; i++)
7992 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7997 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7998 /* Take care of coupling of the distances
7999 * to the planes along dim0 and dim1 through dim2.
8001 if (dim_ind == 1 && zonei == 1)
8003 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
8009 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
8012 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
8013 /* Take care of coupling of the distances
8014 * to the planes along dim0 and dim1 through dim2.
8016 if (dim_ind == 1 && zonei == 1)
8018 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
8026 ((bDistMB && rb2 < r_bcomm2) ||
8027 (bDist2B && r2 < r_bcomm2)) &&
8029 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
8030 missing_link(comm->cglink, index_gl[cg],
8033 /* Make an index to the local charge groups */
8034 if (nsend+1 > ind->nalloc)
8036 ind->nalloc = over_alloc_large(nsend+1);
8037 srenew(ind->index, ind->nalloc);
8039 if (nsend+1 > *ibuf_nalloc)
8041 *ibuf_nalloc = over_alloc_large(nsend+1);
8042 srenew(*ibuf, *ibuf_nalloc);
8044 ind->index[nsend] = cg;
8045 (*ibuf)[nsend] = index_gl[cg];
8047 vec_rvec_check_alloc(vbuf, nsend+1);
8049 if (dd->ci[dim] == 0)
8051 /* Correct cg_cm for pbc */
8052 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
8055 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8056 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8061 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8064 nat += cgindex[cg+1] - cgindex[cg];
8070 *nsend_z_ptr = nsend_z;
/* Set up the domain-decomposition halo communication.
 *
 * For every decomposition dimension and every communication pulse this
 * routine:
 *  - selects the charge groups to send, using rectangular or triclinic
 *    distance criteria (the per-cg distance work is delegated to
 *    get_zone_pulse_cgs, parallelized over comm->nth OpenMP threads),
 *  - exchanges send/receive counts, global cg indices and cg positions
 *    with the neighboring ranks along that dimension (dd_sendrecv_int /
 *    dd_sendrecv_rvec with dddirBackward),
 *  - grows and fills the local index_gl/cgindex/cginfo/cg_cm arrays for
 *    the newly received non-home zones.
 * On return dd->ncg_tot, dd->nat_tot and comm->nat[] cover home plus all
 * communicated atoms.
 */
static void setup_dd_communication(gmx_domdec_t *dd,
                                   matrix box, gmx_ddbox_t *ddbox,
                                   t_forcerec *fr, t_state *state, rvec **f)
    int                    dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
    int                    nzone, nzone_send, zone, zonei, cg0, cg1;
    int                    c, i, j, cg, cg_gl, nrcg;
    int                   *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
    gmx_domdec_comm_t     *comm;
    gmx_domdec_zones_t    *zones;
    gmx_domdec_comm_dim_t *cd;
    gmx_domdec_ind_t      *ind;
    cginfo_mb_t           *cginfo_mb;
    gmx_bool               bBondComm, bDist2B, bDistMB, bDistBonded;
    real                   r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
    dd_corners_t           corners;
    rvec                  *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
    real                   skew_fac2_d, skew_fac_01;
        fprintf(debug, "Setting up DD communication\n");
    switch (fr->cutoff_scheme)
            gmx_incons("unimplemented");
    for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
        dim = dd->dim[dim_ind];
        /* Check if we need to use triclinic distances along this dim:
         * a single triclinic direction among dims 0..dim_ind forces it.
         */
        tric_dist[dim_ind] = 0;
        for (i = 0; i <= dim_ind; i++)
            if (ddbox->tric_dir[dd->dim[i]])
                tric_dist[dim_ind] = 1;
    bBondComm = comm->bBondComm;
    /* Do we need to determine extra distances for multi-body bondeds? */
    bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
    /* Do we need to determine extra distances for only two-body bondeds? */
    bDist2B = (bBondComm && !bDistMB);
    /* Squared cut-offs: non-bonded and multi-body bonded communication */
    r_comm2  = sqr(comm->cutoff);
    r_bcomm2 = sqr(comm->cutoff_mbody);
        fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
    zones = &comm->zones;
    /* dim1/dim2 are -1 when fewer than 2/3 DD dimensions are used */
    dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
    dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
    set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
    /* Triclinic stuff */
    normal = ddbox->normal;
        v_0 = ddbox->v[dim0];
        if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
            /* Determine the coupling coefficient for the distances
             * to the cell planes along dim0 and dim1 through dim2.
             * This is required for correct rounding.
             */
                ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
                fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
        v_1 = ddbox->v[dim1];
    /* Local aliases for the cg bookkeeping arrays */
    zone_cg_range = zones->cg_range;
    index_gl      = dd->index_gl;
    cgindex       = dd->cgindex;
    cginfo_mb     = fr->cginfo_mb;
    /* Zone 0 is the home zone; communicated zones are appended after it */
    zone_cg_range[0]   = 0;
    zone_cg_range[1]   = dd->ncg_home;
    comm->zone_ncg1[0] = dd->ncg_home;
    pos_cg             = dd->ncg_home;
    nat_tot = dd->nat_home;
    for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
        dim = dd->dim[dim_ind];
        cd  = &comm->cd[dim_ind];
        if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
            /* No pbc in this dimension, the first node should not comm. */
        v_d         = ddbox->v[dim];
        skew_fac2_d = sqr(ddbox->skew_fac[dim]);
        cd->bInPlace = TRUE;
        for (p = 0; p < cd->np; p++)
            /* Only atoms communicated in the first pulse are used
             * for multi-body bonded interactions or for bBondComm.
             */
            bDistBonded = ((bDistMB || bDist2B) && p == 0);
            for (zone = 0; zone < nzone_send; zone++)
                if (tric_dist[dim_ind] && dim_ind > 0)
                    /* Determine slightly more optimized skew_fac's
                     * This reduces the number of communicated atoms
                     * by about 10% for 3D DD of rhombic dodecahedra.
                     */
                    for (dimd = 0; dimd < dim; dimd++)
                        sf2_round[dimd] = 1;
                        if (ddbox->tric_dir[dimd])
                            for (i = dd->dim[dimd]+1; i < DIM; i++)
                                /* If we are shifted in dimension i
                                 * and the cell plane is tilted forward
                                 * in dimension i, skip this coupling.
                                 */
                                if (!(zones->shift[nzone+zone][i] &&
                                      ddbox->v[dimd][i][dimd] >= 0))
                                        sqr(ddbox->v[dimd][i][dimd]);
                            sf2_round[dimd] = 1/sf2_round[dimd];
                zonei = zone_perm[dim_ind][zone];
                    /* Here we permute the zones to obtain a convenient order
                     * for neighbor searching
                     */
                    cg0 = zone_cg_range[zonei];
                    cg1 = zone_cg_range[zonei+1];
                    /* Look only at the cg's received in the previous grid pulse
                     */
                    cg1 = zone_cg_range[nzone+zone+1];
                    cg0 = cg1 - cd->ind[p-1].nrecv[zone];
#pragma omp parallel for num_threads(comm->nth) schedule(static)
                for (th = 0; th < comm->nth; th++)
                    gmx_domdec_ind_t *ind_p;
                    int             **ibuf_p, *ibuf_nalloc_p;
                    int              *nsend_p, *nat_p;
                        /* Thread 0 writes in the comm buffers */
                        ibuf_p        = &comm->buf_int;
                        ibuf_nalloc_p = &comm->nalloc_int;
                        vbuf_p        = &comm->vbuf;
                        nsend_zone_p  = &ind->nsend[zone];
                        /* Other threads write into temp buffers */
                        ind_p         = &comm->dth[th].ind;
                        ibuf_p        = &comm->dth[th].ibuf;
                        ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
                        vbuf_p        = &comm->dth[th].vbuf;
                        nsend_p       = &comm->dth[th].nsend;
                        nat_p         = &comm->dth[th].nat;
                        nsend_zone_p  = &comm->dth[th].nsend_zone;
                    comm->dth[th].nsend      = 0;
                    comm->dth[th].nat        = 0;
                    comm->dth[th].nsend_zone = 0;
                    /* Static, even division of the cg range over the threads */
                    cg0_th = cg0 + ((cg1 - cg0)* th   )/comm->nth;
                    cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
                    /* Get the cg's for this pulse in this zone */
                    get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
                                       dim, dim_ind, dim0, dim1, dim2,
                                       normal, skew_fac2_d, skew_fac_01,
                                       v_d, v_0, v_1, &corners, sf2_round,
                                       bDistBonded, bBondComm,
                                       ibuf_p, ibuf_nalloc_p,
                /* Append data of threads>=1 to the communication buffers */
                for (th = 1; th < comm->nth; th++)
                    dd_comm_setup_work_t *dth;
                    dth = &comm->dth[th];
                    ns1 = nsend + dth->nsend_zone;
                    if (ns1 > ind->nalloc)
                        ind->nalloc = over_alloc_dd(ns1);
                        srenew(ind->index, ind->nalloc);
                    if (ns1 > comm->nalloc_int)
                        comm->nalloc_int = over_alloc_dd(ns1);
                        srenew(comm->buf_int, comm->nalloc_int);
                    if (ns1 > comm->vbuf.nalloc)
                        comm->vbuf.nalloc = over_alloc_dd(ns1);
                        srenew(comm->vbuf.v, comm->vbuf.nalloc);
                    for (i = 0; i < dth->nsend_zone; i++)
                        ind->index[nsend]    = dth->ind.index[i];
                        comm->buf_int[nsend] = dth->ibuf[i];
                        copy_rvec(dth->vbuf.v[i],
                                  comm->vbuf.v[nsend]);
                    ind->nsend[zone] += dth->nsend_zone;
            /* Clear the counts in case we do not have pbc */
            for (zone = nzone_send; zone < nzone; zone++)
                ind->nsend[zone] = 0;
            /* The last two slots hold the cg and atom totals for this pulse */
            ind->nsend[nzone]   = nsend;
            ind->nsend[nzone+1] = nat;
            /* Communicate the number of cg's and atoms to receive */
            dd_sendrecv_int(dd, dim_ind, dddirBackward,
                            ind->nsend, nzone+2,
                            ind->nrecv, nzone+2);
            /* The rvec buffer is also required for atom buffers of size nsend
             * in dd_move_x and dd_move_f.
             */
            vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
                /* We can receive in place if only the last zone is not empty */
                for (zone = 0; zone < nzone-1; zone++)
                    if (ind->nrecv[zone] > 0)
                        cd->bInPlace = FALSE;
                    /* The int buffer is only required here for the cg indices */
                    if (ind->nrecv[nzone] > comm->nalloc_int2)
                        comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
                        srenew(comm->buf_int2, comm->nalloc_int2);
                    /* The rvec buffer is also required for atom buffers
                     * of size nrecv in dd_move_x and dd_move_f.
                     */
                    i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
                    vec_rvec_check_alloc(&comm->vbuf2, i);
            /* Make space for the global cg indices */
            if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
                || dd->cg_nalloc == 0)
                dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
                srenew(index_gl, dd->cg_nalloc);
                srenew(cgindex, dd->cg_nalloc+1);
            /* Communicate the global cg indices */
                recv_i = index_gl + pos_cg;
                recv_i = comm->buf_int2;
            dd_sendrecv_int(dd, dim_ind, dddirBackward,
                            comm->buf_int, nsend,
                            recv_i, ind->nrecv[nzone]);
            /* Make space for cg_cm */
            dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
            if (fr->cutoff_scheme == ecutsGROUP)
            /* Communicate cg_cm */
                recv_vr = cg_cm + pos_cg;
                recv_vr = comm->vbuf2.v;
            dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
                             comm->vbuf.v, nsend,
                             recv_vr, ind->nrecv[nzone]);
            /* Make the charge group index */
                zone = (p == 0 ? 0 : nzone - 1);
                while (zone < nzone)
                    for (cg = 0; cg < ind->nrecv[zone]; cg++)
                        cg_gl              = index_gl[pos_cg];
                        fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
                        nrcg               = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
                        cgindex[pos_cg+1]  = cgindex[pos_cg] + nrcg;
                            /* Update the charge group presence,
                             * so we can use it in the next pass of the loop.
                             */
                            comm->bLocalCG[cg_gl] = TRUE;
                        comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
                    zone_cg_range[nzone+zone] = pos_cg;
                /* This part of the code is never executed with bBondComm. */
                merge_cg_buffers(nzone, cd, p, zone_cg_range,
                                 index_gl, recv_i, cg_cm, recv_vr,
                                 cgindex, fr->cginfo_mb, fr->cginfo);
                pos_cg += ind->nrecv[nzone];
            nat_tot += ind->nrecv[nzone+1];
        /* Store the atom block for easy copying of communication buffers */
        make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
    /* srenew above may have moved the arrays; store the new pointers */
    dd->index_gl = index_gl;
    dd->cgindex  = cgindex;
    dd->ncg_tot          = zone_cg_range[zones->n];
    dd->nat_tot          = nat_tot;
    comm->nat[ddnatHOME] = dd->nat_home;
    for (i = ddnatZONE; i < ddnatNR; i++)
        comm->nat[i] = dd->nat_tot;
        /* We don't need to update cginfo, since that was already done above.
         * So we pass NULL for the forcerec.
         */
        dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
                      NULL, comm->bLocalCG);
        fprintf(debug, "Finished setting up DD communication, zones:");
        for (c = 0; c < zones->n; c++)
            fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
        fprintf(debug, "\n");
/* Translate the zone-pair (i-zone) bounds from zone indices into
 * charge-group index bounds, using the zone cg ranges set up during
 * communication: cg1 closes the i-zone itself, jcg0/jcg1 delimit the
 * j-zones it interacts with.
 */
static void set_cg_boundaries(gmx_domdec_zones_t *zones)
    for (c = 0; c < zones->nizone; c++)
        zones->izone[c].cg1  = zones->cg_range[c+1];
        zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
        zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
/* Determine the extents (x0/x1 per dimension) and triclinic-corrected
 * bounding boxes (bb_x0/bb_x1) of DD zones zone_start..zone_end-1.
 * Shifted zone dimensions are widened by the communication cut-off
 * (and the multi-body cut-off when bDistMB), divided by skew_fac for
 * triclinic directions. Also computes the home-zone charge-group
 * density (dens_zone0) when zone 0 is included.
 */
static void set_zones_size(gmx_domdec_t *dd,
                           matrix box, const gmx_ddbox_t *ddbox,
                           int zone_start, int zone_end)
    gmx_domdec_comm_t  *comm;
    gmx_domdec_zones_t *zones;
    int                 z, zi, zj0, zj1, d, dim;
    real                size_j, add_tric;
    zones = &comm->zones;
    /* Do we need to determine extra distances for multi-body bondeds? */
    bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
    for (z = zone_start; z < zone_end; z++)
        /* Copy cell limits to zone limits.
         * Valid for non-DD dims and non-shifted dims.
         */
        copy_rvec(comm->cell_x0, zones->size[z].x0);
        copy_rvec(comm->cell_x1, zones->size[z].x1);
    for (d = 0; d < dd->ndim; d++)
        for (z = 0; z < zones->n; z++)
            /* With a staggered grid we have different sizes
             * for non-shifted dimensions.
             */
            if (dd->bGridJump && zones->shift[z][dim] == 0)
                    /* Staggered limits recorded during load balancing:
                     * zone_d1 for the 2nd DD dim, zone_d2 for the 3rd.
                     */
                    zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
                    zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
                    zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
                    zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
        rcmbs = comm->cutoff_mbody;
        if (ddbox->tric_dir[dim])
            /* Convert the cut-offs to distances along the skewed axis */
            rcs   /= ddbox->skew_fac[dim];
            rcmbs /= ddbox->skew_fac[dim];
        /* Set the lower limit for the shifted zone dimensions */
        for (z = zone_start; z < zone_end; z++)
            if (zones->shift[z][dim] > 0)
                if (!dd->bGridJump || d == 0)
                    zones->size[z].x0[dim] = comm->cell_x1[dim];
                    zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
                    /* Here we take the lower limit of the zone from
                     * the lowest domain of the zone below.
                     */
                        zones->size[z].x0[dim] =
                            comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
                            zones->size[z].x0[dim] =
                                zones->size[zone_perm[2][z-4]].x0[dim];
                            zones->size[z].x0[dim] =
                                comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
                    /* A temporary limit, is updated below */
                    zones->size[z].x1[dim] = zones->size[z].x0[dim];
                        for (zi = 0; zi < zones->nizone; zi++)
                            if (zones->shift[zi][dim] == 0)
                                /* This takes the whole zone into account.
                                 * With multiple pulses this will lead
                                 * to a larger zone than strictly necessary.
                                 */
                                zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
                                                             zones->size[zi].x1[dim]+rcmbs);
        /* Loop over the i-zones to set the upper limit of each
         */
        for (zi = 0; zi < zones->nizone; zi++)
            if (zones->shift[zi][dim] == 0)
                for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
                    if (zones->shift[z][dim] > 0)
                        zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
                                                     zones->size[zi].x1[dim]+rcs);
    for (z = zone_start; z < zone_end; z++)
        /* Initialization only required to keep the compiler happy */
        rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
        /* To determine the bounding box for a zone we need to find
         * the extreme corners of 4, 2 or 1 corners.
         */
        nc = 1 << (ddbox->npbcdim - 1);
        for (c = 0; c < nc; c++)
            /* Set up a zone corner at x=0, ignoring triclinic couplings */
            corner[YY] = zones->size[z].x0[YY];
            corner[YY] = zones->size[z].x1[YY];
            corner[ZZ] = zones->size[z].x0[ZZ];
            corner[ZZ] = zones->size[z].x1[ZZ];
            if (dd->ndim == 1 && box[ZZ][YY] != 0)
                /* With 1D domain decomposition the cg's are not in
                 * the triclinic box, but triclinic x-y and rectangular y-z.
                 * Shift y back, so it will later end up at 0.
                 */
                corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
            /* Apply the triclinic couplings */
            assert(ddbox->npbcdim <= DIM);
            for (i = YY; i < ddbox->npbcdim; i++)
                for (j = XX; j < i; j++)
                    corner[j] += corner[i]*box[i][j]/box[i][i];
                copy_rvec(corner, corner_min);
                copy_rvec(corner, corner_max);
                /* Extend the running min/max over all zone corners */
                for (i = 0; i < DIM; i++)
                    corner_min[i] = min(corner_min[i], corner[i]);
                    corner_max[i] = max(corner_max[i], corner[i]);
        /* Copy the extreme corners without offset along x */
        for (i = 0; i < DIM; i++)
            zones->size[z].bb_x0[i] = corner_min[i];
            zones->size[z].bb_x1[i] = corner_max[i];
        /* Add the offset along x */
        zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
        zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
    if (zone_start == 0)
        /* Charge groups per volume of the home zone */
        for (dim = 0; dim < DIM; dim++)
            vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
        zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
        for (z = zone_start; z < zone_end; z++)
            fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
                    zones->size[z].x0[XX], zones->size[z].x1[XX],
                    zones->size[z].x0[YY], zones->size[z].x1[YY],
                    zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
            fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
                    zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
                    zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
                    zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
/* qsort comparator for gmx_cgsort_t: primary key is the ns grid cell
 * index (nsc), ties are broken on the global topology index (ind_gl),
 * giving a reproducible total order.
 * NOTE(review): uses subtraction for comparison; safe only while nsc
 * and ind_gl are small non-negative ints — confirm ranges cannot make
 * the difference overflow.
 */
static int comp_cgsort(const void *a, const void *b)
    gmx_cgsort_t *cga, *cgb;
    cga = (gmx_cgsort_t *)a;
    cgb = (gmx_cgsort_t *)b;
    comp = cga->nsc - cgb->nsc;
        comp = cga->ind_gl - cgb->ind_gl;
8803 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8808 /* Order the data */
8809 for (i = 0; i < n; i++)
8811 buf[i] = a[sort[i].ind];
8814 /* Copy back to the original array */
8815 for (i = 0; i < n; i++)
/* Permute the first n rvecs of v in place according to the sort order
 * (v[i] <- v[sort[i].ind]), using buf (size >= n) as scratch space.
 */
static void order_vec_cg(int n, const gmx_cgsort_t *sort,
    /* Order the data into the scratch buffer */
    for (i = 0; i < n; i++)
        copy_rvec(v[sort[i].ind], buf[i]);
    /* Copy back to the original array */
    for (i = 0; i < n; i++)
        copy_rvec(buf[i], v[i]);
/* Permute per-atom rvec data v according to the charge-group sort
 * order, copying each cg's atom block (cgindex[ind]..cgindex[ind+1])
 * contiguously into buf and then back into v. With cgindex == NULL
 * every cg is a single atom and the per-cg variant is used directly.
 */
static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
    int a, atot, cg, cg0, cg1, i;
    if (cgindex == NULL)
        /* Avoid the useless loop of the atoms within a cg */
        order_vec_cg(ncg, sort, v, buf);
    /* Order the data: gather the atoms of each sorted cg into buf */
    for (cg = 0; cg < ncg; cg++)
        cg0 = cgindex[sort[cg].ind];
        cg1 = cgindex[sort[cg].ind+1];
        for (i = cg0; i < cg1; i++)
            copy_rvec(v[i], buf[a]);
    /* Copy back to the original array */
    for (a = 0; a < atot; a++)
        copy_rvec(buf[a], v[a]);
/* Merge two lists into fully sorted output sort1: sort2 is already
 * ordered (cgs that stayed in their ns grid cell), sort_new (moved/new
 * cgs) is qsorted first, then the two are merged. The comparison key
 * matches comp_cgsort: (nsc, ind_gl).
 */
static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
                         int nsort_new, gmx_cgsort_t *sort_new,
                         gmx_cgsort_t *sort1)
    /* The new indices are not very ordered, so we qsort them */
    gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
    /* sort2 is already ordered, so now we can merge the two arrays */
    while (i2 < nsort2 || i_new < nsort_new)
            /* sort2 exhausted: drain the new list */
            sort1[i1++] = sort_new[i_new++];
        else if (i_new == nsort_new)
            /* new list exhausted: drain sort2 */
            sort1[i1++] = sort2[i2++];
        else if (sort2[i2].nsc < sort_new[i_new].nsc ||
                 (sort2[i2].nsc == sort_new[i_new].nsc &&
                  sort2[i2].ind_gl < sort_new[i_new].ind_gl))
            sort1[i1++] = sort2[i2++];
            sort1[i1++] = sort_new[i_new++];
/* Determine the sort order of the home charge groups for the group
 * cut-off scheme, keyed on ns grid cell index then global index.
 * When ncg_home_old >= 0 the previous order is exploited: cgs that
 * stayed in their grid cell are already ordered and only the moved/new
 * ones need qsorting (ordered_sort merge); otherwise everything is
 * qsorted. Returns the new home cg count (cgs flagged as moved to
 * another node, nsc >= moved, are excluded).
 */
static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
    gmx_domdec_sort_t *sort;
    gmx_cgsort_t      *cgsort, *sort_i;
    int                ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
    int                sort_last, sort_skip;
    sort = dd->comm->sort;
    a = fr->ns.grid->cell_index;
    /* Cell indices >= moved signal cgs that left this node */
    moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
    if (ncg_home_old >= 0)
        /* The charge groups that remained in the same ns grid cell
         * are completely ordered. So we can sort efficiently by sorting
         * the charge groups that did move into the stationary list.
         */
        for (i = 0; i < dd->ncg_home; i++)
            /* Check if this cg did not move to another node */
                if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
                    /* This cg is new on this node or moved ns grid cell */
                    if (nsort_new >= sort->sort_new_nalloc)
                        sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
                        srenew(sort->sort_new, sort->sort_new_nalloc);
                    sort_i = &(sort->sort_new[nsort_new++]);
                    /* This cg did not move */
                    sort_i = &(sort->sort2[nsort2++]);
                /* Sort on the ns grid cell indices
                 * and the global topology index.
                 * index_gl is irrelevant with cell ns,
                 * but we set it here anyhow to avoid a conditional.
                 */
                sort_i->ind_gl = dd->index_gl[i];
            fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
        /* Sort efficiently: merge the ordered and the qsorted list */
        ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
        cgsort = sort->sort;
        for (i = 0; i < dd->ncg_home; i++)
            /* Sort on the ns grid cell indices
             * and the global topology index
             */
            cgsort[i].nsc    = a[i];
            cgsort[i].ind_gl = dd->index_gl[i];
            if (cgsort[i].nsc < moved)
            fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
        /* Determine the order of the charge groups using qsort */
        gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
/* Determine the home cg/atom sort order for the Verlet cut-off scheme:
 * the nbnxn grid already provides the atom order, so simply copy it
 * into the sort array and return the new home count.
 */
static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
    int ncg_new, i, *a, na;
    sort = dd->comm->sort->sort;
    nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
    for (i = 0; i < na; i++)
        sort[ncg_new].ind = a[i];
/* Sort the local (home) state on charge-group position so the data
 * follows the ns/nbnxn grid order. Determines the sort order via
 * dd_sort_order (group scheme) or dd_sort_order_nbnxn (Verlet scheme),
 * then reorders all distributed state vectors (x, v, sd_X, cg_p),
 * cg_cm, the global cg index, cginfo and the local cg index, and
 * finally updates the home atom count. This also enables exact
 * restarts from the sorted step.
 */
static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
    gmx_domdec_sort_t *sort;
    gmx_cgsort_t      *cgsort, *sort_i;
    int                ncg_new, i, *ibuf, cgsize;
    sort = dd->comm->sort;
    if (dd->ncg_home > sort->sort_nalloc)
        sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
        srenew(sort->sort, sort->sort_nalloc);
        srenew(sort->sort2, sort->sort_nalloc);
    cgsort = sort->sort;
    switch (fr->cutoff_scheme)
            ncg_new = dd_sort_order(dd, fr, ncg_home_old);
            ncg_new = dd_sort_order_nbnxn(dd, fr);
            gmx_incons("unimplemented");
    /* We alloc with the old size, since cgindex is still old */
    vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
    vbuf = dd->comm->vbuf.v;
    cgindex = dd->cgindex;
    /* Remove the charge groups which are no longer at home here */
    dd->ncg_home = ncg_new;
        fprintf(debug, "Set the new home charge group count to %d\n",
    /* Reorder the state: every distributed entry present in the state
     * flags gets permuted with the same cg sort order.
     */
    for (i = 0; i < estNR; i++)
        if (EST_DISTR(i) && (state->flags & (1<<i)))
                    order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
                    order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
                    order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
                    order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
                case estDISRE_INITF:
                case estDISRE_RM3TAV:
                case estORIRE_INITF:
                    /* No ordering required */
                    gmx_incons("Unknown state entry encountered in dd_sort_state");
    if (fr->cutoff_scheme == ecutsGROUP)
        /* Reorder the charge group centers of mass */
        order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
    if (dd->ncg_home+1 > sort->ibuf_nalloc)
        sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
        srenew(sort->ibuf, sort->ibuf_nalloc);
    /* Reorder the global cg index */
    order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
    /* Reorder the cginfo */
    order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
    /* Rebuild the local cg index from the sorted cg sizes */
        for (i = 0; i < dd->ncg_home; i++)
            cgsize    = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
            ibuf[i+1] = ibuf[i] + cgsize;
        for (i = 0; i < dd->ncg_home+1; i++)
            dd->cgindex[i] = ibuf[i];
        for (i = 0; i < dd->ncg_home+1; i++)
    /* Set the home atom number */
    dd->nat_home = dd->cgindex[dd->ncg_home];
    if (fr->cutoff_scheme == ecutsVERLET)
        /* The atoms are now exactly in grid order, update the grid order */
        nbnxn_set_atomorder(fr->nbv->nbs);
        /* Copy the sorted ns cell indices back to the ns grid struct */
        for (i = 0; i < dd->ncg_home; i++)
            fr->ns.grid->cell_index[i] = cgsort[i].nsc;
        fr->ns.grid->nr = dd->ncg_home;
/* Accumulate the per-category communicated-atom counts of this step
 * into the running totals (sum_nat). Each category's count is the
 * difference between consecutive cumulative nat[] entries.
 */
static void add_dd_statistics(gmx_domdec_t *dd)
    gmx_domdec_comm_t *comm;
    for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
        comm->sum_nat[ddnat-ddnatZONE] +=
            comm->nat[ddnat] - comm->nat[ddnat-1];
/* Reset the accumulated DD statistics (communicated-atom totals and
 * load counters), e.g. when counters are reset mid-run so the final
 * report only covers the counted part of the run.
 */
void reset_dd_statistics_counters(gmx_domdec_t *dd)
    gmx_domdec_comm_t *comm;
    /* Reset all the statistics and counters for total run counting */
    for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
        comm->sum_nat[ddnat-ddnatZONE] = 0;
    comm->load_step = 0;
    clear_ivec(comm->load_lim);
/* Print the run-averaged DD communication statistics to the log file:
 * average number of atoms communicated per step per category (force,
 * vsites, LINCS constraints), summed over all ranks via gmx_sumd, plus
 * the load-balance report when load was recorded for a dynamics run.
 */
void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
    gmx_domdec_comm_t *comm;
    comm = cr->dd->comm;
    /* Sum the per-rank totals over all ranks */
    gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
    fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
    for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
        /* Average atoms communicated per decomposition step */
        av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
                    " av. #atoms communicated per step for force: %d x %.1f\n",
        if (cr->dd->vsite_comm)
                    " av. #atoms communicated per step for vsites: %d x %.1f\n",
                    /* x is sent 3x (or 2x) per step depending on whether
                     * PME/Ewald needs an extra coordinate communication */
                    (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
        if (cr->dd->constraint_comm)
                    " av. #atoms communicated per step for LINCS: %d x %.1f\n",
                    1 + ir->nLincsIter, av);
            gmx_incons(" Unknown type for DD statistics");
    fprintf(fplog, "\n");
    if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
        print_dd_load_av(fplog, cr->dd);
9256 void dd_partition_system(FILE *fplog,
9259 gmx_bool bMasterState,
9261 t_state *state_global,
9262 gmx_mtop_t *top_global,
9264 t_state *state_local,
9267 gmx_localtop_t *top_local,
9270 gmx_shellfc_t shellfc,
9271 gmx_constr_t constr,
9273 gmx_wallcycle_t wcycle,
9277 gmx_domdec_comm_t *comm;
9278 gmx_ddbox_t ddbox = {0};
9280 gmx_int64_t step_pcoupl;
9281 rvec cell_ns_x0, cell_ns_x1;
9282 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9283 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9284 gmx_bool bRedist, bSortCG, bResortAll;
9285 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9292 bBoxChanged = (bMasterState || DEFORM(*ir));
9293 if (ir->epc != epcNO)
9295 /* With nstpcouple > 1 pressure coupling happens.
9296 * one step after calculating the pressure.
9297 * Box scaling happens at the end of the MD step,
9298 * after the DD partitioning.
9299 * We therefore have to do DLB in the first partitioning
9300 * after an MD step where P-coupling occured.
9301 * We need to determine the last step in which p-coupling occurred.
9302 * MRS -- need to validate this for vv?
9307 step_pcoupl = step - 1;
9311 step_pcoupl = ((step - 1)/n)*n + 1;
9313 if (step_pcoupl >= comm->partition_step)
9319 bNStGlobalComm = (step % nstglobalcomm == 0);
9321 if (!comm->bDynLoadBal)
9327 /* Should we do dynamic load balacing this step?
9328 * Since it requires (possibly expensive) global communication,
9329 * we might want to do DLB less frequently.
9331 if (bBoxChanged || ir->epc != epcNO)
9333 bDoDLB = bBoxChanged;
9337 bDoDLB = bNStGlobalComm;
9341 /* Check if we have recorded loads on the nodes */
9342 if (comm->bRecordLoad && dd_load_count(comm))
9344 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9346 /* Check if we should use DLB at the second partitioning
9347 * and every 100 partitionings,
9348 * so the extra communication cost is negligible.
9350 n = max(100, nstglobalcomm);
9351 bCheckDLB = (comm->n_load_collect == 0 ||
9352 comm->n_load_have % n == n-1);
9359 /* Print load every nstlog, first and last step to the log file */
9360 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9361 comm->n_load_collect == 0 ||
9363 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9365 /* Avoid extra communication due to verbose screen output
9366 * when nstglobalcomm is set.
9368 if (bDoDLB || bLogLoad || bCheckDLB ||
9369 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9371 get_load_distribution(dd, wcycle);
9376 dd_print_load(fplog, dd, step-1);
9380 dd_print_load_verbose(dd);
9383 comm->n_load_collect++;
9387 /* Since the timings are node dependent, the master decides */
9391 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
9394 fprintf(debug, "step %s, imb loss %f\n",
9395 gmx_step_str(step, sbuf),
9396 dd_force_imb_perf_loss(dd));
9399 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9402 turn_on_dlb(fplog, cr, step);
9407 comm->n_load_have++;
9410 cgs_gl = &comm->cgs_gl;
9415 /* Clear the old state */
9416 clear_dd_indices(dd, 0, 0);
9419 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9420 TRUE, cgs_gl, state_global->x, &ddbox);
9422 get_cg_distribution(fplog, step, dd, cgs_gl,
9423 state_global->box, &ddbox, state_global->x);
9425 dd_distribute_state(dd, cgs_gl,
9426 state_global, state_local, f);
9428 dd_make_local_cgs(dd, &top_local->cgs);
9430 /* Ensure that we have space for the new distribution */
9431 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9433 if (fr->cutoff_scheme == ecutsGROUP)
9435 calc_cgcm(fplog, 0, dd->ncg_home,
9436 &top_local->cgs, state_local->x, fr->cg_cm);
9439 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9441 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9443 else if (state_local->ddp_count != dd->ddp_count)
9445 if (state_local->ddp_count > dd->ddp_count)
9447 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9450 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9452 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9455 /* Clear the old state */
9456 clear_dd_indices(dd, 0, 0);
9458 /* Build the new indices */
9459 rebuild_cgindex(dd, cgs_gl->index, state_local);
9460 make_dd_indices(dd, cgs_gl->index, 0);
9461 ncgindex_set = dd->ncg_home;
9463 if (fr->cutoff_scheme == ecutsGROUP)
9465 /* Redetermine the cg COMs */
9466 calc_cgcm(fplog, 0, dd->ncg_home,
9467 &top_local->cgs, state_local->x, fr->cg_cm);
9470 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9472 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9474 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9475 TRUE, &top_local->cgs, state_local->x, &ddbox);
9477 bRedist = comm->bDynLoadBal;
9481 /* We have the full state, only redistribute the cgs */
9483 /* Clear the non-home indices */
9484 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9487 /* Avoid global communication for dim's without pbc and -gcom */
9488 if (!bNStGlobalComm)
9490 copy_rvec(comm->box0, ddbox.box0 );
9491 copy_rvec(comm->box_size, ddbox.box_size);
9493 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9494 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9499 /* For dim's without pbc and -gcom */
9500 copy_rvec(ddbox.box0, comm->box0 );
9501 copy_rvec(ddbox.box_size, comm->box_size);
9503 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9506 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9508 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9511 /* Check if we should sort the charge groups */
9512 if (comm->nstSortCG > 0)
9514 bSortCG = (bMasterState ||
9515 (bRedist && (step % comm->nstSortCG == 0)));
9522 ncg_home_old = dd->ncg_home;
9527 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9529 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9531 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9533 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9536 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9538 &comm->cell_x0, &comm->cell_x1,
9539 dd->ncg_home, fr->cg_cm,
9540 cell_ns_x0, cell_ns_x1, &grid_density);
9544 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9547 switch (fr->cutoff_scheme)
9550 copy_ivec(fr->ns.grid->n, ncells_old);
9551 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9552 state_local->box, cell_ns_x0, cell_ns_x1,
9553 fr->rlistlong, grid_density);
9556 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9559 gmx_incons("unimplemented");
9561 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9562 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9566 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9568 /* Sort the state on charge group position.
9569 * This enables exact restarts from this step.
9570 * It also improves performance by about 15% with larger numbers
9571 * of atoms per node.
9574 /* Fill the ns grid with the home cell,
9575 * so we can sort with the indices.
9577 set_zones_ncg_home(dd);
9579 switch (fr->cutoff_scheme)
9582 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9584 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9586 comm->zones.size[0].bb_x0,
9587 comm->zones.size[0].bb_x1,
9589 comm->zones.dens_zone0,
9592 ncg_moved, bRedist ? comm->moved : NULL,
9593 fr->nbv->grp[eintLocal].kernel_type,
9594 fr->nbv->grp[eintLocal].nbat);
9596 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9599 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9600 0, dd->ncg_home, fr->cg_cm);
9602 copy_ivec(fr->ns.grid->n, ncells_new);
9605 gmx_incons("unimplemented");
9608 bResortAll = bMasterState;
9610 /* Check if we can use the old order and ns grid cell indices
9611 * of the charge groups to sort the charge groups efficiently.
9613 if (ncells_new[XX] != ncells_old[XX] ||
9614 ncells_new[YY] != ncells_old[YY] ||
9615 ncells_new[ZZ] != ncells_old[ZZ])
9622 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9623 gmx_step_str(step, sbuf), dd->ncg_home);
9625 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9626 bResortAll ? -1 : ncg_home_old);
9627 /* Rebuild all the indices */
9628 ga2la_clear(dd->ga2la);
9631 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9634 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9636 /* Set up the communication and communicate the coordinates */
9637 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9639 /* Set the indices */
9640 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9642 /* Set the charge group boundaries for neighbor searching */
9643 set_cg_boundaries(&comm->zones);
9645 if (fr->cutoff_scheme == ecutsVERLET)
9647 set_zones_size(dd, state_local->box, &ddbox,
9648 bSortCG ? 1 : 0, comm->zones.n);
9651 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9654 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9655 -1,state_local->x,state_local->box);
9658 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9660 /* Extract a local topology from the global topology */
9661 for (i = 0; i < dd->ndim; i++)
9663 np[dd->dim[i]] = comm->cd[i].np;
9665 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9666 comm->cellsize_min, np,
9668 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9669 vsite, top_global, top_local);
9671 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9673 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9675 /* Set up the special atom communication */
9676 n = comm->nat[ddnatZONE];
9677 for (i = ddnatZONE+1; i < ddnatNR; i++)
9682 if (vsite && vsite->n_intercg_vsite)
9684 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9688 if (dd->bInterCGcons || dd->bInterCGsettles)
9690 /* Only for inter-cg constraints we need special code */
9691 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9692 constr, ir->nProjOrder,
9693 top_local->idef.il);
9697 gmx_incons("Unknown special atom type setup");
9702 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9704 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9706 /* Make space for the extra coordinates for virtual site
9707 * or constraint communication.
9709 state_local->natoms = comm->nat[ddnatNR-1];
9710 if (state_local->natoms > state_local->nalloc)
9712 dd_realloc_state(state_local, f, state_local->natoms);
9715 if (fr->bF_NoVirSum)
9717 if (vsite && vsite->n_intercg_vsite)
9719 nat_f_novirsum = comm->nat[ddnatVSITE];
9723 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9725 nat_f_novirsum = dd->nat_tot;
9729 nat_f_novirsum = dd->nat_home;
9738 /* Set the number of atoms required for the force calculation.
9739 * Forces need to be constrained when using a twin-range setup
9740 * or with energy minimization. For simple simulations we could
9741 * avoid some allocation, zeroing and copying, but this is
9742 * probably not worth the complications and checking.
9744 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9745 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9747 /* We make the all mdatoms up to nat_tot_con.
9748 * We could save some work by only setting invmass
9749 * between nat_tot and nat_tot_con.
9751 /* This call also sets the new number of home particles to dd->nat_home */
9752 atoms2md(top_global, ir,
9753 comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
9755 /* Now we have the charges we can sort the FE interactions */
9756 dd_sort_local_top(dd, mdatoms, top_local);
9760 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9761 split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
9766 /* Make the local shell stuff, currently no communication is done */
9767 make_local_shells(cr, mdatoms, shellfc);
9770 if (ir->implicit_solvent)
9772 make_local_gb(cr, fr->born, ir->gb_algorithm);
9775 setup_bonded_threading(fr, &top_local->idef);
9777 if (!(cr->duty & DUTY_PME))
9779 /* Send the charges and/or c6/sigmas to our PME only node */
9780 gmx_pme_send_parameters(cr, mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9781 mdatoms->chargeA, mdatoms->chargeB,
9782 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9783 mdatoms->sigmaA, mdatoms->sigmaB,
9784 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9789 set_constraints(constr, top_local, ir, mdatoms, cr);
9792 if (ir->ePull != epullNO)
9794 /* Update the local pull groups */
9795 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9800 /* Update the local rotation groups */
9801 dd_make_local_rotation_groups(dd, ir->rot);
9804 if (ir->eSwapCoords != eswapNO)
9806 /* Update the local groups needed for ion swapping */
9807 dd_make_local_swap_groups(dd, ir->swap);
9810 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9811 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9813 add_dd_statistics(dd);
9815 /* Make sure we only count the cycles for this DD partitioning */
9816 clear_dd_cycle_counts(dd);
9818 /* Because the order of the atoms might have changed since
9819 * the last vsite construction, we need to communicate the constructing
9820 * atom coordinates again (for spreading the forces this MD step).
9822 dd_move_x_vsites(dd, state_local->box, state_local->x);
9824 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9826 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9828 dd_move_x(dd, state_local->box, state_local->x);
9829 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9830 -1, state_local->x, state_local->box);
9833 /* Store the partitioning step */
9834 comm->partition_step = step;
9836 /* Increase the DD partitioning counter */
9838 /* The state currently matches this DD partitioning count, store it */
9839 state_local->ddp_count = dd->ddp_count;
9842 /* The DD master node knows the complete cg distribution,
9843 * store the count so we can possibly skip the cg info communication.
9845 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9848 if (comm->DD_debug > 0)
9850 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9851 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9852 "after partitioning");