2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gromacs/utility/smalloc.h"
49 #include "gmx_fatal.h"
50 #include "gmx_fatal_collective.h"
53 #include "domdec_network.h"
56 #include "chargegroup.h"
65 #include "mtop_util.h"
66 #include "gmx_ga2la.h"
68 #include "nbnxn_search.h"
70 #include "gmx_omp_nthreads.h"
71 #include "gpu_utils.h"
73 #include "gromacs/fileio/futil.h"
74 #include "gromacs/fileio/gmxfio.h"
75 #include "gromacs/fileio/pdbio.h"
76 #include "gromacs/timing/wallcycle.h"
77 #include "gromacs/utility/gmxmpi.h"
78 #include "gromacs/swap/swapcoords.h"
79 #include "gromacs/utility/qsort_threadsafe.h"
80 #include "gromacs/pulling/pull.h"
81 #include "gromacs/pulling/pull_rotation.h"
82 #include "gromacs/imd/imd.h"
84 #define DDRANK(dd, rank) (rank)
85 #define DDMASTERRANK(dd) (dd->masterrank)
87 typedef struct gmx_domdec_master
89 /* The cell boundaries */
91 /* The global charge group division */
92 int *ncg; /* Number of home charge groups for each node */
93 int *index; /* Index of nnodes+1 into cg */
94 int *cg; /* Global charge group index */
95 int *nat; /* Number of home atoms for each node. */
96 int *ibuf; /* Buffer for communication */
97 rvec *vbuf; /* Buffer for state scattering and gathering */
98 } gmx_domdec_master_t;
102 /* The numbers of charge groups to send and receive for each cell
103 * that requires communication, the last entry contains the total
104 * number of atoms that needs to be communicated.
106 int nsend[DD_MAXIZONE+2];
107 int nrecv[DD_MAXIZONE+2];
108 /* The charge groups to send */
111 /* The atom range for non-in-place communication */
112 int cell2at0[DD_MAXIZONE];
113 int cell2at1[DD_MAXIZONE];
118 int np; /* Number of grid pulses in this dimension */
119 int np_dlb; /* For dlb, for use with edlbAUTO */
120 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
122 gmx_bool bInPlace; /* Can we communicate in place? */
123 } gmx_domdec_comm_dim_t;
127 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
128 real *cell_f; /* State var.: cell boundaries, box relative */
129 real *old_cell_f; /* Temp. var.: old cell size */
130 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
131 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
132 real *bound_min; /* Temp. var.: lower limit for cell boundary */
133 real *bound_max; /* Temp. var.: upper limit for cell boundary */
134 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
135 real *buf_ncd; /* Temp. var. */
138 #define DD_NLOAD_MAX 9
140 /* Here floats are accurate enough, since these variables
141 * only influence the load balancing, not the actual MD results.
168 gmx_cgsort_t *sort_new;
180 /* This enum determines the order of the coordinates.
181 * ddnatHOME and ddnatZONE should be first and second,
182 * the others can be ordered as wanted.
185 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
189 edlbAUTO, edlbNO, edlbYES, edlbNR
191 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
195 int dim; /* The dimension */
196 gmx_bool dim_match; /* Tells if DD and PME dims match */
197 int nslab; /* The number of PME slabs in this dimension */
198 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
199 int *pp_min; /* The minimum pp node location, size nslab */
200 int *pp_max; /* The maximum pp node location,size nslab */
201 int maxshift; /* The maximum shift for coordinate redistribution in PME */
206 real min0; /* The minimum bottom of this zone */
207 real max1; /* The maximum top of this zone */
208 real min1; /* The minimum top of this zone */
209 real mch0; /* The maximum bottom communicaton height for this zone */
210 real mch1; /* The maximum top communicaton height for this zone */
211 real p1_0; /* The bottom value of the first cell in this zone */
212 real p1_1; /* The top value of the first cell in this zone */
217 gmx_domdec_ind_t ind;
224 } dd_comm_setup_work_t;
226 typedef struct gmx_domdec_comm
228 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
229 * unless stated otherwise.
232 /* The number of decomposition dimensions for PME, 0: no PME */
234 /* The number of nodes doing PME (PP/PME or only PME) */
238 /* The communication setup including the PME only nodes */
239 gmx_bool bCartesianPP_PME;
242 int *pmenodes; /* size npmenodes */
243 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
244 * but with bCartesianPP_PME */
245 gmx_ddpme_t ddpme[2];
247 /* The DD particle-particle nodes only */
248 gmx_bool bCartesianPP;
249 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
251 /* The global charge groups */
254 /* Should we sort the cgs */
256 gmx_domdec_sort_t *sort;
258 /* Are there charge groups? */
261 /* Are there bonded and multi-body interactions between charge groups? */
262 gmx_bool bInterCGBondeds;
263 gmx_bool bInterCGMultiBody;
265 /* Data for the optional bonded interaction atom communication range */
272 /* Are we actually using DLB? */
273 gmx_bool bDynLoadBal;
275 /* Cell sizes for static load balancing, first index cartesian */
278 /* The width of the communicated boundaries */
281 /* The minimum cell size (including triclinic correction) */
283 /* For dlb, for use with edlbAUTO */
284 rvec cellsize_min_dlb;
285 /* The lower limit for the DD cell size with DLB */
287 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
288 gmx_bool bVacDLBNoLimit;
290 /* With PME load balancing we set limits on DLB */
291 gmx_bool bPMELoadBalDLBLimits;
292 /* DLB needs to take into account that we want to allow this maximum
293 * cut-off (for PME load balancing), this could limit cell boundaries.
295 real PMELoadBal_max_cutoff;
297 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
299 /* box0 and box_size are required with dim's without pbc and -gcom */
303 /* The cell boundaries */
307 /* The old location of the cell boundaries, to check cg displacements */
311 /* The communication setup and charge group boundaries for the zones */
312 gmx_domdec_zones_t zones;
314 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
315 * cell boundaries of neighboring cells for dynamic load balancing.
317 gmx_ddzone_t zone_d1[2];
318 gmx_ddzone_t zone_d2[2][2];
320 /* The coordinate/force communication setup and indices */
321 gmx_domdec_comm_dim_t cd[DIM];
322 /* The maximum number of cells to communicate with in one dimension */
325 /* Which cg distribution is stored on the master node */
326 int master_cg_ddp_count;
328 /* The number of cg's received from the direct neighbors */
329 int zone_ncg1[DD_MAXZONE];
331 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
334 /* Array for signalling if atoms have moved to another domain */
338 /* Communication buffer for general use */
342 /* Communication buffer for general use */
345 /* Temporary storage for thread parallel communication setup */
347 dd_comm_setup_work_t *dth;
349 /* Communication buffers only used with multiple grid pulses */
354 /* Communication buffers for local redistribution */
356 int cggl_flag_nalloc[DIM*2];
358 int cgcm_state_nalloc[DIM*2];
360 /* Cell sizes for dynamic load balancing */
361 gmx_domdec_root_t **root;
365 real cell_f_max0[DIM];
366 real cell_f_min1[DIM];
368 /* Stuff for load communication */
369 gmx_bool bRecordLoad;
370 gmx_domdec_load_t *load;
371 int nrank_gpu_shared;
373 MPI_Comm *mpi_comm_load;
374 MPI_Comm mpi_comm_gpu_shared;
377 /* Maximum DLB scaling per load balancing step in percent */
381 float cycl[ddCyclNr];
382 int cycl_n[ddCyclNr];
383 float cycl_max[ddCyclNr];
384 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
388 /* Have often have did we have load measurements */
390 /* Have often have we collected the load measurements */
394 double sum_nat[ddnatNR-ddnatZONE];
404 /* The last partition step */
405 gmx_int64_t partition_step;
413 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
416 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
417 #define DD_FLAG_NRCG 65535
418 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
419 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
421 /* Zone permutation required to obtain consecutive charge groups
422 * for neighbor searching.
424 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
426 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
427 * components see only j zones with that component 0.
430 /* The DD zone order */
431 static const ivec dd_zo[DD_MAXZONE] =
432 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
437 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
442 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
447 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
449 /* Factors used to avoid problems due to rounding issues */
450 #define DD_CELL_MARGIN 1.0001
451 #define DD_CELL_MARGIN2 1.00005
452 /* Factor to account for pressure scaling during nstlist steps */
453 #define DD_PRES_SCALE_MARGIN 1.02
455 /* Turn on DLB when the load imbalance causes this amount of total loss.
456 * There is a bit of overhead with DLB and it's difficult to achieve
457 * a load imbalance of less than 2% with DLB.
459 #define DD_PERF_LOSS_DLB_ON 0.02
461 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
462 #define DD_PERF_LOSS_WARN 0.05
464 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
466 /* Use separate MPI send and receive commands
467 * when nnodes <= GMX_DD_NNODES_SENDRECV.
468 * This saves memory (and some copying for small nnodes).
469 * For high parallelization scatter and gather calls are used.
471 #define GMX_DD_NNODES_SENDRECV 4
475 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
477 static void index2xyz(ivec nc,int ind,ivec xyz)
479 xyz[XX] = ind % nc[XX];
480 xyz[YY] = (ind / nc[XX]) % nc[YY];
481 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
485 /* This order is required to minimize the coordinate communication in PME
486 * which uses decomposition in the x direction.
488 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
490 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
492 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
493 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
494 xyz[ZZ] = ind % nc[ZZ];
497 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
502 ddindex = dd_index(dd->nc, c);
503 if (dd->comm->bCartesianPP_PME)
505 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
507 else if (dd->comm->bCartesianPP)
510 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
521 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
523 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
526 int ddglatnr(gmx_domdec_t *dd, int i)
536 if (i >= dd->comm->nat[ddnatNR-1])
538 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
540 atnr = dd->gatindex[i] + 1;
546 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
548 return &dd->comm->cgs_gl;
551 static void vec_rvec_init(vec_rvec_t *v)
557 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
561 v->nalloc = over_alloc_dd(n);
562 srenew(v->v, v->nalloc);
566 void dd_store_state(gmx_domdec_t *dd, t_state *state)
570 if (state->ddp_count != dd->ddp_count)
572 gmx_incons("The state does not the domain decomposition state");
575 state->ncg_gl = dd->ncg_home;
576 if (state->ncg_gl > state->cg_gl_nalloc)
578 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
579 srenew(state->cg_gl, state->cg_gl_nalloc);
581 for (i = 0; i < state->ncg_gl; i++)
583 state->cg_gl[i] = dd->index_gl[i];
586 state->ddp_count_cg_gl = dd->ddp_count;
589 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
591 return &dd->comm->zones;
594 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
595 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
597 gmx_domdec_zones_t *zones;
600 zones = &dd->comm->zones;
603 while (icg >= zones->izone[izone].cg1)
612 else if (izone < zones->nizone)
614 *jcg0 = zones->izone[izone].jcg0;
618 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
619 icg, izone, zones->nizone);
622 *jcg1 = zones->izone[izone].jcg1;
624 for (d = 0; d < dd->ndim; d++)
627 shift0[dim] = zones->izone[izone].shift0[dim];
628 shift1[dim] = zones->izone[izone].shift1[dim];
629 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
631 /* A conservative approach, this can be optimized */
638 int dd_natoms_vsite(gmx_domdec_t *dd)
640 return dd->comm->nat[ddnatVSITE];
643 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
645 *at_start = dd->comm->nat[ddnatCON-1];
646 *at_end = dd->comm->nat[ddnatCON];
649 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
651 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
652 int *index, *cgindex;
653 gmx_domdec_comm_t *comm;
654 gmx_domdec_comm_dim_t *cd;
655 gmx_domdec_ind_t *ind;
656 rvec shift = {0, 0, 0}, *buf, *rbuf;
657 gmx_bool bPBC, bScrew;
661 cgindex = dd->cgindex;
666 nat_tot = dd->nat_home;
667 for (d = 0; d < dd->ndim; d++)
669 bPBC = (dd->ci[dd->dim[d]] == 0);
670 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
673 copy_rvec(box[dd->dim[d]], shift);
676 for (p = 0; p < cd->np; p++)
683 for (i = 0; i < ind->nsend[nzone]; i++)
685 at0 = cgindex[index[i]];
686 at1 = cgindex[index[i]+1];
687 for (j = at0; j < at1; j++)
689 copy_rvec(x[j], buf[n]);
696 for (i = 0; i < ind->nsend[nzone]; i++)
698 at0 = cgindex[index[i]];
699 at1 = cgindex[index[i]+1];
700 for (j = at0; j < at1; j++)
702 /* We need to shift the coordinates */
703 rvec_add(x[j], shift, buf[n]);
710 for (i = 0; i < ind->nsend[nzone]; i++)
712 at0 = cgindex[index[i]];
713 at1 = cgindex[index[i]+1];
714 for (j = at0; j < at1; j++)
717 buf[n][XX] = x[j][XX] + shift[XX];
719 * This operation requires a special shift force
720 * treatment, which is performed in calc_vir.
722 buf[n][YY] = box[YY][YY] - x[j][YY];
723 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
735 rbuf = comm->vbuf2.v;
737 /* Send and receive the coordinates */
738 dd_sendrecv_rvec(dd, d, dddirBackward,
739 buf, ind->nsend[nzone+1],
740 rbuf, ind->nrecv[nzone+1]);
744 for (zone = 0; zone < nzone; zone++)
746 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
748 copy_rvec(rbuf[j], x[i]);
753 nat_tot += ind->nrecv[nzone+1];
759 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
761 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
762 int *index, *cgindex;
763 gmx_domdec_comm_t *comm;
764 gmx_domdec_comm_dim_t *cd;
765 gmx_domdec_ind_t *ind;
769 gmx_bool bPBC, bScrew;
773 cgindex = dd->cgindex;
778 nzone = comm->zones.n/2;
779 nat_tot = dd->nat_tot;
780 for (d = dd->ndim-1; d >= 0; d--)
782 bPBC = (dd->ci[dd->dim[d]] == 0);
783 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
784 if (fshift == NULL && !bScrew)
788 /* Determine which shift vector we need */
794 for (p = cd->np-1; p >= 0; p--)
797 nat_tot -= ind->nrecv[nzone+1];
804 sbuf = comm->vbuf2.v;
806 for (zone = 0; zone < nzone; zone++)
808 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
810 copy_rvec(f[i], sbuf[j]);
815 /* Communicate the forces */
816 dd_sendrecv_rvec(dd, d, dddirForward,
817 sbuf, ind->nrecv[nzone+1],
818 buf, ind->nsend[nzone+1]);
820 /* Add the received forces */
824 for (i = 0; i < ind->nsend[nzone]; i++)
826 at0 = cgindex[index[i]];
827 at1 = cgindex[index[i]+1];
828 for (j = at0; j < at1; j++)
830 rvec_inc(f[j], buf[n]);
837 for (i = 0; i < ind->nsend[nzone]; i++)
839 at0 = cgindex[index[i]];
840 at1 = cgindex[index[i]+1];
841 for (j = at0; j < at1; j++)
843 rvec_inc(f[j], buf[n]);
844 /* Add this force to the shift force */
845 rvec_inc(fshift[is], buf[n]);
852 for (i = 0; i < ind->nsend[nzone]; i++)
854 at0 = cgindex[index[i]];
855 at1 = cgindex[index[i]+1];
856 for (j = at0; j < at1; j++)
858 /* Rotate the force */
859 f[j][XX] += buf[n][XX];
860 f[j][YY] -= buf[n][YY];
861 f[j][ZZ] -= buf[n][ZZ];
864 /* Add this force to the shift force */
865 rvec_inc(fshift[is], buf[n]);
876 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
878 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
879 int *index, *cgindex;
880 gmx_domdec_comm_t *comm;
881 gmx_domdec_comm_dim_t *cd;
882 gmx_domdec_ind_t *ind;
887 cgindex = dd->cgindex;
889 buf = &comm->vbuf.v[0][0];
892 nat_tot = dd->nat_home;
893 for (d = 0; d < dd->ndim; d++)
896 for (p = 0; p < cd->np; p++)
901 for (i = 0; i < ind->nsend[nzone]; i++)
903 at0 = cgindex[index[i]];
904 at1 = cgindex[index[i]+1];
905 for (j = at0; j < at1; j++)
918 rbuf = &comm->vbuf2.v[0][0];
920 /* Send and receive the coordinates */
921 dd_sendrecv_real(dd, d, dddirBackward,
922 buf, ind->nsend[nzone+1],
923 rbuf, ind->nrecv[nzone+1]);
927 for (zone = 0; zone < nzone; zone++)
929 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
936 nat_tot += ind->nrecv[nzone+1];
942 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
944 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
945 int *index, *cgindex;
946 gmx_domdec_comm_t *comm;
947 gmx_domdec_comm_dim_t *cd;
948 gmx_domdec_ind_t *ind;
953 cgindex = dd->cgindex;
955 buf = &comm->vbuf.v[0][0];
958 nzone = comm->zones.n/2;
959 nat_tot = dd->nat_tot;
960 for (d = dd->ndim-1; d >= 0; d--)
963 for (p = cd->np-1; p >= 0; p--)
966 nat_tot -= ind->nrecv[nzone+1];
973 sbuf = &comm->vbuf2.v[0][0];
975 for (zone = 0; zone < nzone; zone++)
977 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
984 /* Communicate the forces */
985 dd_sendrecv_real(dd, d, dddirForward,
986 sbuf, ind->nrecv[nzone+1],
987 buf, ind->nsend[nzone+1]);
989 /* Add the received forces */
991 for (i = 0; i < ind->nsend[nzone]; i++)
993 at0 = cgindex[index[i]];
994 at1 = cgindex[index[i]+1];
995 for (j = at0; j < at1; j++)
1006 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
1008 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1010 zone->min0, zone->max1,
1011 zone->mch0, zone->mch0,
1012 zone->p1_0, zone->p1_1);
1016 #define DDZONECOMM_MAXZONE 5
1017 #define DDZONECOMM_BUFSIZE 3
1019 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1020 int ddimind, int direction,
1021 gmx_ddzone_t *buf_s, int n_s,
1022 gmx_ddzone_t *buf_r, int n_r)
1024 #define ZBS DDZONECOMM_BUFSIZE
1025 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1026 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
1029 for (i = 0; i < n_s; i++)
1031 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1032 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1033 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1034 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1035 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1036 vbuf_s[i*ZBS+1][2] = 0;
1037 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1038 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1039 vbuf_s[i*ZBS+2][2] = 0;
1042 dd_sendrecv_rvec(dd, ddimind, direction,
1046 for (i = 0; i < n_r; i++)
1048 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1049 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1050 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1051 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1052 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1053 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1054 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
1060 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1061 rvec cell_ns_x0, rvec cell_ns_x1)
1063 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1065 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1066 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1067 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1068 rvec extr_s[2], extr_r[2];
1070 real dist_d, c = 0, det;
1071 gmx_domdec_comm_t *comm;
1072 gmx_bool bPBC, bUse;
1076 for (d = 1; d < dd->ndim; d++)
1079 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1080 zp->min0 = cell_ns_x0[dim];
1081 zp->max1 = cell_ns_x1[dim];
1082 zp->min1 = cell_ns_x1[dim];
1083 zp->mch0 = cell_ns_x0[dim];
1084 zp->mch1 = cell_ns_x1[dim];
1085 zp->p1_0 = cell_ns_x0[dim];
1086 zp->p1_1 = cell_ns_x1[dim];
1089 for (d = dd->ndim-2; d >= 0; d--)
1092 bPBC = (dim < ddbox->npbcdim);
1094 /* Use an rvec to store two reals */
1095 extr_s[d][0] = comm->cell_f0[d+1];
1096 extr_s[d][1] = comm->cell_f1[d+1];
1097 extr_s[d][2] = comm->cell_f1[d+1];
1100 /* Store the extremes in the backward sending buffer,
1101 * so the get updated separately from the forward communication.
1103 for (d1 = d; d1 < dd->ndim-1; d1++)
1105 /* We invert the order to be able to use the same loop for buf_e */
1106 buf_s[pos].min0 = extr_s[d1][1];
1107 buf_s[pos].max1 = extr_s[d1][0];
1108 buf_s[pos].min1 = extr_s[d1][2];
1109 buf_s[pos].mch0 = 0;
1110 buf_s[pos].mch1 = 0;
1111 /* Store the cell corner of the dimension we communicate along */
1112 buf_s[pos].p1_0 = comm->cell_x0[dim];
1113 buf_s[pos].p1_1 = 0;
1117 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1120 if (dd->ndim == 3 && d == 0)
1122 buf_s[pos] = comm->zone_d2[0][1];
1124 buf_s[pos] = comm->zone_d1[0];
1128 /* We only need to communicate the extremes
1129 * in the forward direction
1131 npulse = comm->cd[d].np;
1134 /* Take the minimum to avoid double communication */
1135 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1139 /* Without PBC we should really not communicate over
1140 * the boundaries, but implementing that complicates
1141 * the communication setup and therefore we simply
1142 * do all communication, but ignore some data.
1144 npulse_min = npulse;
1146 for (p = 0; p < npulse_min; p++)
1148 /* Communicate the extremes forward */
1149 bUse = (bPBC || dd->ci[dim] > 0);
1151 dd_sendrecv_rvec(dd, d, dddirForward,
1152 extr_s+d, dd->ndim-d-1,
1153 extr_r+d, dd->ndim-d-1);
1157 for (d1 = d; d1 < dd->ndim-1; d1++)
1159 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1160 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1161 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1167 for (p = 0; p < npulse; p++)
1169 /* Communicate all the zone information backward */
1170 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1172 dd_sendrecv_ddzone(dd, d, dddirBackward,
1179 for (d1 = d+1; d1 < dd->ndim; d1++)
1181 /* Determine the decrease of maximum required
1182 * communication height along d1 due to the distance along d,
1183 * this avoids a lot of useless atom communication.
1185 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1187 if (ddbox->tric_dir[dim])
1189 /* c is the off-diagonal coupling between the cell planes
1190 * along directions d and d1.
1192 c = ddbox->v[dim][dd->dim[d1]][dim];
1198 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1201 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1205 /* A negative value signals out of range */
1211 /* Accumulate the extremes over all pulses */
1212 for (i = 0; i < buf_size; i++)
1216 buf_e[i] = buf_r[i];
1222 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1223 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1224 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1227 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1235 if (bUse && dh[d1] >= 0)
1237 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1238 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1241 /* Copy the received buffer to the send buffer,
1242 * to pass the data through with the next pulse.
1244 buf_s[i] = buf_r[i];
1246 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1247 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1249 /* Store the extremes */
1252 for (d1 = d; d1 < dd->ndim-1; d1++)
1254 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1255 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1256 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1260 if (d == 1 || (d == 0 && dd->ndim == 3))
1262 for (i = d; i < 2; i++)
1264 comm->zone_d2[1-d][i] = buf_e[pos];
1270 comm->zone_d1[1] = buf_e[pos];
1280 for (i = 0; i < 2; i++)
1284 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1286 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1287 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1293 for (i = 0; i < 2; i++)
1295 for (j = 0; j < 2; j++)
1299 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1301 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1302 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1306 for (d = 1; d < dd->ndim; d++)
1308 comm->cell_f_max0[d] = extr_s[d-1][0];
1309 comm->cell_f_min1[d] = extr_s[d-1][1];
1312 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1313 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1318 static void dd_collect_cg(gmx_domdec_t *dd,
1319 t_state *state_local)
1321 gmx_domdec_master_t *ma = NULL;
1322 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1324 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1326 /* The master has the correct distribution */
1330 if (state_local->ddp_count == dd->ddp_count)
1332 /* The local state and DD are in sync, use the DD indices */
1333 ncg_home = dd->ncg_home;
1335 nat_home = dd->nat_home;
1337 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1339 /* The DD is out of sync with the local state, but we have stored
1340 * the cg indices with the local state, so we can use those.
1344 cgs_gl = &dd->comm->cgs_gl;
1346 ncg_home = state_local->ncg_gl;
1347 cg = state_local->cg_gl;
1349 for (i = 0; i < ncg_home; i++)
1351 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1356 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1370 /* Collect the charge group and atom counts on the master */
1371 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1376 for (i = 0; i < dd->nnodes; i++)
1378 ma->ncg[i] = ma->ibuf[2*i];
1379 ma->nat[i] = ma->ibuf[2*i+1];
1380 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1383 /* Make byte counts and indices */
1384 for (i = 0; i < dd->nnodes; i++)
1386 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1387 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1391 fprintf(debug, "Initial charge group distribution: ");
1392 for (i = 0; i < dd->nnodes; i++)
1394 fprintf(debug, " %d", ma->ncg[i]);
1396 fprintf(debug, "\n");
1400 /* Collect the charge group indices on the master */
1402 ncg_home*sizeof(int), cg,
1403 DDMASTER(dd) ? ma->ibuf : NULL,
1404 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1405 DDMASTER(dd) ? ma->cg : NULL);
1407 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1410 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1413 gmx_domdec_master_t *ma;
1414 int n, i, c, a, nalloc = 0;
1423 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1424 dd->rank, dd->mpi_comm_all);
1429 /* Copy the master coordinates to the global array */
1430 cgs_gl = &dd->comm->cgs_gl;
1432 n = DDMASTERRANK(dd);
1434 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1436 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1438 copy_rvec(lv[a++], v[c]);
1442 for (n = 0; n < dd->nnodes; n++)
1446 if (ma->nat[n] > nalloc)
1448 nalloc = over_alloc_dd(ma->nat[n]);
1449 srenew(buf, nalloc);
1452 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1453 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1456 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1458 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1460 copy_rvec(buf[a++], v[c]);
1469 static void get_commbuffer_counts(gmx_domdec_t *dd,
1470 int **counts, int **disps)
1472 gmx_domdec_master_t *ma;
1477 /* Make the rvec count and displacment arrays */
1479 *disps = ma->ibuf + dd->nnodes;
1480 for (n = 0; n < dd->nnodes; n++)
1482 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1483 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1487 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1490 gmx_domdec_master_t *ma;
1491 int *rcounts = NULL, *disps = NULL;
1500 get_commbuffer_counts(dd, &rcounts, &disps);
1505 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1509 cgs_gl = &dd->comm->cgs_gl;
1512 for (n = 0; n < dd->nnodes; n++)
1514 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1516 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1518 copy_rvec(buf[a++], v[c]);
1525 void dd_collect_vec(gmx_domdec_t *dd,
1526 t_state *state_local, rvec *lv, rvec *v)
1528 gmx_domdec_master_t *ma;
1529 int n, i, c, a, nalloc = 0;
1532 dd_collect_cg(dd, state_local);
1534 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1536 dd_collect_vec_sendrecv(dd, lv, v);
1540 dd_collect_vec_gatherv(dd, lv, v);
1545 void dd_collect_state(gmx_domdec_t *dd,
1546 t_state *state_local, t_state *state)
1550 nh = state->nhchainlength;
1554 for (i = 0; i < efptNR; i++)
1556 state->lambda[i] = state_local->lambda[i];
1558 state->fep_state = state_local->fep_state;
1559 state->veta = state_local->veta;
1560 state->vol0 = state_local->vol0;
1561 copy_mat(state_local->box, state->box);
1562 copy_mat(state_local->boxv, state->boxv);
1563 copy_mat(state_local->svir_prev, state->svir_prev);
1564 copy_mat(state_local->fvir_prev, state->fvir_prev);
1565 copy_mat(state_local->pres_prev, state->pres_prev);
1567 for (i = 0; i < state_local->ngtc; i++)
1569 for (j = 0; j < nh; j++)
1571 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1572 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1574 state->therm_integral[i] = state_local->therm_integral[i];
1576 for (i = 0; i < state_local->nnhpres; i++)
1578 for (j = 0; j < nh; j++)
1580 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1581 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1585 for (est = 0; est < estNR; est++)
1587 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1592 dd_collect_vec(dd, state_local, state_local->x, state->x);
1595 dd_collect_vec(dd, state_local, state_local->v, state->v);
1598 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1601 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1603 case estDISRE_INITF:
1604 case estDISRE_RM3TAV:
1605 case estORIRE_INITF:
1609 gmx_incons("Unknown state entry encountered in dd_collect_state");
1615 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1621 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1624 state->nalloc = over_alloc_dd(nalloc);
1626 for (est = 0; est < estNR; est++)
1628 if (EST_DISTR(est) && (state->flags & (1<<est)))
1633 srenew(state->x, state->nalloc);
1636 srenew(state->v, state->nalloc);
1639 srenew(state->sd_X, state->nalloc);
1642 srenew(state->cg_p, state->nalloc);
1644 case estDISRE_INITF:
1645 case estDISRE_RM3TAV:
1646 case estORIRE_INITF:
1648 /* No reallocation required */
1651 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1658 srenew(*f, state->nalloc);
1662 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1665 if (nalloc > fr->cg_nalloc)
1669 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1671 fr->cg_nalloc = over_alloc_dd(nalloc);
1672 srenew(fr->cginfo, fr->cg_nalloc);
1673 if (fr->cutoff_scheme == ecutsGROUP)
1675 srenew(fr->cg_cm, fr->cg_nalloc);
1678 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1680 /* We don't use charge groups, we use x in state to set up
1681 * the atom communication.
1683 dd_realloc_state(state, f, nalloc);
1687 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1690 gmx_domdec_master_t *ma;
1691 int n, i, c, a, nalloc = 0;
1698 for (n = 0; n < dd->nnodes; n++)
1702 if (ma->nat[n] > nalloc)
1704 nalloc = over_alloc_dd(ma->nat[n]);
1705 srenew(buf, nalloc);
1707 /* Use lv as a temporary buffer */
1709 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1711 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1713 copy_rvec(v[c], buf[a++]);
1716 if (a != ma->nat[n])
1718 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1723 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1724 DDRANK(dd, n), n, dd->mpi_comm_all);
1729 n = DDMASTERRANK(dd);
1731 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1733 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1735 copy_rvec(v[c], lv[a++]);
1742 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1743 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1748 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1751 gmx_domdec_master_t *ma;
1752 int *scounts = NULL, *disps = NULL;
1753 int n, i, c, a, nalloc = 0;
1760 get_commbuffer_counts(dd, &scounts, &disps);
1764 for (n = 0; n < dd->nnodes; n++)
1766 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1768 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1770 copy_rvec(v[c], buf[a++]);
1776 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1779 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1781 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1783 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1787 dd_distribute_vec_scatterv(dd, cgs, v, lv);
1791 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1794 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1795 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1796 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1798 if (dfhist->nlambda > 0)
1800 int nlam = dfhist->nlambda;
1801 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1802 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1803 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1804 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1805 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1806 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
1808 for (i = 0; i < nlam; i++)
1810 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1811 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1812 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1813 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1814 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1815 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
1820 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1821 t_state *state, t_state *state_local,
1826 nh = state->nhchainlength;
1830 for (i = 0; i < efptNR; i++)
1832 state_local->lambda[i] = state->lambda[i];
1834 state_local->fep_state = state->fep_state;
1835 state_local->veta = state->veta;
1836 state_local->vol0 = state->vol0;
1837 copy_mat(state->box, state_local->box);
1838 copy_mat(state->box_rel, state_local->box_rel);
1839 copy_mat(state->boxv, state_local->boxv);
1840 copy_mat(state->svir_prev, state_local->svir_prev);
1841 copy_mat(state->fvir_prev, state_local->fvir_prev);
1842 copy_df_history(&state_local->dfhist, &state->dfhist);
1843 for (i = 0; i < state_local->ngtc; i++)
1845 for (j = 0; j < nh; j++)
1847 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1848 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1850 state_local->therm_integral[i] = state->therm_integral[i];
1852 for (i = 0; i < state_local->nnhpres; i++)
1854 for (j = 0; j < nh; j++)
1856 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1857 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1861 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1862 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1863 dd_bcast(dd, sizeof(real), &state_local->veta);
1864 dd_bcast(dd, sizeof(real), &state_local->vol0);
1865 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1866 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1867 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1868 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1869 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1870 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1871 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1872 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1873 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1874 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1876 /* communicate df_history -- required for restarting from checkpoint */
1877 dd_distribute_dfhist(dd, &state_local->dfhist);
1879 if (dd->nat_home > state_local->nalloc)
1881 dd_realloc_state(state_local, f, dd->nat_home);
1883 for (i = 0; i < estNR; i++)
1885 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1890 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1893 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1896 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1899 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1901 case estDISRE_INITF:
1902 case estDISRE_RM3TAV:
1903 case estORIRE_INITF:
1905 /* Not implemented yet */
1908 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1914 static char dim2char(int dim)
1920 case XX: c = 'X'; break;
1921 case YY: c = 'Y'; break;
1922 case ZZ: c = 'Z'; break;
1923 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
1929 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1930 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1932 rvec grid_s[2], *grid_r = NULL, cx, r;
1933 char fname[STRLEN], buf[22];
1935 int a, i, d, z, y, x;
1939 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1940 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1944 snew(grid_r, 2*dd->nnodes);
1947 dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd) ? grid_r[0] : NULL);
1951 for (d = 0; d < DIM; d++)
1953 for (i = 0; i < DIM; i++)
1961 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1963 tric[d][i] = box[i][d]/box[i][i];
1972 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1973 out = gmx_fio_fopen(fname, "w");
1974 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1976 for (i = 0; i < dd->nnodes; i++)
1978 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1979 for (d = 0; d < DIM; d++)
1981 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1983 for (z = 0; z < 2; z++)
1985 for (y = 0; y < 2; y++)
1987 for (x = 0; x < 2; x++)
1989 cx[XX] = grid_r[i*2+x][XX];
1990 cx[YY] = grid_r[i*2+y][YY];
1991 cx[ZZ] = grid_r[i*2+z][ZZ];
1993 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1994 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
1998 for (d = 0; d < DIM; d++)
2000 for (x = 0; x < 4; x++)
2004 case 0: y = 1 + i*8 + 2*x; break;
2005 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
2006 case 2: y = 1 + i*8 + x; break;
2008 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2012 gmx_fio_fclose(out);
2017 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
2018 gmx_mtop_t *mtop, t_commrec *cr,
2019 int natoms, rvec x[], matrix box)
2021 char fname[STRLEN], buf[22];
2023 int i, ii, resnr, c;
2024 char *atomname, *resname;
2031 natoms = dd->comm->nat[ddnatVSITE];
2034 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2036 out = gmx_fio_fopen(fname, "w");
2038 fprintf(out, "TITLE %s\n", title);
2039 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2040 for (i = 0; i < natoms; i++)
2042 ii = dd->gatindex[i];
2043 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
2044 if (i < dd->comm->nat[ddnatZONE])
2047 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2053 else if (i < dd->comm->nat[ddnatVSITE])
2055 b = dd->comm->zones.n;
2059 b = dd->comm->zones.n + 1;
2061 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
2062 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
2064 fprintf(out, "TER\n");
2066 gmx_fio_fclose(out);
2069 real dd_cutoff_mbody(gmx_domdec_t *dd)
2071 gmx_domdec_comm_t *comm;
2078 if (comm->bInterCGBondeds)
2080 if (comm->cutoff_mbody > 0)
2082 r = comm->cutoff_mbody;
2086 /* cutoff_mbody=0 means we do not have DLB */
2087 r = comm->cellsize_min[dd->dim[0]];
2088 for (di = 1; di < dd->ndim; di++)
2090 r = min(r, comm->cellsize_min[dd->dim[di]]);
2092 if (comm->bBondComm)
2094 r = max(r, comm->cutoff_mbody);
2098 r = min(r, comm->cutoff);
2106 real dd_cutoff_twobody(gmx_domdec_t *dd)
2110 r_mb = dd_cutoff_mbody(dd);
2112 return max(dd->comm->cutoff, r_mb);
2116 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2120 nc = dd->nc[dd->comm->cartpmedim];
2121 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2122 copy_ivec(coord, coord_pme);
2123 coord_pme[dd->comm->cartpmedim] =
2124 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
2127 static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
2129 /* Here we assign a PME node to communicate with this DD node
2130 * by assuming that the major index of both is x.
2131 * We add cr->npmenodes/2 to obtain an even distribution.
2133 return (ddindex*npme + npme/2)/ndd;
2136 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2138 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2141 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2143 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
2146 static int *dd_pmenodes(t_commrec *cr)
2151 snew(pmenodes, cr->npmenodes);
2153 for (i = 0; i < cr->dd->nnodes; i++)
2155 p0 = cr_ddindex2pmeindex(cr, i);
2156 p1 = cr_ddindex2pmeindex(cr, i+1);
2157 if (i+1 == cr->dd->nnodes || p1 > p0)
2161 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2163 pmenodes[n] = i + 1 + n;
2171 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2174 ivec coords, coords_pme, nc;
2179 if (dd->comm->bCartesian) {
2180 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2181 dd_coords2pmecoords(dd,coords,coords_pme);
2182 copy_ivec(dd->ntot,nc);
2183 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2184 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2186 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2188 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2194 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
2199 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2201 gmx_domdec_comm_t *comm;
2203 int ddindex, nodeid = -1;
2205 comm = cr->dd->comm;
2210 if (comm->bCartesianPP_PME)
2213 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2218 ddindex = dd_index(cr->dd->nc, coords);
2219 if (comm->bCartesianPP)
2221 nodeid = comm->ddindex2simnodeid[ddindex];
2227 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
2239 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2242 gmx_domdec_comm_t *comm;
2243 ivec coord, coord_pme;
2250 /* This assumes a uniform x domain decomposition grid cell size */
2251 if (comm->bCartesianPP_PME)
2254 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2255 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2257 /* This is a PP node */
2258 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2259 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2263 else if (comm->bCartesianPP)
2265 if (sim_nodeid < dd->nnodes)
2267 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2272 /* This assumes DD cells with identical x coordinates
2273 * are numbered sequentially.
2275 if (dd->comm->pmenodes == NULL)
2277 if (sim_nodeid < dd->nnodes)
2279 /* The DD index equals the nodeid */
2280 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2286 while (sim_nodeid > dd->comm->pmenodes[i])
2290 if (sim_nodeid < dd->comm->pmenodes[i])
2292 pmenode = dd->comm->pmenodes[i];
2300 void get_pme_nnodes(const gmx_domdec_t *dd,
2301 int *npmenodes_x, int *npmenodes_y)
2305 *npmenodes_x = dd->comm->npmenodes_x;
2306 *npmenodes_y = dd->comm->npmenodes_y;
2315 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2317 gmx_bool bPMEOnlyNode;
2319 if (DOMAINDECOMP(cr))
2321 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2325 bPMEOnlyNode = FALSE;
2328 return bPMEOnlyNode;
2331 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2332 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2336 ivec coord, coord_pme;
2340 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2343 for (x = 0; x < dd->nc[XX]; x++)
2345 for (y = 0; y < dd->nc[YY]; y++)
2347 for (z = 0; z < dd->nc[ZZ]; z++)
2349 if (dd->comm->bCartesianPP_PME)
2354 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2355 if (dd->ci[XX] == coord_pme[XX] &&
2356 dd->ci[YY] == coord_pme[YY] &&
2357 dd->ci[ZZ] == coord_pme[ZZ])
2359 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2364 /* The slab corresponds to the nodeid in the PME group */
2365 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2367 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2374 /* The last PP-only node is the peer node */
2375 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2379 fprintf(debug, "Receive coordinates from PP ranks:");
2380 for (x = 0; x < *nmy_ddnodes; x++)
2382 fprintf(debug, " %d", (*my_ddnodes)[x]);
2384 fprintf(debug, "\n");
2388 static gmx_bool receive_vir_ener(t_commrec *cr)
2390 gmx_domdec_comm_t *comm;
2391 int pmenode, coords[DIM], rank;
2395 if (cr->npmenodes < cr->dd->nnodes)
2397 comm = cr->dd->comm;
2398 if (comm->bCartesianPP_PME)
2400 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2402 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2403 coords[comm->cartpmedim]++;
2404 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2406 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2407 if (dd_simnode2pmenode(cr, rank) == pmenode)
2409 /* This is not the last PP node for pmenode */
2417 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2418 if (cr->sim_nodeid+1 < cr->nnodes &&
2419 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2421 /* This is not the last PP node for pmenode */
2430 static void set_zones_ncg_home(gmx_domdec_t *dd)
2432 gmx_domdec_zones_t *zones;
2435 zones = &dd->comm->zones;
2437 zones->cg_range[0] = 0;
2438 for (i = 1; i < zones->n+1; i++)
2440 zones->cg_range[i] = dd->ncg_home;
2442 /* zone_ncg1[0] should always be equal to ncg_home */
2443 dd->comm->zone_ncg1[0] = dd->ncg_home;
2446 static void rebuild_cgindex(gmx_domdec_t *dd,
2447 const int *gcgs_index, t_state *state)
2449 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2452 dd_cg_gl = dd->index_gl;
2453 cgindex = dd->cgindex;
2456 for (i = 0; i < state->ncg_gl; i++)
2460 dd_cg_gl[i] = cg_gl;
2461 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2465 dd->ncg_home = state->ncg_gl;
2468 set_zones_ncg_home(dd);
2471 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2473 while (cg >= cginfo_mb->cg_end)
2478 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2481 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2482 t_forcerec *fr, char *bLocalCG)
2484 cginfo_mb_t *cginfo_mb;
2490 cginfo_mb = fr->cginfo_mb;
2491 cginfo = fr->cginfo;
2493 for (cg = cg0; cg < cg1; cg++)
2495 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2499 if (bLocalCG != NULL)
2501 for (cg = cg0; cg < cg1; cg++)
2503 bLocalCG[index_gl[cg]] = TRUE;
2508 static void make_dd_indices(gmx_domdec_t *dd,
2509 const int *gcgs_index, int cg_start)
2511 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2512 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2517 bLocalCG = dd->comm->bLocalCG;
2519 if (dd->nat_tot > dd->gatindex_nalloc)
2521 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2522 srenew(dd->gatindex, dd->gatindex_nalloc);
2525 nzone = dd->comm->zones.n;
2526 zone2cg = dd->comm->zones.cg_range;
2527 zone_ncg1 = dd->comm->zone_ncg1;
2528 index_gl = dd->index_gl;
2529 gatindex = dd->gatindex;
2530 bCGs = dd->comm->bCGs;
2532 if (zone2cg[1] != dd->ncg_home)
2534 gmx_incons("dd->ncg_zone is not up to date");
2537 /* Make the local to global and global to local atom index */
2538 a = dd->cgindex[cg_start];
2539 for (zone = 0; zone < nzone; zone++)
2547 cg0 = zone2cg[zone];
2549 cg1 = zone2cg[zone+1];
2550 cg1_p1 = cg0 + zone_ncg1[zone];
2552 for (cg = cg0; cg < cg1; cg++)
2557 /* Signal that this cg is from more than one pulse away */
2560 cg_gl = index_gl[cg];
2563 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2566 ga2la_set(dd->ga2la, a_gl, a, zone1);
2572 gatindex[a] = cg_gl;
2573 ga2la_set(dd->ga2la, cg_gl, a, zone1);
2580 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2583 int ncg, i, ngl, nerr;
2586 if (bLocalCG == NULL)
2590 for (i = 0; i < dd->ncg_tot; i++)
2592 if (!bLocalCG[dd->index_gl[i]])
2595 "DD rank %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
2600 for (i = 0; i < ncg_sys; i++)
2607 if (ngl != dd->ncg_tot)
2609 fprintf(stderr, "DD rank %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
2616 static void check_index_consistency(gmx_domdec_t *dd,
2617 int natoms_sys, int ncg_sys,
2620 int nerr, ngl, i, a, cell;
2625 if (dd->comm->DD_debug > 1)
2627 snew(have, natoms_sys);
2628 for (a = 0; a < dd->nat_tot; a++)
2630 if (have[dd->gatindex[a]] > 0)
2632 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2636 have[dd->gatindex[a]] = a + 1;
2642 snew(have, dd->nat_tot);
2645 for (i = 0; i < natoms_sys; i++)
2647 if (ga2la_get(dd->ga2la, i, &a, &cell))
2649 if (a >= dd->nat_tot)
2651 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2657 if (dd->gatindex[a] != i)
2659 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2666 if (ngl != dd->nat_tot)
2669 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
2670 dd->rank, where, ngl, dd->nat_tot);
2672 for (a = 0; a < dd->nat_tot; a++)
2677 "DD rank %d, %s: local atom %d, global %d has no global index\n",
2678 dd->rank, where, a+1, dd->gatindex[a]+1);
2683 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2687 gmx_fatal(FARGS, "DD rank %d, %s: %d atom/cg index inconsistencies",
2688 dd->rank, where, nerr);
2692 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2699 /* Clear the whole list without searching */
2700 ga2la_clear(dd->ga2la);
2704 for (i = a_start; i < dd->nat_tot; i++)
2706 ga2la_del(dd->ga2la, dd->gatindex[i]);
2710 bLocalCG = dd->comm->bLocalCG;
2713 for (i = cg_start; i < dd->ncg_tot; i++)
2715 bLocalCG[dd->index_gl[i]] = FALSE;
2719 dd_clear_local_vsite_indices(dd);
2721 if (dd->constraints)
2723 dd_clear_local_constraint_indices(dd);
2727 /* This function should be used for moving the domain boudaries during DLB,
2728 * for obtaining the minimum cell size. It checks the initially set limit
2729 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2730 * and, possibly, a longer cut-off limit set for PME load balancing.
2732 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2736 cellsize_min = comm->cellsize_min[dim];
2738 if (!comm->bVacDLBNoLimit)
2740 /* The cut-off might have changed, e.g. by PME load balacning,
2741 * from the value used to set comm->cellsize_min, so check it.
2743 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2745 if (comm->bPMELoadBalDLBLimits)
2747 /* Check for the cut-off limit set by the PME load balancing */
2748 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2752 return cellsize_min;
2755 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2758 real grid_jump_limit;
2760 /* The distance between the boundaries of cells at distance
2761 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2762 * and by the fact that cells should not be shifted by more than
2763 * half their size, such that cg's only shift by one cell
2764 * at redecomposition.
2766 grid_jump_limit = comm->cellsize_limit;
2767 if (!comm->bVacDLBNoLimit)
2769 if (comm->bPMELoadBalDLBLimits)
2771 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2773 grid_jump_limit = max(grid_jump_limit,
2774 cutoff/comm->cd[dim_ind].np);
2777 return grid_jump_limit;
2780 static gmx_bool check_grid_jump(gmx_int64_t step,
2786 gmx_domdec_comm_t *comm;
2795 for (d = 1; d < dd->ndim; d++)
2798 limit = grid_jump_limit(comm, cutoff, d);
2799 bfac = ddbox->box_size[dim];
2800 if (ddbox->tric_dir[dim])
2802 bfac *= ddbox->skew_fac[dim];
2804 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2805 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2813 /* This error should never be triggered under normal
2814 * circumstances, but you never know ...
2816 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
2817 gmx_step_str(step, buf),
2818 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2826 static int dd_load_count(gmx_domdec_comm_t *comm)
2828 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2831 static float dd_force_load(gmx_domdec_comm_t *comm)
2838 if (comm->eFlop > 1)
2840 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2845 load = comm->cycl[ddCyclF];
2846 if (comm->cycl_n[ddCyclF] > 1)
2848 /* Subtract the maximum of the last n cycle counts
2849 * to get rid of possible high counts due to other sources,
2850 * for instance system activity, that would otherwise
2851 * affect the dynamic load balancing.
2853 load -= comm->cycl_max[ddCyclF];
2857 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2859 float gpu_wait, gpu_wait_sum;
2861 gpu_wait = comm->cycl[ddCyclWaitGPU];
2862 if (comm->cycl_n[ddCyclF] > 1)
2864 /* We should remove the WaitGPU time of the same MD step
2865 * as the one with the maximum F time, since the F time
2866 * and the wait time are not independent.
2867 * Furthermore, the step for the max F time should be chosen
2868 * the same on all ranks that share the same GPU.
2869 * But to keep the code simple, we remove the average instead.
2870 * The main reason for artificially long times at some steps
2871 * is spurious CPU activity or MPI time, so we don't expect
2872 * that changes in the GPU wait time matter a lot here.
2874 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2876 /* Sum the wait times over the ranks that share the same GPU */
2877 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2878 comm->mpi_comm_gpu_shared);
2879 /* Replace the wait time by the average over the ranks */
2880 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
2888 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2890 gmx_domdec_comm_t *comm;
2895 snew(*dim_f, dd->nc[dim]+1);
2897 for (i = 1; i < dd->nc[dim]; i++)
2899 if (comm->slb_frac[dim])
2901 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2905 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2908 (*dim_f)[dd->nc[dim]] = 1;
2911 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2913 int pmeindex, slab, nso, i;
2916 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2922 ddpme->dim = dimind;
2924 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2926 ddpme->nslab = (ddpme->dim == 0 ?
2927 dd->comm->npmenodes_x :
2928 dd->comm->npmenodes_y);
2930 if (ddpme->nslab <= 1)
2935 nso = dd->comm->npmenodes/ddpme->nslab;
2936 /* Determine for each PME slab the PP location range for dimension dim */
2937 snew(ddpme->pp_min, ddpme->nslab);
2938 snew(ddpme->pp_max, ddpme->nslab);
2939 for (slab = 0; slab < ddpme->nslab; slab++)
2941 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2942 ddpme->pp_max[slab] = 0;
2944 for (i = 0; i < dd->nnodes; i++)
2946 ddindex2xyz(dd->nc, i, xyz);
2947 /* For y only use our y/z slab.
2948 * This assumes that the PME x grid size matches the DD grid size.
2950 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2952 pmeindex = ddindex2pmeindex(dd, i);
2955 slab = pmeindex/nso;
2959 slab = pmeindex % ddpme->nslab;
2961 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
2962 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
2966 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
2969 int dd_pme_maxshift_x(gmx_domdec_t *dd)
2971 if (dd->comm->ddpme[0].dim == XX)
2973 return dd->comm->ddpme[0].maxshift;
2981 int dd_pme_maxshift_y(gmx_domdec_t *dd)
2983 if (dd->comm->ddpme[0].dim == YY)
2985 return dd->comm->ddpme[0].maxshift;
2987 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2989 return dd->comm->ddpme[1].maxshift;
2997 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
2998 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
3000 gmx_domdec_comm_t *comm;
3003 real range, pme_boundary;
3007 nc = dd->nc[ddpme->dim];
3010 if (!ddpme->dim_match)
3012 /* PP decomposition is not along dim: the worst situation */
3015 else if (ns <= 3 || (bUniform && ns == nc))
3017 /* The optimal situation */
3022 /* We need to check for all pme nodes which nodes they
3023 * could possibly need to communicate with.
3025 xmin = ddpme->pp_min;
3026 xmax = ddpme->pp_max;
3027 /* Allow for atoms to be maximally 2/3 times the cut-off
3028 * out of their DD cell. This is a reasonable balance between
3029 * between performance and support for most charge-group/cut-off
3032 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3033 /* Avoid extra communication when we are exactly at a boundary */
3037 for (s = 0; s < ns; s++)
3039 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3040 pme_boundary = (real)s/ns;
3043 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3045 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3049 pme_boundary = (real)(s+1)/ns;
3052 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3054 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3061 ddpme->maxshift = sh;
3065 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3066 ddpme->dim, ddpme->maxshift);
3070 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3074 for (d = 0; d < dd->ndim; d++)
3077 if (dim < ddbox->nboundeddim &&
3078 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3079 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3081 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3082 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3083 dd->nc[dim], dd->comm->cellsize_limit);
3089 setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY
3092 /* Set the domain boundaries. Use for static (or no) load balancing,
3093 * and also for the starting state for dynamic load balancing.
3094 * setmode determine if and where the boundaries are stored, use enum above.
3095 * Returns the number communication pulses in npulse.
3097 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3098 int setmode, ivec npulse)
3100 gmx_domdec_comm_t *comm;
3103 real *cell_x, cell_dx, cellsize;
3107 for (d = 0; d < DIM; d++)
3109 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
3111 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3114 cell_dx = ddbox->box_size[d]/dd->nc[d];
3117 case setcellsizeslbMASTER:
3118 for (j = 0; j < dd->nc[d]+1; j++)
3120 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3123 case setcellsizeslbLOCAL:
3124 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3125 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
3130 cellsize = cell_dx*ddbox->skew_fac[d];
3131 while (cellsize*npulse[d] < comm->cutoff)
3135 cellsize_min[d] = cellsize;
3139 /* Statically load balanced grid */
3140 /* Also when we are not doing a master distribution we determine
3141 * all cell borders in a loop to obtain identical values
3142 * to the master distribution case and to determine npulse.
3144 if (setmode == setcellsizeslbMASTER)
3146 cell_x = dd->ma->cell_x[d];
3150 snew(cell_x, dd->nc[d]+1);
3152 cell_x[0] = ddbox->box0[d];
3153 for (j = 0; j < dd->nc[d]; j++)
3155 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3156 cell_x[j+1] = cell_x[j] + cell_dx;
3157 cellsize = cell_dx*ddbox->skew_fac[d];
3158 while (cellsize*npulse[d] < comm->cutoff &&
3159 npulse[d] < dd->nc[d]-1)
3163 cellsize_min[d] = min(cellsize_min[d], cellsize);
3165 if (setmode == setcellsizeslbLOCAL)
3167 comm->cell_x0[d] = cell_x[dd->ci[d]];
3168 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3170 if (setmode != setcellsizeslbMASTER)
3175 /* The following limitation is to avoid that a cell would receive
3176 * some of its own home charge groups back over the periodic boundary.
3177 * Double charge groups cause trouble with the global indices.
3179 if (d < ddbox->npbcdim &&
3180 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3182 char error_string[STRLEN];
3184 sprintf(error_string,
3185 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3186 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3188 dd->nc[d], dd->nc[d],
3189 dd->nnodes > dd->nc[d] ? "cells" : "ranks");
3191 if (setmode == setcellsizeslbLOCAL)
3193 gmx_fatal_collective(FARGS, NULL, dd, error_string);
3197 gmx_fatal(FARGS, error_string);
3202 if (!comm->bDynLoadBal)
3204 copy_rvec(cellsize_min, comm->cellsize_min);
3207 for (d = 0; d < comm->npmedecompdim; d++)
3209 set_pme_maxshift(dd, &comm->ddpme[d],
3210 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3211 comm->ddpme[d].slb_dim_f);
3216 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3217 int d, int dim, gmx_domdec_root_t *root,
3219 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3221 gmx_domdec_comm_t *comm;
3222 int ncd, i, j, nmin, nmin_old;
3223 gmx_bool bLimLo, bLimHi;
3225 real fac, halfway, cellsize_limit_f_i, region_size;
3226 gmx_bool bPBC, bLastHi = FALSE;
3227 int nrange[] = {range[0], range[1]};
3229 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
3235 bPBC = (dim < ddbox->npbcdim);
3237 cell_size = root->buf_ncd;
3241 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3244 /* First we need to check if the scaling does not make cells
3245 * smaller than the smallest allowed size.
3246 * We need to do this iteratively, since if a cell is too small,
3247 * it needs to be enlarged, which makes all the other cells smaller,
3248 * which could in turn make another cell smaller than allowed.
3250 for (i = range[0]; i < range[1]; i++)
3252 root->bCellMin[i] = FALSE;
3258 /* We need the total for normalization */
3260 for (i = range[0]; i < range[1]; i++)
3262 if (root->bCellMin[i] == FALSE)
3264 fac += cell_size[i];
3267 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3268 /* Determine the cell boundaries */
3269 for (i = range[0]; i < range[1]; i++)
3271 if (root->bCellMin[i] == FALSE)
3273 cell_size[i] *= fac;
3274 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3276 cellsize_limit_f_i = 0;
3280 cellsize_limit_f_i = cellsize_limit_f;
3282 if (cell_size[i] < cellsize_limit_f_i)
3284 root->bCellMin[i] = TRUE;
3285 cell_size[i] = cellsize_limit_f_i;
3289 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
3292 while (nmin > nmin_old);
3295 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3296 /* For this check we should not use DD_CELL_MARGIN,
3297 * but a slightly smaller factor,
3298 * since rounding could get use below the limit.
3300 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3303 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3304 gmx_step_str(step, buf),
3305 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3306 ncd, comm->cellsize_min[dim]);
3309 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3313 /* Check if the boundary did not displace more than halfway
3314 * each of the cells it bounds, as this could cause problems,
3315 * especially when the differences between cell sizes are large.
3316 * If changes are applied, they will not make cells smaller
3317 * than the cut-off, as we check all the boundaries which
3318 * might be affected by a change and if the old state was ok,
3319 * the cells will at most be shrunk back to their old size.
3321 for (i = range[0]+1; i < range[1]; i++)
3323 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3324 if (root->cell_f[i] < halfway)
3326 root->cell_f[i] = halfway;
3327 /* Check if the change also causes shifts of the next boundaries */
3328 for (j = i+1; j < range[1]; j++)
3330 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3332 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3336 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3337 if (root->cell_f[i] > halfway)
3339 root->cell_f[i] = halfway;
3340 /* Check if the change also causes shifts of the next boundaries */
3341 for (j = i-1; j >= range[0]+1; j--)
3343 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3345 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3352 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3353 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3354 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3355 * for a and b nrange is used */
3358 /* Take care of the staggering of the cell boundaries */
3361 for (i = range[0]; i < range[1]; i++)
3363 root->cell_f_max0[i] = root->cell_f[i];
3364 root->cell_f_min1[i] = root->cell_f[i+1];
3369 for (i = range[0]+1; i < range[1]; i++)
3371 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3372 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3373 if (bLimLo && bLimHi)
3375 /* Both limits violated, try the best we can */
3376 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3377 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3378 nrange[0] = range[0];
3380 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3383 nrange[1] = range[1];
3384 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3390 /* root->cell_f[i] = root->bound_min[i]; */
3391 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3394 else if (bLimHi && !bLastHi)
3397 if (nrange[1] < range[1]) /* found a LimLo before */
3399 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3400 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3401 nrange[0] = nrange[1];
3403 root->cell_f[i] = root->bound_max[i];
3405 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3407 nrange[1] = range[1];
3410 if (nrange[1] < range[1]) /* found last a LimLo */
3412 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3413 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3414 nrange[0] = nrange[1];
3415 nrange[1] = range[1];
3416 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3418 else if (nrange[0] > range[0]) /* found at least one LimHi */
3420 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3427 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3428 int d, int dim, gmx_domdec_root_t *root,
3429 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3430 gmx_bool bUniform, gmx_int64_t step)
3432 gmx_domdec_comm_t *comm;
3433 int ncd, d1, i, j, pos;
3435 real load_aver, load_i, imbalance, change, change_max, sc;
3436 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3440 int range[] = { 0, 0 };
3444 /* Convert the maximum change from the input percentage to a fraction */
3445 change_limit = comm->dlb_scale_lim*0.01;
3449 bPBC = (dim < ddbox->npbcdim);
3451 cell_size = root->buf_ncd;
3453 /* Store the original boundaries */
3454 for (i = 0; i < ncd+1; i++)
3456 root->old_cell_f[i] = root->cell_f[i];
3460 for (i = 0; i < ncd; i++)
3462 cell_size[i] = 1.0/ncd;
3465 else if (dd_load_count(comm))
3467 load_aver = comm->load[d].sum_m/ncd;
3469 for (i = 0; i < ncd; i++)
3471 /* Determine the relative imbalance of cell i */
3472 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3473 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3474 /* Determine the change of the cell size using underrelaxation */
3475 change = -relax*imbalance;
3476 change_max = max(change_max, max(change, -change));
3478 /* Limit the amount of scaling.
3479 * We need to use the same rescaling for all cells in one row,
3480 * otherwise the load balancing might not converge.
3483 if (change_max > change_limit)
3485 sc *= change_limit/change_max;
3487 for (i = 0; i < ncd; i++)
3489 /* Determine the relative imbalance of cell i */
3490 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3491 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3492 /* Determine the change of the cell size using underrelaxation */
3493 change = -sc*imbalance;
3494 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3498 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3499 cellsize_limit_f *= DD_CELL_MARGIN;
3500 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3501 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3502 if (ddbox->tric_dir[dim])
3504 cellsize_limit_f /= ddbox->skew_fac[dim];
3505 dist_min_f /= ddbox->skew_fac[dim];
3507 if (bDynamicBox && d > 0)
3509 dist_min_f *= DD_PRES_SCALE_MARGIN;
3511 if (d > 0 && !bUniform)
3513 /* Make sure that the grid is not shifted too much */
3514 for (i = 1; i < ncd; i++)
3516 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3518 gmx_incons("Inconsistent DD boundary staggering limits!");
3520 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3521 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3524 root->bound_min[i] += 0.5*space;
3526 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3527 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3530 root->bound_max[i] += 0.5*space;
3535 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3537 root->cell_f_max0[i-1] + dist_min_f,
3538 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3539 root->cell_f_min1[i] - dist_min_f);
3544 root->cell_f[0] = 0;
3545 root->cell_f[ncd] = 1;
3546 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3549 /* After the checks above, the cells should obey the cut-off
3550 * restrictions, but it does not hurt to check.
3552 for (i = 0; i < ncd; i++)
3556 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3557 dim, i, root->cell_f[i], root->cell_f[i+1]);
3560 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3561 root->cell_f[i+1] - root->cell_f[i] <
3562 cellsize_limit_f/DD_CELL_MARGIN)
3566 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3567 gmx_step_str(step, buf), dim2char(dim), i,
3568 (root->cell_f[i+1] - root->cell_f[i])
3569 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3574 /* Store the cell boundaries of the lower dimensions at the end */
3575 for (d1 = 0; d1 < d; d1++)
3577 root->cell_f[pos++] = comm->cell_f0[d1];
3578 root->cell_f[pos++] = comm->cell_f1[d1];
3581 if (d < comm->npmedecompdim)
3583 /* The master determines the maximum shift for
3584 * the coordinate communication between separate PME nodes.
3586 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
3588 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3591 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3595 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3596 gmx_ddbox_t *ddbox, int dimind)
3598 gmx_domdec_comm_t *comm;
3603 /* Set the cell dimensions */
3604 dim = dd->dim[dimind];
3605 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3606 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3607 if (dim >= ddbox->nboundeddim)
3609 comm->cell_x0[dim] += ddbox->box0[dim];
3610 comm->cell_x1[dim] += ddbox->box0[dim];
3614 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3615 int d, int dim, real *cell_f_row,
3618 gmx_domdec_comm_t *comm;
3624 /* Each node would only need to know two fractions,
3625 * but it is probably cheaper to broadcast the whole array.
3627 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3628 0, comm->mpi_comm_load[d]);
3630 /* Copy the fractions for this dimension from the buffer */
3631 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3632 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3633 /* The whole array was communicated, so set the buffer position */
3634 pos = dd->nc[dim] + 1;
3635 for (d1 = 0; d1 <= d; d1++)
3639 /* Copy the cell fractions of the lower dimensions */
3640 comm->cell_f0[d1] = cell_f_row[pos++];
3641 comm->cell_f1[d1] = cell_f_row[pos++];
3643 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3645 /* Convert the communicated shift from float to int */
3646 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3649 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3653 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3654 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3655 gmx_bool bUniform, gmx_int64_t step)
3657 gmx_domdec_comm_t *comm;
3659 gmx_bool bRowMember, bRowRoot;
3664 for (d = 0; d < dd->ndim; d++)
3669 for (d1 = d; d1 < dd->ndim; d1++)
3671 if (dd->ci[dd->dim[d1]] > 0)
3684 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3685 ddbox, bDynamicBox, bUniform, step);
3686 cell_f_row = comm->root[d]->cell_f;
3690 cell_f_row = comm->cell_f_row;
3692 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
3697 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3701 /* This function assumes the box is static and should therefore
3702 * not be called when the box has changed since the last
3703 * call to dd_partition_system.
3705 for (d = 0; d < dd->ndim; d++)
3707 relative_to_absolute_cell_bounds(dd, ddbox, d);
3713 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3714 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3715 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3716 gmx_wallcycle_t wcycle)
3718 gmx_domdec_comm_t *comm;
3725 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3726 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3727 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3729 else if (bDynamicBox)
3731 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3734 /* Set the dimensions for which no DD is used */
3735 for (dim = 0; dim < DIM; dim++)
3737 if (dd->nc[dim] == 1)
3739 comm->cell_x0[dim] = 0;
3740 comm->cell_x1[dim] = ddbox->box_size[dim];
3741 if (dim >= ddbox->nboundeddim)
3743 comm->cell_x0[dim] += ddbox->box0[dim];
3744 comm->cell_x1[dim] += ddbox->box0[dim];
3750 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3753 gmx_domdec_comm_dim_t *cd;
3755 for (d = 0; d < dd->ndim; d++)
3757 cd = &dd->comm->cd[d];
3758 np = npulse[dd->dim[d]];
3759 if (np > cd->np_nalloc)
3763 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3764 dim2char(dd->dim[d]), np);
3766 if (DDMASTER(dd) && cd->np_nalloc > 0)
3768 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3770 srenew(cd->ind, np);
3771 for (i = cd->np_nalloc; i < np; i++)
3773 cd->ind[i].index = NULL;
3774 cd->ind[i].nalloc = 0;
3783 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3784 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3785 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3786 gmx_wallcycle_t wcycle)
3788 gmx_domdec_comm_t *comm;
3794 /* Copy the old cell boundaries for the cg displacement check */
3795 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3796 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3798 if (comm->bDynLoadBal)
3802 check_box_size(dd, ddbox);
3804 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3808 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
3809 realloc_comm_ind(dd, npulse);
3814 for (d = 0; d < DIM; d++)
3816 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3817 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
3822 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3824 rvec cell_ns_x0, rvec cell_ns_x1,
3827 gmx_domdec_comm_t *comm;
3832 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3834 dim = dd->dim[dim_ind];
3836 /* Without PBC we don't have restrictions on the outer cells */
3837 if (!(dim >= ddbox->npbcdim &&
3838 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3839 comm->bDynLoadBal &&
3840 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3841 comm->cellsize_min[dim])
3844 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3845 gmx_step_str(step, buf), dim2char(dim),
3846 comm->cell_x1[dim] - comm->cell_x0[dim],
3847 ddbox->skew_fac[dim],
3848 dd->comm->cellsize_min[dim],
3849 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3853 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3855 /* Communicate the boundaries and update cell_ns_x0/1 */
3856 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3857 if (dd->bGridJump && dd->ndim > 1)
3859 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
3864 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3868 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3876 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3877 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3886 static void check_screw_box(matrix box)
3888 /* Mathematical limitation */
3889 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3891 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3894 /* Limitation due to the asymmetry of the eighth shell method */
3895 if (box[ZZ][YY] != 0)
3897 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
3901 static void distribute_cg(FILE *fplog, gmx_int64_t step,
3902 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3905 gmx_domdec_master_t *ma;
3906 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3907 int i, icg, j, k, k0, k1, d, npbcdim;
3909 rvec box_size, cg_cm;
3911 real nrcg, inv_ncg, pos_d;
3913 gmx_bool bUnbounded, bScrew;
3917 if (tmp_ind == NULL)
3919 snew(tmp_nalloc, dd->nnodes);
3920 snew(tmp_ind, dd->nnodes);
3921 for (i = 0; i < dd->nnodes; i++)
3923 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3924 snew(tmp_ind[i], tmp_nalloc[i]);
3928 /* Clear the count */
3929 for (i = 0; i < dd->nnodes; i++)
3935 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3937 cgindex = cgs->index;
3939 /* Compute the center of geometry for all charge groups */
3940 for (icg = 0; icg < cgs->nr; icg++)
3943 k1 = cgindex[icg+1];
3947 copy_rvec(pos[k0], cg_cm);
3954 for (k = k0; (k < k1); k++)
3956 rvec_inc(cg_cm, pos[k]);
3958 for (d = 0; (d < DIM); d++)
3960 cg_cm[d] *= inv_ncg;
3963 /* Put the charge group in the box and determine the cell index */
3964 for (d = DIM-1; d >= 0; d--)
3967 if (d < dd->npbcdim)
3969 bScrew = (dd->bScrewPBC && d == XX);
3970 if (tric_dir[d] && dd->nc[d] > 1)
3972 /* Use triclinic coordintates for this dimension */
3973 for (j = d+1; j < DIM; j++)
3975 pos_d += cg_cm[j]*tcm[j][d];
3978 while (pos_d >= box[d][d])
3981 rvec_dec(cg_cm, box[d]);
3984 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3985 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3987 for (k = k0; (k < k1); k++)
3989 rvec_dec(pos[k], box[d]);
3992 pos[k][YY] = box[YY][YY] - pos[k][YY];
3993 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4000 rvec_inc(cg_cm, box[d]);
4003 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
4004 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
4006 for (k = k0; (k < k1); k++)
4008 rvec_inc(pos[k], box[d]);
4011 pos[k][YY] = box[YY][YY] - pos[k][YY];
4012 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4017 /* This could be done more efficiently */
4019 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
4024 i = dd_index(dd->nc, ind);
4025 if (ma->ncg[i] == tmp_nalloc[i])
4027 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
4028 srenew(tmp_ind[i], tmp_nalloc[i]);
4030 tmp_ind[i][ma->ncg[i]] = icg;
4032 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
4036 for (i = 0; i < dd->nnodes; i++)
4039 for (k = 0; k < ma->ncg[i]; k++)
4041 ma->cg[k1++] = tmp_ind[i][k];
4044 ma->index[dd->nnodes] = k1;
4046 for (i = 0; i < dd->nnodes; i++)
4056 fprintf(fplog, "Charge group distribution at step %s:",
4057 gmx_step_str(step, buf));
4058 for (i = 0; i < dd->nnodes; i++)
4060 fprintf(fplog, " %d", ma->ncg[i]);
4062 fprintf(fplog, "\n");
4066 static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4067 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4070 gmx_domdec_master_t *ma = NULL;
4073 int *ibuf, buf2[2] = { 0, 0 };
4074 gmx_bool bMaster = DDMASTER(dd);
4082 check_screw_box(box);
4085 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
4087 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
4088 for (i = 0; i < dd->nnodes; i++)
4090 ma->ibuf[2*i] = ma->ncg[i];
4091 ma->ibuf[2*i+1] = ma->nat[i];
4099 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4101 dd->ncg_home = buf2[0];
4102 dd->nat_home = buf2[1];
4103 dd->ncg_tot = dd->ncg_home;
4104 dd->nat_tot = dd->nat_home;
4105 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4107 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4108 srenew(dd->index_gl, dd->cg_nalloc);
4109 srenew(dd->cgindex, dd->cg_nalloc+1);
4113 for (i = 0; i < dd->nnodes; i++)
4115 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4116 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4121 DDMASTER(dd) ? ma->ibuf : NULL,
4122 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4123 DDMASTER(dd) ? ma->cg : NULL,
4124 dd->ncg_home*sizeof(int), dd->index_gl);
4126 /* Determine the home charge group sizes */
4128 for (i = 0; i < dd->ncg_home; i++)
4130 cg_gl = dd->index_gl[i];
4132 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4137 fprintf(debug, "Home charge groups:\n");
4138 for (i = 0; i < dd->ncg_home; i++)
4140 fprintf(debug, " %d", dd->index_gl[i]);
4143 fprintf(debug, "\n");
4146 fprintf(debug, "\n");
4150 static int compact_and_copy_vec_at(int ncg, int *move,
4153 rvec *src, gmx_domdec_comm_t *comm,
4156 int m, icg, i, i0, i1, nrcg;
4162 for (m = 0; m < DIM*2; m++)
4168 for (icg = 0; icg < ncg; icg++)
4170 i1 = cgindex[icg+1];
4176 /* Compact the home array in place */
4177 for (i = i0; i < i1; i++)
4179 copy_rvec(src[i], src[home_pos++]);
4185 /* Copy to the communication buffer */
4187 pos_vec[m] += 1 + vec*nrcg;
4188 for (i = i0; i < i1; i++)
4190 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4192 pos_vec[m] += (nvec - vec - 1)*nrcg;
4196 home_pos += i1 - i0;
4204 static int compact_and_copy_vec_cg(int ncg, int *move,
4206 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4209 int m, icg, i0, i1, nrcg;
4215 for (m = 0; m < DIM*2; m++)
4221 for (icg = 0; icg < ncg; icg++)
4223 i1 = cgindex[icg+1];
4229 /* Compact the home array in place */
4230 copy_rvec(src[icg], src[home_pos++]);
4236 /* Copy to the communication buffer */
4237 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
4238 pos_vec[m] += 1 + nrcg*nvec;
4250 static int compact_ind(int ncg, int *move,
4251 int *index_gl, int *cgindex,
4253 gmx_ga2la_t ga2la, char *bLocalCG,
4256 int cg, nat, a0, a1, a, a_gl;
4261 for (cg = 0; cg < ncg; cg++)
4267 /* Compact the home arrays in place.
4268 * Anything that can be done here avoids access to global arrays.
4270 cgindex[home_pos] = nat;
4271 for (a = a0; a < a1; a++)
4274 gatindex[nat] = a_gl;
4275 /* The cell number stays 0, so we don't need to set it */
4276 ga2la_change_la(ga2la, a_gl, nat);
4279 index_gl[home_pos] = index_gl[cg];
4280 cginfo[home_pos] = cginfo[cg];
4281 /* The charge group remains local, so bLocalCG does not change */
4286 /* Clear the global indices */
4287 for (a = a0; a < a1; a++)
4289 ga2la_del(ga2la, gatindex[a]);
4293 bLocalCG[index_gl[cg]] = FALSE;
4297 cgindex[home_pos] = nat;
4302 static void clear_and_mark_ind(int ncg, int *move,
4303 int *index_gl, int *cgindex, int *gatindex,
4304 gmx_ga2la_t ga2la, char *bLocalCG,
4309 for (cg = 0; cg < ncg; cg++)
4315 /* Clear the global indices */
4316 for (a = a0; a < a1; a++)
4318 ga2la_del(ga2la, gatindex[a]);
4322 bLocalCG[index_gl[cg]] = FALSE;
4324 /* Signal that this cg has moved using the ns cell index.
4325 * Here we set it to -1. fill_grid will change it
4326 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4328 cell_index[cg] = -1;
4333 static void print_cg_move(FILE *fplog,
4335 gmx_int64_t step, int cg, int dim, int dir,
4336 gmx_bool bHaveCgcmOld, real limitd,
4337 rvec cm_old, rvec cm_new, real pos_d)
4339 gmx_domdec_comm_t *comm;
4344 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4347 fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4348 dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
4349 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4353 /* We don't have a limiting distance available: don't print it */
4354 fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition in direction %c\n",
4355 dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
4356 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4358 fprintf(fplog, "distance out of cell %f\n",
4359 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4362 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4363 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4365 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4366 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4367 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4369 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4370 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4372 comm->cell_x0[dim], comm->cell_x1[dim]);
4375 static void cg_move_error(FILE *fplog,
4377 gmx_int64_t step, int cg, int dim, int dir,
4378 gmx_bool bHaveCgcmOld, real limitd,
4379 rvec cm_old, rvec cm_new, real pos_d)
4383 print_cg_move(fplog, dd, step, cg, dim, dir,
4384 bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
4386 print_cg_move(stderr, dd, step, cg, dim, dir,
4387 bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
4389 "%s moved too far between two domain decomposition steps\n"
4390 "This usually means that your system is not well equilibrated",
4391 dd->comm->bCGs ? "A charge group" : "An atom");
4394 static void rotate_state_atom(t_state *state, int a)
4398 for (est = 0; est < estNR; est++)
4400 if (EST_DISTR(est) && (state->flags & (1<<est)))
4405 /* Rotate the complete state; for a rectangular box only */
4406 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4407 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4410 state->v[a][YY] = -state->v[a][YY];
4411 state->v[a][ZZ] = -state->v[a][ZZ];
4414 state->sd_X[a][YY] = -state->sd_X[a][YY];
4415 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4418 state->cg_p[a][YY] = -state->cg_p[a][YY];
4419 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4421 case estDISRE_INITF:
4422 case estDISRE_RM3TAV:
4423 case estORIRE_INITF:
4425 /* These are distances, so not affected by rotation */
4428 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4434 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4436 if (natoms > comm->moved_nalloc)
4438 /* Contents should be preserved here */
4439 comm->moved_nalloc = over_alloc_dd(natoms);
4440 srenew(comm->moved, comm->moved_nalloc);
4446 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4449 ivec tric_dir, matrix tcm,
4450 rvec cell_x0, rvec cell_x1,
4451 rvec limitd, rvec limit0, rvec limit1,
4453 int cg_start, int cg_end,
4458 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4459 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4463 real inv_ncg, pos_d;
4466 npbcdim = dd->npbcdim;
4468 for (cg = cg_start; cg < cg_end; cg++)
4475 copy_rvec(state->x[k0], cm_new);
4482 for (k = k0; (k < k1); k++)
4484 rvec_inc(cm_new, state->x[k]);
4486 for (d = 0; (d < DIM); d++)
4488 cm_new[d] = inv_ncg*cm_new[d];
4493 /* Do pbc and check DD cell boundary crossings */
4494 for (d = DIM-1; d >= 0; d--)
4498 bScrew = (dd->bScrewPBC && d == XX);
4499 /* Determine the location of this cg in lattice coordinates */
4503 for (d2 = d+1; d2 < DIM; d2++)
4505 pos_d += cm_new[d2]*tcm[d2][d];
4508 /* Put the charge group in the triclinic unit-cell */
4509 if (pos_d >= cell_x1[d])
4511 if (pos_d >= limit1[d])
4513 cg_move_error(fplog, dd, step, cg, d, 1,
4514 cg_cm != state->x, limitd[d],
4515 cg_cm[cg], cm_new, pos_d);
4518 if (dd->ci[d] == dd->nc[d] - 1)
4520 rvec_dec(cm_new, state->box[d]);
4523 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4524 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4526 for (k = k0; (k < k1); k++)
4528 rvec_dec(state->x[k], state->box[d]);
4531 rotate_state_atom(state, k);
4536 else if (pos_d < cell_x0[d])
4538 if (pos_d < limit0[d])
4540 cg_move_error(fplog, dd, step, cg, d, -1,
4541 cg_cm != state->x, limitd[d],
4542 cg_cm[cg], cm_new, pos_d);
4547 rvec_inc(cm_new, state->box[d]);
4550 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4551 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4553 for (k = k0; (k < k1); k++)
4555 rvec_inc(state->x[k], state->box[d]);
4558 rotate_state_atom(state, k);
4564 else if (d < npbcdim)
4566 /* Put the charge group in the rectangular unit-cell */
4567 while (cm_new[d] >= state->box[d][d])
4569 rvec_dec(cm_new, state->box[d]);
4570 for (k = k0; (k < k1); k++)
4572 rvec_dec(state->x[k], state->box[d]);
4575 while (cm_new[d] < 0)
4577 rvec_inc(cm_new, state->box[d]);
4578 for (k = k0; (k < k1); k++)
4580 rvec_inc(state->x[k], state->box[d]);
4586 copy_rvec(cm_new, cg_cm[cg]);
4588 /* Determine where this cg should go */
4591 for (d = 0; d < dd->ndim; d++)
4596 flag |= DD_FLAG_FW(d);
4602 else if (dev[dim] == -1)
4604 flag |= DD_FLAG_BW(d);
4607 if (dd->nc[dim] > 2)
4618 /* Temporarily store the flag in move */
4619 move[cg] = mc + flag;
4623 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4624 gmx_domdec_t *dd, ivec tric_dir,
4625 t_state *state, rvec **f,
4634 int ncg[DIM*2], nat[DIM*2];
4635 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4636 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4637 int sbuf[2], rbuf[2];
4638 int home_pos_cg, home_pos_at, buf_pos;
4640 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4643 real inv_ncg, pos_d;
4645 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4647 cginfo_mb_t *cginfo_mb;
4648 gmx_domdec_comm_t *comm;
4650 int nthread, thread;
4654 check_screw_box(state->box);
4658 if (fr->cutoff_scheme == ecutsGROUP)
4663 for (i = 0; i < estNR; i++)
4669 case estX: /* Always present */ break;
4670 case estV: bV = (state->flags & (1<<i)); break;
4671 case estSDX: bSDX = (state->flags & (1<<i)); break;
4672 case estCGP: bCGP = (state->flags & (1<<i)); break;
4675 case estDISRE_INITF:
4676 case estDISRE_RM3TAV:
4677 case estORIRE_INITF:
4679 /* No processing required */
4682 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4687 if (dd->ncg_tot > comm->nalloc_int)
4689 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4690 srenew(comm->buf_int, comm->nalloc_int);
4692 move = comm->buf_int;
4694 /* Clear the count */
4695 for (c = 0; c < dd->ndim*2; c++)
4701 npbcdim = dd->npbcdim;
4703 for (d = 0; (d < DIM); d++)
4705 limitd[d] = dd->comm->cellsize_min[d];
4706 if (d >= npbcdim && dd->ci[d] == 0)
4708 cell_x0[d] = -GMX_FLOAT_MAX;
4712 cell_x0[d] = comm->cell_x0[d];
4714 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4716 cell_x1[d] = GMX_FLOAT_MAX;
4720 cell_x1[d] = comm->cell_x1[d];
4724 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4725 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4729 /* We check after communication if a charge group moved
4730 * more than one cell. Set the pre-comm check limit to float_max.
4732 limit0[d] = -GMX_FLOAT_MAX;
4733 limit1[d] = GMX_FLOAT_MAX;
4737 make_tric_corr_matrix(npbcdim, state->box, tcm);
4739 cgindex = dd->cgindex;
4741 nthread = gmx_omp_nthreads_get(emntDomdec);
4743 /* Compute the center of geometry for all home charge groups
4744 * and put them in the box and determine where they should go.
4746 #pragma omp parallel for num_threads(nthread) schedule(static)
4747 for (thread = 0; thread < nthread; thread++)
4749 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4750 cell_x0, cell_x1, limitd, limit0, limit1,
4752 ( thread *dd->ncg_home)/nthread,
4753 ((thread+1)*dd->ncg_home)/nthread,
4754 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4758 for (cg = 0; cg < dd->ncg_home; cg++)
4763 flag = mc & ~DD_FLAG_NRCG;
4764 mc = mc & DD_FLAG_NRCG;
4767 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4769 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4770 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4772 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4773 /* We store the cg size in the lower 16 bits
4774 * and the place where the charge group should go
4775 * in the next 6 bits. This saves some communication volume.
4777 nrcg = cgindex[cg+1] - cgindex[cg];
4778 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4784 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4785 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4788 for (i = 0; i < dd->ndim*2; i++)
4790 *ncg_moved += ncg[i];
4807 /* Make sure the communication buffers are large enough */
4808 for (mc = 0; mc < dd->ndim*2; mc++)
4810 nvr = ncg[mc] + nat[mc]*nvec;
4811 if (nvr > comm->cgcm_state_nalloc[mc])
4813 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4814 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4818 switch (fr->cutoff_scheme)
4821 /* Recalculating cg_cm might be cheaper than communicating,
4822 * but that could give rise to rounding issues.
4825 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4826 nvec, cg_cm, comm, bCompact);
4829 /* Without charge groups we send the moved atom coordinates
4830 * over twice. This is so the code below can be used without
4831 * many conditionals for both for with and without charge groups.
4834 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4835 nvec, state->x, comm, FALSE);
4838 home_pos_cg -= *ncg_moved;
4842 gmx_incons("unimplemented");
4848 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4849 nvec, vec++, state->x, comm, bCompact);
4852 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4853 nvec, vec++, state->v, comm, bCompact);
4857 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4858 nvec, vec++, state->sd_X, comm, bCompact);
4862 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4863 nvec, vec++, state->cg_p, comm, bCompact);
4868 compact_ind(dd->ncg_home, move,
4869 dd->index_gl, dd->cgindex, dd->gatindex,
4870 dd->ga2la, comm->bLocalCG,
4875 if (fr->cutoff_scheme == ecutsVERLET)
4877 moved = get_moved(comm, dd->ncg_home);
4879 for (k = 0; k < dd->ncg_home; k++)
4886 moved = fr->ns.grid->cell_index;
4889 clear_and_mark_ind(dd->ncg_home, move,
4890 dd->index_gl, dd->cgindex, dd->gatindex,
4891 dd->ga2la, comm->bLocalCG,
4895 cginfo_mb = fr->cginfo_mb;
4897 *ncg_stay_home = home_pos_cg;
4898 for (d = 0; d < dd->ndim; d++)
4904 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4907 /* Communicate the cg and atom counts */
4912 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4913 d, dir, sbuf[0], sbuf[1]);
4915 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4917 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4919 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4920 srenew(comm->buf_int, comm->nalloc_int);
4923 /* Communicate the charge group indices, sizes and flags */
4924 dd_sendrecv_int(dd, d, dir,
4925 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4926 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4928 nvs = ncg[cdd] + nat[cdd]*nvec;
4929 i = rbuf[0] + rbuf[1] *nvec;
4930 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4932 /* Communicate cgcm and state */
4933 dd_sendrecv_rvec(dd, d, dir,
4934 comm->cgcm_state[cdd], nvs,
4935 comm->vbuf.v+nvr, i);
4936 ncg_recv += rbuf[0];
4937 nat_recv += rbuf[1];
4941 /* Process the received charge groups */
4943 for (cg = 0; cg < ncg_recv; cg++)
4945 flag = comm->buf_int[cg*DD_CGIBS+1];
4947 if (dim >= npbcdim && dd->nc[dim] > 2)
4949 /* No pbc in this dim and more than one domain boundary.
4950 * We do a separate check if a charge group didn't move too far.
4952 if (((flag & DD_FLAG_FW(d)) &&
4953 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4954 ((flag & DD_FLAG_BW(d)) &&
4955 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4957 cg_move_error(fplog, dd, step, cg, dim,
4958 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4959 fr->cutoff_scheme == ecutsGROUP, 0,
4960 comm->vbuf.v[buf_pos],
4961 comm->vbuf.v[buf_pos],
4962 comm->vbuf.v[buf_pos][dim]);
4969 /* Check which direction this cg should go */
4970 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4974 /* The cell boundaries for dimension d2 are not equal
4975 * for each cell row of the lower dimension(s),
4976 * therefore we might need to redetermine where
4977 * this cg should go.
4980 /* If this cg crosses the box boundary in dimension d2
4981 * we can use the communicated flag, so we do not
4982 * have to worry about pbc.
4984 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4985 (flag & DD_FLAG_FW(d2))) ||
4986 (dd->ci[dim2] == 0 &&
4987 (flag & DD_FLAG_BW(d2)))))
4989 /* Clear the two flags for this dimension */
4990 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4991 /* Determine the location of this cg
4992 * in lattice coordinates
4994 pos_d = comm->vbuf.v[buf_pos][dim2];
4997 for (d3 = dim2+1; d3 < DIM; d3++)
5000 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
5003 /* Check of we are not at the box edge.
5004 * pbc is only handled in the first step above,
5005 * but this check could move over pbc while
5006 * the first step did not due to different rounding.
5008 if (pos_d >= cell_x1[dim2] &&
5009 dd->ci[dim2] != dd->nc[dim2]-1)
5011 flag |= DD_FLAG_FW(d2);
5013 else if (pos_d < cell_x0[dim2] &&
5016 flag |= DD_FLAG_BW(d2);
5018 comm->buf_int[cg*DD_CGIBS+1] = flag;
5021 /* Set to which neighboring cell this cg should go */
5022 if (flag & DD_FLAG_FW(d2))
5026 else if (flag & DD_FLAG_BW(d2))
5028 if (dd->nc[dd->dim[d2]] > 2)
5040 nrcg = flag & DD_FLAG_NRCG;
5043 if (home_pos_cg+1 > dd->cg_nalloc)
5045 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5046 srenew(dd->index_gl, dd->cg_nalloc);
5047 srenew(dd->cgindex, dd->cg_nalloc+1);
5049 /* Set the global charge group index and size */
5050 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5051 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5052 /* Copy the state from the buffer */
5053 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5054 if (fr->cutoff_scheme == ecutsGROUP)
5057 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5061 /* Set the cginfo */
5062 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5063 dd->index_gl[home_pos_cg]);
5066 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5069 if (home_pos_at+nrcg > state->nalloc)
5071 dd_realloc_state(state, f, home_pos_at+nrcg);
5073 for (i = 0; i < nrcg; i++)
5075 copy_rvec(comm->vbuf.v[buf_pos++],
5076 state->x[home_pos_at+i]);
5080 for (i = 0; i < nrcg; i++)
5082 copy_rvec(comm->vbuf.v[buf_pos++],
5083 state->v[home_pos_at+i]);
5088 for (i = 0; i < nrcg; i++)
5090 copy_rvec(comm->vbuf.v[buf_pos++],
5091 state->sd_X[home_pos_at+i]);
5096 for (i = 0; i < nrcg; i++)
5098 copy_rvec(comm->vbuf.v[buf_pos++],
5099 state->cg_p[home_pos_at+i]);
5103 home_pos_at += nrcg;
5107 /* Reallocate the buffers if necessary */
5108 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5110 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5111 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5113 nvr = ncg[mc] + nat[mc]*nvec;
5114 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5116 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5117 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5119 /* Copy from the receive to the send buffers */
5120 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5121 comm->buf_int + cg*DD_CGIBS,
5122 DD_CGIBS*sizeof(int));
5123 memcpy(comm->cgcm_state[mc][nvr],
5124 comm->vbuf.v[buf_pos],
5125 (1+nrcg*nvec)*sizeof(rvec));
5126 buf_pos += 1 + nrcg*nvec;
5133 /* With sorting (!bCompact) the indices are now only partially up to date
5134 * and ncg_home and nat_home are not the real count, since there are
5135 * "holes" in the arrays for the charge groups that moved to neighbors.
5137 if (fr->cutoff_scheme == ecutsVERLET)
5139 moved = get_moved(comm, home_pos_cg);
5141 for (i = dd->ncg_home; i < home_pos_cg; i++)
5146 dd->ncg_home = home_pos_cg;
5147 dd->nat_home = home_pos_at;
5152 "Finished repartitioning: cgs moved out %d, new home %d\n",
5153 *ncg_moved, dd->ncg_home-*ncg_moved);
5158 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5160 dd->comm->cycl[ddCycl] += cycles;
5161 dd->comm->cycl_n[ddCycl]++;
5162 if (cycles > dd->comm->cycl_max[ddCycl])
5164 dd->comm->cycl_max[ddCycl] = cycles;
5168 static double force_flop_count(t_nrnb *nrnb)
5175 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5177 /* To get closer to the real timings, we half the count
5178 * for the normal loops and again half it for water loops.
5181 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5183 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5187 sum += nrnb->n[i]*0.50*cost_nrnb(i);
5190 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5193 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5195 sum += nrnb->n[i]*cost_nrnb(i);
5198 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5200 sum += nrnb->n[i]*cost_nrnb(i);
5206 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5208 if (dd->comm->eFlop)
5210 dd->comm->flop -= force_flop_count(nrnb);
5213 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5215 if (dd->comm->eFlop)
5217 dd->comm->flop += force_flop_count(nrnb);
5222 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5226 for (i = 0; i < ddCyclNr; i++)
5228 dd->comm->cycl[i] = 0;
5229 dd->comm->cycl_n[i] = 0;
5230 dd->comm->cycl_max[i] = 0;
5233 dd->comm->flop_n = 0;
5236 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5238 gmx_domdec_comm_t *comm;
5239 gmx_domdec_load_t *load;
5240 gmx_domdec_root_t *root = NULL;
5241 int d, dim, cid, i, pos;
5242 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5247 fprintf(debug, "get_load_distribution start\n");
5250 wallcycle_start(wcycle, ewcDDCOMMLOAD);
5254 bSepPME = (dd->pme_nodeid >= 0);
5256 for (d = dd->ndim-1; d >= 0; d--)
5259 /* Check if we participate in the communication in this dimension */
5260 if (d == dd->ndim-1 ||
5261 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5263 load = &comm->load[d];
5266 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
5269 if (d == dd->ndim-1)
5271 sbuf[pos++] = dd_force_load(comm);
5272 sbuf[pos++] = sbuf[0];
5275 sbuf[pos++] = sbuf[0];
5276 sbuf[pos++] = cell_frac;
5279 sbuf[pos++] = comm->cell_f_max0[d];
5280 sbuf[pos++] = comm->cell_f_min1[d];
5285 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5286 sbuf[pos++] = comm->cycl[ddCyclPME];
5291 sbuf[pos++] = comm->load[d+1].sum;
5292 sbuf[pos++] = comm->load[d+1].max;
5295 sbuf[pos++] = comm->load[d+1].sum_m;
5296 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5297 sbuf[pos++] = comm->load[d+1].flags;
5300 sbuf[pos++] = comm->cell_f_max0[d];
5301 sbuf[pos++] = comm->cell_f_min1[d];
5306 sbuf[pos++] = comm->load[d+1].mdf;
5307 sbuf[pos++] = comm->load[d+1].pme;
5311 /* Communicate a row in DD direction d.
5312 * The communicators are setup such that the root always has rank 0.
5315 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5316 load->load, load->nload*sizeof(float), MPI_BYTE,
5317 0, comm->mpi_comm_load[d]);
5319 if (dd->ci[dim] == dd->master_ci[dim])
5321 /* We are the root, process this row */
5322 if (comm->bDynLoadBal)
5324 root = comm->root[d];
5334 for (i = 0; i < dd->nc[dim]; i++)
5336 load->sum += load->load[pos++];
5337 load->max = max(load->max, load->load[pos]);
5343 /* This direction could not be load balanced properly,
5344 * therefore we need to use the maximum iso the average load.
5346 load->sum_m = max(load->sum_m, load->load[pos]);
5350 load->sum_m += load->load[pos];
5353 load->cvol_min = min(load->cvol_min, load->load[pos]);
5357 load->flags = (int)(load->load[pos++] + 0.5);
5361 root->cell_f_max0[i] = load->load[pos++];
5362 root->cell_f_min1[i] = load->load[pos++];
5367 load->mdf = max(load->mdf, load->load[pos]);
5369 load->pme = max(load->pme, load->load[pos]);
5373 if (comm->bDynLoadBal && root->bLimited)
5375 load->sum_m *= dd->nc[dim];
5376 load->flags |= (1<<d);
5384 comm->nload += dd_load_count(comm);
5385 comm->load_step += comm->cycl[ddCyclStep];
5386 comm->load_sum += comm->load[0].sum;
5387 comm->load_max += comm->load[0].max;
5388 if (comm->bDynLoadBal)
5390 for (d = 0; d < dd->ndim; d++)
5392 if (comm->load[0].flags & (1<<d))
5394 comm->load_lim[d]++;
5400 comm->load_mdf += comm->load[0].mdf;
5401 comm->load_pme += comm->load[0].pme;
5405 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5409 fprintf(debug, "get_load_distribution finished\n");
5413 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5415 /* Return the relative performance loss on the total run time
5416 * due to the force calculation load imbalance.
5418 if (dd->comm->nload > 0)
5421 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5422 (dd->comm->load_step*dd->nnodes);
5430 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5433 int npp, npme, nnodes, d, limp;
5434 float imbal, pme_f_ratio, lossf, lossp = 0;
5436 gmx_domdec_comm_t *comm;
5439 if (DDMASTER(dd) && comm->nload > 0)
5442 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5443 nnodes = npp + npme;
5444 imbal = comm->load_max*npp/comm->load_sum - 1;
5445 lossf = dd_force_imb_perf_loss(dd);
5446 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5447 fprintf(fplog, "%s", buf);
5448 fprintf(stderr, "\n");
5449 fprintf(stderr, "%s", buf);
5450 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5451 fprintf(fplog, "%s", buf);
5452 fprintf(stderr, "%s", buf);
5454 if (comm->bDynLoadBal)
5456 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5457 for (d = 0; d < dd->ndim; d++)
5459 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5460 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5466 sprintf(buf+strlen(buf), "\n");
5467 fprintf(fplog, "%s", buf);
5468 fprintf(stderr, "%s", buf);
5472 pme_f_ratio = comm->load_pme/comm->load_mdf;
5473 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5476 lossp *= (float)npme/(float)nnodes;
5480 lossp *= (float)npp/(float)nnodes;
5482 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5483 fprintf(fplog, "%s", buf);
5484 fprintf(stderr, "%s", buf);
5485 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5486 fprintf(fplog, "%s", buf);
5487 fprintf(stderr, "%s", buf);
5489 fprintf(fplog, "\n");
5490 fprintf(stderr, "\n");
5492 if (lossf >= DD_PERF_LOSS_WARN)
5495 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5496 " in the domain decomposition.\n", lossf*100);
5497 if (!comm->bDynLoadBal)
5499 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5503 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5505 fprintf(fplog, "%s\n", buf);
5506 fprintf(stderr, "%s\n", buf);
5508 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS_WARN)
5511 "NOTE: %.1f %% performance was lost because the PME ranks\n"
5512 " had %s work to do than the PP ranks.\n"
5513 " You might want to %s the number of PME ranks\n"
5514 " or %s the cut-off and the grid spacing.\n",
5516 (lossp < 0) ? "less" : "more",
5517 (lossp < 0) ? "decrease" : "increase",
5518 (lossp < 0) ? "decrease" : "increase");
5519 fprintf(fplog, "%s\n", buf);
5520 fprintf(stderr, "%s\n", buf);
5525 static float dd_vol_min(gmx_domdec_t *dd)
5527 return dd->comm->load[0].cvol_min*dd->nnodes;
5530 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5532 return dd->comm->load[0].flags;
5535 static float dd_f_imbal(gmx_domdec_t *dd)
5537 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5540 float dd_pme_f_ratio(gmx_domdec_t *dd)
5542 if (dd->comm->cycl_n[ddCyclPME] > 0)
5544 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5552 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5557 flags = dd_load_flags(dd);
5561 "DD load balancing is limited by minimum cell size in dimension");
5562 for (d = 0; d < dd->ndim; d++)
5566 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5569 fprintf(fplog, "\n");
5571 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5572 if (dd->comm->bDynLoadBal)
5574 fprintf(fplog, " vol min/aver %5.3f%c",
5575 dd_vol_min(dd), flags ? '!' : ' ');
5577 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5578 if (dd->comm->cycl_n[ddCyclPME])
5580 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5582 fprintf(fplog, "\n\n");
5585 static void dd_print_load_verbose(gmx_domdec_t *dd)
5587 if (dd->comm->bDynLoadBal)
5589 fprintf(stderr, "vol %4.2f%c ",
5590 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5592 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5593 if (dd->comm->cycl_n[ddCyclPME])
5595 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
5600 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5605 gmx_domdec_root_t *root;
5606 gmx_bool bPartOfGroup = FALSE;
5608 dim = dd->dim[dim_ind];
5609 copy_ivec(loc, loc_c);
5610 for (i = 0; i < dd->nc[dim]; i++)
5613 rank = dd_index(dd->nc, loc_c);
5614 if (rank == dd->rank)
5616 /* This process is part of the group */
5617 bPartOfGroup = TRUE;
5620 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5624 dd->comm->mpi_comm_load[dim_ind] = c_row;
5625 if (dd->comm->eDLB != edlbNO)
5627 if (dd->ci[dim] == dd->master_ci[dim])
5629 /* This is the root process of this row */
5630 snew(dd->comm->root[dim_ind], 1);
5631 root = dd->comm->root[dim_ind];
5632 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5633 snew(root->old_cell_f, dd->nc[dim]+1);
5634 snew(root->bCellMin, dd->nc[dim]);
5637 snew(root->cell_f_max0, dd->nc[dim]);
5638 snew(root->cell_f_min1, dd->nc[dim]);
5639 snew(root->bound_min, dd->nc[dim]);
5640 snew(root->bound_max, dd->nc[dim]);
5642 snew(root->buf_ncd, dd->nc[dim]);
5646 /* This is not a root process, we only need to receive cell_f */
5647 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5650 if (dd->ci[dim] == dd->master_ci[dim])
5652 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
5658 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5659 const gmx_hw_info_t gmx_unused *hwinfo,
5660 const gmx_hw_opt_t gmx_unused *hw_opt)
5663 int physicalnode_id_hash;
5666 MPI_Comm mpi_comm_pp_physicalnode;
5668 if (!(cr->duty & DUTY_PP) ||
5669 hw_opt->gpu_opt.ncuda_dev_use == 0)
5671 /* Only PP nodes (currently) use GPUs.
5672 * If we don't have GPUs, there are no resources to share.
5677 physicalnode_id_hash = gmx_physicalnode_id_hash();
5679 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5685 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5686 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5687 dd->rank, physicalnode_id_hash, gpu_id);
5689 /* Split the PP communicator over the physical nodes */
5690 /* TODO: See if we should store this (before), as it's also used for
5691 * for the nodecomm summution.
5693 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5694 &mpi_comm_pp_physicalnode);
5695 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5696 &dd->comm->mpi_comm_gpu_shared);
5697 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5698 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5702 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5705 /* Note that some ranks could share a GPU, while others don't */
5707 if (dd->comm->nrank_gpu_shared == 1)
5709 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
5714 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5717 int dim0, dim1, i, j;
5722 fprintf(debug, "Making load communicators\n");
5725 snew(dd->comm->load, dd->ndim);
5726 snew(dd->comm->mpi_comm_load, dd->ndim);
5729 make_load_communicator(dd, 0, loc);
5733 for (i = 0; i < dd->nc[dim0]; i++)
5736 make_load_communicator(dd, 1, loc);
5742 for (i = 0; i < dd->nc[dim0]; i++)
5746 for (j = 0; j < dd->nc[dim1]; j++)
5749 make_load_communicator(dd, 2, loc);
5756 fprintf(debug, "Finished making load communicators\n");
5761 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5764 int d, dim, i, j, m;
5767 ivec dd_zp[DD_MAXIZONE];
5768 gmx_domdec_zones_t *zones;
5769 gmx_domdec_ns_ranges_t *izone;
5771 for (d = 0; d < dd->ndim; d++)
5774 copy_ivec(dd->ci, tmp);
5775 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5776 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5777 copy_ivec(dd->ci, tmp);
5778 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5779 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5782 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5785 dd->neighbor[d][1]);
5791 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5793 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5794 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5801 for (i = 0; i < nzonep; i++)
5803 copy_ivec(dd_zp3[i], dd_zp[i]);
5809 for (i = 0; i < nzonep; i++)
5811 copy_ivec(dd_zp2[i], dd_zp[i]);
5817 for (i = 0; i < nzonep; i++)
5819 copy_ivec(dd_zp1[i], dd_zp[i]);
5823 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5828 zones = &dd->comm->zones;
5830 for (i = 0; i < nzone; i++)
5833 clear_ivec(zones->shift[i]);
5834 for (d = 0; d < dd->ndim; d++)
5836 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5841 for (i = 0; i < nzone; i++)
5843 for (d = 0; d < DIM; d++)
5845 s[d] = dd->ci[d] - zones->shift[i][d];
5850 else if (s[d] >= dd->nc[d])
5856 zones->nizone = nzonep;
5857 for (i = 0; i < zones->nizone; i++)
5859 if (dd_zp[i][0] != i)
5861 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5863 izone = &zones->izone[i];
5864 izone->j0 = dd_zp[i][1];
5865 izone->j1 = dd_zp[i][2];
5866 for (dim = 0; dim < DIM; dim++)
5868 if (dd->nc[dim] == 1)
5870 /* All shifts should be allowed */
5871 izone->shift0[dim] = -1;
5872 izone->shift1[dim] = 1;
5877 izone->shift0[d] = 0;
5878 izone->shift1[d] = 0;
5879 for(j=izone->j0; j<izone->j1; j++) {
5880 if (dd->shift[j][d] > dd->shift[i][d])
5881 izone->shift0[d] = -1;
5882 if (dd->shift[j][d] < dd->shift[i][d])
5883 izone->shift1[d] = 1;
5889 /* Assume the shift are not more than 1 cell */
5890 izone->shift0[dim] = 1;
5891 izone->shift1[dim] = -1;
5892 for (j = izone->j0; j < izone->j1; j++)
5894 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5895 if (shift_diff < izone->shift0[dim])
5897 izone->shift0[dim] = shift_diff;
5899 if (shift_diff > izone->shift1[dim])
5901 izone->shift1[dim] = shift_diff;
5908 if (dd->comm->eDLB != edlbNO)
5910 snew(dd->comm->root, dd->ndim);
5913 if (dd->comm->bRecordLoad)
5915 make_load_communicators(dd);
5919 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
5922 gmx_domdec_comm_t *comm;
5933 if (comm->bCartesianPP)
5935 /* Set up cartesian communication for the particle-particle part */
5938 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5939 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5942 for (i = 0; i < DIM; i++)
5946 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5948 /* We overwrite the old communicator with the new cartesian one */
5949 cr->mpi_comm_mygroup = comm_cart;
5952 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5953 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5955 if (comm->bCartesianPP_PME)
5957 /* Since we want to use the original cartesian setup for sim,
5958 * and not the one after split, we need to make an index.
5960 snew(comm->ddindex2ddnodeid, dd->nnodes);
5961 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5962 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5963 /* Get the rank of the DD master,
5964 * above we made sure that the master node is a PP node.
5974 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5976 else if (comm->bCartesianPP)
5978 if (cr->npmenodes == 0)
5980 /* The PP communicator is also
5981 * the communicator for this simulation
5983 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5985 cr->nodeid = dd->rank;
5987 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5989 /* We need to make an index to go from the coordinates
5990 * to the nodeid of this simulation.
5992 snew(comm->ddindex2simnodeid, dd->nnodes);
5993 snew(buf, dd->nnodes);
5994 if (cr->duty & DUTY_PP)
5996 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5998 /* Communicate the ddindex to simulation nodeid index */
5999 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6000 cr->mpi_comm_mysim);
6003 /* Determine the master coordinates and rank.
6004 * The DD master should be the same node as the master of this sim.
6006 for (i = 0; i < dd->nnodes; i++)
6008 if (comm->ddindex2simnodeid[i] == 0)
6010 ddindex2xyz(dd->nc, i, dd->master_ci);
6011 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
6016 fprintf(debug, "The master rank is %d\n", dd->masterrank);
6021 /* No Cartesian communicators */
6022 /* We use the rank in dd->comm->all as DD index */
6023 ddindex2xyz(dd->nc, dd->rank, dd->ci);
6024 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
6026 clear_ivec(dd->master_ci);
6033 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
6034 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6039 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
6040 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6044 static void receive_ddindex2simnodeid(t_commrec *cr)
6048 gmx_domdec_comm_t *comm;
6055 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6057 snew(comm->ddindex2simnodeid, dd->nnodes);
6058 snew(buf, dd->nnodes);
6059 if (cr->duty & DUTY_PP)
6061 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6064 /* Communicate the ddindex to simulation nodeid index */
6065 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6066 cr->mpi_comm_mysim);
6073 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6074 int ncg, int natoms)
6076 gmx_domdec_master_t *ma;
6081 snew(ma->ncg, dd->nnodes);
6082 snew(ma->index, dd->nnodes+1);
6084 snew(ma->nat, dd->nnodes);
6085 snew(ma->ibuf, dd->nnodes*2);
6086 snew(ma->cell_x, DIM);
6087 for (i = 0; i < DIM; i++)
6089 snew(ma->cell_x[i], dd->nc[i]+1);
6092 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6098 snew(ma->vbuf, natoms);
6104 static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
6105 int gmx_unused reorder)
6108 gmx_domdec_comm_t *comm;
6119 if (comm->bCartesianPP)
6121 for (i = 1; i < DIM; i++)
6123 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6125 if (bDiv[YY] || bDiv[ZZ])
6127 comm->bCartesianPP_PME = TRUE;
6128 /* If we have 2D PME decomposition, which is always in x+y,
6129 * we stack the PME only nodes in z.
6130 * Otherwise we choose the direction that provides the thinnest slab
6131 * of PME only nodes as this will have the least effect
6132 * on the PP communication.
6133 * But for the PME communication the opposite might be better.
6135 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6137 dd->nc[YY] > dd->nc[ZZ]))
6139 comm->cartpmedim = ZZ;
6143 comm->cartpmedim = YY;
6145 comm->ntot[comm->cartpmedim]
6146 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6150 fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6152 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6157 if (comm->bCartesianPP_PME)
6161 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6164 for (i = 0; i < DIM; i++)
6168 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6171 MPI_Comm_rank(comm_cart, &rank);
6172 if (MASTERNODE(cr) && rank != 0)
6174 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6177 /* With this assigment we loose the link to the original communicator
6178 * which will usually be MPI_COMM_WORLD, unless have multisim.
6180 cr->mpi_comm_mysim = comm_cart;
6181 cr->sim_nodeid = rank;
6183 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6187 fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
6188 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6191 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6195 if (cr->npmenodes == 0 ||
6196 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6198 cr->duty = DUTY_PME;
6201 /* Split the sim communicator into PP and PME only nodes */
6202 MPI_Comm_split(cr->mpi_comm_mysim,
6204 dd_index(comm->ntot, dd->ci),
6205 &cr->mpi_comm_mygroup);
6209 switch (dd_node_order)
6214 fprintf(fplog, "Order of the ranks: PP first, PME last\n");
6217 case ddnoINTERLEAVE:
6218 /* Interleave the PP-only and PME-only nodes,
6219 * as on clusters with dual-core machines this will double
6220 * the communication bandwidth of the PME processes
6221 * and thus speed up the PP <-> PME and inter PME communication.
6225 fprintf(fplog, "Interleaving PP and PME ranks\n");
6227 comm->pmenodes = dd_pmenodes(cr);
6232 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6235 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6237 cr->duty = DUTY_PME;
6244 /* Split the sim communicator into PP and PME only nodes */
6245 MPI_Comm_split(cr->mpi_comm_mysim,
6248 &cr->mpi_comm_mygroup);
6249 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6255 fprintf(fplog, "This rank does only %s work.\n\n",
6256 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
6260 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6263 gmx_domdec_comm_t *comm;
6269 copy_ivec(dd->nc, comm->ntot);
6271 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6272 comm->bCartesianPP_PME = FALSE;
6274 /* Reorder the nodes by default. This might change the MPI ranks.
6275 * Real reordering is only supported on very few architectures,
6276 * Blue Gene is one of them.
6278 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6280 if (cr->npmenodes > 0)
6282 /* Split the communicator into a PP and PME part */
6283 split_communicator(fplog, cr, dd_node_order, CartReorder);
6284 if (comm->bCartesianPP_PME)
6286 /* We (possibly) reordered the nodes in split_communicator,
6287 * so it is no longer required in make_pp_communicator.
6289 CartReorder = FALSE;
6294 /* All nodes do PP and PME */
6296 /* We do not require separate communicators */
6297 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6301 if (cr->duty & DUTY_PP)
6303 /* Copy or make a new PP communicator */
6304 make_pp_communicator(fplog, cr, CartReorder);
6308 receive_ddindex2simnodeid(cr);
6311 if (!(cr->duty & DUTY_PME))
6313 /* Set up the commnuication to our PME node */
6314 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6315 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6318 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6319 dd->pme_nodeid, dd->pme_receive_vir_ener);
6324 dd->pme_nodeid = -1;
6329 dd->ma = init_gmx_domdec_master_t(dd,
6331 comm->cgs_gl.index[comm->cgs_gl.nr]);
6335 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6337 real *slb_frac, tot;
6342 if (nc > 1 && size_string != NULL)
6346 fprintf(fplog, "Using static load balancing for the %s direction\n",
6351 for (i = 0; i < nc; i++)
6354 sscanf(size_string, "%lf%n", &dbl, &n);
6357 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6366 fprintf(fplog, "Relative cell sizes:");
6368 for (i = 0; i < nc; i++)
6373 fprintf(fplog, " %5.3f", slb_frac[i]);
6378 fprintf(fplog, "\n");
6385 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6388 gmx_mtop_ilistloop_t iloop;
6392 iloop = gmx_mtop_ilistloop_init(mtop);
6393 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6395 for (ftype = 0; ftype < F_NRE; ftype++)
6397 if ((interaction_function[ftype].flags & IF_BOND) &&
6400 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
6408 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6414 val = getenv(env_var);
6417 if (sscanf(val, "%d", &nst) <= 0)
6423 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6431 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6435 fprintf(stderr, "\n%s\n", warn_string);
6439 fprintf(fplog, "\n%s\n", warn_string);
6443 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6444 t_inputrec *ir, FILE *fplog)
6446 if (ir->ePBC == epbcSCREW &&
6447 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6449 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6452 if (ir->ns_type == ensSIMPLE)
6454 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6457 if (ir->nstlist == 0)
6459 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6462 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6464 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6468 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6473 r = ddbox->box_size[XX];
6474 for (di = 0; di < dd->ndim; di++)
6477 /* Check using the initial average cell size */
6478 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6484 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6485 const char *dlb_opt, gmx_bool bRecordLoad,
6486 unsigned long Flags, t_inputrec *ir)
6494 case 'a': eDLB = edlbAUTO; break;
6495 case 'n': eDLB = edlbNO; break;
6496 case 'y': eDLB = edlbYES; break;
6497 default: gmx_incons("Unknown dlb_opt");
6500 if (Flags & MD_RERUN)
6505 if (!EI_DYNAMICS(ir->eI))
6507 if (eDLB == edlbYES)
6509 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6510 dd_warning(cr, fplog, buf);
6518 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6523 if (Flags & MD_REPRODUCIBLE)
6530 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6534 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6537 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
6545 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6550 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6552 /* Decomposition order z,y,x */
6555 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6557 for (dim = DIM-1; dim >= 0; dim--)
6559 if (dd->nc[dim] > 1)
6561 dd->dim[dd->ndim++] = dim;
6567 /* Decomposition order x,y,z */
6568 for (dim = 0; dim < DIM; dim++)
6570 if (dd->nc[dim] > 1)
6572 dd->dim[dd->ndim++] = dim;
6578 static gmx_domdec_comm_t *init_dd_comm()
6580 gmx_domdec_comm_t *comm;
6584 snew(comm->cggl_flag, DIM*2);
6585 snew(comm->cgcm_state, DIM*2);
6586 for (i = 0; i < DIM*2; i++)
6588 comm->cggl_flag_nalloc[i] = 0;
6589 comm->cgcm_state_nalloc[i] = 0;
6592 comm->nalloc_int = 0;
6593 comm->buf_int = NULL;
6595 vec_rvec_init(&comm->vbuf);
6597 comm->n_load_have = 0;
6598 comm->n_load_collect = 0;
6600 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6602 comm->sum_nat[i] = 0;
6606 comm->load_step = 0;
6609 clear_ivec(comm->load_lim);
6616 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6617 unsigned long Flags,
6619 real comm_distance_min, real rconstr,
6620 const char *dlb_opt, real dlb_scale,
6621 const char *sizex, const char *sizey, const char *sizez,
6622 gmx_mtop_t *mtop, t_inputrec *ir,
6623 matrix box, rvec *x,
6625 int *npme_x, int *npme_y)
6628 gmx_domdec_comm_t *comm;
6631 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6638 "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
6643 dd->comm = init_dd_comm();
6645 snew(comm->cggl_flag, DIM*2);
6646 snew(comm->cgcm_state, DIM*2);
6648 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6649 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6651 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
6652 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
6653 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
6654 recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
6655 comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
6656 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
6657 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
6658 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
6660 dd->pme_recv_f_alloc = 0;
6661 dd->pme_recv_f_buf = NULL;
6663 if (dd->bSendRecv2 && fplog)
6665 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6671 fprintf(fplog, "Will load balance based on FLOP count\n");
6673 if (comm->eFlop > 1)
6675 srand(1+cr->nodeid);
6677 comm->bRecordLoad = TRUE;
6681 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6685 /* Initialize to GPU share count to 0, might change later */
6686 comm->nrank_gpu_shared = 0;
6688 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6690 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6693 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6695 dd->bGridJump = comm->bDynLoadBal;
6696 comm->bPMELoadBalDLBLimits = FALSE;
6698 if (comm->nstSortCG)
6702 if (comm->nstSortCG == 1)
6704 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6708 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6712 snew(comm->sort, 1);
6718 fprintf(fplog, "Will not sort the charge groups\n");
6722 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6724 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6725 if (comm->bInterCGBondeds)
6727 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6731 comm->bInterCGMultiBody = FALSE;
6734 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6735 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6737 if (ir->rlistlong == 0)
6739 /* Set the cut-off to some very large value,
6740 * so we don't need if statements everywhere in the code.
6741 * We use sqrt, since the cut-off is squared in some places.
6743 comm->cutoff = GMX_CUTOFF_INF;
6747 comm->cutoff = ir->rlistlong;
6749 comm->cutoff_mbody = 0;
6751 comm->cellsize_limit = 0;
6752 comm->bBondComm = FALSE;
6754 /* Atoms should be able to move by up to half the list buffer size (if > 0)
6755 * within nstlist steps. Since boundaries are allowed to displace by half
6756 * a cell size, DD cells should be at least the size of the list buffer.
6758 comm->cellsize_limit = max(comm->cellsize_limit,
6759 ir->rlistlong - max(ir->rvdw, ir->rcoulomb));
6761 if (comm->bInterCGBondeds)
6763 if (comm_distance_min > 0)
6765 comm->cutoff_mbody = comm_distance_min;
6766 if (Flags & MD_DDBONDCOMM)
6768 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6772 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6774 r_bonded_limit = comm->cutoff_mbody;
6776 else if (ir->bPeriodicMols)
6778 /* Can not easily determine the required cut-off */
6779 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6780 comm->cutoff_mbody = comm->cutoff/2;
6781 r_bonded_limit = comm->cutoff_mbody;
6787 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6788 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6790 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6791 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6793 /* We use an initial margin of 10% for the minimum cell size,
6794 * except when we are just below the non-bonded cut-off.
6796 if (Flags & MD_DDBONDCOMM)
6798 if (max(r_2b, r_mb) > comm->cutoff)
6800 r_bonded = max(r_2b, r_mb);
6801 r_bonded_limit = 1.1*r_bonded;
6802 comm->bBondComm = TRUE;
6807 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6809 /* We determine cutoff_mbody later */
6813 /* No special bonded communication,
6814 * simply increase the DD cut-off.
6816 r_bonded_limit = 1.1*max(r_2b, r_mb);
6817 comm->cutoff_mbody = r_bonded_limit;
6818 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6821 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6825 "Minimum cell size due to bonded interactions: %.3f nm\n",
6826 comm->cellsize_limit);
6830 if (dd->bInterCGcons && rconstr <= 0)
6832 /* There is a cell size limit due to the constraints (P-LINCS) */
6833 rconstr = constr_r_max(fplog, mtop, ir);
6837 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6839 if (rconstr > comm->cellsize_limit)
6841 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6845 else if (rconstr > 0 && fplog)
6847 /* Here we do not check for dd->bInterCGcons,
6848 * because one can also set a cell size limit for virtual sites only
6849 * and at this point we don't know yet if there are intercg v-sites.
6852 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6855 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6857 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6861 copy_ivec(nc, dd->nc);
6862 set_dd_dim(fplog, dd);
6863 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6865 if (cr->npmenodes == -1)
6869 acs = average_cellsize_min(dd, ddbox);
6870 if (acs < comm->cellsize_limit)
6874 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6876 gmx_fatal_collective(FARGS, cr, NULL,
6877 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6878 acs, comm->cellsize_limit);
6883 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6885 /* We need to choose the optimal DD grid and possibly PME nodes */
6886 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6887 comm->eDLB != edlbNO, dlb_scale,
6888 comm->cellsize_limit, comm->cutoff,
6889 comm->bInterCGBondeds);
6891 if (dd->nc[XX] == 0)
6893 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6894 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
6895 !bC ? "-rdd" : "-rcon",
6896 comm->eDLB != edlbNO ? " or -dds" : "",
6897 bC ? " or your LINCS settings" : "");
6899 gmx_fatal_collective(FARGS, cr, NULL,
6900 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
6902 "Look in the log file for details on the domain decomposition",
6903 cr->nnodes-cr->npmenodes, limit, buf);
6905 set_dd_dim(fplog, dd);
6911 "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
6912 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6915 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6916 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6918 gmx_fatal_collective(FARGS, cr, NULL,
6919 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
6920 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6922 if (cr->npmenodes > dd->nnodes)
6924 gmx_fatal_collective(FARGS, cr, NULL,
6925 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6927 if (cr->npmenodes > 0)
6929 comm->npmenodes = cr->npmenodes;
6933 comm->npmenodes = dd->nnodes;
6936 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6938 /* The following choices should match those
6939 * in comm_cost_est in domdec_setup.c.
6940 * Note that here the checks have to take into account
6941 * that the decomposition might occur in a different order than xyz
6942 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6943 * in which case they will not match those in comm_cost_est,
6944 * but since that is mainly for testing purposes that's fine.
6946 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6947 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6948 getenv("GMX_PMEONEDD") == NULL)
6950 comm->npmedecompdim = 2;
6951 comm->npmenodes_x = dd->nc[XX];
6952 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6956 /* In case nc is 1 in both x and y we could still choose to
6957 * decompose pme in y instead of x, but we use x for simplicity.
6959 comm->npmedecompdim = 1;
6960 if (dd->dim[0] == YY)
6962 comm->npmenodes_x = 1;
6963 comm->npmenodes_y = comm->npmenodes;
6967 comm->npmenodes_x = comm->npmenodes;
6968 comm->npmenodes_y = 1;
6973 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6974 comm->npmenodes_x, comm->npmenodes_y, 1);
6979 comm->npmedecompdim = 0;
6980 comm->npmenodes_x = 0;
6981 comm->npmenodes_y = 0;
6984 /* Technically we don't need both of these,
6985 * but it simplifies code not having to recalculate it.
6987 *npme_x = comm->npmenodes_x;
6988 *npme_y = comm->npmenodes_y;
6990 snew(comm->slb_frac, DIM);
6991 if (comm->eDLB == edlbNO)
6993 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6994 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6995 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
6998 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
7000 if (comm->bBondComm || comm->eDLB != edlbNO)
7002 /* Set the bonded communication distance to halfway
7003 * the minimum and the maximum,
7004 * since the extra communication cost is nearly zero.
7006 acs = average_cellsize_min(dd, ddbox);
7007 comm->cutoff_mbody = 0.5*(r_bonded + acs);
7008 if (comm->eDLB != edlbNO)
7010 /* Check if this does not limit the scaling */
7011 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
7013 if (!comm->bBondComm)
7015 /* Without bBondComm do not go beyond the n.b. cut-off */
7016 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
7017 if (comm->cellsize_limit >= comm->cutoff)
7019 /* We don't loose a lot of efficieny
7020 * when increasing it to the n.b. cut-off.
7021 * It can even be slightly faster, because we need
7022 * less checks for the communication setup.
7024 comm->cutoff_mbody = comm->cutoff;
7027 /* Check if we did not end up below our original limit */
7028 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
7030 if (comm->cutoff_mbody > comm->cellsize_limit)
7032 comm->cellsize_limit = comm->cutoff_mbody;
7035 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
7040 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
7041 "cellsize limit %f\n",
7042 comm->bBondComm, comm->cellsize_limit);
7047 check_dd_restrictions(cr, dd, ir, fplog);
7050 comm->partition_step = INT_MIN;
7053 clear_dd_cycle_counts(dd);
7058 static void set_dlb_limits(gmx_domdec_t *dd)
7063 for (d = 0; d < dd->ndim; d++)
7065 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7066 dd->comm->cellsize_min[dd->dim[d]] =
7067 dd->comm->cellsize_min_dlb[dd->dim[d]];
7072 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7075 gmx_domdec_comm_t *comm;
7085 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
7088 cellsize_min = comm->cellsize_min[dd->dim[0]];
7089 for (d = 1; d < dd->ndim; d++)
7091 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7094 if (cellsize_min < comm->cellsize_limit*1.05)
7096 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7098 /* Change DLB from "auto" to "no". */
7099 comm->eDLB = edlbNO;
7104 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7105 comm->bDynLoadBal = TRUE;
7106 dd->bGridJump = TRUE;
7110 /* We can set the required cell size info here,
7111 * so we do not need to communicate this.
7112 * The grid is completely uniform.
7114 for (d = 0; d < dd->ndim; d++)
7118 comm->load[d].sum_m = comm->load[d].sum;
7120 nc = dd->nc[dd->dim[d]];
7121 for (i = 0; i < nc; i++)
7123 comm->root[d]->cell_f[i] = i/(real)nc;
7126 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7127 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7130 comm->root[d]->cell_f[nc] = 1.0;
7135 static char *init_bLocalCG(gmx_mtop_t *mtop)
7140 ncg = ncg_mtop(mtop);
7141 snew(bLocalCG, ncg);
7142 for (cg = 0; cg < ncg; cg++)
7144 bLocalCG[cg] = FALSE;
7150 void dd_init_bondeds(FILE *fplog,
7151 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7153 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7155 gmx_domdec_comm_t *comm;
7159 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7163 if (comm->bBondComm)
7165 /* Communicate atoms beyond the cut-off for bonded interactions */
7168 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7170 comm->bLocalCG = init_bLocalCG(mtop);
7174 /* Only communicate atoms based on cut-off */
7175 comm->cglink = NULL;
7176 comm->bLocalCG = NULL;
7180 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7182 gmx_bool bDynLoadBal, real dlb_scale,
7185 gmx_domdec_comm_t *comm;
7200 fprintf(fplog, "The maximum number of communication pulses is:");
7201 for (d = 0; d < dd->ndim; d++)
7203 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7205 fprintf(fplog, "\n");
7206 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7207 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7208 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7209 for (d = 0; d < DIM; d++)
7213 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7220 comm->cellsize_min_dlb[d]/
7221 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7223 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7226 fprintf(fplog, "\n");
7230 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
7231 fprintf(fplog, "The initial number of communication pulses is:");
7232 for (d = 0; d < dd->ndim; d++)
7234 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7236 fprintf(fplog, "\n");
7237 fprintf(fplog, "The initial domain decomposition cell size is:");
7238 for (d = 0; d < DIM; d++)
7242 fprintf(fplog, " %c %.2f nm",
7243 dim2char(d), dd->comm->cellsize_min[d]);
7246 fprintf(fplog, "\n\n");
7249 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7251 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7252 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7253 "non-bonded interactions", "", comm->cutoff);
7257 limit = dd->comm->cellsize_limit;
7261 if (dynamic_dd_box(ddbox, ir))
7263 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7265 limit = dd->comm->cellsize_min[XX];
7266 for (d = 1; d < DIM; d++)
7268 limit = min(limit, dd->comm->cellsize_min[d]);
7272 if (comm->bInterCGBondeds)
7274 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7275 "two-body bonded interactions", "(-rdd)",
7276 max(comm->cutoff, comm->cutoff_mbody));
7277 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7278 "multi-body bonded interactions", "(-rdd)",
7279 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7283 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7284 "virtual site constructions", "(-rcon)", limit);
7286 if (dd->constraint_comm)
7288 sprintf(buf, "atoms separated by up to %d constraints",
7290 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7291 buf, "(-rcon)", limit);
7293 fprintf(fplog, "\n");
7299 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7301 const t_inputrec *ir,
7302 const gmx_ddbox_t *ddbox)
7304 gmx_domdec_comm_t *comm;
7305 int d, dim, npulse, npulse_d_max, npulse_d;
7310 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7312 /* Determine the maximum number of comm. pulses in one dimension */
7314 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7316 /* Determine the maximum required number of grid pulses */
7317 if (comm->cellsize_limit >= comm->cutoff)
7319 /* Only a single pulse is required */
7322 else if (!bNoCutOff && comm->cellsize_limit > 0)
7324 /* We round down slightly here to avoid overhead due to the latency
7325 * of extra communication calls when the cut-off
7326 * would be only slightly longer than the cell size.
7327 * Later cellsize_limit is redetermined,
7328 * so we can not miss interactions due to this rounding.
7330 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7334 /* There is no cell size limit */
7335 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7338 if (!bNoCutOff && npulse > 1)
7340 /* See if we can do with less pulses, based on dlb_scale */
7342 for (d = 0; d < dd->ndim; d++)
7345 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7346 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7347 npulse_d_max = max(npulse_d_max, npulse_d);
7349 npulse = min(npulse, npulse_d_max);
7352 /* This env var can override npulse */
7353 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
7360 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7361 for (d = 0; d < dd->ndim; d++)
7363 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7364 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7365 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7366 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7367 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7369 comm->bVacDLBNoLimit = FALSE;
7373 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7374 if (!comm->bVacDLBNoLimit)
7376 comm->cellsize_limit = max(comm->cellsize_limit,
7377 comm->cutoff/comm->maxpulse);
7379 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7380 /* Set the minimum cell size for each DD dimension */
7381 for (d = 0; d < dd->ndim; d++)
7383 if (comm->bVacDLBNoLimit ||
7384 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7386 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7390 comm->cellsize_min_dlb[dd->dim[d]] =
7391 comm->cutoff/comm->cd[d].np_dlb;
7394 if (comm->cutoff_mbody <= 0)
7396 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7398 if (comm->bDynLoadBal)
7404 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7406 /* If each molecule is a single charge group
7407 * or we use domain decomposition for each periodic dimension,
7408 * we do not need to take pbc into account for the bonded interactions.
7410 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7413 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
7416 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7417 t_inputrec *ir, gmx_ddbox_t *ddbox)
7419 gmx_domdec_comm_t *comm;
7425 /* Initialize the thread data.
7426 * This can not be done in init_domain_decomposition,
7427 * as the numbers of threads is determined later.
7429 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7432 snew(comm->dth, comm->nth);
7435 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7437 init_ddpme(dd, &comm->ddpme[0], 0);
7438 if (comm->npmedecompdim >= 2)
7440 init_ddpme(dd, &comm->ddpme[1], 1);
7445 comm->npmenodes = 0;
7446 if (dd->pme_nodeid >= 0)
7448 gmx_fatal_collective(FARGS, NULL, dd,
7449 "Can not have separate PME ranks without PME electrostatics");
7455 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7457 if (comm->eDLB != edlbNO)
7459 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7462 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7463 if (comm->eDLB == edlbAUTO)
7467 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7469 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
7472 if (ir->ePBC == epbcNONE)
7474 vol_frac = 1 - 1/(double)dd->nnodes;
7479 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7483 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7485 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7487 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
7490 static gmx_bool test_dd_cutoff(t_commrec *cr,
7491 t_state *state, t_inputrec *ir,
7502 set_ddbox(dd, FALSE, cr, ir, state->box,
7503 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7507 for (d = 0; d < dd->ndim; d++)
7511 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7512 if (dynamic_dd_box(&ddbox, ir))
7514 inv_cell_size *= DD_PRES_SCALE_MARGIN;
7517 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7519 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7520 dd->comm->cd[d].np_dlb > 0)
7522 if (np > dd->comm->cd[d].np_dlb)
7527 /* If a current local cell size is smaller than the requested
7528 * cut-off, we could still fix it, but this gets very complicated.
7529 * Without fixing here, we might actually need more checks.
7531 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7538 if (dd->comm->eDLB != edlbNO)
7540 /* If DLB is not active yet, we don't need to check the grid jumps.
7541 * Actually we shouldn't, because then the grid jump data is not set.
7543 if (dd->comm->bDynLoadBal &&
7544 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7549 gmx_sumi(1, &LocallyLimited, cr);
7551 if (LocallyLimited > 0)
7560 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7563 gmx_bool bCutoffAllowed;
7565 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7569 cr->dd->comm->cutoff = cutoff_req;
7572 return bCutoffAllowed;
7575 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7577 gmx_domdec_comm_t *comm;
7579 comm = cr->dd->comm;
7581 /* Turn on the DLB limiting (might have been on already) */
7582 comm->bPMELoadBalDLBLimits = TRUE;
7584 /* Change the cut-off limit */
7585 comm->PMELoadBal_max_cutoff = comm->cutoff;
7588 static void merge_cg_buffers(int ncell,
7589 gmx_domdec_comm_dim_t *cd, int pulse,
7591 int *index_gl, int *recv_i,
7592 rvec *cg_cm, rvec *recv_vr,
7594 cginfo_mb_t *cginfo_mb, int *cginfo)
7596 gmx_domdec_ind_t *ind, *ind_p;
7597 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7598 int shift, shift_at;
7600 ind = &cd->ind[pulse];
7602 /* First correct the already stored data */
7603 shift = ind->nrecv[ncell];
7604 for (cell = ncell-1; cell >= 0; cell--)
7606 shift -= ind->nrecv[cell];
7609 /* Move the cg's present from previous grid pulses */
7610 cg0 = ncg_cell[ncell+cell];
7611 cg1 = ncg_cell[ncell+cell+1];
7612 cgindex[cg1+shift] = cgindex[cg1];
7613 for (cg = cg1-1; cg >= cg0; cg--)
7615 index_gl[cg+shift] = index_gl[cg];
7616 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7617 cgindex[cg+shift] = cgindex[cg];
7618 cginfo[cg+shift] = cginfo[cg];
7620 /* Correct the already stored send indices for the shift */
7621 for (p = 1; p <= pulse; p++)
7623 ind_p = &cd->ind[p];
7625 for (c = 0; c < cell; c++)
7627 cg0 += ind_p->nsend[c];
7629 cg1 = cg0 + ind_p->nsend[cell];
7630 for (cg = cg0; cg < cg1; cg++)
7632 ind_p->index[cg] += shift;
7638 /* Merge in the communicated buffers */
7642 for (cell = 0; cell < ncell; cell++)
7644 cg1 = ncg_cell[ncell+cell+1] + shift;
7647 /* Correct the old cg indices */
7648 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7650 cgindex[cg+1] += shift_at;
7653 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7655 /* Copy this charge group from the buffer */
7656 index_gl[cg1] = recv_i[cg0];
7657 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7658 /* Add it to the cgindex */
7659 cg_gl = index_gl[cg1];
7660 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7661 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7662 cgindex[cg1+1] = cgindex[cg1] + nat;
7667 shift += ind->nrecv[cell];
7668 ncg_cell[ncell+cell+1] = cg1;
7672 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7673 int nzone, int cg0, const int *cgindex)
7677 /* Store the atom block boundaries for easy copying of communication buffers
7680 for (zone = 0; zone < nzone; zone++)
7682 for (p = 0; p < cd->np; p++)
7684 cd->ind[p].cell2at0[zone] = cgindex[cg];
7685 cg += cd->ind[p].nrecv[zone];
7686 cd->ind[p].cell2at1[zone] = cgindex[cg];
7691 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7697 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7699 if (!bLocalCG[link->a[i]])
7708 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7710 real c[DIM][4]; /* the corners for the non-bonded communication */
7711 real cr0; /* corner for rounding */
7712 real cr1[4]; /* corners for rounding */
7713 real bc[DIM]; /* corners for bounded communication */
7714 real bcr1; /* corner for rounding for bonded communication */
7717 /* Determine the corners of the domain(s) we are communicating with */
7719 set_dd_corners(const gmx_domdec_t *dd,
7720 int dim0, int dim1, int dim2,
7724 const gmx_domdec_comm_t *comm;
7725 const gmx_domdec_zones_t *zones;
7730 zones = &comm->zones;
7732 /* Keep the compiler happy */
7736 /* The first dimension is equal for all cells */
7737 c->c[0][0] = comm->cell_x0[dim0];
7740 c->bc[0] = c->c[0][0];
7745 /* This cell row is only seen from the first row */
7746 c->c[1][0] = comm->cell_x0[dim1];
7747 /* All rows can see this row */
7748 c->c[1][1] = comm->cell_x0[dim1];
7751 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7754 /* For the multi-body distance we need the maximum */
7755 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7758 /* Set the upper-right corner for rounding */
7759 c->cr0 = comm->cell_x1[dim0];
7764 for (j = 0; j < 4; j++)
7766 c->c[2][j] = comm->cell_x0[dim2];
7770 /* Use the maximum of the i-cells that see a j-cell */
7771 for (i = 0; i < zones->nizone; i++)
7773 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7779 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7785 /* For the multi-body distance we need the maximum */
7786 c->bc[2] = comm->cell_x0[dim2];
7787 for (i = 0; i < 2; i++)
7789 for (j = 0; j < 2; j++)
7791 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7797 /* Set the upper-right corner for rounding */
7798 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7799 * Only cell (0,0,0) can see cell 7 (1,1,1)
7801 c->cr1[0] = comm->cell_x1[dim1];
7802 c->cr1[3] = comm->cell_x1[dim1];
7805 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7808 /* For the multi-body distance we need the maximum */
7809 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7816 /* Determine which cg's we need to send in this pulse from this zone */
7818 get_zone_pulse_cgs(gmx_domdec_t *dd,
7819 int zonei, int zone,
7821 const int *index_gl,
7823 int dim, int dim_ind,
7824 int dim0, int dim1, int dim2,
7825 real r_comm2, real r_bcomm2,
7829 real skew_fac2_d, real skew_fac_01,
7830 rvec *v_d, rvec *v_0, rvec *v_1,
7831 const dd_corners_t *c,
7833 gmx_bool bDistBonded,
7839 gmx_domdec_ind_t *ind,
7840 int **ibuf, int *ibuf_nalloc,
7846 gmx_domdec_comm_t *comm;
7848 gmx_bool bDistMB_pulse;
7850 real r2, rb2, r, tric_sh;
7853 int nsend_z, nsend, nat;
7857 bScrew = (dd->bScrewPBC && dim == XX);
7859 bDistMB_pulse = (bDistMB && bDistBonded);
7865 for (cg = cg0; cg < cg1; cg++)
7869 if (tric_dist[dim_ind] == 0)
7871 /* Rectangular direction, easy */
7872 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7879 r = cg_cm[cg][dim] - c->bc[dim_ind];
7885 /* Rounding gives at most a 16% reduction
7886 * in communicated atoms
7888 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7890 r = cg_cm[cg][dim0] - c->cr0;
7891 /* This is the first dimension, so always r >= 0 */
7898 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7900 r = cg_cm[cg][dim1] - c->cr1[zone];
7907 r = cg_cm[cg][dim1] - c->bcr1;
7917 /* Triclinic direction, more complicated */
7920 /* Rounding, conservative as the skew_fac multiplication
7921 * will slightly underestimate the distance.
7923 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7925 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7926 for (i = dim0+1; i < DIM; i++)
7928 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7930 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7933 rb[dim0] = rn[dim0];
7936 /* Take care that the cell planes along dim0 might not
7937 * be orthogonal to those along dim1 and dim2.
7939 for (i = 1; i <= dim_ind; i++)
7942 if (normal[dim0][dimd] > 0)
7944 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7947 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7952 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7954 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7956 for (i = dim1+1; i < DIM; i++)
7958 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7960 rn[dim1] += tric_sh;
7963 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7964 /* Take care of coupling of the distances
7965 * to the planes along dim0 and dim1 through dim2.
7967 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7968 /* Take care that the cell planes along dim1
7969 * might not be orthogonal to that along dim2.
7971 if (normal[dim1][dim2] > 0)
7973 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7979 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7982 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7983 /* Take care of coupling of the distances
7984 * to the planes along dim0 and dim1 through dim2.
7986 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7987 /* Take care that the cell planes along dim1
7988 * might not be orthogonal to that along dim2.
7990 if (normal[dim1][dim2] > 0)
7992 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7997 /* The distance along the communication direction */
7998 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
8000 for (i = dim+1; i < DIM; i++)
8002 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
8007 r2 += rn[dim]*rn[dim]*skew_fac2_d;
8008 /* Take care of coupling of the distances
8009 * to the planes along dim0 and dim1 through dim2.
8011 if (dim_ind == 1 && zonei == 1)
8013 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
8019 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
8022 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
8023 /* Take care of coupling of the distances
8024 * to the planes along dim0 and dim1 through dim2.
8026 if (dim_ind == 1 && zonei == 1)
8028 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
8036 ((bDistMB && rb2 < r_bcomm2) ||
8037 (bDist2B && r2 < r_bcomm2)) &&
8039 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
8040 missing_link(comm->cglink, index_gl[cg],
8043 /* Make an index to the local charge groups */
8044 if (nsend+1 > ind->nalloc)
8046 ind->nalloc = over_alloc_large(nsend+1);
8047 srenew(ind->index, ind->nalloc);
8049 if (nsend+1 > *ibuf_nalloc)
8051 *ibuf_nalloc = over_alloc_large(nsend+1);
8052 srenew(*ibuf, *ibuf_nalloc);
8054 ind->index[nsend] = cg;
8055 (*ibuf)[nsend] = index_gl[cg];
8057 vec_rvec_check_alloc(vbuf, nsend+1);
8059 if (dd->ci[dim] == 0)
8061 /* Correct cg_cm for pbc */
8062 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
8065 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8066 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8071 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8074 nat += cgindex[cg+1] - cgindex[cg];
8080 *nsend_z_ptr = nsend_z;
8083 static void setup_dd_communication(gmx_domdec_t *dd,
8084 matrix box, gmx_ddbox_t *ddbox,
8085 t_forcerec *fr, t_state *state, rvec **f)
8087 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8088 int nzone, nzone_send, zone, zonei, cg0, cg1;
8089 int c, i, j, cg, cg_gl, nrcg;
8090 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8091 gmx_domdec_comm_t *comm;
8092 gmx_domdec_zones_t *zones;
8093 gmx_domdec_comm_dim_t *cd;
8094 gmx_domdec_ind_t *ind;
8095 cginfo_mb_t *cginfo_mb;
8096 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8097 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8098 dd_corners_t corners;
8100 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8101 real skew_fac2_d, skew_fac_01;
8108 fprintf(debug, "Setting up DD communication\n");
8113 switch (fr->cutoff_scheme)
8122 gmx_incons("unimplemented");
8126 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8128 dim = dd->dim[dim_ind];
8130 /* Check if we need to use triclinic distances */
8131 tric_dist[dim_ind] = 0;
8132 for (i = 0; i <= dim_ind; i++)
8134 if (ddbox->tric_dir[dd->dim[i]])
8136 tric_dist[dim_ind] = 1;
8141 bBondComm = comm->bBondComm;
8143 /* Do we need to determine extra distances for multi-body bondeds? */
8144 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8146 /* Do we need to determine extra distances for only two-body bondeds? */
8147 bDist2B = (bBondComm && !bDistMB);
8149 r_comm2 = sqr(comm->cutoff);
8150 r_bcomm2 = sqr(comm->cutoff_mbody);
8154 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8157 zones = &comm->zones;
8160 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8161 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8163 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8165 /* Triclinic stuff */
8166 normal = ddbox->normal;
8170 v_0 = ddbox->v[dim0];
8171 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8173 /* Determine the coupling coefficient for the distances
8174 * to the cell planes along dim0 and dim1 through dim2.
8175 * This is required for correct rounding.
8178 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8181 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8187 v_1 = ddbox->v[dim1];
8190 zone_cg_range = zones->cg_range;
8191 index_gl = dd->index_gl;
8192 cgindex = dd->cgindex;
8193 cginfo_mb = fr->cginfo_mb;
8195 zone_cg_range[0] = 0;
8196 zone_cg_range[1] = dd->ncg_home;
8197 comm->zone_ncg1[0] = dd->ncg_home;
8198 pos_cg = dd->ncg_home;
8200 nat_tot = dd->nat_home;
8202 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8204 dim = dd->dim[dim_ind];
8205 cd = &comm->cd[dim_ind];
8207 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8209 /* No pbc in this dimension, the first node should not comm. */
8217 v_d = ddbox->v[dim];
8218 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8220 cd->bInPlace = TRUE;
8221 for (p = 0; p < cd->np; p++)
8223 /* Only atoms communicated in the first pulse are used
8224 * for multi-body bonded interactions or for bBondComm.
8226 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8231 for (zone = 0; zone < nzone_send; zone++)
8233 if (tric_dist[dim_ind] && dim_ind > 0)
8235 /* Determine slightly more optimized skew_fac's
8237 * This reduces the number of communicated atoms
8238 * by about 10% for 3D DD of rhombic dodecahedra.
8240 for (dimd = 0; dimd < dim; dimd++)
8242 sf2_round[dimd] = 1;
8243 if (ddbox->tric_dir[dimd])
8245 for (i = dd->dim[dimd]+1; i < DIM; i++)
8247 /* If we are shifted in dimension i
8248 * and the cell plane is tilted forward
8249 * in dimension i, skip this coupling.
8251 if (!(zones->shift[nzone+zone][i] &&
8252 ddbox->v[dimd][i][dimd] >= 0))
8255 sqr(ddbox->v[dimd][i][dimd]);
8258 sf2_round[dimd] = 1/sf2_round[dimd];
8263 zonei = zone_perm[dim_ind][zone];
8266 /* Here we permutate the zones to obtain a convenient order
8267 * for neighbor searching
8269 cg0 = zone_cg_range[zonei];
8270 cg1 = zone_cg_range[zonei+1];
8274 /* Look only at the cg's received in the previous grid pulse
8276 cg1 = zone_cg_range[nzone+zone+1];
8277 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
8280 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8281 for (th = 0; th < comm->nth; th++)
8283 gmx_domdec_ind_t *ind_p;
8284 int **ibuf_p, *ibuf_nalloc_p;
8286 int *nsend_p, *nat_p;
8292 /* Thread 0 writes in the comm buffers */
8294 ibuf_p = &comm->buf_int;
8295 ibuf_nalloc_p = &comm->nalloc_int;
8296 vbuf_p = &comm->vbuf;
8299 nsend_zone_p = &ind->nsend[zone];
8303 /* Other threads write into temp buffers */
8304 ind_p = &comm->dth[th].ind;
8305 ibuf_p = &comm->dth[th].ibuf;
8306 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8307 vbuf_p = &comm->dth[th].vbuf;
8308 nsend_p = &comm->dth[th].nsend;
8309 nat_p = &comm->dth[th].nat;
8310 nsend_zone_p = &comm->dth[th].nsend_zone;
8312 comm->dth[th].nsend = 0;
8313 comm->dth[th].nat = 0;
8314 comm->dth[th].nsend_zone = 0;
8324 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8325 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8328 /* Get the cg's for this pulse in this zone */
8329 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8331 dim, dim_ind, dim0, dim1, dim2,
8334 normal, skew_fac2_d, skew_fac_01,
8335 v_d, v_0, v_1, &corners, sf2_round,
8336 bDistBonded, bBondComm,
8340 ibuf_p, ibuf_nalloc_p,
8346 /* Append data of threads>=1 to the communication buffers */
8347 for (th = 1; th < comm->nth; th++)
8349 dd_comm_setup_work_t *dth;
8352 dth = &comm->dth[th];
8354 ns1 = nsend + dth->nsend_zone;
8355 if (ns1 > ind->nalloc)
8357 ind->nalloc = over_alloc_dd(ns1);
8358 srenew(ind->index, ind->nalloc);
8360 if (ns1 > comm->nalloc_int)
8362 comm->nalloc_int = over_alloc_dd(ns1);
8363 srenew(comm->buf_int, comm->nalloc_int);
8365 if (ns1 > comm->vbuf.nalloc)
8367 comm->vbuf.nalloc = over_alloc_dd(ns1);
8368 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8371 for (i = 0; i < dth->nsend_zone; i++)
8373 ind->index[nsend] = dth->ind.index[i];
8374 comm->buf_int[nsend] = dth->ibuf[i];
8375 copy_rvec(dth->vbuf.v[i],
8376 comm->vbuf.v[nsend]);
8380 ind->nsend[zone] += dth->nsend_zone;
8383 /* Clear the counts in case we do not have pbc */
8384 for (zone = nzone_send; zone < nzone; zone++)
8386 ind->nsend[zone] = 0;
8388 ind->nsend[nzone] = nsend;
8389 ind->nsend[nzone+1] = nat;
8390 /* Communicate the number of cg's and atoms to receive */
8391 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8392 ind->nsend, nzone+2,
8393 ind->nrecv, nzone+2);
8395 /* The rvec buffer is also required for atom buffers of size nsend
8396 * in dd_move_x and dd_move_f.
8398 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8402 /* We can receive in place if only the last zone is not empty */
8403 for (zone = 0; zone < nzone-1; zone++)
8405 if (ind->nrecv[zone] > 0)
8407 cd->bInPlace = FALSE;
8412 /* The int buffer is only required here for the cg indices */
8413 if (ind->nrecv[nzone] > comm->nalloc_int2)
8415 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8416 srenew(comm->buf_int2, comm->nalloc_int2);
8418 /* The rvec buffer is also required for atom buffers
8419 * of size nrecv in dd_move_x and dd_move_f.
8421 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8422 vec_rvec_check_alloc(&comm->vbuf2, i);
8426 /* Make space for the global cg indices */
8427 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8428 || dd->cg_nalloc == 0)
8430 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8431 srenew(index_gl, dd->cg_nalloc);
8432 srenew(cgindex, dd->cg_nalloc+1);
8434 /* Communicate the global cg indices */
8437 recv_i = index_gl + pos_cg;
8441 recv_i = comm->buf_int2;
8443 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8444 comm->buf_int, nsend,
8445 recv_i, ind->nrecv[nzone]);
8447 /* Make space for cg_cm */
8448 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8449 if (fr->cutoff_scheme == ecutsGROUP)
8457 /* Communicate cg_cm */
8460 recv_vr = cg_cm + pos_cg;
8464 recv_vr = comm->vbuf2.v;
8466 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8467 comm->vbuf.v, nsend,
8468 recv_vr, ind->nrecv[nzone]);
8470 /* Make the charge group index */
8473 zone = (p == 0 ? 0 : nzone - 1);
8474 while (zone < nzone)
8476 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8478 cg_gl = index_gl[pos_cg];
8479 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8480 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8481 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8484 /* Update the charge group presence,
8485 * so we can use it in the next pass of the loop.
8487 comm->bLocalCG[cg_gl] = TRUE;
8493 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8496 zone_cg_range[nzone+zone] = pos_cg;
8501 /* This part of the code is never executed with bBondComm. */
8502 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8503 index_gl, recv_i, cg_cm, recv_vr,
8504 cgindex, fr->cginfo_mb, fr->cginfo);
8505 pos_cg += ind->nrecv[nzone];
8507 nat_tot += ind->nrecv[nzone+1];
8511 /* Store the atom block for easy copying of communication buffers */
8512 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
8516 dd->index_gl = index_gl;
8517 dd->cgindex = cgindex;
8519 dd->ncg_tot = zone_cg_range[zones->n];
8520 dd->nat_tot = nat_tot;
8521 comm->nat[ddnatHOME] = dd->nat_home;
8522 for (i = ddnatZONE; i < ddnatNR; i++)
8524 comm->nat[i] = dd->nat_tot;
8529 /* We don't need to update cginfo, since that was alrady done above.
8530 * So we pass NULL for the forcerec.
8532 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8533 NULL, comm->bLocalCG);
8538 fprintf(debug, "Finished setting up DD communication, zones:");
8539 for (c = 0; c < zones->n; c++)
8541 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8543 fprintf(debug, "\n");
8547 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8551 for (c = 0; c < zones->nizone; c++)
8553 zones->izone[c].cg1 = zones->cg_range[c+1];
8554 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8555 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
8559 static void set_zones_size(gmx_domdec_t *dd,
8560 matrix box, const gmx_ddbox_t *ddbox,
8561 int zone_start, int zone_end)
8563 gmx_domdec_comm_t *comm;
8564 gmx_domdec_zones_t *zones;
8566 int z, zi, zj0, zj1, d, dim;
8569 real size_j, add_tric;
8574 zones = &comm->zones;
8576 /* Do we need to determine extra distances for multi-body bondeds? */
8577 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8579 for (z = zone_start; z < zone_end; z++)
8581 /* Copy cell limits to zone limits.
8582 * Valid for non-DD dims and non-shifted dims.
8584 copy_rvec(comm->cell_x0, zones->size[z].x0);
8585 copy_rvec(comm->cell_x1, zones->size[z].x1);
8588 for (d = 0; d < dd->ndim; d++)
8592 for (z = 0; z < zones->n; z++)
8594 /* With a staggered grid we have different sizes
8595 * for non-shifted dimensions.
8597 if (dd->bGridJump && zones->shift[z][dim] == 0)
8601 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8602 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8606 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8607 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
8613 rcmbs = comm->cutoff_mbody;
8614 if (ddbox->tric_dir[dim])
8616 rcs /= ddbox->skew_fac[dim];
8617 rcmbs /= ddbox->skew_fac[dim];
8620 /* Set the lower limit for the shifted zone dimensions */
8621 for (z = zone_start; z < zone_end; z++)
8623 if (zones->shift[z][dim] > 0)
8626 if (!dd->bGridJump || d == 0)
8628 zones->size[z].x0[dim] = comm->cell_x1[dim];
8629 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8633 /* Here we take the lower limit of the zone from
8634 * the lowest domain of the zone below.
8638 zones->size[z].x0[dim] =
8639 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8645 zones->size[z].x0[dim] =
8646 zones->size[zone_perm[2][z-4]].x0[dim];
8650 zones->size[z].x0[dim] =
8651 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8654 /* A temporary limit, is updated below */
8655 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8659 for (zi = 0; zi < zones->nizone; zi++)
8661 if (zones->shift[zi][dim] == 0)
8663 /* This takes the whole zone into account.
8664 * With multiple pulses this will lead
8665 * to a larger zone then strictly necessary.
8667 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8668 zones->size[zi].x1[dim]+rcmbs);
8676 /* Loop over the i-zones to set the upper limit of each
8679 for (zi = 0; zi < zones->nizone; zi++)
8681 if (zones->shift[zi][dim] == 0)
8683 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8685 if (zones->shift[z][dim] > 0)
8687 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8688 zones->size[zi].x1[dim]+rcs);
8695 for (z = zone_start; z < zone_end; z++)
8697 /* Initialization only required to keep the compiler happy */
8698 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8701 /* To determine the bounding box for a zone we need to find
8702 * the extreme corners of 4, 2 or 1 corners.
8704 nc = 1 << (ddbox->npbcdim - 1);
8706 for (c = 0; c < nc; c++)
8708 /* Set up a zone corner at x=0, ignoring trilinic couplings */
8712 corner[YY] = zones->size[z].x0[YY];
8716 corner[YY] = zones->size[z].x1[YY];
8720 corner[ZZ] = zones->size[z].x0[ZZ];
8724 corner[ZZ] = zones->size[z].x1[ZZ];
8726 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8728 /* With 1D domain decomposition the cg's are not in
8729 * the triclinic box, but triclinic x-y and rectangular y-z.
8730 * Shift y back, so it will later end up at 0.
8732 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8734 /* Apply the triclinic couplings */
8735 assert(ddbox->npbcdim <= DIM);
8736 for (i = YY; i < ddbox->npbcdim; i++)
8738 for (j = XX; j < i; j++)
8740 corner[j] += corner[i]*box[i][j]/box[i][i];
8745 copy_rvec(corner, corner_min);
8746 copy_rvec(corner, corner_max);
8750 for (i = 0; i < DIM; i++)
8752 corner_min[i] = min(corner_min[i], corner[i]);
8753 corner_max[i] = max(corner_max[i], corner[i]);
8757 /* Copy the extreme cornes without offset along x */
8758 for (i = 0; i < DIM; i++)
8760 zones->size[z].bb_x0[i] = corner_min[i];
8761 zones->size[z].bb_x1[i] = corner_max[i];
8763 /* Add the offset along x */
8764 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8765 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
8768 if (zone_start == 0)
8771 for (dim = 0; dim < DIM; dim++)
8773 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8775 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8780 for (z = zone_start; z < zone_end; z++)
8782 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8784 zones->size[z].x0[XX], zones->size[z].x1[XX],
8785 zones->size[z].x0[YY], zones->size[z].x1[YY],
8786 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8787 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8789 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8790 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8791 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
8796 static int comp_cgsort(const void *a, const void *b)
8800 gmx_cgsort_t *cga, *cgb;
8801 cga = (gmx_cgsort_t *)a;
8802 cgb = (gmx_cgsort_t *)b;
8804 comp = cga->nsc - cgb->nsc;
8807 comp = cga->ind_gl - cgb->ind_gl;
8813 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8818 /* Order the data */
8819 for (i = 0; i < n; i++)
8821 buf[i] = a[sort[i].ind];
8824 /* Copy back to the original array */
8825 for (i = 0; i < n; i++)
8831 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8836 /* Order the data */
8837 for (i = 0; i < n; i++)
8839 copy_rvec(v[sort[i].ind], buf[i]);
8842 /* Copy back to the original array */
8843 for (i = 0; i < n; i++)
8845 copy_rvec(buf[i], v[i]);
8849 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8852 int a, atot, cg, cg0, cg1, i;
8854 if (cgindex == NULL)
8856 /* Avoid the useless loop of the atoms within a cg */
8857 order_vec_cg(ncg, sort, v, buf);
8862 /* Order the data */
8864 for (cg = 0; cg < ncg; cg++)
8866 cg0 = cgindex[sort[cg].ind];
8867 cg1 = cgindex[sort[cg].ind+1];
8868 for (i = cg0; i < cg1; i++)
8870 copy_rvec(v[i], buf[a]);
8876 /* Copy back to the original array */
8877 for (a = 0; a < atot; a++)
8879 copy_rvec(buf[a], v[a]);
8883 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8884 int nsort_new, gmx_cgsort_t *sort_new,
8885 gmx_cgsort_t *sort1)
8889 /* The new indices are not very ordered, so we qsort them */
8890 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8892 /* sort2 is already ordered, so now we can merge the two arrays */
8896 while (i2 < nsort2 || i_new < nsort_new)
8900 sort1[i1++] = sort_new[i_new++];
8902 else if (i_new == nsort_new)
8904 sort1[i1++] = sort2[i2++];
8906 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8907 (sort2[i2].nsc == sort_new[i_new].nsc &&
8908 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8910 sort1[i1++] = sort2[i2++];
8914 sort1[i1++] = sort_new[i_new++];
8919 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8921 gmx_domdec_sort_t *sort;
8922 gmx_cgsort_t *cgsort, *sort_i;
8923 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8924 int sort_last, sort_skip;
8926 sort = dd->comm->sort;
8928 a = fr->ns.grid->cell_index;
8930 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8932 if (ncg_home_old >= 0)
8934 /* The charge groups that remained in the same ns grid cell
8935 * are completely ordered. So we can sort efficiently by sorting
8936 * the charge groups that did move into the stationary list.
8941 for (i = 0; i < dd->ncg_home; i++)
8943 /* Check if this cg did not move to another node */
8946 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8948 /* This cg is new on this node or moved ns grid cell */
8949 if (nsort_new >= sort->sort_new_nalloc)
8951 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8952 srenew(sort->sort_new, sort->sort_new_nalloc);
8954 sort_i = &(sort->sort_new[nsort_new++]);
8958 /* This cg did not move */
8959 sort_i = &(sort->sort2[nsort2++]);
8961 /* Sort on the ns grid cell indices
8962 * and the global topology index.
8963 * index_gl is irrelevant with cell ns,
8964 * but we set it here anyhow to avoid a conditional.
8967 sort_i->ind_gl = dd->index_gl[i];
8974 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8977 /* Sort efficiently */
8978 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8983 cgsort = sort->sort;
8985 for (i = 0; i < dd->ncg_home; i++)
8987 /* Sort on the ns grid cell indices
8988 * and the global topology index
8990 cgsort[i].nsc = a[i];
8991 cgsort[i].ind_gl = dd->index_gl[i];
8993 if (cgsort[i].nsc < moved)
9000 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
9002 /* Determine the order of the charge groups using qsort */
9003 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
9009 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
9012 int ncg_new, i, *a, na;
9014 sort = dd->comm->sort->sort;
9016 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
9019 for (i = 0; i < na; i++)
9023 sort[ncg_new].ind = a[i];
9031 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
9034 gmx_domdec_sort_t *sort;
9035 gmx_cgsort_t *cgsort, *sort_i;
9037 int ncg_new, i, *ibuf, cgsize;
9040 sort = dd->comm->sort;
9042 if (dd->ncg_home > sort->sort_nalloc)
9044 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9045 srenew(sort->sort, sort->sort_nalloc);
9046 srenew(sort->sort2, sort->sort_nalloc);
9048 cgsort = sort->sort;
9050 switch (fr->cutoff_scheme)
9053 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9056 ncg_new = dd_sort_order_nbnxn(dd, fr);
9059 gmx_incons("unimplemented");
9063 /* We alloc with the old size, since cgindex is still old */
9064 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9065 vbuf = dd->comm->vbuf.v;
9069 cgindex = dd->cgindex;
9076 /* Remove the charge groups which are no longer at home here */
9077 dd->ncg_home = ncg_new;
9080 fprintf(debug, "Set the new home charge group count to %d\n",
9084 /* Reorder the state */
9085 for (i = 0; i < estNR; i++)
9087 if (EST_DISTR(i) && (state->flags & (1<<i)))
9092 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9095 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9098 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9101 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9105 case estDISRE_INITF:
9106 case estDISRE_RM3TAV:
9107 case estORIRE_INITF:
9109 /* No ordering required */
9112 gmx_incons("Unknown state entry encountered in dd_sort_state");
9117 if (fr->cutoff_scheme == ecutsGROUP)
9120 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9123 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9125 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9126 srenew(sort->ibuf, sort->ibuf_nalloc);
9129 /* Reorder the global cg index */
9130 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9131 /* Reorder the cginfo */
9132 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9133 /* Rebuild the local cg index */
9137 for (i = 0; i < dd->ncg_home; i++)
9139 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9140 ibuf[i+1] = ibuf[i] + cgsize;
9142 for (i = 0; i < dd->ncg_home+1; i++)
9144 dd->cgindex[i] = ibuf[i];
9149 for (i = 0; i < dd->ncg_home+1; i++)
9154 /* Set the home atom number */
9155 dd->nat_home = dd->cgindex[dd->ncg_home];
9157 if (fr->cutoff_scheme == ecutsVERLET)
9159 /* The atoms are now exactly in grid order, update the grid order */
9160 nbnxn_set_atomorder(fr->nbv->nbs);
9164 /* Copy the sorted ns cell indices back to the ns grid struct */
9165 for (i = 0; i < dd->ncg_home; i++)
9167 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9169 fr->ns.grid->nr = dd->ncg_home;
9173 static void add_dd_statistics(gmx_domdec_t *dd)
9175 gmx_domdec_comm_t *comm;
9180 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9182 comm->sum_nat[ddnat-ddnatZONE] +=
9183 comm->nat[ddnat] - comm->nat[ddnat-1];
9188 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9190 gmx_domdec_comm_t *comm;
9195 /* Reset all the statistics and counters for total run counting */
9196 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9198 comm->sum_nat[ddnat-ddnatZONE] = 0;
9202 comm->load_step = 0;
9205 clear_ivec(comm->load_lim);
9210 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9212 gmx_domdec_comm_t *comm;
9216 comm = cr->dd->comm;
9218 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9225 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9227 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9229 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9234 " av. #atoms communicated per step for force: %d x %.1f\n",
9238 if (cr->dd->vsite_comm)
9241 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9242 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9247 if (cr->dd->constraint_comm)
9250 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9251 1 + ir->nLincsIter, av);
9255 gmx_incons(" Unknown type for DD statistics");
9258 fprintf(fplog, "\n");
9260 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9262 print_dd_load_av(fplog, cr->dd);
9266 void dd_partition_system(FILE *fplog,
9269 gmx_bool bMasterState,
9271 t_state *state_global,
9272 gmx_mtop_t *top_global,
9274 t_state *state_local,
9277 gmx_localtop_t *top_local,
9280 gmx_shellfc_t shellfc,
9281 gmx_constr_t constr,
9283 gmx_wallcycle_t wcycle,
9287 gmx_domdec_comm_t *comm;
9288 gmx_ddbox_t ddbox = {0};
9290 gmx_int64_t step_pcoupl;
9291 rvec cell_ns_x0, cell_ns_x1;
9292 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9293 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9294 gmx_bool bRedist, bSortCG, bResortAll;
9295 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9302 bBoxChanged = (bMasterState || DEFORM(*ir));
9303 if (ir->epc != epcNO)
9305 /* With nstpcouple > 1 pressure coupling happens.
9306 * one step after calculating the pressure.
9307 * Box scaling happens at the end of the MD step,
9308 * after the DD partitioning.
9309 * We therefore have to do DLB in the first partitioning
9310 * after an MD step where P-coupling occured.
9311 * We need to determine the last step in which p-coupling occurred.
9312 * MRS -- need to validate this for vv?
9317 step_pcoupl = step - 1;
9321 step_pcoupl = ((step - 1)/n)*n + 1;
9323 if (step_pcoupl >= comm->partition_step)
9329 bNStGlobalComm = (step % nstglobalcomm == 0);
9331 if (!comm->bDynLoadBal)
9337 /* Should we do dynamic load balacing this step?
9338 * Since it requires (possibly expensive) global communication,
9339 * we might want to do DLB less frequently.
9341 if (bBoxChanged || ir->epc != epcNO)
9343 bDoDLB = bBoxChanged;
9347 bDoDLB = bNStGlobalComm;
9351 /* Check if we have recorded loads on the nodes */
9352 if (comm->bRecordLoad && dd_load_count(comm))
9354 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9356 /* Check if we should use DLB at the second partitioning
9357 * and every 100 partitionings,
9358 * so the extra communication cost is negligible.
9360 n = max(100, nstglobalcomm);
9361 bCheckDLB = (comm->n_load_collect == 0 ||
9362 comm->n_load_have % n == n-1);
9369 /* Print load every nstlog, first and last step to the log file */
9370 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9371 comm->n_load_collect == 0 ||
9373 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9375 /* Avoid extra communication due to verbose screen output
9376 * when nstglobalcomm is set.
9378 if (bDoDLB || bLogLoad || bCheckDLB ||
9379 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9381 get_load_distribution(dd, wcycle);
9386 dd_print_load(fplog, dd, step-1);
9390 dd_print_load_verbose(dd);
9393 comm->n_load_collect++;
9397 /* Since the timings are node dependent, the master decides */
9401 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
9404 fprintf(debug, "step %s, imb loss %f\n",
9405 gmx_step_str(step, sbuf),
9406 dd_force_imb_perf_loss(dd));
9409 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9412 turn_on_dlb(fplog, cr, step);
9417 comm->n_load_have++;
9420 cgs_gl = &comm->cgs_gl;
9425 /* Clear the old state */
9426 clear_dd_indices(dd, 0, 0);
9429 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9430 TRUE, cgs_gl, state_global->x, &ddbox);
9432 get_cg_distribution(fplog, step, dd, cgs_gl,
9433 state_global->box, &ddbox, state_global->x);
9435 dd_distribute_state(dd, cgs_gl,
9436 state_global, state_local, f);
9438 dd_make_local_cgs(dd, &top_local->cgs);
9440 /* Ensure that we have space for the new distribution */
9441 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9443 if (fr->cutoff_scheme == ecutsGROUP)
9445 calc_cgcm(fplog, 0, dd->ncg_home,
9446 &top_local->cgs, state_local->x, fr->cg_cm);
9449 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9451 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9453 else if (state_local->ddp_count != dd->ddp_count)
9455 if (state_local->ddp_count > dd->ddp_count)
9457 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9460 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9462 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9465 /* Clear the old state */
9466 clear_dd_indices(dd, 0, 0);
9468 /* Build the new indices */
9469 rebuild_cgindex(dd, cgs_gl->index, state_local);
9470 make_dd_indices(dd, cgs_gl->index, 0);
9471 ncgindex_set = dd->ncg_home;
9473 if (fr->cutoff_scheme == ecutsGROUP)
9475 /* Redetermine the cg COMs */
9476 calc_cgcm(fplog, 0, dd->ncg_home,
9477 &top_local->cgs, state_local->x, fr->cg_cm);
9480 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9482 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9484 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9485 TRUE, &top_local->cgs, state_local->x, &ddbox);
9487 bRedist = comm->bDynLoadBal;
9491 /* We have the full state, only redistribute the cgs */
9493 /* Clear the non-home indices */
9494 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9497 /* Avoid global communication for dim's without pbc and -gcom */
9498 if (!bNStGlobalComm)
9500 copy_rvec(comm->box0, ddbox.box0 );
9501 copy_rvec(comm->box_size, ddbox.box_size);
9503 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9504 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9509 /* For dim's without pbc and -gcom */
9510 copy_rvec(ddbox.box0, comm->box0 );
9511 copy_rvec(ddbox.box_size, comm->box_size);
9513 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9516 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9518 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9521 /* Check if we should sort the charge groups */
9522 if (comm->nstSortCG > 0)
9524 bSortCG = (bMasterState ||
9525 (bRedist && (step % comm->nstSortCG == 0)));
9532 ncg_home_old = dd->ncg_home;
9537 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9539 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9541 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9543 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9546 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9548 &comm->cell_x0, &comm->cell_x1,
9549 dd->ncg_home, fr->cg_cm,
9550 cell_ns_x0, cell_ns_x1, &grid_density);
9554 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9557 switch (fr->cutoff_scheme)
9560 copy_ivec(fr->ns.grid->n, ncells_old);
9561 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9562 state_local->box, cell_ns_x0, cell_ns_x1,
9563 fr->rlistlong, grid_density);
9566 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9569 gmx_incons("unimplemented");
9571 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9572 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9576 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9578 /* Sort the state on charge group position.
9579 * This enables exact restarts from this step.
9580 * It also improves performance by about 15% with larger numbers
9581 * of atoms per node.
9584 /* Fill the ns grid with the home cell,
9585 * so we can sort with the indices.
9587 set_zones_ncg_home(dd);
9589 switch (fr->cutoff_scheme)
9592 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9594 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9596 comm->zones.size[0].bb_x0,
9597 comm->zones.size[0].bb_x1,
9599 comm->zones.dens_zone0,
9602 ncg_moved, bRedist ? comm->moved : NULL,
9603 fr->nbv->grp[eintLocal].kernel_type,
9604 fr->nbv->grp[eintLocal].nbat);
9606 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9609 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9610 0, dd->ncg_home, fr->cg_cm);
9612 copy_ivec(fr->ns.grid->n, ncells_new);
9615 gmx_incons("unimplemented");
9618 bResortAll = bMasterState;
9620 /* Check if we can user the old order and ns grid cell indices
9621 * of the charge groups to sort the charge groups efficiently.
9623 if (ncells_new[XX] != ncells_old[XX] ||
9624 ncells_new[YY] != ncells_old[YY] ||
9625 ncells_new[ZZ] != ncells_old[ZZ])
9632 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9633 gmx_step_str(step, sbuf), dd->ncg_home);
9635 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9636 bResortAll ? -1 : ncg_home_old);
9637 /* Rebuild all the indices */
9638 ga2la_clear(dd->ga2la);
9641 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9644 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9646 /* Setup up the communication and communicate the coordinates */
9647 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9649 /* Set the indices */
9650 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9652 /* Set the charge group boundaries for neighbor searching */
9653 set_cg_boundaries(&comm->zones);
9655 if (fr->cutoff_scheme == ecutsVERLET)
9657 set_zones_size(dd, state_local->box, &ddbox,
9658 bSortCG ? 1 : 0, comm->zones.n);
9661 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9664 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9665 -1,state_local->x,state_local->box);
9668 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9670 /* Extract a local topology from the global topology */
9671 for (i = 0; i < dd->ndim; i++)
9673 np[dd->dim[i]] = comm->cd[i].np;
9675 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9676 comm->cellsize_min, np,
9678 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9679 vsite, top_global, top_local);
9681 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9683 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9685 /* Set up the special atom communication */
9686 n = comm->nat[ddnatZONE];
9687 for (i = ddnatZONE+1; i < ddnatNR; i++)
9692 if (vsite && vsite->n_intercg_vsite)
9694 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9698 if (dd->bInterCGcons || dd->bInterCGsettles)
9700 /* Only for inter-cg constraints we need special code */
9701 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9702 constr, ir->nProjOrder,
9703 top_local->idef.il);
9707 gmx_incons("Unknown special atom type setup");
9712 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9714 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9716 /* Make space for the extra coordinates for virtual site
9717 * or constraint communication.
9719 state_local->natoms = comm->nat[ddnatNR-1];
9720 if (state_local->natoms > state_local->nalloc)
9722 dd_realloc_state(state_local, f, state_local->natoms);
9725 if (fr->bF_NoVirSum)
9727 if (vsite && vsite->n_intercg_vsite)
9729 nat_f_novirsum = comm->nat[ddnatVSITE];
9733 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9735 nat_f_novirsum = dd->nat_tot;
9739 nat_f_novirsum = dd->nat_home;
9748 /* Set the number of atoms required for the force calculation.
9749 * Forces need to be constrained when using a twin-range setup
9750 * or with energy minimization. For simple simulations we could
9751 * avoid some allocation, zeroing and copying, but this is
9752 * probably not worth the complications ande checking.
9754 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9755 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9757 /* We make the all mdatoms up to nat_tot_con.
9758 * We could save some work by only setting invmass
9759 * between nat_tot and nat_tot_con.
9761 /* This call also sets the new number of home particles to dd->nat_home */
9762 atoms2md(top_global, ir,
9763 comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
9765 /* Now we have the charges we can sort the FE interactions */
9766 dd_sort_local_top(dd, mdatoms, top_local);
9770 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9771 split_vsites_over_threads(top_local->idef.il, top_local->idef.iparams,
9772 mdatoms, FALSE, vsite);
9777 /* Make the local shell stuff, currently no communication is done */
9778 make_local_shells(cr, mdatoms, shellfc);
9781 if (ir->implicit_solvent)
9783 make_local_gb(cr, fr->born, ir->gb_algorithm);
9786 setup_bonded_threading(fr, &top_local->idef);
9788 if (!(cr->duty & DUTY_PME))
9790 /* Send the charges and/or c6/sigmas to our PME only node */
9791 gmx_pme_send_parameters(cr,
9793 mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9794 mdatoms->chargeA, mdatoms->chargeB,
9795 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9796 mdatoms->sigmaA, mdatoms->sigmaB,
9797 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9802 set_constraints(constr, top_local, ir, mdatoms, cr);
9805 if (ir->ePull != epullNO)
9807 /* Update the local pull groups */
9808 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9813 /* Update the local rotation groups */
9814 dd_make_local_rotation_groups(dd, ir->rot);
9817 if (ir->eSwapCoords != eswapNO)
9819 /* Update the local groups needed for ion swapping */
9820 dd_make_local_swap_groups(dd, ir->swap);
9823 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9824 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9826 add_dd_statistics(dd);
9828 /* Make sure we only count the cycles for this DD partitioning */
9829 clear_dd_cycle_counts(dd);
9831 /* Because the order of the atoms might have changed since
9832 * the last vsite construction, we need to communicate the constructing
9833 * atom coordinates again (for spreading the forces this MD step).
9835 dd_move_x_vsites(dd, state_local->box, state_local->x);
9837 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9839 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9841 dd_move_x(dd, state_local->box, state_local->x);
9842 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9843 -1, state_local->x, state_local->box);
9846 /* Store the partitioning step */
9847 comm->partition_step = step;
9849 /* Increase the DD partitioning counter */
9851 /* The state currently matches this DD partitioning count, store it */
9852 state_local->ddp_count = dd->ddp_count;
9855 /* The DD master node knows the complete cg distribution,
9856 * store the count so we can possibly skip the cg info communication.
9858 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9861 if (comm->DD_debug > 0)
9863 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9864 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9865 "after partitioning");