2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gromacs/utility/smalloc.h"
49 #include "gmx_fatal.h"
50 #include "gmx_fatal_collective.h"
53 #include "domdec_network.h"
56 #include "chargegroup.h"
65 #include "mtop_util.h"
66 #include "gmx_ga2la.h"
68 #include "nbnxn_search.h"
70 #include "gmx_omp_nthreads.h"
71 #include "gpu_utils.h"
73 #include "gromacs/fileio/futil.h"
74 #include "gromacs/fileio/gmxfio.h"
75 #include "gromacs/fileio/pdbio.h"
76 #include "gromacs/timing/wallcycle.h"
77 #include "gromacs/utility/gmxmpi.h"
78 #include "gromacs/swap/swapcoords.h"
79 #include "gromacs/utility/qsort_threadsafe.h"
80 #include "gromacs/pulling/pull.h"
81 #include "gromacs/pulling/pull_rotation.h"
82 #include "gromacs/imd/imd.h"
84 #define DDRANK(dd, rank) (rank)
85 #define DDMASTERRANK(dd) (dd->masterrank)
87 typedef struct gmx_domdec_master
89 /* The cell boundaries */
91 /* The global charge group division */
92 int *ncg; /* Number of home charge groups for each node */
93 int *index; /* Index of nnodes+1 into cg */
94 int *cg; /* Global charge group index */
95 int *nat; /* Number of home atoms for each node. */
96 int *ibuf; /* Buffer for communication */
97 rvec *vbuf; /* Buffer for state scattering and gathering */
98 } gmx_domdec_master_t;
102 /* The numbers of charge groups to send and receive for each cell
103 * that requires communication, the last entry contains the total
104 * number of atoms that needs to be communicated.
106 int nsend[DD_MAXIZONE+2];
107 int nrecv[DD_MAXIZONE+2];
108 /* The charge groups to send */
111 /* The atom range for non-in-place communication */
112 int cell2at0[DD_MAXIZONE];
113 int cell2at1[DD_MAXIZONE];
118 int np; /* Number of grid pulses in this dimension */
119 int np_dlb; /* For dlb, for use with edlbAUTO */
120 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
122 gmx_bool bInPlace; /* Can we communicate in place? */
123 } gmx_domdec_comm_dim_t;
127 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
128 real *cell_f; /* State var.: cell boundaries, box relative */
129 real *old_cell_f; /* Temp. var.: old cell size */
130 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
131 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
132 real *bound_min; /* Temp. var.: lower limit for cell boundary */
133 real *bound_max; /* Temp. var.: upper limit for cell boundary */
134 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
135 real *buf_ncd; /* Temp. var. */
138 #define DD_NLOAD_MAX 9
140 /* Here floats are accurate enough, since these variables
141 * only influence the load balancing, not the actual MD results.
168 gmx_cgsort_t *sort_new;
180 /* This enum determines the order of the coordinates.
181 * ddnatHOME and ddnatZONE should be first and second,
182 * the others can be ordered as wanted.
185 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
189 edlbAUTO, edlbNO, edlbYES, edlbNR
191 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
195 int dim; /* The dimension */
196 gmx_bool dim_match; /* Tells if DD and PME dims match */
197 int nslab; /* The number of PME slabs in this dimension */
198 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
199 int *pp_min; /* The minimum pp node location, size nslab */
200 int *pp_max; /* The maximum pp node location,size nslab */
201 int maxshift; /* The maximum shift for coordinate redistribution in PME */
206 real min0; /* The minimum bottom of this zone */
207 real max1; /* The maximum top of this zone */
208 real min1; /* The minimum top of this zone */
209 real mch0; /* The maximum bottom communicaton height for this zone */
210 real mch1; /* The maximum top communicaton height for this zone */
211 real p1_0; /* The bottom value of the first cell in this zone */
212 real p1_1; /* The top value of the first cell in this zone */
217 gmx_domdec_ind_t ind;
224 } dd_comm_setup_work_t;
226 typedef struct gmx_domdec_comm
228 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
229 * unless stated otherwise.
232 /* The number of decomposition dimensions for PME, 0: no PME */
234 /* The number of nodes doing PME (PP/PME or only PME) */
238 /* The communication setup including the PME only nodes */
239 gmx_bool bCartesianPP_PME;
242 int *pmenodes; /* size npmenodes */
243 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
244 * but with bCartesianPP_PME */
245 gmx_ddpme_t ddpme[2];
247 /* The DD particle-particle nodes only */
248 gmx_bool bCartesianPP;
249 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
251 /* The global charge groups */
254 /* Should we sort the cgs */
256 gmx_domdec_sort_t *sort;
258 /* Are there charge groups? */
261 /* Are there bonded and multi-body interactions between charge groups? */
262 gmx_bool bInterCGBondeds;
263 gmx_bool bInterCGMultiBody;
265 /* Data for the optional bonded interaction atom communication range */
272 /* Are we actually using DLB? */
273 gmx_bool bDynLoadBal;
275 /* Cell sizes for static load balancing, first index cartesian */
278 /* The width of the communicated boundaries */
281 /* The minimum cell size (including triclinic correction) */
283 /* For dlb, for use with edlbAUTO */
284 rvec cellsize_min_dlb;
285 /* The lower limit for the DD cell size with DLB */
287 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
288 gmx_bool bVacDLBNoLimit;
290 /* With PME load balancing we set limits on DLB */
291 gmx_bool bPMELoadBalDLBLimits;
292 /* DLB needs to take into account that we want to allow this maximum
293 * cut-off (for PME load balancing), this could limit cell boundaries.
295 real PMELoadBal_max_cutoff;
297 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
299 /* box0 and box_size are required with dim's without pbc and -gcom */
303 /* The cell boundaries */
307 /* The old location of the cell boundaries, to check cg displacements */
311 /* The communication setup and charge group boundaries for the zones */
312 gmx_domdec_zones_t zones;
314 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
315 * cell boundaries of neighboring cells for dynamic load balancing.
317 gmx_ddzone_t zone_d1[2];
318 gmx_ddzone_t zone_d2[2][2];
320 /* The coordinate/force communication setup and indices */
321 gmx_domdec_comm_dim_t cd[DIM];
322 /* The maximum number of cells to communicate with in one dimension */
325 /* Which cg distribution is stored on the master node */
326 int master_cg_ddp_count;
328 /* The number of cg's received from the direct neighbors */
329 int zone_ncg1[DD_MAXZONE];
331 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
334 /* Array for signalling if atoms have moved to another domain */
338 /* Communication buffer for general use */
342 /* Communication buffer for general use */
345 /* Temporary storage for thread parallel communication setup */
347 dd_comm_setup_work_t *dth;
349 /* Communication buffers only used with multiple grid pulses */
354 /* Communication buffers for local redistribution */
356 int cggl_flag_nalloc[DIM*2];
358 int cgcm_state_nalloc[DIM*2];
360 /* Cell sizes for dynamic load balancing */
361 gmx_domdec_root_t **root;
365 real cell_f_max0[DIM];
366 real cell_f_min1[DIM];
368 /* Stuff for load communication */
369 gmx_bool bRecordLoad;
370 gmx_domdec_load_t *load;
371 int nrank_gpu_shared;
373 MPI_Comm *mpi_comm_load;
374 MPI_Comm mpi_comm_gpu_shared;
377 /* Maximum DLB scaling per load balancing step in percent */
381 float cycl[ddCyclNr];
382 int cycl_n[ddCyclNr];
383 float cycl_max[ddCyclNr];
384 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
388 /* Have often have did we have load measurements */
390 /* Have often have we collected the load measurements */
394 double sum_nat[ddnatNR-ddnatZONE];
404 /* The last partition step */
405 gmx_int64_t partition_step;
413 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
416 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
417 #define DD_FLAG_NRCG 65535
418 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
419 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
421 /* Zone permutation required to obtain consecutive charge groups
422 * for neighbor searching.
424 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
426 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
427 * components see only j zones with that component 0.
430 /* The DD zone order */
431 static const ivec dd_zo[DD_MAXZONE] =
432 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
437 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
442 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
447 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
449 /* Factors used to avoid problems due to rounding issues */
450 #define DD_CELL_MARGIN 1.0001
451 #define DD_CELL_MARGIN2 1.00005
452 /* Factor to account for pressure scaling during nstlist steps */
453 #define DD_PRES_SCALE_MARGIN 1.02
455 /* Turn on DLB when the load imbalance causes this amount of total loss.
456 * There is a bit of overhead with DLB and it's difficult to achieve
457 * a load imbalance of less than 2% with DLB.
459 #define DD_PERF_LOSS_DLB_ON 0.02
461 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
462 #define DD_PERF_LOSS_WARN 0.05
464 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
466 /* Use separate MPI send and receive commands
467 * when nnodes <= GMX_DD_NNODES_SENDRECV.
468 * This saves memory (and some copying for small nnodes).
469 * For high parallelization scatter and gather calls are used.
471 #define GMX_DD_NNODES_SENDRECV 4
475 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
477 static void index2xyz(ivec nc,int ind,ivec xyz)
479 xyz[XX] = ind % nc[XX];
480 xyz[YY] = (ind / nc[XX]) % nc[YY];
481 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
485 /* This order is required to minimize the coordinate communication in PME
486 * which uses decomposition in the x direction.
488 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
490 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
492 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
493 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
494 xyz[ZZ] = ind % nc[ZZ];
497 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
502 ddindex = dd_index(dd->nc, c);
503 if (dd->comm->bCartesianPP_PME)
505 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
507 else if (dd->comm->bCartesianPP)
510 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
521 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
523 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
526 int ddglatnr(gmx_domdec_t *dd, int i)
536 if (i >= dd->comm->nat[ddnatNR-1])
538 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
540 atnr = dd->gatindex[i] + 1;
546 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
548 return &dd->comm->cgs_gl;
551 static void vec_rvec_init(vec_rvec_t *v)
557 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
561 v->nalloc = over_alloc_dd(n);
562 srenew(v->v, v->nalloc);
566 void dd_store_state(gmx_domdec_t *dd, t_state *state)
570 if (state->ddp_count != dd->ddp_count)
572 gmx_incons("The state does not the domain decomposition state");
575 state->ncg_gl = dd->ncg_home;
576 if (state->ncg_gl > state->cg_gl_nalloc)
578 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
579 srenew(state->cg_gl, state->cg_gl_nalloc);
581 for (i = 0; i < state->ncg_gl; i++)
583 state->cg_gl[i] = dd->index_gl[i];
586 state->ddp_count_cg_gl = dd->ddp_count;
589 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
591 return &dd->comm->zones;
594 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
595 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
597 gmx_domdec_zones_t *zones;
600 zones = &dd->comm->zones;
603 while (icg >= zones->izone[izone].cg1)
612 else if (izone < zones->nizone)
614 *jcg0 = zones->izone[izone].jcg0;
618 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
619 icg, izone, zones->nizone);
622 *jcg1 = zones->izone[izone].jcg1;
624 for (d = 0; d < dd->ndim; d++)
627 shift0[dim] = zones->izone[izone].shift0[dim];
628 shift1[dim] = zones->izone[izone].shift1[dim];
629 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
631 /* A conservative approach, this can be optimized */
638 int dd_natoms_vsite(gmx_domdec_t *dd)
640 return dd->comm->nat[ddnatVSITE];
643 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
645 *at_start = dd->comm->nat[ddnatCON-1];
646 *at_end = dd->comm->nat[ddnatCON];
649 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
651 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
652 int *index, *cgindex;
653 gmx_domdec_comm_t *comm;
654 gmx_domdec_comm_dim_t *cd;
655 gmx_domdec_ind_t *ind;
656 rvec shift = {0, 0, 0}, *buf, *rbuf;
657 gmx_bool bPBC, bScrew;
661 cgindex = dd->cgindex;
666 nat_tot = dd->nat_home;
667 for (d = 0; d < dd->ndim; d++)
669 bPBC = (dd->ci[dd->dim[d]] == 0);
670 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
673 copy_rvec(box[dd->dim[d]], shift);
676 for (p = 0; p < cd->np; p++)
683 for (i = 0; i < ind->nsend[nzone]; i++)
685 at0 = cgindex[index[i]];
686 at1 = cgindex[index[i]+1];
687 for (j = at0; j < at1; j++)
689 copy_rvec(x[j], buf[n]);
696 for (i = 0; i < ind->nsend[nzone]; i++)
698 at0 = cgindex[index[i]];
699 at1 = cgindex[index[i]+1];
700 for (j = at0; j < at1; j++)
702 /* We need to shift the coordinates */
703 rvec_add(x[j], shift, buf[n]);
710 for (i = 0; i < ind->nsend[nzone]; i++)
712 at0 = cgindex[index[i]];
713 at1 = cgindex[index[i]+1];
714 for (j = at0; j < at1; j++)
717 buf[n][XX] = x[j][XX] + shift[XX];
719 * This operation requires a special shift force
720 * treatment, which is performed in calc_vir.
722 buf[n][YY] = box[YY][YY] - x[j][YY];
723 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
735 rbuf = comm->vbuf2.v;
737 /* Send and receive the coordinates */
738 dd_sendrecv_rvec(dd, d, dddirBackward,
739 buf, ind->nsend[nzone+1],
740 rbuf, ind->nrecv[nzone+1]);
744 for (zone = 0; zone < nzone; zone++)
746 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
748 copy_rvec(rbuf[j], x[i]);
753 nat_tot += ind->nrecv[nzone+1];
759 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
761 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
762 int *index, *cgindex;
763 gmx_domdec_comm_t *comm;
764 gmx_domdec_comm_dim_t *cd;
765 gmx_domdec_ind_t *ind;
769 gmx_bool bPBC, bScrew;
773 cgindex = dd->cgindex;
778 nzone = comm->zones.n/2;
779 nat_tot = dd->nat_tot;
780 for (d = dd->ndim-1; d >= 0; d--)
782 bPBC = (dd->ci[dd->dim[d]] == 0);
783 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
784 if (fshift == NULL && !bScrew)
788 /* Determine which shift vector we need */
794 for (p = cd->np-1; p >= 0; p--)
797 nat_tot -= ind->nrecv[nzone+1];
804 sbuf = comm->vbuf2.v;
806 for (zone = 0; zone < nzone; zone++)
808 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
810 copy_rvec(f[i], sbuf[j]);
815 /* Communicate the forces */
816 dd_sendrecv_rvec(dd, d, dddirForward,
817 sbuf, ind->nrecv[nzone+1],
818 buf, ind->nsend[nzone+1]);
820 /* Add the received forces */
824 for (i = 0; i < ind->nsend[nzone]; i++)
826 at0 = cgindex[index[i]];
827 at1 = cgindex[index[i]+1];
828 for (j = at0; j < at1; j++)
830 rvec_inc(f[j], buf[n]);
837 for (i = 0; i < ind->nsend[nzone]; i++)
839 at0 = cgindex[index[i]];
840 at1 = cgindex[index[i]+1];
841 for (j = at0; j < at1; j++)
843 rvec_inc(f[j], buf[n]);
844 /* Add this force to the shift force */
845 rvec_inc(fshift[is], buf[n]);
852 for (i = 0; i < ind->nsend[nzone]; i++)
854 at0 = cgindex[index[i]];
855 at1 = cgindex[index[i]+1];
856 for (j = at0; j < at1; j++)
858 /* Rotate the force */
859 f[j][XX] += buf[n][XX];
860 f[j][YY] -= buf[n][YY];
861 f[j][ZZ] -= buf[n][ZZ];
864 /* Add this force to the shift force */
865 rvec_inc(fshift[is], buf[n]);
876 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
878 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
879 int *index, *cgindex;
880 gmx_domdec_comm_t *comm;
881 gmx_domdec_comm_dim_t *cd;
882 gmx_domdec_ind_t *ind;
887 cgindex = dd->cgindex;
889 buf = &comm->vbuf.v[0][0];
892 nat_tot = dd->nat_home;
893 for (d = 0; d < dd->ndim; d++)
896 for (p = 0; p < cd->np; p++)
901 for (i = 0; i < ind->nsend[nzone]; i++)
903 at0 = cgindex[index[i]];
904 at1 = cgindex[index[i]+1];
905 for (j = at0; j < at1; j++)
918 rbuf = &comm->vbuf2.v[0][0];
920 /* Send and receive the coordinates */
921 dd_sendrecv_real(dd, d, dddirBackward,
922 buf, ind->nsend[nzone+1],
923 rbuf, ind->nrecv[nzone+1]);
927 for (zone = 0; zone < nzone; zone++)
929 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
936 nat_tot += ind->nrecv[nzone+1];
942 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
944 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
945 int *index, *cgindex;
946 gmx_domdec_comm_t *comm;
947 gmx_domdec_comm_dim_t *cd;
948 gmx_domdec_ind_t *ind;
953 cgindex = dd->cgindex;
955 buf = &comm->vbuf.v[0][0];
958 nzone = comm->zones.n/2;
959 nat_tot = dd->nat_tot;
960 for (d = dd->ndim-1; d >= 0; d--)
963 for (p = cd->np-1; p >= 0; p--)
966 nat_tot -= ind->nrecv[nzone+1];
973 sbuf = &comm->vbuf2.v[0][0];
975 for (zone = 0; zone < nzone; zone++)
977 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
984 /* Communicate the forces */
985 dd_sendrecv_real(dd, d, dddirForward,
986 sbuf, ind->nrecv[nzone+1],
987 buf, ind->nsend[nzone+1]);
989 /* Add the received forces */
991 for (i = 0; i < ind->nsend[nzone]; i++)
993 at0 = cgindex[index[i]];
994 at1 = cgindex[index[i]+1];
995 for (j = at0; j < at1; j++)
1006 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
1008 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1010 zone->min0, zone->max1,
1011 zone->mch0, zone->mch0,
1012 zone->p1_0, zone->p1_1);
1016 #define DDZONECOMM_MAXZONE 5
1017 #define DDZONECOMM_BUFSIZE 3
1019 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1020 int ddimind, int direction,
1021 gmx_ddzone_t *buf_s, int n_s,
1022 gmx_ddzone_t *buf_r, int n_r)
1024 #define ZBS DDZONECOMM_BUFSIZE
1025 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1026 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
1029 for (i = 0; i < n_s; i++)
1031 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1032 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1033 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1034 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1035 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1036 vbuf_s[i*ZBS+1][2] = 0;
1037 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1038 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1039 vbuf_s[i*ZBS+2][2] = 0;
1042 dd_sendrecv_rvec(dd, ddimind, direction,
1046 for (i = 0; i < n_r; i++)
1048 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1049 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1050 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1051 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1052 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1053 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1054 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
1060 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1061 rvec cell_ns_x0, rvec cell_ns_x1)
1063 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1065 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1066 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1067 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1068 rvec extr_s[2], extr_r[2];
1070 real dist_d, c = 0, det;
1071 gmx_domdec_comm_t *comm;
1072 gmx_bool bPBC, bUse;
1076 for (d = 1; d < dd->ndim; d++)
1079 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1080 zp->min0 = cell_ns_x0[dim];
1081 zp->max1 = cell_ns_x1[dim];
1082 zp->min1 = cell_ns_x1[dim];
1083 zp->mch0 = cell_ns_x0[dim];
1084 zp->mch1 = cell_ns_x1[dim];
1085 zp->p1_0 = cell_ns_x0[dim];
1086 zp->p1_1 = cell_ns_x1[dim];
1089 for (d = dd->ndim-2; d >= 0; d--)
1092 bPBC = (dim < ddbox->npbcdim);
1094 /* Use an rvec to store two reals */
1095 extr_s[d][0] = comm->cell_f0[d+1];
1096 extr_s[d][1] = comm->cell_f1[d+1];
1097 extr_s[d][2] = comm->cell_f1[d+1];
1100 /* Store the extremes in the backward sending buffer,
1101 * so the get updated separately from the forward communication.
1103 for (d1 = d; d1 < dd->ndim-1; d1++)
1105 /* We invert the order to be able to use the same loop for buf_e */
1106 buf_s[pos].min0 = extr_s[d1][1];
1107 buf_s[pos].max1 = extr_s[d1][0];
1108 buf_s[pos].min1 = extr_s[d1][2];
1109 buf_s[pos].mch0 = 0;
1110 buf_s[pos].mch1 = 0;
1111 /* Store the cell corner of the dimension we communicate along */
1112 buf_s[pos].p1_0 = comm->cell_x0[dim];
1113 buf_s[pos].p1_1 = 0;
1117 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1120 if (dd->ndim == 3 && d == 0)
1122 buf_s[pos] = comm->zone_d2[0][1];
1124 buf_s[pos] = comm->zone_d1[0];
1128 /* We only need to communicate the extremes
1129 * in the forward direction
1131 npulse = comm->cd[d].np;
1134 /* Take the minimum to avoid double communication */
1135 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1139 /* Without PBC we should really not communicate over
1140 * the boundaries, but implementing that complicates
1141 * the communication setup and therefore we simply
1142 * do all communication, but ignore some data.
1144 npulse_min = npulse;
1146 for (p = 0; p < npulse_min; p++)
1148 /* Communicate the extremes forward */
1149 bUse = (bPBC || dd->ci[dim] > 0);
1151 dd_sendrecv_rvec(dd, d, dddirForward,
1152 extr_s+d, dd->ndim-d-1,
1153 extr_r+d, dd->ndim-d-1);
1157 for (d1 = d; d1 < dd->ndim-1; d1++)
1159 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1160 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1161 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1167 for (p = 0; p < npulse; p++)
1169 /* Communicate all the zone information backward */
1170 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1172 dd_sendrecv_ddzone(dd, d, dddirBackward,
1179 for (d1 = d+1; d1 < dd->ndim; d1++)
1181 /* Determine the decrease of maximum required
1182 * communication height along d1 due to the distance along d,
1183 * this avoids a lot of useless atom communication.
1185 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1187 if (ddbox->tric_dir[dim])
1189 /* c is the off-diagonal coupling between the cell planes
1190 * along directions d and d1.
1192 c = ddbox->v[dim][dd->dim[d1]][dim];
1198 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1201 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1205 /* A negative value signals out of range */
1211 /* Accumulate the extremes over all pulses */
1212 for (i = 0; i < buf_size; i++)
1216 buf_e[i] = buf_r[i];
1222 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1223 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1224 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1227 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1235 if (bUse && dh[d1] >= 0)
1237 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1238 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1241 /* Copy the received buffer to the send buffer,
1242 * to pass the data through with the next pulse.
1244 buf_s[i] = buf_r[i];
1246 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1247 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1249 /* Store the extremes */
1252 for (d1 = d; d1 < dd->ndim-1; d1++)
1254 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1255 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1256 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1260 if (d == 1 || (d == 0 && dd->ndim == 3))
1262 for (i = d; i < 2; i++)
1264 comm->zone_d2[1-d][i] = buf_e[pos];
1270 comm->zone_d1[1] = buf_e[pos];
1280 for (i = 0; i < 2; i++)
1284 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1286 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1287 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1293 for (i = 0; i < 2; i++)
1295 for (j = 0; j < 2; j++)
1299 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1301 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1302 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1306 for (d = 1; d < dd->ndim; d++)
1308 comm->cell_f_max0[d] = extr_s[d-1][0];
1309 comm->cell_f_min1[d] = extr_s[d-1][1];
1312 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1313 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1318 static void dd_collect_cg(gmx_domdec_t *dd,
1319 t_state *state_local)
1321 gmx_domdec_master_t *ma = NULL;
1322 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1325 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1327 /* The master has the correct distribution */
1331 if (state_local->ddp_count == dd->ddp_count)
1333 ncg_home = dd->ncg_home;
1335 nat_home = dd->nat_home;
1337 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1339 cgs_gl = &dd->comm->cgs_gl;
1341 ncg_home = state_local->ncg_gl;
1342 cg = state_local->cg_gl;
1344 for (i = 0; i < ncg_home; i++)
1346 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1351 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1354 buf2[0] = dd->ncg_home;
1355 buf2[1] = dd->nat_home;
1365 /* Collect the charge group and atom counts on the master */
1366 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1371 for (i = 0; i < dd->nnodes; i++)
1373 ma->ncg[i] = ma->ibuf[2*i];
1374 ma->nat[i] = ma->ibuf[2*i+1];
1375 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1378 /* Make byte counts and indices */
1379 for (i = 0; i < dd->nnodes; i++)
1381 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1382 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1386 fprintf(debug, "Initial charge group distribution: ");
1387 for (i = 0; i < dd->nnodes; i++)
1389 fprintf(debug, " %d", ma->ncg[i]);
1391 fprintf(debug, "\n");
1395 /* Collect the charge group indices on the master */
1397 dd->ncg_home*sizeof(int), dd->index_gl,
1398 DDMASTER(dd) ? ma->ibuf : NULL,
1399 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1400 DDMASTER(dd) ? ma->cg : NULL);
1402 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1405 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1408 gmx_domdec_master_t *ma;
1409 int n, i, c, a, nalloc = 0;
1418 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1419 dd->rank, dd->mpi_comm_all);
1424 /* Copy the master coordinates to the global array */
1425 cgs_gl = &dd->comm->cgs_gl;
1427 n = DDMASTERRANK(dd);
1429 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1431 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1433 copy_rvec(lv[a++], v[c]);
1437 for (n = 0; n < dd->nnodes; n++)
1441 if (ma->nat[n] > nalloc)
1443 nalloc = over_alloc_dd(ma->nat[n]);
1444 srenew(buf, nalloc);
1447 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1448 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1451 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1453 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1455 copy_rvec(buf[a++], v[c]);
1464 static void get_commbuffer_counts(gmx_domdec_t *dd,
1465 int **counts, int **disps)
1467 gmx_domdec_master_t *ma;
1472 /* Make the rvec count and displacment arrays */
1474 *disps = ma->ibuf + dd->nnodes;
1475 for (n = 0; n < dd->nnodes; n++)
1477 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1478 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1482 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1485 gmx_domdec_master_t *ma;
1486 int *rcounts = NULL, *disps = NULL;
1495 get_commbuffer_counts(dd, &rcounts, &disps);
1500 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1504 cgs_gl = &dd->comm->cgs_gl;
1507 for (n = 0; n < dd->nnodes; n++)
1509 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1511 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1513 copy_rvec(buf[a++], v[c]);
1520 void dd_collect_vec(gmx_domdec_t *dd,
1521 t_state *state_local, rvec *lv, rvec *v)
1523 gmx_domdec_master_t *ma;
1524 int n, i, c, a, nalloc = 0;
1527 dd_collect_cg(dd, state_local);
1529 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1531 dd_collect_vec_sendrecv(dd, lv, v);
1535 dd_collect_vec_gatherv(dd, lv, v);
1540 void dd_collect_state(gmx_domdec_t *dd,
1541 t_state *state_local, t_state *state)
1545 nh = state->nhchainlength;
1549 for (i = 0; i < efptNR; i++)
1551 state->lambda[i] = state_local->lambda[i];
1553 state->fep_state = state_local->fep_state;
1554 state->veta = state_local->veta;
1555 state->vol0 = state_local->vol0;
1556 copy_mat(state_local->box, state->box);
1557 copy_mat(state_local->boxv, state->boxv);
1558 copy_mat(state_local->svir_prev, state->svir_prev);
1559 copy_mat(state_local->fvir_prev, state->fvir_prev);
1560 copy_mat(state_local->pres_prev, state->pres_prev);
1562 for (i = 0; i < state_local->ngtc; i++)
1564 for (j = 0; j < nh; j++)
1566 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1567 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1569 state->therm_integral[i] = state_local->therm_integral[i];
1571 for (i = 0; i < state_local->nnhpres; i++)
1573 for (j = 0; j < nh; j++)
1575 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1576 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1580 for (est = 0; est < estNR; est++)
1582 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1587 dd_collect_vec(dd, state_local, state_local->x, state->x);
1590 dd_collect_vec(dd, state_local, state_local->v, state->v);
1593 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1596 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1598 case estDISRE_INITF:
1599 case estDISRE_RM3TAV:
1600 case estORIRE_INITF:
1604 gmx_incons("Unknown state entry encountered in dd_collect_state");
1610 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1616 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1619 state->nalloc = over_alloc_dd(nalloc);
1621 for (est = 0; est < estNR; est++)
1623 if (EST_DISTR(est) && (state->flags & (1<<est)))
1628 srenew(state->x, state->nalloc);
1631 srenew(state->v, state->nalloc);
1634 srenew(state->sd_X, state->nalloc);
1637 srenew(state->cg_p, state->nalloc);
1639 case estDISRE_INITF:
1640 case estDISRE_RM3TAV:
1641 case estORIRE_INITF:
1643 /* No reallocation required */
1646 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1653 srenew(*f, state->nalloc);
1657 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1660 if (nalloc > fr->cg_nalloc)
1664 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1666 fr->cg_nalloc = over_alloc_dd(nalloc);
1667 srenew(fr->cginfo, fr->cg_nalloc);
1668 if (fr->cutoff_scheme == ecutsGROUP)
1670 srenew(fr->cg_cm, fr->cg_nalloc);
1673 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1675 /* We don't use charge groups, we use x in state to set up
1676 * the atom communication.
1678 dd_realloc_state(state, f, nalloc);
1682 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1685 gmx_domdec_master_t *ma;
1686 int n, i, c, a, nalloc = 0;
1693 for (n = 0; n < dd->nnodes; n++)
1697 if (ma->nat[n] > nalloc)
1699 nalloc = over_alloc_dd(ma->nat[n]);
1700 srenew(buf, nalloc);
1702 /* Use lv as a temporary buffer */
1704 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1706 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1708 copy_rvec(v[c], buf[a++]);
1711 if (a != ma->nat[n])
1713 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1718 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1719 DDRANK(dd, n), n, dd->mpi_comm_all);
1724 n = DDMASTERRANK(dd);
1726 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1728 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1730 copy_rvec(v[c], lv[a++]);
1737 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1738 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1743 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1746 gmx_domdec_master_t *ma;
1747 int *scounts = NULL, *disps = NULL;
1748 int n, i, c, a, nalloc = 0;
1755 get_commbuffer_counts(dd, &scounts, &disps);
1759 for (n = 0; n < dd->nnodes; n++)
1761 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1763 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1765 copy_rvec(v[c], buf[a++]);
1771 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1774 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1776 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1778 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1782 dd_distribute_vec_scatterv(dd, cgs, v, lv);
1786 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1789 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1790 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1791 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1793 if (dfhist->nlambda > 0)
1795 int nlam = dfhist->nlambda;
1796 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1797 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1798 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1799 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1800 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1801 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
1803 for (i = 0; i < nlam; i++)
1805 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1806 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1807 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1808 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1809 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1810 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
1815 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1816 t_state *state, t_state *state_local,
1821 nh = state->nhchainlength;
1825 for (i = 0; i < efptNR; i++)
1827 state_local->lambda[i] = state->lambda[i];
1829 state_local->fep_state = state->fep_state;
1830 state_local->veta = state->veta;
1831 state_local->vol0 = state->vol0;
1832 copy_mat(state->box, state_local->box);
1833 copy_mat(state->box_rel, state_local->box_rel);
1834 copy_mat(state->boxv, state_local->boxv);
1835 copy_mat(state->svir_prev, state_local->svir_prev);
1836 copy_mat(state->fvir_prev, state_local->fvir_prev);
1837 copy_df_history(&state_local->dfhist, &state->dfhist);
1838 for (i = 0; i < state_local->ngtc; i++)
1840 for (j = 0; j < nh; j++)
1842 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1843 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1845 state_local->therm_integral[i] = state->therm_integral[i];
1847 for (i = 0; i < state_local->nnhpres; i++)
1849 for (j = 0; j < nh; j++)
1851 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1852 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1856 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1857 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1858 dd_bcast(dd, sizeof(real), &state_local->veta);
1859 dd_bcast(dd, sizeof(real), &state_local->vol0);
1860 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1861 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1862 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1863 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1864 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1865 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1866 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1867 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1868 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1869 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1871 /* communicate df_history -- required for restarting from checkpoint */
1872 dd_distribute_dfhist(dd, &state_local->dfhist);
1874 if (dd->nat_home > state_local->nalloc)
1876 dd_realloc_state(state_local, f, dd->nat_home);
1878 for (i = 0; i < estNR; i++)
1880 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1885 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1888 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1891 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1894 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1896 case estDISRE_INITF:
1897 case estDISRE_RM3TAV:
1898 case estORIRE_INITF:
1900 /* Not implemented yet */
1903 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1909 static char dim2char(int dim)
1915 case XX: c = 'X'; break;
1916 case YY: c = 'Y'; break;
1917 case ZZ: c = 'Z'; break;
1918 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
1924 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1925 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1927 rvec grid_s[2], *grid_r = NULL, cx, r;
1928 char fname[STRLEN], format[STRLEN], buf[22];
1930 int a, i, d, z, y, x;
1934 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1935 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1939 snew(grid_r, 2*dd->nnodes);
1942 dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd) ? grid_r[0] : NULL);
1946 for (d = 0; d < DIM; d++)
1948 for (i = 0; i < DIM; i++)
1956 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1958 tric[d][i] = box[i][d]/box[i][i];
1967 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1968 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
1969 out = gmx_fio_fopen(fname, "w");
1970 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1972 for (i = 0; i < dd->nnodes; i++)
1974 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1975 for (d = 0; d < DIM; d++)
1977 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1979 for (z = 0; z < 2; z++)
1981 for (y = 0; y < 2; y++)
1983 for (x = 0; x < 2; x++)
1985 cx[XX] = grid_r[i*2+x][XX];
1986 cx[YY] = grid_r[i*2+y][YY];
1987 cx[ZZ] = grid_r[i*2+z][ZZ];
1989 fprintf(out, format, "ATOM", a++, "CA", "GLY", ' ', 1+i,
1990 ' ', 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol);
1994 for (d = 0; d < DIM; d++)
1996 for (x = 0; x < 4; x++)
2000 case 0: y = 1 + i*8 + 2*x; break;
2001 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
2002 case 2: y = 1 + i*8 + x; break;
2004 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2008 gmx_fio_fclose(out);
2013 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
2014 gmx_mtop_t *mtop, t_commrec *cr,
2015 int natoms, rvec x[], matrix box)
2017 char fname[STRLEN], format[STRLEN], format4[STRLEN], buf[22];
2019 int i, ii, resnr, c;
2020 char *atomname, *resname;
2027 natoms = dd->comm->nat[ddnatVSITE];
2030 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2032 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
2033 sprintf(format4, "%s%s\n", get_pdbformat4(), "%6.2f%6.2f");
2035 out = gmx_fio_fopen(fname, "w");
2037 fprintf(out, "TITLE %s\n", title);
2038 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2039 for (i = 0; i < natoms; i++)
2041 ii = dd->gatindex[i];
2042 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
2043 if (i < dd->comm->nat[ddnatZONE])
2046 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2052 else if (i < dd->comm->nat[ddnatVSITE])
2054 b = dd->comm->zones.n;
2058 b = dd->comm->zones.n + 1;
2060 fprintf(out, strlen(atomname) < 4 ? format : format4,
2061 "ATOM", (ii+1)%100000,
2062 atomname, resname, ' ', resnr%10000, ' ',
2063 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b);
2065 fprintf(out, "TER\n");
2067 gmx_fio_fclose(out);
2070 real dd_cutoff_mbody(gmx_domdec_t *dd)
2072 gmx_domdec_comm_t *comm;
2079 if (comm->bInterCGBondeds)
2081 if (comm->cutoff_mbody > 0)
2083 r = comm->cutoff_mbody;
2087 /* cutoff_mbody=0 means we do not have DLB */
2088 r = comm->cellsize_min[dd->dim[0]];
2089 for (di = 1; di < dd->ndim; di++)
2091 r = min(r, comm->cellsize_min[dd->dim[di]]);
2093 if (comm->bBondComm)
2095 r = max(r, comm->cutoff_mbody);
2099 r = min(r, comm->cutoff);
2107 real dd_cutoff_twobody(gmx_domdec_t *dd)
2111 r_mb = dd_cutoff_mbody(dd);
2113 return max(dd->comm->cutoff, r_mb);
2117 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2121 nc = dd->nc[dd->comm->cartpmedim];
2122 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2123 copy_ivec(coord, coord_pme);
2124 coord_pme[dd->comm->cartpmedim] =
2125 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
2128 static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
2130 /* Here we assign a PME node to communicate with this DD node
2131 * by assuming that the major index of both is x.
2132 * We add cr->npmenodes/2 to obtain an even distribution.
2134 return (ddindex*npme + npme/2)/ndd;
2137 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2139 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2142 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2144 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
2147 static int *dd_pmenodes(t_commrec *cr)
2152 snew(pmenodes, cr->npmenodes);
2154 for (i = 0; i < cr->dd->nnodes; i++)
2156 p0 = cr_ddindex2pmeindex(cr, i);
2157 p1 = cr_ddindex2pmeindex(cr, i+1);
2158 if (i+1 == cr->dd->nnodes || p1 > p0)
2162 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2164 pmenodes[n] = i + 1 + n;
2172 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2175 ivec coords, coords_pme, nc;
2180 if (dd->comm->bCartesian) {
2181 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2182 dd_coords2pmecoords(dd,coords,coords_pme);
2183 copy_ivec(dd->ntot,nc);
2184 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2185 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2187 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2189 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2195 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
2200 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2202 gmx_domdec_comm_t *comm;
2204 int ddindex, nodeid = -1;
2206 comm = cr->dd->comm;
2211 if (comm->bCartesianPP_PME)
2214 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2219 ddindex = dd_index(cr->dd->nc, coords);
2220 if (comm->bCartesianPP)
2222 nodeid = comm->ddindex2simnodeid[ddindex];
2228 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
2240 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2243 gmx_domdec_comm_t *comm;
2244 ivec coord, coord_pme;
2251 /* This assumes a uniform x domain decomposition grid cell size */
2252 if (comm->bCartesianPP_PME)
2255 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2256 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2258 /* This is a PP node */
2259 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2260 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2264 else if (comm->bCartesianPP)
2266 if (sim_nodeid < dd->nnodes)
2268 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2273 /* This assumes DD cells with identical x coordinates
2274 * are numbered sequentially.
2276 if (dd->comm->pmenodes == NULL)
2278 if (sim_nodeid < dd->nnodes)
2280 /* The DD index equals the nodeid */
2281 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2287 while (sim_nodeid > dd->comm->pmenodes[i])
2291 if (sim_nodeid < dd->comm->pmenodes[i])
2293 pmenode = dd->comm->pmenodes[i];
2301 void get_pme_nnodes(const gmx_domdec_t *dd,
2302 int *npmenodes_x, int *npmenodes_y)
2306 *npmenodes_x = dd->comm->npmenodes_x;
2307 *npmenodes_y = dd->comm->npmenodes_y;
2316 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2318 gmx_bool bPMEOnlyNode;
2320 if (DOMAINDECOMP(cr))
2322 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2326 bPMEOnlyNode = FALSE;
2329 return bPMEOnlyNode;
2332 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2333 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2337 ivec coord, coord_pme;
2341 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2344 for (x = 0; x < dd->nc[XX]; x++)
2346 for (y = 0; y < dd->nc[YY]; y++)
2348 for (z = 0; z < dd->nc[ZZ]; z++)
2350 if (dd->comm->bCartesianPP_PME)
2355 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2356 if (dd->ci[XX] == coord_pme[XX] &&
2357 dd->ci[YY] == coord_pme[YY] &&
2358 dd->ci[ZZ] == coord_pme[ZZ])
2360 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2365 /* The slab corresponds to the nodeid in the PME group */
2366 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2368 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2375 /* The last PP-only node is the peer node */
2376 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2380 fprintf(debug, "Receive coordinates from PP nodes:");
2381 for (x = 0; x < *nmy_ddnodes; x++)
2383 fprintf(debug, " %d", (*my_ddnodes)[x]);
2385 fprintf(debug, "\n");
2389 static gmx_bool receive_vir_ener(t_commrec *cr)
2391 gmx_domdec_comm_t *comm;
2392 int pmenode, coords[DIM], rank;
2396 if (cr->npmenodes < cr->dd->nnodes)
2398 comm = cr->dd->comm;
2399 if (comm->bCartesianPP_PME)
2401 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2403 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2404 coords[comm->cartpmedim]++;
2405 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2407 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2408 if (dd_simnode2pmenode(cr, rank) == pmenode)
2410 /* This is not the last PP node for pmenode */
2418 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2419 if (cr->sim_nodeid+1 < cr->nnodes &&
2420 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2422 /* This is not the last PP node for pmenode */
2431 static void set_zones_ncg_home(gmx_domdec_t *dd)
2433 gmx_domdec_zones_t *zones;
2436 zones = &dd->comm->zones;
2438 zones->cg_range[0] = 0;
2439 for (i = 1; i < zones->n+1; i++)
2441 zones->cg_range[i] = dd->ncg_home;
2443 /* zone_ncg1[0] should always be equal to ncg_home */
2444 dd->comm->zone_ncg1[0] = dd->ncg_home;
2447 static void rebuild_cgindex(gmx_domdec_t *dd,
2448 const int *gcgs_index, t_state *state)
2450 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2453 dd_cg_gl = dd->index_gl;
2454 cgindex = dd->cgindex;
2457 for (i = 0; i < state->ncg_gl; i++)
2461 dd_cg_gl[i] = cg_gl;
2462 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2466 dd->ncg_home = state->ncg_gl;
2469 set_zones_ncg_home(dd);
2472 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2474 while (cg >= cginfo_mb->cg_end)
2479 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2482 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2483 t_forcerec *fr, char *bLocalCG)
2485 cginfo_mb_t *cginfo_mb;
2491 cginfo_mb = fr->cginfo_mb;
2492 cginfo = fr->cginfo;
2494 for (cg = cg0; cg < cg1; cg++)
2496 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2500 if (bLocalCG != NULL)
2502 for (cg = cg0; cg < cg1; cg++)
2504 bLocalCG[index_gl[cg]] = TRUE;
2509 static void make_dd_indices(gmx_domdec_t *dd,
2510 const int *gcgs_index, int cg_start)
2512 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2513 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2518 bLocalCG = dd->comm->bLocalCG;
2520 if (dd->nat_tot > dd->gatindex_nalloc)
2522 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2523 srenew(dd->gatindex, dd->gatindex_nalloc);
2526 nzone = dd->comm->zones.n;
2527 zone2cg = dd->comm->zones.cg_range;
2528 zone_ncg1 = dd->comm->zone_ncg1;
2529 index_gl = dd->index_gl;
2530 gatindex = dd->gatindex;
2531 bCGs = dd->comm->bCGs;
2533 if (zone2cg[1] != dd->ncg_home)
2535 gmx_incons("dd->ncg_zone is not up to date");
2538 /* Make the local to global and global to local atom index */
2539 a = dd->cgindex[cg_start];
2540 for (zone = 0; zone < nzone; zone++)
2548 cg0 = zone2cg[zone];
2550 cg1 = zone2cg[zone+1];
2551 cg1_p1 = cg0 + zone_ncg1[zone];
2553 for (cg = cg0; cg < cg1; cg++)
2558 /* Signal that this cg is from more than one pulse away */
2561 cg_gl = index_gl[cg];
2564 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2567 ga2la_set(dd->ga2la, a_gl, a, zone1);
2573 gatindex[a] = cg_gl;
2574 ga2la_set(dd->ga2la, cg_gl, a, zone1);
2581 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2584 int ncg, i, ngl, nerr;
2587 if (bLocalCG == NULL)
2591 for (i = 0; i < dd->ncg_tot; i++)
2593 if (!bLocalCG[dd->index_gl[i]])
2596 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
2601 for (i = 0; i < ncg_sys; i++)
2608 if (ngl != dd->ncg_tot)
2610 fprintf(stderr, "DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
2617 static void check_index_consistency(gmx_domdec_t *dd,
2618 int natoms_sys, int ncg_sys,
2621 int nerr, ngl, i, a, cell;
2626 if (dd->comm->DD_debug > 1)
2628 snew(have, natoms_sys);
2629 for (a = 0; a < dd->nat_tot; a++)
2631 if (have[dd->gatindex[a]] > 0)
2633 fprintf(stderr, "DD node %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2637 have[dd->gatindex[a]] = a + 1;
2643 snew(have, dd->nat_tot);
2646 for (i = 0; i < natoms_sys; i++)
2648 if (ga2la_get(dd->ga2la, i, &a, &cell))
2650 if (a >= dd->nat_tot)
2652 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2658 if (dd->gatindex[a] != i)
2660 fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2667 if (ngl != dd->nat_tot)
2670 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2671 dd->rank, where, ngl, dd->nat_tot);
2673 for (a = 0; a < dd->nat_tot; a++)
2678 "DD node %d, %s: local atom %d, global %d has no global index\n",
2679 dd->rank, where, a+1, dd->gatindex[a]+1);
2684 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2688 gmx_fatal(FARGS, "DD node %d, %s: %d atom/cg index inconsistencies",
2689 dd->rank, where, nerr);
2693 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2700 /* Clear the whole list without searching */
2701 ga2la_clear(dd->ga2la);
2705 for (i = a_start; i < dd->nat_tot; i++)
2707 ga2la_del(dd->ga2la, dd->gatindex[i]);
2711 bLocalCG = dd->comm->bLocalCG;
2714 for (i = cg_start; i < dd->ncg_tot; i++)
2716 bLocalCG[dd->index_gl[i]] = FALSE;
2720 dd_clear_local_vsite_indices(dd);
2722 if (dd->constraints)
2724 dd_clear_local_constraint_indices(dd);
2728 /* This function should be used for moving the domain boudaries during DLB,
2729 * for obtaining the minimum cell size. It checks the initially set limit
2730 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2731 * and, possibly, a longer cut-off limit set for PME load balancing.
2733 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2737 cellsize_min = comm->cellsize_min[dim];
2739 if (!comm->bVacDLBNoLimit)
2741 /* The cut-off might have changed, e.g. by PME load balacning,
2742 * from the value used to set comm->cellsize_min, so check it.
2744 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2746 if (comm->bPMELoadBalDLBLimits)
2748 /* Check for the cut-off limit set by the PME load balancing */
2749 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2753 return cellsize_min;
2756 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2759 real grid_jump_limit;
2761 /* The distance between the boundaries of cells at distance
2762 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2763 * and by the fact that cells should not be shifted by more than
2764 * half their size, such that cg's only shift by one cell
2765 * at redecomposition.
2767 grid_jump_limit = comm->cellsize_limit;
2768 if (!comm->bVacDLBNoLimit)
2770 if (comm->bPMELoadBalDLBLimits)
2772 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2774 grid_jump_limit = max(grid_jump_limit,
2775 cutoff/comm->cd[dim_ind].np);
2778 return grid_jump_limit;
2781 static gmx_bool check_grid_jump(gmx_int64_t step,
2787 gmx_domdec_comm_t *comm;
2796 for (d = 1; d < dd->ndim; d++)
2799 limit = grid_jump_limit(comm, cutoff, d);
2800 bfac = ddbox->box_size[dim];
2801 if (ddbox->tric_dir[dim])
2803 bfac *= ddbox->skew_fac[dim];
2805 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2806 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2814 /* This error should never be triggered under normal
2815 * circumstances, but you never know ...
2817 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
2818 gmx_step_str(step, buf),
2819 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2827 static int dd_load_count(gmx_domdec_comm_t *comm)
2829 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2832 static float dd_force_load(gmx_domdec_comm_t *comm)
2839 if (comm->eFlop > 1)
2841 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2846 load = comm->cycl[ddCyclF];
2847 if (comm->cycl_n[ddCyclF] > 1)
2849 /* Subtract the maximum of the last n cycle counts
2850 * to get rid of possible high counts due to other sources,
2851 * for instance system activity, that would otherwise
2852 * affect the dynamic load balancing.
2854 load -= comm->cycl_max[ddCyclF];
2858 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2860 float gpu_wait, gpu_wait_sum;
2862 gpu_wait = comm->cycl[ddCyclWaitGPU];
2863 if (comm->cycl_n[ddCyclF] > 1)
2865 /* We should remove the WaitGPU time of the same MD step
2866 * as the one with the maximum F time, since the F time
2867 * and the wait time are not independent.
2868 * Furthermore, the step for the max F time should be chosen
2869 * the same on all ranks that share the same GPU.
2870 * But to keep the code simple, we remove the average instead.
2871 * The main reason for artificially long times at some steps
2872 * is spurious CPU activity or MPI time, so we don't expect
2873 * that changes in the GPU wait time matter a lot here.
2875 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2877 /* Sum the wait times over the ranks that share the same GPU */
2878 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2879 comm->mpi_comm_gpu_shared);
2880 /* Replace the wait time by the average over the ranks */
2881 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
2889 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2891 gmx_domdec_comm_t *comm;
2896 snew(*dim_f, dd->nc[dim]+1);
2898 for (i = 1; i < dd->nc[dim]; i++)
2900 if (comm->slb_frac[dim])
2902 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2906 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2909 (*dim_f)[dd->nc[dim]] = 1;
2912 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2914 int pmeindex, slab, nso, i;
2917 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2923 ddpme->dim = dimind;
2925 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2927 ddpme->nslab = (ddpme->dim == 0 ?
2928 dd->comm->npmenodes_x :
2929 dd->comm->npmenodes_y);
2931 if (ddpme->nslab <= 1)
2936 nso = dd->comm->npmenodes/ddpme->nslab;
2937 /* Determine for each PME slab the PP location range for dimension dim */
2938 snew(ddpme->pp_min, ddpme->nslab);
2939 snew(ddpme->pp_max, ddpme->nslab);
2940 for (slab = 0; slab < ddpme->nslab; slab++)
2942 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2943 ddpme->pp_max[slab] = 0;
2945 for (i = 0; i < dd->nnodes; i++)
2947 ddindex2xyz(dd->nc, i, xyz);
2948 /* For y only use our y/z slab.
2949 * This assumes that the PME x grid size matches the DD grid size.
2951 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2953 pmeindex = ddindex2pmeindex(dd, i);
2956 slab = pmeindex/nso;
2960 slab = pmeindex % ddpme->nslab;
2962 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
2963 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
2967 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
2970 int dd_pme_maxshift_x(gmx_domdec_t *dd)
2972 if (dd->comm->ddpme[0].dim == XX)
2974 return dd->comm->ddpme[0].maxshift;
2982 int dd_pme_maxshift_y(gmx_domdec_t *dd)
2984 if (dd->comm->ddpme[0].dim == YY)
2986 return dd->comm->ddpme[0].maxshift;
2988 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2990 return dd->comm->ddpme[1].maxshift;
2998 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
2999 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
3001 gmx_domdec_comm_t *comm;
3004 real range, pme_boundary;
3008 nc = dd->nc[ddpme->dim];
3011 if (!ddpme->dim_match)
3013 /* PP decomposition is not along dim: the worst situation */
3016 else if (ns <= 3 || (bUniform && ns == nc))
3018 /* The optimal situation */
3023 /* We need to check for all pme nodes which nodes they
3024 * could possibly need to communicate with.
3026 xmin = ddpme->pp_min;
3027 xmax = ddpme->pp_max;
3028 /* Allow for atoms to be maximally 2/3 times the cut-off
3029 * out of their DD cell. This is a reasonable balance between
3030 * between performance and support for most charge-group/cut-off
3033 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3034 /* Avoid extra communication when we are exactly at a boundary */
3038 for (s = 0; s < ns; s++)
3040 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3041 pme_boundary = (real)s/ns;
3044 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3046 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3050 pme_boundary = (real)(s+1)/ns;
3053 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3055 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3062 ddpme->maxshift = sh;
3066 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3067 ddpme->dim, ddpme->maxshift);
3071 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3075 for (d = 0; d < dd->ndim; d++)
3078 if (dim < ddbox->nboundeddim &&
3079 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3080 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3082 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3083 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3084 dd->nc[dim], dd->comm->cellsize_limit);
3090 setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY
3093 /* Set the domain boundaries. Use for static (or no) load balancing,
3094 * and also for the starting state for dynamic load balancing.
3095 * setmode determine if and where the boundaries are stored, use enum above.
3096 * Returns the number communication pulses in npulse.
3098 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3099 int setmode, ivec npulse)
3101 gmx_domdec_comm_t *comm;
3104 real *cell_x, cell_dx, cellsize;
3108 for (d = 0; d < DIM; d++)
3110 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
3112 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3115 cell_dx = ddbox->box_size[d]/dd->nc[d];
3118 case setcellsizeslbMASTER:
3119 for (j = 0; j < dd->nc[d]+1; j++)
3121 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3124 case setcellsizeslbLOCAL:
3125 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3126 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
3131 cellsize = cell_dx*ddbox->skew_fac[d];
3132 while (cellsize*npulse[d] < comm->cutoff)
3136 cellsize_min[d] = cellsize;
3140 /* Statically load balanced grid */
3141 /* Also when we are not doing a master distribution we determine
3142 * all cell borders in a loop to obtain identical values
3143 * to the master distribution case and to determine npulse.
3145 if (setmode == setcellsizeslbMASTER)
3147 cell_x = dd->ma->cell_x[d];
3151 snew(cell_x, dd->nc[d]+1);
3153 cell_x[0] = ddbox->box0[d];
3154 for (j = 0; j < dd->nc[d]; j++)
3156 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3157 cell_x[j+1] = cell_x[j] + cell_dx;
3158 cellsize = cell_dx*ddbox->skew_fac[d];
3159 while (cellsize*npulse[d] < comm->cutoff &&
3160 npulse[d] < dd->nc[d]-1)
3164 cellsize_min[d] = min(cellsize_min[d], cellsize);
3166 if (setmode == setcellsizeslbLOCAL)
3168 comm->cell_x0[d] = cell_x[dd->ci[d]];
3169 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3171 if (setmode != setcellsizeslbMASTER)
3176 /* The following limitation is to avoid that a cell would receive
3177 * some of its own home charge groups back over the periodic boundary.
3178 * Double charge groups cause trouble with the global indices.
3180 if (d < ddbox->npbcdim &&
3181 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3183 char error_string[STRLEN];
3185 sprintf(error_string,
3186 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3187 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3189 dd->nc[d], dd->nc[d],
3190 dd->nnodes > dd->nc[d] ? "cells" : "processors");
3192 if (setmode == setcellsizeslbLOCAL)
3194 gmx_fatal_collective(FARGS, NULL, dd, error_string);
3198 gmx_fatal(FARGS, error_string);
3203 if (!comm->bDynLoadBal)
3205 copy_rvec(cellsize_min, comm->cellsize_min);
3208 for (d = 0; d < comm->npmedecompdim; d++)
3210 set_pme_maxshift(dd, &comm->ddpme[d],
3211 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3212 comm->ddpme[d].slb_dim_f);
3217 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3218 int d, int dim, gmx_domdec_root_t *root,
3220 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3222 gmx_domdec_comm_t *comm;
3223 int ncd, i, j, nmin, nmin_old;
3224 gmx_bool bLimLo, bLimHi;
3226 real fac, halfway, cellsize_limit_f_i, region_size;
3227 gmx_bool bPBC, bLastHi = FALSE;
3228 int nrange[] = {range[0], range[1]};
3230 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
3236 bPBC = (dim < ddbox->npbcdim);
3238 cell_size = root->buf_ncd;
3242 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3245 /* First we need to check if the scaling does not make cells
3246 * smaller than the smallest allowed size.
3247 * We need to do this iteratively, since if a cell is too small,
3248 * it needs to be enlarged, which makes all the other cells smaller,
3249 * which could in turn make another cell smaller than allowed.
3251 for (i = range[0]; i < range[1]; i++)
3253 root->bCellMin[i] = FALSE;
3259 /* We need the total for normalization */
3261 for (i = range[0]; i < range[1]; i++)
3263 if (root->bCellMin[i] == FALSE)
3265 fac += cell_size[i];
3268 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3269 /* Determine the cell boundaries */
3270 for (i = range[0]; i < range[1]; i++)
3272 if (root->bCellMin[i] == FALSE)
3274 cell_size[i] *= fac;
3275 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3277 cellsize_limit_f_i = 0;
3281 cellsize_limit_f_i = cellsize_limit_f;
3283 if (cell_size[i] < cellsize_limit_f_i)
3285 root->bCellMin[i] = TRUE;
3286 cell_size[i] = cellsize_limit_f_i;
3290 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
3293 while (nmin > nmin_old);
3296 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3297 /* For this check we should not use DD_CELL_MARGIN,
3298 * but a slightly smaller factor,
3299 * since rounding could get use below the limit.
3301 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3304 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3305 gmx_step_str(step, buf),
3306 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3307 ncd, comm->cellsize_min[dim]);
3310 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3314 /* Check if the boundary did not displace more than halfway
3315 * each of the cells it bounds, as this could cause problems,
3316 * especially when the differences between cell sizes are large.
3317 * If changes are applied, they will not make cells smaller
3318 * than the cut-off, as we check all the boundaries which
3319 * might be affected by a change and if the old state was ok,
3320 * the cells will at most be shrunk back to their old size.
3322 for (i = range[0]+1; i < range[1]; i++)
3324 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3325 if (root->cell_f[i] < halfway)
3327 root->cell_f[i] = halfway;
3328 /* Check if the change also causes shifts of the next boundaries */
3329 for (j = i+1; j < range[1]; j++)
3331 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3333 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3337 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3338 if (root->cell_f[i] > halfway)
3340 root->cell_f[i] = halfway;
3341 /* Check if the change also causes shifts of the next boundaries */
3342 for (j = i-1; j >= range[0]+1; j--)
3344 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3346 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3353 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3354 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3355 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3356 * for a and b nrange is used */
3359 /* Take care of the staggering of the cell boundaries */
3362 for (i = range[0]; i < range[1]; i++)
3364 root->cell_f_max0[i] = root->cell_f[i];
3365 root->cell_f_min1[i] = root->cell_f[i+1];
3370 for (i = range[0]+1; i < range[1]; i++)
3372 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3373 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3374 if (bLimLo && bLimHi)
3376 /* Both limits violated, try the best we can */
3377 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3378 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3379 nrange[0] = range[0];
3381 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3384 nrange[1] = range[1];
3385 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3391 /* root->cell_f[i] = root->bound_min[i]; */
3392 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3395 else if (bLimHi && !bLastHi)
3398 if (nrange[1] < range[1]) /* found a LimLo before */
3400 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3401 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3402 nrange[0] = nrange[1];
3404 root->cell_f[i] = root->bound_max[i];
3406 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3408 nrange[1] = range[1];
3411 if (nrange[1] < range[1]) /* found last a LimLo */
3413 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3414 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3415 nrange[0] = nrange[1];
3416 nrange[1] = range[1];
3417 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3419 else if (nrange[0] > range[0]) /* found at least one LimHi */
3421 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3428 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3429 int d, int dim, gmx_domdec_root_t *root,
3430 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3431 gmx_bool bUniform, gmx_int64_t step)
3433 gmx_domdec_comm_t *comm;
3434 int ncd, d1, i, j, pos;
3436 real load_aver, load_i, imbalance, change, change_max, sc;
3437 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3441 int range[] = { 0, 0 };
3445 /* Convert the maximum change from the input percentage to a fraction */
3446 change_limit = comm->dlb_scale_lim*0.01;
3450 bPBC = (dim < ddbox->npbcdim);
3452 cell_size = root->buf_ncd;
3454 /* Store the original boundaries */
3455 for (i = 0; i < ncd+1; i++)
3457 root->old_cell_f[i] = root->cell_f[i];
3461 for (i = 0; i < ncd; i++)
3463 cell_size[i] = 1.0/ncd;
3466 else if (dd_load_count(comm))
3468 load_aver = comm->load[d].sum_m/ncd;
3470 for (i = 0; i < ncd; i++)
3472 /* Determine the relative imbalance of cell i */
3473 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3474 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3475 /* Determine the change of the cell size using underrelaxation */
3476 change = -relax*imbalance;
3477 change_max = max(change_max, max(change, -change));
3479 /* Limit the amount of scaling.
3480 * We need to use the same rescaling for all cells in one row,
3481 * otherwise the load balancing might not converge.
3484 if (change_max > change_limit)
3486 sc *= change_limit/change_max;
3488 for (i = 0; i < ncd; i++)
3490 /* Determine the relative imbalance of cell i */
3491 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3492 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3493 /* Determine the change of the cell size using underrelaxation */
3494 change = -sc*imbalance;
3495 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3499 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3500 cellsize_limit_f *= DD_CELL_MARGIN;
3501 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3502 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3503 if (ddbox->tric_dir[dim])
3505 cellsize_limit_f /= ddbox->skew_fac[dim];
3506 dist_min_f /= ddbox->skew_fac[dim];
3508 if (bDynamicBox && d > 0)
3510 dist_min_f *= DD_PRES_SCALE_MARGIN;
3512 if (d > 0 && !bUniform)
3514 /* Make sure that the grid is not shifted too much */
3515 for (i = 1; i < ncd; i++)
3517 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3519 gmx_incons("Inconsistent DD boundary staggering limits!");
3521 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3522 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3525 root->bound_min[i] += 0.5*space;
3527 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3528 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3531 root->bound_max[i] += 0.5*space;
3536 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3538 root->cell_f_max0[i-1] + dist_min_f,
3539 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3540 root->cell_f_min1[i] - dist_min_f);
3545 root->cell_f[0] = 0;
3546 root->cell_f[ncd] = 1;
3547 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3550 /* After the checks above, the cells should obey the cut-off
3551 * restrictions, but it does not hurt to check.
3553 for (i = 0; i < ncd; i++)
3557 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3558 dim, i, root->cell_f[i], root->cell_f[i+1]);
3561 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3562 root->cell_f[i+1] - root->cell_f[i] <
3563 cellsize_limit_f/DD_CELL_MARGIN)
3567 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3568 gmx_step_str(step, buf), dim2char(dim), i,
3569 (root->cell_f[i+1] - root->cell_f[i])
3570 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3575 /* Store the cell boundaries of the lower dimensions at the end */
3576 for (d1 = 0; d1 < d; d1++)
3578 root->cell_f[pos++] = comm->cell_f0[d1];
3579 root->cell_f[pos++] = comm->cell_f1[d1];
3582 if (d < comm->npmedecompdim)
3584 /* The master determines the maximum shift for
3585 * the coordinate communication between separate PME nodes.
3587 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
3589 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3592 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3596 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3597 gmx_ddbox_t *ddbox, int dimind)
3599 gmx_domdec_comm_t *comm;
3604 /* Set the cell dimensions */
3605 dim = dd->dim[dimind];
3606 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3607 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3608 if (dim >= ddbox->nboundeddim)
3610 comm->cell_x0[dim] += ddbox->box0[dim];
3611 comm->cell_x1[dim] += ddbox->box0[dim];
3615 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3616 int d, int dim, real *cell_f_row,
3619 gmx_domdec_comm_t *comm;
3625 /* Each node would only need to know two fractions,
3626 * but it is probably cheaper to broadcast the whole array.
3628 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3629 0, comm->mpi_comm_load[d]);
3631 /* Copy the fractions for this dimension from the buffer */
3632 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3633 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3634 /* The whole array was communicated, so set the buffer position */
3635 pos = dd->nc[dim] + 1;
3636 for (d1 = 0; d1 <= d; d1++)
3640 /* Copy the cell fractions of the lower dimensions */
3641 comm->cell_f0[d1] = cell_f_row[pos++];
3642 comm->cell_f1[d1] = cell_f_row[pos++];
3644 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3646 /* Convert the communicated shift from float to int */
3647 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3650 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3654 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3655 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3656 gmx_bool bUniform, gmx_int64_t step)
3658 gmx_domdec_comm_t *comm;
3660 gmx_bool bRowMember, bRowRoot;
3665 for (d = 0; d < dd->ndim; d++)
3670 for (d1 = d; d1 < dd->ndim; d1++)
3672 if (dd->ci[dd->dim[d1]] > 0)
3685 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3686 ddbox, bDynamicBox, bUniform, step);
3687 cell_f_row = comm->root[d]->cell_f;
3691 cell_f_row = comm->cell_f_row;
3693 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
3698 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3702 /* This function assumes the box is static and should therefore
3703 * not be called when the box has changed since the last
3704 * call to dd_partition_system.
3706 for (d = 0; d < dd->ndim; d++)
3708 relative_to_absolute_cell_bounds(dd, ddbox, d);
3714 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3715 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3716 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3717 gmx_wallcycle_t wcycle)
3719 gmx_domdec_comm_t *comm;
3726 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3727 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3728 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3730 else if (bDynamicBox)
3732 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3735 /* Set the dimensions for which no DD is used */
3736 for (dim = 0; dim < DIM; dim++)
3738 if (dd->nc[dim] == 1)
3740 comm->cell_x0[dim] = 0;
3741 comm->cell_x1[dim] = ddbox->box_size[dim];
3742 if (dim >= ddbox->nboundeddim)
3744 comm->cell_x0[dim] += ddbox->box0[dim];
3745 comm->cell_x1[dim] += ddbox->box0[dim];
3751 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3754 gmx_domdec_comm_dim_t *cd;
3756 for (d = 0; d < dd->ndim; d++)
3758 cd = &dd->comm->cd[d];
3759 np = npulse[dd->dim[d]];
3760 if (np > cd->np_nalloc)
3764 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3765 dim2char(dd->dim[d]), np);
3767 if (DDMASTER(dd) && cd->np_nalloc > 0)
3769 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3771 srenew(cd->ind, np);
3772 for (i = cd->np_nalloc; i < np; i++)
3774 cd->ind[i].index = NULL;
3775 cd->ind[i].nalloc = 0;
3784 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3785 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3786 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3787 gmx_wallcycle_t wcycle)
3789 gmx_domdec_comm_t *comm;
3795 /* Copy the old cell boundaries for the cg displacement check */
3796 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3797 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3799 if (comm->bDynLoadBal)
3803 check_box_size(dd, ddbox);
3805 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3809 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
3810 realloc_comm_ind(dd, npulse);
3815 for (d = 0; d < DIM; d++)
3817 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3818 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
3823 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3825 rvec cell_ns_x0, rvec cell_ns_x1,
3828 gmx_domdec_comm_t *comm;
3833 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3835 dim = dd->dim[dim_ind];
3837 /* Without PBC we don't have restrictions on the outer cells */
3838 if (!(dim >= ddbox->npbcdim &&
3839 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3840 comm->bDynLoadBal &&
3841 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3842 comm->cellsize_min[dim])
3845 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3846 gmx_step_str(step, buf), dim2char(dim),
3847 comm->cell_x1[dim] - comm->cell_x0[dim],
3848 ddbox->skew_fac[dim],
3849 dd->comm->cellsize_min[dim],
3850 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3854 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3856 /* Communicate the boundaries and update cell_ns_x0/1 */
3857 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3858 if (dd->bGridJump && dd->ndim > 1)
3860 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
3865 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3869 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3877 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3878 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3887 static void check_screw_box(matrix box)
3889 /* Mathematical limitation */
3890 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3892 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3895 /* Limitation due to the asymmetry of the eighth shell method */
3896 if (box[ZZ][YY] != 0)
3898 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
3902 static void distribute_cg(FILE *fplog, gmx_int64_t step,
3903 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3906 gmx_domdec_master_t *ma;
3907 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3908 int i, icg, j, k, k0, k1, d, npbcdim;
3910 rvec box_size, cg_cm;
3912 real nrcg, inv_ncg, pos_d;
3914 gmx_bool bUnbounded, bScrew;
3918 if (tmp_ind == NULL)
3920 snew(tmp_nalloc, dd->nnodes);
3921 snew(tmp_ind, dd->nnodes);
3922 for (i = 0; i < dd->nnodes; i++)
3924 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3925 snew(tmp_ind[i], tmp_nalloc[i]);
3929 /* Clear the count */
3930 for (i = 0; i < dd->nnodes; i++)
3936 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3938 cgindex = cgs->index;
3940 /* Compute the center of geometry for all charge groups */
3941 for (icg = 0; icg < cgs->nr; icg++)
3944 k1 = cgindex[icg+1];
3948 copy_rvec(pos[k0], cg_cm);
3955 for (k = k0; (k < k1); k++)
3957 rvec_inc(cg_cm, pos[k]);
3959 for (d = 0; (d < DIM); d++)
3961 cg_cm[d] *= inv_ncg;
3964 /* Put the charge group in the box and determine the cell index */
3965 for (d = DIM-1; d >= 0; d--)
3968 if (d < dd->npbcdim)
3970 bScrew = (dd->bScrewPBC && d == XX);
3971 if (tric_dir[d] && dd->nc[d] > 1)
3973 /* Use triclinic coordintates for this dimension */
3974 for (j = d+1; j < DIM; j++)
3976 pos_d += cg_cm[j]*tcm[j][d];
3979 while (pos_d >= box[d][d])
3982 rvec_dec(cg_cm, box[d]);
3985 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3986 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3988 for (k = k0; (k < k1); k++)
3990 rvec_dec(pos[k], box[d]);
3993 pos[k][YY] = box[YY][YY] - pos[k][YY];
3994 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4001 rvec_inc(cg_cm, box[d]);
4004 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
4005 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
4007 for (k = k0; (k < k1); k++)
4009 rvec_inc(pos[k], box[d]);
4012 pos[k][YY] = box[YY][YY] - pos[k][YY];
4013 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4018 /* This could be done more efficiently */
4020 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
4025 i = dd_index(dd->nc, ind);
4026 if (ma->ncg[i] == tmp_nalloc[i])
4028 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
4029 srenew(tmp_ind[i], tmp_nalloc[i]);
4031 tmp_ind[i][ma->ncg[i]] = icg;
4033 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
4037 for (i = 0; i < dd->nnodes; i++)
4040 for (k = 0; k < ma->ncg[i]; k++)
4042 ma->cg[k1++] = tmp_ind[i][k];
4045 ma->index[dd->nnodes] = k1;
4047 for (i = 0; i < dd->nnodes; i++)
4057 fprintf(fplog, "Charge group distribution at step %s:",
4058 gmx_step_str(step, buf));
4059 for (i = 0; i < dd->nnodes; i++)
4061 fprintf(fplog, " %d", ma->ncg[i]);
4063 fprintf(fplog, "\n");
4067 static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4068 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4071 gmx_domdec_master_t *ma = NULL;
4074 int *ibuf, buf2[2] = { 0, 0 };
4075 gmx_bool bMaster = DDMASTER(dd);
4083 check_screw_box(box);
4086 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
4088 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
4089 for (i = 0; i < dd->nnodes; i++)
4091 ma->ibuf[2*i] = ma->ncg[i];
4092 ma->ibuf[2*i+1] = ma->nat[i];
4100 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4102 dd->ncg_home = buf2[0];
4103 dd->nat_home = buf2[1];
4104 dd->ncg_tot = dd->ncg_home;
4105 dd->nat_tot = dd->nat_home;
4106 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4108 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4109 srenew(dd->index_gl, dd->cg_nalloc);
4110 srenew(dd->cgindex, dd->cg_nalloc+1);
4114 for (i = 0; i < dd->nnodes; i++)
4116 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4117 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4122 DDMASTER(dd) ? ma->ibuf : NULL,
4123 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4124 DDMASTER(dd) ? ma->cg : NULL,
4125 dd->ncg_home*sizeof(int), dd->index_gl);
4127 /* Determine the home charge group sizes */
4129 for (i = 0; i < dd->ncg_home; i++)
4131 cg_gl = dd->index_gl[i];
4133 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4138 fprintf(debug, "Home charge groups:\n");
4139 for (i = 0; i < dd->ncg_home; i++)
4141 fprintf(debug, " %d", dd->index_gl[i]);
4144 fprintf(debug, "\n");
4147 fprintf(debug, "\n");
4151 static int compact_and_copy_vec_at(int ncg, int *move,
4154 rvec *src, gmx_domdec_comm_t *comm,
4157 int m, icg, i, i0, i1, nrcg;
4163 for (m = 0; m < DIM*2; m++)
4169 for (icg = 0; icg < ncg; icg++)
4171 i1 = cgindex[icg+1];
4177 /* Compact the home array in place */
4178 for (i = i0; i < i1; i++)
4180 copy_rvec(src[i], src[home_pos++]);
4186 /* Copy to the communication buffer */
4188 pos_vec[m] += 1 + vec*nrcg;
4189 for (i = i0; i < i1; i++)
4191 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4193 pos_vec[m] += (nvec - vec - 1)*nrcg;
4197 home_pos += i1 - i0;
4205 static int compact_and_copy_vec_cg(int ncg, int *move,
4207 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4210 int m, icg, i0, i1, nrcg;
4216 for (m = 0; m < DIM*2; m++)
4222 for (icg = 0; icg < ncg; icg++)
4224 i1 = cgindex[icg+1];
4230 /* Compact the home array in place */
4231 copy_rvec(src[icg], src[home_pos++]);
4237 /* Copy to the communication buffer */
4238 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
4239 pos_vec[m] += 1 + nrcg*nvec;
4251 static int compact_ind(int ncg, int *move,
4252 int *index_gl, int *cgindex,
4254 gmx_ga2la_t ga2la, char *bLocalCG,
4257 int cg, nat, a0, a1, a, a_gl;
4262 for (cg = 0; cg < ncg; cg++)
4268 /* Compact the home arrays in place.
4269 * Anything that can be done here avoids access to global arrays.
4271 cgindex[home_pos] = nat;
4272 for (a = a0; a < a1; a++)
4275 gatindex[nat] = a_gl;
4276 /* The cell number stays 0, so we don't need to set it */
4277 ga2la_change_la(ga2la, a_gl, nat);
4280 index_gl[home_pos] = index_gl[cg];
4281 cginfo[home_pos] = cginfo[cg];
4282 /* The charge group remains local, so bLocalCG does not change */
4287 /* Clear the global indices */
4288 for (a = a0; a < a1; a++)
4290 ga2la_del(ga2la, gatindex[a]);
4294 bLocalCG[index_gl[cg]] = FALSE;
4298 cgindex[home_pos] = nat;
4303 static void clear_and_mark_ind(int ncg, int *move,
4304 int *index_gl, int *cgindex, int *gatindex,
4305 gmx_ga2la_t ga2la, char *bLocalCG,
4310 for (cg = 0; cg < ncg; cg++)
4316 /* Clear the global indices */
4317 for (a = a0; a < a1; a++)
4319 ga2la_del(ga2la, gatindex[a]);
4323 bLocalCG[index_gl[cg]] = FALSE;
4325 /* Signal that this cg has moved using the ns cell index.
4326 * Here we set it to -1. fill_grid will change it
4327 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4329 cell_index[cg] = -1;
4334 static void print_cg_move(FILE *fplog,
4336 gmx_int64_t step, int cg, int dim, int dir,
4337 gmx_bool bHaveLimitdAndCMOld, real limitd,
4338 rvec cm_old, rvec cm_new, real pos_d)
4340 gmx_domdec_comm_t *comm;
4345 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4346 if (bHaveLimitdAndCMOld)
4348 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4349 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4353 fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4354 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4356 fprintf(fplog, "distance out of cell %f\n",
4357 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4358 if (bHaveLimitdAndCMOld)
4360 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4361 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4363 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4364 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4365 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4367 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4368 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4370 comm->cell_x0[dim], comm->cell_x1[dim]);
4373 static void cg_move_error(FILE *fplog,
4375 gmx_int64_t step, int cg, int dim, int dir,
4376 gmx_bool bHaveLimitdAndCMOld, real limitd,
4377 rvec cm_old, rvec cm_new, real pos_d)
4381 print_cg_move(fplog, dd, step, cg, dim, dir,
4382 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4384 print_cg_move(stderr, dd, step, cg, dim, dir,
4385 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4387 "A charge group moved too far between two domain decomposition steps\n"
4388 "This usually means that your system is not well equilibrated");
4391 static void rotate_state_atom(t_state *state, int a)
4395 for (est = 0; est < estNR; est++)
4397 if (EST_DISTR(est) && (state->flags & (1<<est)))
4402 /* Rotate the complete state; for a rectangular box only */
4403 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4404 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4407 state->v[a][YY] = -state->v[a][YY];
4408 state->v[a][ZZ] = -state->v[a][ZZ];
4411 state->sd_X[a][YY] = -state->sd_X[a][YY];
4412 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4415 state->cg_p[a][YY] = -state->cg_p[a][YY];
4416 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4418 case estDISRE_INITF:
4419 case estDISRE_RM3TAV:
4420 case estORIRE_INITF:
4422 /* These are distances, so not affected by rotation */
4425 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4431 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4433 if (natoms > comm->moved_nalloc)
4435 /* Contents should be preserved here */
4436 comm->moved_nalloc = over_alloc_dd(natoms);
4437 srenew(comm->moved, comm->moved_nalloc);
4443 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4446 ivec tric_dir, matrix tcm,
4447 rvec cell_x0, rvec cell_x1,
4448 rvec limitd, rvec limit0, rvec limit1,
4450 int cg_start, int cg_end,
4455 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4456 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4460 real inv_ncg, pos_d;
4463 npbcdim = dd->npbcdim;
4465 for (cg = cg_start; cg < cg_end; cg++)
4472 copy_rvec(state->x[k0], cm_new);
4479 for (k = k0; (k < k1); k++)
4481 rvec_inc(cm_new, state->x[k]);
4483 for (d = 0; (d < DIM); d++)
4485 cm_new[d] = inv_ncg*cm_new[d];
4490 /* Do pbc and check DD cell boundary crossings */
4491 for (d = DIM-1; d >= 0; d--)
4495 bScrew = (dd->bScrewPBC && d == XX);
4496 /* Determine the location of this cg in lattice coordinates */
4500 for (d2 = d+1; d2 < DIM; d2++)
4502 pos_d += cm_new[d2]*tcm[d2][d];
4505 /* Put the charge group in the triclinic unit-cell */
4506 if (pos_d >= cell_x1[d])
4508 if (pos_d >= limit1[d])
4510 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4511 cg_cm[cg], cm_new, pos_d);
4514 if (dd->ci[d] == dd->nc[d] - 1)
4516 rvec_dec(cm_new, state->box[d]);
4519 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4520 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4522 for (k = k0; (k < k1); k++)
4524 rvec_dec(state->x[k], state->box[d]);
4527 rotate_state_atom(state, k);
4532 else if (pos_d < cell_x0[d])
4534 if (pos_d < limit0[d])
4536 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4537 cg_cm[cg], cm_new, pos_d);
4542 rvec_inc(cm_new, state->box[d]);
4545 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4546 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4548 for (k = k0; (k < k1); k++)
4550 rvec_inc(state->x[k], state->box[d]);
4553 rotate_state_atom(state, k);
4559 else if (d < npbcdim)
4561 /* Put the charge group in the rectangular unit-cell */
4562 while (cm_new[d] >= state->box[d][d])
4564 rvec_dec(cm_new, state->box[d]);
4565 for (k = k0; (k < k1); k++)
4567 rvec_dec(state->x[k], state->box[d]);
4570 while (cm_new[d] < 0)
4572 rvec_inc(cm_new, state->box[d]);
4573 for (k = k0; (k < k1); k++)
4575 rvec_inc(state->x[k], state->box[d]);
4581 copy_rvec(cm_new, cg_cm[cg]);
4583 /* Determine where this cg should go */
4586 for (d = 0; d < dd->ndim; d++)
4591 flag |= DD_FLAG_FW(d);
4597 else if (dev[dim] == -1)
4599 flag |= DD_FLAG_BW(d);
4602 if (dd->nc[dim] > 2)
4613 /* Temporarily store the flag in move */
4614 move[cg] = mc + flag;
4618 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4619 gmx_domdec_t *dd, ivec tric_dir,
4620 t_state *state, rvec **f,
4629 int ncg[DIM*2], nat[DIM*2];
4630 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4631 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4632 int sbuf[2], rbuf[2];
4633 int home_pos_cg, home_pos_at, buf_pos;
4635 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4638 real inv_ncg, pos_d;
4640 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4642 cginfo_mb_t *cginfo_mb;
4643 gmx_domdec_comm_t *comm;
4645 int nthread, thread;
4649 check_screw_box(state->box);
4653 if (fr->cutoff_scheme == ecutsGROUP)
4658 for (i = 0; i < estNR; i++)
4664 case estX: /* Always present */ break;
4665 case estV: bV = (state->flags & (1<<i)); break;
4666 case estSDX: bSDX = (state->flags & (1<<i)); break;
4667 case estCGP: bCGP = (state->flags & (1<<i)); break;
4670 case estDISRE_INITF:
4671 case estDISRE_RM3TAV:
4672 case estORIRE_INITF:
4674 /* No processing required */
4677 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4682 if (dd->ncg_tot > comm->nalloc_int)
4684 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4685 srenew(comm->buf_int, comm->nalloc_int);
4687 move = comm->buf_int;
4689 /* Clear the count */
4690 for (c = 0; c < dd->ndim*2; c++)
4696 npbcdim = dd->npbcdim;
4698 for (d = 0; (d < DIM); d++)
4700 limitd[d] = dd->comm->cellsize_min[d];
4701 if (d >= npbcdim && dd->ci[d] == 0)
4703 cell_x0[d] = -GMX_FLOAT_MAX;
4707 cell_x0[d] = comm->cell_x0[d];
4709 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4711 cell_x1[d] = GMX_FLOAT_MAX;
4715 cell_x1[d] = comm->cell_x1[d];
4719 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4720 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4724 /* We check after communication if a charge group moved
4725 * more than one cell. Set the pre-comm check limit to float_max.
4727 limit0[d] = -GMX_FLOAT_MAX;
4728 limit1[d] = GMX_FLOAT_MAX;
4732 make_tric_corr_matrix(npbcdim, state->box, tcm);
4734 cgindex = dd->cgindex;
4736 nthread = gmx_omp_nthreads_get(emntDomdec);
4738 /* Compute the center of geometry for all home charge groups
4739 * and put them in the box and determine where they should go.
4741 #pragma omp parallel for num_threads(nthread) schedule(static)
4742 for (thread = 0; thread < nthread; thread++)
4744 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4745 cell_x0, cell_x1, limitd, limit0, limit1,
4747 ( thread *dd->ncg_home)/nthread,
4748 ((thread+1)*dd->ncg_home)/nthread,
4749 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4753 for (cg = 0; cg < dd->ncg_home; cg++)
4758 flag = mc & ~DD_FLAG_NRCG;
4759 mc = mc & DD_FLAG_NRCG;
4762 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4764 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4765 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4767 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4768 /* We store the cg size in the lower 16 bits
4769 * and the place where the charge group should go
4770 * in the next 6 bits. This saves some communication volume.
4772 nrcg = cgindex[cg+1] - cgindex[cg];
4773 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4779 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4780 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4783 for (i = 0; i < dd->ndim*2; i++)
4785 *ncg_moved += ncg[i];
4802 /* Make sure the communication buffers are large enough */
4803 for (mc = 0; mc < dd->ndim*2; mc++)
4805 nvr = ncg[mc] + nat[mc]*nvec;
4806 if (nvr > comm->cgcm_state_nalloc[mc])
4808 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4809 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4813 switch (fr->cutoff_scheme)
4816 /* Recalculating cg_cm might be cheaper than communicating,
4817 * but that could give rise to rounding issues.
4820 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4821 nvec, cg_cm, comm, bCompact);
4824 /* Without charge groups we send the moved atom coordinates
4825 * over twice. This is so the code below can be used without
4826 * many conditionals for both for with and without charge groups.
4829 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4830 nvec, state->x, comm, FALSE);
4833 home_pos_cg -= *ncg_moved;
4837 gmx_incons("unimplemented");
4843 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4844 nvec, vec++, state->x, comm, bCompact);
4847 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4848 nvec, vec++, state->v, comm, bCompact);
4852 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4853 nvec, vec++, state->sd_X, comm, bCompact);
4857 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4858 nvec, vec++, state->cg_p, comm, bCompact);
4863 compact_ind(dd->ncg_home, move,
4864 dd->index_gl, dd->cgindex, dd->gatindex,
4865 dd->ga2la, comm->bLocalCG,
4870 if (fr->cutoff_scheme == ecutsVERLET)
4872 moved = get_moved(comm, dd->ncg_home);
4874 for (k = 0; k < dd->ncg_home; k++)
4881 moved = fr->ns.grid->cell_index;
4884 clear_and_mark_ind(dd->ncg_home, move,
4885 dd->index_gl, dd->cgindex, dd->gatindex,
4886 dd->ga2la, comm->bLocalCG,
4890 cginfo_mb = fr->cginfo_mb;
4892 *ncg_stay_home = home_pos_cg;
4893 for (d = 0; d < dd->ndim; d++)
4899 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4902 /* Communicate the cg and atom counts */
4907 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4908 d, dir, sbuf[0], sbuf[1]);
4910 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4912 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4914 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4915 srenew(comm->buf_int, comm->nalloc_int);
4918 /* Communicate the charge group indices, sizes and flags */
4919 dd_sendrecv_int(dd, d, dir,
4920 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4921 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4923 nvs = ncg[cdd] + nat[cdd]*nvec;
4924 i = rbuf[0] + rbuf[1] *nvec;
4925 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4927 /* Communicate cgcm and state */
4928 dd_sendrecv_rvec(dd, d, dir,
4929 comm->cgcm_state[cdd], nvs,
4930 comm->vbuf.v+nvr, i);
4931 ncg_recv += rbuf[0];
4932 nat_recv += rbuf[1];
4936 /* Process the received charge groups */
4938 for (cg = 0; cg < ncg_recv; cg++)
4940 flag = comm->buf_int[cg*DD_CGIBS+1];
4942 if (dim >= npbcdim && dd->nc[dim] > 2)
4944 /* No pbc in this dim and more than one domain boundary.
4945 * We do a separate check if a charge group didn't move too far.
4947 if (((flag & DD_FLAG_FW(d)) &&
4948 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4949 ((flag & DD_FLAG_BW(d)) &&
4950 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4952 cg_move_error(fplog, dd, step, cg, dim,
4953 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4955 comm->vbuf.v[buf_pos],
4956 comm->vbuf.v[buf_pos],
4957 comm->vbuf.v[buf_pos][dim]);
4964 /* Check which direction this cg should go */
4965 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4969 /* The cell boundaries for dimension d2 are not equal
4970 * for each cell row of the lower dimension(s),
4971 * therefore we might need to redetermine where
4972 * this cg should go.
4975 /* If this cg crosses the box boundary in dimension d2
4976 * we can use the communicated flag, so we do not
4977 * have to worry about pbc.
4979 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4980 (flag & DD_FLAG_FW(d2))) ||
4981 (dd->ci[dim2] == 0 &&
4982 (flag & DD_FLAG_BW(d2)))))
4984 /* Clear the two flags for this dimension */
4985 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4986 /* Determine the location of this cg
4987 * in lattice coordinates
4989 pos_d = comm->vbuf.v[buf_pos][dim2];
4992 for (d3 = dim2+1; d3 < DIM; d3++)
4995 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4998 /* Check of we are not at the box edge.
4999 * pbc is only handled in the first step above,
5000 * but this check could move over pbc while
5001 * the first step did not due to different rounding.
5003 if (pos_d >= cell_x1[dim2] &&
5004 dd->ci[dim2] != dd->nc[dim2]-1)
5006 flag |= DD_FLAG_FW(d2);
5008 else if (pos_d < cell_x0[dim2] &&
5011 flag |= DD_FLAG_BW(d2);
5013 comm->buf_int[cg*DD_CGIBS+1] = flag;
5016 /* Set to which neighboring cell this cg should go */
5017 if (flag & DD_FLAG_FW(d2))
5021 else if (flag & DD_FLAG_BW(d2))
5023 if (dd->nc[dd->dim[d2]] > 2)
5035 nrcg = flag & DD_FLAG_NRCG;
5038 if (home_pos_cg+1 > dd->cg_nalloc)
5040 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5041 srenew(dd->index_gl, dd->cg_nalloc);
5042 srenew(dd->cgindex, dd->cg_nalloc+1);
5044 /* Set the global charge group index and size */
5045 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5046 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5047 /* Copy the state from the buffer */
5048 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5049 if (fr->cutoff_scheme == ecutsGROUP)
5052 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5056 /* Set the cginfo */
5057 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5058 dd->index_gl[home_pos_cg]);
5061 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5064 if (home_pos_at+nrcg > state->nalloc)
5066 dd_realloc_state(state, f, home_pos_at+nrcg);
5068 for (i = 0; i < nrcg; i++)
5070 copy_rvec(comm->vbuf.v[buf_pos++],
5071 state->x[home_pos_at+i]);
5075 for (i = 0; i < nrcg; i++)
5077 copy_rvec(comm->vbuf.v[buf_pos++],
5078 state->v[home_pos_at+i]);
5083 for (i = 0; i < nrcg; i++)
5085 copy_rvec(comm->vbuf.v[buf_pos++],
5086 state->sd_X[home_pos_at+i]);
5091 for (i = 0; i < nrcg; i++)
5093 copy_rvec(comm->vbuf.v[buf_pos++],
5094 state->cg_p[home_pos_at+i]);
5098 home_pos_at += nrcg;
5102 /* Reallocate the buffers if necessary */
5103 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5105 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5106 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5108 nvr = ncg[mc] + nat[mc]*nvec;
5109 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5111 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5112 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5114 /* Copy from the receive to the send buffers */
5115 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5116 comm->buf_int + cg*DD_CGIBS,
5117 DD_CGIBS*sizeof(int));
5118 memcpy(comm->cgcm_state[mc][nvr],
5119 comm->vbuf.v[buf_pos],
5120 (1+nrcg*nvec)*sizeof(rvec));
5121 buf_pos += 1 + nrcg*nvec;
5128 /* With sorting (!bCompact) the indices are now only partially up to date
5129 * and ncg_home and nat_home are not the real count, since there are
5130 * "holes" in the arrays for the charge groups that moved to neighbors.
5132 if (fr->cutoff_scheme == ecutsVERLET)
5134 moved = get_moved(comm, home_pos_cg);
5136 for (i = dd->ncg_home; i < home_pos_cg; i++)
5141 dd->ncg_home = home_pos_cg;
5142 dd->nat_home = home_pos_at;
5147 "Finished repartitioning: cgs moved out %d, new home %d\n",
5148 *ncg_moved, dd->ncg_home-*ncg_moved);
5153 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5155 dd->comm->cycl[ddCycl] += cycles;
5156 dd->comm->cycl_n[ddCycl]++;
5157 if (cycles > dd->comm->cycl_max[ddCycl])
5159 dd->comm->cycl_max[ddCycl] = cycles;
5163 static double force_flop_count(t_nrnb *nrnb)
5170 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5172 /* To get closer to the real timings, we half the count
5173 * for the normal loops and again half it for water loops.
5176 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5178 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5182 sum += nrnb->n[i]*0.50*cost_nrnb(i);
5185 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5188 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5190 sum += nrnb->n[i]*cost_nrnb(i);
5193 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5195 sum += nrnb->n[i]*cost_nrnb(i);
5201 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5203 if (dd->comm->eFlop)
5205 dd->comm->flop -= force_flop_count(nrnb);
5208 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5210 if (dd->comm->eFlop)
5212 dd->comm->flop += force_flop_count(nrnb);
5217 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5221 for (i = 0; i < ddCyclNr; i++)
5223 dd->comm->cycl[i] = 0;
5224 dd->comm->cycl_n[i] = 0;
5225 dd->comm->cycl_max[i] = 0;
5228 dd->comm->flop_n = 0;
5231 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5233 gmx_domdec_comm_t *comm;
5234 gmx_domdec_load_t *load;
5235 gmx_domdec_root_t *root = NULL;
5236 int d, dim, cid, i, pos;
5237 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5242 fprintf(debug, "get_load_distribution start\n");
5245 wallcycle_start(wcycle, ewcDDCOMMLOAD);
5249 bSepPME = (dd->pme_nodeid >= 0);
5251 for (d = dd->ndim-1; d >= 0; d--)
5254 /* Check if we participate in the communication in this dimension */
5255 if (d == dd->ndim-1 ||
5256 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5258 load = &comm->load[d];
5261 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
5264 if (d == dd->ndim-1)
5266 sbuf[pos++] = dd_force_load(comm);
5267 sbuf[pos++] = sbuf[0];
5270 sbuf[pos++] = sbuf[0];
5271 sbuf[pos++] = cell_frac;
5274 sbuf[pos++] = comm->cell_f_max0[d];
5275 sbuf[pos++] = comm->cell_f_min1[d];
5280 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5281 sbuf[pos++] = comm->cycl[ddCyclPME];
5286 sbuf[pos++] = comm->load[d+1].sum;
5287 sbuf[pos++] = comm->load[d+1].max;
5290 sbuf[pos++] = comm->load[d+1].sum_m;
5291 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5292 sbuf[pos++] = comm->load[d+1].flags;
5295 sbuf[pos++] = comm->cell_f_max0[d];
5296 sbuf[pos++] = comm->cell_f_min1[d];
5301 sbuf[pos++] = comm->load[d+1].mdf;
5302 sbuf[pos++] = comm->load[d+1].pme;
5306 /* Communicate a row in DD direction d.
5307 * The communicators are setup such that the root always has rank 0.
5310 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5311 load->load, load->nload*sizeof(float), MPI_BYTE,
5312 0, comm->mpi_comm_load[d]);
5314 if (dd->ci[dim] == dd->master_ci[dim])
5316 /* We are the root, process this row */
5317 if (comm->bDynLoadBal)
5319 root = comm->root[d];
5329 for (i = 0; i < dd->nc[dim]; i++)
5331 load->sum += load->load[pos++];
5332 load->max = max(load->max, load->load[pos]);
5338 /* This direction could not be load balanced properly,
5339 * therefore we need to use the maximum iso the average load.
5341 load->sum_m = max(load->sum_m, load->load[pos]);
5345 load->sum_m += load->load[pos];
5348 load->cvol_min = min(load->cvol_min, load->load[pos]);
5352 load->flags = (int)(load->load[pos++] + 0.5);
5356 root->cell_f_max0[i] = load->load[pos++];
5357 root->cell_f_min1[i] = load->load[pos++];
5362 load->mdf = max(load->mdf, load->load[pos]);
5364 load->pme = max(load->pme, load->load[pos]);
5368 if (comm->bDynLoadBal && root->bLimited)
5370 load->sum_m *= dd->nc[dim];
5371 load->flags |= (1<<d);
5379 comm->nload += dd_load_count(comm);
5380 comm->load_step += comm->cycl[ddCyclStep];
5381 comm->load_sum += comm->load[0].sum;
5382 comm->load_max += comm->load[0].max;
5383 if (comm->bDynLoadBal)
5385 for (d = 0; d < dd->ndim; d++)
5387 if (comm->load[0].flags & (1<<d))
5389 comm->load_lim[d]++;
5395 comm->load_mdf += comm->load[0].mdf;
5396 comm->load_pme += comm->load[0].pme;
5400 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5404 fprintf(debug, "get_load_distribution finished\n");
5408 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5410 /* Return the relative performance loss on the total run time
5411 * due to the force calculation load imbalance.
5413 if (dd->comm->nload > 0)
5416 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5417 (dd->comm->load_step*dd->nnodes);
5425 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5428 int npp, npme, nnodes, d, limp;
5429 float imbal, pme_f_ratio, lossf, lossp = 0;
5431 gmx_domdec_comm_t *comm;
5434 if (DDMASTER(dd) && comm->nload > 0)
5437 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5438 nnodes = npp + npme;
5439 imbal = comm->load_max*npp/comm->load_sum - 1;
5440 lossf = dd_force_imb_perf_loss(dd);
5441 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5442 fprintf(fplog, "%s", buf);
5443 fprintf(stderr, "\n");
5444 fprintf(stderr, "%s", buf);
5445 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5446 fprintf(fplog, "%s", buf);
5447 fprintf(stderr, "%s", buf);
5449 if (comm->bDynLoadBal)
5451 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5452 for (d = 0; d < dd->ndim; d++)
5454 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5455 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5461 sprintf(buf+strlen(buf), "\n");
5462 fprintf(fplog, "%s", buf);
5463 fprintf(stderr, "%s", buf);
5467 pme_f_ratio = comm->load_pme/comm->load_mdf;
5468 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5471 lossp *= (float)npme/(float)nnodes;
5475 lossp *= (float)npp/(float)nnodes;
5477 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5478 fprintf(fplog, "%s", buf);
5479 fprintf(stderr, "%s", buf);
5480 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5481 fprintf(fplog, "%s", buf);
5482 fprintf(stderr, "%s", buf);
5484 fprintf(fplog, "\n");
5485 fprintf(stderr, "\n");
5487 if (lossf >= DD_PERF_LOSS_WARN)
5490 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5491 " in the domain decomposition.\n", lossf*100);
5492 if (!comm->bDynLoadBal)
5494 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5498 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5500 fprintf(fplog, "%s\n", buf);
5501 fprintf(stderr, "%s\n", buf);
5503 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS_WARN)
5506 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5507 " had %s work to do than the PP nodes.\n"
5508 " You might want to %s the number of PME nodes\n"
5509 " or %s the cut-off and the grid spacing.\n",
5511 (lossp < 0) ? "less" : "more",
5512 (lossp < 0) ? "decrease" : "increase",
5513 (lossp < 0) ? "decrease" : "increase");
5514 fprintf(fplog, "%s\n", buf);
5515 fprintf(stderr, "%s\n", buf);
5520 static float dd_vol_min(gmx_domdec_t *dd)
5522 return dd->comm->load[0].cvol_min*dd->nnodes;
5525 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5527 return dd->comm->load[0].flags;
5530 static float dd_f_imbal(gmx_domdec_t *dd)
5532 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5535 float dd_pme_f_ratio(gmx_domdec_t *dd)
5537 if (dd->comm->cycl_n[ddCyclPME] > 0)
5539 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5547 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5552 flags = dd_load_flags(dd);
5556 "DD load balancing is limited by minimum cell size in dimension");
5557 for (d = 0; d < dd->ndim; d++)
5561 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5564 fprintf(fplog, "\n");
5566 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5567 if (dd->comm->bDynLoadBal)
5569 fprintf(fplog, " vol min/aver %5.3f%c",
5570 dd_vol_min(dd), flags ? '!' : ' ');
5572 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5573 if (dd->comm->cycl_n[ddCyclPME])
5575 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5577 fprintf(fplog, "\n\n");
5580 static void dd_print_load_verbose(gmx_domdec_t *dd)
5582 if (dd->comm->bDynLoadBal)
5584 fprintf(stderr, "vol %4.2f%c ",
5585 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5587 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5588 if (dd->comm->cycl_n[ddCyclPME])
5590 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
5595 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5600 gmx_domdec_root_t *root;
5601 gmx_bool bPartOfGroup = FALSE;
5603 dim = dd->dim[dim_ind];
5604 copy_ivec(loc, loc_c);
5605 for (i = 0; i < dd->nc[dim]; i++)
5608 rank = dd_index(dd->nc, loc_c);
5609 if (rank == dd->rank)
5611 /* This process is part of the group */
5612 bPartOfGroup = TRUE;
5615 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5619 dd->comm->mpi_comm_load[dim_ind] = c_row;
5620 if (dd->comm->eDLB != edlbNO)
5622 if (dd->ci[dim] == dd->master_ci[dim])
5624 /* This is the root process of this row */
5625 snew(dd->comm->root[dim_ind], 1);
5626 root = dd->comm->root[dim_ind];
5627 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5628 snew(root->old_cell_f, dd->nc[dim]+1);
5629 snew(root->bCellMin, dd->nc[dim]);
5632 snew(root->cell_f_max0, dd->nc[dim]);
5633 snew(root->cell_f_min1, dd->nc[dim]);
5634 snew(root->bound_min, dd->nc[dim]);
5635 snew(root->bound_max, dd->nc[dim]);
5637 snew(root->buf_ncd, dd->nc[dim]);
5641 /* This is not a root process, we only need to receive cell_f */
5642 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5645 if (dd->ci[dim] == dd->master_ci[dim])
5647 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
5653 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5654 const gmx_hw_info_t gmx_unused *hwinfo,
5655 const gmx_hw_opt_t gmx_unused *hw_opt)
5658 int physicalnode_id_hash;
5661 MPI_Comm mpi_comm_pp_physicalnode;
5663 if (!(cr->duty & DUTY_PP) ||
5664 hw_opt->gpu_opt.ncuda_dev_use == 0)
5666 /* Only PP nodes (currently) use GPUs.
5667 * If we don't have GPUs, there are no resources to share.
5672 physicalnode_id_hash = gmx_physicalnode_id_hash();
5674 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5680 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5681 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5682 dd->rank, physicalnode_id_hash, gpu_id);
5684 /* Split the PP communicator over the physical nodes */
5685 /* TODO: See if we should store this (before), as it's also used for
5686 * for the nodecomm summution.
5688 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5689 &mpi_comm_pp_physicalnode);
5690 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5691 &dd->comm->mpi_comm_gpu_shared);
5692 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5693 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5697 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5700 /* Note that some ranks could share a GPU, while others don't */
5702 if (dd->comm->nrank_gpu_shared == 1)
5704 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
5709 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5712 int dim0, dim1, i, j;
5717 fprintf(debug, "Making load communicators\n");
5720 snew(dd->comm->load, dd->ndim);
5721 snew(dd->comm->mpi_comm_load, dd->ndim);
5724 make_load_communicator(dd, 0, loc);
5728 for (i = 0; i < dd->nc[dim0]; i++)
5731 make_load_communicator(dd, 1, loc);
5737 for (i = 0; i < dd->nc[dim0]; i++)
5741 for (j = 0; j < dd->nc[dim1]; j++)
5744 make_load_communicator(dd, 2, loc);
5751 fprintf(debug, "Finished making load communicators\n");
5756 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5759 int d, dim, i, j, m;
5762 ivec dd_zp[DD_MAXIZONE];
5763 gmx_domdec_zones_t *zones;
5764 gmx_domdec_ns_ranges_t *izone;
5766 for (d = 0; d < dd->ndim; d++)
5769 copy_ivec(dd->ci, tmp);
5770 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5771 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5772 copy_ivec(dd->ci, tmp);
5773 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5774 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5777 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5780 dd->neighbor[d][1]);
5786 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5788 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5789 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5796 for (i = 0; i < nzonep; i++)
5798 copy_ivec(dd_zp3[i], dd_zp[i]);
5804 for (i = 0; i < nzonep; i++)
5806 copy_ivec(dd_zp2[i], dd_zp[i]);
5812 for (i = 0; i < nzonep; i++)
5814 copy_ivec(dd_zp1[i], dd_zp[i]);
5818 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5823 zones = &dd->comm->zones;
5825 for (i = 0; i < nzone; i++)
5828 clear_ivec(zones->shift[i]);
5829 for (d = 0; d < dd->ndim; d++)
5831 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5836 for (i = 0; i < nzone; i++)
5838 for (d = 0; d < DIM; d++)
5840 s[d] = dd->ci[d] - zones->shift[i][d];
5845 else if (s[d] >= dd->nc[d])
5851 zones->nizone = nzonep;
5852 for (i = 0; i < zones->nizone; i++)
5854 if (dd_zp[i][0] != i)
5856 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5858 izone = &zones->izone[i];
5859 izone->j0 = dd_zp[i][1];
5860 izone->j1 = dd_zp[i][2];
5861 for (dim = 0; dim < DIM; dim++)
5863 if (dd->nc[dim] == 1)
5865 /* All shifts should be allowed */
5866 izone->shift0[dim] = -1;
5867 izone->shift1[dim] = 1;
5872 izone->shift0[d] = 0;
5873 izone->shift1[d] = 0;
5874 for(j=izone->j0; j<izone->j1; j++) {
5875 if (dd->shift[j][d] > dd->shift[i][d])
5876 izone->shift0[d] = -1;
5877 if (dd->shift[j][d] < dd->shift[i][d])
5878 izone->shift1[d] = 1;
5884 /* Assume the shift are not more than 1 cell */
5885 izone->shift0[dim] = 1;
5886 izone->shift1[dim] = -1;
5887 for (j = izone->j0; j < izone->j1; j++)
5889 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5890 if (shift_diff < izone->shift0[dim])
5892 izone->shift0[dim] = shift_diff;
5894 if (shift_diff > izone->shift1[dim])
5896 izone->shift1[dim] = shift_diff;
5903 if (dd->comm->eDLB != edlbNO)
5905 snew(dd->comm->root, dd->ndim);
5908 if (dd->comm->bRecordLoad)
5910 make_load_communicators(dd);
5914 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
5917 gmx_domdec_comm_t *comm;
5928 if (comm->bCartesianPP)
5930 /* Set up cartesian communication for the particle-particle part */
5933 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5934 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5937 for (i = 0; i < DIM; i++)
5941 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5943 /* We overwrite the old communicator with the new cartesian one */
5944 cr->mpi_comm_mygroup = comm_cart;
5947 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5948 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5950 if (comm->bCartesianPP_PME)
5952 /* Since we want to use the original cartesian setup for sim,
5953 * and not the one after split, we need to make an index.
5955 snew(comm->ddindex2ddnodeid, dd->nnodes);
5956 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5957 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5958 /* Get the rank of the DD master,
5959 * above we made sure that the master node is a PP node.
5969 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5971 else if (comm->bCartesianPP)
5973 if (cr->npmenodes == 0)
5975 /* The PP communicator is also
5976 * the communicator for this simulation
5978 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5980 cr->nodeid = dd->rank;
5982 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5984 /* We need to make an index to go from the coordinates
5985 * to the nodeid of this simulation.
5987 snew(comm->ddindex2simnodeid, dd->nnodes);
5988 snew(buf, dd->nnodes);
5989 if (cr->duty & DUTY_PP)
5991 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5993 /* Communicate the ddindex to simulation nodeid index */
5994 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5995 cr->mpi_comm_mysim);
5998 /* Determine the master coordinates and rank.
5999 * The DD master should be the same node as the master of this sim.
6001 for (i = 0; i < dd->nnodes; i++)
6003 if (comm->ddindex2simnodeid[i] == 0)
6005 ddindex2xyz(dd->nc, i, dd->master_ci);
6006 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
6011 fprintf(debug, "The master rank is %d\n", dd->masterrank);
6016 /* No Cartesian communicators */
6017 /* We use the rank in dd->comm->all as DD index */
6018 ddindex2xyz(dd->nc, dd->rank, dd->ci);
6019 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
6021 clear_ivec(dd->master_ci);
6028 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6029 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6034 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6035 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6039 static void receive_ddindex2simnodeid(t_commrec *cr)
6043 gmx_domdec_comm_t *comm;
6050 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6052 snew(comm->ddindex2simnodeid, dd->nnodes);
6053 snew(buf, dd->nnodes);
6054 if (cr->duty & DUTY_PP)
6056 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6059 /* Communicate the ddindex to simulation nodeid index */
6060 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6061 cr->mpi_comm_mysim);
6068 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6069 int ncg, int natoms)
6071 gmx_domdec_master_t *ma;
6076 snew(ma->ncg, dd->nnodes);
6077 snew(ma->index, dd->nnodes+1);
6079 snew(ma->nat, dd->nnodes);
6080 snew(ma->ibuf, dd->nnodes*2);
6081 snew(ma->cell_x, DIM);
6082 for (i = 0; i < DIM; i++)
6084 snew(ma->cell_x[i], dd->nc[i]+1);
6087 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6093 snew(ma->vbuf, natoms);
6099 static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
6100 int gmx_unused reorder)
6103 gmx_domdec_comm_t *comm;
6114 if (comm->bCartesianPP)
6116 for (i = 1; i < DIM; i++)
6118 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6120 if (bDiv[YY] || bDiv[ZZ])
6122 comm->bCartesianPP_PME = TRUE;
6123 /* If we have 2D PME decomposition, which is always in x+y,
6124 * we stack the PME only nodes in z.
6125 * Otherwise we choose the direction that provides the thinnest slab
6126 * of PME only nodes as this will have the least effect
6127 * on the PP communication.
6128 * But for the PME communication the opposite might be better.
6130 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6132 dd->nc[YY] > dd->nc[ZZ]))
6134 comm->cartpmedim = ZZ;
6138 comm->cartpmedim = YY;
6140 comm->ntot[comm->cartpmedim]
6141 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6145 fprintf(fplog, "#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6147 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6152 if (comm->bCartesianPP_PME)
6156 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6159 for (i = 0; i < DIM; i++)
6163 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6166 MPI_Comm_rank(comm_cart, &rank);
6167 if (MASTERNODE(cr) && rank != 0)
6169 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6172 /* With this assigment we loose the link to the original communicator
6173 * which will usually be MPI_COMM_WORLD, unless have multisim.
6175 cr->mpi_comm_mysim = comm_cart;
6176 cr->sim_nodeid = rank;
6178 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6182 fprintf(fplog, "Cartesian nodeid %d, coordinates %d %d %d\n\n",
6183 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6186 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6190 if (cr->npmenodes == 0 ||
6191 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6193 cr->duty = DUTY_PME;
6196 /* Split the sim communicator into PP and PME only nodes */
6197 MPI_Comm_split(cr->mpi_comm_mysim,
6199 dd_index(comm->ntot, dd->ci),
6200 &cr->mpi_comm_mygroup);
6204 switch (dd_node_order)
6209 fprintf(fplog, "Order of the nodes: PP first, PME last\n");
6212 case ddnoINTERLEAVE:
6213 /* Interleave the PP-only and PME-only nodes,
6214 * as on clusters with dual-core machines this will double
6215 * the communication bandwidth of the PME processes
6216 * and thus speed up the PP <-> PME and inter PME communication.
6220 fprintf(fplog, "Interleaving PP and PME nodes\n");
6222 comm->pmenodes = dd_pmenodes(cr);
6227 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6230 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6232 cr->duty = DUTY_PME;
6239 /* Split the sim communicator into PP and PME only nodes */
6240 MPI_Comm_split(cr->mpi_comm_mysim,
6243 &cr->mpi_comm_mygroup);
6244 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6250 fprintf(fplog, "This is a %s only node\n\n",
6251 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
6255 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6258 gmx_domdec_comm_t *comm;
6264 copy_ivec(dd->nc, comm->ntot);
6266 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6267 comm->bCartesianPP_PME = FALSE;
6269 /* Reorder the nodes by default. This might change the MPI ranks.
6270 * Real reordering is only supported on very few architectures,
6271 * Blue Gene is one of them.
6273 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6275 if (cr->npmenodes > 0)
6277 /* Split the communicator into a PP and PME part */
6278 split_communicator(fplog, cr, dd_node_order, CartReorder);
6279 if (comm->bCartesianPP_PME)
6281 /* We (possibly) reordered the nodes in split_communicator,
6282 * so it is no longer required in make_pp_communicator.
6284 CartReorder = FALSE;
6289 /* All nodes do PP and PME */
6291 /* We do not require separate communicators */
6292 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6296 if (cr->duty & DUTY_PP)
6298 /* Copy or make a new PP communicator */
6299 make_pp_communicator(fplog, cr, CartReorder);
6303 receive_ddindex2simnodeid(cr);
6306 if (!(cr->duty & DUTY_PME))
6308 /* Set up the commnuication to our PME node */
6309 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6310 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6313 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6314 dd->pme_nodeid, dd->pme_receive_vir_ener);
6319 dd->pme_nodeid = -1;
6324 dd->ma = init_gmx_domdec_master_t(dd,
6326 comm->cgs_gl.index[comm->cgs_gl.nr]);
6330 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6332 real *slb_frac, tot;
6337 if (nc > 1 && size_string != NULL)
6341 fprintf(fplog, "Using static load balancing for the %s direction\n",
6346 for (i = 0; i < nc; i++)
6349 sscanf(size_string, "%lf%n", &dbl, &n);
6352 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6361 fprintf(fplog, "Relative cell sizes:");
6363 for (i = 0; i < nc; i++)
6368 fprintf(fplog, " %5.3f", slb_frac[i]);
6373 fprintf(fplog, "\n");
6380 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6383 gmx_mtop_ilistloop_t iloop;
6387 iloop = gmx_mtop_ilistloop_init(mtop);
6388 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6390 for (ftype = 0; ftype < F_NRE; ftype++)
6392 if ((interaction_function[ftype].flags & IF_BOND) &&
6395 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
6403 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6409 val = getenv(env_var);
6412 if (sscanf(val, "%d", &nst) <= 0)
6418 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6426 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6430 fprintf(stderr, "\n%s\n", warn_string);
6434 fprintf(fplog, "\n%s\n", warn_string);
6438 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6439 t_inputrec *ir, FILE *fplog)
6441 if (ir->ePBC == epbcSCREW &&
6442 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6444 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6447 if (ir->ns_type == ensSIMPLE)
6449 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6452 if (ir->nstlist == 0)
6454 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6457 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6459 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6463 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6468 r = ddbox->box_size[XX];
6469 for (di = 0; di < dd->ndim; di++)
6472 /* Check using the initial average cell size */
6473 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6479 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6480 const char *dlb_opt, gmx_bool bRecordLoad,
6481 unsigned long Flags, t_inputrec *ir)
6489 case 'a': eDLB = edlbAUTO; break;
6490 case 'n': eDLB = edlbNO; break;
6491 case 'y': eDLB = edlbYES; break;
6492 default: gmx_incons("Unknown dlb_opt");
6495 if (Flags & MD_RERUN)
6500 if (!EI_DYNAMICS(ir->eI))
6502 if (eDLB == edlbYES)
6504 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6505 dd_warning(cr, fplog, buf);
6513 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6518 if (Flags & MD_REPRODUCIBLE)
6525 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6529 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6532 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
6540 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6545 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6547 /* Decomposition order z,y,x */
6550 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6552 for (dim = DIM-1; dim >= 0; dim--)
6554 if (dd->nc[dim] > 1)
6556 dd->dim[dd->ndim++] = dim;
6562 /* Decomposition order x,y,z */
6563 for (dim = 0; dim < DIM; dim++)
6565 if (dd->nc[dim] > 1)
6567 dd->dim[dd->ndim++] = dim;
6573 static gmx_domdec_comm_t *init_dd_comm()
6575 gmx_domdec_comm_t *comm;
6579 snew(comm->cggl_flag, DIM*2);
6580 snew(comm->cgcm_state, DIM*2);
6581 for (i = 0; i < DIM*2; i++)
6583 comm->cggl_flag_nalloc[i] = 0;
6584 comm->cgcm_state_nalloc[i] = 0;
6587 comm->nalloc_int = 0;
6588 comm->buf_int = NULL;
6590 vec_rvec_init(&comm->vbuf);
6592 comm->n_load_have = 0;
6593 comm->n_load_collect = 0;
6595 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6597 comm->sum_nat[i] = 0;
6601 comm->load_step = 0;
6604 clear_ivec(comm->load_lim);
6611 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6612 unsigned long Flags,
6614 real comm_distance_min, real rconstr,
6615 const char *dlb_opt, real dlb_scale,
6616 const char *sizex, const char *sizey, const char *sizez,
6617 gmx_mtop_t *mtop, t_inputrec *ir,
6618 matrix box, rvec *x,
6620 int *npme_x, int *npme_y)
6623 gmx_domdec_comm_t *comm;
6626 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6633 "\nInitializing Domain Decomposition on %d nodes\n", cr->nnodes);
6638 dd->comm = init_dd_comm();
6640 snew(comm->cggl_flag, DIM*2);
6641 snew(comm->cgcm_state, DIM*2);
6643 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6644 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6646 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
6647 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
6648 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
6649 recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
6650 comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
6651 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
6652 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
6653 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
6655 dd->pme_recv_f_alloc = 0;
6656 dd->pme_recv_f_buf = NULL;
6658 if (dd->bSendRecv2 && fplog)
6660 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6666 fprintf(fplog, "Will load balance based on FLOP count\n");
6668 if (comm->eFlop > 1)
6670 srand(1+cr->nodeid);
6672 comm->bRecordLoad = TRUE;
6676 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6680 /* Initialize to GPU share count to 0, might change later */
6681 comm->nrank_gpu_shared = 0;
6683 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6685 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6688 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6690 dd->bGridJump = comm->bDynLoadBal;
6691 comm->bPMELoadBalDLBLimits = FALSE;
6693 if (comm->nstSortCG)
6697 if (comm->nstSortCG == 1)
6699 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6703 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6707 snew(comm->sort, 1);
6713 fprintf(fplog, "Will not sort the charge groups\n");
6717 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6719 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6720 if (comm->bInterCGBondeds)
6722 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6726 comm->bInterCGMultiBody = FALSE;
6729 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6730 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6732 if (ir->rlistlong == 0)
6734 /* Set the cut-off to some very large value,
6735 * so we don't need if statements everywhere in the code.
6736 * We use sqrt, since the cut-off is squared in some places.
6738 comm->cutoff = GMX_CUTOFF_INF;
6742 comm->cutoff = ir->rlistlong;
6744 comm->cutoff_mbody = 0;
6746 comm->cellsize_limit = 0;
6747 comm->bBondComm = FALSE;
6749 if (comm->bInterCGBondeds)
6751 if (comm_distance_min > 0)
6753 comm->cutoff_mbody = comm_distance_min;
6754 if (Flags & MD_DDBONDCOMM)
6756 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6760 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6762 r_bonded_limit = comm->cutoff_mbody;
6764 else if (ir->bPeriodicMols)
6766 /* Can not easily determine the required cut-off */
6767 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6768 comm->cutoff_mbody = comm->cutoff/2;
6769 r_bonded_limit = comm->cutoff_mbody;
6775 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6776 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6778 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6779 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6781 /* We use an initial margin of 10% for the minimum cell size,
6782 * except when we are just below the non-bonded cut-off.
6784 if (Flags & MD_DDBONDCOMM)
6786 if (max(r_2b, r_mb) > comm->cutoff)
6788 r_bonded = max(r_2b, r_mb);
6789 r_bonded_limit = 1.1*r_bonded;
6790 comm->bBondComm = TRUE;
6795 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6797 /* We determine cutoff_mbody later */
6801 /* No special bonded communication,
6802 * simply increase the DD cut-off.
6804 r_bonded_limit = 1.1*max(r_2b, r_mb);
6805 comm->cutoff_mbody = r_bonded_limit;
6806 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6809 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6813 "Minimum cell size due to bonded interactions: %.3f nm\n",
6814 comm->cellsize_limit);
6818 if (dd->bInterCGcons && rconstr <= 0)
6820 /* There is a cell size limit due to the constraints (P-LINCS) */
6821 rconstr = constr_r_max(fplog, mtop, ir);
6825 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6827 if (rconstr > comm->cellsize_limit)
6829 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6833 else if (rconstr > 0 && fplog)
6835 /* Here we do not check for dd->bInterCGcons,
6836 * because one can also set a cell size limit for virtual sites only
6837 * and at this point we don't know yet if there are intercg v-sites.
6840 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6843 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6845 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6849 copy_ivec(nc, dd->nc);
6850 set_dd_dim(fplog, dd);
6851 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6853 if (cr->npmenodes == -1)
6857 acs = average_cellsize_min(dd, ddbox);
6858 if (acs < comm->cellsize_limit)
6862 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6864 gmx_fatal_collective(FARGS, cr, NULL,
6865 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6866 acs, comm->cellsize_limit);
6871 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6873 /* We need to choose the optimal DD grid and possibly PME nodes */
6874 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6875 comm->eDLB != edlbNO, dlb_scale,
6876 comm->cellsize_limit, comm->cutoff,
6877 comm->bInterCGBondeds);
6879 if (dd->nc[XX] == 0)
6881 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6882 sprintf(buf, "Change the number of nodes or mdrun option %s%s%s",
6883 !bC ? "-rdd" : "-rcon",
6884 comm->eDLB != edlbNO ? " or -dds" : "",
6885 bC ? " or your LINCS settings" : "");
6887 gmx_fatal_collective(FARGS, cr, NULL,
6888 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6890 "Look in the log file for details on the domain decomposition",
6891 cr->nnodes-cr->npmenodes, limit, buf);
6893 set_dd_dim(fplog, dd);
6899 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6900 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6903 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6904 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6906 gmx_fatal_collective(FARGS, cr, NULL,
6907 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6908 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6910 if (cr->npmenodes > dd->nnodes)
6912 gmx_fatal_collective(FARGS, cr, NULL,
6913 "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6915 if (cr->npmenodes > 0)
6917 comm->npmenodes = cr->npmenodes;
6921 comm->npmenodes = dd->nnodes;
6924 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6926 /* The following choices should match those
6927 * in comm_cost_est in domdec_setup.c.
6928 * Note that here the checks have to take into account
6929 * that the decomposition might occur in a different order than xyz
6930 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6931 * in which case they will not match those in comm_cost_est,
6932 * but since that is mainly for testing purposes that's fine.
6934 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6935 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6936 getenv("GMX_PMEONEDD") == NULL)
6938 comm->npmedecompdim = 2;
6939 comm->npmenodes_x = dd->nc[XX];
6940 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6944 /* In case nc is 1 in both x and y we could still choose to
6945 * decompose pme in y instead of x, but we use x for simplicity.
6947 comm->npmedecompdim = 1;
6948 if (dd->dim[0] == YY)
6950 comm->npmenodes_x = 1;
6951 comm->npmenodes_y = comm->npmenodes;
6955 comm->npmenodes_x = comm->npmenodes;
6956 comm->npmenodes_y = 1;
6961 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6962 comm->npmenodes_x, comm->npmenodes_y, 1);
6967 comm->npmedecompdim = 0;
6968 comm->npmenodes_x = 0;
6969 comm->npmenodes_y = 0;
6972 /* Technically we don't need both of these,
6973 * but it simplifies code not having to recalculate it.
6975 *npme_x = comm->npmenodes_x;
6976 *npme_y = comm->npmenodes_y;
6978 snew(comm->slb_frac, DIM);
6979 if (comm->eDLB == edlbNO)
6981 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6982 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6983 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
6986 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6988 if (comm->bBondComm || comm->eDLB != edlbNO)
6990 /* Set the bonded communication distance to halfway
6991 * the minimum and the maximum,
6992 * since the extra communication cost is nearly zero.
6994 acs = average_cellsize_min(dd, ddbox);
6995 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6996 if (comm->eDLB != edlbNO)
6998 /* Check if this does not limit the scaling */
6999 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
7001 if (!comm->bBondComm)
7003 /* Without bBondComm do not go beyond the n.b. cut-off */
7004 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
7005 if (comm->cellsize_limit >= comm->cutoff)
7007 /* We don't loose a lot of efficieny
7008 * when increasing it to the n.b. cut-off.
7009 * It can even be slightly faster, because we need
7010 * less checks for the communication setup.
7012 comm->cutoff_mbody = comm->cutoff;
7015 /* Check if we did not end up below our original limit */
7016 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
7018 if (comm->cutoff_mbody > comm->cellsize_limit)
7020 comm->cellsize_limit = comm->cutoff_mbody;
7023 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
7028 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
7029 "cellsize limit %f\n",
7030 comm->bBondComm, comm->cellsize_limit);
7035 check_dd_restrictions(cr, dd, ir, fplog);
7038 comm->partition_step = INT_MIN;
7041 clear_dd_cycle_counts(dd);
7046 static void set_dlb_limits(gmx_domdec_t *dd)
7051 for (d = 0; d < dd->ndim; d++)
7053 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7054 dd->comm->cellsize_min[dd->dim[d]] =
7055 dd->comm->cellsize_min_dlb[dd->dim[d]];
7060 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7063 gmx_domdec_comm_t *comm;
7073 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
7076 cellsize_min = comm->cellsize_min[dd->dim[0]];
7077 for (d = 1; d < dd->ndim; d++)
7079 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7082 if (cellsize_min < comm->cellsize_limit*1.05)
7084 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7086 /* Change DLB from "auto" to "no". */
7087 comm->eDLB = edlbNO;
7092 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7093 comm->bDynLoadBal = TRUE;
7094 dd->bGridJump = TRUE;
7098 /* We can set the required cell size info here,
7099 * so we do not need to communicate this.
7100 * The grid is completely uniform.
7102 for (d = 0; d < dd->ndim; d++)
7106 comm->load[d].sum_m = comm->load[d].sum;
7108 nc = dd->nc[dd->dim[d]];
7109 for (i = 0; i < nc; i++)
7111 comm->root[d]->cell_f[i] = i/(real)nc;
7114 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7115 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7118 comm->root[d]->cell_f[nc] = 1.0;
7123 static char *init_bLocalCG(gmx_mtop_t *mtop)
7128 ncg = ncg_mtop(mtop);
7129 snew(bLocalCG, ncg);
7130 for (cg = 0; cg < ncg; cg++)
7132 bLocalCG[cg] = FALSE;
7138 void dd_init_bondeds(FILE *fplog,
7139 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7141 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7143 gmx_domdec_comm_t *comm;
7147 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7151 if (comm->bBondComm)
7153 /* Communicate atoms beyond the cut-off for bonded interactions */
7156 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7158 comm->bLocalCG = init_bLocalCG(mtop);
7162 /* Only communicate atoms based on cut-off */
7163 comm->cglink = NULL;
7164 comm->bLocalCG = NULL;
7168 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7170 gmx_bool bDynLoadBal, real dlb_scale,
7173 gmx_domdec_comm_t *comm;
7188 fprintf(fplog, "The maximum number of communication pulses is:");
7189 for (d = 0; d < dd->ndim; d++)
7191 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7193 fprintf(fplog, "\n");
7194 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7195 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7196 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7197 for (d = 0; d < DIM; d++)
7201 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7208 comm->cellsize_min_dlb[d]/
7209 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7211 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7214 fprintf(fplog, "\n");
7218 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
7219 fprintf(fplog, "The initial number of communication pulses is:");
7220 for (d = 0; d < dd->ndim; d++)
7222 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7224 fprintf(fplog, "\n");
7225 fprintf(fplog, "The initial domain decomposition cell size is:");
7226 for (d = 0; d < DIM; d++)
7230 fprintf(fplog, " %c %.2f nm",
7231 dim2char(d), dd->comm->cellsize_min[d]);
7234 fprintf(fplog, "\n\n");
7237 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7239 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7240 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7241 "non-bonded interactions", "", comm->cutoff);
7245 limit = dd->comm->cellsize_limit;
7249 if (dynamic_dd_box(ddbox, ir))
7251 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7253 limit = dd->comm->cellsize_min[XX];
7254 for (d = 1; d < DIM; d++)
7256 limit = min(limit, dd->comm->cellsize_min[d]);
7260 if (comm->bInterCGBondeds)
7262 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7263 "two-body bonded interactions", "(-rdd)",
7264 max(comm->cutoff, comm->cutoff_mbody));
7265 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7266 "multi-body bonded interactions", "(-rdd)",
7267 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7271 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7272 "virtual site constructions", "(-rcon)", limit);
7274 if (dd->constraint_comm)
7276 sprintf(buf, "atoms separated by up to %d constraints",
7278 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7279 buf, "(-rcon)", limit);
7281 fprintf(fplog, "\n");
7287 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7289 const t_inputrec *ir,
7290 const gmx_ddbox_t *ddbox)
7292 gmx_domdec_comm_t *comm;
7293 int d, dim, npulse, npulse_d_max, npulse_d;
7298 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7300 /* Determine the maximum number of comm. pulses in one dimension */
7302 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7304 /* Determine the maximum required number of grid pulses */
7305 if (comm->cellsize_limit >= comm->cutoff)
7307 /* Only a single pulse is required */
7310 else if (!bNoCutOff && comm->cellsize_limit > 0)
7312 /* We round down slightly here to avoid overhead due to the latency
7313 * of extra communication calls when the cut-off
7314 * would be only slightly longer than the cell size.
7315 * Later cellsize_limit is redetermined,
7316 * so we can not miss interactions due to this rounding.
7318 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7322 /* There is no cell size limit */
7323 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7326 if (!bNoCutOff && npulse > 1)
7328 /* See if we can do with less pulses, based on dlb_scale */
7330 for (d = 0; d < dd->ndim; d++)
7333 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7334 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7335 npulse_d_max = max(npulse_d_max, npulse_d);
7337 npulse = min(npulse, npulse_d_max);
7340 /* This env var can override npulse */
7341 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
7348 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7349 for (d = 0; d < dd->ndim; d++)
7351 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7352 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7353 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7354 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7355 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7357 comm->bVacDLBNoLimit = FALSE;
7361 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7362 if (!comm->bVacDLBNoLimit)
7364 comm->cellsize_limit = max(comm->cellsize_limit,
7365 comm->cutoff/comm->maxpulse);
7367 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7368 /* Set the minimum cell size for each DD dimension */
7369 for (d = 0; d < dd->ndim; d++)
7371 if (comm->bVacDLBNoLimit ||
7372 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7374 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7378 comm->cellsize_min_dlb[dd->dim[d]] =
7379 comm->cutoff/comm->cd[d].np_dlb;
7382 if (comm->cutoff_mbody <= 0)
7384 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7386 if (comm->bDynLoadBal)
7392 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7394 /* If each molecule is a single charge group
7395 * or we use domain decomposition for each periodic dimension,
7396 * we do not need to take pbc into account for the bonded interactions.
7398 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7401 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
7404 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7405 t_inputrec *ir, gmx_ddbox_t *ddbox)
7407 gmx_domdec_comm_t *comm;
7413 /* Initialize the thread data.
7414 * This can not be done in init_domain_decomposition,
7415 * as the numbers of threads is determined later.
7417 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7420 snew(comm->dth, comm->nth);
7423 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7425 init_ddpme(dd, &comm->ddpme[0], 0);
7426 if (comm->npmedecompdim >= 2)
7428 init_ddpme(dd, &comm->ddpme[1], 1);
7433 comm->npmenodes = 0;
7434 if (dd->pme_nodeid >= 0)
7436 gmx_fatal_collective(FARGS, NULL, dd,
7437 "Can not have separate PME nodes without PME electrostatics");
7443 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7445 if (comm->eDLB != edlbNO)
7447 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7450 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7451 if (comm->eDLB == edlbAUTO)
7455 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7457 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
7460 if (ir->ePBC == epbcNONE)
7462 vol_frac = 1 - 1/(double)dd->nnodes;
7467 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7471 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7473 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7475 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
7478 static gmx_bool test_dd_cutoff(t_commrec *cr,
7479 t_state *state, t_inputrec *ir,
7490 set_ddbox(dd, FALSE, cr, ir, state->box,
7491 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7495 for (d = 0; d < dd->ndim; d++)
7499 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7500 if (dynamic_dd_box(&ddbox, ir))
7502 inv_cell_size *= DD_PRES_SCALE_MARGIN;
7505 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7507 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7508 dd->comm->cd[d].np_dlb > 0)
7510 if (np > dd->comm->cd[d].np_dlb)
7515 /* If a current local cell size is smaller than the requested
7516 * cut-off, we could still fix it, but this gets very complicated.
7517 * Without fixing here, we might actually need more checks.
7519 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7526 if (dd->comm->eDLB != edlbNO)
7528 /* If DLB is not active yet, we don't need to check the grid jumps.
7529 * Actually we shouldn't, because then the grid jump data is not set.
7531 if (dd->comm->bDynLoadBal &&
7532 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7537 gmx_sumi(1, &LocallyLimited, cr);
7539 if (LocallyLimited > 0)
7548 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7551 gmx_bool bCutoffAllowed;
7553 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7557 cr->dd->comm->cutoff = cutoff_req;
7560 return bCutoffAllowed;
7563 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7565 gmx_domdec_comm_t *comm;
7567 comm = cr->dd->comm;
7569 /* Turn on the DLB limiting (might have been on already) */
7570 comm->bPMELoadBalDLBLimits = TRUE;
7572 /* Change the cut-off limit */
7573 comm->PMELoadBal_max_cutoff = comm->cutoff;
7576 static void merge_cg_buffers(int ncell,
7577 gmx_domdec_comm_dim_t *cd, int pulse,
7579 int *index_gl, int *recv_i,
7580 rvec *cg_cm, rvec *recv_vr,
7582 cginfo_mb_t *cginfo_mb, int *cginfo)
7584 gmx_domdec_ind_t *ind, *ind_p;
7585 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7586 int shift, shift_at;
7588 ind = &cd->ind[pulse];
7590 /* First correct the already stored data */
7591 shift = ind->nrecv[ncell];
7592 for (cell = ncell-1; cell >= 0; cell--)
7594 shift -= ind->nrecv[cell];
7597 /* Move the cg's present from previous grid pulses */
7598 cg0 = ncg_cell[ncell+cell];
7599 cg1 = ncg_cell[ncell+cell+1];
7600 cgindex[cg1+shift] = cgindex[cg1];
7601 for (cg = cg1-1; cg >= cg0; cg--)
7603 index_gl[cg+shift] = index_gl[cg];
7604 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7605 cgindex[cg+shift] = cgindex[cg];
7606 cginfo[cg+shift] = cginfo[cg];
7608 /* Correct the already stored send indices for the shift */
7609 for (p = 1; p <= pulse; p++)
7611 ind_p = &cd->ind[p];
7613 for (c = 0; c < cell; c++)
7615 cg0 += ind_p->nsend[c];
7617 cg1 = cg0 + ind_p->nsend[cell];
7618 for (cg = cg0; cg < cg1; cg++)
7620 ind_p->index[cg] += shift;
7626 /* Merge in the communicated buffers */
7630 for (cell = 0; cell < ncell; cell++)
7632 cg1 = ncg_cell[ncell+cell+1] + shift;
7635 /* Correct the old cg indices */
7636 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7638 cgindex[cg+1] += shift_at;
7641 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7643 /* Copy this charge group from the buffer */
7644 index_gl[cg1] = recv_i[cg0];
7645 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7646 /* Add it to the cgindex */
7647 cg_gl = index_gl[cg1];
7648 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7649 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7650 cgindex[cg1+1] = cgindex[cg1] + nat;
7655 shift += ind->nrecv[cell];
7656 ncg_cell[ncell+cell+1] = cg1;
7660 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7661 int nzone, int cg0, const int *cgindex)
7665 /* Store the atom block boundaries for easy copying of communication buffers
7668 for (zone = 0; zone < nzone; zone++)
7670 for (p = 0; p < cd->np; p++)
7672 cd->ind[p].cell2at0[zone] = cgindex[cg];
7673 cg += cd->ind[p].nrecv[zone];
7674 cd->ind[p].cell2at1[zone] = cgindex[cg];
7679 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7685 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7687 if (!bLocalCG[link->a[i]])
7696 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7698 real c[DIM][4]; /* the corners for the non-bonded communication */
7699 real cr0; /* corner for rounding */
7700 real cr1[4]; /* corners for rounding */
7701 real bc[DIM]; /* corners for bounded communication */
7702 real bcr1; /* corner for rounding for bonded communication */
7705 /* Determine the corners of the domain(s) we are communicating with */
7707 set_dd_corners(const gmx_domdec_t *dd,
7708 int dim0, int dim1, int dim2,
7712 const gmx_domdec_comm_t *comm;
7713 const gmx_domdec_zones_t *zones;
7718 zones = &comm->zones;
7720 /* Keep the compiler happy */
7724 /* The first dimension is equal for all cells */
7725 c->c[0][0] = comm->cell_x0[dim0];
7728 c->bc[0] = c->c[0][0];
7733 /* This cell row is only seen from the first row */
7734 c->c[1][0] = comm->cell_x0[dim1];
7735 /* All rows can see this row */
7736 c->c[1][1] = comm->cell_x0[dim1];
7739 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7742 /* For the multi-body distance we need the maximum */
7743 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7746 /* Set the upper-right corner for rounding */
7747 c->cr0 = comm->cell_x1[dim0];
7752 for (j = 0; j < 4; j++)
7754 c->c[2][j] = comm->cell_x0[dim2];
7758 /* Use the maximum of the i-cells that see a j-cell */
7759 for (i = 0; i < zones->nizone; i++)
7761 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7767 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7773 /* For the multi-body distance we need the maximum */
7774 c->bc[2] = comm->cell_x0[dim2];
7775 for (i = 0; i < 2; i++)
7777 for (j = 0; j < 2; j++)
7779 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7785 /* Set the upper-right corner for rounding */
7786 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7787 * Only cell (0,0,0) can see cell 7 (1,1,1)
7789 c->cr1[0] = comm->cell_x1[dim1];
7790 c->cr1[3] = comm->cell_x1[dim1];
7793 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7796 /* For the multi-body distance we need the maximum */
7797 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7804 /* Determine which cg's we need to send in this pulse from this zone */
7806 get_zone_pulse_cgs(gmx_domdec_t *dd,
7807 int zonei, int zone,
7809 const int *index_gl,
7811 int dim, int dim_ind,
7812 int dim0, int dim1, int dim2,
7813 real r_comm2, real r_bcomm2,
7817 real skew_fac2_d, real skew_fac_01,
7818 rvec *v_d, rvec *v_0, rvec *v_1,
7819 const dd_corners_t *c,
7821 gmx_bool bDistBonded,
7827 gmx_domdec_ind_t *ind,
7828 int **ibuf, int *ibuf_nalloc,
7834 gmx_domdec_comm_t *comm;
7836 gmx_bool bDistMB_pulse;
7838 real r2, rb2, r, tric_sh;
7841 int nsend_z, nsend, nat;
7845 bScrew = (dd->bScrewPBC && dim == XX);
7847 bDistMB_pulse = (bDistMB && bDistBonded);
7853 for (cg = cg0; cg < cg1; cg++)
7857 if (tric_dist[dim_ind] == 0)
7859 /* Rectangular direction, easy */
7860 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7867 r = cg_cm[cg][dim] - c->bc[dim_ind];
7873 /* Rounding gives at most a 16% reduction
7874 * in communicated atoms
7876 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7878 r = cg_cm[cg][dim0] - c->cr0;
7879 /* This is the first dimension, so always r >= 0 */
7886 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7888 r = cg_cm[cg][dim1] - c->cr1[zone];
7895 r = cg_cm[cg][dim1] - c->bcr1;
7905 /* Triclinic direction, more complicated */
7908 /* Rounding, conservative as the skew_fac multiplication
7909 * will slightly underestimate the distance.
7911 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7913 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7914 for (i = dim0+1; i < DIM; i++)
7916 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7918 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7921 rb[dim0] = rn[dim0];
7924 /* Take care that the cell planes along dim0 might not
7925 * be orthogonal to those along dim1 and dim2.
7927 for (i = 1; i <= dim_ind; i++)
7930 if (normal[dim0][dimd] > 0)
7932 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7935 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7940 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7942 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7944 for (i = dim1+1; i < DIM; i++)
7946 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7948 rn[dim1] += tric_sh;
7951 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7952 /* Take care of coupling of the distances
7953 * to the planes along dim0 and dim1 through dim2.
7955 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7956 /* Take care that the cell planes along dim1
7957 * might not be orthogonal to that along dim2.
7959 if (normal[dim1][dim2] > 0)
7961 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7967 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7970 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7971 /* Take care of coupling of the distances
7972 * to the planes along dim0 and dim1 through dim2.
7974 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7975 /* Take care that the cell planes along dim1
7976 * might not be orthogonal to that along dim2.
7978 if (normal[dim1][dim2] > 0)
7980 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7985 /* The distance along the communication direction */
7986 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7988 for (i = dim+1; i < DIM; i++)
7990 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7995 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7996 /* Take care of coupling of the distances
7997 * to the planes along dim0 and dim1 through dim2.
7999 if (dim_ind == 1 && zonei == 1)
8001 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
8007 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
8010 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
8011 /* Take care of coupling of the distances
8012 * to the planes along dim0 and dim1 through dim2.
8014 if (dim_ind == 1 && zonei == 1)
8016 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
8024 ((bDistMB && rb2 < r_bcomm2) ||
8025 (bDist2B && r2 < r_bcomm2)) &&
8027 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
8028 missing_link(comm->cglink, index_gl[cg],
8031 /* Make an index to the local charge groups */
8032 if (nsend+1 > ind->nalloc)
8034 ind->nalloc = over_alloc_large(nsend+1);
8035 srenew(ind->index, ind->nalloc);
8037 if (nsend+1 > *ibuf_nalloc)
8039 *ibuf_nalloc = over_alloc_large(nsend+1);
8040 srenew(*ibuf, *ibuf_nalloc);
8042 ind->index[nsend] = cg;
8043 (*ibuf)[nsend] = index_gl[cg];
8045 vec_rvec_check_alloc(vbuf, nsend+1);
8047 if (dd->ci[dim] == 0)
8049 /* Correct cg_cm for pbc */
8050 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
8053 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8054 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8059 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8062 nat += cgindex[cg+1] - cgindex[cg];
8068 *nsend_z_ptr = nsend_z;
8071 static void setup_dd_communication(gmx_domdec_t *dd,
8072 matrix box, gmx_ddbox_t *ddbox,
8073 t_forcerec *fr, t_state *state, rvec **f)
8075 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8076 int nzone, nzone_send, zone, zonei, cg0, cg1;
8077 int c, i, j, cg, cg_gl, nrcg;
8078 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8079 gmx_domdec_comm_t *comm;
8080 gmx_domdec_zones_t *zones;
8081 gmx_domdec_comm_dim_t *cd;
8082 gmx_domdec_ind_t *ind;
8083 cginfo_mb_t *cginfo_mb;
8084 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8085 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8086 dd_corners_t corners;
8088 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8089 real skew_fac2_d, skew_fac_01;
8096 fprintf(debug, "Setting up DD communication\n");
8101 switch (fr->cutoff_scheme)
8110 gmx_incons("unimplemented");
8114 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8116 dim = dd->dim[dim_ind];
8118 /* Check if we need to use triclinic distances */
8119 tric_dist[dim_ind] = 0;
8120 for (i = 0; i <= dim_ind; i++)
8122 if (ddbox->tric_dir[dd->dim[i]])
8124 tric_dist[dim_ind] = 1;
8129 bBondComm = comm->bBondComm;
8131 /* Do we need to determine extra distances for multi-body bondeds? */
8132 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8134 /* Do we need to determine extra distances for only two-body bondeds? */
8135 bDist2B = (bBondComm && !bDistMB);
8137 r_comm2 = sqr(comm->cutoff);
8138 r_bcomm2 = sqr(comm->cutoff_mbody);
8142 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8145 zones = &comm->zones;
8148 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8149 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8151 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8153 /* Triclinic stuff */
8154 normal = ddbox->normal;
8158 v_0 = ddbox->v[dim0];
8159 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8161 /* Determine the coupling coefficient for the distances
8162 * to the cell planes along dim0 and dim1 through dim2.
8163 * This is required for correct rounding.
8166 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8169 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8175 v_1 = ddbox->v[dim1];
8178 zone_cg_range = zones->cg_range;
8179 index_gl = dd->index_gl;
8180 cgindex = dd->cgindex;
8181 cginfo_mb = fr->cginfo_mb;
8183 zone_cg_range[0] = 0;
8184 zone_cg_range[1] = dd->ncg_home;
8185 comm->zone_ncg1[0] = dd->ncg_home;
8186 pos_cg = dd->ncg_home;
8188 nat_tot = dd->nat_home;
8190 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8192 dim = dd->dim[dim_ind];
8193 cd = &comm->cd[dim_ind];
8195 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8197 /* No pbc in this dimension, the first node should not comm. */
8205 v_d = ddbox->v[dim];
8206 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8208 cd->bInPlace = TRUE;
8209 for (p = 0; p < cd->np; p++)
8211 /* Only atoms communicated in the first pulse are used
8212 * for multi-body bonded interactions or for bBondComm.
8214 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8219 for (zone = 0; zone < nzone_send; zone++)
8221 if (tric_dist[dim_ind] && dim_ind > 0)
8223 /* Determine slightly more optimized skew_fac's
8225 * This reduces the number of communicated atoms
8226 * by about 10% for 3D DD of rhombic dodecahedra.
8228 for (dimd = 0; dimd < dim; dimd++)
8230 sf2_round[dimd] = 1;
8231 if (ddbox->tric_dir[dimd])
8233 for (i = dd->dim[dimd]+1; i < DIM; i++)
8235 /* If we are shifted in dimension i
8236 * and the cell plane is tilted forward
8237 * in dimension i, skip this coupling.
8239 if (!(zones->shift[nzone+zone][i] &&
8240 ddbox->v[dimd][i][dimd] >= 0))
8243 sqr(ddbox->v[dimd][i][dimd]);
8246 sf2_round[dimd] = 1/sf2_round[dimd];
8251 zonei = zone_perm[dim_ind][zone];
8254 /* Here we permutate the zones to obtain a convenient order
8255 * for neighbor searching
8257 cg0 = zone_cg_range[zonei];
8258 cg1 = zone_cg_range[zonei+1];
8262 /* Look only at the cg's received in the previous grid pulse
8264 cg1 = zone_cg_range[nzone+zone+1];
8265 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
8268 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8269 for (th = 0; th < comm->nth; th++)
8271 gmx_domdec_ind_t *ind_p;
8272 int **ibuf_p, *ibuf_nalloc_p;
8274 int *nsend_p, *nat_p;
8280 /* Thread 0 writes in the comm buffers */
8282 ibuf_p = &comm->buf_int;
8283 ibuf_nalloc_p = &comm->nalloc_int;
8284 vbuf_p = &comm->vbuf;
8287 nsend_zone_p = &ind->nsend[zone];
8291 /* Other threads write into temp buffers */
8292 ind_p = &comm->dth[th].ind;
8293 ibuf_p = &comm->dth[th].ibuf;
8294 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8295 vbuf_p = &comm->dth[th].vbuf;
8296 nsend_p = &comm->dth[th].nsend;
8297 nat_p = &comm->dth[th].nat;
8298 nsend_zone_p = &comm->dth[th].nsend_zone;
8300 comm->dth[th].nsend = 0;
8301 comm->dth[th].nat = 0;
8302 comm->dth[th].nsend_zone = 0;
8312 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8313 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8316 /* Get the cg's for this pulse in this zone */
8317 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8319 dim, dim_ind, dim0, dim1, dim2,
8322 normal, skew_fac2_d, skew_fac_01,
8323 v_d, v_0, v_1, &corners, sf2_round,
8324 bDistBonded, bBondComm,
8328 ibuf_p, ibuf_nalloc_p,
8334 /* Append data of threads>=1 to the communication buffers */
8335 for (th = 1; th < comm->nth; th++)
8337 dd_comm_setup_work_t *dth;
8340 dth = &comm->dth[th];
8342 ns1 = nsend + dth->nsend_zone;
8343 if (ns1 > ind->nalloc)
8345 ind->nalloc = over_alloc_dd(ns1);
8346 srenew(ind->index, ind->nalloc);
8348 if (ns1 > comm->nalloc_int)
8350 comm->nalloc_int = over_alloc_dd(ns1);
8351 srenew(comm->buf_int, comm->nalloc_int);
8353 if (ns1 > comm->vbuf.nalloc)
8355 comm->vbuf.nalloc = over_alloc_dd(ns1);
8356 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8359 for (i = 0; i < dth->nsend_zone; i++)
8361 ind->index[nsend] = dth->ind.index[i];
8362 comm->buf_int[nsend] = dth->ibuf[i];
8363 copy_rvec(dth->vbuf.v[i],
8364 comm->vbuf.v[nsend]);
8368 ind->nsend[zone] += dth->nsend_zone;
8371 /* Clear the counts in case we do not have pbc */
8372 for (zone = nzone_send; zone < nzone; zone++)
8374 ind->nsend[zone] = 0;
8376 ind->nsend[nzone] = nsend;
8377 ind->nsend[nzone+1] = nat;
8378 /* Communicate the number of cg's and atoms to receive */
8379 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8380 ind->nsend, nzone+2,
8381 ind->nrecv, nzone+2);
8383 /* The rvec buffer is also required for atom buffers of size nsend
8384 * in dd_move_x and dd_move_f.
8386 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8390 /* We can receive in place if only the last zone is not empty */
8391 for (zone = 0; zone < nzone-1; zone++)
8393 if (ind->nrecv[zone] > 0)
8395 cd->bInPlace = FALSE;
8400 /* The int buffer is only required here for the cg indices */
8401 if (ind->nrecv[nzone] > comm->nalloc_int2)
8403 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8404 srenew(comm->buf_int2, comm->nalloc_int2);
8406 /* The rvec buffer is also required for atom buffers
8407 * of size nrecv in dd_move_x and dd_move_f.
8409 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8410 vec_rvec_check_alloc(&comm->vbuf2, i);
8414 /* Make space for the global cg indices */
8415 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8416 || dd->cg_nalloc == 0)
8418 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8419 srenew(index_gl, dd->cg_nalloc);
8420 srenew(cgindex, dd->cg_nalloc+1);
8422 /* Communicate the global cg indices */
8425 recv_i = index_gl + pos_cg;
8429 recv_i = comm->buf_int2;
8431 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8432 comm->buf_int, nsend,
8433 recv_i, ind->nrecv[nzone]);
8435 /* Make space for cg_cm */
8436 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8437 if (fr->cutoff_scheme == ecutsGROUP)
8445 /* Communicate cg_cm */
8448 recv_vr = cg_cm + pos_cg;
8452 recv_vr = comm->vbuf2.v;
8454 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8455 comm->vbuf.v, nsend,
8456 recv_vr, ind->nrecv[nzone]);
8458 /* Make the charge group index */
8461 zone = (p == 0 ? 0 : nzone - 1);
8462 while (zone < nzone)
8464 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8466 cg_gl = index_gl[pos_cg];
8467 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8468 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8469 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8472 /* Update the charge group presence,
8473 * so we can use it in the next pass of the loop.
8475 comm->bLocalCG[cg_gl] = TRUE;
8481 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8484 zone_cg_range[nzone+zone] = pos_cg;
8489 /* This part of the code is never executed with bBondComm. */
8490 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8491 index_gl, recv_i, cg_cm, recv_vr,
8492 cgindex, fr->cginfo_mb, fr->cginfo);
8493 pos_cg += ind->nrecv[nzone];
8495 nat_tot += ind->nrecv[nzone+1];
8499 /* Store the atom block for easy copying of communication buffers */
8500 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
8504 dd->index_gl = index_gl;
8505 dd->cgindex = cgindex;
8507 dd->ncg_tot = zone_cg_range[zones->n];
8508 dd->nat_tot = nat_tot;
8509 comm->nat[ddnatHOME] = dd->nat_home;
8510 for (i = ddnatZONE; i < ddnatNR; i++)
8512 comm->nat[i] = dd->nat_tot;
8517 /* We don't need to update cginfo, since that was alrady done above.
8518 * So we pass NULL for the forcerec.
8520 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8521 NULL, comm->bLocalCG);
8526 fprintf(debug, "Finished setting up DD communication, zones:");
8527 for (c = 0; c < zones->n; c++)
8529 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8531 fprintf(debug, "\n");
8535 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8539 for (c = 0; c < zones->nizone; c++)
8541 zones->izone[c].cg1 = zones->cg_range[c+1];
8542 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8543 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
8547 static void set_zones_size(gmx_domdec_t *dd,
8548 matrix box, const gmx_ddbox_t *ddbox,
8549 int zone_start, int zone_end)
8551 gmx_domdec_comm_t *comm;
8552 gmx_domdec_zones_t *zones;
8554 int z, zi, zj0, zj1, d, dim;
8557 real size_j, add_tric;
8562 zones = &comm->zones;
8564 /* Do we need to determine extra distances for multi-body bondeds? */
8565 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8567 for (z = zone_start; z < zone_end; z++)
8569 /* Copy cell limits to zone limits.
8570 * Valid for non-DD dims and non-shifted dims.
8572 copy_rvec(comm->cell_x0, zones->size[z].x0);
8573 copy_rvec(comm->cell_x1, zones->size[z].x1);
8576 for (d = 0; d < dd->ndim; d++)
8580 for (z = 0; z < zones->n; z++)
8582 /* With a staggered grid we have different sizes
8583 * for non-shifted dimensions.
8585 if (dd->bGridJump && zones->shift[z][dim] == 0)
8589 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8590 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8594 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8595 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
8601 rcmbs = comm->cutoff_mbody;
8602 if (ddbox->tric_dir[dim])
8604 rcs /= ddbox->skew_fac[dim];
8605 rcmbs /= ddbox->skew_fac[dim];
8608 /* Set the lower limit for the shifted zone dimensions */
8609 for (z = zone_start; z < zone_end; z++)
8611 if (zones->shift[z][dim] > 0)
8614 if (!dd->bGridJump || d == 0)
8616 zones->size[z].x0[dim] = comm->cell_x1[dim];
8617 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8621 /* Here we take the lower limit of the zone from
8622 * the lowest domain of the zone below.
8626 zones->size[z].x0[dim] =
8627 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8633 zones->size[z].x0[dim] =
8634 zones->size[zone_perm[2][z-4]].x0[dim];
8638 zones->size[z].x0[dim] =
8639 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8642 /* A temporary limit, is updated below */
8643 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8647 for (zi = 0; zi < zones->nizone; zi++)
8649 if (zones->shift[zi][dim] == 0)
8651 /* This takes the whole zone into account.
8652 * With multiple pulses this will lead
8653 * to a larger zone then strictly necessary.
8655 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8656 zones->size[zi].x1[dim]+rcmbs);
8664 /* Loop over the i-zones to set the upper limit of each
8667 for (zi = 0; zi < zones->nizone; zi++)
8669 if (zones->shift[zi][dim] == 0)
8671 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8673 if (zones->shift[z][dim] > 0)
8675 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8676 zones->size[zi].x1[dim]+rcs);
8683 for (z = zone_start; z < zone_end; z++)
8685 /* Initialization only required to keep the compiler happy */
8686 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8689 /* To determine the bounding box for a zone we need to find
8690 * the extreme corners of 4, 2 or 1 corners.
8692 nc = 1 << (ddbox->npbcdim - 1);
8694 for (c = 0; c < nc; c++)
8696 /* Set up a zone corner at x=0, ignoring trilinic couplings */
8700 corner[YY] = zones->size[z].x0[YY];
8704 corner[YY] = zones->size[z].x1[YY];
8708 corner[ZZ] = zones->size[z].x0[ZZ];
8712 corner[ZZ] = zones->size[z].x1[ZZ];
8714 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8716 /* With 1D domain decomposition the cg's are not in
8717 * the triclinic box, but triclinic x-y and rectangular y-z.
8718 * Shift y back, so it will later end up at 0.
8720 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8722 /* Apply the triclinic couplings */
8723 assert(ddbox->npbcdim <= DIM);
8724 for (i = YY; i < ddbox->npbcdim; i++)
8726 for (j = XX; j < i; j++)
8728 corner[j] += corner[i]*box[i][j]/box[i][i];
8733 copy_rvec(corner, corner_min);
8734 copy_rvec(corner, corner_max);
8738 for (i = 0; i < DIM; i++)
8740 corner_min[i] = min(corner_min[i], corner[i]);
8741 corner_max[i] = max(corner_max[i], corner[i]);
8745 /* Copy the extreme cornes without offset along x */
8746 for (i = 0; i < DIM; i++)
8748 zones->size[z].bb_x0[i] = corner_min[i];
8749 zones->size[z].bb_x1[i] = corner_max[i];
8751 /* Add the offset along x */
8752 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8753 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
8756 if (zone_start == 0)
8759 for (dim = 0; dim < DIM; dim++)
8761 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8763 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8768 for (z = zone_start; z < zone_end; z++)
8770 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8772 zones->size[z].x0[XX], zones->size[z].x1[XX],
8773 zones->size[z].x0[YY], zones->size[z].x1[YY],
8774 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8775 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8777 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8778 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8779 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
8784 static int comp_cgsort(const void *a, const void *b)
8788 gmx_cgsort_t *cga, *cgb;
8789 cga = (gmx_cgsort_t *)a;
8790 cgb = (gmx_cgsort_t *)b;
8792 comp = cga->nsc - cgb->nsc;
8795 comp = cga->ind_gl - cgb->ind_gl;
8801 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8806 /* Order the data */
8807 for (i = 0; i < n; i++)
8809 buf[i] = a[sort[i].ind];
8812 /* Copy back to the original array */
8813 for (i = 0; i < n; i++)
8819 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8824 /* Order the data */
8825 for (i = 0; i < n; i++)
8827 copy_rvec(v[sort[i].ind], buf[i]);
8830 /* Copy back to the original array */
8831 for (i = 0; i < n; i++)
8833 copy_rvec(buf[i], v[i]);
8837 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8840 int a, atot, cg, cg0, cg1, i;
8842 if (cgindex == NULL)
8844 /* Avoid the useless loop of the atoms within a cg */
8845 order_vec_cg(ncg, sort, v, buf);
8850 /* Order the data */
8852 for (cg = 0; cg < ncg; cg++)
8854 cg0 = cgindex[sort[cg].ind];
8855 cg1 = cgindex[sort[cg].ind+1];
8856 for (i = cg0; i < cg1; i++)
8858 copy_rvec(v[i], buf[a]);
8864 /* Copy back to the original array */
8865 for (a = 0; a < atot; a++)
8867 copy_rvec(buf[a], v[a]);
8871 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8872 int nsort_new, gmx_cgsort_t *sort_new,
8873 gmx_cgsort_t *sort1)
8877 /* The new indices are not very ordered, so we qsort them */
8878 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8880 /* sort2 is already ordered, so now we can merge the two arrays */
8884 while (i2 < nsort2 || i_new < nsort_new)
8888 sort1[i1++] = sort_new[i_new++];
8890 else if (i_new == nsort_new)
8892 sort1[i1++] = sort2[i2++];
8894 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8895 (sort2[i2].nsc == sort_new[i_new].nsc &&
8896 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8898 sort1[i1++] = sort2[i2++];
8902 sort1[i1++] = sort_new[i_new++];
8907 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8909 gmx_domdec_sort_t *sort;
8910 gmx_cgsort_t *cgsort, *sort_i;
8911 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8912 int sort_last, sort_skip;
8914 sort = dd->comm->sort;
8916 a = fr->ns.grid->cell_index;
8918 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8920 if (ncg_home_old >= 0)
8922 /* The charge groups that remained in the same ns grid cell
8923 * are completely ordered. So we can sort efficiently by sorting
8924 * the charge groups that did move into the stationary list.
8929 for (i = 0; i < dd->ncg_home; i++)
8931 /* Check if this cg did not move to another node */
8934 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8936 /* This cg is new on this node or moved ns grid cell */
8937 if (nsort_new >= sort->sort_new_nalloc)
8939 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8940 srenew(sort->sort_new, sort->sort_new_nalloc);
8942 sort_i = &(sort->sort_new[nsort_new++]);
8946 /* This cg did not move */
8947 sort_i = &(sort->sort2[nsort2++]);
8949 /* Sort on the ns grid cell indices
8950 * and the global topology index.
8951 * index_gl is irrelevant with cell ns,
8952 * but we set it here anyhow to avoid a conditional.
8955 sort_i->ind_gl = dd->index_gl[i];
8962 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8965 /* Sort efficiently */
8966 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8971 cgsort = sort->sort;
8973 for (i = 0; i < dd->ncg_home; i++)
8975 /* Sort on the ns grid cell indices
8976 * and the global topology index
8978 cgsort[i].nsc = a[i];
8979 cgsort[i].ind_gl = dd->index_gl[i];
8981 if (cgsort[i].nsc < moved)
8988 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8990 /* Determine the order of the charge groups using qsort */
8991 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
8997 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
9000 int ncg_new, i, *a, na;
9002 sort = dd->comm->sort->sort;
9004 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
9007 for (i = 0; i < na; i++)
9011 sort[ncg_new].ind = a[i];
9019 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
9022 gmx_domdec_sort_t *sort;
9023 gmx_cgsort_t *cgsort, *sort_i;
9025 int ncg_new, i, *ibuf, cgsize;
9028 sort = dd->comm->sort;
9030 if (dd->ncg_home > sort->sort_nalloc)
9032 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9033 srenew(sort->sort, sort->sort_nalloc);
9034 srenew(sort->sort2, sort->sort_nalloc);
9036 cgsort = sort->sort;
9038 switch (fr->cutoff_scheme)
9041 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9044 ncg_new = dd_sort_order_nbnxn(dd, fr);
9047 gmx_incons("unimplemented");
9051 /* We alloc with the old size, since cgindex is still old */
9052 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9053 vbuf = dd->comm->vbuf.v;
9057 cgindex = dd->cgindex;
9064 /* Remove the charge groups which are no longer at home here */
9065 dd->ncg_home = ncg_new;
9068 fprintf(debug, "Set the new home charge group count to %d\n",
9072 /* Reorder the state */
9073 for (i = 0; i < estNR; i++)
9075 if (EST_DISTR(i) && (state->flags & (1<<i)))
9080 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9083 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9086 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9089 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9093 case estDISRE_INITF:
9094 case estDISRE_RM3TAV:
9095 case estORIRE_INITF:
9097 /* No ordering required */
9100 gmx_incons("Unknown state entry encountered in dd_sort_state");
9105 if (fr->cutoff_scheme == ecutsGROUP)
9108 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9111 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9113 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9114 srenew(sort->ibuf, sort->ibuf_nalloc);
9117 /* Reorder the global cg index */
9118 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9119 /* Reorder the cginfo */
9120 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9121 /* Rebuild the local cg index */
9125 for (i = 0; i < dd->ncg_home; i++)
9127 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9128 ibuf[i+1] = ibuf[i] + cgsize;
9130 for (i = 0; i < dd->ncg_home+1; i++)
9132 dd->cgindex[i] = ibuf[i];
9137 for (i = 0; i < dd->ncg_home+1; i++)
9142 /* Set the home atom number */
9143 dd->nat_home = dd->cgindex[dd->ncg_home];
9145 if (fr->cutoff_scheme == ecutsVERLET)
9147 /* The atoms are now exactly in grid order, update the grid order */
9148 nbnxn_set_atomorder(fr->nbv->nbs);
9152 /* Copy the sorted ns cell indices back to the ns grid struct */
9153 for (i = 0; i < dd->ncg_home; i++)
9155 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9157 fr->ns.grid->nr = dd->ncg_home;
9161 static void add_dd_statistics(gmx_domdec_t *dd)
9163 gmx_domdec_comm_t *comm;
9168 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9170 comm->sum_nat[ddnat-ddnatZONE] +=
9171 comm->nat[ddnat] - comm->nat[ddnat-1];
9176 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9178 gmx_domdec_comm_t *comm;
9183 /* Reset all the statistics and counters for total run counting */
9184 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9186 comm->sum_nat[ddnat-ddnatZONE] = 0;
9190 comm->load_step = 0;
9193 clear_ivec(comm->load_lim);
9198 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9200 gmx_domdec_comm_t *comm;
9204 comm = cr->dd->comm;
9206 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9213 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9215 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9217 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9222 " av. #atoms communicated per step for force: %d x %.1f\n",
9226 if (cr->dd->vsite_comm)
9229 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9230 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9235 if (cr->dd->constraint_comm)
9238 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9239 1 + ir->nLincsIter, av);
9243 gmx_incons(" Unknown type for DD statistics");
9246 fprintf(fplog, "\n");
9248 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9250 print_dd_load_av(fplog, cr->dd);
9254 void dd_partition_system(FILE *fplog,
9257 gmx_bool bMasterState,
9259 t_state *state_global,
9260 gmx_mtop_t *top_global,
9262 t_state *state_local,
9265 gmx_localtop_t *top_local,
9268 gmx_shellfc_t shellfc,
9269 gmx_constr_t constr,
9271 gmx_wallcycle_t wcycle,
9275 gmx_domdec_comm_t *comm;
9276 gmx_ddbox_t ddbox = {0};
9278 gmx_int64_t step_pcoupl;
9279 rvec cell_ns_x0, cell_ns_x1;
9280 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9281 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9282 gmx_bool bRedist, bSortCG, bResortAll;
9283 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9290 bBoxChanged = (bMasterState || DEFORM(*ir));
9291 if (ir->epc != epcNO)
9293 /* With nstpcouple > 1 pressure coupling happens.
9294 * one step after calculating the pressure.
9295 * Box scaling happens at the end of the MD step,
9296 * after the DD partitioning.
9297 * We therefore have to do DLB in the first partitioning
9298 * after an MD step where P-coupling occured.
9299 * We need to determine the last step in which p-coupling occurred.
9300 * MRS -- need to validate this for vv?
9305 step_pcoupl = step - 1;
9309 step_pcoupl = ((step - 1)/n)*n + 1;
9311 if (step_pcoupl >= comm->partition_step)
9317 bNStGlobalComm = (step % nstglobalcomm == 0);
9319 if (!comm->bDynLoadBal)
9325 /* Should we do dynamic load balacing this step?
9326 * Since it requires (possibly expensive) global communication,
9327 * we might want to do DLB less frequently.
9329 if (bBoxChanged || ir->epc != epcNO)
9331 bDoDLB = bBoxChanged;
9335 bDoDLB = bNStGlobalComm;
9339 /* Check if we have recorded loads on the nodes */
9340 if (comm->bRecordLoad && dd_load_count(comm))
9342 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9344 /* Check if we should use DLB at the second partitioning
9345 * and every 100 partitionings,
9346 * so the extra communication cost is negligible.
9348 n = max(100, nstglobalcomm);
9349 bCheckDLB = (comm->n_load_collect == 0 ||
9350 comm->n_load_have % n == n-1);
9357 /* Print load every nstlog, first and last step to the log file */
9358 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9359 comm->n_load_collect == 0 ||
9361 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9363 /* Avoid extra communication due to verbose screen output
9364 * when nstglobalcomm is set.
9366 if (bDoDLB || bLogLoad || bCheckDLB ||
9367 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9369 get_load_distribution(dd, wcycle);
9374 dd_print_load(fplog, dd, step-1);
9378 dd_print_load_verbose(dd);
9381 comm->n_load_collect++;
9385 /* Since the timings are node dependent, the master decides */
9389 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
9392 fprintf(debug, "step %s, imb loss %f\n",
9393 gmx_step_str(step, sbuf),
9394 dd_force_imb_perf_loss(dd));
9397 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9400 turn_on_dlb(fplog, cr, step);
9405 comm->n_load_have++;
9408 cgs_gl = &comm->cgs_gl;
9413 /* Clear the old state */
9414 clear_dd_indices(dd, 0, 0);
9417 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9418 TRUE, cgs_gl, state_global->x, &ddbox);
9420 get_cg_distribution(fplog, step, dd, cgs_gl,
9421 state_global->box, &ddbox, state_global->x);
9423 dd_distribute_state(dd, cgs_gl,
9424 state_global, state_local, f);
9426 dd_make_local_cgs(dd, &top_local->cgs);
9428 /* Ensure that we have space for the new distribution */
9429 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9431 if (fr->cutoff_scheme == ecutsGROUP)
9433 calc_cgcm(fplog, 0, dd->ncg_home,
9434 &top_local->cgs, state_local->x, fr->cg_cm);
9437 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9439 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9441 else if (state_local->ddp_count != dd->ddp_count)
9443 if (state_local->ddp_count > dd->ddp_count)
9445 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9448 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9450 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9453 /* Clear the old state */
9454 clear_dd_indices(dd, 0, 0);
9456 /* Build the new indices */
9457 rebuild_cgindex(dd, cgs_gl->index, state_local);
9458 make_dd_indices(dd, cgs_gl->index, 0);
9459 ncgindex_set = dd->ncg_home;
9461 if (fr->cutoff_scheme == ecutsGROUP)
9463 /* Redetermine the cg COMs */
9464 calc_cgcm(fplog, 0, dd->ncg_home,
9465 &top_local->cgs, state_local->x, fr->cg_cm);
9468 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9470 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9472 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9473 TRUE, &top_local->cgs, state_local->x, &ddbox);
9475 bRedist = comm->bDynLoadBal;
9479 /* We have the full state, only redistribute the cgs */
9481 /* Clear the non-home indices */
9482 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9485 /* Avoid global communication for dim's without pbc and -gcom */
9486 if (!bNStGlobalComm)
9488 copy_rvec(comm->box0, ddbox.box0 );
9489 copy_rvec(comm->box_size, ddbox.box_size);
9491 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9492 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9497 /* For dim's without pbc and -gcom */
9498 copy_rvec(ddbox.box0, comm->box0 );
9499 copy_rvec(ddbox.box_size, comm->box_size);
9501 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9504 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9506 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9509 /* Check if we should sort the charge groups */
9510 if (comm->nstSortCG > 0)
9512 bSortCG = (bMasterState ||
9513 (bRedist && (step % comm->nstSortCG == 0)));
9520 ncg_home_old = dd->ncg_home;
9525 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9527 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9529 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9531 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9534 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9536 &comm->cell_x0, &comm->cell_x1,
9537 dd->ncg_home, fr->cg_cm,
9538 cell_ns_x0, cell_ns_x1, &grid_density);
9542 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9545 switch (fr->cutoff_scheme)
9548 copy_ivec(fr->ns.grid->n, ncells_old);
9549 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9550 state_local->box, cell_ns_x0, cell_ns_x1,
9551 fr->rlistlong, grid_density);
9554 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9557 gmx_incons("unimplemented");
9559 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9560 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9564 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9566 /* Sort the state on charge group position.
9567 * This enables exact restarts from this step.
9568 * It also improves performance by about 15% with larger numbers
9569 * of atoms per node.
9572 /* Fill the ns grid with the home cell,
9573 * so we can sort with the indices.
9575 set_zones_ncg_home(dd);
9577 switch (fr->cutoff_scheme)
9580 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9582 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9584 comm->zones.size[0].bb_x0,
9585 comm->zones.size[0].bb_x1,
9587 comm->zones.dens_zone0,
9590 ncg_moved, bRedist ? comm->moved : NULL,
9591 fr->nbv->grp[eintLocal].kernel_type,
9592 fr->nbv->grp[eintLocal].nbat);
9594 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9597 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9598 0, dd->ncg_home, fr->cg_cm);
9600 copy_ivec(fr->ns.grid->n, ncells_new);
9603 gmx_incons("unimplemented");
9606 bResortAll = bMasterState;
9608 /* Check if we can user the old order and ns grid cell indices
9609 * of the charge groups to sort the charge groups efficiently.
9611 if (ncells_new[XX] != ncells_old[XX] ||
9612 ncells_new[YY] != ncells_old[YY] ||
9613 ncells_new[ZZ] != ncells_old[ZZ])
9620 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9621 gmx_step_str(step, sbuf), dd->ncg_home);
9623 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9624 bResortAll ? -1 : ncg_home_old);
9625 /* Rebuild all the indices */
9626 ga2la_clear(dd->ga2la);
9629 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9632 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9634 /* Setup up the communication and communicate the coordinates */
9635 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9637 /* Set the indices */
9638 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9640 /* Set the charge group boundaries for neighbor searching */
9641 set_cg_boundaries(&comm->zones);
9643 if (fr->cutoff_scheme == ecutsVERLET)
9645 set_zones_size(dd, state_local->box, &ddbox,
9646 bSortCG ? 1 : 0, comm->zones.n);
9649 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9652 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9653 -1,state_local->x,state_local->box);
9656 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9658 /* Extract a local topology from the global topology */
9659 for (i = 0; i < dd->ndim; i++)
9661 np[dd->dim[i]] = comm->cd[i].np;
9663 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9664 comm->cellsize_min, np,
9666 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9667 vsite, top_global, top_local);
9669 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9671 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9673 /* Set up the special atom communication */
9674 n = comm->nat[ddnatZONE];
9675 for (i = ddnatZONE+1; i < ddnatNR; i++)
9680 if (vsite && vsite->n_intercg_vsite)
9682 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9686 if (dd->bInterCGcons || dd->bInterCGsettles)
9688 /* Only for inter-cg constraints we need special code */
9689 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9690 constr, ir->nProjOrder,
9691 top_local->idef.il);
9695 gmx_incons("Unknown special atom type setup");
9700 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9702 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9704 /* Make space for the extra coordinates for virtual site
9705 * or constraint communication.
9707 state_local->natoms = comm->nat[ddnatNR-1];
9708 if (state_local->natoms > state_local->nalloc)
9710 dd_realloc_state(state_local, f, state_local->natoms);
9713 if (fr->bF_NoVirSum)
9715 if (vsite && vsite->n_intercg_vsite)
9717 nat_f_novirsum = comm->nat[ddnatVSITE];
9721 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9723 nat_f_novirsum = dd->nat_tot;
9727 nat_f_novirsum = dd->nat_home;
9736 /* Set the number of atoms required for the force calculation.
9737 * Forces need to be constrained when using a twin-range setup
9738 * or with energy minimization. For simple simulations we could
9739 * avoid some allocation, zeroing and copying, but this is
9740 * probably not worth the complications ande checking.
9742 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9743 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9745 /* We make the all mdatoms up to nat_tot_con.
9746 * We could save some work by only setting invmass
9747 * between nat_tot and nat_tot_con.
9749 /* This call also sets the new number of home particles to dd->nat_home */
9750 atoms2md(top_global, ir,
9751 comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
9753 /* Now we have the charges we can sort the FE interactions */
9754 dd_sort_local_top(dd, mdatoms, top_local);
9758 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9759 split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
9764 /* Make the local shell stuff, currently no communication is done */
9765 make_local_shells(cr, mdatoms, shellfc);
9768 if (ir->implicit_solvent)
9770 make_local_gb(cr, fr->born, ir->gb_algorithm);
9773 setup_bonded_threading(fr, &top_local->idef);
9775 if (!(cr->duty & DUTY_PME))
9777 /* Send the charges and/or c6/sigmas to our PME only node */
9778 gmx_pme_send_parameters(cr, mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9779 mdatoms->chargeA, mdatoms->chargeB,
9780 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9781 mdatoms->sigmaA, mdatoms->sigmaB,
9782 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9787 set_constraints(constr, top_local, ir, mdatoms, cr);
9790 if (ir->ePull != epullNO)
9792 /* Update the local pull groups */
9793 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9798 /* Update the local rotation groups */
9799 dd_make_local_rotation_groups(dd, ir->rot);
9802 if (ir->eSwapCoords != eswapNO)
9804 /* Update the local groups needed for ion swapping */
9805 dd_make_local_swap_groups(dd, ir->swap);
9808 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9809 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9811 add_dd_statistics(dd);
9813 /* Make sure we only count the cycles for this DD partitioning */
9814 clear_dd_cycle_counts(dd);
9816 /* Because the order of the atoms might have changed since
9817 * the last vsite construction, we need to communicate the constructing
9818 * atom coordinates again (for spreading the forces this MD step).
9820 dd_move_x_vsites(dd, state_local->box, state_local->x);
9822 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9824 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9826 dd_move_x(dd, state_local->box, state_local->x);
9827 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9828 -1, state_local->x, state_local->box);
9831 /* Store the partitioning step */
9832 comm->partition_step = step;
9834 /* Increase the DD partitioning counter */
9836 /* The state currently matches this DD partitioning count, store it */
9837 state_local->ddp_count = dd->ddp_count;
9840 /* The DD master node knows the complete cg distribution,
9841 * store the count so we can possibly skip the cg info communication.
9843 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9846 if (comm->DD_debug > 0)
9848 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9849 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9850 "after partitioning");