2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "gromacs/math/vec.h"
49 #include "domdec_network.h"
51 #include "chargegroup.h"
60 #include "gromacs/topology/mtop_util.h"
61 #include "gmx_ga2la.h"
63 #include "nbnxn_search.h"
65 #include "gmx_omp_nthreads.h"
66 #include "gpu_utils.h"
68 #include "gromacs/utility/futil.h"
69 #include "gromacs/fileio/gmxfio.h"
70 #include "gromacs/fileio/pdbio.h"
71 #include "gromacs/imd/imd.h"
72 #include "gromacs/pbcutil/ishift.h"
73 #include "gromacs/pbcutil/pbc.h"
74 #include "gromacs/pulling/pull.h"
75 #include "gromacs/pulling/pull_rotation.h"
76 #include "gromacs/swap/swapcoords.h"
77 #include "gromacs/timing/wallcycle.h"
78 #include "gromacs/utility/basenetwork.h"
79 #include "gromacs/utility/fatalerror.h"
80 #include "gromacs/utility/gmxmpi.h"
81 #include "gromacs/utility/qsort_threadsafe.h"
82 #include "gromacs/utility/smalloc.h"
84 #define DDRANK(dd, rank) (rank)
85 #define DDMASTERRANK(dd) (dd->masterrank)
87 typedef struct gmx_domdec_master
89 /* The cell boundaries */
91 /* The global charge group division */
92 int *ncg; /* Number of home charge groups for each node */
93 int *index; /* Index of nnodes+1 into cg */
94 int *cg; /* Global charge group index */
95 int *nat; /* Number of home atoms for each node. */
96 int *ibuf; /* Buffer for communication */
97 rvec *vbuf; /* Buffer for state scattering and gathering */
98 } gmx_domdec_master_t;
102 /* The numbers of charge groups to send and receive for each cell
103 * that requires communication, the last entry contains the total
104 * number of atoms that needs to be communicated.
106 int nsend[DD_MAXIZONE+2];
107 int nrecv[DD_MAXIZONE+2];
108 /* The charge groups to send */
111 /* The atom range for non-in-place communication */
112 int cell2at0[DD_MAXIZONE];
113 int cell2at1[DD_MAXIZONE];
118 int np; /* Number of grid pulses in this dimension */
119 int np_dlb; /* For dlb, for use with edlbAUTO */
120 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
122 gmx_bool bInPlace; /* Can we communicate in place? */
123 } gmx_domdec_comm_dim_t;
127 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
128 real *cell_f; /* State var.: cell boundaries, box relative */
129 real *old_cell_f; /* Temp. var.: old cell size */
130 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
131 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
132 real *bound_min; /* Temp. var.: lower limit for cell boundary */
133 real *bound_max; /* Temp. var.: upper limit for cell boundary */
134 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
135 real *buf_ncd; /* Temp. var. */
138 #define DD_NLOAD_MAX 9
140 /* Here floats are accurate enough, since these variables
141 * only influence the load balancing, not the actual MD results.
168 gmx_cgsort_t *sort_new;
180 /* This enum determines the order of the coordinates.
181 * ddnatHOME and ddnatZONE should be first and second,
182 * the others can be ordered as wanted.
185 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
189 edlbAUTO, edlbNO, edlbYES, edlbNR
191 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
195 int dim; /* The dimension */
196 gmx_bool dim_match; /* Tells if DD and PME dims match */
197 int nslab; /* The number of PME slabs in this dimension */
198 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
199 int *pp_min; /* The minimum pp node location, size nslab */
200 int *pp_max; /* The maximum pp node location,size nslab */
201 int maxshift; /* The maximum shift for coordinate redistribution in PME */
206 real min0; /* The minimum bottom of this zone */
207 real max1; /* The maximum top of this zone */
208 real min1; /* The minimum top of this zone */
209 real mch0; /* The maximum bottom communicaton height for this zone */
210 real mch1; /* The maximum top communicaton height for this zone */
211 real p1_0; /* The bottom value of the first cell in this zone */
212 real p1_1; /* The top value of the first cell in this zone */
217 gmx_domdec_ind_t ind;
224 } dd_comm_setup_work_t;
226 typedef struct gmx_domdec_comm
228 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
229 * unless stated otherwise.
232 /* The number of decomposition dimensions for PME, 0: no PME */
234 /* The number of nodes doing PME (PP/PME or only PME) */
238 /* The communication setup including the PME only nodes */
239 gmx_bool bCartesianPP_PME;
242 int *pmenodes; /* size npmenodes */
243 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
244 * but with bCartesianPP_PME */
245 gmx_ddpme_t ddpme[2];
247 /* The DD particle-particle nodes only */
248 gmx_bool bCartesianPP;
249 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
251 /* The global charge groups */
254 /* Should we sort the cgs */
256 gmx_domdec_sort_t *sort;
258 /* Are there charge groups? */
261 /* Are there bonded and multi-body interactions between charge groups? */
262 gmx_bool bInterCGBondeds;
263 gmx_bool bInterCGMultiBody;
265 /* Data for the optional bonded interaction atom communication range */
272 /* Are we actually using DLB? */
273 gmx_bool bDynLoadBal;
275 /* Cell sizes for static load balancing, first index cartesian */
278 /* The width of the communicated boundaries */
281 /* The minimum cell size (including triclinic correction) */
283 /* For dlb, for use with edlbAUTO */
284 rvec cellsize_min_dlb;
285 /* The lower limit for the DD cell size with DLB */
287 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
288 gmx_bool bVacDLBNoLimit;
290 /* With PME load balancing we set limits on DLB */
291 gmx_bool bPMELoadBalDLBLimits;
292 /* DLB needs to take into account that we want to allow this maximum
293 * cut-off (for PME load balancing), this could limit cell boundaries.
295 real PMELoadBal_max_cutoff;
297 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
299 /* box0 and box_size are required with dim's without pbc and -gcom */
303 /* The cell boundaries */
307 /* The old location of the cell boundaries, to check cg displacements */
311 /* The communication setup and charge group boundaries for the zones */
312 gmx_domdec_zones_t zones;
314 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
315 * cell boundaries of neighboring cells for dynamic load balancing.
317 gmx_ddzone_t zone_d1[2];
318 gmx_ddzone_t zone_d2[2][2];
320 /* The coordinate/force communication setup and indices */
321 gmx_domdec_comm_dim_t cd[DIM];
322 /* The maximum number of cells to communicate with in one dimension */
325 /* Which cg distribution is stored on the master node */
326 int master_cg_ddp_count;
328 /* The number of cg's received from the direct neighbors */
329 int zone_ncg1[DD_MAXZONE];
331 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
334 /* Array for signalling if atoms have moved to another domain */
338 /* Communication buffer for general use */
342 /* Communication buffer for general use */
345 /* Temporary storage for thread parallel communication setup */
347 dd_comm_setup_work_t *dth;
349 /* Communication buffers only used with multiple grid pulses */
354 /* Communication buffers for local redistribution */
356 int cggl_flag_nalloc[DIM*2];
358 int cgcm_state_nalloc[DIM*2];
360 /* Cell sizes for dynamic load balancing */
361 gmx_domdec_root_t **root;
365 real cell_f_max0[DIM];
366 real cell_f_min1[DIM];
368 /* Stuff for load communication */
369 gmx_bool bRecordLoad;
370 gmx_domdec_load_t *load;
371 int nrank_gpu_shared;
373 MPI_Comm *mpi_comm_load;
374 MPI_Comm mpi_comm_gpu_shared;
377 /* Maximum DLB scaling per load balancing step in percent */
381 float cycl[ddCyclNr];
382 int cycl_n[ddCyclNr];
383 float cycl_max[ddCyclNr];
384 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
388 /* Have often have did we have load measurements */
390 /* Have often have we collected the load measurements */
394 double sum_nat[ddnatNR-ddnatZONE];
404 /* The last partition step */
405 gmx_int64_t partition_step;
413 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
416 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
417 #define DD_FLAG_NRCG 65535
418 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
419 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
421 /* Zone permutation required to obtain consecutive charge groups
422 * for neighbor searching.
424 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
426 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
427 * components see only j zones with that component 0.
430 /* The DD zone order */
431 static const ivec dd_zo[DD_MAXZONE] =
432 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
437 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
442 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
447 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
449 /* Factors used to avoid problems due to rounding issues */
450 #define DD_CELL_MARGIN 1.0001
451 #define DD_CELL_MARGIN2 1.00005
452 /* Factor to account for pressure scaling during nstlist steps */
453 #define DD_PRES_SCALE_MARGIN 1.02
455 /* Turn on DLB when the load imbalance causes this amount of total loss.
456 * There is a bit of overhead with DLB and it's difficult to achieve
457 * a load imbalance of less than 2% with DLB.
459 #define DD_PERF_LOSS_DLB_ON 0.02
461 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
462 #define DD_PERF_LOSS_WARN 0.05
464 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
466 /* Use separate MPI send and receive commands
467 * when nnodes <= GMX_DD_NNODES_SENDRECV.
468 * This saves memory (and some copying for small nnodes).
469 * For high parallelization scatter and gather calls are used.
471 #define GMX_DD_NNODES_SENDRECV 4
475 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
477 static void index2xyz(ivec nc,int ind,ivec xyz)
479 xyz[XX] = ind % nc[XX];
480 xyz[YY] = (ind / nc[XX]) % nc[YY];
481 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
485 /* This order is required to minimize the coordinate communication in PME
486 * which uses decomposition in the x direction.
488 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
490 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
492 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
493 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
494 xyz[ZZ] = ind % nc[ZZ];
497 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
502 ddindex = dd_index(dd->nc, c);
503 if (dd->comm->bCartesianPP_PME)
505 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
507 else if (dd->comm->bCartesianPP)
510 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
521 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
523 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
526 int ddglatnr(gmx_domdec_t *dd, int i)
536 if (i >= dd->comm->nat[ddnatNR-1])
538 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
540 atnr = dd->gatindex[i] + 1;
546 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
548 return &dd->comm->cgs_gl;
551 static void vec_rvec_init(vec_rvec_t *v)
557 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
561 v->nalloc = over_alloc_dd(n);
562 srenew(v->v, v->nalloc);
566 void dd_store_state(gmx_domdec_t *dd, t_state *state)
570 if (state->ddp_count != dd->ddp_count)
572 gmx_incons("The state does not the domain decomposition state");
575 state->ncg_gl = dd->ncg_home;
576 if (state->ncg_gl > state->cg_gl_nalloc)
578 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
579 srenew(state->cg_gl, state->cg_gl_nalloc);
581 for (i = 0; i < state->ncg_gl; i++)
583 state->cg_gl[i] = dd->index_gl[i];
586 state->ddp_count_cg_gl = dd->ddp_count;
589 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
591 return &dd->comm->zones;
594 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
595 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
597 gmx_domdec_zones_t *zones;
600 zones = &dd->comm->zones;
603 while (icg >= zones->izone[izone].cg1)
612 else if (izone < zones->nizone)
614 *jcg0 = zones->izone[izone].jcg0;
618 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
619 icg, izone, zones->nizone);
622 *jcg1 = zones->izone[izone].jcg1;
624 for (d = 0; d < dd->ndim; d++)
627 shift0[dim] = zones->izone[izone].shift0[dim];
628 shift1[dim] = zones->izone[izone].shift1[dim];
629 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
631 /* A conservative approach, this can be optimized */
638 int dd_natoms_vsite(gmx_domdec_t *dd)
640 return dd->comm->nat[ddnatVSITE];
643 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
645 *at_start = dd->comm->nat[ddnatCON-1];
646 *at_end = dd->comm->nat[ddnatCON];
649 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
651 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
652 int *index, *cgindex;
653 gmx_domdec_comm_t *comm;
654 gmx_domdec_comm_dim_t *cd;
655 gmx_domdec_ind_t *ind;
656 rvec shift = {0, 0, 0}, *buf, *rbuf;
657 gmx_bool bPBC, bScrew;
661 cgindex = dd->cgindex;
666 nat_tot = dd->nat_home;
667 for (d = 0; d < dd->ndim; d++)
669 bPBC = (dd->ci[dd->dim[d]] == 0);
670 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
673 copy_rvec(box[dd->dim[d]], shift);
676 for (p = 0; p < cd->np; p++)
683 for (i = 0; i < ind->nsend[nzone]; i++)
685 at0 = cgindex[index[i]];
686 at1 = cgindex[index[i]+1];
687 for (j = at0; j < at1; j++)
689 copy_rvec(x[j], buf[n]);
696 for (i = 0; i < ind->nsend[nzone]; i++)
698 at0 = cgindex[index[i]];
699 at1 = cgindex[index[i]+1];
700 for (j = at0; j < at1; j++)
702 /* We need to shift the coordinates */
703 rvec_add(x[j], shift, buf[n]);
710 for (i = 0; i < ind->nsend[nzone]; i++)
712 at0 = cgindex[index[i]];
713 at1 = cgindex[index[i]+1];
714 for (j = at0; j < at1; j++)
717 buf[n][XX] = x[j][XX] + shift[XX];
719 * This operation requires a special shift force
720 * treatment, which is performed in calc_vir.
722 buf[n][YY] = box[YY][YY] - x[j][YY];
723 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
735 rbuf = comm->vbuf2.v;
737 /* Send and receive the coordinates */
738 dd_sendrecv_rvec(dd, d, dddirBackward,
739 buf, ind->nsend[nzone+1],
740 rbuf, ind->nrecv[nzone+1]);
744 for (zone = 0; zone < nzone; zone++)
746 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
748 copy_rvec(rbuf[j], x[i]);
753 nat_tot += ind->nrecv[nzone+1];
759 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
761 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
762 int *index, *cgindex;
763 gmx_domdec_comm_t *comm;
764 gmx_domdec_comm_dim_t *cd;
765 gmx_domdec_ind_t *ind;
769 gmx_bool bPBC, bScrew;
773 cgindex = dd->cgindex;
778 nzone = comm->zones.n/2;
779 nat_tot = dd->nat_tot;
780 for (d = dd->ndim-1; d >= 0; d--)
782 bPBC = (dd->ci[dd->dim[d]] == 0);
783 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
784 if (fshift == NULL && !bScrew)
788 /* Determine which shift vector we need */
794 for (p = cd->np-1; p >= 0; p--)
797 nat_tot -= ind->nrecv[nzone+1];
804 sbuf = comm->vbuf2.v;
806 for (zone = 0; zone < nzone; zone++)
808 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
810 copy_rvec(f[i], sbuf[j]);
815 /* Communicate the forces */
816 dd_sendrecv_rvec(dd, d, dddirForward,
817 sbuf, ind->nrecv[nzone+1],
818 buf, ind->nsend[nzone+1]);
820 /* Add the received forces */
824 for (i = 0; i < ind->nsend[nzone]; i++)
826 at0 = cgindex[index[i]];
827 at1 = cgindex[index[i]+1];
828 for (j = at0; j < at1; j++)
830 rvec_inc(f[j], buf[n]);
837 for (i = 0; i < ind->nsend[nzone]; i++)
839 at0 = cgindex[index[i]];
840 at1 = cgindex[index[i]+1];
841 for (j = at0; j < at1; j++)
843 rvec_inc(f[j], buf[n]);
844 /* Add this force to the shift force */
845 rvec_inc(fshift[is], buf[n]);
852 for (i = 0; i < ind->nsend[nzone]; i++)
854 at0 = cgindex[index[i]];
855 at1 = cgindex[index[i]+1];
856 for (j = at0; j < at1; j++)
858 /* Rotate the force */
859 f[j][XX] += buf[n][XX];
860 f[j][YY] -= buf[n][YY];
861 f[j][ZZ] -= buf[n][ZZ];
864 /* Add this force to the shift force */
865 rvec_inc(fshift[is], buf[n]);
876 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
878 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
879 int *index, *cgindex;
880 gmx_domdec_comm_t *comm;
881 gmx_domdec_comm_dim_t *cd;
882 gmx_domdec_ind_t *ind;
887 cgindex = dd->cgindex;
889 buf = &comm->vbuf.v[0][0];
892 nat_tot = dd->nat_home;
893 for (d = 0; d < dd->ndim; d++)
896 for (p = 0; p < cd->np; p++)
901 for (i = 0; i < ind->nsend[nzone]; i++)
903 at0 = cgindex[index[i]];
904 at1 = cgindex[index[i]+1];
905 for (j = at0; j < at1; j++)
918 rbuf = &comm->vbuf2.v[0][0];
920 /* Send and receive the coordinates */
921 dd_sendrecv_real(dd, d, dddirBackward,
922 buf, ind->nsend[nzone+1],
923 rbuf, ind->nrecv[nzone+1]);
927 for (zone = 0; zone < nzone; zone++)
929 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
936 nat_tot += ind->nrecv[nzone+1];
942 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
944 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
945 int *index, *cgindex;
946 gmx_domdec_comm_t *comm;
947 gmx_domdec_comm_dim_t *cd;
948 gmx_domdec_ind_t *ind;
953 cgindex = dd->cgindex;
955 buf = &comm->vbuf.v[0][0];
958 nzone = comm->zones.n/2;
959 nat_tot = dd->nat_tot;
960 for (d = dd->ndim-1; d >= 0; d--)
963 for (p = cd->np-1; p >= 0; p--)
966 nat_tot -= ind->nrecv[nzone+1];
973 sbuf = &comm->vbuf2.v[0][0];
975 for (zone = 0; zone < nzone; zone++)
977 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
984 /* Communicate the forces */
985 dd_sendrecv_real(dd, d, dddirForward,
986 sbuf, ind->nrecv[nzone+1],
987 buf, ind->nsend[nzone+1]);
989 /* Add the received forces */
991 for (i = 0; i < ind->nsend[nzone]; i++)
993 at0 = cgindex[index[i]];
994 at1 = cgindex[index[i]+1];
995 for (j = at0; j < at1; j++)
1006 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
1008 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1010 zone->min0, zone->max1,
1011 zone->mch0, zone->mch0,
1012 zone->p1_0, zone->p1_1);
1016 #define DDZONECOMM_MAXZONE 5
1017 #define DDZONECOMM_BUFSIZE 3
1019 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1020 int ddimind, int direction,
1021 gmx_ddzone_t *buf_s, int n_s,
1022 gmx_ddzone_t *buf_r, int n_r)
1024 #define ZBS DDZONECOMM_BUFSIZE
1025 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1026 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
1029 for (i = 0; i < n_s; i++)
1031 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1032 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1033 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1034 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1035 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1036 vbuf_s[i*ZBS+1][2] = 0;
1037 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1038 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1039 vbuf_s[i*ZBS+2][2] = 0;
1042 dd_sendrecv_rvec(dd, ddimind, direction,
1046 for (i = 0; i < n_r; i++)
1048 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1049 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1050 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1051 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1052 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1053 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1054 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
1060 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1061 rvec cell_ns_x0, rvec cell_ns_x1)
1063 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1065 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1066 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1067 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1068 rvec extr_s[2], extr_r[2];
1070 real dist_d, c = 0, det;
1071 gmx_domdec_comm_t *comm;
1072 gmx_bool bPBC, bUse;
1076 for (d = 1; d < dd->ndim; d++)
1079 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1080 zp->min0 = cell_ns_x0[dim];
1081 zp->max1 = cell_ns_x1[dim];
1082 zp->min1 = cell_ns_x1[dim];
1083 zp->mch0 = cell_ns_x0[dim];
1084 zp->mch1 = cell_ns_x1[dim];
1085 zp->p1_0 = cell_ns_x0[dim];
1086 zp->p1_1 = cell_ns_x1[dim];
1089 for (d = dd->ndim-2; d >= 0; d--)
1092 bPBC = (dim < ddbox->npbcdim);
1094 /* Use an rvec to store two reals */
1095 extr_s[d][0] = comm->cell_f0[d+1];
1096 extr_s[d][1] = comm->cell_f1[d+1];
1097 extr_s[d][2] = comm->cell_f1[d+1];
1100 /* Store the extremes in the backward sending buffer,
1101 * so the get updated separately from the forward communication.
1103 for (d1 = d; d1 < dd->ndim-1; d1++)
1105 /* We invert the order to be able to use the same loop for buf_e */
1106 buf_s[pos].min0 = extr_s[d1][1];
1107 buf_s[pos].max1 = extr_s[d1][0];
1108 buf_s[pos].min1 = extr_s[d1][2];
1109 buf_s[pos].mch0 = 0;
1110 buf_s[pos].mch1 = 0;
1111 /* Store the cell corner of the dimension we communicate along */
1112 buf_s[pos].p1_0 = comm->cell_x0[dim];
1113 buf_s[pos].p1_1 = 0;
1117 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1120 if (dd->ndim == 3 && d == 0)
1122 buf_s[pos] = comm->zone_d2[0][1];
1124 buf_s[pos] = comm->zone_d1[0];
1128 /* We only need to communicate the extremes
1129 * in the forward direction
1131 npulse = comm->cd[d].np;
1134 /* Take the minimum to avoid double communication */
1135 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1139 /* Without PBC we should really not communicate over
1140 * the boundaries, but implementing that complicates
1141 * the communication setup and therefore we simply
1142 * do all communication, but ignore some data.
1144 npulse_min = npulse;
1146 for (p = 0; p < npulse_min; p++)
1148 /* Communicate the extremes forward */
1149 bUse = (bPBC || dd->ci[dim] > 0);
1151 dd_sendrecv_rvec(dd, d, dddirForward,
1152 extr_s+d, dd->ndim-d-1,
1153 extr_r+d, dd->ndim-d-1);
1157 for (d1 = d; d1 < dd->ndim-1; d1++)
1159 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1160 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1161 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1167 for (p = 0; p < npulse; p++)
1169 /* Communicate all the zone information backward */
1170 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1172 dd_sendrecv_ddzone(dd, d, dddirBackward,
1179 for (d1 = d+1; d1 < dd->ndim; d1++)
1181 /* Determine the decrease of maximum required
1182 * communication height along d1 due to the distance along d,
1183 * this avoids a lot of useless atom communication.
1185 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1187 if (ddbox->tric_dir[dim])
1189 /* c is the off-diagonal coupling between the cell planes
1190 * along directions d and d1.
1192 c = ddbox->v[dim][dd->dim[d1]][dim];
1198 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1201 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1205 /* A negative value signals out of range */
1211 /* Accumulate the extremes over all pulses */
1212 for (i = 0; i < buf_size; i++)
1216 buf_e[i] = buf_r[i];
1222 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1223 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1224 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1227 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1235 if (bUse && dh[d1] >= 0)
1237 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1238 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1241 /* Copy the received buffer to the send buffer,
1242 * to pass the data through with the next pulse.
1244 buf_s[i] = buf_r[i];
1246 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1247 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1249 /* Store the extremes */
1252 for (d1 = d; d1 < dd->ndim-1; d1++)
1254 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1255 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1256 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1260 if (d == 1 || (d == 0 && dd->ndim == 3))
1262 for (i = d; i < 2; i++)
1264 comm->zone_d2[1-d][i] = buf_e[pos];
1270 comm->zone_d1[1] = buf_e[pos];
1280 for (i = 0; i < 2; i++)
1284 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1286 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1287 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1293 for (i = 0; i < 2; i++)
1295 for (j = 0; j < 2; j++)
1299 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1301 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1302 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1306 for (d = 1; d < dd->ndim; d++)
1308 comm->cell_f_max0[d] = extr_s[d-1][0];
1309 comm->cell_f_min1[d] = extr_s[d-1][1];
1312 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1313 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1318 static void dd_collect_cg(gmx_domdec_t *dd,
1319 t_state *state_local)
1321 gmx_domdec_master_t *ma = NULL;
1322 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1325 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1327 /* The master has the correct distribution */
1331 if (state_local->ddp_count == dd->ddp_count)
1333 ncg_home = dd->ncg_home;
1335 nat_home = dd->nat_home;
1337 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1339 cgs_gl = &dd->comm->cgs_gl;
1341 ncg_home = state_local->ncg_gl;
1342 cg = state_local->cg_gl;
1344 for (i = 0; i < ncg_home; i++)
1346 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1351 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1354 buf2[0] = dd->ncg_home;
1355 buf2[1] = dd->nat_home;
1365 /* Collect the charge group and atom counts on the master */
1366 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1371 for (i = 0; i < dd->nnodes; i++)
1373 ma->ncg[i] = ma->ibuf[2*i];
1374 ma->nat[i] = ma->ibuf[2*i+1];
1375 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1378 /* Make byte counts and indices */
1379 for (i = 0; i < dd->nnodes; i++)
1381 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1382 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1386 fprintf(debug, "Initial charge group distribution: ");
1387 for (i = 0; i < dd->nnodes; i++)
1389 fprintf(debug, " %d", ma->ncg[i]);
1391 fprintf(debug, "\n");
1395 /* Collect the charge group indices on the master */
1397 dd->ncg_home*sizeof(int), dd->index_gl,
1398 DDMASTER(dd) ? ma->ibuf : NULL,
1399 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1400 DDMASTER(dd) ? ma->cg : NULL);
1402 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1405 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1408 gmx_domdec_master_t *ma;
1409 int n, i, c, a, nalloc = 0;
1418 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1419 dd->rank, dd->mpi_comm_all);
1424 /* Copy the master coordinates to the global array */
1425 cgs_gl = &dd->comm->cgs_gl;
1427 n = DDMASTERRANK(dd);
1429 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1431 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1433 copy_rvec(lv[a++], v[c]);
1437 for (n = 0; n < dd->nnodes; n++)
1441 if (ma->nat[n] > nalloc)
1443 nalloc = over_alloc_dd(ma->nat[n]);
1444 srenew(buf, nalloc);
1447 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1448 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1451 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1453 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1455 copy_rvec(buf[a++], v[c]);
1464 static void get_commbuffer_counts(gmx_domdec_t *dd,
1465 int **counts, int **disps)
1467 gmx_domdec_master_t *ma;
1472 /* Make the rvec count and displacment arrays */
1474 *disps = ma->ibuf + dd->nnodes;
1475 for (n = 0; n < dd->nnodes; n++)
1477 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1478 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1482 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1485 gmx_domdec_master_t *ma;
1486 int *rcounts = NULL, *disps = NULL;
1495 get_commbuffer_counts(dd, &rcounts, &disps);
1500 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1504 cgs_gl = &dd->comm->cgs_gl;
1507 for (n = 0; n < dd->nnodes; n++)
1509 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1511 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1513 copy_rvec(buf[a++], v[c]);
1520 void dd_collect_vec(gmx_domdec_t *dd,
1521 t_state *state_local, rvec *lv, rvec *v)
1523 gmx_domdec_master_t *ma;
1524 int n, i, c, a, nalloc = 0;
1527 dd_collect_cg(dd, state_local);
1529 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1531 dd_collect_vec_sendrecv(dd, lv, v);
1535 dd_collect_vec_gatherv(dd, lv, v);
1540 void dd_collect_state(gmx_domdec_t *dd,
1541 t_state *state_local, t_state *state)
1545 nh = state->nhchainlength;
1549 for (i = 0; i < efptNR; i++)
1551 state->lambda[i] = state_local->lambda[i];
1553 state->fep_state = state_local->fep_state;
1554 state->veta = state_local->veta;
1555 state->vol0 = state_local->vol0;
1556 copy_mat(state_local->box, state->box);
1557 copy_mat(state_local->boxv, state->boxv);
1558 copy_mat(state_local->svir_prev, state->svir_prev);
1559 copy_mat(state_local->fvir_prev, state->fvir_prev);
1560 copy_mat(state_local->pres_prev, state->pres_prev);
1562 for (i = 0; i < state_local->ngtc; i++)
1564 for (j = 0; j < nh; j++)
1566 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1567 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1569 state->therm_integral[i] = state_local->therm_integral[i];
1571 for (i = 0; i < state_local->nnhpres; i++)
1573 for (j = 0; j < nh; j++)
1575 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1576 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1580 for (est = 0; est < estNR; est++)
1582 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1587 dd_collect_vec(dd, state_local, state_local->x, state->x);
1590 dd_collect_vec(dd, state_local, state_local->v, state->v);
1593 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1596 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1598 case estDISRE_INITF:
1599 case estDISRE_RM3TAV:
1600 case estORIRE_INITF:
1604 gmx_incons("Unknown state entry encountered in dd_collect_state");
1610 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1616 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1619 state->nalloc = over_alloc_dd(nalloc);
1621 for (est = 0; est < estNR; est++)
1623 if (EST_DISTR(est) && (state->flags & (1<<est)))
1628 srenew(state->x, state->nalloc);
1631 srenew(state->v, state->nalloc);
1634 srenew(state->sd_X, state->nalloc);
1637 srenew(state->cg_p, state->nalloc);
1639 case estDISRE_INITF:
1640 case estDISRE_RM3TAV:
1641 case estORIRE_INITF:
1643 /* No reallocation required */
1646 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1653 srenew(*f, state->nalloc);
1657 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1660 if (nalloc > fr->cg_nalloc)
1664 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1666 fr->cg_nalloc = over_alloc_dd(nalloc);
1667 srenew(fr->cginfo, fr->cg_nalloc);
1668 if (fr->cutoff_scheme == ecutsGROUP)
1670 srenew(fr->cg_cm, fr->cg_nalloc);
1673 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1675 /* We don't use charge groups, we use x in state to set up
1676 * the atom communication.
1678 dd_realloc_state(state, f, nalloc);
1682 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1685 gmx_domdec_master_t *ma;
1686 int n, i, c, a, nalloc = 0;
1693 for (n = 0; n < dd->nnodes; n++)
1697 if (ma->nat[n] > nalloc)
1699 nalloc = over_alloc_dd(ma->nat[n]);
1700 srenew(buf, nalloc);
1702 /* Use lv as a temporary buffer */
1704 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1706 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1708 copy_rvec(v[c], buf[a++]);
1711 if (a != ma->nat[n])
1713 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1718 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1719 DDRANK(dd, n), n, dd->mpi_comm_all);
1724 n = DDMASTERRANK(dd);
1726 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1728 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1730 copy_rvec(v[c], lv[a++]);
1737 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1738 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1743 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1746 gmx_domdec_master_t *ma;
1747 int *scounts = NULL, *disps = NULL;
1748 int n, i, c, a, nalloc = 0;
1755 get_commbuffer_counts(dd, &scounts, &disps);
1759 for (n = 0; n < dd->nnodes; n++)
1761 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1763 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1765 copy_rvec(v[c], buf[a++]);
1771 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1774 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1776 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1778 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1782 dd_distribute_vec_scatterv(dd, cgs, v, lv);
1786 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1789 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1790 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1791 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1793 if (dfhist->nlambda > 0)
1795 int nlam = dfhist->nlambda;
1796 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1797 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1798 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1799 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1800 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1801 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
1803 for (i = 0; i < nlam; i++)
1805 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1806 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1807 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1808 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1809 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1810 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
1815 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1816 t_state *state, t_state *state_local,
1821 nh = state->nhchainlength;
1825 for (i = 0; i < efptNR; i++)
1827 state_local->lambda[i] = state->lambda[i];
1829 state_local->fep_state = state->fep_state;
1830 state_local->veta = state->veta;
1831 state_local->vol0 = state->vol0;
1832 copy_mat(state->box, state_local->box);
1833 copy_mat(state->box_rel, state_local->box_rel);
1834 copy_mat(state->boxv, state_local->boxv);
1835 copy_mat(state->svir_prev, state_local->svir_prev);
1836 copy_mat(state->fvir_prev, state_local->fvir_prev);
1837 copy_df_history(&state_local->dfhist, &state->dfhist);
1838 for (i = 0; i < state_local->ngtc; i++)
1840 for (j = 0; j < nh; j++)
1842 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1843 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1845 state_local->therm_integral[i] = state->therm_integral[i];
1847 for (i = 0; i < state_local->nnhpres; i++)
1849 for (j = 0; j < nh; j++)
1851 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1852 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1856 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1857 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1858 dd_bcast(dd, sizeof(real), &state_local->veta);
1859 dd_bcast(dd, sizeof(real), &state_local->vol0);
1860 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1861 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1862 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1863 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1864 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1865 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1866 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1867 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1868 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1869 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1871 /* communicate df_history -- required for restarting from checkpoint */
1872 dd_distribute_dfhist(dd, &state_local->dfhist);
1874 if (dd->nat_home > state_local->nalloc)
1876 dd_realloc_state(state_local, f, dd->nat_home);
1878 for (i = 0; i < estNR; i++)
1880 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1885 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1888 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1891 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1894 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1896 case estDISRE_INITF:
1897 case estDISRE_RM3TAV:
1898 case estORIRE_INITF:
1900 /* Not implemented yet */
1903 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1909 static char dim2char(int dim)
1915 case XX: c = 'X'; break;
1916 case YY: c = 'Y'; break;
1917 case ZZ: c = 'Z'; break;
1918 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
1924 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1925 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1927 rvec grid_s[2], *grid_r = NULL, cx, r;
1928 char fname[STRLEN], buf[22];
1930 int a, i, d, z, y, x;
1934 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1935 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1939 snew(grid_r, 2*dd->nnodes);
1942 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : NULL);
1946 for (d = 0; d < DIM; d++)
1948 for (i = 0; i < DIM; i++)
1956 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1958 tric[d][i] = box[i][d]/box[i][i];
1967 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1968 out = gmx_fio_fopen(fname, "w");
1969 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1971 for (i = 0; i < dd->nnodes; i++)
1973 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1974 for (d = 0; d < DIM; d++)
1976 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1978 for (z = 0; z < 2; z++)
1980 for (y = 0; y < 2; y++)
1982 for (x = 0; x < 2; x++)
1984 cx[XX] = grid_r[i*2+x][XX];
1985 cx[YY] = grid_r[i*2+y][YY];
1986 cx[ZZ] = grid_r[i*2+z][ZZ];
1988 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1989 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
1993 for (d = 0; d < DIM; d++)
1995 for (x = 0; x < 4; x++)
1999 case 0: y = 1 + i*8 + 2*x; break;
2000 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
2001 case 2: y = 1 + i*8 + x; break;
2003 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2007 gmx_fio_fclose(out);
2012 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
2013 gmx_mtop_t *mtop, t_commrec *cr,
2014 int natoms, rvec x[], matrix box)
2016 char fname[STRLEN], buf[22];
2018 int i, ii, resnr, c;
2019 char *atomname, *resname;
2026 natoms = dd->comm->nat[ddnatVSITE];
2029 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2031 out = gmx_fio_fopen(fname, "w");
2033 fprintf(out, "TITLE %s\n", title);
2034 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2035 for (i = 0; i < natoms; i++)
2037 ii = dd->gatindex[i];
2038 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
2039 if (i < dd->comm->nat[ddnatZONE])
2042 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2048 else if (i < dd->comm->nat[ddnatVSITE])
2050 b = dd->comm->zones.n;
2054 b = dd->comm->zones.n + 1;
2056 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
2057 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
2059 fprintf(out, "TER\n");
2061 gmx_fio_fclose(out);
2064 real dd_cutoff_mbody(gmx_domdec_t *dd)
2066 gmx_domdec_comm_t *comm;
2073 if (comm->bInterCGBondeds)
2075 if (comm->cutoff_mbody > 0)
2077 r = comm->cutoff_mbody;
2081 /* cutoff_mbody=0 means we do not have DLB */
2082 r = comm->cellsize_min[dd->dim[0]];
2083 for (di = 1; di < dd->ndim; di++)
2085 r = min(r, comm->cellsize_min[dd->dim[di]]);
2087 if (comm->bBondComm)
2089 r = max(r, comm->cutoff_mbody);
2093 r = min(r, comm->cutoff);
2101 real dd_cutoff_twobody(gmx_domdec_t *dd)
2105 r_mb = dd_cutoff_mbody(dd);
2107 return max(dd->comm->cutoff, r_mb);
2111 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2115 nc = dd->nc[dd->comm->cartpmedim];
2116 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2117 copy_ivec(coord, coord_pme);
2118 coord_pme[dd->comm->cartpmedim] =
2119 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
2122 static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
2124 /* Here we assign a PME node to communicate with this DD node
2125 * by assuming that the major index of both is x.
2126 * We add cr->npmenodes/2 to obtain an even distribution.
2128 return (ddindex*npme + npme/2)/ndd;
2131 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2133 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2136 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2138 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
2141 static int *dd_pmenodes(t_commrec *cr)
2146 snew(pmenodes, cr->npmenodes);
2148 for (i = 0; i < cr->dd->nnodes; i++)
2150 p0 = cr_ddindex2pmeindex(cr, i);
2151 p1 = cr_ddindex2pmeindex(cr, i+1);
2152 if (i+1 == cr->dd->nnodes || p1 > p0)
2156 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2158 pmenodes[n] = i + 1 + n;
2166 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2169 ivec coords, coords_pme, nc;
2174 if (dd->comm->bCartesian) {
2175 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2176 dd_coords2pmecoords(dd,coords,coords_pme);
2177 copy_ivec(dd->ntot,nc);
2178 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2179 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2181 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2183 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2189 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
2194 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2196 gmx_domdec_comm_t *comm;
2198 int ddindex, nodeid = -1;
2200 comm = cr->dd->comm;
2205 if (comm->bCartesianPP_PME)
2208 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2213 ddindex = dd_index(cr->dd->nc, coords);
2214 if (comm->bCartesianPP)
2216 nodeid = comm->ddindex2simnodeid[ddindex];
2222 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
2234 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2237 gmx_domdec_comm_t *comm;
2238 ivec coord, coord_pme;
2245 /* This assumes a uniform x domain decomposition grid cell size */
2246 if (comm->bCartesianPP_PME)
2249 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2250 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2252 /* This is a PP node */
2253 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2254 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2258 else if (comm->bCartesianPP)
2260 if (sim_nodeid < dd->nnodes)
2262 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2267 /* This assumes DD cells with identical x coordinates
2268 * are numbered sequentially.
2270 if (dd->comm->pmenodes == NULL)
2272 if (sim_nodeid < dd->nnodes)
2274 /* The DD index equals the nodeid */
2275 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2281 while (sim_nodeid > dd->comm->pmenodes[i])
2285 if (sim_nodeid < dd->comm->pmenodes[i])
2287 pmenode = dd->comm->pmenodes[i];
2295 void get_pme_nnodes(const gmx_domdec_t *dd,
2296 int *npmenodes_x, int *npmenodes_y)
2300 *npmenodes_x = dd->comm->npmenodes_x;
2301 *npmenodes_y = dd->comm->npmenodes_y;
2310 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2312 gmx_bool bPMEOnlyNode;
2314 if (DOMAINDECOMP(cr))
2316 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2320 bPMEOnlyNode = FALSE;
2323 return bPMEOnlyNode;
2326 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2327 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2331 ivec coord, coord_pme;
2335 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2338 for (x = 0; x < dd->nc[XX]; x++)
2340 for (y = 0; y < dd->nc[YY]; y++)
2342 for (z = 0; z < dd->nc[ZZ]; z++)
2344 if (dd->comm->bCartesianPP_PME)
2349 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2350 if (dd->ci[XX] == coord_pme[XX] &&
2351 dd->ci[YY] == coord_pme[YY] &&
2352 dd->ci[ZZ] == coord_pme[ZZ])
2354 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2359 /* The slab corresponds to the nodeid in the PME group */
2360 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2362 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2369 /* The last PP-only node is the peer node */
2370 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2374 fprintf(debug, "Receive coordinates from PP ranks:");
2375 for (x = 0; x < *nmy_ddnodes; x++)
2377 fprintf(debug, " %d", (*my_ddnodes)[x]);
2379 fprintf(debug, "\n");
2383 static gmx_bool receive_vir_ener(t_commrec *cr)
2385 gmx_domdec_comm_t *comm;
2386 int pmenode, coords[DIM], rank;
2390 if (cr->npmenodes < cr->dd->nnodes)
2392 comm = cr->dd->comm;
2393 if (comm->bCartesianPP_PME)
2395 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2397 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2398 coords[comm->cartpmedim]++;
2399 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2401 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2402 if (dd_simnode2pmenode(cr, rank) == pmenode)
2404 /* This is not the last PP node for pmenode */
2412 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2413 if (cr->sim_nodeid+1 < cr->nnodes &&
2414 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2416 /* This is not the last PP node for pmenode */
2425 static void set_zones_ncg_home(gmx_domdec_t *dd)
2427 gmx_domdec_zones_t *zones;
2430 zones = &dd->comm->zones;
2432 zones->cg_range[0] = 0;
2433 for (i = 1; i < zones->n+1; i++)
2435 zones->cg_range[i] = dd->ncg_home;
2437 /* zone_ncg1[0] should always be equal to ncg_home */
2438 dd->comm->zone_ncg1[0] = dd->ncg_home;
2441 static void rebuild_cgindex(gmx_domdec_t *dd,
2442 const int *gcgs_index, t_state *state)
2444 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2447 dd_cg_gl = dd->index_gl;
2448 cgindex = dd->cgindex;
2451 for (i = 0; i < state->ncg_gl; i++)
2455 dd_cg_gl[i] = cg_gl;
2456 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2460 dd->ncg_home = state->ncg_gl;
2463 set_zones_ncg_home(dd);
2466 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2468 while (cg >= cginfo_mb->cg_end)
2473 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2476 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2477 t_forcerec *fr, char *bLocalCG)
2479 cginfo_mb_t *cginfo_mb;
2485 cginfo_mb = fr->cginfo_mb;
2486 cginfo = fr->cginfo;
2488 for (cg = cg0; cg < cg1; cg++)
2490 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2494 if (bLocalCG != NULL)
2496 for (cg = cg0; cg < cg1; cg++)
2498 bLocalCG[index_gl[cg]] = TRUE;
2503 static void make_dd_indices(gmx_domdec_t *dd,
2504 const int *gcgs_index, int cg_start)
2506 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2507 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2512 bLocalCG = dd->comm->bLocalCG;
2514 if (dd->nat_tot > dd->gatindex_nalloc)
2516 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2517 srenew(dd->gatindex, dd->gatindex_nalloc);
2520 nzone = dd->comm->zones.n;
2521 zone2cg = dd->comm->zones.cg_range;
2522 zone_ncg1 = dd->comm->zone_ncg1;
2523 index_gl = dd->index_gl;
2524 gatindex = dd->gatindex;
2525 bCGs = dd->comm->bCGs;
2527 if (zone2cg[1] != dd->ncg_home)
2529 gmx_incons("dd->ncg_zone is not up to date");
2532 /* Make the local to global and global to local atom index */
2533 a = dd->cgindex[cg_start];
2534 for (zone = 0; zone < nzone; zone++)
2542 cg0 = zone2cg[zone];
2544 cg1 = zone2cg[zone+1];
2545 cg1_p1 = cg0 + zone_ncg1[zone];
2547 for (cg = cg0; cg < cg1; cg++)
2552 /* Signal that this cg is from more than one pulse away */
2555 cg_gl = index_gl[cg];
2558 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2561 ga2la_set(dd->ga2la, a_gl, a, zone1);
2567 gatindex[a] = cg_gl;
2568 ga2la_set(dd->ga2la, cg_gl, a, zone1);
2575 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2578 int ncg, i, ngl, nerr;
2581 if (bLocalCG == NULL)
2585 for (i = 0; i < dd->ncg_tot; i++)
2587 if (!bLocalCG[dd->index_gl[i]])
2590 "DD rank %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
2595 for (i = 0; i < ncg_sys; i++)
2602 if (ngl != dd->ncg_tot)
2604 fprintf(stderr, "DD rank %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
2611 static void check_index_consistency(gmx_domdec_t *dd,
2612 int natoms_sys, int ncg_sys,
2615 int nerr, ngl, i, a, cell;
2620 if (dd->comm->DD_debug > 1)
2622 snew(have, natoms_sys);
2623 for (a = 0; a < dd->nat_tot; a++)
2625 if (have[dd->gatindex[a]] > 0)
2627 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2631 have[dd->gatindex[a]] = a + 1;
2637 snew(have, dd->nat_tot);
2640 for (i = 0; i < natoms_sys; i++)
2642 if (ga2la_get(dd->ga2la, i, &a, &cell))
2644 if (a >= dd->nat_tot)
2646 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2652 if (dd->gatindex[a] != i)
2654 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2661 if (ngl != dd->nat_tot)
2664 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
2665 dd->rank, where, ngl, dd->nat_tot);
2667 for (a = 0; a < dd->nat_tot; a++)
2672 "DD rank %d, %s: local atom %d, global %d has no global index\n",
2673 dd->rank, where, a+1, dd->gatindex[a]+1);
2678 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2682 gmx_fatal(FARGS, "DD rank %d, %s: %d atom/cg index inconsistencies",
2683 dd->rank, where, nerr);
2687 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2694 /* Clear the whole list without searching */
2695 ga2la_clear(dd->ga2la);
2699 for (i = a_start; i < dd->nat_tot; i++)
2701 ga2la_del(dd->ga2la, dd->gatindex[i]);
2705 bLocalCG = dd->comm->bLocalCG;
2708 for (i = cg_start; i < dd->ncg_tot; i++)
2710 bLocalCG[dd->index_gl[i]] = FALSE;
2714 dd_clear_local_vsite_indices(dd);
2716 if (dd->constraints)
2718 dd_clear_local_constraint_indices(dd);
2722 /* This function should be used for moving the domain boudaries during DLB,
2723 * for obtaining the minimum cell size. It checks the initially set limit
2724 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2725 * and, possibly, a longer cut-off limit set for PME load balancing.
2727 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2731 cellsize_min = comm->cellsize_min[dim];
2733 if (!comm->bVacDLBNoLimit)
2735 /* The cut-off might have changed, e.g. by PME load balacning,
2736 * from the value used to set comm->cellsize_min, so check it.
2738 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2740 if (comm->bPMELoadBalDLBLimits)
2742 /* Check for the cut-off limit set by the PME load balancing */
2743 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2747 return cellsize_min;
2750 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2753 real grid_jump_limit;
2755 /* The distance between the boundaries of cells at distance
2756 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2757 * and by the fact that cells should not be shifted by more than
2758 * half their size, such that cg's only shift by one cell
2759 * at redecomposition.
2761 grid_jump_limit = comm->cellsize_limit;
2762 if (!comm->bVacDLBNoLimit)
2764 if (comm->bPMELoadBalDLBLimits)
2766 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2768 grid_jump_limit = max(grid_jump_limit,
2769 cutoff/comm->cd[dim_ind].np);
2772 return grid_jump_limit;
2775 static gmx_bool check_grid_jump(gmx_int64_t step,
2781 gmx_domdec_comm_t *comm;
2790 for (d = 1; d < dd->ndim; d++)
2793 limit = grid_jump_limit(comm, cutoff, d);
2794 bfac = ddbox->box_size[dim];
2795 if (ddbox->tric_dir[dim])
2797 bfac *= ddbox->skew_fac[dim];
2799 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2800 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2808 /* This error should never be triggered under normal
2809 * circumstances, but you never know ...
2811 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
2812 gmx_step_str(step, buf),
2813 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2821 static int dd_load_count(gmx_domdec_comm_t *comm)
2823 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2826 static float dd_force_load(gmx_domdec_comm_t *comm)
2833 if (comm->eFlop > 1)
2835 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2840 load = comm->cycl[ddCyclF];
2841 if (comm->cycl_n[ddCyclF] > 1)
2843 /* Subtract the maximum of the last n cycle counts
2844 * to get rid of possible high counts due to other sources,
2845 * for instance system activity, that would otherwise
2846 * affect the dynamic load balancing.
2848 load -= comm->cycl_max[ddCyclF];
2852 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2854 float gpu_wait, gpu_wait_sum;
2856 gpu_wait = comm->cycl[ddCyclWaitGPU];
2857 if (comm->cycl_n[ddCyclF] > 1)
2859 /* We should remove the WaitGPU time of the same MD step
2860 * as the one with the maximum F time, since the F time
2861 * and the wait time are not independent.
2862 * Furthermore, the step for the max F time should be chosen
2863 * the same on all ranks that share the same GPU.
2864 * But to keep the code simple, we remove the average instead.
2865 * The main reason for artificially long times at some steps
2866 * is spurious CPU activity or MPI time, so we don't expect
2867 * that changes in the GPU wait time matter a lot here.
2869 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2871 /* Sum the wait times over the ranks that share the same GPU */
2872 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2873 comm->mpi_comm_gpu_shared);
2874 /* Replace the wait time by the average over the ranks */
2875 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
2883 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2885 gmx_domdec_comm_t *comm;
2890 snew(*dim_f, dd->nc[dim]+1);
2892 for (i = 1; i < dd->nc[dim]; i++)
2894 if (comm->slb_frac[dim])
2896 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2900 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2903 (*dim_f)[dd->nc[dim]] = 1;
2906 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2908 int pmeindex, slab, nso, i;
2911 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2917 ddpme->dim = dimind;
2919 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2921 ddpme->nslab = (ddpme->dim == 0 ?
2922 dd->comm->npmenodes_x :
2923 dd->comm->npmenodes_y);
2925 if (ddpme->nslab <= 1)
2930 nso = dd->comm->npmenodes/ddpme->nslab;
2931 /* Determine for each PME slab the PP location range for dimension dim */
2932 snew(ddpme->pp_min, ddpme->nslab);
2933 snew(ddpme->pp_max, ddpme->nslab);
2934 for (slab = 0; slab < ddpme->nslab; slab++)
2936 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2937 ddpme->pp_max[slab] = 0;
2939 for (i = 0; i < dd->nnodes; i++)
2941 ddindex2xyz(dd->nc, i, xyz);
2942 /* For y only use our y/z slab.
2943 * This assumes that the PME x grid size matches the DD grid size.
2945 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2947 pmeindex = ddindex2pmeindex(dd, i);
2950 slab = pmeindex/nso;
2954 slab = pmeindex % ddpme->nslab;
2956 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
2957 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
2961 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
2964 int dd_pme_maxshift_x(gmx_domdec_t *dd)
2966 if (dd->comm->ddpme[0].dim == XX)
2968 return dd->comm->ddpme[0].maxshift;
2976 int dd_pme_maxshift_y(gmx_domdec_t *dd)
2978 if (dd->comm->ddpme[0].dim == YY)
2980 return dd->comm->ddpme[0].maxshift;
2982 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2984 return dd->comm->ddpme[1].maxshift;
2992 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
2993 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
2995 gmx_domdec_comm_t *comm;
2998 real range, pme_boundary;
3002 nc = dd->nc[ddpme->dim];
3005 if (!ddpme->dim_match)
3007 /* PP decomposition is not along dim: the worst situation */
3010 else if (ns <= 3 || (bUniform && ns == nc))
3012 /* The optimal situation */
3017 /* We need to check for all pme nodes which nodes they
3018 * could possibly need to communicate with.
3020 xmin = ddpme->pp_min;
3021 xmax = ddpme->pp_max;
3022 /* Allow for atoms to be maximally 2/3 times the cut-off
3023 * out of their DD cell. This is a reasonable balance between
3024 * between performance and support for most charge-group/cut-off
3027 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3028 /* Avoid extra communication when we are exactly at a boundary */
3032 for (s = 0; s < ns; s++)
3034 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3035 pme_boundary = (real)s/ns;
3038 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3040 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3044 pme_boundary = (real)(s+1)/ns;
3047 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3049 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3056 ddpme->maxshift = sh;
3060 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3061 ddpme->dim, ddpme->maxshift);
3065 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3069 for (d = 0; d < dd->ndim; d++)
3072 if (dim < ddbox->nboundeddim &&
3073 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3074 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3076 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3077 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3078 dd->nc[dim], dd->comm->cellsize_limit);
3084 setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY
3087 /* Set the domain boundaries. Use for static (or no) load balancing,
3088 * and also for the starting state for dynamic load balancing.
3089 * setmode determine if and where the boundaries are stored, use enum above.
3090 * Returns the number communication pulses in npulse.
3092 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3093 int setmode, ivec npulse)
3095 gmx_domdec_comm_t *comm;
3098 real *cell_x, cell_dx, cellsize;
3102 for (d = 0; d < DIM; d++)
3104 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
3106 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3109 cell_dx = ddbox->box_size[d]/dd->nc[d];
3112 case setcellsizeslbMASTER:
3113 for (j = 0; j < dd->nc[d]+1; j++)
3115 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3118 case setcellsizeslbLOCAL:
3119 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3120 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
3125 cellsize = cell_dx*ddbox->skew_fac[d];
3126 while (cellsize*npulse[d] < comm->cutoff)
3130 cellsize_min[d] = cellsize;
3134 /* Statically load balanced grid */
3135 /* Also when we are not doing a master distribution we determine
3136 * all cell borders in a loop to obtain identical values
3137 * to the master distribution case and to determine npulse.
3139 if (setmode == setcellsizeslbMASTER)
3141 cell_x = dd->ma->cell_x[d];
3145 snew(cell_x, dd->nc[d]+1);
3147 cell_x[0] = ddbox->box0[d];
3148 for (j = 0; j < dd->nc[d]; j++)
3150 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3151 cell_x[j+1] = cell_x[j] + cell_dx;
3152 cellsize = cell_dx*ddbox->skew_fac[d];
3153 while (cellsize*npulse[d] < comm->cutoff &&
3154 npulse[d] < dd->nc[d]-1)
3158 cellsize_min[d] = min(cellsize_min[d], cellsize);
3160 if (setmode == setcellsizeslbLOCAL)
3162 comm->cell_x0[d] = cell_x[dd->ci[d]];
3163 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3165 if (setmode != setcellsizeslbMASTER)
3170 /* The following limitation is to avoid that a cell would receive
3171 * some of its own home charge groups back over the periodic boundary.
3172 * Double charge groups cause trouble with the global indices.
3174 if (d < ddbox->npbcdim &&
3175 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3177 char error_string[STRLEN];
3179 sprintf(error_string,
3180 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3181 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3183 dd->nc[d], dd->nc[d],
3184 dd->nnodes > dd->nc[d] ? "cells" : "ranks");
3186 if (setmode == setcellsizeslbLOCAL)
3188 gmx_fatal_collective(FARGS, NULL, dd, error_string);
3192 gmx_fatal(FARGS, error_string);
3197 if (!comm->bDynLoadBal)
3199 copy_rvec(cellsize_min, comm->cellsize_min);
3202 for (d = 0; d < comm->npmedecompdim; d++)
3204 set_pme_maxshift(dd, &comm->ddpme[d],
3205 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3206 comm->ddpme[d].slb_dim_f);
3211 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3212 int d, int dim, gmx_domdec_root_t *root,
3214 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3216 gmx_domdec_comm_t *comm;
3217 int ncd, i, j, nmin, nmin_old;
3218 gmx_bool bLimLo, bLimHi;
3220 real fac, halfway, cellsize_limit_f_i, region_size;
3221 gmx_bool bPBC, bLastHi = FALSE;
3222 int nrange[] = {range[0], range[1]};
3224 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
3230 bPBC = (dim < ddbox->npbcdim);
3232 cell_size = root->buf_ncd;
3236 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3239 /* First we need to check if the scaling does not make cells
3240 * smaller than the smallest allowed size.
3241 * We need to do this iteratively, since if a cell is too small,
3242 * it needs to be enlarged, which makes all the other cells smaller,
3243 * which could in turn make another cell smaller than allowed.
3245 for (i = range[0]; i < range[1]; i++)
3247 root->bCellMin[i] = FALSE;
3253 /* We need the total for normalization */
3255 for (i = range[0]; i < range[1]; i++)
3257 if (root->bCellMin[i] == FALSE)
3259 fac += cell_size[i];
3262 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3263 /* Determine the cell boundaries */
3264 for (i = range[0]; i < range[1]; i++)
3266 if (root->bCellMin[i] == FALSE)
3268 cell_size[i] *= fac;
3269 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3271 cellsize_limit_f_i = 0;
3275 cellsize_limit_f_i = cellsize_limit_f;
3277 if (cell_size[i] < cellsize_limit_f_i)
3279 root->bCellMin[i] = TRUE;
3280 cell_size[i] = cellsize_limit_f_i;
3284 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
3287 while (nmin > nmin_old);
3290 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3291 /* For this check we should not use DD_CELL_MARGIN,
3292 * but a slightly smaller factor,
3293 * since rounding could get use below the limit.
3295 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3298 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3299 gmx_step_str(step, buf),
3300 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3301 ncd, comm->cellsize_min[dim]);
3304 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3308 /* Check if the boundary did not displace more than halfway
3309 * each of the cells it bounds, as this could cause problems,
3310 * especially when the differences between cell sizes are large.
3311 * If changes are applied, they will not make cells smaller
3312 * than the cut-off, as we check all the boundaries which
3313 * might be affected by a change and if the old state was ok,
3314 * the cells will at most be shrunk back to their old size.
3316 for (i = range[0]+1; i < range[1]; i++)
3318 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3319 if (root->cell_f[i] < halfway)
3321 root->cell_f[i] = halfway;
3322 /* Check if the change also causes shifts of the next boundaries */
3323 for (j = i+1; j < range[1]; j++)
3325 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3327 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3331 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3332 if (root->cell_f[i] > halfway)
3334 root->cell_f[i] = halfway;
3335 /* Check if the change also causes shifts of the next boundaries */
3336 for (j = i-1; j >= range[0]+1; j--)
3338 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3340 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3347 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3348 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3349 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3350 * for a and b nrange is used */
3353 /* Take care of the staggering of the cell boundaries */
3356 for (i = range[0]; i < range[1]; i++)
3358 root->cell_f_max0[i] = root->cell_f[i];
3359 root->cell_f_min1[i] = root->cell_f[i+1];
3364 for (i = range[0]+1; i < range[1]; i++)
3366 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3367 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3368 if (bLimLo && bLimHi)
3370 /* Both limits violated, try the best we can */
3371 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3372 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3373 nrange[0] = range[0];
3375 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3378 nrange[1] = range[1];
3379 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3385 /* root->cell_f[i] = root->bound_min[i]; */
3386 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3389 else if (bLimHi && !bLastHi)
3392 if (nrange[1] < range[1]) /* found a LimLo before */
3394 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3395 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3396 nrange[0] = nrange[1];
3398 root->cell_f[i] = root->bound_max[i];
3400 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3402 nrange[1] = range[1];
3405 if (nrange[1] < range[1]) /* found last a LimLo */
3407 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3408 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3409 nrange[0] = nrange[1];
3410 nrange[1] = range[1];
3411 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3413 else if (nrange[0] > range[0]) /* found at least one LimHi */
3415 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3422 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3423 int d, int dim, gmx_domdec_root_t *root,
3424 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3425 gmx_bool bUniform, gmx_int64_t step)
3427 gmx_domdec_comm_t *comm;
3428 int ncd, d1, i, j, pos;
3430 real load_aver, load_i, imbalance, change, change_max, sc;
3431 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3435 int range[] = { 0, 0 };
3439 /* Convert the maximum change from the input percentage to a fraction */
3440 change_limit = comm->dlb_scale_lim*0.01;
3444 bPBC = (dim < ddbox->npbcdim);
3446 cell_size = root->buf_ncd;
3448 /* Store the original boundaries */
3449 for (i = 0; i < ncd+1; i++)
3451 root->old_cell_f[i] = root->cell_f[i];
3455 for (i = 0; i < ncd; i++)
3457 cell_size[i] = 1.0/ncd;
3460 else if (dd_load_count(comm))
3462 load_aver = comm->load[d].sum_m/ncd;
3464 for (i = 0; i < ncd; i++)
3466 /* Determine the relative imbalance of cell i */
3467 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3468 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3469 /* Determine the change of the cell size using underrelaxation */
3470 change = -relax*imbalance;
3471 change_max = max(change_max, max(change, -change));
3473 /* Limit the amount of scaling.
3474 * We need to use the same rescaling for all cells in one row,
3475 * otherwise the load balancing might not converge.
3478 if (change_max > change_limit)
3480 sc *= change_limit/change_max;
3482 for (i = 0; i < ncd; i++)
3484 /* Determine the relative imbalance of cell i */
3485 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3486 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3487 /* Determine the change of the cell size using underrelaxation */
3488 change = -sc*imbalance;
3489 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3493 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3494 cellsize_limit_f *= DD_CELL_MARGIN;
3495 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3496 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3497 if (ddbox->tric_dir[dim])
3499 cellsize_limit_f /= ddbox->skew_fac[dim];
3500 dist_min_f /= ddbox->skew_fac[dim];
3502 if (bDynamicBox && d > 0)
3504 dist_min_f *= DD_PRES_SCALE_MARGIN;
3506 if (d > 0 && !bUniform)
3508 /* Make sure that the grid is not shifted too much */
3509 for (i = 1; i < ncd; i++)
3511 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3513 gmx_incons("Inconsistent DD boundary staggering limits!");
3515 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3516 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3519 root->bound_min[i] += 0.5*space;
3521 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3522 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3525 root->bound_max[i] += 0.5*space;
3530 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3532 root->cell_f_max0[i-1] + dist_min_f,
3533 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3534 root->cell_f_min1[i] - dist_min_f);
3539 root->cell_f[0] = 0;
3540 root->cell_f[ncd] = 1;
3541 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3544 /* After the checks above, the cells should obey the cut-off
3545 * restrictions, but it does not hurt to check.
3547 for (i = 0; i < ncd; i++)
3551 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3552 dim, i, root->cell_f[i], root->cell_f[i+1]);
3555 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3556 root->cell_f[i+1] - root->cell_f[i] <
3557 cellsize_limit_f/DD_CELL_MARGIN)
3561 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3562 gmx_step_str(step, buf), dim2char(dim), i,
3563 (root->cell_f[i+1] - root->cell_f[i])
3564 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3569 /* Store the cell boundaries of the lower dimensions at the end */
3570 for (d1 = 0; d1 < d; d1++)
3572 root->cell_f[pos++] = comm->cell_f0[d1];
3573 root->cell_f[pos++] = comm->cell_f1[d1];
3576 if (d < comm->npmedecompdim)
3578 /* The master determines the maximum shift for
3579 * the coordinate communication between separate PME nodes.
3581 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
3583 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3586 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3590 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3591 gmx_ddbox_t *ddbox, int dimind)
3593 gmx_domdec_comm_t *comm;
3598 /* Set the cell dimensions */
3599 dim = dd->dim[dimind];
3600 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3601 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3602 if (dim >= ddbox->nboundeddim)
3604 comm->cell_x0[dim] += ddbox->box0[dim];
3605 comm->cell_x1[dim] += ddbox->box0[dim];
3609 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3610 int d, int dim, real *cell_f_row,
3613 gmx_domdec_comm_t *comm;
3619 /* Each node would only need to know two fractions,
3620 * but it is probably cheaper to broadcast the whole array.
3622 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3623 0, comm->mpi_comm_load[d]);
3625 /* Copy the fractions for this dimension from the buffer */
3626 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3627 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3628 /* The whole array was communicated, so set the buffer position */
3629 pos = dd->nc[dim] + 1;
3630 for (d1 = 0; d1 <= d; d1++)
3634 /* Copy the cell fractions of the lower dimensions */
3635 comm->cell_f0[d1] = cell_f_row[pos++];
3636 comm->cell_f1[d1] = cell_f_row[pos++];
3638 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3640 /* Convert the communicated shift from float to int */
3641 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3644 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3648 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3649 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3650 gmx_bool bUniform, gmx_int64_t step)
3652 gmx_domdec_comm_t *comm;
3654 gmx_bool bRowMember, bRowRoot;
3659 for (d = 0; d < dd->ndim; d++)
3664 for (d1 = d; d1 < dd->ndim; d1++)
3666 if (dd->ci[dd->dim[d1]] > 0)
3679 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3680 ddbox, bDynamicBox, bUniform, step);
3681 cell_f_row = comm->root[d]->cell_f;
3685 cell_f_row = comm->cell_f_row;
3687 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
3692 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3696 /* This function assumes the box is static and should therefore
3697 * not be called when the box has changed since the last
3698 * call to dd_partition_system.
3700 for (d = 0; d < dd->ndim; d++)
3702 relative_to_absolute_cell_bounds(dd, ddbox, d);
3708 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3709 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3710 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3711 gmx_wallcycle_t wcycle)
3713 gmx_domdec_comm_t *comm;
3720 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3721 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3722 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3724 else if (bDynamicBox)
3726 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3729 /* Set the dimensions for which no DD is used */
3730 for (dim = 0; dim < DIM; dim++)
3732 if (dd->nc[dim] == 1)
3734 comm->cell_x0[dim] = 0;
3735 comm->cell_x1[dim] = ddbox->box_size[dim];
3736 if (dim >= ddbox->nboundeddim)
3738 comm->cell_x0[dim] += ddbox->box0[dim];
3739 comm->cell_x1[dim] += ddbox->box0[dim];
3745 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3748 gmx_domdec_comm_dim_t *cd;
3750 for (d = 0; d < dd->ndim; d++)
3752 cd = &dd->comm->cd[d];
3753 np = npulse[dd->dim[d]];
3754 if (np > cd->np_nalloc)
3758 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3759 dim2char(dd->dim[d]), np);
3761 if (DDMASTER(dd) && cd->np_nalloc > 0)
3763 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3765 srenew(cd->ind, np);
3766 for (i = cd->np_nalloc; i < np; i++)
3768 cd->ind[i].index = NULL;
3769 cd->ind[i].nalloc = 0;
3778 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3779 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3780 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3781 gmx_wallcycle_t wcycle)
3783 gmx_domdec_comm_t *comm;
3789 /* Copy the old cell boundaries for the cg displacement check */
3790 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3791 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3793 if (comm->bDynLoadBal)
3797 check_box_size(dd, ddbox);
3799 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3803 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
3804 realloc_comm_ind(dd, npulse);
3809 for (d = 0; d < DIM; d++)
3811 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3812 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
3817 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3819 rvec cell_ns_x0, rvec cell_ns_x1,
3822 gmx_domdec_comm_t *comm;
3827 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3829 dim = dd->dim[dim_ind];
3831 /* Without PBC we don't have restrictions on the outer cells */
3832 if (!(dim >= ddbox->npbcdim &&
3833 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3834 comm->bDynLoadBal &&
3835 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3836 comm->cellsize_min[dim])
3839 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3840 gmx_step_str(step, buf), dim2char(dim),
3841 comm->cell_x1[dim] - comm->cell_x0[dim],
3842 ddbox->skew_fac[dim],
3843 dd->comm->cellsize_min[dim],
3844 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3848 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3850 /* Communicate the boundaries and update cell_ns_x0/1 */
3851 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3852 if (dd->bGridJump && dd->ndim > 1)
3854 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
3859 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3863 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3871 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3872 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3881 static void check_screw_box(matrix box)
3883 /* Mathematical limitation */
3884 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3886 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3889 /* Limitation due to the asymmetry of the eighth shell method */
3890 if (box[ZZ][YY] != 0)
3892 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
3896 static void distribute_cg(FILE *fplog, gmx_int64_t step,
3897 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3900 gmx_domdec_master_t *ma;
3901 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3902 int i, icg, j, k, k0, k1, d, npbcdim;
3904 rvec box_size, cg_cm;
3906 real nrcg, inv_ncg, pos_d;
3908 gmx_bool bUnbounded, bScrew;
3912 if (tmp_ind == NULL)
3914 snew(tmp_nalloc, dd->nnodes);
3915 snew(tmp_ind, dd->nnodes);
3916 for (i = 0; i < dd->nnodes; i++)
3918 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3919 snew(tmp_ind[i], tmp_nalloc[i]);
3923 /* Clear the count */
3924 for (i = 0; i < dd->nnodes; i++)
3930 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3932 cgindex = cgs->index;
3934 /* Compute the center of geometry for all charge groups */
3935 for (icg = 0; icg < cgs->nr; icg++)
3938 k1 = cgindex[icg+1];
3942 copy_rvec(pos[k0], cg_cm);
3949 for (k = k0; (k < k1); k++)
3951 rvec_inc(cg_cm, pos[k]);
3953 for (d = 0; (d < DIM); d++)
3955 cg_cm[d] *= inv_ncg;
3958 /* Put the charge group in the box and determine the cell index */
3959 for (d = DIM-1; d >= 0; d--)
3962 if (d < dd->npbcdim)
3964 bScrew = (dd->bScrewPBC && d == XX);
3965 if (tric_dir[d] && dd->nc[d] > 1)
3967 /* Use triclinic coordintates for this dimension */
3968 for (j = d+1; j < DIM; j++)
3970 pos_d += cg_cm[j]*tcm[j][d];
3973 while (pos_d >= box[d][d])
3976 rvec_dec(cg_cm, box[d]);
3979 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3980 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3982 for (k = k0; (k < k1); k++)
3984 rvec_dec(pos[k], box[d]);
3987 pos[k][YY] = box[YY][YY] - pos[k][YY];
3988 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3995 rvec_inc(cg_cm, box[d]);
3998 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3999 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
4001 for (k = k0; (k < k1); k++)
4003 rvec_inc(pos[k], box[d]);
4006 pos[k][YY] = box[YY][YY] - pos[k][YY];
4007 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4012 /* This could be done more efficiently */
4014 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
4019 i = dd_index(dd->nc, ind);
4020 if (ma->ncg[i] == tmp_nalloc[i])
4022 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
4023 srenew(tmp_ind[i], tmp_nalloc[i]);
4025 tmp_ind[i][ma->ncg[i]] = icg;
4027 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
4031 for (i = 0; i < dd->nnodes; i++)
4034 for (k = 0; k < ma->ncg[i]; k++)
4036 ma->cg[k1++] = tmp_ind[i][k];
4039 ma->index[dd->nnodes] = k1;
4041 for (i = 0; i < dd->nnodes; i++)
4051 fprintf(fplog, "Charge group distribution at step %s:",
4052 gmx_step_str(step, buf));
4053 for (i = 0; i < dd->nnodes; i++)
4055 fprintf(fplog, " %d", ma->ncg[i]);
4057 fprintf(fplog, "\n");
4061 static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4062 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4065 gmx_domdec_master_t *ma = NULL;
4068 int *ibuf, buf2[2] = { 0, 0 };
4069 gmx_bool bMaster = DDMASTER(dd);
4077 check_screw_box(box);
4080 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
4082 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
4083 for (i = 0; i < dd->nnodes; i++)
4085 ma->ibuf[2*i] = ma->ncg[i];
4086 ma->ibuf[2*i+1] = ma->nat[i];
4094 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4096 dd->ncg_home = buf2[0];
4097 dd->nat_home = buf2[1];
4098 dd->ncg_tot = dd->ncg_home;
4099 dd->nat_tot = dd->nat_home;
4100 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4102 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4103 srenew(dd->index_gl, dd->cg_nalloc);
4104 srenew(dd->cgindex, dd->cg_nalloc+1);
4108 for (i = 0; i < dd->nnodes; i++)
4110 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4111 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4116 DDMASTER(dd) ? ma->ibuf : NULL,
4117 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4118 DDMASTER(dd) ? ma->cg : NULL,
4119 dd->ncg_home*sizeof(int), dd->index_gl);
4121 /* Determine the home charge group sizes */
4123 for (i = 0; i < dd->ncg_home; i++)
4125 cg_gl = dd->index_gl[i];
4127 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4132 fprintf(debug, "Home charge groups:\n");
4133 for (i = 0; i < dd->ncg_home; i++)
4135 fprintf(debug, " %d", dd->index_gl[i]);
4138 fprintf(debug, "\n");
4141 fprintf(debug, "\n");
4145 static int compact_and_copy_vec_at(int ncg, int *move,
4148 rvec *src, gmx_domdec_comm_t *comm,
4151 int m, icg, i, i0, i1, nrcg;
4157 for (m = 0; m < DIM*2; m++)
4163 for (icg = 0; icg < ncg; icg++)
4165 i1 = cgindex[icg+1];
4171 /* Compact the home array in place */
4172 for (i = i0; i < i1; i++)
4174 copy_rvec(src[i], src[home_pos++]);
4180 /* Copy to the communication buffer */
4182 pos_vec[m] += 1 + vec*nrcg;
4183 for (i = i0; i < i1; i++)
4185 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4187 pos_vec[m] += (nvec - vec - 1)*nrcg;
4191 home_pos += i1 - i0;
4199 static int compact_and_copy_vec_cg(int ncg, int *move,
4201 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4204 int m, icg, i0, i1, nrcg;
4210 for (m = 0; m < DIM*2; m++)
4216 for (icg = 0; icg < ncg; icg++)
4218 i1 = cgindex[icg+1];
4224 /* Compact the home array in place */
4225 copy_rvec(src[icg], src[home_pos++]);
4231 /* Copy to the communication buffer */
4232 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
4233 pos_vec[m] += 1 + nrcg*nvec;
4245 static int compact_ind(int ncg, int *move,
4246 int *index_gl, int *cgindex,
4248 gmx_ga2la_t ga2la, char *bLocalCG,
4251 int cg, nat, a0, a1, a, a_gl;
4256 for (cg = 0; cg < ncg; cg++)
4262 /* Compact the home arrays in place.
4263 * Anything that can be done here avoids access to global arrays.
4265 cgindex[home_pos] = nat;
4266 for (a = a0; a < a1; a++)
4269 gatindex[nat] = a_gl;
4270 /* The cell number stays 0, so we don't need to set it */
4271 ga2la_change_la(ga2la, a_gl, nat);
4274 index_gl[home_pos] = index_gl[cg];
4275 cginfo[home_pos] = cginfo[cg];
4276 /* The charge group remains local, so bLocalCG does not change */
4281 /* Clear the global indices */
4282 for (a = a0; a < a1; a++)
4284 ga2la_del(ga2la, gatindex[a]);
4288 bLocalCG[index_gl[cg]] = FALSE;
4292 cgindex[home_pos] = nat;
4297 static void clear_and_mark_ind(int ncg, int *move,
4298 int *index_gl, int *cgindex, int *gatindex,
4299 gmx_ga2la_t ga2la, char *bLocalCG,
4304 for (cg = 0; cg < ncg; cg++)
4310 /* Clear the global indices */
4311 for (a = a0; a < a1; a++)
4313 ga2la_del(ga2la, gatindex[a]);
4317 bLocalCG[index_gl[cg]] = FALSE;
4319 /* Signal that this cg has moved using the ns cell index.
4320 * Here we set it to -1. fill_grid will change it
4321 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4323 cell_index[cg] = -1;
4328 static void print_cg_move(FILE *fplog,
4330 gmx_int64_t step, int cg, int dim, int dir,
4331 gmx_bool bHaveLimitdAndCMOld, real limitd,
4332 rvec cm_old, rvec cm_new, real pos_d)
4334 gmx_domdec_comm_t *comm;
4339 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4340 if (bHaveLimitdAndCMOld)
4342 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4343 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4347 fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4348 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4350 fprintf(fplog, "distance out of cell %f\n",
4351 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4352 if (bHaveLimitdAndCMOld)
4354 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4355 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4357 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4358 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4359 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4361 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4362 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4364 comm->cell_x0[dim], comm->cell_x1[dim]);
4367 static void cg_move_error(FILE *fplog,
4369 gmx_int64_t step, int cg, int dim, int dir,
4370 gmx_bool bHaveLimitdAndCMOld, real limitd,
4371 rvec cm_old, rvec cm_new, real pos_d)
4375 print_cg_move(fplog, dd, step, cg, dim, dir,
4376 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4378 print_cg_move(stderr, dd, step, cg, dim, dir,
4379 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4381 "A charge group moved too far between two domain decomposition steps\n"
4382 "This usually means that your system is not well equilibrated");
4385 static void rotate_state_atom(t_state *state, int a)
4389 for (est = 0; est < estNR; est++)
4391 if (EST_DISTR(est) && (state->flags & (1<<est)))
4396 /* Rotate the complete state; for a rectangular box only */
4397 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4398 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4401 state->v[a][YY] = -state->v[a][YY];
4402 state->v[a][ZZ] = -state->v[a][ZZ];
4405 state->sd_X[a][YY] = -state->sd_X[a][YY];
4406 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4409 state->cg_p[a][YY] = -state->cg_p[a][YY];
4410 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4412 case estDISRE_INITF:
4413 case estDISRE_RM3TAV:
4414 case estORIRE_INITF:
4416 /* These are distances, so not affected by rotation */
4419 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4425 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4427 if (natoms > comm->moved_nalloc)
4429 /* Contents should be preserved here */
4430 comm->moved_nalloc = over_alloc_dd(natoms);
4431 srenew(comm->moved, comm->moved_nalloc);
4437 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4440 ivec tric_dir, matrix tcm,
4441 rvec cell_x0, rvec cell_x1,
4442 rvec limitd, rvec limit0, rvec limit1,
4444 int cg_start, int cg_end,
4449 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4450 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4454 real inv_ncg, pos_d;
4457 npbcdim = dd->npbcdim;
4459 for (cg = cg_start; cg < cg_end; cg++)
4466 copy_rvec(state->x[k0], cm_new);
4473 for (k = k0; (k < k1); k++)
4475 rvec_inc(cm_new, state->x[k]);
4477 for (d = 0; (d < DIM); d++)
4479 cm_new[d] = inv_ncg*cm_new[d];
4484 /* Do pbc and check DD cell boundary crossings */
4485 for (d = DIM-1; d >= 0; d--)
4489 bScrew = (dd->bScrewPBC && d == XX);
4490 /* Determine the location of this cg in lattice coordinates */
4494 for (d2 = d+1; d2 < DIM; d2++)
4496 pos_d += cm_new[d2]*tcm[d2][d];
4499 /* Put the charge group in the triclinic unit-cell */
4500 if (pos_d >= cell_x1[d])
4502 if (pos_d >= limit1[d])
4504 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4505 cg_cm[cg], cm_new, pos_d);
4508 if (dd->ci[d] == dd->nc[d] - 1)
4510 rvec_dec(cm_new, state->box[d]);
4513 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4514 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4516 for (k = k0; (k < k1); k++)
4518 rvec_dec(state->x[k], state->box[d]);
4521 rotate_state_atom(state, k);
4526 else if (pos_d < cell_x0[d])
4528 if (pos_d < limit0[d])
4530 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4531 cg_cm[cg], cm_new, pos_d);
4536 rvec_inc(cm_new, state->box[d]);
4539 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4540 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4542 for (k = k0; (k < k1); k++)
4544 rvec_inc(state->x[k], state->box[d]);
4547 rotate_state_atom(state, k);
4553 else if (d < npbcdim)
4555 /* Put the charge group in the rectangular unit-cell */
4556 while (cm_new[d] >= state->box[d][d])
4558 rvec_dec(cm_new, state->box[d]);
4559 for (k = k0; (k < k1); k++)
4561 rvec_dec(state->x[k], state->box[d]);
4564 while (cm_new[d] < 0)
4566 rvec_inc(cm_new, state->box[d]);
4567 for (k = k0; (k < k1); k++)
4569 rvec_inc(state->x[k], state->box[d]);
4575 copy_rvec(cm_new, cg_cm[cg]);
4577 /* Determine where this cg should go */
4580 for (d = 0; d < dd->ndim; d++)
4585 flag |= DD_FLAG_FW(d);
4591 else if (dev[dim] == -1)
4593 flag |= DD_FLAG_BW(d);
4596 if (dd->nc[dim] > 2)
4607 /* Temporarily store the flag in move */
4608 move[cg] = mc + flag;
4612 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4613 gmx_domdec_t *dd, ivec tric_dir,
4614 t_state *state, rvec **f,
4623 int ncg[DIM*2], nat[DIM*2];
4624 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4625 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4626 int sbuf[2], rbuf[2];
4627 int home_pos_cg, home_pos_at, buf_pos;
4629 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4632 real inv_ncg, pos_d;
4634 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4636 cginfo_mb_t *cginfo_mb;
4637 gmx_domdec_comm_t *comm;
4639 int nthread, thread;
4643 check_screw_box(state->box);
4647 if (fr->cutoff_scheme == ecutsGROUP)
4652 for (i = 0; i < estNR; i++)
4658 case estX: /* Always present */ break;
4659 case estV: bV = (state->flags & (1<<i)); break;
4660 case estSDX: bSDX = (state->flags & (1<<i)); break;
4661 case estCGP: bCGP = (state->flags & (1<<i)); break;
4664 case estDISRE_INITF:
4665 case estDISRE_RM3TAV:
4666 case estORIRE_INITF:
4668 /* No processing required */
4671 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4676 if (dd->ncg_tot > comm->nalloc_int)
4678 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4679 srenew(comm->buf_int, comm->nalloc_int);
4681 move = comm->buf_int;
4683 /* Clear the count */
4684 for (c = 0; c < dd->ndim*2; c++)
4690 npbcdim = dd->npbcdim;
4692 for (d = 0; (d < DIM); d++)
4694 limitd[d] = dd->comm->cellsize_min[d];
4695 if (d >= npbcdim && dd->ci[d] == 0)
4697 cell_x0[d] = -GMX_FLOAT_MAX;
4701 cell_x0[d] = comm->cell_x0[d];
4703 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4705 cell_x1[d] = GMX_FLOAT_MAX;
4709 cell_x1[d] = comm->cell_x1[d];
4713 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4714 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4718 /* We check after communication if a charge group moved
4719 * more than one cell. Set the pre-comm check limit to float_max.
4721 limit0[d] = -GMX_FLOAT_MAX;
4722 limit1[d] = GMX_FLOAT_MAX;
4726 make_tric_corr_matrix(npbcdim, state->box, tcm);
4728 cgindex = dd->cgindex;
4730 nthread = gmx_omp_nthreads_get(emntDomdec);
4732 /* Compute the center of geometry for all home charge groups
4733 * and put them in the box and determine where they should go.
4735 #pragma omp parallel for num_threads(nthread) schedule(static)
4736 for (thread = 0; thread < nthread; thread++)
4738 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4739 cell_x0, cell_x1, limitd, limit0, limit1,
4741 ( thread *dd->ncg_home)/nthread,
4742 ((thread+1)*dd->ncg_home)/nthread,
4743 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4747 for (cg = 0; cg < dd->ncg_home; cg++)
4752 flag = mc & ~DD_FLAG_NRCG;
4753 mc = mc & DD_FLAG_NRCG;
4756 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4758 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4759 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4761 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4762 /* We store the cg size in the lower 16 bits
4763 * and the place where the charge group should go
4764 * in the next 6 bits. This saves some communication volume.
4766 nrcg = cgindex[cg+1] - cgindex[cg];
4767 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4773 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4774 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4777 for (i = 0; i < dd->ndim*2; i++)
4779 *ncg_moved += ncg[i];
4796 /* Make sure the communication buffers are large enough */
4797 for (mc = 0; mc < dd->ndim*2; mc++)
4799 nvr = ncg[mc] + nat[mc]*nvec;
4800 if (nvr > comm->cgcm_state_nalloc[mc])
4802 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4803 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4807 switch (fr->cutoff_scheme)
4810 /* Recalculating cg_cm might be cheaper than communicating,
4811 * but that could give rise to rounding issues.
4814 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4815 nvec, cg_cm, comm, bCompact);
4818 /* Without charge groups we send the moved atom coordinates
4819 * over twice. This is so the code below can be used without
4820 * many conditionals for both for with and without charge groups.
4823 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4824 nvec, state->x, comm, FALSE);
4827 home_pos_cg -= *ncg_moved;
4831 gmx_incons("unimplemented");
4837 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4838 nvec, vec++, state->x, comm, bCompact);
4841 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4842 nvec, vec++, state->v, comm, bCompact);
4846 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4847 nvec, vec++, state->sd_X, comm, bCompact);
4851 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4852 nvec, vec++, state->cg_p, comm, bCompact);
4857 compact_ind(dd->ncg_home, move,
4858 dd->index_gl, dd->cgindex, dd->gatindex,
4859 dd->ga2la, comm->bLocalCG,
4864 if (fr->cutoff_scheme == ecutsVERLET)
4866 moved = get_moved(comm, dd->ncg_home);
4868 for (k = 0; k < dd->ncg_home; k++)
4875 moved = fr->ns.grid->cell_index;
4878 clear_and_mark_ind(dd->ncg_home, move,
4879 dd->index_gl, dd->cgindex, dd->gatindex,
4880 dd->ga2la, comm->bLocalCG,
4884 cginfo_mb = fr->cginfo_mb;
4886 *ncg_stay_home = home_pos_cg;
4887 for (d = 0; d < dd->ndim; d++)
4893 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4896 /* Communicate the cg and atom counts */
4901 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4902 d, dir, sbuf[0], sbuf[1]);
4904 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4906 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4908 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4909 srenew(comm->buf_int, comm->nalloc_int);
4912 /* Communicate the charge group indices, sizes and flags */
4913 dd_sendrecv_int(dd, d, dir,
4914 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4915 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4917 nvs = ncg[cdd] + nat[cdd]*nvec;
4918 i = rbuf[0] + rbuf[1] *nvec;
4919 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4921 /* Communicate cgcm and state */
4922 dd_sendrecv_rvec(dd, d, dir,
4923 comm->cgcm_state[cdd], nvs,
4924 comm->vbuf.v+nvr, i);
4925 ncg_recv += rbuf[0];
4926 nat_recv += rbuf[1];
4930 /* Process the received charge groups */
4932 for (cg = 0; cg < ncg_recv; cg++)
4934 flag = comm->buf_int[cg*DD_CGIBS+1];
4936 if (dim >= npbcdim && dd->nc[dim] > 2)
4938 /* No pbc in this dim and more than one domain boundary.
4939 * We do a separate check if a charge group didn't move too far.
4941 if (((flag & DD_FLAG_FW(d)) &&
4942 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4943 ((flag & DD_FLAG_BW(d)) &&
4944 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4946 cg_move_error(fplog, dd, step, cg, dim,
4947 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4949 comm->vbuf.v[buf_pos],
4950 comm->vbuf.v[buf_pos],
4951 comm->vbuf.v[buf_pos][dim]);
4958 /* Check which direction this cg should go */
4959 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4963 /* The cell boundaries for dimension d2 are not equal
4964 * for each cell row of the lower dimension(s),
4965 * therefore we might need to redetermine where
4966 * this cg should go.
4969 /* If this cg crosses the box boundary in dimension d2
4970 * we can use the communicated flag, so we do not
4971 * have to worry about pbc.
4973 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4974 (flag & DD_FLAG_FW(d2))) ||
4975 (dd->ci[dim2] == 0 &&
4976 (flag & DD_FLAG_BW(d2)))))
4978 /* Clear the two flags for this dimension */
4979 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4980 /* Determine the location of this cg
4981 * in lattice coordinates
4983 pos_d = comm->vbuf.v[buf_pos][dim2];
4986 for (d3 = dim2+1; d3 < DIM; d3++)
4989 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4992 /* Check of we are not at the box edge.
4993 * pbc is only handled in the first step above,
4994 * but this check could move over pbc while
4995 * the first step did not due to different rounding.
4997 if (pos_d >= cell_x1[dim2] &&
4998 dd->ci[dim2] != dd->nc[dim2]-1)
5000 flag |= DD_FLAG_FW(d2);
5002 else if (pos_d < cell_x0[dim2] &&
5005 flag |= DD_FLAG_BW(d2);
5007 comm->buf_int[cg*DD_CGIBS+1] = flag;
5010 /* Set to which neighboring cell this cg should go */
5011 if (flag & DD_FLAG_FW(d2))
5015 else if (flag & DD_FLAG_BW(d2))
5017 if (dd->nc[dd->dim[d2]] > 2)
5029 nrcg = flag & DD_FLAG_NRCG;
5032 if (home_pos_cg+1 > dd->cg_nalloc)
5034 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5035 srenew(dd->index_gl, dd->cg_nalloc);
5036 srenew(dd->cgindex, dd->cg_nalloc+1);
5038 /* Set the global charge group index and size */
5039 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5040 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5041 /* Copy the state from the buffer */
5042 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5043 if (fr->cutoff_scheme == ecutsGROUP)
5046 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5050 /* Set the cginfo */
5051 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5052 dd->index_gl[home_pos_cg]);
5055 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5058 if (home_pos_at+nrcg > state->nalloc)
5060 dd_realloc_state(state, f, home_pos_at+nrcg);
5062 for (i = 0; i < nrcg; i++)
5064 copy_rvec(comm->vbuf.v[buf_pos++],
5065 state->x[home_pos_at+i]);
5069 for (i = 0; i < nrcg; i++)
5071 copy_rvec(comm->vbuf.v[buf_pos++],
5072 state->v[home_pos_at+i]);
5077 for (i = 0; i < nrcg; i++)
5079 copy_rvec(comm->vbuf.v[buf_pos++],
5080 state->sd_X[home_pos_at+i]);
5085 for (i = 0; i < nrcg; i++)
5087 copy_rvec(comm->vbuf.v[buf_pos++],
5088 state->cg_p[home_pos_at+i]);
5092 home_pos_at += nrcg;
5096 /* Reallocate the buffers if necessary */
5097 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5099 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5100 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5102 nvr = ncg[mc] + nat[mc]*nvec;
5103 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5105 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5106 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5108 /* Copy from the receive to the send buffers */
5109 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5110 comm->buf_int + cg*DD_CGIBS,
5111 DD_CGIBS*sizeof(int));
5112 memcpy(comm->cgcm_state[mc][nvr],
5113 comm->vbuf.v[buf_pos],
5114 (1+nrcg*nvec)*sizeof(rvec));
5115 buf_pos += 1 + nrcg*nvec;
5122 /* With sorting (!bCompact) the indices are now only partially up to date
5123 * and ncg_home and nat_home are not the real count, since there are
5124 * "holes" in the arrays for the charge groups that moved to neighbors.
5126 if (fr->cutoff_scheme == ecutsVERLET)
5128 moved = get_moved(comm, home_pos_cg);
5130 for (i = dd->ncg_home; i < home_pos_cg; i++)
5135 dd->ncg_home = home_pos_cg;
5136 dd->nat_home = home_pos_at;
5141 "Finished repartitioning: cgs moved out %d, new home %d\n",
5142 *ncg_moved, dd->ncg_home-*ncg_moved);
5147 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5149 dd->comm->cycl[ddCycl] += cycles;
5150 dd->comm->cycl_n[ddCycl]++;
5151 if (cycles > dd->comm->cycl_max[ddCycl])
5153 dd->comm->cycl_max[ddCycl] = cycles;
5157 static double force_flop_count(t_nrnb *nrnb)
5164 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5166 /* To get closer to the real timings, we half the count
5167 * for the normal loops and again half it for water loops.
5170 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5172 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5176 sum += nrnb->n[i]*0.50*cost_nrnb(i);
5179 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5182 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5184 sum += nrnb->n[i]*cost_nrnb(i);
5187 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5189 sum += nrnb->n[i]*cost_nrnb(i);
5195 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5197 if (dd->comm->eFlop)
5199 dd->comm->flop -= force_flop_count(nrnb);
5202 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5204 if (dd->comm->eFlop)
5206 dd->comm->flop += force_flop_count(nrnb);
5211 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5215 for (i = 0; i < ddCyclNr; i++)
5217 dd->comm->cycl[i] = 0;
5218 dd->comm->cycl_n[i] = 0;
5219 dd->comm->cycl_max[i] = 0;
5222 dd->comm->flop_n = 0;
5225 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5227 gmx_domdec_comm_t *comm;
5228 gmx_domdec_load_t *load;
5229 gmx_domdec_root_t *root = NULL;
5230 int d, dim, cid, i, pos;
5231 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5236 fprintf(debug, "get_load_distribution start\n");
5239 wallcycle_start(wcycle, ewcDDCOMMLOAD);
5243 bSepPME = (dd->pme_nodeid >= 0);
5245 for (d = dd->ndim-1; d >= 0; d--)
5248 /* Check if we participate in the communication in this dimension */
5249 if (d == dd->ndim-1 ||
5250 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5252 load = &comm->load[d];
5255 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
5258 if (d == dd->ndim-1)
5260 sbuf[pos++] = dd_force_load(comm);
5261 sbuf[pos++] = sbuf[0];
5264 sbuf[pos++] = sbuf[0];
5265 sbuf[pos++] = cell_frac;
5268 sbuf[pos++] = comm->cell_f_max0[d];
5269 sbuf[pos++] = comm->cell_f_min1[d];
5274 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5275 sbuf[pos++] = comm->cycl[ddCyclPME];
5280 sbuf[pos++] = comm->load[d+1].sum;
5281 sbuf[pos++] = comm->load[d+1].max;
5284 sbuf[pos++] = comm->load[d+1].sum_m;
5285 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5286 sbuf[pos++] = comm->load[d+1].flags;
5289 sbuf[pos++] = comm->cell_f_max0[d];
5290 sbuf[pos++] = comm->cell_f_min1[d];
5295 sbuf[pos++] = comm->load[d+1].mdf;
5296 sbuf[pos++] = comm->load[d+1].pme;
5300 /* Communicate a row in DD direction d.
5301 * The communicators are setup such that the root always has rank 0.
5304 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5305 load->load, load->nload*sizeof(float), MPI_BYTE,
5306 0, comm->mpi_comm_load[d]);
5308 if (dd->ci[dim] == dd->master_ci[dim])
5310 /* We are the root, process this row */
5311 if (comm->bDynLoadBal)
5313 root = comm->root[d];
5323 for (i = 0; i < dd->nc[dim]; i++)
5325 load->sum += load->load[pos++];
5326 load->max = max(load->max, load->load[pos]);
5332 /* This direction could not be load balanced properly,
5333 * therefore we need to use the maximum iso the average load.
5335 load->sum_m = max(load->sum_m, load->load[pos]);
5339 load->sum_m += load->load[pos];
5342 load->cvol_min = min(load->cvol_min, load->load[pos]);
5346 load->flags = (int)(load->load[pos++] + 0.5);
5350 root->cell_f_max0[i] = load->load[pos++];
5351 root->cell_f_min1[i] = load->load[pos++];
5356 load->mdf = max(load->mdf, load->load[pos]);
5358 load->pme = max(load->pme, load->load[pos]);
5362 if (comm->bDynLoadBal && root->bLimited)
5364 load->sum_m *= dd->nc[dim];
5365 load->flags |= (1<<d);
5373 comm->nload += dd_load_count(comm);
5374 comm->load_step += comm->cycl[ddCyclStep];
5375 comm->load_sum += comm->load[0].sum;
5376 comm->load_max += comm->load[0].max;
5377 if (comm->bDynLoadBal)
5379 for (d = 0; d < dd->ndim; d++)
5381 if (comm->load[0].flags & (1<<d))
5383 comm->load_lim[d]++;
5389 comm->load_mdf += comm->load[0].mdf;
5390 comm->load_pme += comm->load[0].pme;
5394 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5398 fprintf(debug, "get_load_distribution finished\n");
5402 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5404 /* Return the relative performance loss on the total run time
5405 * due to the force calculation load imbalance.
5407 if (dd->comm->nload > 0)
5410 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5411 (dd->comm->load_step*dd->nnodes);
5419 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5422 int npp, npme, nnodes, d, limp;
5423 float imbal, pme_f_ratio, lossf, lossp = 0;
5425 gmx_domdec_comm_t *comm;
5428 if (DDMASTER(dd) && comm->nload > 0)
5431 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5432 nnodes = npp + npme;
5433 imbal = comm->load_max*npp/comm->load_sum - 1;
5434 lossf = dd_force_imb_perf_loss(dd);
5435 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5436 fprintf(fplog, "%s", buf);
5437 fprintf(stderr, "\n");
5438 fprintf(stderr, "%s", buf);
5439 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5440 fprintf(fplog, "%s", buf);
5441 fprintf(stderr, "%s", buf);
5443 if (comm->bDynLoadBal)
5445 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5446 for (d = 0; d < dd->ndim; d++)
5448 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5449 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5455 sprintf(buf+strlen(buf), "\n");
5456 fprintf(fplog, "%s", buf);
5457 fprintf(stderr, "%s", buf);
5461 pme_f_ratio = comm->load_pme/comm->load_mdf;
5462 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5465 lossp *= (float)npme/(float)nnodes;
5469 lossp *= (float)npp/(float)nnodes;
5471 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5472 fprintf(fplog, "%s", buf);
5473 fprintf(stderr, "%s", buf);
5474 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5475 fprintf(fplog, "%s", buf);
5476 fprintf(stderr, "%s", buf);
5478 fprintf(fplog, "\n");
5479 fprintf(stderr, "\n");
5481 if (lossf >= DD_PERF_LOSS_WARN)
5484 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5485 " in the domain decomposition.\n", lossf*100);
5486 if (!comm->bDynLoadBal)
5488 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5492 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5494 fprintf(fplog, "%s\n", buf);
5495 fprintf(stderr, "%s\n", buf);
5497 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS_WARN)
5500 "NOTE: %.1f %% performance was lost because the PME ranks\n"
5501 " had %s work to do than the PP ranks.\n"
5502 " You might want to %s the number of PME ranks\n"
5503 " or %s the cut-off and the grid spacing.\n",
5505 (lossp < 0) ? "less" : "more",
5506 (lossp < 0) ? "decrease" : "increase",
5507 (lossp < 0) ? "decrease" : "increase");
5508 fprintf(fplog, "%s\n", buf);
5509 fprintf(stderr, "%s\n", buf);
5514 static float dd_vol_min(gmx_domdec_t *dd)
5516 return dd->comm->load[0].cvol_min*dd->nnodes;
5519 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5521 return dd->comm->load[0].flags;
5524 static float dd_f_imbal(gmx_domdec_t *dd)
5526 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5529 float dd_pme_f_ratio(gmx_domdec_t *dd)
5531 if (dd->comm->cycl_n[ddCyclPME] > 0)
5533 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5541 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5546 flags = dd_load_flags(dd);
5550 "DD load balancing is limited by minimum cell size in dimension");
5551 for (d = 0; d < dd->ndim; d++)
5555 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5558 fprintf(fplog, "\n");
5560 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5561 if (dd->comm->bDynLoadBal)
5563 fprintf(fplog, " vol min/aver %5.3f%c",
5564 dd_vol_min(dd), flags ? '!' : ' ');
5566 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5567 if (dd->comm->cycl_n[ddCyclPME])
5569 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5571 fprintf(fplog, "\n\n");
5574 static void dd_print_load_verbose(gmx_domdec_t *dd)
5576 if (dd->comm->bDynLoadBal)
5578 fprintf(stderr, "vol %4.2f%c ",
5579 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5581 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5582 if (dd->comm->cycl_n[ddCyclPME])
5584 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
5589 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5594 gmx_domdec_root_t *root;
5595 gmx_bool bPartOfGroup = FALSE;
5597 dim = dd->dim[dim_ind];
5598 copy_ivec(loc, loc_c);
5599 for (i = 0; i < dd->nc[dim]; i++)
5602 rank = dd_index(dd->nc, loc_c);
5603 if (rank == dd->rank)
5605 /* This process is part of the group */
5606 bPartOfGroup = TRUE;
5609 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5613 dd->comm->mpi_comm_load[dim_ind] = c_row;
5614 if (dd->comm->eDLB != edlbNO)
5616 if (dd->ci[dim] == dd->master_ci[dim])
5618 /* This is the root process of this row */
5619 snew(dd->comm->root[dim_ind], 1);
5620 root = dd->comm->root[dim_ind];
5621 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5622 snew(root->old_cell_f, dd->nc[dim]+1);
5623 snew(root->bCellMin, dd->nc[dim]);
5626 snew(root->cell_f_max0, dd->nc[dim]);
5627 snew(root->cell_f_min1, dd->nc[dim]);
5628 snew(root->bound_min, dd->nc[dim]);
5629 snew(root->bound_max, dd->nc[dim]);
5631 snew(root->buf_ncd, dd->nc[dim]);
5635 /* This is not a root process, we only need to receive cell_f */
5636 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5639 if (dd->ci[dim] == dd->master_ci[dim])
5641 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
5647 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5648 const gmx_hw_info_t gmx_unused *hwinfo,
5649 const gmx_hw_opt_t gmx_unused *hw_opt)
5652 int physicalnode_id_hash;
5655 MPI_Comm mpi_comm_pp_physicalnode;
5657 if (!(cr->duty & DUTY_PP) ||
5658 hw_opt->gpu_opt.ncuda_dev_use == 0)
5660 /* Only PP nodes (currently) use GPUs.
5661 * If we don't have GPUs, there are no resources to share.
5666 physicalnode_id_hash = gmx_physicalnode_id_hash();
5668 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5674 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5675 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5676 dd->rank, physicalnode_id_hash, gpu_id);
5678 /* Split the PP communicator over the physical nodes */
5679 /* TODO: See if we should store this (before), as it's also used for
5680 * for the nodecomm summution.
5682 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5683 &mpi_comm_pp_physicalnode);
5684 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5685 &dd->comm->mpi_comm_gpu_shared);
5686 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5687 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5691 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5694 /* Note that some ranks could share a GPU, while others don't */
5696 if (dd->comm->nrank_gpu_shared == 1)
5698 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
5703 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5706 int dim0, dim1, i, j;
5711 fprintf(debug, "Making load communicators\n");
5714 snew(dd->comm->load, dd->ndim);
5715 snew(dd->comm->mpi_comm_load, dd->ndim);
5718 make_load_communicator(dd, 0, loc);
5722 for (i = 0; i < dd->nc[dim0]; i++)
5725 make_load_communicator(dd, 1, loc);
5731 for (i = 0; i < dd->nc[dim0]; i++)
5735 for (j = 0; j < dd->nc[dim1]; j++)
5738 make_load_communicator(dd, 2, loc);
5745 fprintf(debug, "Finished making load communicators\n");
5750 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5753 int d, dim, i, j, m;
5756 ivec dd_zp[DD_MAXIZONE];
5757 gmx_domdec_zones_t *zones;
5758 gmx_domdec_ns_ranges_t *izone;
5760 for (d = 0; d < dd->ndim; d++)
5763 copy_ivec(dd->ci, tmp);
5764 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5765 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5766 copy_ivec(dd->ci, tmp);
5767 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5768 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5771 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5774 dd->neighbor[d][1]);
5780 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5782 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5783 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5790 for (i = 0; i < nzonep; i++)
5792 copy_ivec(dd_zp3[i], dd_zp[i]);
5798 for (i = 0; i < nzonep; i++)
5800 copy_ivec(dd_zp2[i], dd_zp[i]);
5806 for (i = 0; i < nzonep; i++)
5808 copy_ivec(dd_zp1[i], dd_zp[i]);
5812 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5817 zones = &dd->comm->zones;
5819 for (i = 0; i < nzone; i++)
5822 clear_ivec(zones->shift[i]);
5823 for (d = 0; d < dd->ndim; d++)
5825 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5830 for (i = 0; i < nzone; i++)
5832 for (d = 0; d < DIM; d++)
5834 s[d] = dd->ci[d] - zones->shift[i][d];
5839 else if (s[d] >= dd->nc[d])
5845 zones->nizone = nzonep;
5846 for (i = 0; i < zones->nizone; i++)
5848 if (dd_zp[i][0] != i)
5850 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5852 izone = &zones->izone[i];
5853 izone->j0 = dd_zp[i][1];
5854 izone->j1 = dd_zp[i][2];
5855 for (dim = 0; dim < DIM; dim++)
5857 if (dd->nc[dim] == 1)
5859 /* All shifts should be allowed */
5860 izone->shift0[dim] = -1;
5861 izone->shift1[dim] = 1;
5866 izone->shift0[d] = 0;
5867 izone->shift1[d] = 0;
5868 for(j=izone->j0; j<izone->j1; j++) {
5869 if (dd->shift[j][d] > dd->shift[i][d])
5870 izone->shift0[d] = -1;
5871 if (dd->shift[j][d] < dd->shift[i][d])
5872 izone->shift1[d] = 1;
5878 /* Assume the shift are not more than 1 cell */
5879 izone->shift0[dim] = 1;
5880 izone->shift1[dim] = -1;
5881 for (j = izone->j0; j < izone->j1; j++)
5883 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5884 if (shift_diff < izone->shift0[dim])
5886 izone->shift0[dim] = shift_diff;
5888 if (shift_diff > izone->shift1[dim])
5890 izone->shift1[dim] = shift_diff;
5897 if (dd->comm->eDLB != edlbNO)
5899 snew(dd->comm->root, dd->ndim);
5902 if (dd->comm->bRecordLoad)
5904 make_load_communicators(dd);
5908 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
5911 gmx_domdec_comm_t *comm;
5922 if (comm->bCartesianPP)
5924 /* Set up cartesian communication for the particle-particle part */
5927 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5928 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5931 for (i = 0; i < DIM; i++)
5935 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5937 /* We overwrite the old communicator with the new cartesian one */
5938 cr->mpi_comm_mygroup = comm_cart;
5941 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5942 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5944 if (comm->bCartesianPP_PME)
5946 /* Since we want to use the original cartesian setup for sim,
5947 * and not the one after split, we need to make an index.
5949 snew(comm->ddindex2ddnodeid, dd->nnodes);
5950 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5951 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5952 /* Get the rank of the DD master,
5953 * above we made sure that the master node is a PP node.
5963 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5965 else if (comm->bCartesianPP)
5967 if (cr->npmenodes == 0)
5969 /* The PP communicator is also
5970 * the communicator for this simulation
5972 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5974 cr->nodeid = dd->rank;
5976 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5978 /* We need to make an index to go from the coordinates
5979 * to the nodeid of this simulation.
5981 snew(comm->ddindex2simnodeid, dd->nnodes);
5982 snew(buf, dd->nnodes);
5983 if (cr->duty & DUTY_PP)
5985 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5987 /* Communicate the ddindex to simulation nodeid index */
5988 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5989 cr->mpi_comm_mysim);
5992 /* Determine the master coordinates and rank.
5993 * The DD master should be the same node as the master of this sim.
5995 for (i = 0; i < dd->nnodes; i++)
5997 if (comm->ddindex2simnodeid[i] == 0)
5999 ddindex2xyz(dd->nc, i, dd->master_ci);
6000 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
6005 fprintf(debug, "The master rank is %d\n", dd->masterrank);
6010 /* No Cartesian communicators */
6011 /* We use the rank in dd->comm->all as DD index */
6012 ddindex2xyz(dd->nc, dd->rank, dd->ci);
6013 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
6015 clear_ivec(dd->master_ci);
6022 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
6023 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6028 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
6029 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6033 static void receive_ddindex2simnodeid(t_commrec *cr)
6037 gmx_domdec_comm_t *comm;
6044 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6046 snew(comm->ddindex2simnodeid, dd->nnodes);
6047 snew(buf, dd->nnodes);
6048 if (cr->duty & DUTY_PP)
6050 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6053 /* Communicate the ddindex to simulation nodeid index */
6054 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6055 cr->mpi_comm_mysim);
6062 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6063 int ncg, int natoms)
6065 gmx_domdec_master_t *ma;
6070 snew(ma->ncg, dd->nnodes);
6071 snew(ma->index, dd->nnodes+1);
6073 snew(ma->nat, dd->nnodes);
6074 snew(ma->ibuf, dd->nnodes*2);
6075 snew(ma->cell_x, DIM);
6076 for (i = 0; i < DIM; i++)
6078 snew(ma->cell_x[i], dd->nc[i]+1);
6081 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6087 snew(ma->vbuf, natoms);
6093 static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
6094 int gmx_unused reorder)
6097 gmx_domdec_comm_t *comm;
6108 if (comm->bCartesianPP)
6110 for (i = 1; i < DIM; i++)
6112 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6114 if (bDiv[YY] || bDiv[ZZ])
6116 comm->bCartesianPP_PME = TRUE;
6117 /* If we have 2D PME decomposition, which is always in x+y,
6118 * we stack the PME only nodes in z.
6119 * Otherwise we choose the direction that provides the thinnest slab
6120 * of PME only nodes as this will have the least effect
6121 * on the PP communication.
6122 * But for the PME communication the opposite might be better.
6124 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6126 dd->nc[YY] > dd->nc[ZZ]))
6128 comm->cartpmedim = ZZ;
6132 comm->cartpmedim = YY;
6134 comm->ntot[comm->cartpmedim]
6135 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6139 fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6141 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6146 if (comm->bCartesianPP_PME)
6150 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6153 for (i = 0; i < DIM; i++)
6157 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6160 MPI_Comm_rank(comm_cart, &rank);
6161 if (MASTERNODE(cr) && rank != 0)
6163 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6166 /* With this assigment we loose the link to the original communicator
6167 * which will usually be MPI_COMM_WORLD, unless have multisim.
6169 cr->mpi_comm_mysim = comm_cart;
6170 cr->sim_nodeid = rank;
6172 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6176 fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
6177 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6180 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6184 if (cr->npmenodes == 0 ||
6185 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6187 cr->duty = DUTY_PME;
6190 /* Split the sim communicator into PP and PME only nodes */
6191 MPI_Comm_split(cr->mpi_comm_mysim,
6193 dd_index(comm->ntot, dd->ci),
6194 &cr->mpi_comm_mygroup);
6198 switch (dd_node_order)
6203 fprintf(fplog, "Order of the ranks: PP first, PME last\n");
6206 case ddnoINTERLEAVE:
6207 /* Interleave the PP-only and PME-only nodes,
6208 * as on clusters with dual-core machines this will double
6209 * the communication bandwidth of the PME processes
6210 * and thus speed up the PP <-> PME and inter PME communication.
6214 fprintf(fplog, "Interleaving PP and PME ranks\n");
6216 comm->pmenodes = dd_pmenodes(cr);
6221 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6224 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6226 cr->duty = DUTY_PME;
6233 /* Split the sim communicator into PP and PME only nodes */
6234 MPI_Comm_split(cr->mpi_comm_mysim,
6237 &cr->mpi_comm_mygroup);
6238 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6244 fprintf(fplog, "This rank does only %s work.\n\n",
6245 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
6249 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6252 gmx_domdec_comm_t *comm;
6258 copy_ivec(dd->nc, comm->ntot);
6260 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6261 comm->bCartesianPP_PME = FALSE;
6263 /* Reorder the nodes by default. This might change the MPI ranks.
6264 * Real reordering is only supported on very few architectures,
6265 * Blue Gene is one of them.
6267 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6269 if (cr->npmenodes > 0)
6271 /* Split the communicator into a PP and PME part */
6272 split_communicator(fplog, cr, dd_node_order, CartReorder);
6273 if (comm->bCartesianPP_PME)
6275 /* We (possibly) reordered the nodes in split_communicator,
6276 * so it is no longer required in make_pp_communicator.
6278 CartReorder = FALSE;
6283 /* All nodes do PP and PME */
6285 /* We do not require separate communicators */
6286 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6290 if (cr->duty & DUTY_PP)
6292 /* Copy or make a new PP communicator */
6293 make_pp_communicator(fplog, cr, CartReorder);
6297 receive_ddindex2simnodeid(cr);
6300 if (!(cr->duty & DUTY_PME))
6302 /* Set up the commnuication to our PME node */
6303 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6304 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6307 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6308 dd->pme_nodeid, dd->pme_receive_vir_ener);
6313 dd->pme_nodeid = -1;
6318 dd->ma = init_gmx_domdec_master_t(dd,
6320 comm->cgs_gl.index[comm->cgs_gl.nr]);
6324 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6326 real *slb_frac, tot;
6331 if (nc > 1 && size_string != NULL)
6335 fprintf(fplog, "Using static load balancing for the %s direction\n",
6340 for (i = 0; i < nc; i++)
6343 sscanf(size_string, "%lf%n", &dbl, &n);
6346 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6355 fprintf(fplog, "Relative cell sizes:");
6357 for (i = 0; i < nc; i++)
6362 fprintf(fplog, " %5.3f", slb_frac[i]);
6367 fprintf(fplog, "\n");
6374 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6377 gmx_mtop_ilistloop_t iloop;
6381 iloop = gmx_mtop_ilistloop_init(mtop);
6382 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6384 for (ftype = 0; ftype < F_NRE; ftype++)
6386 if ((interaction_function[ftype].flags & IF_BOND) &&
6389 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
6397 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6403 val = getenv(env_var);
6406 if (sscanf(val, "%d", &nst) <= 0)
6412 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6420 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6424 fprintf(stderr, "\n%s\n", warn_string);
6428 fprintf(fplog, "\n%s\n", warn_string);
6432 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6433 t_inputrec *ir, FILE *fplog)
6435 if (ir->ePBC == epbcSCREW &&
6436 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6438 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6441 if (ir->ns_type == ensSIMPLE)
6443 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6446 if (ir->nstlist == 0)
6448 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6451 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6453 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6457 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6462 r = ddbox->box_size[XX];
6463 for (di = 0; di < dd->ndim; di++)
6466 /* Check using the initial average cell size */
6467 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6473 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6474 const char *dlb_opt, gmx_bool bRecordLoad,
6475 unsigned long Flags, t_inputrec *ir)
6483 case 'a': eDLB = edlbAUTO; break;
6484 case 'n': eDLB = edlbNO; break;
6485 case 'y': eDLB = edlbYES; break;
6486 default: gmx_incons("Unknown dlb_opt");
6489 if (Flags & MD_RERUN)
6494 if (!EI_DYNAMICS(ir->eI))
6496 if (eDLB == edlbYES)
6498 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6499 dd_warning(cr, fplog, buf);
6507 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6512 if (Flags & MD_REPRODUCIBLE)
6519 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6523 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6526 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
6534 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6539 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6541 /* Decomposition order z,y,x */
6544 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6546 for (dim = DIM-1; dim >= 0; dim--)
6548 if (dd->nc[dim] > 1)
6550 dd->dim[dd->ndim++] = dim;
6556 /* Decomposition order x,y,z */
6557 for (dim = 0; dim < DIM; dim++)
6559 if (dd->nc[dim] > 1)
6561 dd->dim[dd->ndim++] = dim;
6567 static gmx_domdec_comm_t *init_dd_comm()
6569 gmx_domdec_comm_t *comm;
6573 snew(comm->cggl_flag, DIM*2);
6574 snew(comm->cgcm_state, DIM*2);
6575 for (i = 0; i < DIM*2; i++)
6577 comm->cggl_flag_nalloc[i] = 0;
6578 comm->cgcm_state_nalloc[i] = 0;
6581 comm->nalloc_int = 0;
6582 comm->buf_int = NULL;
6584 vec_rvec_init(&comm->vbuf);
6586 comm->n_load_have = 0;
6587 comm->n_load_collect = 0;
6589 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6591 comm->sum_nat[i] = 0;
6595 comm->load_step = 0;
6598 clear_ivec(comm->load_lim);
6605 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6606 unsigned long Flags,
6608 real comm_distance_min, real rconstr,
6609 const char *dlb_opt, real dlb_scale,
6610 const char *sizex, const char *sizey, const char *sizez,
6611 gmx_mtop_t *mtop, t_inputrec *ir,
6612 matrix box, rvec *x,
6614 int *npme_x, int *npme_y)
6617 gmx_domdec_comm_t *comm;
6620 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6627 "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
6632 dd->comm = init_dd_comm();
6634 snew(comm->cggl_flag, DIM*2);
6635 snew(comm->cgcm_state, DIM*2);
6637 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6638 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6640 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
6641 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
6642 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
6643 recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
6644 comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
6645 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
6646 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
6647 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
6649 dd->pme_recv_f_alloc = 0;
6650 dd->pme_recv_f_buf = NULL;
6652 if (dd->bSendRecv2 && fplog)
6654 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6660 fprintf(fplog, "Will load balance based on FLOP count\n");
6662 if (comm->eFlop > 1)
6664 srand(1+cr->nodeid);
6666 comm->bRecordLoad = TRUE;
6670 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6674 /* Initialize to GPU share count to 0, might change later */
6675 comm->nrank_gpu_shared = 0;
6677 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6679 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6682 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6684 dd->bGridJump = comm->bDynLoadBal;
6685 comm->bPMELoadBalDLBLimits = FALSE;
6687 if (comm->nstSortCG)
6691 if (comm->nstSortCG == 1)
6693 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6697 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6701 snew(comm->sort, 1);
6707 fprintf(fplog, "Will not sort the charge groups\n");
6711 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6713 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6714 if (comm->bInterCGBondeds)
6716 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6720 comm->bInterCGMultiBody = FALSE;
6723 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6724 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6726 if (ir->rlistlong == 0)
6728 /* Set the cut-off to some very large value,
6729 * so we don't need if statements everywhere in the code.
6730 * We use sqrt, since the cut-off is squared in some places.
6732 comm->cutoff = GMX_CUTOFF_INF;
6736 comm->cutoff = ir->rlistlong;
6738 comm->cutoff_mbody = 0;
6740 comm->cellsize_limit = 0;
6741 comm->bBondComm = FALSE;
6743 if (comm->bInterCGBondeds)
6745 if (comm_distance_min > 0)
6747 comm->cutoff_mbody = comm_distance_min;
6748 if (Flags & MD_DDBONDCOMM)
6750 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6754 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6756 r_bonded_limit = comm->cutoff_mbody;
6758 else if (ir->bPeriodicMols)
6760 /* Can not easily determine the required cut-off */
6761 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6762 comm->cutoff_mbody = comm->cutoff/2;
6763 r_bonded_limit = comm->cutoff_mbody;
6769 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6770 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6772 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6773 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6775 /* We use an initial margin of 10% for the minimum cell size,
6776 * except when we are just below the non-bonded cut-off.
6778 if (Flags & MD_DDBONDCOMM)
6780 if (max(r_2b, r_mb) > comm->cutoff)
6782 r_bonded = max(r_2b, r_mb);
6783 r_bonded_limit = 1.1*r_bonded;
6784 comm->bBondComm = TRUE;
6789 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6791 /* We determine cutoff_mbody later */
6795 /* No special bonded communication,
6796 * simply increase the DD cut-off.
6798 r_bonded_limit = 1.1*max(r_2b, r_mb);
6799 comm->cutoff_mbody = r_bonded_limit;
6800 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6803 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6807 "Minimum cell size due to bonded interactions: %.3f nm\n",
6808 comm->cellsize_limit);
6812 if (dd->bInterCGcons && rconstr <= 0)
6814 /* There is a cell size limit due to the constraints (P-LINCS) */
6815 rconstr = constr_r_max(fplog, mtop, ir);
6819 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6821 if (rconstr > comm->cellsize_limit)
6823 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6827 else if (rconstr > 0 && fplog)
6829 /* Here we do not check for dd->bInterCGcons,
6830 * because one can also set a cell size limit for virtual sites only
6831 * and at this point we don't know yet if there are intercg v-sites.
6834 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6837 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6839 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6843 copy_ivec(nc, dd->nc);
6844 set_dd_dim(fplog, dd);
6845 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6847 if (cr->npmenodes == -1)
6851 acs = average_cellsize_min(dd, ddbox);
6852 if (acs < comm->cellsize_limit)
6856 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6858 gmx_fatal_collective(FARGS, cr, NULL,
6859 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6860 acs, comm->cellsize_limit);
6865 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6867 /* We need to choose the optimal DD grid and possibly PME nodes */
6868 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6869 comm->eDLB != edlbNO, dlb_scale,
6870 comm->cellsize_limit, comm->cutoff,
6871 comm->bInterCGBondeds);
6873 if (dd->nc[XX] == 0)
6875 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6876 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
6877 !bC ? "-rdd" : "-rcon",
6878 comm->eDLB != edlbNO ? " or -dds" : "",
6879 bC ? " or your LINCS settings" : "");
6881 gmx_fatal_collective(FARGS, cr, NULL,
6882 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
6884 "Look in the log file for details on the domain decomposition",
6885 cr->nnodes-cr->npmenodes, limit, buf);
6887 set_dd_dim(fplog, dd);
6893 "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
6894 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6897 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6898 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6900 gmx_fatal_collective(FARGS, cr, NULL,
6901 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
6902 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6904 if (cr->npmenodes > dd->nnodes)
6906 gmx_fatal_collective(FARGS, cr, NULL,
6907 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6909 if (cr->npmenodes > 0)
6911 comm->npmenodes = cr->npmenodes;
6915 comm->npmenodes = dd->nnodes;
6918 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6920 /* The following choices should match those
6921 * in comm_cost_est in domdec_setup.c.
6922 * Note that here the checks have to take into account
6923 * that the decomposition might occur in a different order than xyz
6924 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6925 * in which case they will not match those in comm_cost_est,
6926 * but since that is mainly for testing purposes that's fine.
6928 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6929 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6930 getenv("GMX_PMEONEDD") == NULL)
6932 comm->npmedecompdim = 2;
6933 comm->npmenodes_x = dd->nc[XX];
6934 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6938 /* In case nc is 1 in both x and y we could still choose to
6939 * decompose pme in y instead of x, but we use x for simplicity.
6941 comm->npmedecompdim = 1;
6942 if (dd->dim[0] == YY)
6944 comm->npmenodes_x = 1;
6945 comm->npmenodes_y = comm->npmenodes;
6949 comm->npmenodes_x = comm->npmenodes;
6950 comm->npmenodes_y = 1;
6955 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6956 comm->npmenodes_x, comm->npmenodes_y, 1);
6961 comm->npmedecompdim = 0;
6962 comm->npmenodes_x = 0;
6963 comm->npmenodes_y = 0;
6966 /* Technically we don't need both of these,
6967 * but it simplifies code not having to recalculate it.
6969 *npme_x = comm->npmenodes_x;
6970 *npme_y = comm->npmenodes_y;
6972 snew(comm->slb_frac, DIM);
6973 if (comm->eDLB == edlbNO)
6975 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6976 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6977 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
6980 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6982 if (comm->bBondComm || comm->eDLB != edlbNO)
6984 /* Set the bonded communication distance to halfway
6985 * the minimum and the maximum,
6986 * since the extra communication cost is nearly zero.
6988 acs = average_cellsize_min(dd, ddbox);
6989 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6990 if (comm->eDLB != edlbNO)
6992 /* Check if this does not limit the scaling */
6993 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
6995 if (!comm->bBondComm)
6997 /* Without bBondComm do not go beyond the n.b. cut-off */
6998 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
6999 if (comm->cellsize_limit >= comm->cutoff)
7001 /* We don't loose a lot of efficieny
7002 * when increasing it to the n.b. cut-off.
7003 * It can even be slightly faster, because we need
7004 * less checks for the communication setup.
7006 comm->cutoff_mbody = comm->cutoff;
7009 /* Check if we did not end up below our original limit */
7010 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
7012 if (comm->cutoff_mbody > comm->cellsize_limit)
7014 comm->cellsize_limit = comm->cutoff_mbody;
7017 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
7022 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
7023 "cellsize limit %f\n",
7024 comm->bBondComm, comm->cellsize_limit);
7029 check_dd_restrictions(cr, dd, ir, fplog);
7032 comm->partition_step = INT_MIN;
7035 clear_dd_cycle_counts(dd);
7040 static void set_dlb_limits(gmx_domdec_t *dd)
7045 for (d = 0; d < dd->ndim; d++)
7047 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7048 dd->comm->cellsize_min[dd->dim[d]] =
7049 dd->comm->cellsize_min_dlb[dd->dim[d]];
7054 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7057 gmx_domdec_comm_t *comm;
7067 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
7070 cellsize_min = comm->cellsize_min[dd->dim[0]];
7071 for (d = 1; d < dd->ndim; d++)
7073 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7076 if (cellsize_min < comm->cellsize_limit*1.05)
7078 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7080 /* Change DLB from "auto" to "no". */
7081 comm->eDLB = edlbNO;
7086 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7087 comm->bDynLoadBal = TRUE;
7088 dd->bGridJump = TRUE;
7092 /* We can set the required cell size info here,
7093 * so we do not need to communicate this.
7094 * The grid is completely uniform.
7096 for (d = 0; d < dd->ndim; d++)
7100 comm->load[d].sum_m = comm->load[d].sum;
7102 nc = dd->nc[dd->dim[d]];
7103 for (i = 0; i < nc; i++)
7105 comm->root[d]->cell_f[i] = i/(real)nc;
7108 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7109 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7112 comm->root[d]->cell_f[nc] = 1.0;
7117 static char *init_bLocalCG(gmx_mtop_t *mtop)
7122 ncg = ncg_mtop(mtop);
7123 snew(bLocalCG, ncg);
7124 for (cg = 0; cg < ncg; cg++)
7126 bLocalCG[cg] = FALSE;
7132 void dd_init_bondeds(FILE *fplog,
7133 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7135 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7137 gmx_domdec_comm_t *comm;
7141 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7145 if (comm->bBondComm)
7147 /* Communicate atoms beyond the cut-off for bonded interactions */
7150 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7152 comm->bLocalCG = init_bLocalCG(mtop);
7156 /* Only communicate atoms based on cut-off */
7157 comm->cglink = NULL;
7158 comm->bLocalCG = NULL;
7162 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7164 gmx_bool bDynLoadBal, real dlb_scale,
7167 gmx_domdec_comm_t *comm;
7182 fprintf(fplog, "The maximum number of communication pulses is:");
7183 for (d = 0; d < dd->ndim; d++)
7185 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7187 fprintf(fplog, "\n");
7188 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7189 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7190 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7191 for (d = 0; d < DIM; d++)
7195 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7202 comm->cellsize_min_dlb[d]/
7203 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7205 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7208 fprintf(fplog, "\n");
7212 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
7213 fprintf(fplog, "The initial number of communication pulses is:");
7214 for (d = 0; d < dd->ndim; d++)
7216 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7218 fprintf(fplog, "\n");
7219 fprintf(fplog, "The initial domain decomposition cell size is:");
7220 for (d = 0; d < DIM; d++)
7224 fprintf(fplog, " %c %.2f nm",
7225 dim2char(d), dd->comm->cellsize_min[d]);
7228 fprintf(fplog, "\n\n");
7231 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7233 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7234 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7235 "non-bonded interactions", "", comm->cutoff);
7239 limit = dd->comm->cellsize_limit;
7243 if (dynamic_dd_box(ddbox, ir))
7245 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7247 limit = dd->comm->cellsize_min[XX];
7248 for (d = 1; d < DIM; d++)
7250 limit = min(limit, dd->comm->cellsize_min[d]);
7254 if (comm->bInterCGBondeds)
7256 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7257 "two-body bonded interactions", "(-rdd)",
7258 max(comm->cutoff, comm->cutoff_mbody));
7259 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7260 "multi-body bonded interactions", "(-rdd)",
7261 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7265 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7266 "virtual site constructions", "(-rcon)", limit);
7268 if (dd->constraint_comm)
7270 sprintf(buf, "atoms separated by up to %d constraints",
7272 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7273 buf, "(-rcon)", limit);
7275 fprintf(fplog, "\n");
7281 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7283 const t_inputrec *ir,
7284 const gmx_ddbox_t *ddbox)
7286 gmx_domdec_comm_t *comm;
7287 int d, dim, npulse, npulse_d_max, npulse_d;
7292 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7294 /* Determine the maximum number of comm. pulses in one dimension */
7296 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7298 /* Determine the maximum required number of grid pulses */
7299 if (comm->cellsize_limit >= comm->cutoff)
7301 /* Only a single pulse is required */
7304 else if (!bNoCutOff && comm->cellsize_limit > 0)
7306 /* We round down slightly here to avoid overhead due to the latency
7307 * of extra communication calls when the cut-off
7308 * would be only slightly longer than the cell size.
7309 * Later cellsize_limit is redetermined,
7310 * so we can not miss interactions due to this rounding.
7312 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7316 /* There is no cell size limit */
7317 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7320 if (!bNoCutOff && npulse > 1)
7322 /* See if we can do with less pulses, based on dlb_scale */
7324 for (d = 0; d < dd->ndim; d++)
7327 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7328 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7329 npulse_d_max = max(npulse_d_max, npulse_d);
7331 npulse = min(npulse, npulse_d_max);
7334 /* This env var can override npulse */
7335 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
7342 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7343 for (d = 0; d < dd->ndim; d++)
7345 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7346 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7347 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7348 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7349 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7351 comm->bVacDLBNoLimit = FALSE;
7355 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7356 if (!comm->bVacDLBNoLimit)
7358 comm->cellsize_limit = max(comm->cellsize_limit,
7359 comm->cutoff/comm->maxpulse);
7361 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7362 /* Set the minimum cell size for each DD dimension */
7363 for (d = 0; d < dd->ndim; d++)
7365 if (comm->bVacDLBNoLimit ||
7366 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7368 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7372 comm->cellsize_min_dlb[dd->dim[d]] =
7373 comm->cutoff/comm->cd[d].np_dlb;
7376 if (comm->cutoff_mbody <= 0)
7378 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7380 if (comm->bDynLoadBal)
7386 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7388 /* If each molecule is a single charge group
7389 * or we use domain decomposition for each periodic dimension,
7390 * we do not need to take pbc into account for the bonded interactions.
7392 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7395 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
7398 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7399 t_inputrec *ir, gmx_ddbox_t *ddbox)
7401 gmx_domdec_comm_t *comm;
7407 /* Initialize the thread data.
7408 * This can not be done in init_domain_decomposition,
7409 * as the numbers of threads is determined later.
7411 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7414 snew(comm->dth, comm->nth);
7417 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7419 init_ddpme(dd, &comm->ddpme[0], 0);
7420 if (comm->npmedecompdim >= 2)
7422 init_ddpme(dd, &comm->ddpme[1], 1);
7427 comm->npmenodes = 0;
7428 if (dd->pme_nodeid >= 0)
7430 gmx_fatal_collective(FARGS, NULL, dd,
7431 "Can not have separate PME ranks without PME electrostatics");
7437 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7439 if (comm->eDLB != edlbNO)
7441 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7444 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7445 if (comm->eDLB == edlbAUTO)
7449 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7451 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
7454 if (ir->ePBC == epbcNONE)
7456 vol_frac = 1 - 1/(double)dd->nnodes;
7461 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7465 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7467 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7469 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
7472 static gmx_bool test_dd_cutoff(t_commrec *cr,
7473 t_state *state, t_inputrec *ir,
7484 set_ddbox(dd, FALSE, cr, ir, state->box,
7485 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7489 for (d = 0; d < dd->ndim; d++)
7493 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7494 if (dynamic_dd_box(&ddbox, ir))
7496 inv_cell_size *= DD_PRES_SCALE_MARGIN;
7499 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7501 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7502 dd->comm->cd[d].np_dlb > 0)
7504 if (np > dd->comm->cd[d].np_dlb)
7509 /* If a current local cell size is smaller than the requested
7510 * cut-off, we could still fix it, but this gets very complicated.
7511 * Without fixing here, we might actually need more checks.
7513 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7520 if (dd->comm->eDLB != edlbNO)
7522 /* If DLB is not active yet, we don't need to check the grid jumps.
7523 * Actually we shouldn't, because then the grid jump data is not set.
7525 if (dd->comm->bDynLoadBal &&
7526 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7531 gmx_sumi(1, &LocallyLimited, cr);
7533 if (LocallyLimited > 0)
7542 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7545 gmx_bool bCutoffAllowed;
7547 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7551 cr->dd->comm->cutoff = cutoff_req;
7554 return bCutoffAllowed;
7557 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7559 gmx_domdec_comm_t *comm;
7561 comm = cr->dd->comm;
7563 /* Turn on the DLB limiting (might have been on already) */
7564 comm->bPMELoadBalDLBLimits = TRUE;
7566 /* Change the cut-off limit */
7567 comm->PMELoadBal_max_cutoff = comm->cutoff;
7570 static void merge_cg_buffers(int ncell,
7571 gmx_domdec_comm_dim_t *cd, int pulse,
7573 int *index_gl, int *recv_i,
7574 rvec *cg_cm, rvec *recv_vr,
7576 cginfo_mb_t *cginfo_mb, int *cginfo)
7578 gmx_domdec_ind_t *ind, *ind_p;
7579 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7580 int shift, shift_at;
7582 ind = &cd->ind[pulse];
7584 /* First correct the already stored data */
7585 shift = ind->nrecv[ncell];
7586 for (cell = ncell-1; cell >= 0; cell--)
7588 shift -= ind->nrecv[cell];
7591 /* Move the cg's present from previous grid pulses */
7592 cg0 = ncg_cell[ncell+cell];
7593 cg1 = ncg_cell[ncell+cell+1];
7594 cgindex[cg1+shift] = cgindex[cg1];
7595 for (cg = cg1-1; cg >= cg0; cg--)
7597 index_gl[cg+shift] = index_gl[cg];
7598 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7599 cgindex[cg+shift] = cgindex[cg];
7600 cginfo[cg+shift] = cginfo[cg];
7602 /* Correct the already stored send indices for the shift */
7603 for (p = 1; p <= pulse; p++)
7605 ind_p = &cd->ind[p];
7607 for (c = 0; c < cell; c++)
7609 cg0 += ind_p->nsend[c];
7611 cg1 = cg0 + ind_p->nsend[cell];
7612 for (cg = cg0; cg < cg1; cg++)
7614 ind_p->index[cg] += shift;
7620 /* Merge in the communicated buffers */
7624 for (cell = 0; cell < ncell; cell++)
7626 cg1 = ncg_cell[ncell+cell+1] + shift;
7629 /* Correct the old cg indices */
7630 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7632 cgindex[cg+1] += shift_at;
7635 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7637 /* Copy this charge group from the buffer */
7638 index_gl[cg1] = recv_i[cg0];
7639 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7640 /* Add it to the cgindex */
7641 cg_gl = index_gl[cg1];
7642 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7643 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7644 cgindex[cg1+1] = cgindex[cg1] + nat;
7649 shift += ind->nrecv[cell];
7650 ncg_cell[ncell+cell+1] = cg1;
7654 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7655 int nzone, int cg0, const int *cgindex)
7659 /* Store the atom block boundaries for easy copying of communication buffers
7662 for (zone = 0; zone < nzone; zone++)
7664 for (p = 0; p < cd->np; p++)
7666 cd->ind[p].cell2at0[zone] = cgindex[cg];
7667 cg += cd->ind[p].nrecv[zone];
7668 cd->ind[p].cell2at1[zone] = cgindex[cg];
7673 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7679 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7681 if (!bLocalCG[link->a[i]])
7690 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7692 real c[DIM][4]; /* the corners for the non-bonded communication */
7693 real cr0; /* corner for rounding */
7694 real cr1[4]; /* corners for rounding */
7695 real bc[DIM]; /* corners for bounded communication */
7696 real bcr1; /* corner for rounding for bonded communication */
7699 /* Determine the corners of the domain(s) we are communicating with */
7701 set_dd_corners(const gmx_domdec_t *dd,
7702 int dim0, int dim1, int dim2,
7706 const gmx_domdec_comm_t *comm;
7707 const gmx_domdec_zones_t *zones;
7712 zones = &comm->zones;
7714 /* Keep the compiler happy */
7718 /* The first dimension is equal for all cells */
7719 c->c[0][0] = comm->cell_x0[dim0];
7722 c->bc[0] = c->c[0][0];
7727 /* This cell row is only seen from the first row */
7728 c->c[1][0] = comm->cell_x0[dim1];
7729 /* All rows can see this row */
7730 c->c[1][1] = comm->cell_x0[dim1];
7733 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7736 /* For the multi-body distance we need the maximum */
7737 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7740 /* Set the upper-right corner for rounding */
7741 c->cr0 = comm->cell_x1[dim0];
7746 for (j = 0; j < 4; j++)
7748 c->c[2][j] = comm->cell_x0[dim2];
7752 /* Use the maximum of the i-cells that see a j-cell */
7753 for (i = 0; i < zones->nizone; i++)
7755 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7761 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7767 /* For the multi-body distance we need the maximum */
7768 c->bc[2] = comm->cell_x0[dim2];
7769 for (i = 0; i < 2; i++)
7771 for (j = 0; j < 2; j++)
7773 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7779 /* Set the upper-right corner for rounding */
7780 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7781 * Only cell (0,0,0) can see cell 7 (1,1,1)
7783 c->cr1[0] = comm->cell_x1[dim1];
7784 c->cr1[3] = comm->cell_x1[dim1];
7787 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7790 /* For the multi-body distance we need the maximum */
7791 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7798 /* Determine which cg's we need to send in this pulse from this zone */
7800 get_zone_pulse_cgs(gmx_domdec_t *dd,
7801 int zonei, int zone,
7803 const int *index_gl,
7805 int dim, int dim_ind,
7806 int dim0, int dim1, int dim2,
7807 real r_comm2, real r_bcomm2,
7811 real skew_fac2_d, real skew_fac_01,
7812 rvec *v_d, rvec *v_0, rvec *v_1,
7813 const dd_corners_t *c,
7815 gmx_bool bDistBonded,
7821 gmx_domdec_ind_t *ind,
7822 int **ibuf, int *ibuf_nalloc,
7828 gmx_domdec_comm_t *comm;
7830 gmx_bool bDistMB_pulse;
7832 real r2, rb2, r, tric_sh;
7835 int nsend_z, nsend, nat;
7839 bScrew = (dd->bScrewPBC && dim == XX);
7841 bDistMB_pulse = (bDistMB && bDistBonded);
7847 for (cg = cg0; cg < cg1; cg++)
7851 if (tric_dist[dim_ind] == 0)
7853 /* Rectangular direction, easy */
7854 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7861 r = cg_cm[cg][dim] - c->bc[dim_ind];
7867 /* Rounding gives at most a 16% reduction
7868 * in communicated atoms
7870 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7872 r = cg_cm[cg][dim0] - c->cr0;
7873 /* This is the first dimension, so always r >= 0 */
7880 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7882 r = cg_cm[cg][dim1] - c->cr1[zone];
7889 r = cg_cm[cg][dim1] - c->bcr1;
7899 /* Triclinic direction, more complicated */
7902 /* Rounding, conservative as the skew_fac multiplication
7903 * will slightly underestimate the distance.
7905 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7907 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7908 for (i = dim0+1; i < DIM; i++)
7910 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7912 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7915 rb[dim0] = rn[dim0];
7918 /* Take care that the cell planes along dim0 might not
7919 * be orthogonal to those along dim1 and dim2.
7921 for (i = 1; i <= dim_ind; i++)
7924 if (normal[dim0][dimd] > 0)
7926 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7929 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7934 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7936 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7938 for (i = dim1+1; i < DIM; i++)
7940 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7942 rn[dim1] += tric_sh;
7945 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7946 /* Take care of coupling of the distances
7947 * to the planes along dim0 and dim1 through dim2.
7949 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7950 /* Take care that the cell planes along dim1
7951 * might not be orthogonal to that along dim2.
7953 if (normal[dim1][dim2] > 0)
7955 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7961 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7964 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7965 /* Take care of coupling of the distances
7966 * to the planes along dim0 and dim1 through dim2.
7968 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7969 /* Take care that the cell planes along dim1
7970 * might not be orthogonal to that along dim2.
7972 if (normal[dim1][dim2] > 0)
7974 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7979 /* The distance along the communication direction */
7980 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7982 for (i = dim+1; i < DIM; i++)
7984 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7989 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7990 /* Take care of coupling of the distances
7991 * to the planes along dim0 and dim1 through dim2.
7993 if (dim_ind == 1 && zonei == 1)
7995 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
8001 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
8004 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
8005 /* Take care of coupling of the distances
8006 * to the planes along dim0 and dim1 through dim2.
8008 if (dim_ind == 1 && zonei == 1)
8010 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
8018 ((bDistMB && rb2 < r_bcomm2) ||
8019 (bDist2B && r2 < r_bcomm2)) &&
8021 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
8022 missing_link(comm->cglink, index_gl[cg],
8025 /* Make an index to the local charge groups */
8026 if (nsend+1 > ind->nalloc)
8028 ind->nalloc = over_alloc_large(nsend+1);
8029 srenew(ind->index, ind->nalloc);
8031 if (nsend+1 > *ibuf_nalloc)
8033 *ibuf_nalloc = over_alloc_large(nsend+1);
8034 srenew(*ibuf, *ibuf_nalloc);
8036 ind->index[nsend] = cg;
8037 (*ibuf)[nsend] = index_gl[cg];
8039 vec_rvec_check_alloc(vbuf, nsend+1);
8041 if (dd->ci[dim] == 0)
8043 /* Correct cg_cm for pbc */
8044 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
8047 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8048 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8053 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8056 nat += cgindex[cg+1] - cgindex[cg];
8062 *nsend_z_ptr = nsend_z;
8065 static void setup_dd_communication(gmx_domdec_t *dd,
8066 matrix box, gmx_ddbox_t *ddbox,
8067 t_forcerec *fr, t_state *state, rvec **f)
8069 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8070 int nzone, nzone_send, zone, zonei, cg0, cg1;
8071 int c, i, j, cg, cg_gl, nrcg;
8072 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8073 gmx_domdec_comm_t *comm;
8074 gmx_domdec_zones_t *zones;
8075 gmx_domdec_comm_dim_t *cd;
8076 gmx_domdec_ind_t *ind;
8077 cginfo_mb_t *cginfo_mb;
8078 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8079 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8080 dd_corners_t corners;
8082 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8083 real skew_fac2_d, skew_fac_01;
8090 fprintf(debug, "Setting up DD communication\n");
8095 switch (fr->cutoff_scheme)
8104 gmx_incons("unimplemented");
8108 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8110 dim = dd->dim[dim_ind];
8112 /* Check if we need to use triclinic distances */
8113 tric_dist[dim_ind] = 0;
8114 for (i = 0; i <= dim_ind; i++)
8116 if (ddbox->tric_dir[dd->dim[i]])
8118 tric_dist[dim_ind] = 1;
8123 bBondComm = comm->bBondComm;
8125 /* Do we need to determine extra distances for multi-body bondeds? */
8126 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8128 /* Do we need to determine extra distances for only two-body bondeds? */
8129 bDist2B = (bBondComm && !bDistMB);
8131 r_comm2 = sqr(comm->cutoff);
8132 r_bcomm2 = sqr(comm->cutoff_mbody);
8136 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8139 zones = &comm->zones;
8142 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8143 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8145 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8147 /* Triclinic stuff */
8148 normal = ddbox->normal;
8152 v_0 = ddbox->v[dim0];
8153 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8155 /* Determine the coupling coefficient for the distances
8156 * to the cell planes along dim0 and dim1 through dim2.
8157 * This is required for correct rounding.
8160 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8163 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8169 v_1 = ddbox->v[dim1];
8172 zone_cg_range = zones->cg_range;
8173 index_gl = dd->index_gl;
8174 cgindex = dd->cgindex;
8175 cginfo_mb = fr->cginfo_mb;
8177 zone_cg_range[0] = 0;
8178 zone_cg_range[1] = dd->ncg_home;
8179 comm->zone_ncg1[0] = dd->ncg_home;
8180 pos_cg = dd->ncg_home;
8182 nat_tot = dd->nat_home;
8184 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8186 dim = dd->dim[dim_ind];
8187 cd = &comm->cd[dim_ind];
8189 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8191 /* No pbc in this dimension, the first node should not comm. */
8199 v_d = ddbox->v[dim];
8200 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8202 cd->bInPlace = TRUE;
8203 for (p = 0; p < cd->np; p++)
8205 /* Only atoms communicated in the first pulse are used
8206 * for multi-body bonded interactions or for bBondComm.
8208 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8213 for (zone = 0; zone < nzone_send; zone++)
8215 if (tric_dist[dim_ind] && dim_ind > 0)
8217 /* Determine slightly more optimized skew_fac's
8219 * This reduces the number of communicated atoms
8220 * by about 10% for 3D DD of rhombic dodecahedra.
8222 for (dimd = 0; dimd < dim; dimd++)
8224 sf2_round[dimd] = 1;
8225 if (ddbox->tric_dir[dimd])
8227 for (i = dd->dim[dimd]+1; i < DIM; i++)
8229 /* If we are shifted in dimension i
8230 * and the cell plane is tilted forward
8231 * in dimension i, skip this coupling.
8233 if (!(zones->shift[nzone+zone][i] &&
8234 ddbox->v[dimd][i][dimd] >= 0))
8237 sqr(ddbox->v[dimd][i][dimd]);
8240 sf2_round[dimd] = 1/sf2_round[dimd];
8245 zonei = zone_perm[dim_ind][zone];
8248 /* Here we permutate the zones to obtain a convenient order
8249 * for neighbor searching
8251 cg0 = zone_cg_range[zonei];
8252 cg1 = zone_cg_range[zonei+1];
8256 /* Look only at the cg's received in the previous grid pulse
8258 cg1 = zone_cg_range[nzone+zone+1];
8259 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
8262 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8263 for (th = 0; th < comm->nth; th++)
8265 gmx_domdec_ind_t *ind_p;
8266 int **ibuf_p, *ibuf_nalloc_p;
8268 int *nsend_p, *nat_p;
8274 /* Thread 0 writes in the comm buffers */
8276 ibuf_p = &comm->buf_int;
8277 ibuf_nalloc_p = &comm->nalloc_int;
8278 vbuf_p = &comm->vbuf;
8281 nsend_zone_p = &ind->nsend[zone];
8285 /* Other threads write into temp buffers */
8286 ind_p = &comm->dth[th].ind;
8287 ibuf_p = &comm->dth[th].ibuf;
8288 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8289 vbuf_p = &comm->dth[th].vbuf;
8290 nsend_p = &comm->dth[th].nsend;
8291 nat_p = &comm->dth[th].nat;
8292 nsend_zone_p = &comm->dth[th].nsend_zone;
8294 comm->dth[th].nsend = 0;
8295 comm->dth[th].nat = 0;
8296 comm->dth[th].nsend_zone = 0;
8306 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8307 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8310 /* Get the cg's for this pulse in this zone */
8311 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8313 dim, dim_ind, dim0, dim1, dim2,
8316 normal, skew_fac2_d, skew_fac_01,
8317 v_d, v_0, v_1, &corners, sf2_round,
8318 bDistBonded, bBondComm,
8322 ibuf_p, ibuf_nalloc_p,
8328 /* Append data of threads>=1 to the communication buffers */
8329 for (th = 1; th < comm->nth; th++)
8331 dd_comm_setup_work_t *dth;
8334 dth = &comm->dth[th];
8336 ns1 = nsend + dth->nsend_zone;
8337 if (ns1 > ind->nalloc)
8339 ind->nalloc = over_alloc_dd(ns1);
8340 srenew(ind->index, ind->nalloc);
8342 if (ns1 > comm->nalloc_int)
8344 comm->nalloc_int = over_alloc_dd(ns1);
8345 srenew(comm->buf_int, comm->nalloc_int);
8347 if (ns1 > comm->vbuf.nalloc)
8349 comm->vbuf.nalloc = over_alloc_dd(ns1);
8350 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8353 for (i = 0; i < dth->nsend_zone; i++)
8355 ind->index[nsend] = dth->ind.index[i];
8356 comm->buf_int[nsend] = dth->ibuf[i];
8357 copy_rvec(dth->vbuf.v[i],
8358 comm->vbuf.v[nsend]);
8362 ind->nsend[zone] += dth->nsend_zone;
8365 /* Clear the counts in case we do not have pbc */
8366 for (zone = nzone_send; zone < nzone; zone++)
8368 ind->nsend[zone] = 0;
8370 ind->nsend[nzone] = nsend;
8371 ind->nsend[nzone+1] = nat;
8372 /* Communicate the number of cg's and atoms to receive */
8373 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8374 ind->nsend, nzone+2,
8375 ind->nrecv, nzone+2);
8377 /* The rvec buffer is also required for atom buffers of size nsend
8378 * in dd_move_x and dd_move_f.
8380 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8384 /* We can receive in place if only the last zone is not empty */
8385 for (zone = 0; zone < nzone-1; zone++)
8387 if (ind->nrecv[zone] > 0)
8389 cd->bInPlace = FALSE;
8394 /* The int buffer is only required here for the cg indices */
8395 if (ind->nrecv[nzone] > comm->nalloc_int2)
8397 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8398 srenew(comm->buf_int2, comm->nalloc_int2);
8400 /* The rvec buffer is also required for atom buffers
8401 * of size nrecv in dd_move_x and dd_move_f.
8403 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8404 vec_rvec_check_alloc(&comm->vbuf2, i);
8408 /* Make space for the global cg indices */
8409 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8410 || dd->cg_nalloc == 0)
8412 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8413 srenew(index_gl, dd->cg_nalloc);
8414 srenew(cgindex, dd->cg_nalloc+1);
8416 /* Communicate the global cg indices */
8419 recv_i = index_gl + pos_cg;
8423 recv_i = comm->buf_int2;
8425 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8426 comm->buf_int, nsend,
8427 recv_i, ind->nrecv[nzone]);
8429 /* Make space for cg_cm */
8430 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8431 if (fr->cutoff_scheme == ecutsGROUP)
8439 /* Communicate cg_cm */
8442 recv_vr = cg_cm + pos_cg;
8446 recv_vr = comm->vbuf2.v;
8448 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8449 comm->vbuf.v, nsend,
8450 recv_vr, ind->nrecv[nzone]);
8452 /* Make the charge group index */
8455 zone = (p == 0 ? 0 : nzone - 1);
8456 while (zone < nzone)
8458 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8460 cg_gl = index_gl[pos_cg];
8461 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8462 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8463 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8466 /* Update the charge group presence,
8467 * so we can use it in the next pass of the loop.
8469 comm->bLocalCG[cg_gl] = TRUE;
8475 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8478 zone_cg_range[nzone+zone] = pos_cg;
8483 /* This part of the code is never executed with bBondComm. */
8484 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8485 index_gl, recv_i, cg_cm, recv_vr,
8486 cgindex, fr->cginfo_mb, fr->cginfo);
8487 pos_cg += ind->nrecv[nzone];
8489 nat_tot += ind->nrecv[nzone+1];
8493 /* Store the atom block for easy copying of communication buffers */
8494 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
8498 dd->index_gl = index_gl;
8499 dd->cgindex = cgindex;
8501 dd->ncg_tot = zone_cg_range[zones->n];
8502 dd->nat_tot = nat_tot;
8503 comm->nat[ddnatHOME] = dd->nat_home;
8504 for (i = ddnatZONE; i < ddnatNR; i++)
8506 comm->nat[i] = dd->nat_tot;
8511 /* We don't need to update cginfo, since that was alrady done above.
8512 * So we pass NULL for the forcerec.
8514 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8515 NULL, comm->bLocalCG);
8520 fprintf(debug, "Finished setting up DD communication, zones:");
8521 for (c = 0; c < zones->n; c++)
8523 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8525 fprintf(debug, "\n");
8529 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8533 for (c = 0; c < zones->nizone; c++)
8535 zones->izone[c].cg1 = zones->cg_range[c+1];
8536 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8537 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
8541 static void set_zones_size(gmx_domdec_t *dd,
8542 matrix box, const gmx_ddbox_t *ddbox,
8543 int zone_start, int zone_end)
8545 gmx_domdec_comm_t *comm;
8546 gmx_domdec_zones_t *zones;
8548 int z, zi, zj0, zj1, d, dim;
8551 real size_j, add_tric;
8556 zones = &comm->zones;
8558 /* Do we need to determine extra distances for multi-body bondeds? */
8559 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8561 for (z = zone_start; z < zone_end; z++)
8563 /* Copy cell limits to zone limits.
8564 * Valid for non-DD dims and non-shifted dims.
8566 copy_rvec(comm->cell_x0, zones->size[z].x0);
8567 copy_rvec(comm->cell_x1, zones->size[z].x1);
8570 for (d = 0; d < dd->ndim; d++)
8574 for (z = 0; z < zones->n; z++)
8576 /* With a staggered grid we have different sizes
8577 * for non-shifted dimensions.
8579 if (dd->bGridJump && zones->shift[z][dim] == 0)
8583 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8584 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8588 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8589 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
8595 rcmbs = comm->cutoff_mbody;
8596 if (ddbox->tric_dir[dim])
8598 rcs /= ddbox->skew_fac[dim];
8599 rcmbs /= ddbox->skew_fac[dim];
8602 /* Set the lower limit for the shifted zone dimensions */
8603 for (z = zone_start; z < zone_end; z++)
8605 if (zones->shift[z][dim] > 0)
8608 if (!dd->bGridJump || d == 0)
8610 zones->size[z].x0[dim] = comm->cell_x1[dim];
8611 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8615 /* Here we take the lower limit of the zone from
8616 * the lowest domain of the zone below.
8620 zones->size[z].x0[dim] =
8621 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8627 zones->size[z].x0[dim] =
8628 zones->size[zone_perm[2][z-4]].x0[dim];
8632 zones->size[z].x0[dim] =
8633 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8636 /* A temporary limit, is updated below */
8637 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8641 for (zi = 0; zi < zones->nizone; zi++)
8643 if (zones->shift[zi][dim] == 0)
8645 /* This takes the whole zone into account.
8646 * With multiple pulses this will lead
8647 * to a larger zone then strictly necessary.
8649 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8650 zones->size[zi].x1[dim]+rcmbs);
8658 /* Loop over the i-zones to set the upper limit of each
8661 for (zi = 0; zi < zones->nizone; zi++)
8663 if (zones->shift[zi][dim] == 0)
8665 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8667 if (zones->shift[z][dim] > 0)
8669 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8670 zones->size[zi].x1[dim]+rcs);
8677 for (z = zone_start; z < zone_end; z++)
8679 /* Initialization only required to keep the compiler happy */
8680 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8683 /* To determine the bounding box for a zone we need to find
8684 * the extreme corners of 4, 2 or 1 corners.
8686 nc = 1 << (ddbox->npbcdim - 1);
8688 for (c = 0; c < nc; c++)
8690 /* Set up a zone corner at x=0, ignoring trilinic couplings */
8694 corner[YY] = zones->size[z].x0[YY];
8698 corner[YY] = zones->size[z].x1[YY];
8702 corner[ZZ] = zones->size[z].x0[ZZ];
8706 corner[ZZ] = zones->size[z].x1[ZZ];
8708 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8710 /* With 1D domain decomposition the cg's are not in
8711 * the triclinic box, but triclinic x-y and rectangular y-z.
8712 * Shift y back, so it will later end up at 0.
8714 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8716 /* Apply the triclinic couplings */
8717 assert(ddbox->npbcdim <= DIM);
8718 for (i = YY; i < ddbox->npbcdim; i++)
8720 for (j = XX; j < i; j++)
8722 corner[j] += corner[i]*box[i][j]/box[i][i];
8727 copy_rvec(corner, corner_min);
8728 copy_rvec(corner, corner_max);
8732 for (i = 0; i < DIM; i++)
8734 corner_min[i] = min(corner_min[i], corner[i]);
8735 corner_max[i] = max(corner_max[i], corner[i]);
8739 /* Copy the extreme cornes without offset along x */
8740 for (i = 0; i < DIM; i++)
8742 zones->size[z].bb_x0[i] = corner_min[i];
8743 zones->size[z].bb_x1[i] = corner_max[i];
8745 /* Add the offset along x */
8746 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8747 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
8750 if (zone_start == 0)
8753 for (dim = 0; dim < DIM; dim++)
8755 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8757 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8762 for (z = zone_start; z < zone_end; z++)
8764 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8766 zones->size[z].x0[XX], zones->size[z].x1[XX],
8767 zones->size[z].x0[YY], zones->size[z].x1[YY],
8768 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8769 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8771 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8772 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8773 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
8778 static int comp_cgsort(const void *a, const void *b)
8782 gmx_cgsort_t *cga, *cgb;
8783 cga = (gmx_cgsort_t *)a;
8784 cgb = (gmx_cgsort_t *)b;
8786 comp = cga->nsc - cgb->nsc;
8789 comp = cga->ind_gl - cgb->ind_gl;
8795 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8800 /* Order the data */
8801 for (i = 0; i < n; i++)
8803 buf[i] = a[sort[i].ind];
8806 /* Copy back to the original array */
8807 for (i = 0; i < n; i++)
8813 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8818 /* Order the data */
8819 for (i = 0; i < n; i++)
8821 copy_rvec(v[sort[i].ind], buf[i]);
8824 /* Copy back to the original array */
8825 for (i = 0; i < n; i++)
8827 copy_rvec(buf[i], v[i]);
8831 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8834 int a, atot, cg, cg0, cg1, i;
8836 if (cgindex == NULL)
8838 /* Avoid the useless loop of the atoms within a cg */
8839 order_vec_cg(ncg, sort, v, buf);
8844 /* Order the data */
8846 for (cg = 0; cg < ncg; cg++)
8848 cg0 = cgindex[sort[cg].ind];
8849 cg1 = cgindex[sort[cg].ind+1];
8850 for (i = cg0; i < cg1; i++)
8852 copy_rvec(v[i], buf[a]);
8858 /* Copy back to the original array */
8859 for (a = 0; a < atot; a++)
8861 copy_rvec(buf[a], v[a]);
8865 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8866 int nsort_new, gmx_cgsort_t *sort_new,
8867 gmx_cgsort_t *sort1)
8871 /* The new indices are not very ordered, so we qsort them */
8872 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8874 /* sort2 is already ordered, so now we can merge the two arrays */
8878 while (i2 < nsort2 || i_new < nsort_new)
8882 sort1[i1++] = sort_new[i_new++];
8884 else if (i_new == nsort_new)
8886 sort1[i1++] = sort2[i2++];
8888 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8889 (sort2[i2].nsc == sort_new[i_new].nsc &&
8890 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8892 sort1[i1++] = sort2[i2++];
8896 sort1[i1++] = sort_new[i_new++];
8901 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8903 gmx_domdec_sort_t *sort;
8904 gmx_cgsort_t *cgsort, *sort_i;
8905 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8906 int sort_last, sort_skip;
8908 sort = dd->comm->sort;
8910 a = fr->ns.grid->cell_index;
8912 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8914 if (ncg_home_old >= 0)
8916 /* The charge groups that remained in the same ns grid cell
8917 * are completely ordered. So we can sort efficiently by sorting
8918 * the charge groups that did move into the stationary list.
8923 for (i = 0; i < dd->ncg_home; i++)
8925 /* Check if this cg did not move to another node */
8928 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8930 /* This cg is new on this node or moved ns grid cell */
8931 if (nsort_new >= sort->sort_new_nalloc)
8933 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8934 srenew(sort->sort_new, sort->sort_new_nalloc);
8936 sort_i = &(sort->sort_new[nsort_new++]);
8940 /* This cg did not move */
8941 sort_i = &(sort->sort2[nsort2++]);
8943 /* Sort on the ns grid cell indices
8944 * and the global topology index.
8945 * index_gl is irrelevant with cell ns,
8946 * but we set it here anyhow to avoid a conditional.
8949 sort_i->ind_gl = dd->index_gl[i];
8956 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8959 /* Sort efficiently */
8960 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8965 cgsort = sort->sort;
8967 for (i = 0; i < dd->ncg_home; i++)
8969 /* Sort on the ns grid cell indices
8970 * and the global topology index
8972 cgsort[i].nsc = a[i];
8973 cgsort[i].ind_gl = dd->index_gl[i];
8975 if (cgsort[i].nsc < moved)
8982 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8984 /* Determine the order of the charge groups using qsort */
8985 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
8991 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
8994 int ncg_new, i, *a, na;
8996 sort = dd->comm->sort->sort;
8998 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
9001 for (i = 0; i < na; i++)
9005 sort[ncg_new].ind = a[i];
9013 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
9016 gmx_domdec_sort_t *sort;
9017 gmx_cgsort_t *cgsort, *sort_i;
9019 int ncg_new, i, *ibuf, cgsize;
9022 sort = dd->comm->sort;
9024 if (dd->ncg_home > sort->sort_nalloc)
9026 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9027 srenew(sort->sort, sort->sort_nalloc);
9028 srenew(sort->sort2, sort->sort_nalloc);
9030 cgsort = sort->sort;
9032 switch (fr->cutoff_scheme)
9035 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9038 ncg_new = dd_sort_order_nbnxn(dd, fr);
9041 gmx_incons("unimplemented");
9045 /* We alloc with the old size, since cgindex is still old */
9046 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9047 vbuf = dd->comm->vbuf.v;
9051 cgindex = dd->cgindex;
9058 /* Remove the charge groups which are no longer at home here */
9059 dd->ncg_home = ncg_new;
9062 fprintf(debug, "Set the new home charge group count to %d\n",
9066 /* Reorder the state */
9067 for (i = 0; i < estNR; i++)
9069 if (EST_DISTR(i) && (state->flags & (1<<i)))
9074 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9077 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9080 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9083 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9087 case estDISRE_INITF:
9088 case estDISRE_RM3TAV:
9089 case estORIRE_INITF:
9091 /* No ordering required */
9094 gmx_incons("Unknown state entry encountered in dd_sort_state");
9099 if (fr->cutoff_scheme == ecutsGROUP)
9102 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9105 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9107 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9108 srenew(sort->ibuf, sort->ibuf_nalloc);
9111 /* Reorder the global cg index */
9112 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9113 /* Reorder the cginfo */
9114 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9115 /* Rebuild the local cg index */
9119 for (i = 0; i < dd->ncg_home; i++)
9121 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9122 ibuf[i+1] = ibuf[i] + cgsize;
9124 for (i = 0; i < dd->ncg_home+1; i++)
9126 dd->cgindex[i] = ibuf[i];
9131 for (i = 0; i < dd->ncg_home+1; i++)
9136 /* Set the home atom number */
9137 dd->nat_home = dd->cgindex[dd->ncg_home];
9139 if (fr->cutoff_scheme == ecutsVERLET)
9141 /* The atoms are now exactly in grid order, update the grid order */
9142 nbnxn_set_atomorder(fr->nbv->nbs);
9146 /* Copy the sorted ns cell indices back to the ns grid struct */
9147 for (i = 0; i < dd->ncg_home; i++)
9149 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9151 fr->ns.grid->nr = dd->ncg_home;
9155 static void add_dd_statistics(gmx_domdec_t *dd)
9157 gmx_domdec_comm_t *comm;
9162 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9164 comm->sum_nat[ddnat-ddnatZONE] +=
9165 comm->nat[ddnat] - comm->nat[ddnat-1];
9170 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9172 gmx_domdec_comm_t *comm;
9177 /* Reset all the statistics and counters for total run counting */
9178 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9180 comm->sum_nat[ddnat-ddnatZONE] = 0;
9184 comm->load_step = 0;
9187 clear_ivec(comm->load_lim);
9192 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9194 gmx_domdec_comm_t *comm;
9198 comm = cr->dd->comm;
9200 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9207 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9209 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9211 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9216 " av. #atoms communicated per step for force: %d x %.1f\n",
9220 if (cr->dd->vsite_comm)
9223 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9224 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9229 if (cr->dd->constraint_comm)
9232 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9233 1 + ir->nLincsIter, av);
9237 gmx_incons(" Unknown type for DD statistics");
9240 fprintf(fplog, "\n");
9242 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9244 print_dd_load_av(fplog, cr->dd);
9248 void dd_partition_system(FILE *fplog,
9251 gmx_bool bMasterState,
9253 t_state *state_global,
9254 gmx_mtop_t *top_global,
9256 t_state *state_local,
9259 gmx_localtop_t *top_local,
9262 gmx_shellfc_t shellfc,
9263 gmx_constr_t constr,
9265 gmx_wallcycle_t wcycle,
9269 gmx_domdec_comm_t *comm;
9270 gmx_ddbox_t ddbox = {0};
9272 gmx_int64_t step_pcoupl;
9273 rvec cell_ns_x0, cell_ns_x1;
9274 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9275 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9276 gmx_bool bRedist, bSortCG, bResortAll;
9277 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9284 bBoxChanged = (bMasterState || DEFORM(*ir));
9285 if (ir->epc != epcNO)
9287 /* With nstpcouple > 1 pressure coupling happens.
9288 * one step after calculating the pressure.
9289 * Box scaling happens at the end of the MD step,
9290 * after the DD partitioning.
9291 * We therefore have to do DLB in the first partitioning
9292 * after an MD step where P-coupling occured.
9293 * We need to determine the last step in which p-coupling occurred.
9294 * MRS -- need to validate this for vv?
9299 step_pcoupl = step - 1;
9303 step_pcoupl = ((step - 1)/n)*n + 1;
9305 if (step_pcoupl >= comm->partition_step)
9311 bNStGlobalComm = (step % nstglobalcomm == 0);
9313 if (!comm->bDynLoadBal)
9319 /* Should we do dynamic load balacing this step?
9320 * Since it requires (possibly expensive) global communication,
9321 * we might want to do DLB less frequently.
9323 if (bBoxChanged || ir->epc != epcNO)
9325 bDoDLB = bBoxChanged;
9329 bDoDLB = bNStGlobalComm;
9333 /* Check if we have recorded loads on the nodes */
9334 if (comm->bRecordLoad && dd_load_count(comm))
9336 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9338 /* Check if we should use DLB at the second partitioning
9339 * and every 100 partitionings,
9340 * so the extra communication cost is negligible.
9342 n = max(100, nstglobalcomm);
9343 bCheckDLB = (comm->n_load_collect == 0 ||
9344 comm->n_load_have % n == n-1);
9351 /* Print load every nstlog, first and last step to the log file */
9352 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9353 comm->n_load_collect == 0 ||
9355 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9357 /* Avoid extra communication due to verbose screen output
9358 * when nstglobalcomm is set.
9360 if (bDoDLB || bLogLoad || bCheckDLB ||
9361 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9363 get_load_distribution(dd, wcycle);
9368 dd_print_load(fplog, dd, step-1);
9372 dd_print_load_verbose(dd);
9375 comm->n_load_collect++;
9379 /* Since the timings are node dependent, the master decides */
9383 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
9386 fprintf(debug, "step %s, imb loss %f\n",
9387 gmx_step_str(step, sbuf),
9388 dd_force_imb_perf_loss(dd));
9391 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9394 turn_on_dlb(fplog, cr, step);
9399 comm->n_load_have++;
9402 cgs_gl = &comm->cgs_gl;
9407 /* Clear the old state */
9408 clear_dd_indices(dd, 0, 0);
9411 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9412 TRUE, cgs_gl, state_global->x, &ddbox);
9414 get_cg_distribution(fplog, step, dd, cgs_gl,
9415 state_global->box, &ddbox, state_global->x);
9417 dd_distribute_state(dd, cgs_gl,
9418 state_global, state_local, f);
9420 dd_make_local_cgs(dd, &top_local->cgs);
9422 /* Ensure that we have space for the new distribution */
9423 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9425 if (fr->cutoff_scheme == ecutsGROUP)
9427 calc_cgcm(fplog, 0, dd->ncg_home,
9428 &top_local->cgs, state_local->x, fr->cg_cm);
9431 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9433 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9435 else if (state_local->ddp_count != dd->ddp_count)
9437 if (state_local->ddp_count > dd->ddp_count)
9439 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9442 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9444 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9447 /* Clear the old state */
9448 clear_dd_indices(dd, 0, 0);
9450 /* Build the new indices */
9451 rebuild_cgindex(dd, cgs_gl->index, state_local);
9452 make_dd_indices(dd, cgs_gl->index, 0);
9453 ncgindex_set = dd->ncg_home;
9455 if (fr->cutoff_scheme == ecutsGROUP)
9457 /* Redetermine the cg COMs */
9458 calc_cgcm(fplog, 0, dd->ncg_home,
9459 &top_local->cgs, state_local->x, fr->cg_cm);
9462 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9464 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9466 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9467 TRUE, &top_local->cgs, state_local->x, &ddbox);
9469 bRedist = comm->bDynLoadBal;
9473 /* We have the full state, only redistribute the cgs */
9475 /* Clear the non-home indices */
9476 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9479 /* Avoid global communication for dim's without pbc and -gcom */
9480 if (!bNStGlobalComm)
9482 copy_rvec(comm->box0, ddbox.box0 );
9483 copy_rvec(comm->box_size, ddbox.box_size);
9485 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9486 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9491 /* For dim's without pbc and -gcom */
9492 copy_rvec(ddbox.box0, comm->box0 );
9493 copy_rvec(ddbox.box_size, comm->box_size);
9495 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9498 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9500 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9503 /* Check if we should sort the charge groups */
9504 if (comm->nstSortCG > 0)
9506 bSortCG = (bMasterState ||
9507 (bRedist && (step % comm->nstSortCG == 0)));
9514 ncg_home_old = dd->ncg_home;
9519 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9521 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9523 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9525 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9528 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9530 &comm->cell_x0, &comm->cell_x1,
9531 dd->ncg_home, fr->cg_cm,
9532 cell_ns_x0, cell_ns_x1, &grid_density);
9536 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9539 switch (fr->cutoff_scheme)
9542 copy_ivec(fr->ns.grid->n, ncells_old);
9543 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9544 state_local->box, cell_ns_x0, cell_ns_x1,
9545 fr->rlistlong, grid_density);
9548 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9551 gmx_incons("unimplemented");
9553 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9554 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9558 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9560 /* Sort the state on charge group position.
9561 * This enables exact restarts from this step.
9562 * It also improves performance by about 15% with larger numbers
9563 * of atoms per node.
9566 /* Fill the ns grid with the home cell,
9567 * so we can sort with the indices.
9569 set_zones_ncg_home(dd);
9571 switch (fr->cutoff_scheme)
9574 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9576 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9578 comm->zones.size[0].bb_x0,
9579 comm->zones.size[0].bb_x1,
9581 comm->zones.dens_zone0,
9584 ncg_moved, bRedist ? comm->moved : NULL,
9585 fr->nbv->grp[eintLocal].kernel_type,
9586 fr->nbv->grp[eintLocal].nbat);
9588 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9591 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9592 0, dd->ncg_home, fr->cg_cm);
9594 copy_ivec(fr->ns.grid->n, ncells_new);
9597 gmx_incons("unimplemented");
9600 bResortAll = bMasterState;
9602 /* Check if we can user the old order and ns grid cell indices
9603 * of the charge groups to sort the charge groups efficiently.
9605 if (ncells_new[XX] != ncells_old[XX] ||
9606 ncells_new[YY] != ncells_old[YY] ||
9607 ncells_new[ZZ] != ncells_old[ZZ])
9614 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9615 gmx_step_str(step, sbuf), dd->ncg_home);
9617 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9618 bResortAll ? -1 : ncg_home_old);
9619 /* Rebuild all the indices */
9620 ga2la_clear(dd->ga2la);
9623 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9626 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9628 /* Setup up the communication and communicate the coordinates */
9629 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9631 /* Set the indices */
9632 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9634 /* Set the charge group boundaries for neighbor searching */
9635 set_cg_boundaries(&comm->zones);
9637 if (fr->cutoff_scheme == ecutsVERLET)
9639 set_zones_size(dd, state_local->box, &ddbox,
9640 bSortCG ? 1 : 0, comm->zones.n);
9643 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9646 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9647 -1,state_local->x,state_local->box);
9650 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9652 /* Extract a local topology from the global topology */
9653 for (i = 0; i < dd->ndim; i++)
9655 np[dd->dim[i]] = comm->cd[i].np;
9657 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9658 comm->cellsize_min, np,
9660 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9661 vsite, top_global, top_local);
9663 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9665 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9667 /* Set up the special atom communication */
9668 n = comm->nat[ddnatZONE];
9669 for (i = ddnatZONE+1; i < ddnatNR; i++)
9674 if (vsite && vsite->n_intercg_vsite)
9676 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9680 if (dd->bInterCGcons || dd->bInterCGsettles)
9682 /* Only for inter-cg constraints we need special code */
9683 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9684 constr, ir->nProjOrder,
9685 top_local->idef.il);
9689 gmx_incons("Unknown special atom type setup");
9694 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9696 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9698 /* Make space for the extra coordinates for virtual site
9699 * or constraint communication.
9701 state_local->natoms = comm->nat[ddnatNR-1];
9702 if (state_local->natoms > state_local->nalloc)
9704 dd_realloc_state(state_local, f, state_local->natoms);
9707 if (fr->bF_NoVirSum)
9709 if (vsite && vsite->n_intercg_vsite)
9711 nat_f_novirsum = comm->nat[ddnatVSITE];
9715 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9717 nat_f_novirsum = dd->nat_tot;
9721 nat_f_novirsum = dd->nat_home;
9730 /* Set the number of atoms required for the force calculation.
9731 * Forces need to be constrained when using a twin-range setup
9732 * or with energy minimization. For simple simulations we could
9733 * avoid some allocation, zeroing and copying, but this is
9734 * probably not worth the complications ande checking.
9736 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9737 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9739 /* We make the all mdatoms up to nat_tot_con.
9740 * We could save some work by only setting invmass
9741 * between nat_tot and nat_tot_con.
9743 /* This call also sets the new number of home particles to dd->nat_home */
9744 atoms2md(top_global, ir,
9745 comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
9747 /* Now we have the charges we can sort the FE interactions */
9748 dd_sort_local_top(dd, mdatoms, top_local);
9752 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9753 split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
9758 /* Make the local shell stuff, currently no communication is done */
9759 make_local_shells(cr, mdatoms, shellfc);
9762 if (ir->implicit_solvent)
9764 make_local_gb(cr, fr->born, ir->gb_algorithm);
9767 setup_bonded_threading(fr, &top_local->idef);
9769 if (!(cr->duty & DUTY_PME))
9771 /* Send the charges and/or c6/sigmas to our PME only node */
9772 gmx_pme_send_parameters(cr, mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9773 mdatoms->chargeA, mdatoms->chargeB,
9774 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9775 mdatoms->sigmaA, mdatoms->sigmaB,
9776 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9781 set_constraints(constr, top_local, ir, mdatoms, cr);
9784 if (ir->ePull != epullNO)
9786 /* Update the local pull groups */
9787 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9792 /* Update the local rotation groups */
9793 dd_make_local_rotation_groups(dd, ir->rot);
9796 if (ir->eSwapCoords != eswapNO)
9798 /* Update the local groups needed for ion swapping */
9799 dd_make_local_swap_groups(dd, ir->swap);
9802 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9803 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9805 add_dd_statistics(dd);
9807 /* Make sure we only count the cycles for this DD partitioning */
9808 clear_dd_cycle_counts(dd);
9810 /* Because the order of the atoms might have changed since
9811 * the last vsite construction, we need to communicate the constructing
9812 * atom coordinates again (for spreading the forces this MD step).
9814 dd_move_x_vsites(dd, state_local->box, state_local->x);
9816 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9818 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9820 dd_move_x(dd, state_local->box, state_local->x);
9821 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9822 -1, state_local->x, state_local->box);
9825 /* Store the partitioning step */
9826 comm->partition_step = step;
9828 /* Increase the DD partitioning counter */
9830 /* The state currently matches this DD partitioning count, store it */
9831 state_local->ddp_count = dd->ddp_count;
9834 /* The DD master node knows the complete cg distribution,
9835 * store the count so we can possibly skip the cg info communication.
9837 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9840 if (comm->DD_debug > 0)
9842 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9843 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9844 "after partitioning");