2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "gromacs/legacyheaders/typedefs.h"
48 #include "gromacs/legacyheaders/network.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/legacyheaders/domdec.h"
51 #include "gromacs/legacyheaders/domdec_network.h"
52 #include "gromacs/legacyheaders/nrnb.h"
53 #include "gromacs/legacyheaders/chargegroup.h"
54 #include "gromacs/legacyheaders/constr.h"
55 #include "gromacs/legacyheaders/mdatoms.h"
56 #include "gromacs/legacyheaders/names.h"
57 #include "gromacs/legacyheaders/force.h"
58 #include "gromacs/legacyheaders/pme.h"
59 #include "gromacs/legacyheaders/mdrun.h"
60 #include "gromacs/legacyheaders/nsgrid.h"
61 #include "gromacs/legacyheaders/shellfc.h"
62 #include "gromacs/topology/mtop_util.h"
63 #include "gromacs/legacyheaders/gmx_ga2la.h"
64 #include "gromacs/legacyheaders/macros.h"
65 #include "nbnxn_search.h"
66 #include "gromacs/legacyheaders/bondf.h"
67 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
68 #include "gromacs/legacyheaders/gpu_utils.h"
70 #include "gromacs/utility/futil.h"
71 #include "gromacs/fileio/gmxfio.h"
72 #include "gromacs/fileio/pdbio.h"
73 #include "gromacs/imd/imd.h"
74 #include "gromacs/mdlib/nb_verlet.h"
75 #include "gromacs/pbcutil/ishift.h"
76 #include "gromacs/pbcutil/pbc.h"
77 #include "gromacs/pulling/pull.h"
78 #include "gromacs/pulling/pull_rotation.h"
79 #include "gromacs/swap/swapcoords.h"
80 #include "gromacs/timing/wallcycle.h"
81 #include "gromacs/utility/basenetwork.h"
82 #include "gromacs/utility/fatalerror.h"
83 #include "gromacs/utility/gmxmpi.h"
84 #include "gromacs/utility/qsort_threadsafe.h"
85 #include "gromacs/utility/smalloc.h"
87 #define DDRANK(dd, rank) (rank)
88 #define DDMASTERRANK(dd) (dd->masterrank)
90 typedef struct gmx_domdec_master
92 /* The cell boundaries */
94 /* The global charge group division */
95 int *ncg; /* Number of home charge groups for each node */
96 int *index; /* Index of nnodes+1 into cg */
97 int *cg; /* Global charge group index */
98 int *nat; /* Number of home atoms for each node. */
99 int *ibuf; /* Buffer for communication */
100 rvec *vbuf; /* Buffer for state scattering and gathering */
101 } gmx_domdec_master_t;
105 /* The numbers of charge groups to send and receive for each cell
106 * that requires communication, the last entry contains the total
107 * number of atoms that needs to be communicated.
109 int nsend[DD_MAXIZONE+2];
110 int nrecv[DD_MAXIZONE+2];
111 /* The charge groups to send */
114 /* The atom range for non-in-place communication */
115 int cell2at0[DD_MAXIZONE];
116 int cell2at1[DD_MAXIZONE];
121 int np; /* Number of grid pulses in this dimension */
122 int np_dlb; /* For dlb, for use with edlbAUTO */
123 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
125 gmx_bool bInPlace; /* Can we communicate in place? */
126 } gmx_domdec_comm_dim_t;
130 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
131 real *cell_f; /* State var.: cell boundaries, box relative */
132 real *old_cell_f; /* Temp. var.: old cell size */
133 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
134 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
135 real *bound_min; /* Temp. var.: lower limit for cell boundary */
136 real *bound_max; /* Temp. var.: upper limit for cell boundary */
137 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
138 real *buf_ncd; /* Temp. var. */
141 #define DD_NLOAD_MAX 9
143 /* Here floats are accurate enough, since these variables
144 * only influence the load balancing, not the actual MD results.
171 gmx_cgsort_t *sort_new;
183 /* This enum determines the order of the coordinates.
184 * ddnatHOME and ddnatZONE should be first and second,
185 * the others can be ordered as wanted.
188 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
192 edlbAUTO, edlbNO, edlbYES, edlbNR
194 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
198 int dim; /* The dimension */
199 gmx_bool dim_match; /* Tells if DD and PME dims match */
200 int nslab; /* The number of PME slabs in this dimension */
201 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
202 int *pp_min; /* The minimum pp node location, size nslab */
203 int *pp_max; /* The maximum pp node location,size nslab */
204 int maxshift; /* The maximum shift for coordinate redistribution in PME */
209 real min0; /* The minimum bottom of this zone */
210 real max1; /* The maximum top of this zone */
211 real min1; /* The minimum top of this zone */
212 real mch0; /* The maximum bottom communicaton height for this zone */
213 real mch1; /* The maximum top communicaton height for this zone */
214 real p1_0; /* The bottom value of the first cell in this zone */
215 real p1_1; /* The top value of the first cell in this zone */
220 gmx_domdec_ind_t ind;
227 } dd_comm_setup_work_t;
229 typedef struct gmx_domdec_comm
231 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
232 * unless stated otherwise.
235 /* The number of decomposition dimensions for PME, 0: no PME */
237 /* The number of nodes doing PME (PP/PME or only PME) */
241 /* The communication setup including the PME only nodes */
242 gmx_bool bCartesianPP_PME;
245 int *pmenodes; /* size npmenodes */
246 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
247 * but with bCartesianPP_PME */
248 gmx_ddpme_t ddpme[2];
250 /* The DD particle-particle nodes only */
251 gmx_bool bCartesianPP;
252 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
254 /* The global charge groups */
257 /* Should we sort the cgs */
259 gmx_domdec_sort_t *sort;
261 /* Are there charge groups? */
264 /* Are there bonded and multi-body interactions between charge groups? */
265 gmx_bool bInterCGBondeds;
266 gmx_bool bInterCGMultiBody;
268 /* Data for the optional bonded interaction atom communication range */
275 /* Are we actually using DLB? */
276 gmx_bool bDynLoadBal;
278 /* Cell sizes for static load balancing, first index cartesian */
281 /* The width of the communicated boundaries */
284 /* The minimum cell size (including triclinic correction) */
286 /* For dlb, for use with edlbAUTO */
287 rvec cellsize_min_dlb;
288 /* The lower limit for the DD cell size with DLB */
290 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
291 gmx_bool bVacDLBNoLimit;
293 /* With PME load balancing we set limits on DLB */
294 gmx_bool bPMELoadBalDLBLimits;
295 /* DLB needs to take into account that we want to allow this maximum
296 * cut-off (for PME load balancing), this could limit cell boundaries.
298 real PMELoadBal_max_cutoff;
300 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
302 /* box0 and box_size are required with dim's without pbc and -gcom */
306 /* The cell boundaries */
310 /* The old location of the cell boundaries, to check cg displacements */
314 /* The communication setup and charge group boundaries for the zones */
315 gmx_domdec_zones_t zones;
317 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
318 * cell boundaries of neighboring cells for dynamic load balancing.
320 gmx_ddzone_t zone_d1[2];
321 gmx_ddzone_t zone_d2[2][2];
323 /* The coordinate/force communication setup and indices */
324 gmx_domdec_comm_dim_t cd[DIM];
325 /* The maximum number of cells to communicate with in one dimension */
328 /* Which cg distribution is stored on the master node */
329 int master_cg_ddp_count;
331 /* The number of cg's received from the direct neighbors */
332 int zone_ncg1[DD_MAXZONE];
334 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
337 /* Array for signalling if atoms have moved to another domain */
341 /* Communication buffer for general use */
345 /* Communication buffer for general use */
348 /* Temporary storage for thread parallel communication setup */
350 dd_comm_setup_work_t *dth;
352 /* Communication buffers only used with multiple grid pulses */
357 /* Communication buffers for local redistribution */
359 int cggl_flag_nalloc[DIM*2];
361 int cgcm_state_nalloc[DIM*2];
363 /* Cell sizes for dynamic load balancing */
364 gmx_domdec_root_t **root;
368 real cell_f_max0[DIM];
369 real cell_f_min1[DIM];
371 /* Stuff for load communication */
372 gmx_bool bRecordLoad;
373 gmx_domdec_load_t *load;
374 int nrank_gpu_shared;
376 MPI_Comm *mpi_comm_load;
377 MPI_Comm mpi_comm_gpu_shared;
380 /* Maximum DLB scaling per load balancing step in percent */
384 float cycl[ddCyclNr];
385 int cycl_n[ddCyclNr];
386 float cycl_max[ddCyclNr];
387 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
391 /* Have often have did we have load measurements */
393 /* Have often have we collected the load measurements */
397 double sum_nat[ddnatNR-ddnatZONE];
407 /* The last partition step */
408 gmx_int64_t partition_step;
416 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
419 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
420 #define DD_FLAG_NRCG 65535
421 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
422 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
424 /* Zone permutation required to obtain consecutive charge groups
425 * for neighbor searching.
427 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
429 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
430 * components see only j zones with that component 0.
433 /* The DD zone order */
434 static const ivec dd_zo[DD_MAXZONE] =
435 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
440 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
445 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
450 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
452 /* Factors used to avoid problems due to rounding issues */
453 #define DD_CELL_MARGIN 1.0001
454 #define DD_CELL_MARGIN2 1.00005
455 /* Factor to account for pressure scaling during nstlist steps */
456 #define DD_PRES_SCALE_MARGIN 1.02
458 /* Turn on DLB when the load imbalance causes this amount of total loss.
459 * There is a bit of overhead with DLB and it's difficult to achieve
460 * a load imbalance of less than 2% with DLB.
462 #define DD_PERF_LOSS_DLB_ON 0.02
464 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
465 #define DD_PERF_LOSS_WARN 0.05
467 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
469 /* Use separate MPI send and receive commands
470 * when nnodes <= GMX_DD_NNODES_SENDRECV.
471 * This saves memory (and some copying for small nnodes).
472 * For high parallelization scatter and gather calls are used.
474 #define GMX_DD_NNODES_SENDRECV 4
478 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
480 static void index2xyz(ivec nc,int ind,ivec xyz)
482 xyz[XX] = ind % nc[XX];
483 xyz[YY] = (ind / nc[XX]) % nc[YY];
484 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
488 /* This order is required to minimize the coordinate communication in PME
489 * which uses decomposition in the x direction.
491 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
493 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
495 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
496 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
497 xyz[ZZ] = ind % nc[ZZ];
500 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
505 ddindex = dd_index(dd->nc, c);
506 if (dd->comm->bCartesianPP_PME)
508 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
510 else if (dd->comm->bCartesianPP)
513 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
524 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
526 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
529 int ddglatnr(gmx_domdec_t *dd, int i)
539 if (i >= dd->comm->nat[ddnatNR-1])
541 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
543 atnr = dd->gatindex[i] + 1;
549 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
551 return &dd->comm->cgs_gl;
554 static void vec_rvec_init(vec_rvec_t *v)
560 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
564 v->nalloc = over_alloc_dd(n);
565 srenew(v->v, v->nalloc);
569 void dd_store_state(gmx_domdec_t *dd, t_state *state)
573 if (state->ddp_count != dd->ddp_count)
575 gmx_incons("The state does not the domain decomposition state");
578 state->ncg_gl = dd->ncg_home;
579 if (state->ncg_gl > state->cg_gl_nalloc)
581 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
582 srenew(state->cg_gl, state->cg_gl_nalloc);
584 for (i = 0; i < state->ncg_gl; i++)
586 state->cg_gl[i] = dd->index_gl[i];
589 state->ddp_count_cg_gl = dd->ddp_count;
592 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
594 return &dd->comm->zones;
597 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
598 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
600 gmx_domdec_zones_t *zones;
603 zones = &dd->comm->zones;
606 while (icg >= zones->izone[izone].cg1)
615 else if (izone < zones->nizone)
617 *jcg0 = zones->izone[izone].jcg0;
621 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
622 icg, izone, zones->nizone);
625 *jcg1 = zones->izone[izone].jcg1;
627 for (d = 0; d < dd->ndim; d++)
630 shift0[dim] = zones->izone[izone].shift0[dim];
631 shift1[dim] = zones->izone[izone].shift1[dim];
632 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
634 /* A conservative approach, this can be optimized */
641 int dd_natoms_vsite(gmx_domdec_t *dd)
643 return dd->comm->nat[ddnatVSITE];
646 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
648 *at_start = dd->comm->nat[ddnatCON-1];
649 *at_end = dd->comm->nat[ddnatCON];
652 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
654 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
655 int *index, *cgindex;
656 gmx_domdec_comm_t *comm;
657 gmx_domdec_comm_dim_t *cd;
658 gmx_domdec_ind_t *ind;
659 rvec shift = {0, 0, 0}, *buf, *rbuf;
660 gmx_bool bPBC, bScrew;
664 cgindex = dd->cgindex;
669 nat_tot = dd->nat_home;
670 for (d = 0; d < dd->ndim; d++)
672 bPBC = (dd->ci[dd->dim[d]] == 0);
673 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
676 copy_rvec(box[dd->dim[d]], shift);
679 for (p = 0; p < cd->np; p++)
686 for (i = 0; i < ind->nsend[nzone]; i++)
688 at0 = cgindex[index[i]];
689 at1 = cgindex[index[i]+1];
690 for (j = at0; j < at1; j++)
692 copy_rvec(x[j], buf[n]);
699 for (i = 0; i < ind->nsend[nzone]; i++)
701 at0 = cgindex[index[i]];
702 at1 = cgindex[index[i]+1];
703 for (j = at0; j < at1; j++)
705 /* We need to shift the coordinates */
706 rvec_add(x[j], shift, buf[n]);
713 for (i = 0; i < ind->nsend[nzone]; i++)
715 at0 = cgindex[index[i]];
716 at1 = cgindex[index[i]+1];
717 for (j = at0; j < at1; j++)
720 buf[n][XX] = x[j][XX] + shift[XX];
722 * This operation requires a special shift force
723 * treatment, which is performed in calc_vir.
725 buf[n][YY] = box[YY][YY] - x[j][YY];
726 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
738 rbuf = comm->vbuf2.v;
740 /* Send and receive the coordinates */
741 dd_sendrecv_rvec(dd, d, dddirBackward,
742 buf, ind->nsend[nzone+1],
743 rbuf, ind->nrecv[nzone+1]);
747 for (zone = 0; zone < nzone; zone++)
749 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
751 copy_rvec(rbuf[j], x[i]);
756 nat_tot += ind->nrecv[nzone+1];
762 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
764 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
765 int *index, *cgindex;
766 gmx_domdec_comm_t *comm;
767 gmx_domdec_comm_dim_t *cd;
768 gmx_domdec_ind_t *ind;
772 gmx_bool bPBC, bScrew;
776 cgindex = dd->cgindex;
781 nzone = comm->zones.n/2;
782 nat_tot = dd->nat_tot;
783 for (d = dd->ndim-1; d >= 0; d--)
785 bPBC = (dd->ci[dd->dim[d]] == 0);
786 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
787 if (fshift == NULL && !bScrew)
791 /* Determine which shift vector we need */
797 for (p = cd->np-1; p >= 0; p--)
800 nat_tot -= ind->nrecv[nzone+1];
807 sbuf = comm->vbuf2.v;
809 for (zone = 0; zone < nzone; zone++)
811 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
813 copy_rvec(f[i], sbuf[j]);
818 /* Communicate the forces */
819 dd_sendrecv_rvec(dd, d, dddirForward,
820 sbuf, ind->nrecv[nzone+1],
821 buf, ind->nsend[nzone+1]);
823 /* Add the received forces */
827 for (i = 0; i < ind->nsend[nzone]; i++)
829 at0 = cgindex[index[i]];
830 at1 = cgindex[index[i]+1];
831 for (j = at0; j < at1; j++)
833 rvec_inc(f[j], buf[n]);
840 for (i = 0; i < ind->nsend[nzone]; i++)
842 at0 = cgindex[index[i]];
843 at1 = cgindex[index[i]+1];
844 for (j = at0; j < at1; j++)
846 rvec_inc(f[j], buf[n]);
847 /* Add this force to the shift force */
848 rvec_inc(fshift[is], buf[n]);
855 for (i = 0; i < ind->nsend[nzone]; i++)
857 at0 = cgindex[index[i]];
858 at1 = cgindex[index[i]+1];
859 for (j = at0; j < at1; j++)
861 /* Rotate the force */
862 f[j][XX] += buf[n][XX];
863 f[j][YY] -= buf[n][YY];
864 f[j][ZZ] -= buf[n][ZZ];
867 /* Add this force to the shift force */
868 rvec_inc(fshift[is], buf[n]);
879 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
881 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
882 int *index, *cgindex;
883 gmx_domdec_comm_t *comm;
884 gmx_domdec_comm_dim_t *cd;
885 gmx_domdec_ind_t *ind;
890 cgindex = dd->cgindex;
892 buf = &comm->vbuf.v[0][0];
895 nat_tot = dd->nat_home;
896 for (d = 0; d < dd->ndim; d++)
899 for (p = 0; p < cd->np; p++)
904 for (i = 0; i < ind->nsend[nzone]; i++)
906 at0 = cgindex[index[i]];
907 at1 = cgindex[index[i]+1];
908 for (j = at0; j < at1; j++)
921 rbuf = &comm->vbuf2.v[0][0];
923 /* Send and receive the coordinates */
924 dd_sendrecv_real(dd, d, dddirBackward,
925 buf, ind->nsend[nzone+1],
926 rbuf, ind->nrecv[nzone+1]);
930 for (zone = 0; zone < nzone; zone++)
932 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
939 nat_tot += ind->nrecv[nzone+1];
945 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
947 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
948 int *index, *cgindex;
949 gmx_domdec_comm_t *comm;
950 gmx_domdec_comm_dim_t *cd;
951 gmx_domdec_ind_t *ind;
956 cgindex = dd->cgindex;
958 buf = &comm->vbuf.v[0][0];
961 nzone = comm->zones.n/2;
962 nat_tot = dd->nat_tot;
963 for (d = dd->ndim-1; d >= 0; d--)
966 for (p = cd->np-1; p >= 0; p--)
969 nat_tot -= ind->nrecv[nzone+1];
976 sbuf = &comm->vbuf2.v[0][0];
978 for (zone = 0; zone < nzone; zone++)
980 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
987 /* Communicate the forces */
988 dd_sendrecv_real(dd, d, dddirForward,
989 sbuf, ind->nrecv[nzone+1],
990 buf, ind->nsend[nzone+1]);
992 /* Add the received forces */
994 for (i = 0; i < ind->nsend[nzone]; i++)
996 at0 = cgindex[index[i]];
997 at1 = cgindex[index[i]+1];
998 for (j = at0; j < at1; j++)
1009 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
1011 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1013 zone->min0, zone->max1,
1014 zone->mch0, zone->mch0,
1015 zone->p1_0, zone->p1_1);
1019 #define DDZONECOMM_MAXZONE 5
1020 #define DDZONECOMM_BUFSIZE 3
1022 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1023 int ddimind, int direction,
1024 gmx_ddzone_t *buf_s, int n_s,
1025 gmx_ddzone_t *buf_r, int n_r)
1027 #define ZBS DDZONECOMM_BUFSIZE
1028 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1029 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
1032 for (i = 0; i < n_s; i++)
1034 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1035 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1036 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1037 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1038 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1039 vbuf_s[i*ZBS+1][2] = 0;
1040 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1041 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1042 vbuf_s[i*ZBS+2][2] = 0;
1045 dd_sendrecv_rvec(dd, ddimind, direction,
1049 for (i = 0; i < n_r; i++)
1051 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1052 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1053 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1054 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1055 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1056 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1057 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
1063 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1064 rvec cell_ns_x0, rvec cell_ns_x1)
1066 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1068 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1069 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1070 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1071 rvec extr_s[2], extr_r[2];
1073 real dist_d, c = 0, det;
1074 gmx_domdec_comm_t *comm;
1075 gmx_bool bPBC, bUse;
1079 for (d = 1; d < dd->ndim; d++)
1082 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1083 zp->min0 = cell_ns_x0[dim];
1084 zp->max1 = cell_ns_x1[dim];
1085 zp->min1 = cell_ns_x1[dim];
1086 zp->mch0 = cell_ns_x0[dim];
1087 zp->mch1 = cell_ns_x1[dim];
1088 zp->p1_0 = cell_ns_x0[dim];
1089 zp->p1_1 = cell_ns_x1[dim];
1092 for (d = dd->ndim-2; d >= 0; d--)
1095 bPBC = (dim < ddbox->npbcdim);
1097 /* Use an rvec to store two reals */
1098 extr_s[d][0] = comm->cell_f0[d+1];
1099 extr_s[d][1] = comm->cell_f1[d+1];
1100 extr_s[d][2] = comm->cell_f1[d+1];
1103 /* Store the extremes in the backward sending buffer,
1104 * so the get updated separately from the forward communication.
1106 for (d1 = d; d1 < dd->ndim-1; d1++)
1108 /* We invert the order to be able to use the same loop for buf_e */
1109 buf_s[pos].min0 = extr_s[d1][1];
1110 buf_s[pos].max1 = extr_s[d1][0];
1111 buf_s[pos].min1 = extr_s[d1][2];
1112 buf_s[pos].mch0 = 0;
1113 buf_s[pos].mch1 = 0;
1114 /* Store the cell corner of the dimension we communicate along */
1115 buf_s[pos].p1_0 = comm->cell_x0[dim];
1116 buf_s[pos].p1_1 = 0;
1120 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1123 if (dd->ndim == 3 && d == 0)
1125 buf_s[pos] = comm->zone_d2[0][1];
1127 buf_s[pos] = comm->zone_d1[0];
1131 /* We only need to communicate the extremes
1132 * in the forward direction
1134 npulse = comm->cd[d].np;
1137 /* Take the minimum to avoid double communication */
1138 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1142 /* Without PBC we should really not communicate over
1143 * the boundaries, but implementing that complicates
1144 * the communication setup and therefore we simply
1145 * do all communication, but ignore some data.
1147 npulse_min = npulse;
1149 for (p = 0; p < npulse_min; p++)
1151 /* Communicate the extremes forward */
1152 bUse = (bPBC || dd->ci[dim] > 0);
1154 dd_sendrecv_rvec(dd, d, dddirForward,
1155 extr_s+d, dd->ndim-d-1,
1156 extr_r+d, dd->ndim-d-1);
1160 for (d1 = d; d1 < dd->ndim-1; d1++)
1162 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1163 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1164 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1170 for (p = 0; p < npulse; p++)
1172 /* Communicate all the zone information backward */
1173 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1175 dd_sendrecv_ddzone(dd, d, dddirBackward,
1182 for (d1 = d+1; d1 < dd->ndim; d1++)
1184 /* Determine the decrease of maximum required
1185 * communication height along d1 due to the distance along d,
1186 * this avoids a lot of useless atom communication.
1188 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1190 if (ddbox->tric_dir[dim])
1192 /* c is the off-diagonal coupling between the cell planes
1193 * along directions d and d1.
1195 c = ddbox->v[dim][dd->dim[d1]][dim];
1201 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1204 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1208 /* A negative value signals out of range */
1214 /* Accumulate the extremes over all pulses */
1215 for (i = 0; i < buf_size; i++)
1219 buf_e[i] = buf_r[i];
1225 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1226 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1227 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1230 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1238 if (bUse && dh[d1] >= 0)
1240 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1241 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1244 /* Copy the received buffer to the send buffer,
1245 * to pass the data through with the next pulse.
1247 buf_s[i] = buf_r[i];
1249 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1250 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1252 /* Store the extremes */
1255 for (d1 = d; d1 < dd->ndim-1; d1++)
1257 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1258 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1259 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1263 if (d == 1 || (d == 0 && dd->ndim == 3))
1265 for (i = d; i < 2; i++)
1267 comm->zone_d2[1-d][i] = buf_e[pos];
1273 comm->zone_d1[1] = buf_e[pos];
1283 for (i = 0; i < 2; i++)
1287 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1289 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1290 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1296 for (i = 0; i < 2; i++)
1298 for (j = 0; j < 2; j++)
1302 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1304 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1305 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1309 for (d = 1; d < dd->ndim; d++)
1311 comm->cell_f_max0[d] = extr_s[d-1][0];
1312 comm->cell_f_min1[d] = extr_s[d-1][1];
1315 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1316 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1321 static void dd_collect_cg(gmx_domdec_t *dd,
1322 t_state *state_local)
1324 gmx_domdec_master_t *ma = NULL;
1325 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1328 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1330 /* The master has the correct distribution */
1334 if (state_local->ddp_count == dd->ddp_count)
1336 ncg_home = dd->ncg_home;
1338 nat_home = dd->nat_home;
1340 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1342 cgs_gl = &dd->comm->cgs_gl;
1344 ncg_home = state_local->ncg_gl;
1345 cg = state_local->cg_gl;
1347 for (i = 0; i < ncg_home; i++)
1349 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1354 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1357 buf2[0] = dd->ncg_home;
1358 buf2[1] = dd->nat_home;
1368 /* Collect the charge group and atom counts on the master */
1369 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1374 for (i = 0; i < dd->nnodes; i++)
1376 ma->ncg[i] = ma->ibuf[2*i];
1377 ma->nat[i] = ma->ibuf[2*i+1];
1378 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1381 /* Make byte counts and indices */
1382 for (i = 0; i < dd->nnodes; i++)
1384 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1385 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1389 fprintf(debug, "Initial charge group distribution: ");
1390 for (i = 0; i < dd->nnodes; i++)
1392 fprintf(debug, " %d", ma->ncg[i]);
1394 fprintf(debug, "\n");
1398 /* Collect the charge group indices on the master */
1400 dd->ncg_home*sizeof(int), dd->index_gl,
1401 DDMASTER(dd) ? ma->ibuf : NULL,
1402 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1403 DDMASTER(dd) ? ma->cg : NULL);
1405 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1408 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1411 gmx_domdec_master_t *ma;
1412 int n, i, c, a, nalloc = 0;
1421 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1422 dd->rank, dd->mpi_comm_all);
1427 /* Copy the master coordinates to the global array */
1428 cgs_gl = &dd->comm->cgs_gl;
1430 n = DDMASTERRANK(dd);
1432 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1434 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1436 copy_rvec(lv[a++], v[c]);
1440 for (n = 0; n < dd->nnodes; n++)
1444 if (ma->nat[n] > nalloc)
1446 nalloc = over_alloc_dd(ma->nat[n]);
1447 srenew(buf, nalloc);
1450 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1451 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1454 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1456 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1458 copy_rvec(buf[a++], v[c]);
1467 static void get_commbuffer_counts(gmx_domdec_t *dd,
1468 int **counts, int **disps)
1470 gmx_domdec_master_t *ma;
1475 /* Make the rvec count and displacment arrays */
1477 *disps = ma->ibuf + dd->nnodes;
1478 for (n = 0; n < dd->nnodes; n++)
1480 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1481 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1485 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1488 gmx_domdec_master_t *ma;
1489 int *rcounts = NULL, *disps = NULL;
1498 get_commbuffer_counts(dd, &rcounts, &disps);
1503 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1507 cgs_gl = &dd->comm->cgs_gl;
1510 for (n = 0; n < dd->nnodes; n++)
1512 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1514 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1516 copy_rvec(buf[a++], v[c]);
1523 void dd_collect_vec(gmx_domdec_t *dd,
1524 t_state *state_local, rvec *lv, rvec *v)
1526 gmx_domdec_master_t *ma;
1527 int n, i, c, a, nalloc = 0;
1530 dd_collect_cg(dd, state_local);
1532 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1534 dd_collect_vec_sendrecv(dd, lv, v);
1538 dd_collect_vec_gatherv(dd, lv, v);
1543 void dd_collect_state(gmx_domdec_t *dd,
1544 t_state *state_local, t_state *state)
1548 nh = state->nhchainlength;
1552 for (i = 0; i < efptNR; i++)
1554 state->lambda[i] = state_local->lambda[i];
1556 state->fep_state = state_local->fep_state;
1557 state->veta = state_local->veta;
1558 state->vol0 = state_local->vol0;
1559 copy_mat(state_local->box, state->box);
1560 copy_mat(state_local->boxv, state->boxv);
1561 copy_mat(state_local->svir_prev, state->svir_prev);
1562 copy_mat(state_local->fvir_prev, state->fvir_prev);
1563 copy_mat(state_local->pres_prev, state->pres_prev);
1565 for (i = 0; i < state_local->ngtc; i++)
1567 for (j = 0; j < nh; j++)
1569 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1570 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1572 state->therm_integral[i] = state_local->therm_integral[i];
1574 for (i = 0; i < state_local->nnhpres; i++)
1576 for (j = 0; j < nh; j++)
1578 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1579 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1583 for (est = 0; est < estNR; est++)
1585 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1590 dd_collect_vec(dd, state_local, state_local->x, state->x);
1593 dd_collect_vec(dd, state_local, state_local->v, state->v);
1596 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1599 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1601 case estDISRE_INITF:
1602 case estDISRE_RM3TAV:
1603 case estORIRE_INITF:
1607 gmx_incons("Unknown state entry encountered in dd_collect_state");
1613 static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1619 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1622 state->nalloc = over_alloc_dd(nalloc);
1624 for (est = 0; est < estNR; est++)
1626 if (EST_DISTR(est) && (state->flags & (1<<est)))
1631 srenew(state->x, state->nalloc);
1634 srenew(state->v, state->nalloc);
1637 srenew(state->sd_X, state->nalloc);
1640 srenew(state->cg_p, state->nalloc);
1642 case estDISRE_INITF:
1643 case estDISRE_RM3TAV:
1644 case estORIRE_INITF:
1646 /* No reallocation required */
1649 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1656 srenew(*f, state->nalloc);
1660 static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1663 if (nalloc > fr->cg_nalloc)
1667 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1669 fr->cg_nalloc = over_alloc_dd(nalloc);
1670 srenew(fr->cginfo, fr->cg_nalloc);
1671 if (fr->cutoff_scheme == ecutsGROUP)
1673 srenew(fr->cg_cm, fr->cg_nalloc);
1676 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1678 /* We don't use charge groups, we use x in state to set up
1679 * the atom communication.
1681 dd_realloc_state(state, f, nalloc);
1685 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1688 gmx_domdec_master_t *ma;
1689 int n, i, c, a, nalloc = 0;
1696 for (n = 0; n < dd->nnodes; n++)
1700 if (ma->nat[n] > nalloc)
1702 nalloc = over_alloc_dd(ma->nat[n]);
1703 srenew(buf, nalloc);
1705 /* Use lv as a temporary buffer */
1707 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1709 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1711 copy_rvec(v[c], buf[a++]);
1714 if (a != ma->nat[n])
1716 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1721 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1722 DDRANK(dd, n), n, dd->mpi_comm_all);
1727 n = DDMASTERRANK(dd);
1729 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1731 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1733 copy_rvec(v[c], lv[a++]);
1740 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1741 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1746 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1749 gmx_domdec_master_t *ma;
1750 int *scounts = NULL, *disps = NULL;
1751 int n, i, c, a, nalloc = 0;
1758 get_commbuffer_counts(dd, &scounts, &disps);
1762 for (n = 0; n < dd->nnodes; n++)
1764 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1766 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1768 copy_rvec(v[c], buf[a++]);
1774 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1777 static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1779 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1781 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1785 dd_distribute_vec_scatterv(dd, cgs, v, lv);
1789 static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1792 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1793 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1794 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1796 if (dfhist->nlambda > 0)
1798 int nlam = dfhist->nlambda;
1799 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1800 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1801 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1802 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1803 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1804 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
1806 for (i = 0; i < nlam; i++)
1808 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1809 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1810 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1811 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1812 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1813 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
1818 static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1819 t_state *state, t_state *state_local,
1824 nh = state->nhchainlength;
1828 for (i = 0; i < efptNR; i++)
1830 state_local->lambda[i] = state->lambda[i];
1832 state_local->fep_state = state->fep_state;
1833 state_local->veta = state->veta;
1834 state_local->vol0 = state->vol0;
1835 copy_mat(state->box, state_local->box);
1836 copy_mat(state->box_rel, state_local->box_rel);
1837 copy_mat(state->boxv, state_local->boxv);
1838 copy_mat(state->svir_prev, state_local->svir_prev);
1839 copy_mat(state->fvir_prev, state_local->fvir_prev);
1840 copy_df_history(&state_local->dfhist, &state->dfhist);
1841 for (i = 0; i < state_local->ngtc; i++)
1843 for (j = 0; j < nh; j++)
1845 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1846 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1848 state_local->therm_integral[i] = state->therm_integral[i];
1850 for (i = 0; i < state_local->nnhpres; i++)
1852 for (j = 0; j < nh; j++)
1854 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1855 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1859 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1860 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1861 dd_bcast(dd, sizeof(real), &state_local->veta);
1862 dd_bcast(dd, sizeof(real), &state_local->vol0);
1863 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1864 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1865 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1866 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1867 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1868 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1869 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1870 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1871 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1872 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1874 /* communicate df_history -- required for restarting from checkpoint */
1875 dd_distribute_dfhist(dd, &state_local->dfhist);
1877 if (dd->nat_home > state_local->nalloc)
1879 dd_realloc_state(state_local, f, dd->nat_home);
1881 for (i = 0; i < estNR; i++)
1883 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1888 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1891 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1894 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1897 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1899 case estDISRE_INITF:
1900 case estDISRE_RM3TAV:
1901 case estORIRE_INITF:
1903 /* Not implemented yet */
1906 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1912 static char dim2char(int dim)
1918 case XX: c = 'X'; break;
1919 case YY: c = 'Y'; break;
1920 case ZZ: c = 'Z'; break;
1921 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
1927 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1928 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1930 rvec grid_s[2], *grid_r = NULL, cx, r;
1931 char fname[STRLEN], buf[22];
1933 int a, i, d, z, y, x;
1937 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1938 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1942 snew(grid_r, 2*dd->nnodes);
1945 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : NULL);
1949 for (d = 0; d < DIM; d++)
1951 for (i = 0; i < DIM; i++)
1959 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1961 tric[d][i] = box[i][d]/box[i][i];
1970 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1971 out = gmx_fio_fopen(fname, "w");
1972 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1974 for (i = 0; i < dd->nnodes; i++)
1976 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1977 for (d = 0; d < DIM; d++)
1979 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1981 for (z = 0; z < 2; z++)
1983 for (y = 0; y < 2; y++)
1985 for (x = 0; x < 2; x++)
1987 cx[XX] = grid_r[i*2+x][XX];
1988 cx[YY] = grid_r[i*2+y][YY];
1989 cx[ZZ] = grid_r[i*2+z][ZZ];
1991 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1992 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
1996 for (d = 0; d < DIM; d++)
1998 for (x = 0; x < 4; x++)
2002 case 0: y = 1 + i*8 + 2*x; break;
2003 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
2004 case 2: y = 1 + i*8 + x; break;
2006 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2010 gmx_fio_fclose(out);
2015 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
2016 gmx_mtop_t *mtop, t_commrec *cr,
2017 int natoms, rvec x[], matrix box)
2019 char fname[STRLEN], buf[22];
2021 int i, ii, resnr, c;
2022 char *atomname, *resname;
2029 natoms = dd->comm->nat[ddnatVSITE];
2032 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2034 out = gmx_fio_fopen(fname, "w");
2036 fprintf(out, "TITLE %s\n", title);
2037 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2038 for (i = 0; i < natoms; i++)
2040 ii = dd->gatindex[i];
2041 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
2042 if (i < dd->comm->nat[ddnatZONE])
2045 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2051 else if (i < dd->comm->nat[ddnatVSITE])
2053 b = dd->comm->zones.n;
2057 b = dd->comm->zones.n + 1;
2059 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
2060 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
2062 fprintf(out, "TER\n");
2064 gmx_fio_fclose(out);
2067 real dd_cutoff_mbody(gmx_domdec_t *dd)
2069 gmx_domdec_comm_t *comm;
2076 if (comm->bInterCGBondeds)
2078 if (comm->cutoff_mbody > 0)
2080 r = comm->cutoff_mbody;
2084 /* cutoff_mbody=0 means we do not have DLB */
2085 r = comm->cellsize_min[dd->dim[0]];
2086 for (di = 1; di < dd->ndim; di++)
2088 r = min(r, comm->cellsize_min[dd->dim[di]]);
2090 if (comm->bBondComm)
2092 r = max(r, comm->cutoff_mbody);
2096 r = min(r, comm->cutoff);
2104 real dd_cutoff_twobody(gmx_domdec_t *dd)
2108 r_mb = dd_cutoff_mbody(dd);
2110 return max(dd->comm->cutoff, r_mb);
2114 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2118 nc = dd->nc[dd->comm->cartpmedim];
2119 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2120 copy_ivec(coord, coord_pme);
2121 coord_pme[dd->comm->cartpmedim] =
2122 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
2125 static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
2127 /* Here we assign a PME node to communicate with this DD node
2128 * by assuming that the major index of both is x.
2129 * We add cr->npmenodes/2 to obtain an even distribution.
2131 return (ddindex*npme + npme/2)/ndd;
2134 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2136 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2139 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2141 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
2144 static int *dd_pmenodes(t_commrec *cr)
2149 snew(pmenodes, cr->npmenodes);
2151 for (i = 0; i < cr->dd->nnodes; i++)
2153 p0 = cr_ddindex2pmeindex(cr, i);
2154 p1 = cr_ddindex2pmeindex(cr, i+1);
2155 if (i+1 == cr->dd->nnodes || p1 > p0)
2159 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2161 pmenodes[n] = i + 1 + n;
2169 static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2172 ivec coords, coords_pme, nc;
2177 if (dd->comm->bCartesian) {
2178 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2179 dd_coords2pmecoords(dd,coords,coords_pme);
2180 copy_ivec(dd->ntot,nc);
2181 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2182 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2184 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2186 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2192 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
2197 static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2199 gmx_domdec_comm_t *comm;
2201 int ddindex, nodeid = -1;
2203 comm = cr->dd->comm;
2208 if (comm->bCartesianPP_PME)
2211 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2216 ddindex = dd_index(cr->dd->nc, coords);
2217 if (comm->bCartesianPP)
2219 nodeid = comm->ddindex2simnodeid[ddindex];
2225 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
2237 static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2240 gmx_domdec_comm_t *comm;
2241 ivec coord, coord_pme;
2248 /* This assumes a uniform x domain decomposition grid cell size */
2249 if (comm->bCartesianPP_PME)
2252 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
2253 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2255 /* This is a PP node */
2256 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2257 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2261 else if (comm->bCartesianPP)
2263 if (sim_nodeid < dd->nnodes)
2265 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2270 /* This assumes DD cells with identical x coordinates
2271 * are numbered sequentially.
2273 if (dd->comm->pmenodes == NULL)
2275 if (sim_nodeid < dd->nnodes)
2277 /* The DD index equals the nodeid */
2278 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2284 while (sim_nodeid > dd->comm->pmenodes[i])
2288 if (sim_nodeid < dd->comm->pmenodes[i])
2290 pmenode = dd->comm->pmenodes[i];
2298 void get_pme_nnodes(const gmx_domdec_t *dd,
2299 int *npmenodes_x, int *npmenodes_y)
2303 *npmenodes_x = dd->comm->npmenodes_x;
2304 *npmenodes_y = dd->comm->npmenodes_y;
2313 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2315 gmx_bool bPMEOnlyNode;
2317 if (DOMAINDECOMP(cr))
2319 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2323 bPMEOnlyNode = FALSE;
2326 return bPMEOnlyNode;
2329 void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2330 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2334 ivec coord, coord_pme;
2338 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2341 for (x = 0; x < dd->nc[XX]; x++)
2343 for (y = 0; y < dd->nc[YY]; y++)
2345 for (z = 0; z < dd->nc[ZZ]; z++)
2347 if (dd->comm->bCartesianPP_PME)
2352 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2353 if (dd->ci[XX] == coord_pme[XX] &&
2354 dd->ci[YY] == coord_pme[YY] &&
2355 dd->ci[ZZ] == coord_pme[ZZ])
2357 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2362 /* The slab corresponds to the nodeid in the PME group */
2363 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2365 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2372 /* The last PP-only node is the peer node */
2373 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2377 fprintf(debug, "Receive coordinates from PP ranks:");
2378 for (x = 0; x < *nmy_ddnodes; x++)
2380 fprintf(debug, " %d", (*my_ddnodes)[x]);
2382 fprintf(debug, "\n");
2386 static gmx_bool receive_vir_ener(t_commrec *cr)
2388 gmx_domdec_comm_t *comm;
2389 int pmenode, coords[DIM], rank;
2393 if (cr->npmenodes < cr->dd->nnodes)
2395 comm = cr->dd->comm;
2396 if (comm->bCartesianPP_PME)
2398 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2400 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
2401 coords[comm->cartpmedim]++;
2402 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2404 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2405 if (dd_simnode2pmenode(cr, rank) == pmenode)
2407 /* This is not the last PP node for pmenode */
2415 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2416 if (cr->sim_nodeid+1 < cr->nnodes &&
2417 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2419 /* This is not the last PP node for pmenode */
2428 static void set_zones_ncg_home(gmx_domdec_t *dd)
2430 gmx_domdec_zones_t *zones;
2433 zones = &dd->comm->zones;
2435 zones->cg_range[0] = 0;
2436 for (i = 1; i < zones->n+1; i++)
2438 zones->cg_range[i] = dd->ncg_home;
2440 /* zone_ncg1[0] should always be equal to ncg_home */
2441 dd->comm->zone_ncg1[0] = dd->ncg_home;
2444 static void rebuild_cgindex(gmx_domdec_t *dd,
2445 const int *gcgs_index, t_state *state)
2447 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2450 dd_cg_gl = dd->index_gl;
2451 cgindex = dd->cgindex;
2454 for (i = 0; i < state->ncg_gl; i++)
2458 dd_cg_gl[i] = cg_gl;
2459 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2463 dd->ncg_home = state->ncg_gl;
2466 set_zones_ncg_home(dd);
2469 static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2471 while (cg >= cginfo_mb->cg_end)
2476 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2479 static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2480 t_forcerec *fr, char *bLocalCG)
2482 cginfo_mb_t *cginfo_mb;
2488 cginfo_mb = fr->cginfo_mb;
2489 cginfo = fr->cginfo;
2491 for (cg = cg0; cg < cg1; cg++)
2493 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2497 if (bLocalCG != NULL)
2499 for (cg = cg0; cg < cg1; cg++)
2501 bLocalCG[index_gl[cg]] = TRUE;
2506 static void make_dd_indices(gmx_domdec_t *dd,
2507 const int *gcgs_index, int cg_start)
2509 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2510 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2515 bLocalCG = dd->comm->bLocalCG;
2517 if (dd->nat_tot > dd->gatindex_nalloc)
2519 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2520 srenew(dd->gatindex, dd->gatindex_nalloc);
2523 nzone = dd->comm->zones.n;
2524 zone2cg = dd->comm->zones.cg_range;
2525 zone_ncg1 = dd->comm->zone_ncg1;
2526 index_gl = dd->index_gl;
2527 gatindex = dd->gatindex;
2528 bCGs = dd->comm->bCGs;
2530 if (zone2cg[1] != dd->ncg_home)
2532 gmx_incons("dd->ncg_zone is not up to date");
2535 /* Make the local to global and global to local atom index */
2536 a = dd->cgindex[cg_start];
2537 for (zone = 0; zone < nzone; zone++)
2545 cg0 = zone2cg[zone];
2547 cg1 = zone2cg[zone+1];
2548 cg1_p1 = cg0 + zone_ncg1[zone];
2550 for (cg = cg0; cg < cg1; cg++)
2555 /* Signal that this cg is from more than one pulse away */
2558 cg_gl = index_gl[cg];
2561 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2564 ga2la_set(dd->ga2la, a_gl, a, zone1);
2570 gatindex[a] = cg_gl;
2571 ga2la_set(dd->ga2la, cg_gl, a, zone1);
2578 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2581 int ncg, i, ngl, nerr;
2584 if (bLocalCG == NULL)
2588 for (i = 0; i < dd->ncg_tot; i++)
2590 if (!bLocalCG[dd->index_gl[i]])
2593 "DD rank %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
2598 for (i = 0; i < ncg_sys; i++)
2605 if (ngl != dd->ncg_tot)
2607 fprintf(stderr, "DD rank %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
2614 static void check_index_consistency(gmx_domdec_t *dd,
2615 int natoms_sys, int ncg_sys,
2618 int nerr, ngl, i, a, cell;
2623 if (dd->comm->DD_debug > 1)
2625 snew(have, natoms_sys);
2626 for (a = 0; a < dd->nat_tot; a++)
2628 if (have[dd->gatindex[a]] > 0)
2630 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2634 have[dd->gatindex[a]] = a + 1;
2640 snew(have, dd->nat_tot);
2643 for (i = 0; i < natoms_sys; i++)
2645 if (ga2la_get(dd->ga2la, i, &a, &cell))
2647 if (a >= dd->nat_tot)
2649 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2655 if (dd->gatindex[a] != i)
2657 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2664 if (ngl != dd->nat_tot)
2667 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
2668 dd->rank, where, ngl, dd->nat_tot);
2670 for (a = 0; a < dd->nat_tot; a++)
2675 "DD rank %d, %s: local atom %d, global %d has no global index\n",
2676 dd->rank, where, a+1, dd->gatindex[a]+1);
2681 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2685 gmx_fatal(FARGS, "DD rank %d, %s: %d atom/cg index inconsistencies",
2686 dd->rank, where, nerr);
2690 static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2697 /* Clear the whole list without searching */
2698 ga2la_clear(dd->ga2la);
2702 for (i = a_start; i < dd->nat_tot; i++)
2704 ga2la_del(dd->ga2la, dd->gatindex[i]);
2708 bLocalCG = dd->comm->bLocalCG;
2711 for (i = cg_start; i < dd->ncg_tot; i++)
2713 bLocalCG[dd->index_gl[i]] = FALSE;
2717 dd_clear_local_vsite_indices(dd);
2719 if (dd->constraints)
2721 dd_clear_local_constraint_indices(dd);
2725 /* This function should be used for moving the domain boudaries during DLB,
2726 * for obtaining the minimum cell size. It checks the initially set limit
2727 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2728 * and, possibly, a longer cut-off limit set for PME load balancing.
2730 static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2734 cellsize_min = comm->cellsize_min[dim];
2736 if (!comm->bVacDLBNoLimit)
2738 /* The cut-off might have changed, e.g. by PME load balacning,
2739 * from the value used to set comm->cellsize_min, so check it.
2741 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
2743 if (comm->bPMELoadBalDLBLimits)
2745 /* Check for the cut-off limit set by the PME load balancing */
2746 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
2750 return cellsize_min;
2753 static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2756 real grid_jump_limit;
2758 /* The distance between the boundaries of cells at distance
2759 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2760 * and by the fact that cells should not be shifted by more than
2761 * half their size, such that cg's only shift by one cell
2762 * at redecomposition.
2764 grid_jump_limit = comm->cellsize_limit;
2765 if (!comm->bVacDLBNoLimit)
2767 if (comm->bPMELoadBalDLBLimits)
2769 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
2771 grid_jump_limit = max(grid_jump_limit,
2772 cutoff/comm->cd[dim_ind].np);
2775 return grid_jump_limit;
2778 static gmx_bool check_grid_jump(gmx_int64_t step,
2784 gmx_domdec_comm_t *comm;
2793 for (d = 1; d < dd->ndim; d++)
2796 limit = grid_jump_limit(comm, cutoff, d);
2797 bfac = ddbox->box_size[dim];
2798 if (ddbox->tric_dir[dim])
2800 bfac *= ddbox->skew_fac[dim];
2802 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2803 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2811 /* This error should never be triggered under normal
2812 * circumstances, but you never know ...
2814 gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
2815 gmx_step_str(step, buf),
2816 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2824 static int dd_load_count(gmx_domdec_comm_t *comm)
2826 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2829 static float dd_force_load(gmx_domdec_comm_t *comm)
2836 if (comm->eFlop > 1)
2838 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2843 load = comm->cycl[ddCyclF];
2844 if (comm->cycl_n[ddCyclF] > 1)
2846 /* Subtract the maximum of the last n cycle counts
2847 * to get rid of possible high counts due to other sources,
2848 * for instance system activity, that would otherwise
2849 * affect the dynamic load balancing.
2851 load -= comm->cycl_max[ddCyclF];
2855 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2857 float gpu_wait, gpu_wait_sum;
2859 gpu_wait = comm->cycl[ddCyclWaitGPU];
2860 if (comm->cycl_n[ddCyclF] > 1)
2862 /* We should remove the WaitGPU time of the same MD step
2863 * as the one with the maximum F time, since the F time
2864 * and the wait time are not independent.
2865 * Furthermore, the step for the max F time should be chosen
2866 * the same on all ranks that share the same GPU.
2867 * But to keep the code simple, we remove the average instead.
2868 * The main reason for artificially long times at some steps
2869 * is spurious CPU activity or MPI time, so we don't expect
2870 * that changes in the GPU wait time matter a lot here.
2872 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2874 /* Sum the wait times over the ranks that share the same GPU */
2875 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2876 comm->mpi_comm_gpu_shared);
2877 /* Replace the wait time by the average over the ranks */
2878 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
2886 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2888 gmx_domdec_comm_t *comm;
2893 snew(*dim_f, dd->nc[dim]+1);
2895 for (i = 1; i < dd->nc[dim]; i++)
2897 if (comm->slb_frac[dim])
2899 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2903 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2906 (*dim_f)[dd->nc[dim]] = 1;
2909 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2911 int pmeindex, slab, nso, i;
2914 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2920 ddpme->dim = dimind;
2922 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2924 ddpme->nslab = (ddpme->dim == 0 ?
2925 dd->comm->npmenodes_x :
2926 dd->comm->npmenodes_y);
2928 if (ddpme->nslab <= 1)
2933 nso = dd->comm->npmenodes/ddpme->nslab;
2934 /* Determine for each PME slab the PP location range for dimension dim */
2935 snew(ddpme->pp_min, ddpme->nslab);
2936 snew(ddpme->pp_max, ddpme->nslab);
2937 for (slab = 0; slab < ddpme->nslab; slab++)
2939 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2940 ddpme->pp_max[slab] = 0;
2942 for (i = 0; i < dd->nnodes; i++)
2944 ddindex2xyz(dd->nc, i, xyz);
2945 /* For y only use our y/z slab.
2946 * This assumes that the PME x grid size matches the DD grid size.
2948 if (dimind == 0 || xyz[XX] == dd->ci[XX])
2950 pmeindex = ddindex2pmeindex(dd, i);
2953 slab = pmeindex/nso;
2957 slab = pmeindex % ddpme->nslab;
2959 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
2960 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
2964 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
2967 int dd_pme_maxshift_x(gmx_domdec_t *dd)
2969 if (dd->comm->ddpme[0].dim == XX)
2971 return dd->comm->ddpme[0].maxshift;
2979 int dd_pme_maxshift_y(gmx_domdec_t *dd)
2981 if (dd->comm->ddpme[0].dim == YY)
2983 return dd->comm->ddpme[0].maxshift;
2985 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2987 return dd->comm->ddpme[1].maxshift;
2995 static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
2996 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
2998 gmx_domdec_comm_t *comm;
3001 real range, pme_boundary;
3005 nc = dd->nc[ddpme->dim];
3008 if (!ddpme->dim_match)
3010 /* PP decomposition is not along dim: the worst situation */
3013 else if (ns <= 3 || (bUniform && ns == nc))
3015 /* The optimal situation */
3020 /* We need to check for all pme nodes which nodes they
3021 * could possibly need to communicate with.
3023 xmin = ddpme->pp_min;
3024 xmax = ddpme->pp_max;
3025 /* Allow for atoms to be maximally 2/3 times the cut-off
3026 * out of their DD cell. This is a reasonable balance between
3027 * between performance and support for most charge-group/cut-off
3030 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3031 /* Avoid extra communication when we are exactly at a boundary */
3035 for (s = 0; s < ns; s++)
3037 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3038 pme_boundary = (real)s/ns;
3041 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3043 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3047 pme_boundary = (real)(s+1)/ns;
3050 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3052 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3059 ddpme->maxshift = sh;
3063 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3064 ddpme->dim, ddpme->maxshift);
3068 static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3072 for (d = 0; d < dd->ndim; d++)
3075 if (dim < ddbox->nboundeddim &&
3076 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3077 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
3079 gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3080 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3081 dd->nc[dim], dd->comm->cellsize_limit);
3087 setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY
3090 /* Set the domain boundaries. Use for static (or no) load balancing,
3091 * and also for the starting state for dynamic load balancing.
3092 * setmode determine if and where the boundaries are stored, use enum above.
3093 * Returns the number communication pulses in npulse.
3095 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3096 int setmode, ivec npulse)
3098 gmx_domdec_comm_t *comm;
3101 real *cell_x, cell_dx, cellsize;
3105 for (d = 0; d < DIM; d++)
3107 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
3109 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
3112 cell_dx = ddbox->box_size[d]/dd->nc[d];
3115 case setcellsizeslbMASTER:
3116 for (j = 0; j < dd->nc[d]+1; j++)
3118 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3121 case setcellsizeslbLOCAL:
3122 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3123 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
3128 cellsize = cell_dx*ddbox->skew_fac[d];
3129 while (cellsize*npulse[d] < comm->cutoff)
3133 cellsize_min[d] = cellsize;
3137 /* Statically load balanced grid */
3138 /* Also when we are not doing a master distribution we determine
3139 * all cell borders in a loop to obtain identical values
3140 * to the master distribution case and to determine npulse.
3142 if (setmode == setcellsizeslbMASTER)
3144 cell_x = dd->ma->cell_x[d];
3148 snew(cell_x, dd->nc[d]+1);
3150 cell_x[0] = ddbox->box0[d];
3151 for (j = 0; j < dd->nc[d]; j++)
3153 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3154 cell_x[j+1] = cell_x[j] + cell_dx;
3155 cellsize = cell_dx*ddbox->skew_fac[d];
3156 while (cellsize*npulse[d] < comm->cutoff &&
3157 npulse[d] < dd->nc[d]-1)
3161 cellsize_min[d] = min(cellsize_min[d], cellsize);
3163 if (setmode == setcellsizeslbLOCAL)
3165 comm->cell_x0[d] = cell_x[dd->ci[d]];
3166 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3168 if (setmode != setcellsizeslbMASTER)
3173 /* The following limitation is to avoid that a cell would receive
3174 * some of its own home charge groups back over the periodic boundary.
3175 * Double charge groups cause trouble with the global indices.
3177 if (d < ddbox->npbcdim &&
3178 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3180 char error_string[STRLEN];
3182 sprintf(error_string,
3183 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3184 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3186 dd->nc[d], dd->nc[d],
3187 dd->nnodes > dd->nc[d] ? "cells" : "ranks");
3189 if (setmode == setcellsizeslbLOCAL)
3191 gmx_fatal_collective(FARGS, NULL, dd, error_string);
3195 gmx_fatal(FARGS, error_string);
3200 if (!comm->bDynLoadBal)
3202 copy_rvec(cellsize_min, comm->cellsize_min);
3205 for (d = 0; d < comm->npmedecompdim; d++)
3207 set_pme_maxshift(dd, &comm->ddpme[d],
3208 comm->slb_frac[dd->dim[d]] == NULL, ddbox,
3209 comm->ddpme[d].slb_dim_f);
3214 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3215 int d, int dim, gmx_domdec_root_t *root,
3217 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3219 gmx_domdec_comm_t *comm;
3220 int ncd, i, j, nmin, nmin_old;
3221 gmx_bool bLimLo, bLimHi;
3223 real fac, halfway, cellsize_limit_f_i, region_size;
3224 gmx_bool bPBC, bLastHi = FALSE;
3225 int nrange[] = {range[0], range[1]};
3227 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
3233 bPBC = (dim < ddbox->npbcdim);
3235 cell_size = root->buf_ncd;
3239 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3242 /* First we need to check if the scaling does not make cells
3243 * smaller than the smallest allowed size.
3244 * We need to do this iteratively, since if a cell is too small,
3245 * it needs to be enlarged, which makes all the other cells smaller,
3246 * which could in turn make another cell smaller than allowed.
3248 for (i = range[0]; i < range[1]; i++)
3250 root->bCellMin[i] = FALSE;
3256 /* We need the total for normalization */
3258 for (i = range[0]; i < range[1]; i++)
3260 if (root->bCellMin[i] == FALSE)
3262 fac += cell_size[i];
3265 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3266 /* Determine the cell boundaries */
3267 for (i = range[0]; i < range[1]; i++)
3269 if (root->bCellMin[i] == FALSE)
3271 cell_size[i] *= fac;
3272 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3274 cellsize_limit_f_i = 0;
3278 cellsize_limit_f_i = cellsize_limit_f;
3280 if (cell_size[i] < cellsize_limit_f_i)
3282 root->bCellMin[i] = TRUE;
3283 cell_size[i] = cellsize_limit_f_i;
3287 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
3290 while (nmin > nmin_old);
3293 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3294 /* For this check we should not use DD_CELL_MARGIN,
3295 * but a slightly smaller factor,
3296 * since rounding could get use below the limit.
3298 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3301 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3302 gmx_step_str(step, buf),
3303 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3304 ncd, comm->cellsize_min[dim]);
3307 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3311 /* Check if the boundary did not displace more than halfway
3312 * each of the cells it bounds, as this could cause problems,
3313 * especially when the differences between cell sizes are large.
3314 * If changes are applied, they will not make cells smaller
3315 * than the cut-off, as we check all the boundaries which
3316 * might be affected by a change and if the old state was ok,
3317 * the cells will at most be shrunk back to their old size.
3319 for (i = range[0]+1; i < range[1]; i++)
3321 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3322 if (root->cell_f[i] < halfway)
3324 root->cell_f[i] = halfway;
3325 /* Check if the change also causes shifts of the next boundaries */
3326 for (j = i+1; j < range[1]; j++)
3328 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3330 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3334 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3335 if (root->cell_f[i] > halfway)
3337 root->cell_f[i] = halfway;
3338 /* Check if the change also causes shifts of the next boundaries */
3339 for (j = i-1; j >= range[0]+1; j--)
3341 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3343 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3350 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3351 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3352 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3353 * for a and b nrange is used */
3356 /* Take care of the staggering of the cell boundaries */
3359 for (i = range[0]; i < range[1]; i++)
3361 root->cell_f_max0[i] = root->cell_f[i];
3362 root->cell_f_min1[i] = root->cell_f[i+1];
3367 for (i = range[0]+1; i < range[1]; i++)
3369 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3370 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3371 if (bLimLo && bLimHi)
3373 /* Both limits violated, try the best we can */
3374 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3375 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3376 nrange[0] = range[0];
3378 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3381 nrange[1] = range[1];
3382 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3388 /* root->cell_f[i] = root->bound_min[i]; */
3389 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3392 else if (bLimHi && !bLastHi)
3395 if (nrange[1] < range[1]) /* found a LimLo before */
3397 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3398 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3399 nrange[0] = nrange[1];
3401 root->cell_f[i] = root->bound_max[i];
3403 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3405 nrange[1] = range[1];
3408 if (nrange[1] < range[1]) /* found last a LimLo */
3410 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3411 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3412 nrange[0] = nrange[1];
3413 nrange[1] = range[1];
3414 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3416 else if (nrange[0] > range[0]) /* found at least one LimHi */
3418 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3425 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3426 int d, int dim, gmx_domdec_root_t *root,
3427 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3428 gmx_bool bUniform, gmx_int64_t step)
3430 gmx_domdec_comm_t *comm;
3431 int ncd, d1, i, j, pos;
3433 real load_aver, load_i, imbalance, change, change_max, sc;
3434 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3438 int range[] = { 0, 0 };
3442 /* Convert the maximum change from the input percentage to a fraction */
3443 change_limit = comm->dlb_scale_lim*0.01;
3447 bPBC = (dim < ddbox->npbcdim);
3449 cell_size = root->buf_ncd;
3451 /* Store the original boundaries */
3452 for (i = 0; i < ncd+1; i++)
3454 root->old_cell_f[i] = root->cell_f[i];
3458 for (i = 0; i < ncd; i++)
3460 cell_size[i] = 1.0/ncd;
3463 else if (dd_load_count(comm))
3465 load_aver = comm->load[d].sum_m/ncd;
3467 for (i = 0; i < ncd; i++)
3469 /* Determine the relative imbalance of cell i */
3470 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3471 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3472 /* Determine the change of the cell size using underrelaxation */
3473 change = -relax*imbalance;
3474 change_max = max(change_max, max(change, -change));
3476 /* Limit the amount of scaling.
3477 * We need to use the same rescaling for all cells in one row,
3478 * otherwise the load balancing might not converge.
3481 if (change_max > change_limit)
3483 sc *= change_limit/change_max;
3485 for (i = 0; i < ncd; i++)
3487 /* Determine the relative imbalance of cell i */
3488 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3489 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3490 /* Determine the change of the cell size using underrelaxation */
3491 change = -sc*imbalance;
3492 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3496 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3497 cellsize_limit_f *= DD_CELL_MARGIN;
3498 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3499 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3500 if (ddbox->tric_dir[dim])
3502 cellsize_limit_f /= ddbox->skew_fac[dim];
3503 dist_min_f /= ddbox->skew_fac[dim];
3505 if (bDynamicBox && d > 0)
3507 dist_min_f *= DD_PRES_SCALE_MARGIN;
3509 if (d > 0 && !bUniform)
3511 /* Make sure that the grid is not shifted too much */
3512 for (i = 1; i < ncd; i++)
3514 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3516 gmx_incons("Inconsistent DD boundary staggering limits!");
3518 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3519 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3522 root->bound_min[i] += 0.5*space;
3524 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3525 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3528 root->bound_max[i] += 0.5*space;
3533 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3535 root->cell_f_max0[i-1] + dist_min_f,
3536 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3537 root->cell_f_min1[i] - dist_min_f);
3542 root->cell_f[0] = 0;
3543 root->cell_f[ncd] = 1;
3544 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3547 /* After the checks above, the cells should obey the cut-off
3548 * restrictions, but it does not hurt to check.
3550 for (i = 0; i < ncd; i++)
3554 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3555 dim, i, root->cell_f[i], root->cell_f[i+1]);
3558 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3559 root->cell_f[i+1] - root->cell_f[i] <
3560 cellsize_limit_f/DD_CELL_MARGIN)
3564 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3565 gmx_step_str(step, buf), dim2char(dim), i,
3566 (root->cell_f[i+1] - root->cell_f[i])
3567 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3572 /* Store the cell boundaries of the lower dimensions at the end */
3573 for (d1 = 0; d1 < d; d1++)
3575 root->cell_f[pos++] = comm->cell_f0[d1];
3576 root->cell_f[pos++] = comm->cell_f1[d1];
3579 if (d < comm->npmedecompdim)
3581 /* The master determines the maximum shift for
3582 * the coordinate communication between separate PME nodes.
3584 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
3586 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3589 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3593 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3594 gmx_ddbox_t *ddbox, int dimind)
3596 gmx_domdec_comm_t *comm;
3601 /* Set the cell dimensions */
3602 dim = dd->dim[dimind];
3603 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3604 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3605 if (dim >= ddbox->nboundeddim)
3607 comm->cell_x0[dim] += ddbox->box0[dim];
3608 comm->cell_x1[dim] += ddbox->box0[dim];
3612 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3613 int d, int dim, real *cell_f_row,
3616 gmx_domdec_comm_t *comm;
3622 /* Each node would only need to know two fractions,
3623 * but it is probably cheaper to broadcast the whole array.
3625 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3626 0, comm->mpi_comm_load[d]);
3628 /* Copy the fractions for this dimension from the buffer */
3629 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3630 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3631 /* The whole array was communicated, so set the buffer position */
3632 pos = dd->nc[dim] + 1;
3633 for (d1 = 0; d1 <= d; d1++)
3637 /* Copy the cell fractions of the lower dimensions */
3638 comm->cell_f0[d1] = cell_f_row[pos++];
3639 comm->cell_f1[d1] = cell_f_row[pos++];
3641 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3643 /* Convert the communicated shift from float to int */
3644 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3647 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3651 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3652 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3653 gmx_bool bUniform, gmx_int64_t step)
3655 gmx_domdec_comm_t *comm;
3657 gmx_bool bRowMember, bRowRoot;
3662 for (d = 0; d < dd->ndim; d++)
3667 for (d1 = d; d1 < dd->ndim; d1++)
3669 if (dd->ci[dd->dim[d1]] > 0)
3682 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3683 ddbox, bDynamicBox, bUniform, step);
3684 cell_f_row = comm->root[d]->cell_f;
3688 cell_f_row = comm->cell_f_row;
3690 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
3695 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3699 /* This function assumes the box is static and should therefore
3700 * not be called when the box has changed since the last
3701 * call to dd_partition_system.
3703 for (d = 0; d < dd->ndim; d++)
3705 relative_to_absolute_cell_bounds(dd, ddbox, d);
3711 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3712 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3713 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3714 gmx_wallcycle_t wcycle)
3716 gmx_domdec_comm_t *comm;
3723 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3724 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3725 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3727 else if (bDynamicBox)
3729 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3732 /* Set the dimensions for which no DD is used */
3733 for (dim = 0; dim < DIM; dim++)
3735 if (dd->nc[dim] == 1)
3737 comm->cell_x0[dim] = 0;
3738 comm->cell_x1[dim] = ddbox->box_size[dim];
3739 if (dim >= ddbox->nboundeddim)
3741 comm->cell_x0[dim] += ddbox->box0[dim];
3742 comm->cell_x1[dim] += ddbox->box0[dim];
3748 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3751 gmx_domdec_comm_dim_t *cd;
3753 for (d = 0; d < dd->ndim; d++)
3755 cd = &dd->comm->cd[d];
3756 np = npulse[dd->dim[d]];
3757 if (np > cd->np_nalloc)
3761 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3762 dim2char(dd->dim[d]), np);
3764 if (DDMASTER(dd) && cd->np_nalloc > 0)
3766 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3768 srenew(cd->ind, np);
3769 for (i = cd->np_nalloc; i < np; i++)
3771 cd->ind[i].index = NULL;
3772 cd->ind[i].nalloc = 0;
3781 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3782 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3783 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3784 gmx_wallcycle_t wcycle)
3786 gmx_domdec_comm_t *comm;
3792 /* Copy the old cell boundaries for the cg displacement check */
3793 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3794 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3796 if (comm->bDynLoadBal)
3800 check_box_size(dd, ddbox);
3802 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3806 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
3807 realloc_comm_ind(dd, npulse);
3812 for (d = 0; d < DIM; d++)
3814 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3815 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
3820 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3822 rvec cell_ns_x0, rvec cell_ns_x1,
3825 gmx_domdec_comm_t *comm;
3830 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3832 dim = dd->dim[dim_ind];
3834 /* Without PBC we don't have restrictions on the outer cells */
3835 if (!(dim >= ddbox->npbcdim &&
3836 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3837 comm->bDynLoadBal &&
3838 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3839 comm->cellsize_min[dim])
3842 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3843 gmx_step_str(step, buf), dim2char(dim),
3844 comm->cell_x1[dim] - comm->cell_x0[dim],
3845 ddbox->skew_fac[dim],
3846 dd->comm->cellsize_min[dim],
3847 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3851 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3853 /* Communicate the boundaries and update cell_ns_x0/1 */
3854 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3855 if (dd->bGridJump && dd->ndim > 1)
3857 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
3862 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3866 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3874 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3875 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3884 static void check_screw_box(matrix box)
3886 /* Mathematical limitation */
3887 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3889 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3892 /* Limitation due to the asymmetry of the eighth shell method */
3893 if (box[ZZ][YY] != 0)
3895 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
3899 static void distribute_cg(FILE *fplog, gmx_int64_t step,
3900 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3903 gmx_domdec_master_t *ma;
3904 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3905 int i, icg, j, k, k0, k1, d, npbcdim;
3907 rvec box_size, cg_cm;
3909 real nrcg, inv_ncg, pos_d;
3911 gmx_bool bUnbounded, bScrew;
3915 if (tmp_ind == NULL)
3917 snew(tmp_nalloc, dd->nnodes);
3918 snew(tmp_ind, dd->nnodes);
3919 for (i = 0; i < dd->nnodes; i++)
3921 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3922 snew(tmp_ind[i], tmp_nalloc[i]);
3926 /* Clear the count */
3927 for (i = 0; i < dd->nnodes; i++)
3933 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3935 cgindex = cgs->index;
3937 /* Compute the center of geometry for all charge groups */
3938 for (icg = 0; icg < cgs->nr; icg++)
3941 k1 = cgindex[icg+1];
3945 copy_rvec(pos[k0], cg_cm);
3952 for (k = k0; (k < k1); k++)
3954 rvec_inc(cg_cm, pos[k]);
3956 for (d = 0; (d < DIM); d++)
3958 cg_cm[d] *= inv_ncg;
3961 /* Put the charge group in the box and determine the cell index */
3962 for (d = DIM-1; d >= 0; d--)
3965 if (d < dd->npbcdim)
3967 bScrew = (dd->bScrewPBC && d == XX);
3968 if (tric_dir[d] && dd->nc[d] > 1)
3970 /* Use triclinic coordintates for this dimension */
3971 for (j = d+1; j < DIM; j++)
3973 pos_d += cg_cm[j]*tcm[j][d];
3976 while (pos_d >= box[d][d])
3979 rvec_dec(cg_cm, box[d]);
3982 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3983 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3985 for (k = k0; (k < k1); k++)
3987 rvec_dec(pos[k], box[d]);
3990 pos[k][YY] = box[YY][YY] - pos[k][YY];
3991 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3998 rvec_inc(cg_cm, box[d]);
4001 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
4002 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
4004 for (k = k0; (k < k1); k++)
4006 rvec_inc(pos[k], box[d]);
4009 pos[k][YY] = box[YY][YY] - pos[k][YY];
4010 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4015 /* This could be done more efficiently */
4017 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
4022 i = dd_index(dd->nc, ind);
4023 if (ma->ncg[i] == tmp_nalloc[i])
4025 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
4026 srenew(tmp_ind[i], tmp_nalloc[i]);
4028 tmp_ind[i][ma->ncg[i]] = icg;
4030 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
4034 for (i = 0; i < dd->nnodes; i++)
4037 for (k = 0; k < ma->ncg[i]; k++)
4039 ma->cg[k1++] = tmp_ind[i][k];
4042 ma->index[dd->nnodes] = k1;
4044 for (i = 0; i < dd->nnodes; i++)
4054 fprintf(fplog, "Charge group distribution at step %s:",
4055 gmx_step_str(step, buf));
4056 for (i = 0; i < dd->nnodes; i++)
4058 fprintf(fplog, " %d", ma->ncg[i]);
4060 fprintf(fplog, "\n");
4064 static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4065 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4068 gmx_domdec_master_t *ma = NULL;
4071 int *ibuf, buf2[2] = { 0, 0 };
4072 gmx_bool bMaster = DDMASTER(dd);
4080 check_screw_box(box);
4083 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
4085 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
4086 for (i = 0; i < dd->nnodes; i++)
4088 ma->ibuf[2*i] = ma->ncg[i];
4089 ma->ibuf[2*i+1] = ma->nat[i];
4097 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4099 dd->ncg_home = buf2[0];
4100 dd->nat_home = buf2[1];
4101 dd->ncg_tot = dd->ncg_home;
4102 dd->nat_tot = dd->nat_home;
4103 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4105 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4106 srenew(dd->index_gl, dd->cg_nalloc);
4107 srenew(dd->cgindex, dd->cg_nalloc+1);
4111 for (i = 0; i < dd->nnodes; i++)
4113 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4114 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4119 DDMASTER(dd) ? ma->ibuf : NULL,
4120 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4121 DDMASTER(dd) ? ma->cg : NULL,
4122 dd->ncg_home*sizeof(int), dd->index_gl);
4124 /* Determine the home charge group sizes */
4126 for (i = 0; i < dd->ncg_home; i++)
4128 cg_gl = dd->index_gl[i];
4130 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4135 fprintf(debug, "Home charge groups:\n");
4136 for (i = 0; i < dd->ncg_home; i++)
4138 fprintf(debug, " %d", dd->index_gl[i]);
4141 fprintf(debug, "\n");
4144 fprintf(debug, "\n");
4148 static int compact_and_copy_vec_at(int ncg, int *move,
4151 rvec *src, gmx_domdec_comm_t *comm,
4154 int m, icg, i, i0, i1, nrcg;
4160 for (m = 0; m < DIM*2; m++)
4166 for (icg = 0; icg < ncg; icg++)
4168 i1 = cgindex[icg+1];
4174 /* Compact the home array in place */
4175 for (i = i0; i < i1; i++)
4177 copy_rvec(src[i], src[home_pos++]);
4183 /* Copy to the communication buffer */
4185 pos_vec[m] += 1 + vec*nrcg;
4186 for (i = i0; i < i1; i++)
4188 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4190 pos_vec[m] += (nvec - vec - 1)*nrcg;
4194 home_pos += i1 - i0;
4202 static int compact_and_copy_vec_cg(int ncg, int *move,
4204 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4207 int m, icg, i0, i1, nrcg;
4213 for (m = 0; m < DIM*2; m++)
4219 for (icg = 0; icg < ncg; icg++)
4221 i1 = cgindex[icg+1];
4227 /* Compact the home array in place */
4228 copy_rvec(src[icg], src[home_pos++]);
4234 /* Copy to the communication buffer */
4235 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
4236 pos_vec[m] += 1 + nrcg*nvec;
4248 static int compact_ind(int ncg, int *move,
4249 int *index_gl, int *cgindex,
4251 gmx_ga2la_t ga2la, char *bLocalCG,
4254 int cg, nat, a0, a1, a, a_gl;
4259 for (cg = 0; cg < ncg; cg++)
4265 /* Compact the home arrays in place.
4266 * Anything that can be done here avoids access to global arrays.
4268 cgindex[home_pos] = nat;
4269 for (a = a0; a < a1; a++)
4272 gatindex[nat] = a_gl;
4273 /* The cell number stays 0, so we don't need to set it */
4274 ga2la_change_la(ga2la, a_gl, nat);
4277 index_gl[home_pos] = index_gl[cg];
4278 cginfo[home_pos] = cginfo[cg];
4279 /* The charge group remains local, so bLocalCG does not change */
4284 /* Clear the global indices */
4285 for (a = a0; a < a1; a++)
4287 ga2la_del(ga2la, gatindex[a]);
4291 bLocalCG[index_gl[cg]] = FALSE;
4295 cgindex[home_pos] = nat;
4300 static void clear_and_mark_ind(int ncg, int *move,
4301 int *index_gl, int *cgindex, int *gatindex,
4302 gmx_ga2la_t ga2la, char *bLocalCG,
4307 for (cg = 0; cg < ncg; cg++)
4313 /* Clear the global indices */
4314 for (a = a0; a < a1; a++)
4316 ga2la_del(ga2la, gatindex[a]);
4320 bLocalCG[index_gl[cg]] = FALSE;
4322 /* Signal that this cg has moved using the ns cell index.
4323 * Here we set it to -1. fill_grid will change it
4324 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4326 cell_index[cg] = -1;
4331 static void print_cg_move(FILE *fplog,
4333 gmx_int64_t step, int cg, int dim, int dir,
4334 gmx_bool bHaveLimitdAndCMOld, real limitd,
4335 rvec cm_old, rvec cm_new, real pos_d)
4337 gmx_domdec_comm_t *comm;
4342 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4343 if (bHaveLimitdAndCMOld)
4345 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4346 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4350 fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4351 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4353 fprintf(fplog, "distance out of cell %f\n",
4354 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4355 if (bHaveLimitdAndCMOld)
4357 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4358 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4360 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4361 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4362 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4364 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4365 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4367 comm->cell_x0[dim], comm->cell_x1[dim]);
4370 static void cg_move_error(FILE *fplog,
4372 gmx_int64_t step, int cg, int dim, int dir,
4373 gmx_bool bHaveLimitdAndCMOld, real limitd,
4374 rvec cm_old, rvec cm_new, real pos_d)
4378 print_cg_move(fplog, dd, step, cg, dim, dir,
4379 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4381 print_cg_move(stderr, dd, step, cg, dim, dir,
4382 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4384 "A charge group moved too far between two domain decomposition steps\n"
4385 "This usually means that your system is not well equilibrated");
4388 static void rotate_state_atom(t_state *state, int a)
4392 for (est = 0; est < estNR; est++)
4394 if (EST_DISTR(est) && (state->flags & (1<<est)))
4399 /* Rotate the complete state; for a rectangular box only */
4400 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4401 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4404 state->v[a][YY] = -state->v[a][YY];
4405 state->v[a][ZZ] = -state->v[a][ZZ];
4408 state->sd_X[a][YY] = -state->sd_X[a][YY];
4409 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4412 state->cg_p[a][YY] = -state->cg_p[a][YY];
4413 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4415 case estDISRE_INITF:
4416 case estDISRE_RM3TAV:
4417 case estORIRE_INITF:
4419 /* These are distances, so not affected by rotation */
4422 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4428 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4430 if (natoms > comm->moved_nalloc)
4432 /* Contents should be preserved here */
4433 comm->moved_nalloc = over_alloc_dd(natoms);
4434 srenew(comm->moved, comm->moved_nalloc);
4440 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4443 ivec tric_dir, matrix tcm,
4444 rvec cell_x0, rvec cell_x1,
4445 rvec limitd, rvec limit0, rvec limit1,
4447 int cg_start, int cg_end,
4452 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4453 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4457 real inv_ncg, pos_d;
4460 npbcdim = dd->npbcdim;
4462 for (cg = cg_start; cg < cg_end; cg++)
4469 copy_rvec(state->x[k0], cm_new);
4476 for (k = k0; (k < k1); k++)
4478 rvec_inc(cm_new, state->x[k]);
4480 for (d = 0; (d < DIM); d++)
4482 cm_new[d] = inv_ncg*cm_new[d];
4487 /* Do pbc and check DD cell boundary crossings */
4488 for (d = DIM-1; d >= 0; d--)
4492 bScrew = (dd->bScrewPBC && d == XX);
4493 /* Determine the location of this cg in lattice coordinates */
4497 for (d2 = d+1; d2 < DIM; d2++)
4499 pos_d += cm_new[d2]*tcm[d2][d];
4502 /* Put the charge group in the triclinic unit-cell */
4503 if (pos_d >= cell_x1[d])
4505 if (pos_d >= limit1[d])
4507 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4508 cg_cm[cg], cm_new, pos_d);
4511 if (dd->ci[d] == dd->nc[d] - 1)
4513 rvec_dec(cm_new, state->box[d]);
4516 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4517 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4519 for (k = k0; (k < k1); k++)
4521 rvec_dec(state->x[k], state->box[d]);
4524 rotate_state_atom(state, k);
4529 else if (pos_d < cell_x0[d])
4531 if (pos_d < limit0[d])
4533 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4534 cg_cm[cg], cm_new, pos_d);
4539 rvec_inc(cm_new, state->box[d]);
4542 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4543 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4545 for (k = k0; (k < k1); k++)
4547 rvec_inc(state->x[k], state->box[d]);
4550 rotate_state_atom(state, k);
4556 else if (d < npbcdim)
4558 /* Put the charge group in the rectangular unit-cell */
4559 while (cm_new[d] >= state->box[d][d])
4561 rvec_dec(cm_new, state->box[d]);
4562 for (k = k0; (k < k1); k++)
4564 rvec_dec(state->x[k], state->box[d]);
4567 while (cm_new[d] < 0)
4569 rvec_inc(cm_new, state->box[d]);
4570 for (k = k0; (k < k1); k++)
4572 rvec_inc(state->x[k], state->box[d]);
4578 copy_rvec(cm_new, cg_cm[cg]);
4580 /* Determine where this cg should go */
4583 for (d = 0; d < dd->ndim; d++)
4588 flag |= DD_FLAG_FW(d);
4594 else if (dev[dim] == -1)
4596 flag |= DD_FLAG_BW(d);
4599 if (dd->nc[dim] > 2)
4610 /* Temporarily store the flag in move */
4611 move[cg] = mc + flag;
4615 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4616 gmx_domdec_t *dd, ivec tric_dir,
4617 t_state *state, rvec **f,
4626 int ncg[DIM*2], nat[DIM*2];
4627 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4628 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4629 int sbuf[2], rbuf[2];
4630 int home_pos_cg, home_pos_at, buf_pos;
4632 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4635 real inv_ncg, pos_d;
4637 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4639 cginfo_mb_t *cginfo_mb;
4640 gmx_domdec_comm_t *comm;
4642 int nthread, thread;
4646 check_screw_box(state->box);
4650 if (fr->cutoff_scheme == ecutsGROUP)
4655 for (i = 0; i < estNR; i++)
4661 case estX: /* Always present */ break;
4662 case estV: bV = (state->flags & (1<<i)); break;
4663 case estSDX: bSDX = (state->flags & (1<<i)); break;
4664 case estCGP: bCGP = (state->flags & (1<<i)); break;
4667 case estDISRE_INITF:
4668 case estDISRE_RM3TAV:
4669 case estORIRE_INITF:
4671 /* No processing required */
4674 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4679 if (dd->ncg_tot > comm->nalloc_int)
4681 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4682 srenew(comm->buf_int, comm->nalloc_int);
4684 move = comm->buf_int;
4686 /* Clear the count */
4687 for (c = 0; c < dd->ndim*2; c++)
4693 npbcdim = dd->npbcdim;
4695 for (d = 0; (d < DIM); d++)
4697 limitd[d] = dd->comm->cellsize_min[d];
4698 if (d >= npbcdim && dd->ci[d] == 0)
4700 cell_x0[d] = -GMX_FLOAT_MAX;
4704 cell_x0[d] = comm->cell_x0[d];
4706 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4708 cell_x1[d] = GMX_FLOAT_MAX;
4712 cell_x1[d] = comm->cell_x1[d];
4716 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4717 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4721 /* We check after communication if a charge group moved
4722 * more than one cell. Set the pre-comm check limit to float_max.
4724 limit0[d] = -GMX_FLOAT_MAX;
4725 limit1[d] = GMX_FLOAT_MAX;
4729 make_tric_corr_matrix(npbcdim, state->box, tcm);
4731 cgindex = dd->cgindex;
4733 nthread = gmx_omp_nthreads_get(emntDomdec);
4735 /* Compute the center of geometry for all home charge groups
4736 * and put them in the box and determine where they should go.
4738 #pragma omp parallel for num_threads(nthread) schedule(static)
4739 for (thread = 0; thread < nthread; thread++)
4741 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4742 cell_x0, cell_x1, limitd, limit0, limit1,
4744 ( thread *dd->ncg_home)/nthread,
4745 ((thread+1)*dd->ncg_home)/nthread,
4746 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4750 for (cg = 0; cg < dd->ncg_home; cg++)
4755 flag = mc & ~DD_FLAG_NRCG;
4756 mc = mc & DD_FLAG_NRCG;
4759 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4761 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4762 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4764 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4765 /* We store the cg size in the lower 16 bits
4766 * and the place where the charge group should go
4767 * in the next 6 bits. This saves some communication volume.
4769 nrcg = cgindex[cg+1] - cgindex[cg];
4770 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4776 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4777 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4780 for (i = 0; i < dd->ndim*2; i++)
4782 *ncg_moved += ncg[i];
4799 /* Make sure the communication buffers are large enough */
4800 for (mc = 0; mc < dd->ndim*2; mc++)
4802 nvr = ncg[mc] + nat[mc]*nvec;
4803 if (nvr > comm->cgcm_state_nalloc[mc])
4805 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4806 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4810 switch (fr->cutoff_scheme)
4813 /* Recalculating cg_cm might be cheaper than communicating,
4814 * but that could give rise to rounding issues.
4817 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4818 nvec, cg_cm, comm, bCompact);
4821 /* Without charge groups we send the moved atom coordinates
4822 * over twice. This is so the code below can be used without
4823 * many conditionals for both for with and without charge groups.
4826 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4827 nvec, state->x, comm, FALSE);
4830 home_pos_cg -= *ncg_moved;
4834 gmx_incons("unimplemented");
4840 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4841 nvec, vec++, state->x, comm, bCompact);
4844 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4845 nvec, vec++, state->v, comm, bCompact);
4849 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4850 nvec, vec++, state->sd_X, comm, bCompact);
4854 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4855 nvec, vec++, state->cg_p, comm, bCompact);
4860 compact_ind(dd->ncg_home, move,
4861 dd->index_gl, dd->cgindex, dd->gatindex,
4862 dd->ga2la, comm->bLocalCG,
4867 if (fr->cutoff_scheme == ecutsVERLET)
4869 moved = get_moved(comm, dd->ncg_home);
4871 for (k = 0; k < dd->ncg_home; k++)
4878 moved = fr->ns.grid->cell_index;
4881 clear_and_mark_ind(dd->ncg_home, move,
4882 dd->index_gl, dd->cgindex, dd->gatindex,
4883 dd->ga2la, comm->bLocalCG,
4887 cginfo_mb = fr->cginfo_mb;
4889 *ncg_stay_home = home_pos_cg;
4890 for (d = 0; d < dd->ndim; d++)
4896 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4899 /* Communicate the cg and atom counts */
4904 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4905 d, dir, sbuf[0], sbuf[1]);
4907 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4909 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4911 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4912 srenew(comm->buf_int, comm->nalloc_int);
4915 /* Communicate the charge group indices, sizes and flags */
4916 dd_sendrecv_int(dd, d, dir,
4917 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4918 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4920 nvs = ncg[cdd] + nat[cdd]*nvec;
4921 i = rbuf[0] + rbuf[1] *nvec;
4922 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4924 /* Communicate cgcm and state */
4925 dd_sendrecv_rvec(dd, d, dir,
4926 comm->cgcm_state[cdd], nvs,
4927 comm->vbuf.v+nvr, i);
4928 ncg_recv += rbuf[0];
4929 nat_recv += rbuf[1];
4933 /* Process the received charge groups */
4935 for (cg = 0; cg < ncg_recv; cg++)
4937 flag = comm->buf_int[cg*DD_CGIBS+1];
4939 if (dim >= npbcdim && dd->nc[dim] > 2)
4941 /* No pbc in this dim and more than one domain boundary.
4942 * We do a separate check if a charge group didn't move too far.
4944 if (((flag & DD_FLAG_FW(d)) &&
4945 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4946 ((flag & DD_FLAG_BW(d)) &&
4947 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4949 cg_move_error(fplog, dd, step, cg, dim,
4950 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4952 comm->vbuf.v[buf_pos],
4953 comm->vbuf.v[buf_pos],
4954 comm->vbuf.v[buf_pos][dim]);
4961 /* Check which direction this cg should go */
4962 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4966 /* The cell boundaries for dimension d2 are not equal
4967 * for each cell row of the lower dimension(s),
4968 * therefore we might need to redetermine where
4969 * this cg should go.
4972 /* If this cg crosses the box boundary in dimension d2
4973 * we can use the communicated flag, so we do not
4974 * have to worry about pbc.
4976 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4977 (flag & DD_FLAG_FW(d2))) ||
4978 (dd->ci[dim2] == 0 &&
4979 (flag & DD_FLAG_BW(d2)))))
4981 /* Clear the two flags for this dimension */
4982 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4983 /* Determine the location of this cg
4984 * in lattice coordinates
4986 pos_d = comm->vbuf.v[buf_pos][dim2];
4989 for (d3 = dim2+1; d3 < DIM; d3++)
4992 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4995 /* Check of we are not at the box edge.
4996 * pbc is only handled in the first step above,
4997 * but this check could move over pbc while
4998 * the first step did not due to different rounding.
5000 if (pos_d >= cell_x1[dim2] &&
5001 dd->ci[dim2] != dd->nc[dim2]-1)
5003 flag |= DD_FLAG_FW(d2);
5005 else if (pos_d < cell_x0[dim2] &&
5008 flag |= DD_FLAG_BW(d2);
5010 comm->buf_int[cg*DD_CGIBS+1] = flag;
5013 /* Set to which neighboring cell this cg should go */
5014 if (flag & DD_FLAG_FW(d2))
5018 else if (flag & DD_FLAG_BW(d2))
5020 if (dd->nc[dd->dim[d2]] > 2)
5032 nrcg = flag & DD_FLAG_NRCG;
5035 if (home_pos_cg+1 > dd->cg_nalloc)
5037 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5038 srenew(dd->index_gl, dd->cg_nalloc);
5039 srenew(dd->cgindex, dd->cg_nalloc+1);
5041 /* Set the global charge group index and size */
5042 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5043 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5044 /* Copy the state from the buffer */
5045 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5046 if (fr->cutoff_scheme == ecutsGROUP)
5049 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5053 /* Set the cginfo */
5054 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5055 dd->index_gl[home_pos_cg]);
5058 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5061 if (home_pos_at+nrcg > state->nalloc)
5063 dd_realloc_state(state, f, home_pos_at+nrcg);
5065 for (i = 0; i < nrcg; i++)
5067 copy_rvec(comm->vbuf.v[buf_pos++],
5068 state->x[home_pos_at+i]);
5072 for (i = 0; i < nrcg; i++)
5074 copy_rvec(comm->vbuf.v[buf_pos++],
5075 state->v[home_pos_at+i]);
5080 for (i = 0; i < nrcg; i++)
5082 copy_rvec(comm->vbuf.v[buf_pos++],
5083 state->sd_X[home_pos_at+i]);
5088 for (i = 0; i < nrcg; i++)
5090 copy_rvec(comm->vbuf.v[buf_pos++],
5091 state->cg_p[home_pos_at+i]);
5095 home_pos_at += nrcg;
5099 /* Reallocate the buffers if necessary */
5100 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5102 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5103 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5105 nvr = ncg[mc] + nat[mc]*nvec;
5106 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5108 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5109 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5111 /* Copy from the receive to the send buffers */
5112 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5113 comm->buf_int + cg*DD_CGIBS,
5114 DD_CGIBS*sizeof(int));
5115 memcpy(comm->cgcm_state[mc][nvr],
5116 comm->vbuf.v[buf_pos],
5117 (1+nrcg*nvec)*sizeof(rvec));
5118 buf_pos += 1 + nrcg*nvec;
5125 /* With sorting (!bCompact) the indices are now only partially up to date
5126 * and ncg_home and nat_home are not the real count, since there are
5127 * "holes" in the arrays for the charge groups that moved to neighbors.
5129 if (fr->cutoff_scheme == ecutsVERLET)
5131 moved = get_moved(comm, home_pos_cg);
5133 for (i = dd->ncg_home; i < home_pos_cg; i++)
5138 dd->ncg_home = home_pos_cg;
5139 dd->nat_home = home_pos_at;
5144 "Finished repartitioning: cgs moved out %d, new home %d\n",
5145 *ncg_moved, dd->ncg_home-*ncg_moved);
5150 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5152 dd->comm->cycl[ddCycl] += cycles;
5153 dd->comm->cycl_n[ddCycl]++;
5154 if (cycles > dd->comm->cycl_max[ddCycl])
5156 dd->comm->cycl_max[ddCycl] = cycles;
5160 static double force_flop_count(t_nrnb *nrnb)
5167 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5169 /* To get closer to the real timings, we half the count
5170 * for the normal loops and again half it for water loops.
5173 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5175 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5179 sum += nrnb->n[i]*0.50*cost_nrnb(i);
5182 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5185 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5187 sum += nrnb->n[i]*cost_nrnb(i);
5190 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5192 sum += nrnb->n[i]*cost_nrnb(i);
5198 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5200 if (dd->comm->eFlop)
5202 dd->comm->flop -= force_flop_count(nrnb);
5205 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5207 if (dd->comm->eFlop)
5209 dd->comm->flop += force_flop_count(nrnb);
5214 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5218 for (i = 0; i < ddCyclNr; i++)
5220 dd->comm->cycl[i] = 0;
5221 dd->comm->cycl_n[i] = 0;
5222 dd->comm->cycl_max[i] = 0;
5225 dd->comm->flop_n = 0;
5228 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5230 gmx_domdec_comm_t *comm;
5231 gmx_domdec_load_t *load;
5232 gmx_domdec_root_t *root = NULL;
5233 int d, dim, cid, i, pos;
5234 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
5239 fprintf(debug, "get_load_distribution start\n");
5242 wallcycle_start(wcycle, ewcDDCOMMLOAD);
5246 bSepPME = (dd->pme_nodeid >= 0);
5248 for (d = dd->ndim-1; d >= 0; d--)
5251 /* Check if we participate in the communication in this dimension */
5252 if (d == dd->ndim-1 ||
5253 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5255 load = &comm->load[d];
5258 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
5261 if (d == dd->ndim-1)
5263 sbuf[pos++] = dd_force_load(comm);
5264 sbuf[pos++] = sbuf[0];
5267 sbuf[pos++] = sbuf[0];
5268 sbuf[pos++] = cell_frac;
5271 sbuf[pos++] = comm->cell_f_max0[d];
5272 sbuf[pos++] = comm->cell_f_min1[d];
5277 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5278 sbuf[pos++] = comm->cycl[ddCyclPME];
5283 sbuf[pos++] = comm->load[d+1].sum;
5284 sbuf[pos++] = comm->load[d+1].max;
5287 sbuf[pos++] = comm->load[d+1].sum_m;
5288 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5289 sbuf[pos++] = comm->load[d+1].flags;
5292 sbuf[pos++] = comm->cell_f_max0[d];
5293 sbuf[pos++] = comm->cell_f_min1[d];
5298 sbuf[pos++] = comm->load[d+1].mdf;
5299 sbuf[pos++] = comm->load[d+1].pme;
5303 /* Communicate a row in DD direction d.
5304 * The communicators are setup such that the root always has rank 0.
5307 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5308 load->load, load->nload*sizeof(float), MPI_BYTE,
5309 0, comm->mpi_comm_load[d]);
5311 if (dd->ci[dim] == dd->master_ci[dim])
5313 /* We are the root, process this row */
5314 if (comm->bDynLoadBal)
5316 root = comm->root[d];
5326 for (i = 0; i < dd->nc[dim]; i++)
5328 load->sum += load->load[pos++];
5329 load->max = max(load->max, load->load[pos]);
5335 /* This direction could not be load balanced properly,
5336 * therefore we need to use the maximum iso the average load.
5338 load->sum_m = max(load->sum_m, load->load[pos]);
5342 load->sum_m += load->load[pos];
5345 load->cvol_min = min(load->cvol_min, load->load[pos]);
5349 load->flags = (int)(load->load[pos++] + 0.5);
5353 root->cell_f_max0[i] = load->load[pos++];
5354 root->cell_f_min1[i] = load->load[pos++];
5359 load->mdf = max(load->mdf, load->load[pos]);
5361 load->pme = max(load->pme, load->load[pos]);
5365 if (comm->bDynLoadBal && root->bLimited)
5367 load->sum_m *= dd->nc[dim];
5368 load->flags |= (1<<d);
5376 comm->nload += dd_load_count(comm);
5377 comm->load_step += comm->cycl[ddCyclStep];
5378 comm->load_sum += comm->load[0].sum;
5379 comm->load_max += comm->load[0].max;
5380 if (comm->bDynLoadBal)
5382 for (d = 0; d < dd->ndim; d++)
5384 if (comm->load[0].flags & (1<<d))
5386 comm->load_lim[d]++;
5392 comm->load_mdf += comm->load[0].mdf;
5393 comm->load_pme += comm->load[0].pme;
5397 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5401 fprintf(debug, "get_load_distribution finished\n");
5405 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5407 /* Return the relative performance loss on the total run time
5408 * due to the force calculation load imbalance.
5410 if (dd->comm->nload > 0)
5413 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5414 (dd->comm->load_step*dd->nnodes);
5422 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5425 int npp, npme, nnodes, d, limp;
5426 float imbal, pme_f_ratio, lossf, lossp = 0;
5428 gmx_domdec_comm_t *comm;
5431 if (DDMASTER(dd) && comm->nload > 0)
5434 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5435 nnodes = npp + npme;
5436 imbal = comm->load_max*npp/comm->load_sum - 1;
5437 lossf = dd_force_imb_perf_loss(dd);
5438 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5439 fprintf(fplog, "%s", buf);
5440 fprintf(stderr, "\n");
5441 fprintf(stderr, "%s", buf);
5442 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5443 fprintf(fplog, "%s", buf);
5444 fprintf(stderr, "%s", buf);
5446 if (comm->bDynLoadBal)
5448 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5449 for (d = 0; d < dd->ndim; d++)
5451 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5452 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5458 sprintf(buf+strlen(buf), "\n");
5459 fprintf(fplog, "%s", buf);
5460 fprintf(stderr, "%s", buf);
5464 pme_f_ratio = comm->load_pme/comm->load_mdf;
5465 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5468 lossp *= (float)npme/(float)nnodes;
5472 lossp *= (float)npp/(float)nnodes;
5474 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5475 fprintf(fplog, "%s", buf);
5476 fprintf(stderr, "%s", buf);
5477 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5478 fprintf(fplog, "%s", buf);
5479 fprintf(stderr, "%s", buf);
5481 fprintf(fplog, "\n");
5482 fprintf(stderr, "\n");
5484 if (lossf >= DD_PERF_LOSS_WARN)
5487 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5488 " in the domain decomposition.\n", lossf*100);
5489 if (!comm->bDynLoadBal)
5491 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5495 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5497 fprintf(fplog, "%s\n", buf);
5498 fprintf(stderr, "%s\n", buf);
5500 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS_WARN)
5503 "NOTE: %.1f %% performance was lost because the PME ranks\n"
5504 " had %s work to do than the PP ranks.\n"
5505 " You might want to %s the number of PME ranks\n"
5506 " or %s the cut-off and the grid spacing.\n",
5508 (lossp < 0) ? "less" : "more",
5509 (lossp < 0) ? "decrease" : "increase",
5510 (lossp < 0) ? "decrease" : "increase");
5511 fprintf(fplog, "%s\n", buf);
5512 fprintf(stderr, "%s\n", buf);
5517 static float dd_vol_min(gmx_domdec_t *dd)
5519 return dd->comm->load[0].cvol_min*dd->nnodes;
5522 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5524 return dd->comm->load[0].flags;
5527 static float dd_f_imbal(gmx_domdec_t *dd)
5529 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5532 float dd_pme_f_ratio(gmx_domdec_t *dd)
5534 if (dd->comm->cycl_n[ddCyclPME] > 0)
5536 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5544 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5549 flags = dd_load_flags(dd);
5553 "DD load balancing is limited by minimum cell size in dimension");
5554 for (d = 0; d < dd->ndim; d++)
5558 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5561 fprintf(fplog, "\n");
5563 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5564 if (dd->comm->bDynLoadBal)
5566 fprintf(fplog, " vol min/aver %5.3f%c",
5567 dd_vol_min(dd), flags ? '!' : ' ');
5569 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5570 if (dd->comm->cycl_n[ddCyclPME])
5572 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5574 fprintf(fplog, "\n\n");
5577 static void dd_print_load_verbose(gmx_domdec_t *dd)
5579 if (dd->comm->bDynLoadBal)
5581 fprintf(stderr, "vol %4.2f%c ",
5582 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5584 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5585 if (dd->comm->cycl_n[ddCyclPME])
5587 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
5592 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5597 gmx_domdec_root_t *root;
5598 gmx_bool bPartOfGroup = FALSE;
5600 dim = dd->dim[dim_ind];
5601 copy_ivec(loc, loc_c);
5602 for (i = 0; i < dd->nc[dim]; i++)
5605 rank = dd_index(dd->nc, loc_c);
5606 if (rank == dd->rank)
5608 /* This process is part of the group */
5609 bPartOfGroup = TRUE;
5612 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5616 dd->comm->mpi_comm_load[dim_ind] = c_row;
5617 if (dd->comm->eDLB != edlbNO)
5619 if (dd->ci[dim] == dd->master_ci[dim])
5621 /* This is the root process of this row */
5622 snew(dd->comm->root[dim_ind], 1);
5623 root = dd->comm->root[dim_ind];
5624 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5625 snew(root->old_cell_f, dd->nc[dim]+1);
5626 snew(root->bCellMin, dd->nc[dim]);
5629 snew(root->cell_f_max0, dd->nc[dim]);
5630 snew(root->cell_f_min1, dd->nc[dim]);
5631 snew(root->bound_min, dd->nc[dim]);
5632 snew(root->bound_max, dd->nc[dim]);
5634 snew(root->buf_ncd, dd->nc[dim]);
5638 /* This is not a root process, we only need to receive cell_f */
5639 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5642 if (dd->ci[dim] == dd->master_ci[dim])
5644 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
5650 void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
5651 const gmx_hw_info_t gmx_unused *hwinfo,
5652 const gmx_hw_opt_t gmx_unused *hw_opt)
5655 int physicalnode_id_hash;
5658 MPI_Comm mpi_comm_pp_physicalnode;
5660 if (!(cr->duty & DUTY_PP) ||
5661 hw_opt->gpu_opt.ncuda_dev_use == 0)
5663 /* Only PP nodes (currently) use GPUs.
5664 * If we don't have GPUs, there are no resources to share.
5669 physicalnode_id_hash = gmx_physicalnode_id_hash();
5671 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5677 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5678 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5679 dd->rank, physicalnode_id_hash, gpu_id);
5681 /* Split the PP communicator over the physical nodes */
5682 /* TODO: See if we should store this (before), as it's also used for
5683 * for the nodecomm summution.
5685 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5686 &mpi_comm_pp_physicalnode);
5687 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5688 &dd->comm->mpi_comm_gpu_shared);
5689 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5690 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5694 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5697 /* Note that some ranks could share a GPU, while others don't */
5699 if (dd->comm->nrank_gpu_shared == 1)
5701 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
5706 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
5709 int dim0, dim1, i, j;
5714 fprintf(debug, "Making load communicators\n");
5717 snew(dd->comm->load, dd->ndim);
5718 snew(dd->comm->mpi_comm_load, dd->ndim);
5721 make_load_communicator(dd, 0, loc);
5725 for (i = 0; i < dd->nc[dim0]; i++)
5728 make_load_communicator(dd, 1, loc);
5734 for (i = 0; i < dd->nc[dim0]; i++)
5738 for (j = 0; j < dd->nc[dim1]; j++)
5741 make_load_communicator(dd, 2, loc);
5748 fprintf(debug, "Finished making load communicators\n");
5753 void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5756 int d, dim, i, j, m;
5759 ivec dd_zp[DD_MAXIZONE];
5760 gmx_domdec_zones_t *zones;
5761 gmx_domdec_ns_ranges_t *izone;
5763 for (d = 0; d < dd->ndim; d++)
5766 copy_ivec(dd->ci, tmp);
5767 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5768 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5769 copy_ivec(dd->ci, tmp);
5770 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5771 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5774 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5777 dd->neighbor[d][1]);
5783 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5785 dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
5786 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
5793 for (i = 0; i < nzonep; i++)
5795 copy_ivec(dd_zp3[i], dd_zp[i]);
5801 for (i = 0; i < nzonep; i++)
5803 copy_ivec(dd_zp2[i], dd_zp[i]);
5809 for (i = 0; i < nzonep; i++)
5811 copy_ivec(dd_zp1[i], dd_zp[i]);
5815 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5820 zones = &dd->comm->zones;
5822 for (i = 0; i < nzone; i++)
5825 clear_ivec(zones->shift[i]);
5826 for (d = 0; d < dd->ndim; d++)
5828 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5833 for (i = 0; i < nzone; i++)
5835 for (d = 0; d < DIM; d++)
5837 s[d] = dd->ci[d] - zones->shift[i][d];
5842 else if (s[d] >= dd->nc[d])
5848 zones->nizone = nzonep;
5849 for (i = 0; i < zones->nizone; i++)
5851 if (dd_zp[i][0] != i)
5853 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5855 izone = &zones->izone[i];
5856 izone->j0 = dd_zp[i][1];
5857 izone->j1 = dd_zp[i][2];
5858 for (dim = 0; dim < DIM; dim++)
5860 if (dd->nc[dim] == 1)
5862 /* All shifts should be allowed */
5863 izone->shift0[dim] = -1;
5864 izone->shift1[dim] = 1;
5869 izone->shift0[d] = 0;
5870 izone->shift1[d] = 0;
5871 for(j=izone->j0; j<izone->j1; j++) {
5872 if (dd->shift[j][d] > dd->shift[i][d])
5873 izone->shift0[d] = -1;
5874 if (dd->shift[j][d] < dd->shift[i][d])
5875 izone->shift1[d] = 1;
5881 /* Assume the shift are not more than 1 cell */
5882 izone->shift0[dim] = 1;
5883 izone->shift1[dim] = -1;
5884 for (j = izone->j0; j < izone->j1; j++)
5886 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5887 if (shift_diff < izone->shift0[dim])
5889 izone->shift0[dim] = shift_diff;
5891 if (shift_diff > izone->shift1[dim])
5893 izone->shift1[dim] = shift_diff;
5900 if (dd->comm->eDLB != edlbNO)
5902 snew(dd->comm->root, dd->ndim);
5905 if (dd->comm->bRecordLoad)
5907 make_load_communicators(dd);
5911 static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
5914 gmx_domdec_comm_t *comm;
5925 if (comm->bCartesianPP)
5927 /* Set up cartesian communication for the particle-particle part */
5930 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5931 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
5934 for (i = 0; i < DIM; i++)
5938 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
5940 /* We overwrite the old communicator with the new cartesian one */
5941 cr->mpi_comm_mygroup = comm_cart;
5944 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5945 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5947 if (comm->bCartesianPP_PME)
5949 /* Since we want to use the original cartesian setup for sim,
5950 * and not the one after split, we need to make an index.
5952 snew(comm->ddindex2ddnodeid, dd->nnodes);
5953 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5954 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5955 /* Get the rank of the DD master,
5956 * above we made sure that the master node is a PP node.
5966 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
5968 else if (comm->bCartesianPP)
5970 if (cr->npmenodes == 0)
5972 /* The PP communicator is also
5973 * the communicator for this simulation
5975 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5977 cr->nodeid = dd->rank;
5979 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
5981 /* We need to make an index to go from the coordinates
5982 * to the nodeid of this simulation.
5984 snew(comm->ddindex2simnodeid, dd->nnodes);
5985 snew(buf, dd->nnodes);
5986 if (cr->duty & DUTY_PP)
5988 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5990 /* Communicate the ddindex to simulation nodeid index */
5991 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
5992 cr->mpi_comm_mysim);
5995 /* Determine the master coordinates and rank.
5996 * The DD master should be the same node as the master of this sim.
5998 for (i = 0; i < dd->nnodes; i++)
6000 if (comm->ddindex2simnodeid[i] == 0)
6002 ddindex2xyz(dd->nc, i, dd->master_ci);
6003 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
6008 fprintf(debug, "The master rank is %d\n", dd->masterrank);
6013 /* No Cartesian communicators */
6014 /* We use the rank in dd->comm->all as DD index */
6015 ddindex2xyz(dd->nc, dd->rank, dd->ci);
6016 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
6018 clear_ivec(dd->master_ci);
6025 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
6026 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6031 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
6032 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6036 static void receive_ddindex2simnodeid(t_commrec *cr)
6040 gmx_domdec_comm_t *comm;
6047 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6049 snew(comm->ddindex2simnodeid, dd->nnodes);
6050 snew(buf, dd->nnodes);
6051 if (cr->duty & DUTY_PP)
6053 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6056 /* Communicate the ddindex to simulation nodeid index */
6057 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
6058 cr->mpi_comm_mysim);
6065 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6066 int ncg, int natoms)
6068 gmx_domdec_master_t *ma;
6073 snew(ma->ncg, dd->nnodes);
6074 snew(ma->index, dd->nnodes+1);
6076 snew(ma->nat, dd->nnodes);
6077 snew(ma->ibuf, dd->nnodes*2);
6078 snew(ma->cell_x, DIM);
6079 for (i = 0; i < DIM; i++)
6081 snew(ma->cell_x[i], dd->nc[i]+1);
6084 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
6090 snew(ma->vbuf, natoms);
6096 static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
6097 int gmx_unused reorder)
6100 gmx_domdec_comm_t *comm;
6111 if (comm->bCartesianPP)
6113 for (i = 1; i < DIM; i++)
6115 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6117 if (bDiv[YY] || bDiv[ZZ])
6119 comm->bCartesianPP_PME = TRUE;
6120 /* If we have 2D PME decomposition, which is always in x+y,
6121 * we stack the PME only nodes in z.
6122 * Otherwise we choose the direction that provides the thinnest slab
6123 * of PME only nodes as this will have the least effect
6124 * on the PP communication.
6125 * But for the PME communication the opposite might be better.
6127 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
6129 dd->nc[YY] > dd->nc[ZZ]))
6131 comm->cartpmedim = ZZ;
6135 comm->cartpmedim = YY;
6137 comm->ntot[comm->cartpmedim]
6138 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
6142 fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
6144 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6149 if (comm->bCartesianPP_PME)
6153 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
6156 for (i = 0; i < DIM; i++)
6160 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
6163 MPI_Comm_rank(comm_cart, &rank);
6164 if (MASTERNODE(cr) && rank != 0)
6166 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6169 /* With this assigment we loose the link to the original communicator
6170 * which will usually be MPI_COMM_WORLD, unless have multisim.
6172 cr->mpi_comm_mysim = comm_cart;
6173 cr->sim_nodeid = rank;
6175 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
6179 fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
6180 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
6183 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6187 if (cr->npmenodes == 0 ||
6188 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6190 cr->duty = DUTY_PME;
6193 /* Split the sim communicator into PP and PME only nodes */
6194 MPI_Comm_split(cr->mpi_comm_mysim,
6196 dd_index(comm->ntot, dd->ci),
6197 &cr->mpi_comm_mygroup);
6201 switch (dd_node_order)
6206 fprintf(fplog, "Order of the ranks: PP first, PME last\n");
6209 case ddnoINTERLEAVE:
6210 /* Interleave the PP-only and PME-only nodes,
6211 * as on clusters with dual-core machines this will double
6212 * the communication bandwidth of the PME processes
6213 * and thus speed up the PP <-> PME and inter PME communication.
6217 fprintf(fplog, "Interleaving PP and PME ranks\n");
6219 comm->pmenodes = dd_pmenodes(cr);
6224 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6227 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6229 cr->duty = DUTY_PME;
6236 /* Split the sim communicator into PP and PME only nodes */
6237 MPI_Comm_split(cr->mpi_comm_mysim,
6240 &cr->mpi_comm_mygroup);
6241 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6247 fprintf(fplog, "This rank does only %s work.\n\n",
6248 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
6252 void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6255 gmx_domdec_comm_t *comm;
6261 copy_ivec(dd->nc, comm->ntot);
6263 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6264 comm->bCartesianPP_PME = FALSE;
6266 /* Reorder the nodes by default. This might change the MPI ranks.
6267 * Real reordering is only supported on very few architectures,
6268 * Blue Gene is one of them.
6270 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
6272 if (cr->npmenodes > 0)
6274 /* Split the communicator into a PP and PME part */
6275 split_communicator(fplog, cr, dd_node_order, CartReorder);
6276 if (comm->bCartesianPP_PME)
6278 /* We (possibly) reordered the nodes in split_communicator,
6279 * so it is no longer required in make_pp_communicator.
6281 CartReorder = FALSE;
6286 /* All nodes do PP and PME */
6288 /* We do not require separate communicators */
6289 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6293 if (cr->duty & DUTY_PP)
6295 /* Copy or make a new PP communicator */
6296 make_pp_communicator(fplog, cr, CartReorder);
6300 receive_ddindex2simnodeid(cr);
6303 if (!(cr->duty & DUTY_PME))
6305 /* Set up the commnuication to our PME node */
6306 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6307 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6310 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6311 dd->pme_nodeid, dd->pme_receive_vir_ener);
6316 dd->pme_nodeid = -1;
6321 dd->ma = init_gmx_domdec_master_t(dd,
6323 comm->cgs_gl.index[comm->cgs_gl.nr]);
6327 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6329 real *slb_frac, tot;
6334 if (nc > 1 && size_string != NULL)
6338 fprintf(fplog, "Using static load balancing for the %s direction\n",
6343 for (i = 0; i < nc; i++)
6346 sscanf(size_string, "%lf%n", &dbl, &n);
6349 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6358 fprintf(fplog, "Relative cell sizes:");
6360 for (i = 0; i < nc; i++)
6365 fprintf(fplog, " %5.3f", slb_frac[i]);
6370 fprintf(fplog, "\n");
6377 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6380 gmx_mtop_ilistloop_t iloop;
6384 iloop = gmx_mtop_ilistloop_init(mtop);
6385 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6387 for (ftype = 0; ftype < F_NRE; ftype++)
6389 if ((interaction_function[ftype].flags & IF_BOND) &&
6392 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
6400 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6406 val = getenv(env_var);
6409 if (sscanf(val, "%d", &nst) <= 0)
6415 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6423 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6427 fprintf(stderr, "\n%s\n", warn_string);
6431 fprintf(fplog, "\n%s\n", warn_string);
6435 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6436 t_inputrec *ir, FILE *fplog)
6438 if (ir->ePBC == epbcSCREW &&
6439 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6441 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6444 if (ir->ns_type == ensSIMPLE)
6446 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6449 if (ir->nstlist == 0)
6451 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6454 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6456 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6460 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6465 r = ddbox->box_size[XX];
6466 for (di = 0; di < dd->ndim; di++)
6469 /* Check using the initial average cell size */
6470 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6476 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6477 const char *dlb_opt, gmx_bool bRecordLoad,
6478 unsigned long Flags, t_inputrec *ir)
6486 case 'a': eDLB = edlbAUTO; break;
6487 case 'n': eDLB = edlbNO; break;
6488 case 'y': eDLB = edlbYES; break;
6489 default: gmx_incons("Unknown dlb_opt");
6492 if (Flags & MD_RERUN)
6497 if (!EI_DYNAMICS(ir->eI))
6499 if (eDLB == edlbYES)
6501 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6502 dd_warning(cr, fplog, buf);
6510 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6515 if (Flags & MD_REPRODUCIBLE)
6522 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6526 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6529 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
6537 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6542 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6544 /* Decomposition order z,y,x */
6547 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6549 for (dim = DIM-1; dim >= 0; dim--)
6551 if (dd->nc[dim] > 1)
6553 dd->dim[dd->ndim++] = dim;
6559 /* Decomposition order x,y,z */
6560 for (dim = 0; dim < DIM; dim++)
6562 if (dd->nc[dim] > 1)
6564 dd->dim[dd->ndim++] = dim;
6570 static gmx_domdec_comm_t *init_dd_comm()
6572 gmx_domdec_comm_t *comm;
6576 snew(comm->cggl_flag, DIM*2);
6577 snew(comm->cgcm_state, DIM*2);
6578 for (i = 0; i < DIM*2; i++)
6580 comm->cggl_flag_nalloc[i] = 0;
6581 comm->cgcm_state_nalloc[i] = 0;
6584 comm->nalloc_int = 0;
6585 comm->buf_int = NULL;
6587 vec_rvec_init(&comm->vbuf);
6589 comm->n_load_have = 0;
6590 comm->n_load_collect = 0;
6592 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6594 comm->sum_nat[i] = 0;
6598 comm->load_step = 0;
6601 clear_ivec(comm->load_lim);
6608 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6609 unsigned long Flags,
6611 real comm_distance_min, real rconstr,
6612 const char *dlb_opt, real dlb_scale,
6613 const char *sizex, const char *sizey, const char *sizez,
6614 gmx_mtop_t *mtop, t_inputrec *ir,
6615 matrix box, rvec *x,
6617 int *npme_x, int *npme_y)
6620 gmx_domdec_comm_t *comm;
6623 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6630 "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
6635 dd->comm = init_dd_comm();
6637 snew(comm->cggl_flag, DIM*2);
6638 snew(comm->cgcm_state, DIM*2);
6640 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6641 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6643 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
6644 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
6645 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
6646 recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
6647 comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
6648 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
6649 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
6650 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
6652 dd->pme_recv_f_alloc = 0;
6653 dd->pme_recv_f_buf = NULL;
6655 if (dd->bSendRecv2 && fplog)
6657 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6663 fprintf(fplog, "Will load balance based on FLOP count\n");
6665 if (comm->eFlop > 1)
6667 srand(1+cr->nodeid);
6669 comm->bRecordLoad = TRUE;
6673 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6677 /* Initialize to GPU share count to 0, might change later */
6678 comm->nrank_gpu_shared = 0;
6680 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6682 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6685 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6687 dd->bGridJump = comm->bDynLoadBal;
6688 comm->bPMELoadBalDLBLimits = FALSE;
6690 if (comm->nstSortCG)
6694 if (comm->nstSortCG == 1)
6696 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6700 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6704 snew(comm->sort, 1);
6710 fprintf(fplog, "Will not sort the charge groups\n");
6714 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6716 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6717 if (comm->bInterCGBondeds)
6719 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6723 comm->bInterCGMultiBody = FALSE;
6726 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6727 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6729 if (ir->rlistlong == 0)
6731 /* Set the cut-off to some very large value,
6732 * so we don't need if statements everywhere in the code.
6733 * We use sqrt, since the cut-off is squared in some places.
6735 comm->cutoff = GMX_CUTOFF_INF;
6739 comm->cutoff = ir->rlistlong;
6741 comm->cutoff_mbody = 0;
6743 comm->cellsize_limit = 0;
6744 comm->bBondComm = FALSE;
6746 if (comm->bInterCGBondeds)
6748 if (comm_distance_min > 0)
6750 comm->cutoff_mbody = comm_distance_min;
6751 if (Flags & MD_DDBONDCOMM)
6753 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6757 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6759 r_bonded_limit = comm->cutoff_mbody;
6761 else if (ir->bPeriodicMols)
6763 /* Can not easily determine the required cut-off */
6764 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6765 comm->cutoff_mbody = comm->cutoff/2;
6766 r_bonded_limit = comm->cutoff_mbody;
6772 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6773 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6775 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6776 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6778 /* We use an initial margin of 10% for the minimum cell size,
6779 * except when we are just below the non-bonded cut-off.
6781 if (Flags & MD_DDBONDCOMM)
6783 if (max(r_2b, r_mb) > comm->cutoff)
6785 r_bonded = max(r_2b, r_mb);
6786 r_bonded_limit = 1.1*r_bonded;
6787 comm->bBondComm = TRUE;
6792 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6794 /* We determine cutoff_mbody later */
6798 /* No special bonded communication,
6799 * simply increase the DD cut-off.
6801 r_bonded_limit = 1.1*max(r_2b, r_mb);
6802 comm->cutoff_mbody = r_bonded_limit;
6803 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6806 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6810 "Minimum cell size due to bonded interactions: %.3f nm\n",
6811 comm->cellsize_limit);
6815 if (dd->bInterCGcons && rconstr <= 0)
6817 /* There is a cell size limit due to the constraints (P-LINCS) */
6818 rconstr = constr_r_max(fplog, mtop, ir);
6822 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6824 if (rconstr > comm->cellsize_limit)
6826 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6830 else if (rconstr > 0 && fplog)
6832 /* Here we do not check for dd->bInterCGcons,
6833 * because one can also set a cell size limit for virtual sites only
6834 * and at this point we don't know yet if there are intercg v-sites.
6837 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6840 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6842 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6846 copy_ivec(nc, dd->nc);
6847 set_dd_dim(fplog, dd);
6848 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6850 if (cr->npmenodes == -1)
6854 acs = average_cellsize_min(dd, ddbox);
6855 if (acs < comm->cellsize_limit)
6859 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6861 gmx_fatal_collective(FARGS, cr, NULL,
6862 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6863 acs, comm->cellsize_limit);
6868 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6870 /* We need to choose the optimal DD grid and possibly PME nodes */
6871 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6872 comm->eDLB != edlbNO, dlb_scale,
6873 comm->cellsize_limit, comm->cutoff,
6874 comm->bInterCGBondeds);
6876 if (dd->nc[XX] == 0)
6878 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6879 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
6880 !bC ? "-rdd" : "-rcon",
6881 comm->eDLB != edlbNO ? " or -dds" : "",
6882 bC ? " or your LINCS settings" : "");
6884 gmx_fatal_collective(FARGS, cr, NULL,
6885 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
6887 "Look in the log file for details on the domain decomposition",
6888 cr->nnodes-cr->npmenodes, limit, buf);
6890 set_dd_dim(fplog, dd);
6896 "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
6897 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6900 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6901 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6903 gmx_fatal_collective(FARGS, cr, NULL,
6904 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
6905 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6907 if (cr->npmenodes > dd->nnodes)
6909 gmx_fatal_collective(FARGS, cr, NULL,
6910 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6912 if (cr->npmenodes > 0)
6914 comm->npmenodes = cr->npmenodes;
6918 comm->npmenodes = dd->nnodes;
6921 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6923 /* The following choices should match those
6924 * in comm_cost_est in domdec_setup.c.
6925 * Note that here the checks have to take into account
6926 * that the decomposition might occur in a different order than xyz
6927 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6928 * in which case they will not match those in comm_cost_est,
6929 * but since that is mainly for testing purposes that's fine.
6931 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6932 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6933 getenv("GMX_PMEONEDD") == NULL)
6935 comm->npmedecompdim = 2;
6936 comm->npmenodes_x = dd->nc[XX];
6937 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6941 /* In case nc is 1 in both x and y we could still choose to
6942 * decompose pme in y instead of x, but we use x for simplicity.
6944 comm->npmedecompdim = 1;
6945 if (dd->dim[0] == YY)
6947 comm->npmenodes_x = 1;
6948 comm->npmenodes_y = comm->npmenodes;
6952 comm->npmenodes_x = comm->npmenodes;
6953 comm->npmenodes_y = 1;
6958 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6959 comm->npmenodes_x, comm->npmenodes_y, 1);
6964 comm->npmedecompdim = 0;
6965 comm->npmenodes_x = 0;
6966 comm->npmenodes_y = 0;
6969 /* Technically we don't need both of these,
6970 * but it simplifies code not having to recalculate it.
6972 *npme_x = comm->npmenodes_x;
6973 *npme_y = comm->npmenodes_y;
6975 snew(comm->slb_frac, DIM);
6976 if (comm->eDLB == edlbNO)
6978 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6979 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6980 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
6983 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6985 if (comm->bBondComm || comm->eDLB != edlbNO)
6987 /* Set the bonded communication distance to halfway
6988 * the minimum and the maximum,
6989 * since the extra communication cost is nearly zero.
6991 acs = average_cellsize_min(dd, ddbox);
6992 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6993 if (comm->eDLB != edlbNO)
6995 /* Check if this does not limit the scaling */
6996 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
6998 if (!comm->bBondComm)
7000 /* Without bBondComm do not go beyond the n.b. cut-off */
7001 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
7002 if (comm->cellsize_limit >= comm->cutoff)
7004 /* We don't loose a lot of efficieny
7005 * when increasing it to the n.b. cut-off.
7006 * It can even be slightly faster, because we need
7007 * less checks for the communication setup.
7009 comm->cutoff_mbody = comm->cutoff;
7012 /* Check if we did not end up below our original limit */
7013 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
7015 if (comm->cutoff_mbody > comm->cellsize_limit)
7017 comm->cellsize_limit = comm->cutoff_mbody;
7020 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
7025 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
7026 "cellsize limit %f\n",
7027 comm->bBondComm, comm->cellsize_limit);
7032 check_dd_restrictions(cr, dd, ir, fplog);
7035 comm->partition_step = INT_MIN;
7038 clear_dd_cycle_counts(dd);
7043 static void set_dlb_limits(gmx_domdec_t *dd)
7048 for (d = 0; d < dd->ndim; d++)
7050 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7051 dd->comm->cellsize_min[dd->dim[d]] =
7052 dd->comm->cellsize_min_dlb[dd->dim[d]];
7057 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7060 gmx_domdec_comm_t *comm;
7070 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
7073 cellsize_min = comm->cellsize_min[dd->dim[0]];
7074 for (d = 1; d < dd->ndim; d++)
7076 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7079 if (cellsize_min < comm->cellsize_limit*1.05)
7081 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7083 /* Change DLB from "auto" to "no". */
7084 comm->eDLB = edlbNO;
7089 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7090 comm->bDynLoadBal = TRUE;
7091 dd->bGridJump = TRUE;
7095 /* We can set the required cell size info here,
7096 * so we do not need to communicate this.
7097 * The grid is completely uniform.
7099 for (d = 0; d < dd->ndim; d++)
7103 comm->load[d].sum_m = comm->load[d].sum;
7105 nc = dd->nc[dd->dim[d]];
7106 for (i = 0; i < nc; i++)
7108 comm->root[d]->cell_f[i] = i/(real)nc;
7111 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7112 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7115 comm->root[d]->cell_f[nc] = 1.0;
7120 static char *init_bLocalCG(gmx_mtop_t *mtop)
7125 ncg = ncg_mtop(mtop);
7126 snew(bLocalCG, ncg);
7127 for (cg = 0; cg < ncg; cg++)
7129 bLocalCG[cg] = FALSE;
7135 void dd_init_bondeds(FILE *fplog,
7136 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7138 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7140 gmx_domdec_comm_t *comm;
7144 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7148 if (comm->bBondComm)
7150 /* Communicate atoms beyond the cut-off for bonded interactions */
7153 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7155 comm->bLocalCG = init_bLocalCG(mtop);
7159 /* Only communicate atoms based on cut-off */
7160 comm->cglink = NULL;
7161 comm->bLocalCG = NULL;
7165 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7167 gmx_bool bDynLoadBal, real dlb_scale,
7170 gmx_domdec_comm_t *comm;
7185 fprintf(fplog, "The maximum number of communication pulses is:");
7186 for (d = 0; d < dd->ndim; d++)
7188 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7190 fprintf(fplog, "\n");
7191 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7192 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7193 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7194 for (d = 0; d < DIM; d++)
7198 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7205 comm->cellsize_min_dlb[d]/
7206 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7208 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7211 fprintf(fplog, "\n");
7215 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
7216 fprintf(fplog, "The initial number of communication pulses is:");
7217 for (d = 0; d < dd->ndim; d++)
7219 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7221 fprintf(fplog, "\n");
7222 fprintf(fplog, "The initial domain decomposition cell size is:");
7223 for (d = 0; d < DIM; d++)
7227 fprintf(fplog, " %c %.2f nm",
7228 dim2char(d), dd->comm->cellsize_min[d]);
7231 fprintf(fplog, "\n\n");
7234 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7236 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7237 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7238 "non-bonded interactions", "", comm->cutoff);
7242 limit = dd->comm->cellsize_limit;
7246 if (dynamic_dd_box(ddbox, ir))
7248 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7250 limit = dd->comm->cellsize_min[XX];
7251 for (d = 1; d < DIM; d++)
7253 limit = min(limit, dd->comm->cellsize_min[d]);
7257 if (comm->bInterCGBondeds)
7259 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7260 "two-body bonded interactions", "(-rdd)",
7261 max(comm->cutoff, comm->cutoff_mbody));
7262 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7263 "multi-body bonded interactions", "(-rdd)",
7264 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7268 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7269 "virtual site constructions", "(-rcon)", limit);
7271 if (dd->constraint_comm)
7273 sprintf(buf, "atoms separated by up to %d constraints",
7275 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7276 buf, "(-rcon)", limit);
7278 fprintf(fplog, "\n");
7284 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7286 const t_inputrec *ir,
7287 const gmx_ddbox_t *ddbox)
7289 gmx_domdec_comm_t *comm;
7290 int d, dim, npulse, npulse_d_max, npulse_d;
7295 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7297 /* Determine the maximum number of comm. pulses in one dimension */
7299 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7301 /* Determine the maximum required number of grid pulses */
7302 if (comm->cellsize_limit >= comm->cutoff)
7304 /* Only a single pulse is required */
7307 else if (!bNoCutOff && comm->cellsize_limit > 0)
7309 /* We round down slightly here to avoid overhead due to the latency
7310 * of extra communication calls when the cut-off
7311 * would be only slightly longer than the cell size.
7312 * Later cellsize_limit is redetermined,
7313 * so we can not miss interactions due to this rounding.
7315 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7319 /* There is no cell size limit */
7320 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7323 if (!bNoCutOff && npulse > 1)
7325 /* See if we can do with less pulses, based on dlb_scale */
7327 for (d = 0; d < dd->ndim; d++)
7330 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7331 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7332 npulse_d_max = max(npulse_d_max, npulse_d);
7334 npulse = min(npulse, npulse_d_max);
7337 /* This env var can override npulse */
7338 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
7345 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7346 for (d = 0; d < dd->ndim; d++)
7348 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7349 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7350 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7351 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7352 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7354 comm->bVacDLBNoLimit = FALSE;
7358 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7359 if (!comm->bVacDLBNoLimit)
7361 comm->cellsize_limit = max(comm->cellsize_limit,
7362 comm->cutoff/comm->maxpulse);
7364 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7365 /* Set the minimum cell size for each DD dimension */
7366 for (d = 0; d < dd->ndim; d++)
7368 if (comm->bVacDLBNoLimit ||
7369 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7371 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7375 comm->cellsize_min_dlb[dd->dim[d]] =
7376 comm->cutoff/comm->cd[d].np_dlb;
7379 if (comm->cutoff_mbody <= 0)
7381 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7383 if (comm->bDynLoadBal)
7389 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7391 /* If each molecule is a single charge group
7392 * or we use domain decomposition for each periodic dimension,
7393 * we do not need to take pbc into account for the bonded interactions.
7395 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7398 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
7401 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7402 t_inputrec *ir, gmx_ddbox_t *ddbox)
7404 gmx_domdec_comm_t *comm;
7410 /* Initialize the thread data.
7411 * This can not be done in init_domain_decomposition,
7412 * as the numbers of threads is determined later.
7414 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7417 snew(comm->dth, comm->nth);
7420 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7422 init_ddpme(dd, &comm->ddpme[0], 0);
7423 if (comm->npmedecompdim >= 2)
7425 init_ddpme(dd, &comm->ddpme[1], 1);
7430 comm->npmenodes = 0;
7431 if (dd->pme_nodeid >= 0)
7433 gmx_fatal_collective(FARGS, NULL, dd,
7434 "Can not have separate PME ranks without PME electrostatics");
7440 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7442 if (comm->eDLB != edlbNO)
7444 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7447 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7448 if (comm->eDLB == edlbAUTO)
7452 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7454 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
7457 if (ir->ePBC == epbcNONE)
7459 vol_frac = 1 - 1/(double)dd->nnodes;
7464 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7468 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7470 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7472 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
7475 static gmx_bool test_dd_cutoff(t_commrec *cr,
7476 t_state *state, t_inputrec *ir,
7487 set_ddbox(dd, FALSE, cr, ir, state->box,
7488 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7492 for (d = 0; d < dd->ndim; d++)
7496 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7497 if (dynamic_dd_box(&ddbox, ir))
7499 inv_cell_size *= DD_PRES_SCALE_MARGIN;
7502 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7504 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7505 dd->comm->cd[d].np_dlb > 0)
7507 if (np > dd->comm->cd[d].np_dlb)
7512 /* If a current local cell size is smaller than the requested
7513 * cut-off, we could still fix it, but this gets very complicated.
7514 * Without fixing here, we might actually need more checks.
7516 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7523 if (dd->comm->eDLB != edlbNO)
7525 /* If DLB is not active yet, we don't need to check the grid jumps.
7526 * Actually we shouldn't, because then the grid jump data is not set.
7528 if (dd->comm->bDynLoadBal &&
7529 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
7534 gmx_sumi(1, &LocallyLimited, cr);
7536 if (LocallyLimited > 0)
7545 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7548 gmx_bool bCutoffAllowed;
7550 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7554 cr->dd->comm->cutoff = cutoff_req;
7557 return bCutoffAllowed;
7560 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7562 gmx_domdec_comm_t *comm;
7564 comm = cr->dd->comm;
7566 /* Turn on the DLB limiting (might have been on already) */
7567 comm->bPMELoadBalDLBLimits = TRUE;
7569 /* Change the cut-off limit */
7570 comm->PMELoadBal_max_cutoff = comm->cutoff;
7573 static void merge_cg_buffers(int ncell,
7574 gmx_domdec_comm_dim_t *cd, int pulse,
7576 int *index_gl, int *recv_i,
7577 rvec *cg_cm, rvec *recv_vr,
7579 cginfo_mb_t *cginfo_mb, int *cginfo)
7581 gmx_domdec_ind_t *ind, *ind_p;
7582 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7583 int shift, shift_at;
7585 ind = &cd->ind[pulse];
7587 /* First correct the already stored data */
7588 shift = ind->nrecv[ncell];
7589 for (cell = ncell-1; cell >= 0; cell--)
7591 shift -= ind->nrecv[cell];
7594 /* Move the cg's present from previous grid pulses */
7595 cg0 = ncg_cell[ncell+cell];
7596 cg1 = ncg_cell[ncell+cell+1];
7597 cgindex[cg1+shift] = cgindex[cg1];
7598 for (cg = cg1-1; cg >= cg0; cg--)
7600 index_gl[cg+shift] = index_gl[cg];
7601 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7602 cgindex[cg+shift] = cgindex[cg];
7603 cginfo[cg+shift] = cginfo[cg];
7605 /* Correct the already stored send indices for the shift */
7606 for (p = 1; p <= pulse; p++)
7608 ind_p = &cd->ind[p];
7610 for (c = 0; c < cell; c++)
7612 cg0 += ind_p->nsend[c];
7614 cg1 = cg0 + ind_p->nsend[cell];
7615 for (cg = cg0; cg < cg1; cg++)
7617 ind_p->index[cg] += shift;
7623 /* Merge in the communicated buffers */
7627 for (cell = 0; cell < ncell; cell++)
7629 cg1 = ncg_cell[ncell+cell+1] + shift;
7632 /* Correct the old cg indices */
7633 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7635 cgindex[cg+1] += shift_at;
7638 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7640 /* Copy this charge group from the buffer */
7641 index_gl[cg1] = recv_i[cg0];
7642 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7643 /* Add it to the cgindex */
7644 cg_gl = index_gl[cg1];
7645 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7646 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7647 cgindex[cg1+1] = cgindex[cg1] + nat;
7652 shift += ind->nrecv[cell];
7653 ncg_cell[ncell+cell+1] = cg1;
7657 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7658 int nzone, int cg0, const int *cgindex)
7662 /* Store the atom block boundaries for easy copying of communication buffers
7665 for (zone = 0; zone < nzone; zone++)
7667 for (p = 0; p < cd->np; p++)
7669 cd->ind[p].cell2at0[zone] = cgindex[cg];
7670 cg += cd->ind[p].nrecv[zone];
7671 cd->ind[p].cell2at1[zone] = cgindex[cg];
7676 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7682 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7684 if (!bLocalCG[link->a[i]])
7693 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7695 real c[DIM][4]; /* the corners for the non-bonded communication */
7696 real cr0; /* corner for rounding */
7697 real cr1[4]; /* corners for rounding */
7698 real bc[DIM]; /* corners for bounded communication */
7699 real bcr1; /* corner for rounding for bonded communication */
7702 /* Determine the corners of the domain(s) we are communicating with */
7704 set_dd_corners(const gmx_domdec_t *dd,
7705 int dim0, int dim1, int dim2,
7709 const gmx_domdec_comm_t *comm;
7710 const gmx_domdec_zones_t *zones;
7715 zones = &comm->zones;
7717 /* Keep the compiler happy */
7721 /* The first dimension is equal for all cells */
7722 c->c[0][0] = comm->cell_x0[dim0];
7725 c->bc[0] = c->c[0][0];
7730 /* This cell row is only seen from the first row */
7731 c->c[1][0] = comm->cell_x0[dim1];
7732 /* All rows can see this row */
7733 c->c[1][1] = comm->cell_x0[dim1];
7736 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7739 /* For the multi-body distance we need the maximum */
7740 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7743 /* Set the upper-right corner for rounding */
7744 c->cr0 = comm->cell_x1[dim0];
7749 for (j = 0; j < 4; j++)
7751 c->c[2][j] = comm->cell_x0[dim2];
7755 /* Use the maximum of the i-cells that see a j-cell */
7756 for (i = 0; i < zones->nizone; i++)
7758 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7764 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7770 /* For the multi-body distance we need the maximum */
7771 c->bc[2] = comm->cell_x0[dim2];
7772 for (i = 0; i < 2; i++)
7774 for (j = 0; j < 2; j++)
7776 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7782 /* Set the upper-right corner for rounding */
7783 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7784 * Only cell (0,0,0) can see cell 7 (1,1,1)
7786 c->cr1[0] = comm->cell_x1[dim1];
7787 c->cr1[3] = comm->cell_x1[dim1];
7790 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7793 /* For the multi-body distance we need the maximum */
7794 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7801 /* Determine which cg's we need to send in this pulse from this zone */
7803 get_zone_pulse_cgs(gmx_domdec_t *dd,
7804 int zonei, int zone,
7806 const int *index_gl,
7808 int dim, int dim_ind,
7809 int dim0, int dim1, int dim2,
7810 real r_comm2, real r_bcomm2,
7814 real skew_fac2_d, real skew_fac_01,
7815 rvec *v_d, rvec *v_0, rvec *v_1,
7816 const dd_corners_t *c,
7818 gmx_bool bDistBonded,
7824 gmx_domdec_ind_t *ind,
7825 int **ibuf, int *ibuf_nalloc,
7831 gmx_domdec_comm_t *comm;
7833 gmx_bool bDistMB_pulse;
7835 real r2, rb2, r, tric_sh;
7838 int nsend_z, nsend, nat;
7842 bScrew = (dd->bScrewPBC && dim == XX);
7844 bDistMB_pulse = (bDistMB && bDistBonded);
7850 for (cg = cg0; cg < cg1; cg++)
7854 if (tric_dist[dim_ind] == 0)
7856 /* Rectangular direction, easy */
7857 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7864 r = cg_cm[cg][dim] - c->bc[dim_ind];
7870 /* Rounding gives at most a 16% reduction
7871 * in communicated atoms
7873 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7875 r = cg_cm[cg][dim0] - c->cr0;
7876 /* This is the first dimension, so always r >= 0 */
7883 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7885 r = cg_cm[cg][dim1] - c->cr1[zone];
7892 r = cg_cm[cg][dim1] - c->bcr1;
7902 /* Triclinic direction, more complicated */
7905 /* Rounding, conservative as the skew_fac multiplication
7906 * will slightly underestimate the distance.
7908 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7910 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7911 for (i = dim0+1; i < DIM; i++)
7913 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7915 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7918 rb[dim0] = rn[dim0];
7921 /* Take care that the cell planes along dim0 might not
7922 * be orthogonal to those along dim1 and dim2.
7924 for (i = 1; i <= dim_ind; i++)
7927 if (normal[dim0][dimd] > 0)
7929 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7932 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7937 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7939 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7941 for (i = dim1+1; i < DIM; i++)
7943 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7945 rn[dim1] += tric_sh;
7948 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7949 /* Take care of coupling of the distances
7950 * to the planes along dim0 and dim1 through dim2.
7952 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7953 /* Take care that the cell planes along dim1
7954 * might not be orthogonal to that along dim2.
7956 if (normal[dim1][dim2] > 0)
7958 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7964 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7967 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7968 /* Take care of coupling of the distances
7969 * to the planes along dim0 and dim1 through dim2.
7971 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7972 /* Take care that the cell planes along dim1
7973 * might not be orthogonal to that along dim2.
7975 if (normal[dim1][dim2] > 0)
7977 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7982 /* The distance along the communication direction */
7983 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7985 for (i = dim+1; i < DIM; i++)
7987 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7992 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7993 /* Take care of coupling of the distances
7994 * to the planes along dim0 and dim1 through dim2.
7996 if (dim_ind == 1 && zonei == 1)
7998 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
8004 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
8007 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
8008 /* Take care of coupling of the distances
8009 * to the planes along dim0 and dim1 through dim2.
8011 if (dim_ind == 1 && zonei == 1)
8013 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
8021 ((bDistMB && rb2 < r_bcomm2) ||
8022 (bDist2B && r2 < r_bcomm2)) &&
8024 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
8025 missing_link(comm->cglink, index_gl[cg],
8028 /* Make an index to the local charge groups */
8029 if (nsend+1 > ind->nalloc)
8031 ind->nalloc = over_alloc_large(nsend+1);
8032 srenew(ind->index, ind->nalloc);
8034 if (nsend+1 > *ibuf_nalloc)
8036 *ibuf_nalloc = over_alloc_large(nsend+1);
8037 srenew(*ibuf, *ibuf_nalloc);
8039 ind->index[nsend] = cg;
8040 (*ibuf)[nsend] = index_gl[cg];
8042 vec_rvec_check_alloc(vbuf, nsend+1);
8044 if (dd->ci[dim] == 0)
8046 /* Correct cg_cm for pbc */
8047 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
8050 vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8051 vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8056 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8059 nat += cgindex[cg+1] - cgindex[cg];
8065 *nsend_z_ptr = nsend_z;
8068 static void setup_dd_communication(gmx_domdec_t *dd,
8069 matrix box, gmx_ddbox_t *ddbox,
8070 t_forcerec *fr, t_state *state, rvec **f)
8072 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8073 int nzone, nzone_send, zone, zonei, cg0, cg1;
8074 int c, i, j, cg, cg_gl, nrcg;
8075 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8076 gmx_domdec_comm_t *comm;
8077 gmx_domdec_zones_t *zones;
8078 gmx_domdec_comm_dim_t *cd;
8079 gmx_domdec_ind_t *ind;
8080 cginfo_mb_t *cginfo_mb;
8081 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8082 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8083 dd_corners_t corners;
8085 rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8086 real skew_fac2_d, skew_fac_01;
8093 fprintf(debug, "Setting up DD communication\n");
8098 switch (fr->cutoff_scheme)
8107 gmx_incons("unimplemented");
8111 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8113 dim = dd->dim[dim_ind];
8115 /* Check if we need to use triclinic distances */
8116 tric_dist[dim_ind] = 0;
8117 for (i = 0; i <= dim_ind; i++)
8119 if (ddbox->tric_dir[dd->dim[i]])
8121 tric_dist[dim_ind] = 1;
8126 bBondComm = comm->bBondComm;
8128 /* Do we need to determine extra distances for multi-body bondeds? */
8129 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8131 /* Do we need to determine extra distances for only two-body bondeds? */
8132 bDist2B = (bBondComm && !bDistMB);
8134 r_comm2 = sqr(comm->cutoff);
8135 r_bcomm2 = sqr(comm->cutoff_mbody);
8139 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8142 zones = &comm->zones;
8145 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8146 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8148 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8150 /* Triclinic stuff */
8151 normal = ddbox->normal;
8155 v_0 = ddbox->v[dim0];
8156 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8158 /* Determine the coupling coefficient for the distances
8159 * to the cell planes along dim0 and dim1 through dim2.
8160 * This is required for correct rounding.
8163 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8166 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8172 v_1 = ddbox->v[dim1];
8175 zone_cg_range = zones->cg_range;
8176 index_gl = dd->index_gl;
8177 cgindex = dd->cgindex;
8178 cginfo_mb = fr->cginfo_mb;
8180 zone_cg_range[0] = 0;
8181 zone_cg_range[1] = dd->ncg_home;
8182 comm->zone_ncg1[0] = dd->ncg_home;
8183 pos_cg = dd->ncg_home;
8185 nat_tot = dd->nat_home;
8187 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8189 dim = dd->dim[dim_ind];
8190 cd = &comm->cd[dim_ind];
8192 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8194 /* No pbc in this dimension, the first node should not comm. */
8202 v_d = ddbox->v[dim];
8203 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8205 cd->bInPlace = TRUE;
8206 for (p = 0; p < cd->np; p++)
8208 /* Only atoms communicated in the first pulse are used
8209 * for multi-body bonded interactions or for bBondComm.
8211 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8216 for (zone = 0; zone < nzone_send; zone++)
8218 if (tric_dist[dim_ind] && dim_ind > 0)
8220 /* Determine slightly more optimized skew_fac's
8222 * This reduces the number of communicated atoms
8223 * by about 10% for 3D DD of rhombic dodecahedra.
8225 for (dimd = 0; dimd < dim; dimd++)
8227 sf2_round[dimd] = 1;
8228 if (ddbox->tric_dir[dimd])
8230 for (i = dd->dim[dimd]+1; i < DIM; i++)
8232 /* If we are shifted in dimension i
8233 * and the cell plane is tilted forward
8234 * in dimension i, skip this coupling.
8236 if (!(zones->shift[nzone+zone][i] &&
8237 ddbox->v[dimd][i][dimd] >= 0))
8240 sqr(ddbox->v[dimd][i][dimd]);
8243 sf2_round[dimd] = 1/sf2_round[dimd];
8248 zonei = zone_perm[dim_ind][zone];
8251 /* Here we permutate the zones to obtain a convenient order
8252 * for neighbor searching
8254 cg0 = zone_cg_range[zonei];
8255 cg1 = zone_cg_range[zonei+1];
8259 /* Look only at the cg's received in the previous grid pulse
8261 cg1 = zone_cg_range[nzone+zone+1];
8262 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
8265 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8266 for (th = 0; th < comm->nth; th++)
8268 gmx_domdec_ind_t *ind_p;
8269 int **ibuf_p, *ibuf_nalloc_p;
8271 int *nsend_p, *nat_p;
8277 /* Thread 0 writes in the comm buffers */
8279 ibuf_p = &comm->buf_int;
8280 ibuf_nalloc_p = &comm->nalloc_int;
8281 vbuf_p = &comm->vbuf;
8284 nsend_zone_p = &ind->nsend[zone];
8288 /* Other threads write into temp buffers */
8289 ind_p = &comm->dth[th].ind;
8290 ibuf_p = &comm->dth[th].ibuf;
8291 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8292 vbuf_p = &comm->dth[th].vbuf;
8293 nsend_p = &comm->dth[th].nsend;
8294 nat_p = &comm->dth[th].nat;
8295 nsend_zone_p = &comm->dth[th].nsend_zone;
8297 comm->dth[th].nsend = 0;
8298 comm->dth[th].nat = 0;
8299 comm->dth[th].nsend_zone = 0;
8309 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8310 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8313 /* Get the cg's for this pulse in this zone */
8314 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8316 dim, dim_ind, dim0, dim1, dim2,
8319 normal, skew_fac2_d, skew_fac_01,
8320 v_d, v_0, v_1, &corners, sf2_round,
8321 bDistBonded, bBondComm,
8325 ibuf_p, ibuf_nalloc_p,
8331 /* Append data of threads>=1 to the communication buffers */
8332 for (th = 1; th < comm->nth; th++)
8334 dd_comm_setup_work_t *dth;
8337 dth = &comm->dth[th];
8339 ns1 = nsend + dth->nsend_zone;
8340 if (ns1 > ind->nalloc)
8342 ind->nalloc = over_alloc_dd(ns1);
8343 srenew(ind->index, ind->nalloc);
8345 if (ns1 > comm->nalloc_int)
8347 comm->nalloc_int = over_alloc_dd(ns1);
8348 srenew(comm->buf_int, comm->nalloc_int);
8350 if (ns1 > comm->vbuf.nalloc)
8352 comm->vbuf.nalloc = over_alloc_dd(ns1);
8353 srenew(comm->vbuf.v, comm->vbuf.nalloc);
8356 for (i = 0; i < dth->nsend_zone; i++)
8358 ind->index[nsend] = dth->ind.index[i];
8359 comm->buf_int[nsend] = dth->ibuf[i];
8360 copy_rvec(dth->vbuf.v[i],
8361 comm->vbuf.v[nsend]);
8365 ind->nsend[zone] += dth->nsend_zone;
8368 /* Clear the counts in case we do not have pbc */
8369 for (zone = nzone_send; zone < nzone; zone++)
8371 ind->nsend[zone] = 0;
8373 ind->nsend[nzone] = nsend;
8374 ind->nsend[nzone+1] = nat;
8375 /* Communicate the number of cg's and atoms to receive */
8376 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8377 ind->nsend, nzone+2,
8378 ind->nrecv, nzone+2);
8380 /* The rvec buffer is also required for atom buffers of size nsend
8381 * in dd_move_x and dd_move_f.
8383 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8387 /* We can receive in place if only the last zone is not empty */
8388 for (zone = 0; zone < nzone-1; zone++)
8390 if (ind->nrecv[zone] > 0)
8392 cd->bInPlace = FALSE;
8397 /* The int buffer is only required here for the cg indices */
8398 if (ind->nrecv[nzone] > comm->nalloc_int2)
8400 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8401 srenew(comm->buf_int2, comm->nalloc_int2);
8403 /* The rvec buffer is also required for atom buffers
8404 * of size nrecv in dd_move_x and dd_move_f.
8406 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8407 vec_rvec_check_alloc(&comm->vbuf2, i);
8411 /* Make space for the global cg indices */
8412 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8413 || dd->cg_nalloc == 0)
8415 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8416 srenew(index_gl, dd->cg_nalloc);
8417 srenew(cgindex, dd->cg_nalloc+1);
8419 /* Communicate the global cg indices */
8422 recv_i = index_gl + pos_cg;
8426 recv_i = comm->buf_int2;
8428 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8429 comm->buf_int, nsend,
8430 recv_i, ind->nrecv[nzone]);
8432 /* Make space for cg_cm */
8433 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8434 if (fr->cutoff_scheme == ecutsGROUP)
8442 /* Communicate cg_cm */
8445 recv_vr = cg_cm + pos_cg;
8449 recv_vr = comm->vbuf2.v;
8451 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8452 comm->vbuf.v, nsend,
8453 recv_vr, ind->nrecv[nzone]);
8455 /* Make the charge group index */
8458 zone = (p == 0 ? 0 : nzone - 1);
8459 while (zone < nzone)
8461 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8463 cg_gl = index_gl[pos_cg];
8464 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8465 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8466 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8469 /* Update the charge group presence,
8470 * so we can use it in the next pass of the loop.
8472 comm->bLocalCG[cg_gl] = TRUE;
8478 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8481 zone_cg_range[nzone+zone] = pos_cg;
8486 /* This part of the code is never executed with bBondComm. */
8487 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8488 index_gl, recv_i, cg_cm, recv_vr,
8489 cgindex, fr->cginfo_mb, fr->cginfo);
8490 pos_cg += ind->nrecv[nzone];
8492 nat_tot += ind->nrecv[nzone+1];
8496 /* Store the atom block for easy copying of communication buffers */
8497 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
8501 dd->index_gl = index_gl;
8502 dd->cgindex = cgindex;
8504 dd->ncg_tot = zone_cg_range[zones->n];
8505 dd->nat_tot = nat_tot;
8506 comm->nat[ddnatHOME] = dd->nat_home;
8507 for (i = ddnatZONE; i < ddnatNR; i++)
8509 comm->nat[i] = dd->nat_tot;
8514 /* We don't need to update cginfo, since that was alrady done above.
8515 * So we pass NULL for the forcerec.
8517 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8518 NULL, comm->bLocalCG);
8523 fprintf(debug, "Finished setting up DD communication, zones:");
8524 for (c = 0; c < zones->n; c++)
8526 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8528 fprintf(debug, "\n");
8532 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8536 for (c = 0; c < zones->nizone; c++)
8538 zones->izone[c].cg1 = zones->cg_range[c+1];
8539 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8540 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
8544 static void set_zones_size(gmx_domdec_t *dd,
8545 matrix box, const gmx_ddbox_t *ddbox,
8546 int zone_start, int zone_end)
8548 gmx_domdec_comm_t *comm;
8549 gmx_domdec_zones_t *zones;
8551 int z, zi, zj0, zj1, d, dim;
8554 real size_j, add_tric;
8559 zones = &comm->zones;
8561 /* Do we need to determine extra distances for multi-body bondeds? */
8562 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8564 for (z = zone_start; z < zone_end; z++)
8566 /* Copy cell limits to zone limits.
8567 * Valid for non-DD dims and non-shifted dims.
8569 copy_rvec(comm->cell_x0, zones->size[z].x0);
8570 copy_rvec(comm->cell_x1, zones->size[z].x1);
8573 for (d = 0; d < dd->ndim; d++)
8577 for (z = 0; z < zones->n; z++)
8579 /* With a staggered grid we have different sizes
8580 * for non-shifted dimensions.
8582 if (dd->bGridJump && zones->shift[z][dim] == 0)
8586 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8587 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8591 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8592 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
8598 rcmbs = comm->cutoff_mbody;
8599 if (ddbox->tric_dir[dim])
8601 rcs /= ddbox->skew_fac[dim];
8602 rcmbs /= ddbox->skew_fac[dim];
8605 /* Set the lower limit for the shifted zone dimensions */
8606 for (z = zone_start; z < zone_end; z++)
8608 if (zones->shift[z][dim] > 0)
8611 if (!dd->bGridJump || d == 0)
8613 zones->size[z].x0[dim] = comm->cell_x1[dim];
8614 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8618 /* Here we take the lower limit of the zone from
8619 * the lowest domain of the zone below.
8623 zones->size[z].x0[dim] =
8624 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8630 zones->size[z].x0[dim] =
8631 zones->size[zone_perm[2][z-4]].x0[dim];
8635 zones->size[z].x0[dim] =
8636 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8639 /* A temporary limit, is updated below */
8640 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8644 for (zi = 0; zi < zones->nizone; zi++)
8646 if (zones->shift[zi][dim] == 0)
8648 /* This takes the whole zone into account.
8649 * With multiple pulses this will lead
8650 * to a larger zone then strictly necessary.
8652 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8653 zones->size[zi].x1[dim]+rcmbs);
8661 /* Loop over the i-zones to set the upper limit of each
8664 for (zi = 0; zi < zones->nizone; zi++)
8666 if (zones->shift[zi][dim] == 0)
8668 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8670 if (zones->shift[z][dim] > 0)
8672 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8673 zones->size[zi].x1[dim]+rcs);
8680 for (z = zone_start; z < zone_end; z++)
8682 /* Initialization only required to keep the compiler happy */
8683 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8686 /* To determine the bounding box for a zone we need to find
8687 * the extreme corners of 4, 2 or 1 corners.
8689 nc = 1 << (ddbox->npbcdim - 1);
8691 for (c = 0; c < nc; c++)
8693 /* Set up a zone corner at x=0, ignoring trilinic couplings */
8697 corner[YY] = zones->size[z].x0[YY];
8701 corner[YY] = zones->size[z].x1[YY];
8705 corner[ZZ] = zones->size[z].x0[ZZ];
8709 corner[ZZ] = zones->size[z].x1[ZZ];
8711 if (dd->ndim == 1 && box[ZZ][YY] != 0)
8713 /* With 1D domain decomposition the cg's are not in
8714 * the triclinic box, but triclinic x-y and rectangular y-z.
8715 * Shift y back, so it will later end up at 0.
8717 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8719 /* Apply the triclinic couplings */
8720 assert(ddbox->npbcdim <= DIM);
8721 for (i = YY; i < ddbox->npbcdim; i++)
8723 for (j = XX; j < i; j++)
8725 corner[j] += corner[i]*box[i][j]/box[i][i];
8730 copy_rvec(corner, corner_min);
8731 copy_rvec(corner, corner_max);
8735 for (i = 0; i < DIM; i++)
8737 corner_min[i] = min(corner_min[i], corner[i]);
8738 corner_max[i] = max(corner_max[i], corner[i]);
8742 /* Copy the extreme cornes without offset along x */
8743 for (i = 0; i < DIM; i++)
8745 zones->size[z].bb_x0[i] = corner_min[i];
8746 zones->size[z].bb_x1[i] = corner_max[i];
8748 /* Add the offset along x */
8749 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8750 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
8753 if (zone_start == 0)
8756 for (dim = 0; dim < DIM; dim++)
8758 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8760 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8765 for (z = zone_start; z < zone_end; z++)
8767 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8769 zones->size[z].x0[XX], zones->size[z].x1[XX],
8770 zones->size[z].x0[YY], zones->size[z].x1[YY],
8771 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8772 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8774 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8775 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8776 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
8781 static int comp_cgsort(const void *a, const void *b)
8785 gmx_cgsort_t *cga, *cgb;
8786 cga = (gmx_cgsort_t *)a;
8787 cgb = (gmx_cgsort_t *)b;
8789 comp = cga->nsc - cgb->nsc;
8792 comp = cga->ind_gl - cgb->ind_gl;
8798 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8803 /* Order the data */
8804 for (i = 0; i < n; i++)
8806 buf[i] = a[sort[i].ind];
8809 /* Copy back to the original array */
8810 for (i = 0; i < n; i++)
8816 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8821 /* Order the data */
8822 for (i = 0; i < n; i++)
8824 copy_rvec(v[sort[i].ind], buf[i]);
8827 /* Copy back to the original array */
8828 for (i = 0; i < n; i++)
8830 copy_rvec(buf[i], v[i]);
8834 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8837 int a, atot, cg, cg0, cg1, i;
8839 if (cgindex == NULL)
8841 /* Avoid the useless loop of the atoms within a cg */
8842 order_vec_cg(ncg, sort, v, buf);
8847 /* Order the data */
8849 for (cg = 0; cg < ncg; cg++)
8851 cg0 = cgindex[sort[cg].ind];
8852 cg1 = cgindex[sort[cg].ind+1];
8853 for (i = cg0; i < cg1; i++)
8855 copy_rvec(v[i], buf[a]);
8861 /* Copy back to the original array */
8862 for (a = 0; a < atot; a++)
8864 copy_rvec(buf[a], v[a]);
8868 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8869 int nsort_new, gmx_cgsort_t *sort_new,
8870 gmx_cgsort_t *sort1)
8874 /* The new indices are not very ordered, so we qsort them */
8875 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8877 /* sort2 is already ordered, so now we can merge the two arrays */
8881 while (i2 < nsort2 || i_new < nsort_new)
8885 sort1[i1++] = sort_new[i_new++];
8887 else if (i_new == nsort_new)
8889 sort1[i1++] = sort2[i2++];
8891 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8892 (sort2[i2].nsc == sort_new[i_new].nsc &&
8893 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8895 sort1[i1++] = sort2[i2++];
8899 sort1[i1++] = sort_new[i_new++];
8904 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8906 gmx_domdec_sort_t *sort;
8907 gmx_cgsort_t *cgsort, *sort_i;
8908 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8909 int sort_last, sort_skip;
8911 sort = dd->comm->sort;
8913 a = fr->ns.grid->cell_index;
8915 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8917 if (ncg_home_old >= 0)
8919 /* The charge groups that remained in the same ns grid cell
8920 * are completely ordered. So we can sort efficiently by sorting
8921 * the charge groups that did move into the stationary list.
8926 for (i = 0; i < dd->ncg_home; i++)
8928 /* Check if this cg did not move to another node */
8931 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8933 /* This cg is new on this node or moved ns grid cell */
8934 if (nsort_new >= sort->sort_new_nalloc)
8936 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8937 srenew(sort->sort_new, sort->sort_new_nalloc);
8939 sort_i = &(sort->sort_new[nsort_new++]);
8943 /* This cg did not move */
8944 sort_i = &(sort->sort2[nsort2++]);
8946 /* Sort on the ns grid cell indices
8947 * and the global topology index.
8948 * index_gl is irrelevant with cell ns,
8949 * but we set it here anyhow to avoid a conditional.
8952 sort_i->ind_gl = dd->index_gl[i];
8959 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8962 /* Sort efficiently */
8963 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8968 cgsort = sort->sort;
8970 for (i = 0; i < dd->ncg_home; i++)
8972 /* Sort on the ns grid cell indices
8973 * and the global topology index
8975 cgsort[i].nsc = a[i];
8976 cgsort[i].ind_gl = dd->index_gl[i];
8978 if (cgsort[i].nsc < moved)
8985 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8987 /* Determine the order of the charge groups using qsort */
8988 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
8994 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
8997 int ncg_new, i, *a, na;
8999 sort = dd->comm->sort->sort;
9001 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
9004 for (i = 0; i < na; i++)
9008 sort[ncg_new].ind = a[i];
9016 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
9019 gmx_domdec_sort_t *sort;
9020 gmx_cgsort_t *cgsort, *sort_i;
9022 int ncg_new, i, *ibuf, cgsize;
9025 sort = dd->comm->sort;
9027 if (dd->ncg_home > sort->sort_nalloc)
9029 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9030 srenew(sort->sort, sort->sort_nalloc);
9031 srenew(sort->sort2, sort->sort_nalloc);
9033 cgsort = sort->sort;
9035 switch (fr->cutoff_scheme)
9038 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9041 ncg_new = dd_sort_order_nbnxn(dd, fr);
9044 gmx_incons("unimplemented");
9048 /* We alloc with the old size, since cgindex is still old */
9049 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9050 vbuf = dd->comm->vbuf.v;
9054 cgindex = dd->cgindex;
9061 /* Remove the charge groups which are no longer at home here */
9062 dd->ncg_home = ncg_new;
9065 fprintf(debug, "Set the new home charge group count to %d\n",
9069 /* Reorder the state */
9070 for (i = 0; i < estNR; i++)
9072 if (EST_DISTR(i) && (state->flags & (1<<i)))
9077 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9080 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9083 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9086 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9090 case estDISRE_INITF:
9091 case estDISRE_RM3TAV:
9092 case estORIRE_INITF:
9094 /* No ordering required */
9097 gmx_incons("Unknown state entry encountered in dd_sort_state");
9102 if (fr->cutoff_scheme == ecutsGROUP)
9105 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9108 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9110 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9111 srenew(sort->ibuf, sort->ibuf_nalloc);
9114 /* Reorder the global cg index */
9115 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9116 /* Reorder the cginfo */
9117 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9118 /* Rebuild the local cg index */
9122 for (i = 0; i < dd->ncg_home; i++)
9124 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9125 ibuf[i+1] = ibuf[i] + cgsize;
9127 for (i = 0; i < dd->ncg_home+1; i++)
9129 dd->cgindex[i] = ibuf[i];
9134 for (i = 0; i < dd->ncg_home+1; i++)
9139 /* Set the home atom number */
9140 dd->nat_home = dd->cgindex[dd->ncg_home];
9142 if (fr->cutoff_scheme == ecutsVERLET)
9144 /* The atoms are now exactly in grid order, update the grid order */
9145 nbnxn_set_atomorder(fr->nbv->nbs);
9149 /* Copy the sorted ns cell indices back to the ns grid struct */
9150 for (i = 0; i < dd->ncg_home; i++)
9152 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9154 fr->ns.grid->nr = dd->ncg_home;
9158 static void add_dd_statistics(gmx_domdec_t *dd)
9160 gmx_domdec_comm_t *comm;
9165 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9167 comm->sum_nat[ddnat-ddnatZONE] +=
9168 comm->nat[ddnat] - comm->nat[ddnat-1];
9173 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9175 gmx_domdec_comm_t *comm;
9180 /* Reset all the statistics and counters for total run counting */
9181 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9183 comm->sum_nat[ddnat-ddnatZONE] = 0;
9187 comm->load_step = 0;
9190 clear_ivec(comm->load_lim);
9195 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9197 gmx_domdec_comm_t *comm;
9201 comm = cr->dd->comm;
9203 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9210 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9212 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9214 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9219 " av. #atoms communicated per step for force: %d x %.1f\n",
9223 if (cr->dd->vsite_comm)
9226 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9227 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9232 if (cr->dd->constraint_comm)
9235 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9236 1 + ir->nLincsIter, av);
9240 gmx_incons(" Unknown type for DD statistics");
9243 fprintf(fplog, "\n");
9245 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9247 print_dd_load_av(fplog, cr->dd);
9251 void dd_partition_system(FILE *fplog,
9254 gmx_bool bMasterState,
9256 t_state *state_global,
9257 gmx_mtop_t *top_global,
9259 t_state *state_local,
9262 gmx_localtop_t *top_local,
9265 gmx_shellfc_t shellfc,
9266 gmx_constr_t constr,
9268 gmx_wallcycle_t wcycle,
9272 gmx_domdec_comm_t *comm;
9273 gmx_ddbox_t ddbox = {0};
9275 gmx_int64_t step_pcoupl;
9276 rvec cell_ns_x0, cell_ns_x1;
9277 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9278 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9279 gmx_bool bRedist, bSortCG, bResortAll;
9280 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9287 bBoxChanged = (bMasterState || DEFORM(*ir));
9288 if (ir->epc != epcNO)
9290 /* With nstpcouple > 1 pressure coupling happens.
9291 * one step after calculating the pressure.
9292 * Box scaling happens at the end of the MD step,
9293 * after the DD partitioning.
9294 * We therefore have to do DLB in the first partitioning
9295 * after an MD step where P-coupling occured.
9296 * We need to determine the last step in which p-coupling occurred.
9297 * MRS -- need to validate this for vv?
9302 step_pcoupl = step - 1;
9306 step_pcoupl = ((step - 1)/n)*n + 1;
9308 if (step_pcoupl >= comm->partition_step)
9314 bNStGlobalComm = (step % nstglobalcomm == 0);
9316 if (!comm->bDynLoadBal)
9322 /* Should we do dynamic load balacing this step?
9323 * Since it requires (possibly expensive) global communication,
9324 * we might want to do DLB less frequently.
9326 if (bBoxChanged || ir->epc != epcNO)
9328 bDoDLB = bBoxChanged;
9332 bDoDLB = bNStGlobalComm;
9336 /* Check if we have recorded loads on the nodes */
9337 if (comm->bRecordLoad && dd_load_count(comm))
9339 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9341 /* Check if we should use DLB at the second partitioning
9342 * and every 100 partitionings,
9343 * so the extra communication cost is negligible.
9345 n = max(100, nstglobalcomm);
9346 bCheckDLB = (comm->n_load_collect == 0 ||
9347 comm->n_load_have % n == n-1);
9354 /* Print load every nstlog, first and last step to the log file */
9355 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9356 comm->n_load_collect == 0 ||
9358 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9360 /* Avoid extra communication due to verbose screen output
9361 * when nstglobalcomm is set.
9363 if (bDoDLB || bLogLoad || bCheckDLB ||
9364 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9366 get_load_distribution(dd, wcycle);
9371 dd_print_load(fplog, dd, step-1);
9375 dd_print_load_verbose(dd);
9378 comm->n_load_collect++;
9382 /* Since the timings are node dependent, the master decides */
9386 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
9389 fprintf(debug, "step %s, imb loss %f\n",
9390 gmx_step_str(step, sbuf),
9391 dd_force_imb_perf_loss(dd));
9394 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9397 turn_on_dlb(fplog, cr, step);
9402 comm->n_load_have++;
9405 cgs_gl = &comm->cgs_gl;
9410 /* Clear the old state */
9411 clear_dd_indices(dd, 0, 0);
9414 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9415 TRUE, cgs_gl, state_global->x, &ddbox);
9417 get_cg_distribution(fplog, step, dd, cgs_gl,
9418 state_global->box, &ddbox, state_global->x);
9420 dd_distribute_state(dd, cgs_gl,
9421 state_global, state_local, f);
9423 dd_make_local_cgs(dd, &top_local->cgs);
9425 /* Ensure that we have space for the new distribution */
9426 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9428 if (fr->cutoff_scheme == ecutsGROUP)
9430 calc_cgcm(fplog, 0, dd->ncg_home,
9431 &top_local->cgs, state_local->x, fr->cg_cm);
9434 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9436 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9438 else if (state_local->ddp_count != dd->ddp_count)
9440 if (state_local->ddp_count > dd->ddp_count)
9442 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9445 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9447 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9450 /* Clear the old state */
9451 clear_dd_indices(dd, 0, 0);
9453 /* Build the new indices */
9454 rebuild_cgindex(dd, cgs_gl->index, state_local);
9455 make_dd_indices(dd, cgs_gl->index, 0);
9456 ncgindex_set = dd->ncg_home;
9458 if (fr->cutoff_scheme == ecutsGROUP)
9460 /* Redetermine the cg COMs */
9461 calc_cgcm(fplog, 0, dd->ncg_home,
9462 &top_local->cgs, state_local->x, fr->cg_cm);
9465 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9467 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9469 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9470 TRUE, &top_local->cgs, state_local->x, &ddbox);
9472 bRedist = comm->bDynLoadBal;
9476 /* We have the full state, only redistribute the cgs */
9478 /* Clear the non-home indices */
9479 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9482 /* Avoid global communication for dim's without pbc and -gcom */
9483 if (!bNStGlobalComm)
9485 copy_rvec(comm->box0, ddbox.box0 );
9486 copy_rvec(comm->box_size, ddbox.box_size);
9488 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9489 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9494 /* For dim's without pbc and -gcom */
9495 copy_rvec(ddbox.box0, comm->box0 );
9496 copy_rvec(ddbox.box_size, comm->box_size);
9498 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9501 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9503 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9506 /* Check if we should sort the charge groups */
9507 if (comm->nstSortCG > 0)
9509 bSortCG = (bMasterState ||
9510 (bRedist && (step % comm->nstSortCG == 0)));
9517 ncg_home_old = dd->ncg_home;
9522 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9524 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9526 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9528 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9531 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9533 &comm->cell_x0, &comm->cell_x1,
9534 dd->ncg_home, fr->cg_cm,
9535 cell_ns_x0, cell_ns_x1, &grid_density);
9539 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9542 switch (fr->cutoff_scheme)
9545 copy_ivec(fr->ns.grid->n, ncells_old);
9546 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9547 state_local->box, cell_ns_x0, cell_ns_x1,
9548 fr->rlistlong, grid_density);
9551 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9554 gmx_incons("unimplemented");
9556 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9557 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9561 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9563 /* Sort the state on charge group position.
9564 * This enables exact restarts from this step.
9565 * It also improves performance by about 15% with larger numbers
9566 * of atoms per node.
9569 /* Fill the ns grid with the home cell,
9570 * so we can sort with the indices.
9572 set_zones_ncg_home(dd);
9574 switch (fr->cutoff_scheme)
9577 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9579 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9581 comm->zones.size[0].bb_x0,
9582 comm->zones.size[0].bb_x1,
9584 comm->zones.dens_zone0,
9587 ncg_moved, bRedist ? comm->moved : NULL,
9588 fr->nbv->grp[eintLocal].kernel_type,
9589 fr->nbv->grp[eintLocal].nbat);
9591 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9594 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9595 0, dd->ncg_home, fr->cg_cm);
9597 copy_ivec(fr->ns.grid->n, ncells_new);
9600 gmx_incons("unimplemented");
9603 bResortAll = bMasterState;
9605 /* Check if we can user the old order and ns grid cell indices
9606 * of the charge groups to sort the charge groups efficiently.
9608 if (ncells_new[XX] != ncells_old[XX] ||
9609 ncells_new[YY] != ncells_old[YY] ||
9610 ncells_new[ZZ] != ncells_old[ZZ])
9617 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9618 gmx_step_str(step, sbuf), dd->ncg_home);
9620 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9621 bResortAll ? -1 : ncg_home_old);
9622 /* Rebuild all the indices */
9623 ga2la_clear(dd->ga2la);
9626 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9629 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9631 /* Setup up the communication and communicate the coordinates */
9632 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9634 /* Set the indices */
9635 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9637 /* Set the charge group boundaries for neighbor searching */
9638 set_cg_boundaries(&comm->zones);
9640 if (fr->cutoff_scheme == ecutsVERLET)
9642 set_zones_size(dd, state_local->box, &ddbox,
9643 bSortCG ? 1 : 0, comm->zones.n);
9646 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9649 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9650 -1,state_local->x,state_local->box);
9653 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9655 /* Extract a local topology from the global topology */
9656 for (i = 0; i < dd->ndim; i++)
9658 np[dd->dim[i]] = comm->cd[i].np;
9660 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9661 comm->cellsize_min, np,
9663 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9664 vsite, top_global, top_local);
9666 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9668 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9670 /* Set up the special atom communication */
9671 n = comm->nat[ddnatZONE];
9672 for (i = ddnatZONE+1; i < ddnatNR; i++)
9677 if (vsite && vsite->n_intercg_vsite)
9679 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9683 if (dd->bInterCGcons || dd->bInterCGsettles)
9685 /* Only for inter-cg constraints we need special code */
9686 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9687 constr, ir->nProjOrder,
9688 top_local->idef.il);
9692 gmx_incons("Unknown special atom type setup");
9697 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9699 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9701 /* Make space for the extra coordinates for virtual site
9702 * or constraint communication.
9704 state_local->natoms = comm->nat[ddnatNR-1];
9705 if (state_local->natoms > state_local->nalloc)
9707 dd_realloc_state(state_local, f, state_local->natoms);
9710 if (fr->bF_NoVirSum)
9712 if (vsite && vsite->n_intercg_vsite)
9714 nat_f_novirsum = comm->nat[ddnatVSITE];
9718 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9720 nat_f_novirsum = dd->nat_tot;
9724 nat_f_novirsum = dd->nat_home;
9733 /* Set the number of atoms required for the force calculation.
9734 * Forces need to be constrained when using a twin-range setup
9735 * or with energy minimization. For simple simulations we could
9736 * avoid some allocation, zeroing and copying, but this is
9737 * probably not worth the complications ande checking.
9739 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9740 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9742 /* We make the all mdatoms up to nat_tot_con.
9743 * We could save some work by only setting invmass
9744 * between nat_tot and nat_tot_con.
9746 /* This call also sets the new number of home particles to dd->nat_home */
9747 atoms2md(top_global, ir,
9748 comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
9750 /* Now we have the charges we can sort the FE interactions */
9751 dd_sort_local_top(dd, mdatoms, top_local);
9755 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9756 split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
9761 /* Make the local shell stuff, currently no communication is done */
9762 make_local_shells(cr, mdatoms, shellfc);
9765 if (ir->implicit_solvent)
9767 make_local_gb(cr, fr->born, ir->gb_algorithm);
9770 setup_bonded_threading(fr, &top_local->idef);
9772 if (!(cr->duty & DUTY_PME))
9774 /* Send the charges and/or c6/sigmas to our PME only node */
9775 gmx_pme_send_parameters(cr,
9777 mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9778 mdatoms->chargeA, mdatoms->chargeB,
9779 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9780 mdatoms->sigmaA, mdatoms->sigmaB,
9781 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9786 set_constraints(constr, top_local, ir, mdatoms, cr);
9789 if (ir->ePull != epullNO)
9791 /* Update the local pull groups */
9792 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9797 /* Update the local rotation groups */
9798 dd_make_local_rotation_groups(dd, ir->rot);
9801 if (ir->eSwapCoords != eswapNO)
9803 /* Update the local groups needed for ion swapping */
9804 dd_make_local_swap_groups(dd, ir->swap);
9807 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9808 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9810 add_dd_statistics(dd);
9812 /* Make sure we only count the cycles for this DD partitioning */
9813 clear_dd_cycle_counts(dd);
9815 /* Because the order of the atoms might have changed since
9816 * the last vsite construction, we need to communicate the constructing
9817 * atom coordinates again (for spreading the forces this MD step).
9819 dd_move_x_vsites(dd, state_local->box, state_local->x);
9821 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9823 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9825 dd_move_x(dd, state_local->box, state_local->x);
9826 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9827 -1, state_local->x, state_local->box);
9830 /* Store the partitioning step */
9831 comm->partition_step = step;
9833 /* Increase the DD partitioning counter */
9835 /* The state currently matches this DD partitioning count, store it */
9836 state_local->ddp_count = dd->ddp_count;
9839 /* The DD master node knows the complete cg distribution,
9840 * store the count so we can possibly skip the cg info communication.
9842 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9845 if (comm->DD_debug > 0)
9847 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9848 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9849 "after partitioning");