2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "gromacs/bonded/bonded.h"
48 #include "gromacs/legacyheaders/typedefs.h"
49 #include "gromacs/legacyheaders/network.h"
50 #include "gromacs/math/vec.h"
51 #include "gromacs/legacyheaders/domdec.h"
52 #include "gromacs/legacyheaders/domdec_network.h"
53 #include "gromacs/legacyheaders/nrnb.h"
54 #include "gromacs/legacyheaders/chargegroup.h"
55 #include "gromacs/legacyheaders/constr.h"
56 #include "gromacs/legacyheaders/mdatoms.h"
57 #include "gromacs/legacyheaders/names.h"
58 #include "gromacs/legacyheaders/force.h"
59 #include "gromacs/legacyheaders/pme.h"
60 #include "gromacs/legacyheaders/mdrun.h"
61 #include "gromacs/legacyheaders/nsgrid.h"
62 #include "gromacs/legacyheaders/shellfc.h"
63 #include "gromacs/topology/mtop_util.h"
64 #include "gromacs/legacyheaders/gmx_ga2la.h"
65 #include "gromacs/legacyheaders/macros.h"
66 #include "nbnxn_search.h"
67 #include "gromacs/legacyheaders/bonded-threading.h"
68 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
69 #include "gromacs/legacyheaders/gpu_utils.h"
71 #include "gromacs/utility/futil.h"
72 #include "gromacs/fileio/gmxfio.h"
73 #include "gromacs/fileio/pdbio.h"
74 #include "gromacs/imd/imd.h"
75 #include "gromacs/mdlib/nb_verlet.h"
76 #include "gromacs/pbcutil/ishift.h"
77 #include "gromacs/pbcutil/pbc.h"
78 #include "gromacs/pulling/pull.h"
79 #include "gromacs/pulling/pull_rotation.h"
80 #include "gromacs/swap/swapcoords.h"
81 #include "gromacs/timing/wallcycle.h"
82 #include "gromacs/utility/basenetwork.h"
83 #include "gromacs/utility/fatalerror.h"
84 #include "gromacs/utility/gmxmpi.h"
85 #include "gromacs/utility/qsort_threadsafe.h"
86 #include "gromacs/utility/smalloc.h"
88 #define DDRANK(dd, rank) (rank)
89 #define DDMASTERRANK(dd) (dd->masterrank)
91 typedef struct gmx_domdec_master
93 /* The cell boundaries */
95 /* The global charge group division */
96 int *ncg; /* Number of home charge groups for each node */
97 int *index; /* Index of nnodes+1 into cg */
98 int *cg; /* Global charge group index */
99 int *nat; /* Number of home atoms for each node. */
100 int *ibuf; /* Buffer for communication */
101 rvec *vbuf; /* Buffer for state scattering and gathering */
102 } gmx_domdec_master_t;
106 /* The numbers of charge groups to send and receive for each cell
107 * that requires communication, the last entry contains the total
108 * number of atoms that needs to be communicated.
110 int nsend[DD_MAXIZONE+2];
111 int nrecv[DD_MAXIZONE+2];
112 /* The charge groups to send */
115 /* The atom range for non-in-place communication */
116 int cell2at0[DD_MAXIZONE];
117 int cell2at1[DD_MAXIZONE];
122 int np; /* Number of grid pulses in this dimension */
123 int np_dlb; /* For dlb, for use with edlbAUTO */
124 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
126 gmx_bool bInPlace; /* Can we communicate in place? */
127 } gmx_domdec_comm_dim_t;
131 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
132 real *cell_f; /* State var.: cell boundaries, box relative */
133 real *old_cell_f; /* Temp. var.: old cell size */
134 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
135 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
136 real *bound_min; /* Temp. var.: lower limit for cell boundary */
137 real *bound_max; /* Temp. var.: upper limit for cell boundary */
138 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
139 real *buf_ncd; /* Temp. var. */
142 #define DD_NLOAD_MAX 9
144 /* Here floats are accurate enough, since these variables
145 * only influence the load balancing, not the actual MD results.
172 gmx_cgsort_t *sort_new;
184 /* This enum determines the order of the coordinates.
185 * ddnatHOME and ddnatZONE should be first and second,
186 * the others can be ordered as wanted.
189 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
193 edlbAUTO, edlbNO, edlbYES, edlbNR
195 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
199 int dim; /* The dimension */
200 gmx_bool dim_match; /* Tells if DD and PME dims match */
201 int nslab; /* The number of PME slabs in this dimension */
202 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
203 int *pp_min; /* The minimum pp node location, size nslab */
204 int *pp_max; /* The maximum pp node location,size nslab */
205 int maxshift; /* The maximum shift for coordinate redistribution in PME */
    real min0;      /* The minimum bottom of this zone                        */
    real max1;      /* The maximum top of this zone                           */
    real min1;      /* The minimum top of this zone                           */
    real mch0;      /* The maximum bottom communication height for this zone  */
    real mch1;      /* The maximum top communication height for this zone     */
    real p1_0;      /* The bottom value of the first cell in this zone        */
    real p1_1;      /* The top value of the first cell in this zone           */
221 gmx_domdec_ind_t ind;
228 } dd_comm_setup_work_t;
230 typedef struct gmx_domdec_comm
232 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
233 * unless stated otherwise.
236 /* The number of decomposition dimensions for PME, 0: no PME */
238 /* The number of nodes doing PME (PP/PME or only PME) */
242 /* The communication setup including the PME only nodes */
243 gmx_bool bCartesianPP_PME;
246 int *pmenodes; /* size npmenodes */
247 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
248 * but with bCartesianPP_PME */
249 gmx_ddpme_t ddpme[2];
251 /* The DD particle-particle nodes only */
252 gmx_bool bCartesianPP;
253 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
255 /* The global charge groups */
258 /* Should we sort the cgs */
260 gmx_domdec_sort_t *sort;
262 /* Are there charge groups? */
265 /* Are there bonded and multi-body interactions between charge groups? */
266 gmx_bool bInterCGBondeds;
267 gmx_bool bInterCGMultiBody;
269 /* Data for the optional bonded interaction atom communication range */
276 /* Are we actually using DLB? */
277 gmx_bool bDynLoadBal;
279 /* Cell sizes for static load balancing, first index cartesian */
282 /* The width of the communicated boundaries */
285 /* The minimum cell size (including triclinic correction) */
287 /* For dlb, for use with edlbAUTO */
288 rvec cellsize_min_dlb;
289 /* The lower limit for the DD cell size with DLB */
291 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
292 gmx_bool bVacDLBNoLimit;
294 /* With PME load balancing we set limits on DLB */
295 gmx_bool bPMELoadBalDLBLimits;
296 /* DLB needs to take into account that we want to allow this maximum
297 * cut-off (for PME load balancing), this could limit cell boundaries.
299 real PMELoadBal_max_cutoff;
301 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
303 /* box0 and box_size are required with dim's without pbc and -gcom */
307 /* The cell boundaries */
311 /* The old location of the cell boundaries, to check cg displacements */
315 /* The communication setup and charge group boundaries for the zones */
316 gmx_domdec_zones_t zones;
318 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
319 * cell boundaries of neighboring cells for dynamic load balancing.
321 gmx_ddzone_t zone_d1[2];
322 gmx_ddzone_t zone_d2[2][2];
324 /* The coordinate/force communication setup and indices */
325 gmx_domdec_comm_dim_t cd[DIM];
326 /* The maximum number of cells to communicate with in one dimension */
329 /* Which cg distribution is stored on the master node */
330 int master_cg_ddp_count;
332 /* The number of cg's received from the direct neighbors */
333 int zone_ncg1[DD_MAXZONE];
335 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
338 /* Array for signalling if atoms have moved to another domain */
342 /* Communication buffer for general use */
346 /* Communication buffer for general use */
349 /* Temporary storage for thread parallel communication setup */
351 dd_comm_setup_work_t *dth;
353 /* Communication buffers only used with multiple grid pulses */
358 /* Communication buffers for local redistribution */
360 int cggl_flag_nalloc[DIM*2];
362 int cgcm_state_nalloc[DIM*2];
364 /* Cell sizes for dynamic load balancing */
365 gmx_domdec_root_t **root;
369 real cell_f_max0[DIM];
370 real cell_f_min1[DIM];
372 /* Stuff for load communication */
373 gmx_bool bRecordLoad;
374 gmx_domdec_load_t *load;
375 int nrank_gpu_shared;
377 MPI_Comm *mpi_comm_load;
378 MPI_Comm mpi_comm_gpu_shared;
381 /* Maximum DLB scaling per load balancing step in percent */
385 float cycl[ddCyclNr];
386 int cycl_n[ddCyclNr];
387 float cycl_max[ddCyclNr];
/* Flop counter (0=no, 1=yes, 2=with (eFlop-1)*5% noise) */
/* How often did we have load measurements */
/* How often have we collected the load measurements */
398 double sum_nat[ddnatNR-ddnatZONE];
408 /* The last partition step */
409 gmx_int64_t partition_step;
417 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
420 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
421 #define DD_FLAG_NRCG 65535
422 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
423 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
425 /* Zone permutation required to obtain consecutive charge groups
426 * for neighbor searching.
428 static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
430 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
431 * components see only j zones with that component 0.
434 /* The DD zone order */
435 static const ivec dd_zo[DD_MAXZONE] =
436 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
441 static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
446 static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
451 static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
453 /* Factors used to avoid problems due to rounding issues */
454 #define DD_CELL_MARGIN 1.0001
455 #define DD_CELL_MARGIN2 1.00005
456 /* Factor to account for pressure scaling during nstlist steps */
457 #define DD_PRES_SCALE_MARGIN 1.02
459 /* Turn on DLB when the load imbalance causes this amount of total loss.
460 * There is a bit of overhead with DLB and it's difficult to achieve
461 * a load imbalance of less than 2% with DLB.
463 #define DD_PERF_LOSS_DLB_ON 0.02
465 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
466 #define DD_PERF_LOSS_WARN 0.05
468 #define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
470 /* Use separate MPI send and receive commands
471 * when nnodes <= GMX_DD_NNODES_SENDRECV.
472 * This saves memory (and some copying for small nnodes).
473 * For high parallelization scatter and gather calls are used.
475 #define GMX_DD_NNODES_SENDRECV 4
/* NOTE(review): this row-major (x fastest) dd_index/index2xyz pair conflicts
 * with the dd_index macro redefined below (z fastest, chosen to minimize PME
 * coordinate communication) and index2xyz is unused. Keep it disabled to
 * avoid the macro redefinition, as upstream GROMACS does.
 */
#if 0
#define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])

static void index2xyz(ivec nc, int ind, ivec xyz)
{
    xyz[XX] = ind % nc[XX];
    xyz[YY] = (ind / nc[XX]) % nc[YY];
    xyz[ZZ] = ind / (nc[YY]*nc[XX]);
}
#endif
489 /* This order is required to minimize the coordinate communication in PME
490 * which uses decomposition in the x direction.
492 #define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
494 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
496 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
497 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
498 xyz[ZZ] = ind % nc[ZZ];
501 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
506 ddindex = dd_index(dd->nc, c);
507 if (dd->comm->bCartesianPP_PME)
509 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
511 else if (dd->comm->bCartesianPP)
514 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
525 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
527 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
530 int ddglatnr(gmx_domdec_t *dd, int i)
540 if (i >= dd->comm->nat[ddnatNR-1])
542 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
544 atnr = dd->gatindex[i] + 1;
550 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
552 return &dd->comm->cgs_gl;
555 static void vec_rvec_init(vec_rvec_t *v)
561 static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
565 v->nalloc = over_alloc_dd(n);
566 srenew(v->v, v->nalloc);
570 void dd_store_state(gmx_domdec_t *dd, t_state *state)
574 if (state->ddp_count != dd->ddp_count)
576 gmx_incons("The state does not the domain decomposition state");
579 state->ncg_gl = dd->ncg_home;
580 if (state->ncg_gl > state->cg_gl_nalloc)
582 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
583 srenew(state->cg_gl, state->cg_gl_nalloc);
585 for (i = 0; i < state->ncg_gl; i++)
587 state->cg_gl[i] = dd->index_gl[i];
590 state->ddp_count_cg_gl = dd->ddp_count;
593 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
595 return &dd->comm->zones;
598 void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
599 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
601 gmx_domdec_zones_t *zones;
604 zones = &dd->comm->zones;
607 while (icg >= zones->izone[izone].cg1)
616 else if (izone < zones->nizone)
618 *jcg0 = zones->izone[izone].jcg0;
622 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
623 icg, izone, zones->nizone);
626 *jcg1 = zones->izone[izone].jcg1;
628 for (d = 0; d < dd->ndim; d++)
631 shift0[dim] = zones->izone[izone].shift0[dim];
632 shift1[dim] = zones->izone[izone].shift1[dim];
633 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
635 /* A conservative approach, this can be optimized */
642 int dd_natoms_vsite(gmx_domdec_t *dd)
644 return dd->comm->nat[ddnatVSITE];
647 void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
649 *at_start = dd->comm->nat[ddnatCON-1];
650 *at_end = dd->comm->nat[ddnatCON];
653 void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
655 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
656 int *index, *cgindex;
657 gmx_domdec_comm_t *comm;
658 gmx_domdec_comm_dim_t *cd;
659 gmx_domdec_ind_t *ind;
660 rvec shift = {0, 0, 0}, *buf, *rbuf;
661 gmx_bool bPBC, bScrew;
665 cgindex = dd->cgindex;
670 nat_tot = dd->nat_home;
671 for (d = 0; d < dd->ndim; d++)
673 bPBC = (dd->ci[dd->dim[d]] == 0);
674 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
677 copy_rvec(box[dd->dim[d]], shift);
680 for (p = 0; p < cd->np; p++)
687 for (i = 0; i < ind->nsend[nzone]; i++)
689 at0 = cgindex[index[i]];
690 at1 = cgindex[index[i]+1];
691 for (j = at0; j < at1; j++)
693 copy_rvec(x[j], buf[n]);
700 for (i = 0; i < ind->nsend[nzone]; i++)
702 at0 = cgindex[index[i]];
703 at1 = cgindex[index[i]+1];
704 for (j = at0; j < at1; j++)
706 /* We need to shift the coordinates */
707 rvec_add(x[j], shift, buf[n]);
714 for (i = 0; i < ind->nsend[nzone]; i++)
716 at0 = cgindex[index[i]];
717 at1 = cgindex[index[i]+1];
718 for (j = at0; j < at1; j++)
721 buf[n][XX] = x[j][XX] + shift[XX];
723 * This operation requires a special shift force
724 * treatment, which is performed in calc_vir.
726 buf[n][YY] = box[YY][YY] - x[j][YY];
727 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
739 rbuf = comm->vbuf2.v;
741 /* Send and receive the coordinates */
742 dd_sendrecv_rvec(dd, d, dddirBackward,
743 buf, ind->nsend[nzone+1],
744 rbuf, ind->nrecv[nzone+1]);
748 for (zone = 0; zone < nzone; zone++)
750 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
752 copy_rvec(rbuf[j], x[i]);
757 nat_tot += ind->nrecv[nzone+1];
763 void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
765 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
766 int *index, *cgindex;
767 gmx_domdec_comm_t *comm;
768 gmx_domdec_comm_dim_t *cd;
769 gmx_domdec_ind_t *ind;
773 gmx_bool bPBC, bScrew;
777 cgindex = dd->cgindex;
782 nzone = comm->zones.n/2;
783 nat_tot = dd->nat_tot;
784 for (d = dd->ndim-1; d >= 0; d--)
786 bPBC = (dd->ci[dd->dim[d]] == 0);
787 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
788 if (fshift == NULL && !bScrew)
792 /* Determine which shift vector we need */
798 for (p = cd->np-1; p >= 0; p--)
801 nat_tot -= ind->nrecv[nzone+1];
808 sbuf = comm->vbuf2.v;
810 for (zone = 0; zone < nzone; zone++)
812 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
814 copy_rvec(f[i], sbuf[j]);
819 /* Communicate the forces */
820 dd_sendrecv_rvec(dd, d, dddirForward,
821 sbuf, ind->nrecv[nzone+1],
822 buf, ind->nsend[nzone+1]);
824 /* Add the received forces */
828 for (i = 0; i < ind->nsend[nzone]; i++)
830 at0 = cgindex[index[i]];
831 at1 = cgindex[index[i]+1];
832 for (j = at0; j < at1; j++)
834 rvec_inc(f[j], buf[n]);
841 for (i = 0; i < ind->nsend[nzone]; i++)
843 at0 = cgindex[index[i]];
844 at1 = cgindex[index[i]+1];
845 for (j = at0; j < at1; j++)
847 rvec_inc(f[j], buf[n]);
848 /* Add this force to the shift force */
849 rvec_inc(fshift[is], buf[n]);
856 for (i = 0; i < ind->nsend[nzone]; i++)
858 at0 = cgindex[index[i]];
859 at1 = cgindex[index[i]+1];
860 for (j = at0; j < at1; j++)
862 /* Rotate the force */
863 f[j][XX] += buf[n][XX];
864 f[j][YY] -= buf[n][YY];
865 f[j][ZZ] -= buf[n][ZZ];
868 /* Add this force to the shift force */
869 rvec_inc(fshift[is], buf[n]);
880 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
882 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
883 int *index, *cgindex;
884 gmx_domdec_comm_t *comm;
885 gmx_domdec_comm_dim_t *cd;
886 gmx_domdec_ind_t *ind;
891 cgindex = dd->cgindex;
893 buf = &comm->vbuf.v[0][0];
896 nat_tot = dd->nat_home;
897 for (d = 0; d < dd->ndim; d++)
900 for (p = 0; p < cd->np; p++)
905 for (i = 0; i < ind->nsend[nzone]; i++)
907 at0 = cgindex[index[i]];
908 at1 = cgindex[index[i]+1];
909 for (j = at0; j < at1; j++)
922 rbuf = &comm->vbuf2.v[0][0];
924 /* Send and receive the coordinates */
925 dd_sendrecv_real(dd, d, dddirBackward,
926 buf, ind->nsend[nzone+1],
927 rbuf, ind->nrecv[nzone+1]);
931 for (zone = 0; zone < nzone; zone++)
933 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
940 nat_tot += ind->nrecv[nzone+1];
946 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
948 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
949 int *index, *cgindex;
950 gmx_domdec_comm_t *comm;
951 gmx_domdec_comm_dim_t *cd;
952 gmx_domdec_ind_t *ind;
957 cgindex = dd->cgindex;
959 buf = &comm->vbuf.v[0][0];
962 nzone = comm->zones.n/2;
963 nat_tot = dd->nat_tot;
964 for (d = dd->ndim-1; d >= 0; d--)
967 for (p = cd->np-1; p >= 0; p--)
970 nat_tot -= ind->nrecv[nzone+1];
977 sbuf = &comm->vbuf2.v[0][0];
979 for (zone = 0; zone < nzone; zone++)
981 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
988 /* Communicate the forces */
989 dd_sendrecv_real(dd, d, dddirForward,
990 sbuf, ind->nrecv[nzone+1],
991 buf, ind->nsend[nzone+1]);
993 /* Add the received forces */
995 for (i = 0; i < ind->nsend[nzone]; i++)
997 at0 = cgindex[index[i]];
998 at1 = cgindex[index[i]+1];
999 for (j = at0; j < at1; j++)
1010 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
1012 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1014 zone->min0, zone->max1,
1015 zone->mch0, zone->mch0,
1016 zone->p1_0, zone->p1_1);
1020 #define DDZONECOMM_MAXZONE 5
1021 #define DDZONECOMM_BUFSIZE 3
1023 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1024 int ddimind, int direction,
1025 gmx_ddzone_t *buf_s, int n_s,
1026 gmx_ddzone_t *buf_r, int n_r)
1028 #define ZBS DDZONECOMM_BUFSIZE
1029 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1030 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
1033 for (i = 0; i < n_s; i++)
1035 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1036 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1037 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1038 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1039 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1040 vbuf_s[i*ZBS+1][2] = 0;
1041 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1042 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1043 vbuf_s[i*ZBS+2][2] = 0;
1046 dd_sendrecv_rvec(dd, ddimind, direction,
1050 for (i = 0; i < n_r; i++)
1052 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1053 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1054 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1055 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1056 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1057 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1058 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
1064 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1065 rvec cell_ns_x0, rvec cell_ns_x1)
1067 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1069 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1070 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1071 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1072 rvec extr_s[2], extr_r[2];
1074 real dist_d, c = 0, det;
1075 gmx_domdec_comm_t *comm;
1076 gmx_bool bPBC, bUse;
1080 for (d = 1; d < dd->ndim; d++)
1083 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1084 zp->min0 = cell_ns_x0[dim];
1085 zp->max1 = cell_ns_x1[dim];
1086 zp->min1 = cell_ns_x1[dim];
1087 zp->mch0 = cell_ns_x0[dim];
1088 zp->mch1 = cell_ns_x1[dim];
1089 zp->p1_0 = cell_ns_x0[dim];
1090 zp->p1_1 = cell_ns_x1[dim];
1093 for (d = dd->ndim-2; d >= 0; d--)
1096 bPBC = (dim < ddbox->npbcdim);
1098 /* Use an rvec to store two reals */
1099 extr_s[d][0] = comm->cell_f0[d+1];
1100 extr_s[d][1] = comm->cell_f1[d+1];
1101 extr_s[d][2] = comm->cell_f1[d+1];
1104 /* Store the extremes in the backward sending buffer,
* so they get updated separately from the forward communication.
1107 for (d1 = d; d1 < dd->ndim-1; d1++)
1109 /* We invert the order to be able to use the same loop for buf_e */
1110 buf_s[pos].min0 = extr_s[d1][1];
1111 buf_s[pos].max1 = extr_s[d1][0];
1112 buf_s[pos].min1 = extr_s[d1][2];
1113 buf_s[pos].mch0 = 0;
1114 buf_s[pos].mch1 = 0;
1115 /* Store the cell corner of the dimension we communicate along */
1116 buf_s[pos].p1_0 = comm->cell_x0[dim];
1117 buf_s[pos].p1_1 = 0;
1121 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1124 if (dd->ndim == 3 && d == 0)
1126 buf_s[pos] = comm->zone_d2[0][1];
1128 buf_s[pos] = comm->zone_d1[0];
1132 /* We only need to communicate the extremes
1133 * in the forward direction
1135 npulse = comm->cd[d].np;
1138 /* Take the minimum to avoid double communication */
1139 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1143 /* Without PBC we should really not communicate over
1144 * the boundaries, but implementing that complicates
1145 * the communication setup and therefore we simply
1146 * do all communication, but ignore some data.
1148 npulse_min = npulse;
1150 for (p = 0; p < npulse_min; p++)
1152 /* Communicate the extremes forward */
1153 bUse = (bPBC || dd->ci[dim] > 0);
1155 dd_sendrecv_rvec(dd, d, dddirForward,
1156 extr_s+d, dd->ndim-d-1,
1157 extr_r+d, dd->ndim-d-1);
1161 for (d1 = d; d1 < dd->ndim-1; d1++)
1163 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1164 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1165 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1171 for (p = 0; p < npulse; p++)
1173 /* Communicate all the zone information backward */
1174 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1176 dd_sendrecv_ddzone(dd, d, dddirBackward,
1183 for (d1 = d+1; d1 < dd->ndim; d1++)
1185 /* Determine the decrease of maximum required
1186 * communication height along d1 due to the distance along d,
1187 * this avoids a lot of useless atom communication.
1189 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1191 if (ddbox->tric_dir[dim])
1193 /* c is the off-diagonal coupling between the cell planes
1194 * along directions d and d1.
1196 c = ddbox->v[dim][dd->dim[d1]][dim];
1202 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1205 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1209 /* A negative value signals out of range */
1215 /* Accumulate the extremes over all pulses */
1216 for (i = 0; i < buf_size; i++)
1220 buf_e[i] = buf_r[i];
1226 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1227 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1228 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1231 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1239 if (bUse && dh[d1] >= 0)
1241 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1242 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1245 /* Copy the received buffer to the send buffer,
1246 * to pass the data through with the next pulse.
1248 buf_s[i] = buf_r[i];
1250 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1251 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1253 /* Store the extremes */
1256 for (d1 = d; d1 < dd->ndim-1; d1++)
1258 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1259 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1260 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1264 if (d == 1 || (d == 0 && dd->ndim == 3))
1266 for (i = d; i < 2; i++)
1268 comm->zone_d2[1-d][i] = buf_e[pos];
1274 comm->zone_d1[1] = buf_e[pos];
1284 for (i = 0; i < 2; i++)
1288 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1290 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1291 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1297 for (i = 0; i < 2; i++)
1299 for (j = 0; j < 2; j++)
1303 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1305 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1306 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1310 for (d = 1; d < dd->ndim; d++)
1312 comm->cell_f_max0[d] = extr_s[d-1][0];
1313 comm->cell_f_min1[d] = extr_s[d-1][1];
1316 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1317 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1322 static void dd_collect_cg(gmx_domdec_t *dd,
1323 t_state *state_local)
1325 gmx_domdec_master_t *ma = NULL;
1326 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
1329 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1331 /* The master has the correct distribution */
1335 if (state_local->ddp_count == dd->ddp_count)
1337 ncg_home = dd->ncg_home;
1339 nat_home = dd->nat_home;
1341 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1343 cgs_gl = &dd->comm->cgs_gl;
1345 ncg_home = state_local->ncg_gl;
1346 cg = state_local->cg_gl;
1348 for (i = 0; i < ncg_home; i++)
1350 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1355 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1358 buf2[0] = dd->ncg_home;
1359 buf2[1] = dd->nat_home;
1369 /* Collect the charge group and atom counts on the master */
1370 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1375 for (i = 0; i < dd->nnodes; i++)
1377 ma->ncg[i] = ma->ibuf[2*i];
1378 ma->nat[i] = ma->ibuf[2*i+1];
1379 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1382 /* Make byte counts and indices */
1383 for (i = 0; i < dd->nnodes; i++)
1385 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1386 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1390 fprintf(debug, "Initial charge group distribution: ");
1391 for (i = 0; i < dd->nnodes; i++)
1393 fprintf(debug, " %d", ma->ncg[i]);
1395 fprintf(debug, "\n");
1399 /* Collect the charge group indices on the master */
1401 dd->ncg_home*sizeof(int), dd->index_gl,
1402 DDMASTER(dd) ? ma->ibuf : NULL,
1403 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1404 DDMASTER(dd) ? ma->cg : NULL);
1406 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1409 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1412 gmx_domdec_master_t *ma;
1413 int n, i, c, a, nalloc = 0;
1422 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1423 dd->rank, dd->mpi_comm_all);
1428 /* Copy the master coordinates to the global array */
1429 cgs_gl = &dd->comm->cgs_gl;
1431 n = DDMASTERRANK(dd);
1433 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1435 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1437 copy_rvec(lv[a++], v[c]);
1441 for (n = 0; n < dd->nnodes; n++)
1445 if (ma->nat[n] > nalloc)
1447 nalloc = over_alloc_dd(ma->nat[n]);
1448 srenew(buf, nalloc);
1451 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1452 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1455 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1457 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1459 copy_rvec(buf[a++], v[c]);
1468 static void get_commbuffer_counts(gmx_domdec_t *dd,
1469 int **counts, int **disps)
1471 gmx_domdec_master_t *ma;
1476 /* Make the rvec count and displacment arrays */
1478 *disps = ma->ibuf + dd->nnodes;
1479 for (n = 0; n < dd->nnodes; n++)
1481 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1482 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1486 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1489 gmx_domdec_master_t *ma;
1490 int *rcounts = NULL, *disps = NULL;
1499 get_commbuffer_counts(dd, &rcounts, &disps);
1504 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1508 cgs_gl = &dd->comm->cgs_gl;
1511 for (n = 0; n < dd->nnodes; n++)
1513 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1515 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1517 copy_rvec(buf[a++], v[c]);
1524 void dd_collect_vec(gmx_domdec_t *dd,
1525 t_state *state_local, rvec *lv, rvec *v)
1527 gmx_domdec_master_t *ma;
1528 int n, i, c, a, nalloc = 0;
1531 dd_collect_cg(dd, state_local);
1533 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1535 dd_collect_vec_sendrecv(dd, lv, v);
1539 dd_collect_vec_gatherv(dd, lv, v);
/* Collect the full t_state on the master rank. Scalar and small fixed-size
 * fields are replicated, so they are simply copied from the local state;
 * the distributed per-atom arrays (selected by state flags) are gathered
 * with dd_collect_vec().
 */
void dd_collect_state(gmx_domdec_t *dd,
                      t_state *state_local, t_state *state)
    nh = state->nhchainlength;

    /* Replicated scalar fields */
    for (i = 0; i < efptNR; i++)
        state->lambda[i] = state_local->lambda[i];
    state->fep_state = state_local->fep_state;
    state->veta = state_local->veta;
    state->vol0 = state_local->vol0;
    copy_mat(state_local->box, state->box);
    copy_mat(state_local->boxv, state->boxv);
    copy_mat(state_local->svir_prev, state->svir_prev);
    copy_mat(state_local->fvir_prev, state->fvir_prev);
    copy_mat(state_local->pres_prev, state->pres_prev);
    /* Nose-Hoover chains: nh entries per temperature-coupling group */
    for (i = 0; i < state_local->ngtc; i++)
        for (j = 0; j < nh; j++)
            state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
            state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
        state->therm_integral[i] = state_local->therm_integral[i];
    /* Nose-Hoover barostat variables */
    for (i = 0; i < state_local->nnhpres; i++)
        for (j = 0; j < nh; j++)
            state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
            state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
    /* Distributed per-atom arrays: gather only those present in flags */
    for (est = 0; est < estNR; est++)
        if (EST_DISTR(est) && (state_local->flags & (1<<est)))
            dd_collect_vec(dd, state_local, state_local->x, state->x);
            dd_collect_vec(dd, state_local, state_local->v, state->v);
            dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
            dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
            /* Per-atom history entries that need no gathering here */
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
                gmx_incons("Unknown state entry encountered in dd_collect_state");
/* Grow the distributed per-atom arrays in state, and the force array *f,
 * to hold at least nalloc atoms. Uses over_alloc_dd() headroom to reduce
 * the frequency of future reallocations.
 */
static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
    fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));

    state->nalloc = over_alloc_dd(nalloc);

    /* Only arrays flagged as distributed need resizing */
    for (est = 0; est < estNR; est++)
        if (EST_DISTR(est) && (state->flags & (1<<est)))
            srenew(state->x, state->nalloc);
            srenew(state->v, state->nalloc);
            srenew(state->sd_X, state->nalloc);
            srenew(state->cg_p, state->nalloc);
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
                /* No reallocation required */
                gmx_incons("Unknown state entry encountered in dd_realloc_state");

    srenew(*f, state->nalloc);
/* Ensure the forcerec charge-group arrays can hold nalloc charge groups;
 * with the Verlet scheme also grow the state arrays, since atom
 * communication is then set up from state->x instead of cg_cm.
 */
static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
    if (nalloc > fr->cg_nalloc)
        fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
        fr->cg_nalloc = over_alloc_dd(nalloc);
        srenew(fr->cginfo, fr->cg_nalloc);
        /* cg_cm is only used with the group cut-off scheme */
        if (fr->cutoff_scheme == ecutsGROUP)
            srenew(fr->cg_cm, fr->cg_nalloc);
    if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
        /* We don't use charge groups, we use x in state to set up
         * the atom communication.
         */
        dd_realloc_state(state, f, nalloc);
/* Master rank: reorder the global vector v into each rank's charge-group
 * order and send the slice with MPI_Send; the master's own slice is copied
 * directly into lv. Non-master ranks: receive the home slice into lv.
 */
static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
    gmx_domdec_master_t *ma;
    int n, i, c, a, nalloc = 0;

    for (n = 0; n < dd->nnodes; n++)
        /* Grow the pack buffer to the largest per-rank atom count seen */
        if (ma->nat[n] > nalloc)
            nalloc = over_alloc_dd(ma->nat[n]);
            srenew(buf, nalloc);
        /* Use lv as a temporary buffer */
        for (i = ma->index[n]; i < ma->index[n+1]; i++)
            for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
                copy_rvec(v[c], buf[a++]);
        /* Sanity check: packed atom count must match the bookkeeping */
        if (a != ma->nat[n])
            gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
        MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
                 DDRANK(dd, n), n, dd->mpi_comm_all);
    /* Master copies its own home slice without communication */
    n = DDMASTERRANK(dd);
    for (i = ma->index[n]; i < ma->index[n+1]; i++)
        for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
            copy_rvec(v[c], lv[a++]);
    /* Non-master ranks: blocking receive of the home slice */
    MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
             MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
/* Distribute the global vector v over all DD ranks with MPI_Scatterv:
 * the master packs v into per-rank chunks in charge-group order; each
 * rank receives its dd->nat_home atoms into lv.
 */
static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
    gmx_domdec_master_t *ma;
    int *scounts = NULL, *disps = NULL;
    int n, i, c, a, nalloc = 0;

    /* Per-rank send byte counts and displacements for the scatterv */
    get_commbuffer_counts(dd, &scounts, &disps);

    for (n = 0; n < dd->nnodes; n++)
        for (i = ma->index[n]; i < ma->index[n+1]; i++)
            for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
                copy_rvec(v[c], buf[a++]);

    dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
/* Distribute the global vector v into per-rank local vectors lv; mirrors
 * dd_collect_vec: send/recv for few ranks, scatterv otherwise.
 */
static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
        dd_distribute_vec_sendrecv(dd, cgs, v, lv);
    /* else: MPI_Scatterv scales better for larger rank counts */
    dd_distribute_vec_scatterv(dd, cgs, v, lv);
/* Broadcast the free-energy (expanded-ensemble) history from the master
 * rank to all ranks; needed when restarting from a checkpoint.
 */
static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
    dd_bcast(dd, sizeof(int), &dfhist->bEquil);
    dd_bcast(dd, sizeof(int), &dfhist->nlambda);
    dd_bcast(dd, sizeof(real), &dfhist->wl_delta);

    if (dfhist->nlambda > 0)
        int nlam = dfhist->nlambda;
        /* Per-lambda arrays */
        dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
        dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
        dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
        dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
        dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
        dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);

        /* 2-D accumulators: nlam rows of nlam reals, broadcast row by row */
        for (i = 0; i < nlam; i++)
            dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
            dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
            dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
            dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
            dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
            dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
/* Distribute the global t_state from the master rank to all ranks:
 * replicated fields are copied on the master and then broadcast, the
 * df_history is broadcast, and the distributed per-atom arrays (selected
 * by state flags) are scattered with dd_distribute_vec(). The local
 * state is reallocated when the home atom count exceeds its allocation.
 */
static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
                                t_state *state, t_state *state_local,
    nh = state->nhchainlength;

    /* Master: copy replicated fields from the global state */
    for (i = 0; i < efptNR; i++)
        state_local->lambda[i] = state->lambda[i];
    state_local->fep_state = state->fep_state;
    state_local->veta = state->veta;
    state_local->vol0 = state->vol0;
    copy_mat(state->box, state_local->box);
    copy_mat(state->box_rel, state_local->box_rel);
    copy_mat(state->boxv, state_local->boxv);
    copy_mat(state->svir_prev, state_local->svir_prev);
    copy_mat(state->fvir_prev, state_local->fvir_prev);
    copy_df_history(&state_local->dfhist, &state->dfhist);
    /* Nose-Hoover thermostat and barostat chain variables */
    for (i = 0; i < state_local->ngtc; i++)
        for (j = 0; j < nh; j++)
            state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
            state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
        state_local->therm_integral[i] = state->therm_integral[i];
    for (i = 0; i < state_local->nnhpres; i++)
        for (j = 0; j < nh; j++)
            state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
            state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
    /* Broadcast the replicated fields to all ranks */
    dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
    dd_bcast(dd, sizeof(int), &state_local->fep_state);
    dd_bcast(dd, sizeof(real), &state_local->veta);
    dd_bcast(dd, sizeof(real), &state_local->vol0);
    dd_bcast(dd, sizeof(state_local->box), state_local->box);
    dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
    dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
    dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
    dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
    dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
    dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
    dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
    dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
    dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);

    /* communicate df_history -- required for restarting from checkpoint */
    dd_distribute_dfhist(dd, &state_local->dfhist);

    if (dd->nat_home > state_local->nalloc)
        dd_realloc_state(state_local, f, dd->nat_home);
    /* Scatter the distributed per-atom arrays present in flags */
    for (i = 0; i < estNR; i++)
        if (EST_DISTR(i) && (state_local->flags & (1<<i)))
            dd_distribute_vec(dd, cgs, state->x, state_local->x);
            dd_distribute_vec(dd, cgs, state->v, state_local->v);
            dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
            dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
                /* Not implemented yet */
                gmx_incons("Unknown state entry encountered in dd_distribute_state");
1913 static char dim2char(int dim)
1919 case XX: c = 'X'; break;
1920 case YY: c = 'Y'; break;
1921 case ZZ: c = 'Z'; break;
1922 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
/* Write the DD cell boundaries of all ranks as a PDB file for visual
 * debugging: one CA pseudo-atom per cell corner (8 per cell, B-factor
 * column holds the relative cell volume) plus CONECT records forming a
 * wireframe box per cell.
 */
static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
                              gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
    rvec grid_s[2], *grid_r = NULL, cx, r;
    char fname[STRLEN], buf[22];
    int a, i, d, z, y, x;

    /* Each rank contributes its lower and upper cell corners */
    copy_rvec(dd->comm->cell_x0, grid_s[0]);
    copy_rvec(dd->comm->cell_x1, grid_s[1]);
    snew(grid_r, 2*dd->nnodes);
    dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : NULL);
    /* Triclinic skew factors, for drawing non-rectangular cells */
    for (d = 0; d < DIM; d++)
        for (i = 0; i < DIM; i++)
            if (d < ddbox->npbcdim && dd->nc[d] > 1)
                tric[d][i] = box[i][d]/box[i][i];
    sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
    out = gmx_fio_fopen(fname, "w");
    gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
    for (i = 0; i < dd->nnodes; i++)
        /* Relative cell volume, written into the B-factor column below */
        vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
        for (d = 0; d < DIM; d++)
            vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
        /* Emit the 8 corners of cell i */
        for (z = 0; z < 2; z++)
            for (y = 0; y < 2; y++)
                for (x = 0; x < 2; x++)
                    cx[XX] = grid_r[i*2+x][XX];
                    cx[YY] = grid_r[i*2+y][YY];
                    cx[ZZ] = grid_r[i*2+z][ZZ];
                    /* Coordinates in Angstrom: PDB uses A, GROMACS nm */
                    gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
                                             10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
        /* Connect the 8 corners into a wireframe box along each dimension */
        for (d = 0; d < DIM; d++)
            for (x = 0; x < 4; x++)
                case 0: y = 1 + i*8 + 2*x; break;
                case 1: y = 1 + i*8 + 2*x - (x % 2); break;
                case 2: y = 1 + i*8 + x; break;
                fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
    gmx_fio_fclose(out);
/* Write the local atoms of this rank to a per-rank PDB file for DD
 * debugging; the B-factor column encodes each atom's classification
 * (DD zone index, or zones.n for vsites, zones.n+1 beyond that).
 */
void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
                  gmx_mtop_t *mtop, t_commrec *cr,
                  int natoms, rvec x[], matrix box)
    char fname[STRLEN], buf[22];
    int i, ii, resnr, c;
    char *atomname, *resname;

    /* Include locally constructed virtual sites in the output */
    natoms = dd->comm->nat[ddnatVSITE];

    sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
    out = gmx_fio_fopen(fname, "w");
    fprintf(out, "TITLE %s\n", title);
    gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
    for (i = 0; i < natoms; i++)
        ii = dd->gatindex[i];
        gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
        if (i < dd->comm->nat[ddnatZONE])
            /* Home or communicated atom: find its DD zone index */
            while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
        else if (i < dd->comm->nat[ddnatVSITE])
            /* Virtual site constructed locally */
            b = dd->comm->zones.n;
            b = dd->comm->zones.n + 1;
        /* Coordinates converted nm -> Angstrom for PDB */
        gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
                                 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
    fprintf(out, "TER\n");
    gmx_fio_fclose(out);
/* Return the cut-off radius limiting multi-body (bonded) interactions
 * between domains. With an explicit multi-body cut-off that value is
 * used; otherwise (no DLB) the minimum DD cell size over the decomposed
 * dimensions limits the range, capped by the non-bonded cut-off.
 */
real dd_cutoff_mbody(gmx_domdec_t *dd)
    gmx_domdec_comm_t *comm;

    if (comm->bInterCGBondeds)
        if (comm->cutoff_mbody > 0)
            r = comm->cutoff_mbody;
        /* cutoff_mbody=0 means we do not have DLB */
        r = comm->cellsize_min[dd->dim[0]];
        for (di = 1; di < dd->ndim; di++)
            r = min(r, comm->cellsize_min[dd->dim[di]]);
        if (comm->bBondComm)
            r = max(r, comm->cutoff_mbody);
        r = min(r, comm->cutoff);
/* Return the effective two-body cut-off: the non-bonded cut-off, but
 * never smaller than the multi-body limit from dd_cutoff_mbody().
 */
real dd_cutoff_twobody(gmx_domdec_t *dd)
    r_mb = dd_cutoff_mbody(dd);

    return max(dd->comm->cutoff, r_mb);
/* Convert the Cartesian coordinates of a PP (DD) rank into those of the
 * PME rank that serves it: along cartpmedim the PP range 0..nc-1 is
 * mapped (rounded to the middle) onto the PME slots nc..ntot-1.
 */
static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
    nc = dd->nc[dd->comm->cartpmedim];
    ntot = dd->comm->ntot[dd->comm->cartpmedim];
    copy_ivec(coord, coord_pme);
    /* PME ranks occupy indices nc..ntot-1 along cartpmedim */
    coord_pme[dd->comm->cartpmedim] =
        nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
/* Map a DD rank index onto a PME rank index, assuming the major index of
 * both decompositions is x, so consecutive DD ranks map to consecutive
 * (or equal) PME ranks.
 *
 * ndd:     total number of DD (PP) ranks
 * npme:    total number of PME ranks
 * ddindex: DD rank index in [0, ndd)
 *
 * Adding npme/2 before the division centers the assignment, giving an
 * even distribution of DD ranks over PME ranks.
 */
static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
{
    return (ddindex*npme + npme/2)/ndd;
}
2135 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2137 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2140 static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2142 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
/* Build and return the list of PME-only rank ids for the interleaved
 * rank layout: a PME rank id is emitted after the last DD rank that maps
 * to it (i.e. where the DD->PME mapping advances). Caller owns the
 * returned array (allocated with snew).
 */
static int *dd_pmenodes(t_commrec *cr)
    snew(pmenodes, cr->npmenodes);
    for (i = 0; i < cr->dd->nnodes; i++)
        p0 = cr_ddindex2pmeindex(cr, i);
        p1 = cr_ddindex2pmeindex(cr, i+1);
        /* Emit a PME rank id where the mapping advances, or at the end */
        if (i+1 == cr->dd->nnodes || p1 > p0)
            fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
            /* PME ranks are interleaved after their DD ranks: offset by n */
            pmenodes[n] = i + 1 + n;
/* Return the PME slab index for the DD cell at coordinates (x,y,z).
 *
 * NOTE(review): the bCartesian branch below references dd_coords2pmecoords,
 * dd->ntot and dd->cartpmedim, which do not match the helper and fields
 * used elsewhere in this file (dd_cart_coord2pmecoord, dd->comm->ntot,
 * dd->comm->cartpmedim) and also uses ddindex before any visible
 * assignment -- in the upstream source this section is commented out;
 * verify before relying on it.
 */
static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
    ivec coords, coords_pme, nc;

    if (dd->comm->bCartesian) {
        gmx_ddindex2xyz(dd->nc,ddindex,coords);
        dd_coords2pmecoords(dd,coords,coords_pme);
        copy_ivec(dd->ntot,nc);
        /* Strip off the PP part to get pure PME grid coordinates */
        nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
        coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
        slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
        slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
    slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
/* Return the simulation rank id of the DD cell at coordinates (x,y,z),
 * handling the three rank-layout modes: Cartesian PP+PME communicator,
 * Cartesian PP only, and interleaved PP/PME ranks.
 */
static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
    gmx_domdec_comm_t *comm;
    int ddindex, nodeid = -1;

    comm = cr->dd->comm;

    if (comm->bCartesianPP_PME)
        /* The communicator is Cartesian: ask MPI directly */
        MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
    ddindex = dd_index(cr->dd->nc, coords);
    if (comm->bCartesianPP)
        nodeid = comm->ddindex2simnodeid[ddindex];
    /* Interleaved layout: offset by the number of preceding PME ranks */
    nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
/* Return the rank id of the PME node that serves simulation rank
 * sim_nodeid, for each of the three rank-layout modes.
 * NOTE(review): the value returned for a rank that is itself a PME rank
 * is not visible in this chunk; gmx_pmeonlynode() compares against -1.
 */
static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
    gmx_domdec_comm_t *comm;
    ivec coord, coord_pme;

    /* This assumes a uniform x domain decomposition grid cell size */
    if (comm->bCartesianPP_PME)
        MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
        if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
            /* This is a PP node */
            dd_cart_coord2pmecoord(dd, coord, coord_pme);
            MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
    else if (comm->bCartesianPP)
        /* PME ranks follow all DD ranks in rank order */
        if (sim_nodeid < dd->nnodes)
            pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
    /* This assumes DD cells with identical x coordinates
     * are numbered sequentially.
     */
    if (dd->comm->pmenodes == NULL)
        if (sim_nodeid < dd->nnodes)
            /* The DD index equals the nodeid */
            pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
    /* Otherwise search the explicit interleaved PME rank list */
    while (sim_nodeid > dd->comm->pmenodes[i])
    if (sim_nodeid < dd->comm->pmenodes[i])
        pmenode = dd->comm->pmenodes[i];
/* Report the PME rank grid dimensions (counts along x and y) through the
 * output parameters.
 * NOTE(review): the branch taken when dd is unavailable is not visible
 * in this chunk.
 */
void get_pme_nnodes(const gmx_domdec_t *dd,
                    int *npmenodes_x, int *npmenodes_y)
    *npmenodes_x = dd->comm->npmenodes_x;
    *npmenodes_y = dd->comm->npmenodes_y;
2314 gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2316 gmx_bool bPMEOnlyNode;
2318 if (DOMAINDECOMP(cr))
2320 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2324 bPMEOnlyNode = FALSE;
2327 return bPMEOnlyNode;
/* For PME rank pmenodeid, build the list of PP rank ids it receives
 * coordinates from (*my_ddnodes, count *nmy_ddnodes, allocated here and
 * owned by the caller) and report the peer node: the last PP rank in
 * the list.
 */
void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
                     int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
    ivec coord, coord_pme;

    /* Upper bound: PP ranks are spread evenly over the PME ranks */
    snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes);

    /* Scan all DD cells and keep those mapping to this PME rank */
    for (x = 0; x < dd->nc[XX]; x++)
        for (y = 0; y < dd->nc[YY]; y++)
            for (z = 0; z < dd->nc[ZZ]; z++)
                if (dd->comm->bCartesianPP_PME)
                    dd_cart_coord2pmecoord(dd, coord, coord_pme);
                    if (dd->ci[XX] == coord_pme[XX] &&
                        dd->ci[YY] == coord_pme[YY] &&
                        dd->ci[ZZ] == coord_pme[ZZ])
                        (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
                /* The slab corresponds to the nodeid in the PME group */
                if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
                    (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);

    /* The last PP-only node is the peer node */
    *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];

    fprintf(debug, "Receive coordinates from PP ranks:");
    for (x = 0; x < *nmy_ddnodes; x++)
        fprintf(debug, " %d", (*my_ddnodes)[x]);
    fprintf(debug, "\n");
/* Return whether this PP rank is the one that should send virial and
 * energy to its PME rank: only the last PP rank mapping to a given PME
 * rank does. Only relevant when there are separate PME ranks.
 */
static gmx_bool receive_vir_ener(t_commrec *cr)
    gmx_domdec_comm_t *comm;
    int pmenode, coords[DIM], rank;

    if (cr->npmenodes < cr->dd->nnodes)
        comm = cr->dd->comm;
        if (comm->bCartesianPP_PME)
            pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
            MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
            /* Does the next rank along cartpmedim share our PME rank? */
            coords[comm->cartpmedim]++;
            if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
                MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
                if (dd_simnode2pmenode(cr, rank) == pmenode)
                    /* This is not the last PP node for pmenode */
        /* Non-Cartesian layouts: compare with the next simulation rank */
        pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
        if (cr->sim_nodeid+1 < cr->nnodes &&
            dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
            /* This is not the last PP node for pmenode */
/* Reset the zone charge-group ranges after (re)assigning home charge
 * groups: zone 0 covers all home cgs, every other zone starts out empty
 * (all range boundaries set to ncg_home).
 */
static void set_zones_ncg_home(gmx_domdec_t *dd)
    gmx_domdec_zones_t *zones;

    zones = &dd->comm->zones;

    zones->cg_range[0] = 0;
    for (i = 1; i < zones->n+1; i++)
        zones->cg_range[i] = dd->ncg_home;
    /* zone_ncg1[0] should always be equal to ncg_home */
    dd->comm->zone_ncg1[0] = dd->ncg_home;
/* Rebuild the local charge-group index arrays (index_gl, cgindex and the
 * home cg count) from the global cg list stored in the state, using the
 * global cg->atom index gcgs_index; used when restarting from a
 * checkpoint. Finishes by resetting the zone cg ranges.
 */
static void rebuild_cgindex(gmx_domdec_t *dd,
                            const int *gcgs_index, t_state *state)
    int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;

    dd_cg_gl = dd->index_gl;
    cgindex = dd->cgindex;

    for (i = 0; i < state->ncg_gl; i++)
        dd_cg_gl[i] = cg_gl;
        /* Accumulate the atom count of this cg from the global index */
        nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
    dd->ncg_home = state->ncg_gl;

    set_zones_ncg_home(dd);
/* Look up the cginfo entry for global charge group cg. cginfo_mb is a
 * per-molecule-block array; within a block the entries repeat with
 * period cg_mod, hence the modulo indexing.
 */
static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
    /* Advance to the molecule block containing cg */
    while (cg >= cginfo_mb->cg_end)
    return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
/* Fill fr->cginfo for the local charge groups cg0..cg1-1 from the
 * per-molblock table, and (when tracking is enabled) mark them as local
 * in bLocalCG.
 */
static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
                          t_forcerec *fr, char *bLocalCG)
    cginfo_mb_t *cginfo_mb;

    cginfo_mb = fr->cginfo_mb;
    cginfo = fr->cginfo;

    for (cg = cg0; cg < cg1; cg++)
        cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);

    if (bLocalCG != NULL)
        for (cg = cg0; cg < cg1; cg++)
            bLocalCG[index_gl[cg]] = TRUE;
/* (Re)build the local<->global atom index tables starting from charge
 * group cg_start: fills dd->gatindex (local -> global atom) and the
 * ga2la hash (global -> local atom plus zone), walking the charge groups
 * of every DD zone.
 */
static void make_dd_indices(gmx_domdec_t *dd,
                            const int *gcgs_index, int cg_start)
    int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
    int *zone2cg, *zone_ncg1, *index_gl, *gatindex;

    bLocalCG = dd->comm->bLocalCG;

    if (dd->nat_tot > dd->gatindex_nalloc)
        dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
        srenew(dd->gatindex, dd->gatindex_nalloc);

    nzone = dd->comm->zones.n;
    zone2cg = dd->comm->zones.cg_range;
    zone_ncg1 = dd->comm->zone_ncg1;
    index_gl = dd->index_gl;
    gatindex = dd->gatindex;
    bCGs = dd->comm->bCGs;

    if (zone2cg[1] != dd->ncg_home)
        gmx_incons("dd->ncg_zone is not up to date");

    /* Make the local to global and global to local atom index */
    a = dd->cgindex[cg_start];
    for (zone = 0; zone < nzone; zone++)
        cg0 = zone2cg[zone];
        cg1 = zone2cg[zone+1];
        /* cgs beyond cg1_p1 arrived via more than one communication pulse */
        cg1_p1 = cg0 + zone_ncg1[zone];

        for (cg = cg0; cg < cg1; cg++)
            /* Signal that this cg is from more than one pulse away */
            cg_gl = index_gl[cg];
            /* With charge groups: one ga2la entry per atom of the cg */
            for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
                ga2la_set(dd->ga2la, a_gl, a, zone1);
            /* Without charge groups: the cg index is the atom index */
            gatindex[a] = cg_gl;
            ga2la_set(dd->ga2la, cg_gl, a, zone1);
/* Debug helper: verify that the bLocalCG flags agree with the charge
 * groups listed in dd->index_gl, printing any mismatch with the context
 * string 'where'; returns an inconsistency count (tail not visible in
 * this chunk).
 */
static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
    int ncg, i, ngl, nerr;

    /* Nothing to check when tracking is disabled */
    if (bLocalCG == NULL)
    /* Every local cg must be flagged in bLocalCG */
    for (i = 0; i < dd->ncg_tot; i++)
        if (!bLocalCG[dd->index_gl[i]])
            "DD rank %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
    /* Count the cgs flagged local over the whole system */
    for (i = 0; i < ncg_sys; i++)
    if (ngl != dd->ncg_tot)
        fprintf(stderr, "DD rank %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
/* Debug helper: cross-check the local->global (gatindex) and
 * global->local (ga2la) index tables for duplicates and mismatches,
 * then check the bLocalCG flags; fatal error when any inconsistency
 * is found.
 */
static void check_index_consistency(gmx_domdec_t *dd,
                                    int natoms_sys, int ncg_sys,
    int nerr, ngl, i, a, cell;

    if (dd->comm->DD_debug > 1)
        /* Expensive check: every global atom may appear at most once */
        snew(have, natoms_sys);
        for (a = 0; a < dd->nat_tot; a++)
            if (have[dd->gatindex[a]] > 0)
                fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
            /* Store index+1 so 0 means "not seen yet" */
            have[dd->gatindex[a]] = a + 1;

    snew(have, dd->nat_tot);

    /* Check every ga2la entry against gatindex */
    for (i = 0; i < natoms_sys; i++)
        if (ga2la_get(dd->ga2la, i, &a, &cell))
            if (a >= dd->nat_tot)
                fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
            if (dd->gatindex[a] != i)
                fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
    if (ngl != dd->nat_tot)
        "DD rank %d, %s: %d global atom indices, %d local atoms\n",
        dd->rank, where, ngl, dd->nat_tot);
    /* Local atoms that have no ga2la entry at all */
    for (a = 0; a < dd->nat_tot; a++)
        "DD rank %d, %s: local atom %d, global %d has no global index\n",
        dd->rank, where, a+1, dd->gatindex[a]+1);

    nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);

    gmx_fatal(FARGS, "DD rank %d, %s: %d atom/cg index inconsistencies",
              dd->rank, where, nerr);
/* Invalidate the local<->global index data from cg_start/a_start onward:
 * clears the ga2la table (wholesale or per entry), resets the bLocalCG
 * flags, and clears the local virtual-site and (when present) constraint
 * indices.
 */
static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
    /* Clear the whole list without searching */
    ga2la_clear(dd->ga2la);
    /* Or delete only the entries from a_start onward */
    for (i = a_start; i < dd->nat_tot; i++)
        ga2la_del(dd->ga2la, dd->gatindex[i]);

    bLocalCG = dd->comm->bLocalCG;
    for (i = cg_start; i < dd->ncg_tot; i++)
        bLocalCG[dd->index_gl[i]] = FALSE;

    dd_clear_local_vsite_indices(dd);

    if (dd->constraints)
        dd_clear_local_constraint_indices(dd);
/* This function should be used for moving the domain boundaries during DLB,
 * for obtaining the minimum cell size. It checks the initially set limit
 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
 * and, possibly, a longer cut-off limit set for PME load balancing.
 */
static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
    cellsize_min = comm->cellsize_min[dim];

    if (!comm->bVacDLBNoLimit)
        /* The cut-off might have changed, e.g. by PME load balacning,
         * from the value used to set comm->cellsize_min, so check it.
         */
        cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
        if (comm->bPMELoadBalDLBLimits)
            /* Check for the cut-off limit set by the PME load balancing */
            cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);

    return cellsize_min;
/* Return the maximum allowed shift of a cell boundary relative to its
 * diagonal neighbors for the given decomposition dimension index.
 */
static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
    real grid_jump_limit;

    /* The distance between the boundaries of cells at distance
     * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
     * and by the fact that cells should not be shifted by more than
     * half their size, such that cg's only shift by one cell
     * at redecomposition.
     */
    grid_jump_limit = comm->cellsize_limit;
    if (!comm->bVacDLBNoLimit)
        if (comm->bPMELoadBalDLBLimits)
            cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
        /* Divide by the pulse count: each pulse must span the cut-off */
        grid_jump_limit = max(grid_jump_limit,
                              cutoff/comm->cd[dim_ind].np);

    return grid_jump_limit;
/* Check that under DLB no cell boundary has shifted past the allowed
 * jump limit relative to its neighbors in the higher decomposition
 * dimensions; raises a fatal error when a violation is detected.
 */
static gmx_bool check_grid_jump(gmx_int64_t step,
    gmx_domdec_comm_t *comm;

    for (d = 1; d < dd->ndim; d++)
        limit = grid_jump_limit(comm, cutoff, d);
        bfac = ddbox->box_size[dim];
        if (ddbox->tric_dir[dim])
            bfac *= ddbox->skew_fac[dim];
        /* Compare scaled boundary fractions against the jump limit */
        if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
            (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
            /* This error should never be triggered under normal
             * circumstances, but you never know ...
             */
            gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
                      gmx_step_str(step, buf),
                      dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2825 static int dd_load_count(gmx_domdec_comm_t *comm)
2827 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
/* Measure this rank's force-computation load for dynamic load balancing:
 * the flop count when flop counting is enabled, otherwise the force
 * cycle count corrected for outliers; when several ranks share a GPU the
 * GPU wait time is replaced by its average over those ranks so they
 * balance consistently.
 */
static float dd_force_load(gmx_domdec_comm_t *comm)
    if (comm->eFlop > 1)
        /* eFlop > 1 adds pseudo-random noise, used for DLB testing */
        load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
    load = comm->cycl[ddCyclF];
    if (comm->cycl_n[ddCyclF] > 1)
        /* Subtract the maximum of the last n cycle counts
         * to get rid of possible high counts due to other sources,
         * for instance system activity, that would otherwise
         * affect the dynamic load balancing.
         */
        load -= comm->cycl_max[ddCyclF];

    if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
        float gpu_wait, gpu_wait_sum;

        gpu_wait = comm->cycl[ddCyclWaitGPU];
        if (comm->cycl_n[ddCyclF] > 1)
            /* We should remove the WaitGPU time of the same MD step
             * as the one with the maximum F time, since the F time
             * and the wait time are not independent.
             * Furthermore, the step for the max F time should be chosen
             * the same on all ranks that share the same GPU.
             * But to keep the code simple, we remove the average instead.
             * The main reason for artificially long times at some steps
             * is spurious CPU activity or MPI time, so we don't expect
             * that changes in the GPU wait time matter a lot here.
             */
            gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
        /* Sum the wait times over the ranks that share the same GPU */
        MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
                      comm->mpi_comm_gpu_shared);
        /* Replace the wait time by the average over the ranks */
        load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
/* Build the cumulative cell-boundary fractions along dimension dim for
 * static (non-DLB) load balancing: uses the user-supplied slb_frac when
 * given, a uniform grid otherwise. The array (*dim_f) is allocated here
 * with dd->nc[dim]+1 entries, ending at 1.
 */
static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
    gmx_domdec_comm_t *comm;

    snew(*dim_f, dd->nc[dim]+1);
    for (i = 1; i < dd->nc[dim]; i++)
        if (comm->slb_frac[dim])
            (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
        /* Uniform grid fallback */
        (*dim_f)[i] = (real)i/(real)dd->nc[dim];
    (*dim_f)[dd->nc[dim]] = 1;
/* Initialise the PME slab bookkeeping for PME decomposition dimension
 * dimind: determine which DD cell range feeds each PME slab
 * (pp_min/pp_max) and the static boundary fractions used later for the
 * maxshift computation.
 */
static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
    int pmeindex, slab, nso, i;

    /* Special case: PME decomposed only along y while DD major dim is y */
    if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
    ddpme->dim = dimind;
    ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
    ddpme->nslab = (ddpme->dim == 0 ?
                    dd->comm->npmenodes_x :
                    dd->comm->npmenodes_y);

    if (ddpme->nslab <= 1)
    /* Number of PME ranks per slab along this dimension */
    nso = dd->comm->npmenodes/ddpme->nslab;
    /* Determine for each PME slab the PP location range for dimension dim */
    snew(ddpme->pp_min, ddpme->nslab);
    snew(ddpme->pp_max, ddpme->nslab);
    for (slab = 0; slab < ddpme->nslab; slab++)
        /* Start with an empty (inverted) range per slab */
        ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
        ddpme->pp_max[slab] = 0;
    for (i = 0; i < dd->nnodes; i++)
        ddindex2xyz(dd->nc, i, xyz);
        /* For y only use our y/z slab.
         * This assumes that the PME x grid size matches the DD grid size.
         */
        if (dimind == 0 || xyz[XX] == dd->ci[XX])
            pmeindex = ddindex2pmeindex(dd, i);
            slab = pmeindex/nso;
            slab = pmeindex % ddpme->nslab;
            ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
            ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);

    set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
2968 int dd_pme_maxshift_x(gmx_domdec_t *dd)
2970 if (dd->comm->ddpme[0].dim == XX)
2972 return dd->comm->ddpme[0].maxshift;
2980 int dd_pme_maxshift_y(gmx_domdec_t *dd)
2982 if (dd->comm->ddpme[0].dim == YY)
2984 return dd->comm->ddpme[0].maxshift;
2986 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2988 return dd->comm->ddpme[1].maxshift;
/* Compute ddpme->maxshift: the maximum number of PME slabs that atoms of
 * a DD cell may need to communicate with beyond the matching slab, given
 * the current cell boundary fractions cell_f. bUniform indicates a
 * uniform (non-DLB) grid, which permits the optimal shift.
 */
static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
                             gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
    gmx_domdec_comm_t *comm;
    real range, pme_boundary;

    nc = dd->nc[ddpme->dim];

    if (!ddpme->dim_match)
        /* PP decomposition is not along dim: the worst situation */
    else if (ns <= 3 || (bUniform && ns == nc))
        /* The optimal situation */
    /* We need to check for all pme nodes which nodes they
     * could possibly need to communicate with.
     */
    xmin = ddpme->pp_min;
    xmax = ddpme->pp_max;
    /* Allow for atoms to be maximally 2/3 times the cut-off
     * out of their DD cell. This is a reasonable balance between
     * performance and support for most charge-group/cut-off
     */
    range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
    /* Avoid extra communication when we are exactly at a boundary */

    for (s = 0; s < ns; s++)
        /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
        pme_boundary = (real)s/ns;
        /* Grow sh while cells sh+1 slabs below still reach past the
         * lower boundary (with periodic wrap in the second condition).
         */
            cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
            cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
        pme_boundary = (real)(s+1)/ns;
        /* Same check against the upper boundary */
            cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
            cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))

    ddpme->maxshift = sh;

    fprintf(debug, "PME slab communication range for dim %d is %d\n",
            ddpme->dim, ddpme->maxshift);
/* Verify that each decomposed and bounded box dimension is large enough
 * for the requested number of DD cells times the smallest allowed cell
 * size (with margin); fatal error otherwise.
 */
static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
    for (d = 0; d < dd->ndim; d++)
        if (dim < ddbox->nboundeddim &&
            ddbox->box_size[dim]*ddbox->skew_fac[dim] <
            dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
            gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
                      dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
                      dd->nc[dim], dd->comm->cellsize_limit);
/* Storage modes for set_dd_cell_sizes_slb() below */
    setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY

/* Set the domain boundaries. Use for static (or no) load balancing,
 * and also for the starting state for dynamic load balancing.
 * setmode determines if and where the boundaries are stored; use the
 * enum above.
 * Returns the number of communication pulses in npulse.
 */
static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
                                  int setmode, ivec npulse)
    gmx_domdec_comm_t *comm;
    real *cell_x, cell_dx, cellsize;

    for (d = 0; d < DIM; d++)
        cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
        /* Uniform grid: equal-size cells along d */
        if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
            cell_dx = ddbox->box_size[d]/dd->nc[d];
            case setcellsizeslbMASTER:
                for (j = 0; j < dd->nc[d]+1; j++)
                    dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
            case setcellsizeslbLOCAL:
                comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
                comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
            /* A pulse must span the cut-off: count how many are needed */
            cellsize = cell_dx*ddbox->skew_fac[d];
            while (cellsize*npulse[d] < comm->cutoff)
            cellsize_min[d] = cellsize;
        /* Statically load balanced grid */
        /* Also when we are not doing a master distribution we determine
         * all cell borders in a loop to obtain identical values
         * to the master distribution case and to determine npulse.
         */
        if (setmode == setcellsizeslbMASTER)
            cell_x = dd->ma->cell_x[d];
        /* Temporary array when not storing on the master */
        snew(cell_x, dd->nc[d]+1);
        cell_x[0] = ddbox->box0[d];
        for (j = 0; j < dd->nc[d]; j++)
            cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
            cell_x[j+1] = cell_x[j] + cell_dx;
            cellsize = cell_dx*ddbox->skew_fac[d];
            while (cellsize*npulse[d] < comm->cutoff &&
                   npulse[d] < dd->nc[d]-1)
            cellsize_min[d] = min(cellsize_min[d], cellsize);
        if (setmode == setcellsizeslbLOCAL)
            comm->cell_x0[d] = cell_x[dd->ci[d]];
            comm->cell_x1[d] = cell_x[dd->ci[d]+1];
        if (setmode != setcellsizeslbMASTER)
        /* The following limitation is to avoid that a cell would receive
         * some of its own home charge groups back over the periodic boundary.
         * Double charge groups cause trouble with the global indices.
         */
        if (d < ddbox->npbcdim &&
            dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
            char error_string[STRLEN];

            sprintf(error_string,
                    "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
                    dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
                    dd->nc[d], dd->nc[d],
                    dd->nnodes > dd->nc[d] ? "cells" : "ranks");

            if (setmode == setcellsizeslbLOCAL)
                /* Collective variant: all ranks hit this condition together */
                gmx_fatal_collective(FARGS, NULL, dd, error_string);
            gmx_fatal(FARGS, error_string);
    if (!comm->bDynLoadBal)
        copy_rvec(cellsize_min, comm->cellsize_min);
    /* Update the PME slab communication ranges for the new boundaries */
    for (d = 0; d < comm->npmedecompdim; d++)
        set_pme_maxshift(dd, &comm->ddpme[d],
                         comm->slb_frac[dd->dim[d]] == NULL, ddbox,
                         comm->ddpme[d].slb_dim_f);
/* Enforce minimum-cell-size and boundary-staggering limits on the relative
 * DLB cell boundaries root->cell_f over the half-open index range
 * [range[0], range[1]).  Works iteratively (clamping a too-small cell shrinks
 * the others) and recursively (sub-ranges are re-balanced when staggering
 * bounds are violated).
 * NOTE(review): this chunk is an elided extraction — braces/blank lines and
 * some statements are missing between the numbered lines; code left untouched.
 */
3215 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3216 int d, int dim, gmx_domdec_root_t *root,
3218 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3220 gmx_domdec_comm_t *comm;
3221 int ncd, i, j, nmin, nmin_old;
3222 gmx_bool bLimLo, bLimHi;
3224 real fac, halfway, cellsize_limit_f_i, region_size;
3225 gmx_bool bPBC, bLastHi = FALSE;
/* nrange starts as a copy of range; it is narrowed below before recursion */
3226 int nrange[] = {range[0], range[1]};
3228 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
3234 bPBC = (dim < ddbox->npbcdim);
/* cell_size is a scratch buffer owned by the row root */
3236 cell_size = root->buf_ncd;
3240 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3243 /* First we need to check if the scaling does not make cells
3244 * smaller than the smallest allowed size.
3245 * We need to do this iteratively, since if a cell is too small,
3246 * it needs to be enlarged, which makes all the other cells smaller,
3247 * which could in turn make another cell smaller than allowed.
3249 for (i = range[0]; i < range[1]; i++)
3251 root->bCellMin[i] = FALSE;
3257 /* We need the total for normalization */
3259 for (i = range[0]; i < range[1]; i++)
3261 if (root->bCellMin[i] == FALSE)
3263 fac += cell_size[i];
/* Rescale only the cells that are not pinned at the minimum size */
3266 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3267 /* Determine the cell boundaries */
3268 for (i = range[0]; i < range[1]; i++)
3270 if (root->bCellMin[i] == FALSE)
3272 cell_size[i] *= fac;
/* Without PBC the two outer cells have no lower size limit */
3273 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3275 cellsize_limit_f_i = 0;
3279 cellsize_limit_f_i = cellsize_limit_f;
3281 if (cell_size[i] < cellsize_limit_f_i)
3283 root->bCellMin[i] = TRUE;
3284 cell_size[i] = cellsize_limit_f_i;
3288 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
/* Iterate until no additional cell got pinned at the minimum */
3291 while (nmin > nmin_old);
3294 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3295 /* For this check we should not use DD_CELL_MARGIN,
3296 * but a slightly smaller factor,
3297 * since rounding could get use below the limit.
3299 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3302 gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3303 gmx_step_str(step, buf),
3304 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3305 ncd, comm->cellsize_min[dim]);
/* bLimited records whether any limit was active in this call */
3308 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3312 /* Check if the boundary did not displace more than halfway
3313 * each of the cells it bounds, as this could cause problems,
3314 * especially when the differences between cell sizes are large.
3315 * If changes are applied, they will not make cells smaller
3316 * than the cut-off, as we check all the boundaries which
3317 * might be affected by a change and if the old state was ok,
3318 * the cells will at most be shrunk back to their old size.
3320 for (i = range[0]+1; i < range[1]; i++)
3322 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3323 if (root->cell_f[i] < halfway)
3325 root->cell_f[i] = halfway;
3326 /* Check if the change also causes shifts of the next boundaries */
3327 for (j = i+1; j < range[1]; j++)
3329 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3331 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3335 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3336 if (root->cell_f[i] > halfway)
3338 root->cell_f[i] = halfway;
3339 /* Check if the change also causes shifts of the next boundaries */
3340 for (j = i-1; j >= range[0]+1; j--)
3342 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3344 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3351 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3352 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3353 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3354 * for a and b nrange is used */
3357 /* Take care of the staggering of the cell boundaries */
3360 for (i = range[0]; i < range[1]; i++)
3362 root->cell_f_max0[i] = root->cell_f[i];
3363 root->cell_f_min1[i] = root->cell_f[i+1];
3368 for (i = range[0]+1; i < range[1]; i++)
3370 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3371 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3372 if (bLimLo && bLimHi)
3374 /* Both limits violated, try the best we can */
3375 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3376 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3377 nrange[0] = range[0];
3379 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3382 nrange[1] = range[1];
3383 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3389 /* root->cell_f[i] = root->bound_min[i]; */
3390 nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
3393 else if (bLimHi && !bLastHi)
3396 if (nrange[1] < range[1]) /* found a LimLo before */
3398 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3399 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3400 nrange[0] = nrange[1];
3402 root->cell_f[i] = root->bound_max[i];
3404 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3406 nrange[1] = range[1];
3409 if (nrange[1] < range[1]) /* found last a LimLo */
3411 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3412 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3413 nrange[0] = nrange[1];
3414 nrange[1] = range[1];
3415 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3417 else if (nrange[0] > range[0]) /* found at least one LimHi */
3419 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
/* Row-root side of dynamic load balancing for one DD dimension:
 * computes new relative cell boundaries root->cell_f from the measured
 * per-cell loads (under-relaxed, change capped by comm->dlb_scale_lim),
 * sets staggering bounds, enforces all limits, then appends the lower
 * dimensions' boundaries and the PME maxshift values to cell_f for
 * broadcasting to the row.
 * NOTE(review): elided extraction — some lines between the numbered ones
 * are missing from this view; code left untouched.
 */
3426 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3427 int d, int dim, gmx_domdec_root_t *root,
3428 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3429 gmx_bool bUniform, gmx_int64_t step)
3431 gmx_domdec_comm_t *comm;
3432 int ncd, d1, i, j, pos;
3434 real load_aver, load_i, imbalance, change, change_max, sc;
3435 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3439 int range[] = { 0, 0 };
3443 /* Convert the maximum change from the input percentage to a fraction */
3444 change_limit = comm->dlb_scale_lim*0.01;
3448 bPBC = (dim < ddbox->npbcdim);
3450 cell_size = root->buf_ncd;
3452 /* Store the original boundaries */
3453 for (i = 0; i < ncd+1; i++)
3455 root->old_cell_f[i] = root->cell_f[i];
/* Uniform case: all cells get the same relative size */
3459 for (i = 0; i < ncd; i++)
3461 cell_size[i] = 1.0/ncd;
3464 else if (dd_load_count(comm))
3466 load_aver = comm->load[d].sum_m/ncd;
3468 for (i = 0; i < ncd; i++)
3470 /* Determine the relative imbalance of cell i */
3471 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3472 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3473 /* Determine the change of the cell size using underrelaxation */
3474 change = -relax*imbalance;
3475 change_max = max(change_max, max(change, -change));
3477 /* Limit the amount of scaling.
3478 * We need to use the same rescaling for all cells in one row,
3479 * otherwise the load balancing might not converge.
3482 if (change_max > change_limit)
3484 sc *= change_limit/change_max;
3486 for (i = 0; i < ncd; i++)
3488 /* Determine the relative imbalance of cell i */
3489 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3490 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3491 /* Determine the change of the cell size using underrelaxation */
3492 change = -sc*imbalance;
3493 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
/* Convert the absolute minimum cell size to a fraction of the box */
3497 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3498 cellsize_limit_f *= DD_CELL_MARGIN;
3499 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3500 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3501 if (ddbox->tric_dir[dim])
3503 cellsize_limit_f /= ddbox->skew_fac[dim];
3504 dist_min_f /= ddbox->skew_fac[dim];
/* With pressure scaling the box can shrink; add an extra safety margin */
3506 if (bDynamicBox && d > 0)
3508 dist_min_f *= DD_PRES_SCALE_MARGIN;
3510 if (d > 0 && !bUniform)
3512 /* Make sure that the grid is not shifted too much */
3513 for (i = 1; i < ncd; i++)
3515 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3517 gmx_incons("Inconsistent DD boundary staggering limits!");
3519 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3520 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3523 root->bound_min[i] += 0.5*space;
3525 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3526 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3529 root->bound_max[i] += 0.5*space;
3534 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3536 root->cell_f_max0[i-1] + dist_min_f,
3537 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3538 root->cell_f_min1[i] - dist_min_f);
/* Boundaries are relative fractions: fixed endpoints 0 and 1 */
3543 root->cell_f[0] = 0;
3544 root->cell_f[ncd] = 1;
3545 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3548 /* After the checks above, the cells should obey the cut-off
3549 * restrictions, but it does not hurt to check.
3551 for (i = 0; i < ncd; i++)
3555 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3556 dim, i, root->cell_f[i], root->cell_f[i+1]);
3559 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3560 root->cell_f[i+1] - root->cell_f[i] <
3561 cellsize_limit_f/DD_CELL_MARGIN)
3565 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3566 gmx_step_str(step, buf), dim2char(dim), i,
3567 (root->cell_f[i+1] - root->cell_f[i])
3568 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3573 /* Store the cell boundaries of the lower dimensions at the end */
3574 for (d1 = 0; d1 < d; d1++)
3576 root->cell_f[pos++] = comm->cell_f0[d1];
3577 root->cell_f[pos++] = comm->cell_f1[d1];
3580 if (d < comm->npmedecompdim)
3582 /* The master determines the maximum shift for
3583 * the coordinate communication between separate PME nodes.
3585 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
/* maxshift values are packed into the real array for the row broadcast */
3587 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3590 root->cell_f[pos++] = comm->ddpme[1].maxshift;
/* Convert this rank's relative cell boundaries (cell_f0/cell_f1, fractions
 * of the box) for DD index dimind into absolute coordinates cell_x0/cell_x1,
 * offsetting by box0 for dimensions that are not bounded at zero.
 */
3594 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3595 gmx_ddbox_t *ddbox, int dimind)
3597 gmx_domdec_comm_t *comm;
3602 /* Set the cell dimensions */
3603 dim = dd->dim[dimind];
3604 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3605 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3606 if (dim >= ddbox->nboundeddim)
3608 comm->cell_x0[dim] += ddbox->box0[dim];
3609 comm->cell_x1[dim] += ddbox->box0[dim];
/* Broadcast the row root's cell-fraction array cell_f_row to all ranks in
 * the load-balancing row for dimension d, then unpack this rank's own
 * boundaries, the lower dimensions' boundaries, and the PME maxshift values.
 * NOTE(review): elided extraction — some lines missing from this view.
 */
3613 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3614 int d, int dim, real *cell_f_row,
3617 gmx_domdec_comm_t *comm;
3623 /* Each node would only need to know two fractions,
3624 * but it is probably cheaper to broadcast the whole array.
3626 MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
3627 0, comm->mpi_comm_load[d]);
3629 /* Copy the fractions for this dimension from the buffer */
3630 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3631 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3632 /* The whole array was communicated, so set the buffer position */
3633 pos = dd->nc[dim] + 1;
3634 for (d1 = 0; d1 <= d; d1++)
3638 /* Copy the cell fractions of the lower dimensions */
3639 comm->cell_f0[d1] = cell_f_row[pos++];
3640 comm->cell_f1[d1] = cell_f_row[pos++];
3642 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3644 /* Convert the communicated shift from float to int */
/* The maxshift ints were packed as reals; round to nearest on unpack */
3645 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3648 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
/* DLB update when boundaries may change: for each DD dimension the row root
 * computes new cell fractions (set_dd_cell_sizes_dlb_root) and all row
 * members receive and apply them (distribute_dd_cell_sizes_dlb).
 * NOTE(review): elided extraction — the bRowMember/bRowRoot setup lines are
 * missing from this view; verify against the full file.
 */
3652 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3653 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3654 gmx_bool bUniform, gmx_int64_t step)
3656 gmx_domdec_comm_t *comm;
3658 gmx_bool bRowMember, bRowRoot;
3663 for (d = 0; d < dd->ndim; d++)
3668 for (d1 = d; d1 < dd->ndim; d1++)
3670 if (dd->ci[dd->dim[d1]] > 0)
3683 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3684 ddbox, bDynamicBox, bUniform, step);
3685 cell_f_row = comm->root[d]->cell_f;
/* Non-root row members receive into the shared row buffer */
3689 cell_f_row = comm->cell_f_row;
3691 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
/* Recompute absolute cell bounds from unchanged relative fractions,
 * e.g. after the box has scaled but the fractions stayed the same.
 */
3696 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3700 /* This function assumes the box is static and should therefore
3701 * not be called when the box has changed since the last
3702 * call to dd_partition_system.
3704 for (d = 0; d < dd->ndim; d++)
3706 relative_to_absolute_cell_bounds(dd, ddbox, d);
/* Top-level DLB cell sizing: either run the full boundary update
 * (timed under ewcDDCOMMBOUND) or just refresh absolute bounds for a
 * dynamic box; finally set trivial bounds for dimensions without DD.
 * NOTE(review): elided extraction — the bDoDLB branch condition line is
 * missing from this view; code left untouched.
 */
3712 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3713 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3714 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3715 gmx_wallcycle_t wcycle)
3717 gmx_domdec_comm_t *comm;
3724 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3725 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3726 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3728 else if (bDynamicBox)
3730 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3733 /* Set the dimensions for which no DD is used */
3734 for (dim = 0; dim < DIM; dim++)
3736 if (dd->nc[dim] == 1)
/* Single cell spans the whole box in this dimension */
3738 comm->cell_x0[dim] = 0;
3739 comm->cell_x1[dim] = ddbox->box_size[dim];
3740 if (dim >= ddbox->nboundeddim)
3742 comm->cell_x0[dim] += ddbox->box0[dim];
3743 comm->cell_x1[dim] += ddbox->box0[dim];
/* Grow the per-dimension communication index arrays (cd->ind) so that each
 * DD dimension can hold npulse pulses; newly added entries are
 * zero-initialized.  Only reallocates when the pulse count increased.
 */
3749 static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3752 gmx_domdec_comm_dim_t *cd;
3754 for (d = 0; d < dd->ndim; d++)
3756 cd = &dd->comm->cd[d];
3757 np = npulse[dd->dim[d]];
3758 if (np > cd->np_nalloc)
3762 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3763 dim2char(dd->dim[d]), np);
/* Inform the user once per growth; only the master prints */
3765 if (DDMASTER(dd) && cd->np_nalloc > 0)
3767 fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3769 srenew(cd->ind, np);
3770 for (i = cd->np_nalloc; i < np; i++)
3772 cd->ind[i].index = NULL;
3773 cd->ind[i].nalloc = 0;
/* Entry point for (re)computing this rank's DD cell boundaries each
 * partitioning step: saves the previous bounds for the displacement check,
 * then takes either the dynamic (DLB) or static (slb) sizing path.
 * NOTE(review): elided extraction — some branch/brace lines are missing
 * from this view.
 */
3782 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3783 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3784 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3785 gmx_wallcycle_t wcycle)
3787 gmx_domdec_comm_t *comm;
3793 /* Copy the old cell boundaries for the cg displacement check */
3794 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3795 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3797 if (comm->bDynLoadBal)
3801 check_box_size(dd, ddbox);
3803 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
/* Static path also (re)derives npulse and sizes the comm index arrays */
3807 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
3808 realloc_comm_ind(dd, npulse);
3813 for (d = 0; d < DIM; d++)
3815 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3816 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
/* Validate the DLB cell sizes against the minimum allowed size and, when
 * needed (staggered grids or unbounded dimensions), communicate the cell
 * boundaries for neighbor searching into cell_ns_x0/cell_ns_x1.
 */
3821 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3823 rvec cell_ns_x0, rvec cell_ns_x1,
3826 gmx_domdec_comm_t *comm;
3831 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3833 dim = dd->dim[dim_ind];
3835 /* Without PBC we don't have restrictions on the outer cells */
3836 if (!(dim >= ddbox->npbcdim &&
3837 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3838 comm->bDynLoadBal &&
3839 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3840 comm->cellsize_min[dim])
3843 gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3844 gmx_step_str(step, buf), dim2char(dim),
3845 comm->cell_x1[dim] - comm->cell_x0[dim],
3846 ddbox->skew_fac[dim],
3847 dd->comm->cellsize_min[dim],
3848 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3852 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3854 /* Communicate the boundaries and update cell_ns_x0/1 */
3855 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3856 if (dd->bGridJump && dd->ndim > 1)
/* With staggered grids also verify the jump restriction holds */
3858 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
/* Build the triclinic correction matrix tcm used to convert Cartesian
 * coordinates to lattice fractions along the triclinic box vectors.
 * NOTE(review): elided extraction — the npbcdim conditionals and
 * zero-assignments visible in the full file are missing from this view.
 */
3863 static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3867 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3875 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3876 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
/* Abort with a fatal error when the box shape is incompatible with
 * screw periodic boundary conditions.
 */
3885 static void check_screw_box(matrix box)
3887 /* Mathematical limitation */
3888 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3890 gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3893 /* Limitation due to the asymmetry of the eighth shell method */
3894 if (box[ZZ][YY] != 0)
3896 gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
/* Master-only initial distribution of charge groups over DD cells:
 * computes each group's center of geometry, wraps it into the box
 * (including screw-PBC mirroring of y/z), locates the owning cell from
 * the master cell boundaries, and fills ma->cg / ma->ncg / ma->nat /
 * ma->index.  Logs the resulting per-rank counts to fplog.
 * NOTE(review): elided extraction — several brace and assignment lines
 * are missing between the numbered lines; code left untouched.
 */
3900 static void distribute_cg(FILE *fplog, gmx_int64_t step,
3901 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3904 gmx_domdec_master_t *ma;
3905 int **tmp_ind = NULL, *tmp_nalloc = NULL;
3906 int i, icg, j, k, k0, k1, d, npbcdim;
3908 rvec box_size, cg_cm;
3910 real nrcg, inv_ncg, pos_d;
3912 gmx_bool bUnbounded, bScrew;
/* Temporary per-rank index lists, grown on demand */
3916 if (tmp_ind == NULL)
3918 snew(tmp_nalloc, dd->nnodes);
3919 snew(tmp_ind, dd->nnodes);
3920 for (i = 0; i < dd->nnodes; i++)
3922 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3923 snew(tmp_ind[i], tmp_nalloc[i]);
3927 /* Clear the count */
3928 for (i = 0; i < dd->nnodes; i++)
3934 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3936 cgindex = cgs->index;
3938 /* Compute the center of geometry for all charge groups */
3939 for (icg = 0; icg < cgs->nr; icg++)
3942 k1 = cgindex[icg+1];
/* Single-atom group: center is the atom position itself */
3946 copy_rvec(pos[k0], cg_cm);
3953 for (k = k0; (k < k1); k++)
3955 rvec_inc(cg_cm, pos[k]);
3957 for (d = 0; (d < DIM); d++)
3959 cg_cm[d] *= inv_ncg;
3962 /* Put the charge group in the box and determine the cell index */
3963 for (d = DIM-1; d >= 0; d--)
3966 if (d < dd->npbcdim)
3968 bScrew = (dd->bScrewPBC && d == XX);
3969 if (tric_dir[d] && dd->nc[d] > 1)
3971 /* Use triclinic coordintates for this dimension */
3972 for (j = d+1; j < DIM; j++)
3974 pos_d += cg_cm[j]*tcm[j][d];
3977 while (pos_d >= box[d][d])
3980 rvec_dec(cg_cm, box[d]);
/* Screw pbc: shifting over x mirrors y and z */
3983 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3984 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3986 for (k = k0; (k < k1); k++)
3988 rvec_dec(pos[k], box[d]);
3991 pos[k][YY] = box[YY][YY] - pos[k][YY];
3992 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3999 rvec_inc(cg_cm, box[d]);
4002 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
4003 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
4005 for (k = k0; (k < k1); k++)
4007 rvec_inc(pos[k], box[d]);
4010 pos[k][YY] = box[YY][YY] - pos[k][YY];
4011 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
4016 /* This could be done more efficiently */
/* Linear scan over master cell boundaries to find the owning cell */
4018 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
4023 i = dd_index(dd->nc, ind);
4024 if (ma->ncg[i] == tmp_nalloc[i])
4026 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
4027 srenew(tmp_ind[i], tmp_nalloc[i]);
4029 tmp_ind[i][ma->ncg[i]] = icg;
4031 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
/* Flatten the per-rank lists into ma->cg with ma->index offsets */
4035 for (i = 0; i < dd->nnodes; i++)
4038 for (k = 0; k < ma->ncg[i]; k++)
4040 ma->cg[k1++] = tmp_ind[i][k];
4043 ma->index[dd->nnodes] = k1;
4045 for (i = 0; i < dd->nnodes; i++)
4055 fprintf(fplog, "Charge group distribution at step %s:",
4056 gmx_step_str(step, buf));
4057 for (i = 0; i < dd->nnodes; i++)
4059 fprintf(fplog, " %d", ma->ncg[i]);
4061 fprintf(fplog, "\n");
/* Distribute charge groups from the master to all DD ranks at (re)start:
 * master computes the distribution (distribute_cg), scatters per-rank
 * cg/atom counts, then scatters the global cg indices (dd_scatterv) and
 * builds the local cgindex array on every rank.
 * NOTE(review): elided extraction — several brace/assignment lines are
 * missing from this view; code left untouched.
 */
4065 static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4066 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4069 gmx_domdec_master_t *ma = NULL;
4072 int *ibuf, buf2[2] = { 0, 0 };
4073 gmx_bool bMaster = DDMASTER(dd);
4081 check_screw_box(box);
/* Master computes static cell boundaries, then bins the charge groups */
4084 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
4086 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
4087 for (i = 0; i < dd->nnodes; i++)
4089 ma->ibuf[2*i] = ma->ncg[i];
4090 ma->ibuf[2*i+1] = ma->nat[i];
/* Each rank receives its own (ncg, nat) pair */
4098 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4100 dd->ncg_home = buf2[0];
4101 dd->nat_home = buf2[1];
4102 dd->ncg_tot = dd->ncg_home;
4103 dd->nat_tot = dd->nat_home;
4104 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4106 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4107 srenew(dd->index_gl, dd->cg_nalloc);
4108 srenew(dd->cgindex, dd->cg_nalloc+1);
/* Byte counts and displacements for the variable-size scatter */
4112 for (i = 0; i < dd->nnodes; i++)
4114 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4115 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4120 DDMASTER(dd) ? ma->ibuf : NULL,
4121 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
4122 DDMASTER(dd) ? ma->cg : NULL,
4123 dd->ncg_home*sizeof(int), dd->index_gl);
4125 /* Determine the home charge group sizes */
4127 for (i = 0; i < dd->ncg_home; i++)
4129 cg_gl = dd->index_gl[i];
4131 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4136 fprintf(debug, "Home charge groups:\n");
4137 for (i = 0; i < dd->ncg_home; i++)
4139 fprintf(debug, " %d", dd->index_gl[i]);
4142 fprintf(debug, "\n");
4145 fprintf(debug, "\n");
/* For per-atom vectors (x, v, ...): compact the entries of charge groups
 * that stay local in place, and copy moved groups' atom vectors into the
 * per-direction communication buffers at vector slot 'vec' of 'nvec'.
 * Returns the new home position count.
 * NOTE(review): elided extraction — the staying/moving branch test lines
 * are missing from this view.
 */
4149 static int compact_and_copy_vec_at(int ncg, int *move,
4152 rvec *src, gmx_domdec_comm_t *comm,
4155 int m, icg, i, i0, i1, nrcg;
/* Reset the per-direction buffer write positions */
4161 for (m = 0; m < DIM*2; m++)
4167 for (icg = 0; icg < ncg; icg++)
4169 i1 = cgindex[icg+1];
4175 /* Compact the home array in place */
4176 for (i = i0; i < i1; i++)
4178 copy_rvec(src[i], src[home_pos++]);
4184 /* Copy to the communication buffer */
/* Skip the flag slot plus the preceding vectors of this group */
4186 pos_vec[m] += 1 + vec*nrcg;
4187 for (i = i0; i < i1; i++)
4189 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4191 pos_vec[m] += (nvec - vec - 1)*nrcg;
4195 home_pos += i1 - i0;
/* For per-charge-group vectors (cg_cm): compact staying groups' entries
 * in place and copy moved groups' single vector into the per-direction
 * communication buffers, skipping room for nvec per-atom vectors.
 * Returns the new home position count.
 * NOTE(review): elided extraction — branch test lines missing from view.
 */
4203 static int compact_and_copy_vec_cg(int ncg, int *move,
4205 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4208 int m, icg, i0, i1, nrcg;
4214 for (m = 0; m < DIM*2; m++)
4220 for (icg = 0; icg < ncg; icg++)
4222 i1 = cgindex[icg+1];
4228 /* Compact the home array in place */
4229 copy_rvec(src[icg], src[home_pos++]);
4235 /* Copy to the communication buffer */
4236 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
/* Reserve the slots for this group's nvec per-atom vectors */
4237 pos_vec[m] += 1 + nrcg*nvec;
/* Compact the local charge-group bookkeeping arrays after groups moved
 * away: staying groups are shifted down in index_gl/cgindex/gatindex/
 * cginfo with the global-to-local table updated; moved groups have their
 * global atom indices removed and bLocalCG cleared.
 * NOTE(review): elided extraction — the stay/move branch and return lines
 * are missing from this view.
 */
4249 static int compact_ind(int ncg, int *move,
4250 int *index_gl, int *cgindex,
4252 gmx_ga2la_t ga2la, char *bLocalCG,
4255 int cg, nat, a0, a1, a, a_gl;
4260 for (cg = 0; cg < ncg; cg++)
4266 /* Compact the home arrays in place.
4267 * Anything that can be done here avoids access to global arrays.
4269 cgindex[home_pos] = nat;
4270 for (a = a0; a < a1; a++)
4273 gatindex[nat] = a_gl;
4274 /* The cell number stays 0, so we don't need to set it */
4275 ga2la_change_la(ga2la, a_gl, nat);
4278 index_gl[home_pos] = index_gl[cg];
4279 cginfo[home_pos] = cginfo[cg];
4280 /* The charge group remains local, so bLocalCG does not change */
4285 /* Clear the global indices */
4286 for (a = a0; a < a1; a++)
4288 ga2la_del(ga2la, gatindex[a]);
4292 bLocalCG[index_gl[cg]] = FALSE;
/* Terminate the compacted cgindex with the final atom count */
4296 cgindex[home_pos] = nat;
/* Non-compacting variant: for every charge group that moved away, remove
 * its atoms from the global-to-local table, clear bLocalCG, and flag the
 * group in the ns-grid cell_index so fill_grid can recognize it.
 */
4301 static void clear_and_mark_ind(int ncg, int *move,
4302 int *index_gl, int *cgindex, int *gatindex,
4303 gmx_ga2la_t ga2la, char *bLocalCG,
4308 for (cg = 0; cg < ncg; cg++)
4314 /* Clear the global indices */
4315 for (a = a0; a < a1; a++)
4317 ga2la_del(ga2la, gatindex[a]);
4321 bLocalCG[index_gl[cg]] = FALSE;
4323 /* Signal that this cg has moved using the ns cell index.
4324 * Here we set it to -1. fill_grid will change it
4325 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4327 cell_index[cg] = -1;
/* Write a diagnostic report about a charge group that moved too far
 * between DD steps: offending atom, direction, distance out of the cell,
 * and (when available) old/new coordinates and cell boundaries.
 */
4332 static void print_cg_move(FILE *fplog,
4334 gmx_int64_t step, int cg, int dim, int dir,
4335 gmx_bool bHaveLimitdAndCMOld, real limitd,
4336 rvec cm_old, rvec cm_new, real pos_d)
4338 gmx_domdec_comm_t *comm;
4343 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4344 if (bHaveLimitdAndCMOld)
4346 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4347 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4351 fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4352 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
/* dir selects which cell boundary was crossed */
4354 fprintf(fplog, "distance out of cell %f\n",
4355 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4356 if (bHaveLimitdAndCMOld)
4358 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4359 cm_old[XX], cm_old[YY], cm_old[ZZ]);
4361 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4362 cm_new[XX], cm_new[YY], cm_new[ZZ]);
4363 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4365 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4366 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4368 comm->cell_x0[dim], comm->cell_x1[dim]);
/* Report a too-large charge-group displacement to the log file (when
 * open) and to stderr, then abort with a fatal error.
 */
4371 static void cg_move_error(FILE *fplog,
4373 gmx_int64_t step, int cg, int dim, int dir,
4374 gmx_bool bHaveLimitdAndCMOld, real limitd,
4375 rvec cm_old, rvec cm_new, real pos_d)
4379 print_cg_move(fplog, dd, step, cg, dim, dir,
4380 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4382 print_cg_move(stderr, dd, step, cg, dim, dir,
4383 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4385 "A charge group moved too far between two domain decomposition steps\n"
4386 "This usually means that your system is not well equilibrated");
/* Apply the screw-pbc rotation to all distributed state vectors of atom a:
 * positions are mirrored within the (rectangular) box in y and z, while
 * velocity-like quantities (v, sd_X, cg_p) are negated in y and z.
 */
4389 static void rotate_state_atom(t_state *state, int a)
4393 for (est = 0; est < estNR; est++)
4395 if (EST_DISTR(est) && (state->flags & (1<<est)))
4400 /* Rotate the complete state; for a rectangular box only */
4401 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4402 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4405 state->v[a][YY] = -state->v[a][YY];
4406 state->v[a][ZZ] = -state->v[a][ZZ];
4409 state->sd_X[a][YY] = -state->sd_X[a][YY];
4410 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4413 state->cg_p[a][YY] = -state->cg_p[a][YY];
4414 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4416 case estDISRE_INITF:
4417 case estDISRE_RM3TAV:
4418 case estORIRE_INITF:
4420 /* These are distances, so not affected by rotation */
4423 gmx_incons("Unknown state entry encountered in rotate_state_atom");
/* Return comm->moved, growing it (preserving contents via srenew) so it
 * can hold at least natoms entries.
 * NOTE(review): the return statement is outside this elided view.
 */
4429 static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4431 if (natoms > comm->moved_nalloc)
4433 /* Contents should be preserved here */
4434 comm->moved_nalloc = over_alloc_dd(natoms)
4435 srenew(comm->moved, comm->moved_nalloc);
/* Per-thread worker for dd_redistribute_cg: for charge groups in
 * [cg_start, cg_end) compute the center of geometry, wrap it into the
 * box (triclinic-aware, screw-PBC mirrors y/z), detect DD cell boundary
 * crossings, and store the destination direction index plus flags in
 * move[cg].  Aborts via cg_move_error when a group moved beyond the
 * allowed limit.
 * NOTE(review): elided extraction — several brace/assignment lines
 * (e.g. dev[] updates, mc selection) are missing from this view.
 */
4441 static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4444 ivec tric_dir, matrix tcm,
4445 rvec cell_x0, rvec cell_x1,
4446 rvec limitd, rvec limit0, rvec limit1,
4448 int cg_start, int cg_end,
4453 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4454 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4458 real inv_ncg, pos_d;
4461 npbcdim = dd->npbcdim;
4463 for (cg = cg_start; cg < cg_end; cg++)
/* Single-atom group: center is the atom position */
4470 copy_rvec(state->x[k0], cm_new);
4477 for (k = k0; (k < k1); k++)
4479 rvec_inc(cm_new, state->x[k]);
4481 for (d = 0; (d < DIM); d++)
4483 cm_new[d] = inv_ncg*cm_new[d];
4488 /* Do pbc and check DD cell boundary crossings */
4489 for (d = DIM-1; d >= 0; d--)
4493 bScrew = (dd->bScrewPBC && d == XX);
4494 /* Determine the location of this cg in lattice coordinates */
4498 for (d2 = d+1; d2 < DIM; d2++)
4500 pos_d += cm_new[d2]*tcm[d2][d];
4503 /* Put the charge group in the triclinic unit-cell */
4504 if (pos_d >= cell_x1[d])
/* Beyond the allowed displacement limit: fatal */
4506 if (pos_d >= limit1[d])
4508 cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
4509 cg_cm[cg], cm_new, pos_d);
/* Wrap around only at the upper edge of the grid */
4512 if (dd->ci[d] == dd->nc[d] - 1)
4514 rvec_dec(cm_new, state->box[d]);
4517 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4518 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4520 for (k = k0; (k < k1); k++)
4522 rvec_dec(state->x[k], state->box[d]);
4525 rotate_state_atom(state, k);
4530 else if (pos_d < cell_x0[d])
4532 if (pos_d < limit0[d])
4534 cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
4535 cg_cm[cg], cm_new, pos_d);
4540 rvec_inc(cm_new, state->box[d])
4543 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4544 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4546 for (k = k0; (k < k1); k++)
4548 rvec_inc(state->x[k], state->box[d]);
4551 rotate_state_atom(state, k);
4557 else if (d < npbcdim)
4559 /* Put the charge group in the rectangular unit-cell */
4560 while (cm_new[d] >= state->box[d][d])
4562 rvec_dec(cm_new, state->box[d]);
4563 for (k = k0; (k < k1); k++)
4565 rvec_dec(state->x[k], state->box[d]);
4568 while (cm_new[d] < 0)
4570 rvec_inc(cm_new, state->box[d]);
4571 for (k = k0; (k < k1); k++)
4573 rvec_inc(state->x[k], state->box[d]);
4579 copy_rvec(cm_new, cg_cm[cg]);
4581 /* Determine where this cg should go */
4584 for (d = 0; d < dd->ndim; d++)
4589 flag |= DD_FLAG_FW(d);
4595 else if (dev[dim] == -1)
4597 flag |= DD_FLAG_BW(d);
4600 if (dd->nc[dim] > 2)
4611 /* Temporarily store the flag in move */
4612 move[cg] = mc + flag;
4616 static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4617 gmx_domdec_t *dd, ivec tric_dir,
4618 t_state *state, rvec **f,
4627 int ncg[DIM*2], nat[DIM*2];
4628 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4629 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4630 int sbuf[2], rbuf[2];
4631 int home_pos_cg, home_pos_at, buf_pos;
4633 gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
4636 real inv_ncg, pos_d;
4638 rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4640 cginfo_mb_t *cginfo_mb;
4641 gmx_domdec_comm_t *comm;
4643 int nthread, thread;
4647 check_screw_box(state->box);
4651 if (fr->cutoff_scheme == ecutsGROUP)
4656 for (i = 0; i < estNR; i++)
4662 case estX: /* Always present */ break;
4663 case estV: bV = (state->flags & (1<<i)); break;
4664 case estSDX: bSDX = (state->flags & (1<<i)); break;
4665 case estCGP: bCGP = (state->flags & (1<<i)); break;
4668 case estDISRE_INITF:
4669 case estDISRE_RM3TAV:
4670 case estORIRE_INITF:
4672 /* No processing required */
4675 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4680 if (dd->ncg_tot > comm->nalloc_int)
4682 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4683 srenew(comm->buf_int, comm->nalloc_int);
4685 move = comm->buf_int;
4687 /* Clear the count */
4688 for (c = 0; c < dd->ndim*2; c++)
4694 npbcdim = dd->npbcdim;
4696 for (d = 0; (d < DIM); d++)
4698 limitd[d] = dd->comm->cellsize_min[d];
4699 if (d >= npbcdim && dd->ci[d] == 0)
4701 cell_x0[d] = -GMX_FLOAT_MAX;
4705 cell_x0[d] = comm->cell_x0[d];
4707 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4709 cell_x1[d] = GMX_FLOAT_MAX;
4713 cell_x1[d] = comm->cell_x1[d];
4717 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4718 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4722 /* We check after communication if a charge group moved
4723 * more than one cell. Set the pre-comm check limit to float_max.
4725 limit0[d] = -GMX_FLOAT_MAX;
4726 limit1[d] = GMX_FLOAT_MAX;
4730 make_tric_corr_matrix(npbcdim, state->box, tcm);
4732 cgindex = dd->cgindex;
4734 nthread = gmx_omp_nthreads_get(emntDomdec);
4736 /* Compute the center of geometry for all home charge groups
4737 * and put them in the box and determine where they should go.
4739 #pragma omp parallel for num_threads(nthread) schedule(static)
4740 for (thread = 0; thread < nthread; thread++)
4742 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4743 cell_x0, cell_x1, limitd, limit0, limit1,
4745 ( thread *dd->ncg_home)/nthread,
4746 ((thread+1)*dd->ncg_home)/nthread,
4747 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4751 for (cg = 0; cg < dd->ncg_home; cg++)
4756 flag = mc & ~DD_FLAG_NRCG;
4757 mc = mc & DD_FLAG_NRCG;
4760 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4762 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4763 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4765 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4766 /* We store the cg size in the lower 16 bits
4767 * and the place where the charge group should go
4768 * in the next 6 bits. This saves some communication volume.
4770 nrcg = cgindex[cg+1] - cgindex[cg];
4771 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4777 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
4778 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
4781 for (i = 0; i < dd->ndim*2; i++)
4783 *ncg_moved += ncg[i];
4800 /* Make sure the communication buffers are large enough */
4801 for (mc = 0; mc < dd->ndim*2; mc++)
4803 nvr = ncg[mc] + nat[mc]*nvec;
4804 if (nvr > comm->cgcm_state_nalloc[mc])
4806 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4807 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4811 switch (fr->cutoff_scheme)
4814 /* Recalculating cg_cm might be cheaper than communicating,
4815 * but that could give rise to rounding issues.
4818 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4819 nvec, cg_cm, comm, bCompact);
4822 /* Without charge groups we send the moved atom coordinates
4823 * over twice. This is so the code below can be used without
4824 * many conditionals for both for with and without charge groups.
4827 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4828 nvec, state->x, comm, FALSE);
4831 home_pos_cg -= *ncg_moved;
4835 gmx_incons("unimplemented");
4841 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4842 nvec, vec++, state->x, comm, bCompact);
4845 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4846 nvec, vec++, state->v, comm, bCompact);
4850 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4851 nvec, vec++, state->sd_X, comm, bCompact);
4855 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4856 nvec, vec++, state->cg_p, comm, bCompact);
4861 compact_ind(dd->ncg_home, move,
4862 dd->index_gl, dd->cgindex, dd->gatindex,
4863 dd->ga2la, comm->bLocalCG,
4868 if (fr->cutoff_scheme == ecutsVERLET)
4870 moved = get_moved(comm, dd->ncg_home);
4872 for (k = 0; k < dd->ncg_home; k++)
4879 moved = fr->ns.grid->cell_index;
4882 clear_and_mark_ind(dd->ncg_home, move,
4883 dd->index_gl, dd->cgindex, dd->gatindex,
4884 dd->ga2la, comm->bLocalCG,
4888 cginfo_mb = fr->cginfo_mb;
4890 *ncg_stay_home = home_pos_cg;
4891 for (d = 0; d < dd->ndim; d++)
4897 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4900 /* Communicate the cg and atom counts */
4905 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4906 d, dir, sbuf[0], sbuf[1]);
4908 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4910 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4912 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4913 srenew(comm->buf_int, comm->nalloc_int);
4916 /* Communicate the charge group indices, sizes and flags */
4917 dd_sendrecv_int(dd, d, dir,
4918 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4919 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4921 nvs = ncg[cdd] + nat[cdd]*nvec;
4922 i = rbuf[0] + rbuf[1] *nvec;
4923 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4925 /* Communicate cgcm and state */
4926 dd_sendrecv_rvec(dd, d, dir,
4927 comm->cgcm_state[cdd], nvs,
4928 comm->vbuf.v+nvr, i);
4929 ncg_recv += rbuf[0];
4930 nat_recv += rbuf[1];
4934 /* Process the received charge groups */
4936 for (cg = 0; cg < ncg_recv; cg++)
4938 flag = comm->buf_int[cg*DD_CGIBS+1];
4940 if (dim >= npbcdim && dd->nc[dim] > 2)
4942 /* No pbc in this dim and more than one domain boundary.
4943 * We do a separate check if a charge group didn't move too far.
4945 if (((flag & DD_FLAG_FW(d)) &&
4946 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4947 ((flag & DD_FLAG_BW(d)) &&
4948 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4950 cg_move_error(fplog, dd, step, cg, dim,
4951 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4953 comm->vbuf.v[buf_pos],
4954 comm->vbuf.v[buf_pos],
4955 comm->vbuf.v[buf_pos][dim]);
4962 /* Check which direction this cg should go */
4963 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4967 /* The cell boundaries for dimension d2 are not equal
4968 * for each cell row of the lower dimension(s),
4969 * therefore we might need to redetermine where
4970 * this cg should go.
4973 /* If this cg crosses the box boundary in dimension d2
4974 * we can use the communicated flag, so we do not
4975 * have to worry about pbc.
4977 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4978 (flag & DD_FLAG_FW(d2))) ||
4979 (dd->ci[dim2] == 0 &&
4980 (flag & DD_FLAG_BW(d2)))))
4982 /* Clear the two flags for this dimension */
4983 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4984 /* Determine the location of this cg
4985 * in lattice coordinates
4987 pos_d = comm->vbuf.v[buf_pos][dim2];
4990 for (d3 = dim2+1; d3 < DIM; d3++)
4993 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4996 /* Check of we are not at the box edge.
4997 * pbc is only handled in the first step above,
4998 * but this check could move over pbc while
4999 * the first step did not due to different rounding.
5001 if (pos_d >= cell_x1[dim2] &&
5002 dd->ci[dim2] != dd->nc[dim2]-1)
5004 flag |= DD_FLAG_FW(d2);
5006 else if (pos_d < cell_x0[dim2] &&
5009 flag |= DD_FLAG_BW(d2);
5011 comm->buf_int[cg*DD_CGIBS+1] = flag;
5014 /* Set to which neighboring cell this cg should go */
5015 if (flag & DD_FLAG_FW(d2))
5019 else if (flag & DD_FLAG_BW(d2))
5021 if (dd->nc[dd->dim[d2]] > 2)
5033 nrcg = flag & DD_FLAG_NRCG;
5036 if (home_pos_cg+1 > dd->cg_nalloc)
5038 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5039 srenew(dd->index_gl, dd->cg_nalloc);
5040 srenew(dd->cgindex, dd->cg_nalloc+1);
5042 /* Set the global charge group index and size */
5043 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
5044 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5045 /* Copy the state from the buffer */
5046 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5047 if (fr->cutoff_scheme == ecutsGROUP)
5050 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5054 /* Set the cginfo */
5055 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5056 dd->index_gl[home_pos_cg]);
5059 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
5062 if (home_pos_at+nrcg > state->nalloc)
5064 dd_realloc_state(state, f, home_pos_at+nrcg);
5066 for (i = 0; i < nrcg; i++)
5068 copy_rvec(comm->vbuf.v[buf_pos++],
5069 state->x[home_pos_at+i]);
5073 for (i = 0; i < nrcg; i++)
5075 copy_rvec(comm->vbuf.v[buf_pos++],
5076 state->v[home_pos_at+i]);
5081 for (i = 0; i < nrcg; i++)
5083 copy_rvec(comm->vbuf.v[buf_pos++],
5084 state->sd_X[home_pos_at+i]);
5089 for (i = 0; i < nrcg; i++)
5091 copy_rvec(comm->vbuf.v[buf_pos++],
5092 state->cg_p[home_pos_at+i]);
5096 home_pos_at += nrcg;
5100 /* Reallocate the buffers if necessary */
5101 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5103 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5104 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5106 nvr = ncg[mc] + nat[mc]*nvec;
5107 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5109 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5110 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5112 /* Copy from the receive to the send buffers */
5113 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
5114 comm->buf_int + cg*DD_CGIBS,
5115 DD_CGIBS*sizeof(int));
5116 memcpy(comm->cgcm_state[mc][nvr],
5117 comm->vbuf.v[buf_pos],
5118 (1+nrcg*nvec)*sizeof(rvec));
5119 buf_pos += 1 + nrcg*nvec;
5126 /* With sorting (!bCompact) the indices are now only partially up to date
5127 * and ncg_home and nat_home are not the real count, since there are
5128 * "holes" in the arrays for the charge groups that moved to neighbors.
5130 if (fr->cutoff_scheme == ecutsVERLET)
5132 moved = get_moved(comm, home_pos_cg);
5134 for (i = dd->ncg_home; i < home_pos_cg; i++)
5139 dd->ncg_home = home_pos_cg;
5140 dd->nat_home = home_pos_at;
5145 "Finished repartitioning: cgs moved out %d, new home %d\n",
5146 *ncg_moved, dd->ncg_home-*ncg_moved);
5151 void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5153 dd->comm->cycl[ddCycl] += cycles;
5154 dd->comm->cycl_n[ddCycl]++;
5155 if (cycles > dd->comm->cycl_max[ddCycl])
5157 dd->comm->cycl_max[ddCycl] = cycles;
5161 static double force_flop_count(t_nrnb *nrnb)
5168 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5170 /* To get closer to the real timings, we half the count
5171 * for the normal loops and again half it for water loops.
5174 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5176 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5180 sum += nrnb->n[i]*0.50*cost_nrnb(i);
5183 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5186 if (strstr(name, "W3") != NULL || strstr(name, "W4") != NULL)
5188 sum += nrnb->n[i]*cost_nrnb(i);
5191 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5193 sum += nrnb->n[i]*cost_nrnb(i);
5199 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5201 if (dd->comm->eFlop)
5203 dd->comm->flop -= force_flop_count(nrnb);
5206 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5208 if (dd->comm->eFlop)
5210 dd->comm->flop += force_flop_count(nrnb);
5215 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5219 for (i = 0; i < ddCyclNr; i++)
5221 dd->comm->cycl[i] = 0;
5222 dd->comm->cycl_n[i] = 0;
5223 dd->comm->cycl_max[i] = 0;
5226 dd->comm->flop_n = 0;
/* Gather the per-rank load measurements along each DD dimension and
 * reduce them into comm->load[]; after looping over all dimensions the
 * fully reduced load ends up in comm->load[0] on the DD master rank,
 * where the time-averaged statistics are then accumulated.
 */
static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
{
    gmx_domdec_comm_t *comm;
    gmx_domdec_load_t *load;
    gmx_domdec_root_t *root = NULL;
    int                d, dim, cid, i, pos;
    float              cell_frac = 0, sbuf[DD_NLOAD_MAX];

        fprintf(debug, "get_load_distribution start\n");

    wallcycle_start(wcycle, ewcDDCOMMLOAD);

    /* TRUE when separate PME ranks are in use */
    bSepPME = (dd->pme_nodeid >= 0);

    /* Loop from the highest to the lowest DD dimension, so that the
     * already reduced load of dimension d+1 can be forwarded along
     * dimension d.
     */
    for (d = dd->ndim-1; d >= 0; d--)
        /* Check if we participate in the communication in this dimension */
        if (d == dd->ndim-1 ||
            (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
            load = &comm->load[d];
                /* Fraction of the cell range owned by this rank (DLB) */
                cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
            /* Pack the send buffer; the layout differs between the last
             * dimension (raw force load) and lower dimensions (loads
             * already reduced for dimension d+1).
             */
            if (d == dd->ndim-1)
                sbuf[pos++] = dd_force_load(comm);
                sbuf[pos++] = sbuf[0];
                    sbuf[pos++] = sbuf[0];
                    sbuf[pos++] = cell_frac;
                        sbuf[pos++] = comm->cell_f_max0[d];
                        sbuf[pos++] = comm->cell_f_min1[d];
                    /* With separate PME ranks also report the PP and PME
                     * cycle counts for PP/PME balance statistics.
                     */
                    sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
                    sbuf[pos++] = comm->cycl[ddCyclPME];
                /* Forward the reductions already done for dimension d+1 */
                sbuf[pos++] = comm->load[d+1].sum;
                sbuf[pos++] = comm->load[d+1].max;
                    sbuf[pos++] = comm->load[d+1].sum_m;
                    sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
                    sbuf[pos++] = comm->load[d+1].flags;
                        sbuf[pos++] = comm->cell_f_max0[d];
                        sbuf[pos++] = comm->cell_f_min1[d];
                    sbuf[pos++] = comm->load[d+1].mdf;
                    sbuf[pos++] = comm->load[d+1].pme;
            /* Communicate a row in DD direction d.
             * The communicators are setup such that the root always has rank 0.
             */
            MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
                       load->load, load->nload*sizeof(float), MPI_BYTE,
                       0, comm->mpi_comm_load[d]);
            if (dd->ci[dim] == dd->master_ci[dim])
                /* We are the root, process this row */
                if (comm->bDynLoadBal)
                    root = comm->root[d];
                /* Reduce the gathered loads over all cells in this row */
                for (i = 0; i < dd->nc[dim]; i++)
                    load->sum += load->load[pos++];
                    load->max = max(load->max, load->load[pos]);
                        /* This direction could not be load balanced properly,
                         * therefore we need to use the maximum iso the average load.
                         */
                        load->sum_m = max(load->sum_m, load->load[pos]);
                        load->sum_m += load->load[pos];
                        load->cvol_min = min(load->cvol_min, load->load[pos]);
                        /* flags travel as float; round back to int */
                        load->flags = (int)(load->load[pos++] + 0.5);
                            root->cell_f_max0[i] = load->load[pos++];
                            root->cell_f_min1[i] = load->load[pos++];
                        load->mdf = max(load->mdf, load->load[pos]);
                            load->pme = max(load->pme, load->load[pos]);
                if (comm->bDynLoadBal && root->bLimited)
                    /* Balancing was limited in this row: use the scaled
                     * maximum and record the limitation flag for dim d.
                     */
                    load->sum_m *= dd->nc[dim];
                    load->flags |= (1<<d);

    /* Accumulate the fully reduced load (in comm->load[0]) into the
     * time-averaged statistics; only meaningful on the DD master.
     */
    comm->nload += dd_load_count(comm);
    comm->load_step += comm->cycl[ddCyclStep];
    comm->load_sum += comm->load[0].sum;
    comm->load_max += comm->load[0].max;
    if (comm->bDynLoadBal)
        for (d = 0; d < dd->ndim; d++)
            if (comm->load[0].flags & (1<<d))
                /* Count the steps where balancing in dim d was limited */
                comm->load_lim[d]++;
        comm->load_mdf += comm->load[0].mdf;
        comm->load_pme += comm->load[0].pme;

    wallcycle_stop(wcycle, ewcDDCOMMLOAD);

        fprintf(debug, "get_load_distribution finished\n");
5406 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5408 /* Return the relative performance loss on the total run time
5409 * due to the force calculation load imbalance.
5411 if (dd->comm->nload > 0)
5414 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5415 (dd->comm->load_step*dd->nnodes);
/* Print the time-averaged load-balance statistics at the end of the run:
 * force-load imbalance, DLB limitation percentages and, with separate
 * PME ranks, the PP/PME balance.  Output goes to the log file and stderr
 * and is produced only on the DD master rank.
 */
static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
{
    int                npp, npme, nnodes, d, limp;
    float              imbal, pme_f_ratio, lossf, lossp = 0;
    gmx_domdec_comm_t *comm;

    /* Only report when load statistics were actually collected */
    if (DDMASTER(dd) && comm->nload > 0)
        /* npme is 0 when no separate PME ranks are in use */
        npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
        nnodes = npp + npme;
        /* Relative imbalance: max load over average load, minus 1 */
        imbal = comm->load_max*npp/comm->load_sum - 1;
        lossf = dd_force_imb_perf_loss(dd);
        sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
        fprintf(fplog, "%s", buf);
        fprintf(stderr, "\n");
        fprintf(stderr, "%s", buf);
        sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
        fprintf(fplog, "%s", buf);
        fprintf(stderr, "%s", buf);
        if (comm->bDynLoadBal)
            /* Report, per dimension, the percentage of steps where DLB
             * was limited by the minimum cell size settings.
             */
            sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
            for (d = 0; d < dd->ndim; d++)
                /* Rounded percentage of limited steps for this dimension */
                limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
                sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
            sprintf(buf+strlen(buf), "\n");
            fprintf(fplog, "%s", buf);
            fprintf(stderr, "%s", buf);
            /* PP/PME balance, only meaningful with separate PME ranks */
            pme_f_ratio = comm->load_pme/comm->load_mdf;
            lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
                /* PME ranks waiting: scale the loss by the PME fraction */
                lossp *= (float)npme/(float)nnodes;
                /* PP ranks waiting: scale the loss by the PP fraction */
                lossp *= (float)npp/(float)nnodes;
            sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
            fprintf(fplog, "%s", buf);
            fprintf(stderr, "%s", buf);
            sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
            fprintf(fplog, "%s", buf);
            fprintf(stderr, "%s", buf);
        fprintf(fplog, "\n");
        fprintf(stderr, "\n");

        /* Warn when the load-imbalance loss exceeds the threshold */
        if (lossf >= DD_PERF_LOSS_WARN)
                    "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
                    "      in the domain decomposition.\n", lossf*100);
            if (!comm->bDynLoadBal)
                sprintf(buf+strlen(buf), "      You might want to use dynamic load balancing (option -dlb.)\n");
                sprintf(buf+strlen(buf), "      You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
            fprintf(fplog, "%s\n", buf);
            fprintf(stderr, "%s\n", buf);
        /* Warn when the PP/PME imbalance loss exceeds the threshold;
         * the sign of lossp tells which side had too much work.
         */
        if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS_WARN)
                    "NOTE: %.1f %% performance was lost because the PME ranks\n"
                    "      had %s work to do than the PP ranks.\n"
                    "      You might want to %s the number of PME ranks\n"
                    "      or %s the cut-off and the grid spacing.\n",
                    (lossp < 0) ? "less" : "more",
                    (lossp < 0) ? "decrease" : "increase",
                    (lossp < 0) ? "decrease" : "increase");
            fprintf(fplog, "%s\n", buf);
            fprintf(stderr, "%s\n", buf);
5518 static float dd_vol_min(gmx_domdec_t *dd)
5520 return dd->comm->load[0].cvol_min*dd->nnodes;
5523 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5525 return dd->comm->load[0].flags;
5528 static float dd_f_imbal(gmx_domdec_t *dd)
5530 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5533 float dd_pme_f_ratio(gmx_domdec_t *dd)
5535 if (dd->comm->cycl_n[ddCyclPME] > 0)
5537 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
/* Print a per-step DD load report to the log file: which dimensions
 * limited DLB, the cell volume ratio, the force-load imbalance and,
 * when available, the PME mesh/force ratio.
 */
static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
{
    flags = dd_load_flags(dd);
        /* One character per dimension where balancing was limited */
                "DD load balancing is limited by minimum cell size in dimension");
        for (d = 0; d < dd->ndim; d++)
                fprintf(fplog, " %c", dim2char(dd->dim[d]));
        fprintf(fplog, "\n");
    fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
    if (dd->comm->bDynLoadBal)
        /* '!' marks that balancing was limited this step */
        fprintf(fplog, " vol min/aver %5.3f%c",
                dd_vol_min(dd), flags ? '!' : ' ');
    fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
    if (dd->comm->cycl_n[ddCyclPME])
        fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
    fprintf(fplog, "\n\n");
5578 static void dd_print_load_verbose(gmx_domdec_t *dd)
5580 if (dd->comm->bDynLoadBal)
5582 fprintf(stderr, "vol %4.2f%c ",
5583 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5585 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5586 if (dd->comm->cycl_n[ddCyclPME])
5588 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
/* Create the row communicator used for load measurements along DD
 * dimension dim_ind (for the cell row through grid location loc), and
 * allocate the load-balancing buffers needed by the row root.
 */
static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
{
    gmx_domdec_root_t *root;
    gmx_bool           bPartOfGroup = FALSE;

    dim = dd->dim[dim_ind];
    copy_ivec(loc, loc_c);
    /* Walk the whole row of cells along dim; this rank belongs to the
     * group when its DD rank appears in the row.
     */
    for (i = 0; i < dd->nc[dim]; i++)
        rank = dd_index(dd->nc, loc_c);
        if (rank == dd->rank)
            /* This process is part of the group */
            bPartOfGroup = TRUE;
    /* Ranks outside the group pass MPI_UNDEFINED and get no communicator */
    MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
        dd->comm->mpi_comm_load[dim_ind] = c_row;
        if (dd->comm->eDLB != edlbNO)
            if (dd->ci[dim] == dd->master_ci[dim])
                /* This is the root process of this row */
                snew(dd->comm->root[dim_ind], 1);
                root = dd->comm->root[dim_ind];
                /* Cell boundary fractions and DLB bookkeeping for the row */
                snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
                snew(root->old_cell_f, dd->nc[dim]+1);
                snew(root->bCellMin, dd->nc[dim]);
                snew(root->cell_f_max0, dd->nc[dim]);
                snew(root->cell_f_min1, dd->nc[dim]);
                snew(root->bound_min, dd->nc[dim]);
                snew(root->bound_max, dd->nc[dim]);
                snew(root->buf_ncd, dd->nc[dim]);
                /* This is not a root process, we only need to receive cell_f */
                snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
        if (dd->ci[dim] == dd->master_ci[dim])
            /* Gather buffer for the load of the whole row */
            snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
/* Set up the communicator of PP ranks that share a GPU on the same
 * physical node (used for DLB resource sharing).  No-op without MPI,
 * without GPUs, or on non-PP ranks.
 */
void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
                                   const gmx_hw_info_t gmx_unused *hwinfo,
                                   const gmx_hw_opt_t gmx_unused *hw_opt)
{
    int      physicalnode_id_hash;
    MPI_Comm mpi_comm_pp_physicalnode;

    if (!(cr->duty & DUTY_PP) ||
        hw_opt->gpu_opt.ncuda_dev_use == 0)
    {
        /* Only PP nodes (currently) use GPUs.
         * If we don't have GPUs, there are no resources to share.
         */
        return;
    }

    physicalnode_id_hash = gmx_physicalnode_id_hash();

    gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);

        fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
        fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
                dd->rank, physicalnode_id_hash, gpu_id);
    /* Split the PP communicator over the physical nodes */
    /* TODO: See if we should store this (before), as it's also used for
     * the nodecomm summation.
     */
    MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
                   &mpi_comm_pp_physicalnode);
    /* Second split: ranks on the same node using the same GPU id */
    MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
                   &dd->comm->mpi_comm_gpu_shared);
    MPI_Comm_free(&mpi_comm_pp_physicalnode);
    MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);

        fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);

    /* Note that some ranks could share a GPU, while others don't */

    if (dd->comm->nrank_gpu_shared == 1)
    {
        /* This rank has its GPU to itself: the sharing communicator
         * is not needed.
         */
        MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
/* Create the row communicators for load measurement in all DD dimensions:
 * one for dimension 0, one per row of the first dimension for dimension 1
 * and one per (row,column) combination for dimension 2.
 */
static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
{
    int  dim0, dim1, i, j;

        fprintf(debug, "Making load communicators\n");

    snew(dd->comm->load, dd->ndim);
    snew(dd->comm->mpi_comm_load, dd->ndim);

    make_load_communicator(dd, 0, loc);
        /* One communicator for each row along the first dimension */
        for (i = 0; i < dd->nc[dim0]; i++)
            make_load_communicator(dd, 1, loc);
        /* One communicator for each row/column combination of the two
         * lower dimensions.
         */
        for (i = 0; i < dd->nc[dim0]; i++)
            for (j = 0; j < dd->nc[dim1]; j++)
                make_load_communicator(dd, 2, loc);

        fprintf(debug, "Finished making load communicators\n");
/* Set up the DD grid topology: the forward/backward neighbor rank of this
 * cell in every DD dimension, the communication zones (1, 2 or 4 zones
 * per dimension depending on 1D/2D/3D decomposition) and the i-zone
 * ranges with their allowed shift intervals used for neighbor searching.
 */
void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
{
    int                     d, dim, i, j, m;
    ivec                    dd_zp[DD_MAXIZONE];
    gmx_domdec_zones_t     *zones;
    gmx_domdec_ns_ranges_t *izone;

    /* Determine the +1 and -1 neighbor rank in each decomposed dimension,
     * wrapping periodically at the grid edges.
     */
    for (d = 0; d < dd->ndim; d++)
        copy_ivec(dd->ci, tmp);
        tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
        dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
        copy_ivec(dd->ci, tmp);
        tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
        dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
            fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
                    dd->neighbor[d][1]);

        fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
                dd->nc[XX], dd->nc[YY], dd->nc[ZZ],
                dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
            /* 3D decomposition: copy the 3D zone pair table */
            for (i = 0; i < nzonep; i++)
                copy_ivec(dd_zp3[i], dd_zp[i]);
            /* 2D decomposition: copy the 2D zone pair table */
            for (i = 0; i < nzonep; i++)
                copy_ivec(dd_zp2[i], dd_zp[i]);
            /* 1D decomposition: copy the 1D zone pair table */
            for (i = 0; i < nzonep; i++)
                copy_ivec(dd_zp1[i], dd_zp[i]);
            gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");

    zones = &dd->comm->zones;

    /* Set the zone shifts: for each zone, the shift in each decomposed
     * dimension comes from the static zone order table dd_zo.
     */
    for (i = 0; i < nzone; i++)
        clear_ivec(zones->shift[i]);
        for (d = 0; d < dd->ndim; d++)
            zones->shift[i][dd->dim[d]] = dd_zo[i][m++];

    for (i = 0; i < nzone; i++)
        for (d = 0; d < DIM; d++)
            /* Grid coordinate of zone i, wrapped into the grid */
            s[d] = dd->ci[d] - zones->shift[i][d];
            else if (s[d] >= dd->nc[d])
    /* Set up the i-zones: ranges of j-zones each i-zone interacts with */
    zones->nizone = nzonep;
    for (i = 0; i < zones->nizone; i++)
        if (dd_zp[i][0] != i)
            gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
        izone = &zones->izone[i];
        izone->j0 = dd_zp[i][1];
        izone->j1 = dd_zp[i][2];
        for (dim = 0; dim < DIM; dim++)
            if (dd->nc[dim] == 1)
                /* All shifts should be allowed */
                izone->shift0[dim] = -1;
                izone->shift1[dim] = 1;
                /* NOTE(review): the following block (indexing with d and
                 * dd->shift) looks like an older, disabled variant of the
                 * shift determination below — confirm against the full
                 * source before touching it.
                 */
                izone->shift0[d] = 0;
                izone->shift1[d] = 0;
                for(j=izone->j0; j<izone->j1; j++) {
                    if (dd->shift[j][d] > dd->shift[i][d])
                        izone->shift0[d] = -1;
                    if (dd->shift[j][d] < dd->shift[i][d])
                        izone->shift1[d] = 1;
                /* Assume the shift are not more than 1 cell */
                izone->shift0[dim] = 1;
                izone->shift1[dim] = -1;
                /* Narrow the shift interval to the min/max shift
                 * difference over the j-zones of this i-zone.
                 */
                for (j = izone->j0; j < izone->j1; j++)
                    shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
                    if (shift_diff < izone->shift0[dim])
                        izone->shift0[dim] = shift_diff;
                    if (shift_diff > izone->shift1[dim])
                        izone->shift1[dim] = shift_diff;

    if (dd->comm->eDLB != edlbNO)
        snew(dd->comm->root, dd->ndim);

    if (dd->comm->bRecordLoad)
        make_load_communicators(dd);
/* Create (or adopt) the particle-particle communicator, determine this
 * rank's DD coordinates and the rank/coordinates of the DD master.
 * With Cartesian ordering an MPI Cartesian communicator replaces the
 * group communicator; otherwise the plain group rank is used as DD index.
 */
static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
{
    gmx_domdec_comm_t *comm;

    if (comm->bCartesianPP)
        /* Set up cartesian communication for the particle-particle part */
            fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
                    dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
        /* All DD dimensions are periodic for the Cartesian grid */
        for (i = 0; i < DIM; i++)
        MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
        /* We overwrite the old communicator with the new cartesian one */
        cr->mpi_comm_mygroup = comm_cart;

    dd->mpi_comm_all = cr->mpi_comm_mygroup;
    MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);

    if (comm->bCartesianPP_PME)
        /* Since we want to use the original cartesian setup for sim,
         * and not the one after split, we need to make an index.
         */
        snew(comm->ddindex2ddnodeid, dd->nnodes);
        comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
        gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
        /* Get the rank of the DD master,
         * above we made sure that the master node is a PP node.
         */
        MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
    else if (comm->bCartesianPP)
        if (cr->npmenodes == 0)
            /* The PP communicator is also
             * the communicator for this simulation
             */
            cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
        cr->nodeid = dd->rank;

        MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);

        /* We need to make an index to go from the coordinates
         * to the nodeid of this simulation.
         */
        snew(comm->ddindex2simnodeid, dd->nnodes);
        snew(buf, dd->nnodes);
        if (cr->duty & DUTY_PP)
            buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
        /* Communicate the ddindex to simulation nodeid index */
        MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
                      cr->mpi_comm_mysim);

        /* Determine the master coordinates and rank.
         * The DD master should be the same node as the master of this sim.
         */
        for (i = 0; i < dd->nnodes; i++)
            if (comm->ddindex2simnodeid[i] == 0)
                ddindex2xyz(dd->nc, i, dd->master_ci);
                MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
            fprintf(debug, "The master rank is %d\n", dd->masterrank);
        /* No Cartesian communicators */
        /* We use the rank in dd->comm->all as DD index */
        ddindex2xyz(dd->nc, dd->rank, dd->ci);
        /* The simulation master nodeid is 0, so the DD master rank is also 0 */
        clear_ivec(dd->master_ci);

                "Domain decomposition rank %d, coordinates %d %d %d\n\n",
                dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
                "Domain decomposition rank %d, coordinates %d %d %d\n\n",
                dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* On ranks that did not build it in make_pp_communicator (plain
 * Cartesian-PP setup without a PP/PME Cartesian grid), obtain the
 * DD-index to simulation-nodeid table via an all-reduce: each PP rank
 * contributes its own entry, the sum yields the complete table.
 */
static void receive_ddindex2simnodeid(t_commrec *cr)
{
    gmx_domdec_comm_t *comm;

    if (!comm->bCartesianPP_PME && comm->bCartesianPP)
        snew(comm->ddindex2simnodeid, dd->nnodes);
        /* Scratch buffer: zero everywhere except this rank's own entry */
        snew(buf, dd->nnodes);
        if (cr->duty & DUTY_PP)
            buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
        /* Communicate the ddindex to simulation nodeid index */
        MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
                      cr->mpi_comm_mysim);
/* Allocate and return the DD-master bookkeeping structure, sized for
 * dd->nnodes ranks, ncg charge groups and natoms atoms.  The whole-system
 * rvec buffer is only needed when there are more ranks than the
 * send/receive distribution threshold.
 */
static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
                                                     int ncg, int natoms)
{
    gmx_domdec_master_t *ma;

    /* Per-rank charge-group and atom counts plus index/communication
     * buffers used when distributing and collecting the state.
     */
    snew(ma->ncg, dd->nnodes);
    snew(ma->index, dd->nnodes+1);
    snew(ma->nat, dd->nnodes);
    snew(ma->ibuf, dd->nnodes*2);
    /* Cell boundaries per dimension (nc[i]+1 boundaries for nc[i] cells) */
    snew(ma->cell_x, DIM);
    for (i = 0; i < DIM; i++)
        snew(ma->cell_x[i], dd->nc[i]+1);

    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
        /* Scatter/gather buffer for the whole-system coordinates */
        snew(ma->vbuf, natoms);
/* Split the simulation communicator into a PP group and a PME-only group.
 * When possible (Cartesian node order and a compatible PME rank count) a
 * combined PP/PME Cartesian communicator is created with the PME ranks
 * stacked along one dimension; otherwise the ranks are split by an
 * interleaved or PP-first/PME-last ordering.  Sets cr->duty accordingly.
 */
static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused dd_node_order,
                               int gmx_unused reorder)
{
    gmx_domdec_comm_t *comm;

    if (comm->bCartesianPP)
        /* A Cartesian PP/PME grid requires that the PME ranks fill whole
         * grid slabs: npmenodes*nc[i] must be divisible by nnodes.
         */
        for (i = 1; i < DIM; i++)
            bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
        if (bDiv[YY] || bDiv[ZZ])
            comm->bCartesianPP_PME = TRUE;
            /* If we have 2D PME decomposition, which is always in x+y,
             * we stack the PME only nodes in z.
             * Otherwise we choose the direction that provides the thinnest slab
             * of PME only nodes as this will have the least effect
             * on the PP communication.
             * But for the PME communication the opposite might be better.
             */
            if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
                             dd->nc[YY] > dd->nc[ZZ]))
                comm->cartpmedim = ZZ;
                comm->cartpmedim = YY;
            /* Extend the grid in the PME-stacking dimension by the PME slabs */
            comm->ntot[comm->cartpmedim]
                += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
            fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
                    "Will not use a Cartesian communicator for PP <-> PME\n\n");

    if (comm->bCartesianPP_PME)
            fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
        for (i = 0; i < DIM; i++)
        MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
        MPI_Comm_rank(comm_cart, &rank);
        if (MASTERNODE(cr) && rank != 0)
            gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
        /* With this assignment we lose the link to the original communicator,
         * which will usually be MPI_COMM_WORLD, unless have multisim.
         */
        cr->mpi_comm_mysim = comm_cart;
        cr->sim_nodeid = rank;

        MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);

            fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
                    cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);

        /* Ranks inside the PP part of the grid do PP work, the rest PME */
        if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
        if (cr->npmenodes == 0 ||
            dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
            cr->duty = DUTY_PME;

        /* Split the sim communicator into PP and PME only nodes */
        MPI_Comm_split(cr->mpi_comm_mysim,
                       dd_index(comm->ntot, dd->ci),
                       &cr->mpi_comm_mygroup);
        switch (dd_node_order)
                    fprintf(fplog, "Order of the ranks: PP first, PME last\n");
            case ddnoINTERLEAVE:
                /* Interleave the PP-only and PME-only nodes,
                 * as on clusters with dual-core machines this will double
                 * the communication bandwidth of the PME processes
                 * and thus speed up the PP <-> PME and inter PME communication.
                 */
                    fprintf(fplog, "Interleaving PP and PME ranks\n");
                comm->pmenodes = dd_pmenodes(cr);
                gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);

        /* Ranks with no assigned PME peer are themselves PME-only ranks */
        if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
            cr->duty = DUTY_PME;

        /* Split the sim communicator into PP and PME only nodes */
        MPI_Comm_split(cr->mpi_comm_mysim,
                       &cr->mpi_comm_mygroup);
        MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);

        fprintf(fplog, "This rank does only %s work.\n\n",
                (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
/* Top-level communicator setup for domain decomposition: optionally split
 * off the PME-only ranks, build the PP communicator, and establish the
 * PP -> PME pairing.  On the master rank the DD-master bookkeeping
 * structure is allocated.
 */
void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
{
    gmx_domdec_comm_t *comm;

    copy_ivec(dd->nc, comm->ntot);

    comm->bCartesianPP     = (dd_node_order == ddnoCARTESIAN);
    comm->bCartesianPP_PME = FALSE;

    /* Reorder the nodes by default. This might change the MPI ranks.
     * Real reordering is only supported on very few architectures,
     * Blue Gene is one of them.
     */
    CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);

    if (cr->npmenodes > 0)
        /* Split the communicator into a PP and PME part */
        split_communicator(fplog, cr, dd_node_order, CartReorder);
        if (comm->bCartesianPP_PME)
            /* We (possibly) reordered the nodes in split_communicator,
             * so it is no longer required in make_pp_communicator.
             */
            CartReorder = FALSE;
        /* All nodes do PP and PME */
        /* We do not require separate communicators */
        cr->mpi_comm_mygroup = cr->mpi_comm_mysim;

    if (cr->duty & DUTY_PP)
        /* Copy or make a new PP communicator */
        make_pp_communicator(fplog, cr, CartReorder);
        receive_ddindex2simnodeid(cr);

    if (!(cr->duty & DUTY_PME))
        /* Set up the communication to our PME node */
        dd->pme_nodeid           = dd_simnode2pmenode(cr, cr->sim_nodeid);
        dd->pme_receive_vir_ener = receive_vir_ener(cr);
            fprintf(debug, "My pme_nodeid %d receive ener %d\n",
                    dd->pme_nodeid, dd->pme_receive_vir_ener);
        dd->pme_nodeid = -1;

        /* Master-only bookkeeping sized by global charge groups/atoms */
        dd->ma = init_gmx_domdec_master_t(dd,
                                          comm->cgs_gl.index[comm->cgs_gl.nr]);
/* Parse the static load balancing fractions for one direction from the
 * user-supplied size string (e.g. mdrun -ddcsx).  Returns a normalized
 * array of nc relative cell sizes, or NULL when no static balancing is
 * requested (nc <= 1 or no string given).
 */
static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
{
    real *slb_frac, tot;

    if (nc > 1 && size_string != NULL)
            fprintf(fplog, "Using static load balancing for the %s direction\n",
        /* Read one size entry per cell from the string */
        for (i = 0; i < nc; i++)
            sscanf(size_string, "%lf%n", &dbl, &n);
                gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
            fprintf(fplog, "Relative cell sizes:");
        /* Normalize so the fractions sum to 1 and report them */
        for (i = 0; i < nc; i++)
                fprintf(fplog, " %5.3f", slb_frac[i]);
            fprintf(fplog, "\n");
6378 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6381 gmx_mtop_ilistloop_t iloop;
6385 iloop = gmx_mtop_ilistloop_init(mtop);
6386 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6388 for (ftype = 0; ftype < F_NRE; ftype++)
6390 if ((interaction_function[ftype].flags & IF_BOND) &&
6393 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
/* Read an integer value from environment variable env_var.
 * Returns def when the variable is unset or unparsable; when set and
 * fplog != NULL the chosen value is reported in the log file.
 */
6401 static int dd_getenv(FILE *fplog, const char *env_var, int def)
6407 val = getenv(env_var);
6410 if (sscanf(val, "%d", &nst) <= 0)
6416 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
/* Print warn_string to stderr (only on the master rank, judging by the
 * elided guard — TODO confirm) and, when fplog != NULL, to the log file.
 */
6424 static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6428 fprintf(stderr, "\n%s\n", warn_string);
6432 fprintf(fplog, "\n%s\n", warn_string);
/* Check input-record settings that are incompatible with domain
 * decomposition and issue fatal errors or warnings:
 *  - screw PBC only allows decomposition along x,
 *  - simple (non-grid) neighbor searching is unsupported,
 *  - nstlist=0 is unsupported,
 *  - angular COM removal with PBC gives a warning only.
 */
6436 static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6437 t_inputrec *ir, FILE *fplog)
6439 if (ir->ePBC == epbcSCREW &&
6440 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
6442 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6445 if (ir->ns_type == ensSIMPLE)
6447 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6450 if (ir->nstlist == 0)
6452 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6455 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6457 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
/* Return the smallest initial average cell size over the decomposed
 * dimensions, starting from the x box size and taking the minimum of
 * box_size*skew_fac/nc per DD dimension (d is presumably set to
 * dd->dim[di] in an elided line — TODO confirm).
 */
6461 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6466 r = ddbox->box_size[XX];
6467 for (di = 0; di < dd->ndim; di++)
6470 /* Check using the initial average cell size */
6471 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
/* Decide the dynamic load balancing (DLB) setting from the -dlb option
 * character ('a'/'n'/'y'), possibly downgrading it:
 *  - reruns and non-dynamic integrators cannot use DLB,
 *  - no cycle counter => no load recording => no DLB,
 *  - MD_REPRODUCIBLE turns "auto" off and warns for explicit "yes".
 * Returns the resulting edlb* value (return statements are elided here).
 */
6477 static int check_dlb_support(FILE *fplog, t_commrec *cr,
6478 const char *dlb_opt, gmx_bool bRecordLoad,
6479 unsigned long Flags, t_inputrec *ir)
6487 case 'a': eDLB = edlbAUTO; break;
6488 case 'n': eDLB = edlbNO; break;
6489 case 'y': eDLB = edlbYES; break;
6490 default: gmx_incons("Unknown dlb_opt");
6493 if (Flags & MD_RERUN)
6498 if (!EI_DYNAMICS(ir->eI))
6500 if (eDLB == edlbYES)
6502 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6503 dd_warning(cr, fplog, buf);
6511 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6516 if (Flags & MD_REPRODUCIBLE)
6523 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6527 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6530 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
/* Fill dd->dim[] with the dimensions that are actually decomposed
 * (nc > 1) and set dd->ndim accordingly. Default order is x,y,z;
 * setting the env.var. GMX_DD_ORDER_ZYX reverses it to z,y,x.
 */
6538 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6543 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6545 /* Decomposition order z,y,x */
6548 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6550 for (dim = DIM-1; dim >= 0; dim--)
6552 if (dd->nc[dim] > 1)
6554 dd->dim[dd->ndim++] = dim;
6560 /* Decomposition order x,y,z */
6561 for (dim = 0; dim < DIM; dim++)
6563 if (dd->nc[dim] > 1)
6565 dd->dim[dd->ndim++] = dim;
/* Allocate and zero-initialize a gmx_domdec_comm_t:
 * per-direction (2 per dimension) charge-group flag and state buffers,
 * the integer communication buffer, the rvec buffer, and the load
 * statistics counters. Returns the new struct (return elided here).
 */
6571 static gmx_domdec_comm_t *init_dd_comm()
6573 gmx_domdec_comm_t *comm;
6577 snew(comm->cggl_flag, DIM*2);
6578 snew(comm->cgcm_state, DIM*2);
6579 for (i = 0; i < DIM*2; i++)
6581 comm->cggl_flag_nalloc[i] = 0;
6582 comm->cgcm_state_nalloc[i] = 0;
6585 comm->nalloc_int = 0;
6586 comm->buf_int = NULL;
6588 vec_rvec_init(&comm->vbuf);
6590 comm->n_load_have = 0;
6591 comm->n_load_collect = 0;
/* sum_nat only tracks the zone-and-beyond categories */
6593 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6595 comm->sum_nat[i] = 0;
6599 comm->load_step = 0;
6602 clear_ivec(comm->load_lim);
/* Top-level setup of the domain decomposition before the run:
 * reads the GMX_DD_* / GMX_DLB_* environment overrides, decides on
 * dynamic load balancing, determines the bonded-interaction and
 * constraint (P-LINCS) cell-size limits, chooses or validates the DD
 * grid and the number of separate PME ranks, sets up the PME
 * decomposition and static load-balancing fractions, and finally
 * checks the input-record restrictions.
 * Returns the new gmx_domdec_t (allocation of dd itself is elided from
 * this view).
 */
6609 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6610 unsigned long Flags,
6612 real comm_distance_min, real rconstr,
6613 const char *dlb_opt, real dlb_scale,
6614 const char *sizex, const char *sizey, const char *sizez,
6615 gmx_mtop_t *mtop, t_inputrec *ir,
6616 matrix box, rvec *x,
6618 int *npme_x, int *npme_y)
6621 gmx_domdec_comm_t *comm;
6624 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6631 "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
6636 dd->comm = init_dd_comm();
/* NOTE(review): init_dd_comm() already snew's cggl_flag/cgcm_state;
 * these two calls look like a duplicate allocation (leak of the first
 * pair) — verify against the full source / upstream history. */
6638 snew(comm->cggl_flag, DIM*2);
6639 snew(comm->cgcm_state, DIM*2);
6641 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6642 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
/* Debug/tuning environment variable overrides */
6644 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
6645 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
6646 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
6647 recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
6648 comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
6649 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
6650 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
6651 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
6653 dd->pme_recv_f_alloc = 0;
6654 dd->pme_recv_f_buf = NULL;
6656 if (dd->bSendRecv2 && fplog)
6658 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n")
6664 fprintf(fplog, "Will load balance based on FLOP count\n");
6666 if (comm->eFlop > 1)
/* eFlop > 1 adds rank-dependent noise to the FLOP count (for testing);
 * seed the PRNG differently on each rank */
6668 srand(1+cr->nodeid);
6670 comm->bRecordLoad = TRUE;
6674 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6678 /* Initialize to GPU share count to 0, might change later */
6679 comm->nrank_gpu_shared = 0;
6681 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6683 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6686 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6688 dd->bGridJump = comm->bDynLoadBal;
6689 comm->bPMELoadBalDLBLimits = FALSE;
6691 if (comm->nstSortCG)
6695 if (comm->nstSortCG == 1)
6697 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6701 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6705 snew(comm->sort, 1);
6711 fprintf(fplog, "Will not sort the charge groups\n");
/* bCGs: the topology has real charge groups (more atoms than groups) */
6715 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6717 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6718 if (comm->bInterCGBondeds)
6720 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6724 comm->bInterCGMultiBody = FALSE;
6727 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6728 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6730 if (ir->rlistlong == 0)
6732 /* Set the cut-off to some very large value,
6733 * so we don't need if statements everywhere in the code.
6734 * We use sqrt, since the cut-off is squared in some places.
6736 comm->cutoff = GMX_CUTOFF_INF;
6740 comm->cutoff = ir->rlistlong;
6742 comm->cutoff_mbody = 0;
6744 comm->cellsize_limit = 0;
6745 comm->bBondComm = FALSE;
/* Determine the minimum cell size required by bonded interactions */
6747 if (comm->bInterCGBondeds)
6749 if (comm_distance_min > 0)
/* User supplied -rdd: take it at face value */
6751 comm->cutoff_mbody = comm_distance_min;
6752 if (Flags & MD_DDBONDCOMM)
6754 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6758 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6760 r_bonded_limit = comm->cutoff_mbody;
6762 else if (ir->bPeriodicMols)
6764 /* Can not easily determine the required cut-off */
6765 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6766 comm->cutoff_mbody = comm->cutoff/2;
6767 r_bonded_limit = comm->cutoff_mbody;
/* Else: measure the actual bonded distances from the start structure
 * (master only, judging by the bcast below) */
6773 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6774 Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
6776 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6777 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6779 /* We use an initial margin of 10% for the minimum cell size,
6780 * except when we are just below the non-bonded cut-off.
6782 if (Flags & MD_DDBONDCOMM)
6784 if (max(r_2b, r_mb) > comm->cutoff)
6786 r_bonded = max(r_2b, r_mb);
6787 r_bonded_limit = 1.1*r_bonded;
6788 comm->bBondComm = TRUE;
6793 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6795 /* We determine cutoff_mbody later */
6799 /* No special bonded communication,
6800 * simply increase the DD cut-off.
6802 r_bonded_limit = 1.1*max(r_2b, r_mb);
6803 comm->cutoff_mbody = r_bonded_limit;
6804 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6807 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6811 "Minimum cell size due to bonded interactions: %.3f nm\n",
6812 comm->cellsize_limit);
6816 if (dd->bInterCGcons && rconstr <= 0)
6818 /* There is a cell size limit due to the constraints (P-LINCS) */
6819 rconstr = constr_r_max(fplog, mtop, ir);
6823 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6825 if (rconstr > comm->cellsize_limit)
6827 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6831 else if (rconstr > 0 && fplog)
6833 /* Here we do not check for dd->bInterCGcons,
6834 * because one can also set a cell size limit for virtual sites only
6835 * and at this point we don't know yet if there are intercg v-sites.
6838 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6841 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6843 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
/* Either the user specified the grid (nc[XX] > 0, first branch) or we
 * have to choose one ourselves (second branch) */
6847 copy_ivec(nc, dd->nc);
6848 set_dd_dim(fplog, dd);
6849 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6851 if (cr->npmenodes == -1)
6855 acs = average_cellsize_min(dd, ddbox);
6856 if (acs < comm->cellsize_limit)
6860 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6862 gmx_fatal_collective(FARGS, cr, NULL,
6863 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6864 acs, comm->cellsize_limit);
6869 set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);
6871 /* We need to choose the optimal DD grid and possibly PME nodes */
6872 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6873 comm->eDLB != edlbNO, dlb_scale,
6874 comm->cellsize_limit, comm->cutoff,
6875 comm->bInterCGBondeds);
6877 if (dd->nc[XX] == 0)
/* dd_choose_grid found no valid decomposition */
6879 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6880 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
6881 !bC ? "-rdd" : "-rcon",
6882 comm->eDLB != edlbNO ? " or -dds" : "",
6883 bC ? " or your LINCS settings" : "");
6885 gmx_fatal_collective(FARGS, cr, NULL,
6886 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
6888 "Look in the log file for details on the domain decomposition",
6889 cr->nnodes-cr->npmenodes, limit, buf);
6891 set_dd_dim(fplog, dd);
6897 "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
6898 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
6901 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6902 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6904 gmx_fatal_collective(FARGS, cr, NULL,
6905 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
6906 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6908 if (cr->npmenodes > dd->nnodes)
6910 gmx_fatal_collective(FARGS, cr, NULL,
6911 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6913 if (cr->npmenodes > 0)
6915 comm->npmenodes = cr->npmenodes;
/* No separate PME ranks: every PP rank also does PME */
6919 comm->npmenodes = dd->nnodes;
6922 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6924 /* The following choices should match those
6925 * in comm_cost_est in domdec_setup.c.
6926 * Note that here the checks have to take into account
6927 * that the decomposition might occur in a different order than xyz
6928 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6929 * in which case they will not match those in comm_cost_est,
6930 * but since that is mainly for testing purposes that's fine.
6932 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6933 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6934 getenv("GMX_PMEONEDD") == NULL)
6936 comm->npmedecompdim = 2;
6937 comm->npmenodes_x = dd->nc[XX];
6938 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6942 /* In case nc is 1 in both x and y we could still choose to
6943 * decompose pme in y instead of x, but we use x for simplicity.
6945 comm->npmedecompdim = 1;
6946 if (dd->dim[0] == YY)
6948 comm->npmenodes_x = 1;
6949 comm->npmenodes_y = comm->npmenodes;
6953 comm->npmenodes_x = comm->npmenodes;
6954 comm->npmenodes_y = 1;
6959 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6960 comm->npmenodes_x, comm->npmenodes_y, 1);
6965 comm->npmedecompdim = 0;
6966 comm->npmenodes_x = 0;
6967 comm->npmenodes_y = 0;
6970 /* Technically we don't need both of these,
6971 * but it simplifies code not having to recalculate it.
6973 *npme_x = comm->npmenodes_x;
6974 *npme_y = comm->npmenodes_y;
6976 snew(comm->slb_frac, DIM);
6977 if (comm->eDLB == edlbNO)
6979 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
6980 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
6981 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
6984 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6986 if (comm->bBondComm || comm->eDLB != edlbNO)
6988 /* Set the bonded communication distance to halfway
6989 * the minimum and the maximum,
6990 * since the extra communication cost is nearly zero.
6992 acs = average_cellsize_min(dd, ddbox);
6993 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6994 if (comm->eDLB != edlbNO)
6996 /* Check if this does not limit the scaling */
6997 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
6999 if (!comm->bBondComm)
7001 /* Without bBondComm do not go beyond the n.b. cut-off */
7002 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
7003 if (comm->cellsize_limit >= comm->cutoff)
7005 /* We don't loose a lot of efficieny
7006 * when increasing it to the n.b. cut-off.
7007 * It can even be slightly faster, because we need
7008 * less checks for the communication setup.
7010 comm->cutoff_mbody = comm->cutoff;
7013 /* Check if we did not end up below our original limit */
7014 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
7016 if (comm->cutoff_mbody > comm->cellsize_limit)
7018 comm->cellsize_limit = comm->cutoff_mbody;
7021 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
7026 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
7027 "cellsize limit %f\n",
7028 comm->bBondComm, comm->cellsize_limit);
7033 check_dd_restrictions(cr, dd, ir, fplog);
/* INT_MIN marks "no partitioning done yet" */
7036 comm->partition_step = INT_MIN;
7039 clear_dd_cycle_counts(dd);
/* Activate the DLB limits: copy the precomputed DLB pulse counts
 * (np_dlb) and DLB minimum cell sizes into the active np /
 * cellsize_min fields for each decomposed dimension.
 */
7044 static void set_dlb_limits(gmx_domdec_t *dd)
7049 for (d = 0; d < dd->ndim; d++)
7051 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7052 dd->comm->cellsize_min[dd->dim[d]] =
7053 dd->comm->cellsize_min_dlb[dd->dim[d]];
/* Turn dynamic load balancing on during the run (eDLB==auto path).
 * If the current minimum cell size leaves less than 5% room above the
 * cell size limit, DLB is permanently disabled instead (set to "no").
 * Otherwise DLB is enabled, the limits are applied (set_dlb_limits is
 * presumably called in an elided line — TODO confirm) and the cell
 * boundary fractions are initialized to a uniform grid.
 */
7058 static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7061 gmx_domdec_comm_t *comm;
7071 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
7074 cellsize_min = comm->cellsize_min[dd->dim[0]];
7075 for (d = 1; d < dd->ndim; d++)
7077 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7080 if (cellsize_min < comm->cellsize_limit*1.05)
7082 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7084 /* Change DLB from "auto" to "no". */
7085 comm->eDLB = edlbNO;
7090 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7091 comm->bDynLoadBal = TRUE;
7092 dd->bGridJump = TRUE;
7096 /* We can set the required cell size info here,
7097 * so we do not need to communicate this.
7098 * The grid is completely uniform.
7100 for (d = 0; d < dd->ndim; d++)
7104 comm->load[d].sum_m = comm->load[d].sum;
7106 nc = dd->nc[dd->dim[d]];
7107 for (i = 0; i < nc; i++)
7109 comm->root[d]->cell_f[i] = i/(real)nc;
7112 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7113 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7116 comm->root[d]->cell_f[nc] = 1.0;
/* Allocate and FALSE-initialize the per-charge-group "is local" flag
 * array, one entry per charge group in the global topology.
 * Returns the array (return statement elided from this view).
 */
7121 static char *init_bLocalCG(gmx_mtop_t *mtop)
7126 ncg = ncg_mtop(mtop);
7127 snew(bLocalCG, ncg);
7128 for (cg = 0; cg < ncg; cg++)
7130 bLocalCG[cg] = FALSE;
/* Initialize bonded-interaction bookkeeping for DD: build the reverse
 * topology, and when bBondComm is set (atoms communicated beyond the
 * cut-off for bondeds) also build the charge-group link structure and
 * the bLocalCG flags; otherwise those stay NULL.
 */
7136 void dd_init_bondeds(FILE *fplog,
7137 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7139 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7141 gmx_domdec_comm_t *comm;
7145 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7149 if (comm->bBondComm)
7151 /* Communicate atoms beyond the cut-off for bonded interactions */
7154 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7156 comm->bLocalCG = init_bLocalCG(mtop);
7160 /* Only communicate atoms based on cut-off */
7161 comm->cglink = NULL;
7162 comm->bLocalCG = NULL;
/* Write a summary of the DD settings to the log file: with DLB the
 * maximum pulse counts and allowed cell shrink per dimension, without
 * DLB the initial pulse counts and cell sizes, followed by the maximum
 * allowed interaction distances (non-bonded, two-body and multi-body
 * bondeds, v-sites, constraints).
 */
7166 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7168 gmx_bool bDynLoadBal, real dlb_scale,
7171 gmx_domdec_comm_t *comm;
7186 fprintf(fplog, "The maximum number of communication pulses is:");
7187 for (d = 0; d < dd->ndim; d++)
7189 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7191 fprintf(fplog, "\n");
7192 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7193 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7194 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7195 for (d = 0; d < DIM; d++)
/* Skip non-PBC dims with only 2 cells: shrink is not meaningful there */
7199 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7206 comm->cellsize_min_dlb[d]/
7207 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7209 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7212 fprintf(fplog, "\n");
/* Static (no DLB) branch: report initial pulses and cell sizes */
7216 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
7217 fprintf(fplog, "The initial number of communication pulses is:");
7218 for (d = 0; d < dd->ndim; d++)
7220 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7222 fprintf(fplog, "\n");
7223 fprintf(fplog, "The initial domain decomposition cell size is:");
7224 for (d = 0; d < DIM; d++)
7228 fprintf(fplog, " %c %.2f nm",
7229 dim2char(d), dd->comm->cellsize_min[d]);
7232 fprintf(fplog, "\n\n");
7235 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7237 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7238 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7239 "non-bonded interactions", "", comm->cutoff);
7243 limit = dd->comm->cellsize_limit;
7247 if (dynamic_dd_box(ddbox, ir))
7249 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7251 limit = dd->comm->cellsize_min[XX];
7252 for (d = 1; d < DIM; d++)
7254 limit = min(limit, dd->comm->cellsize_min[d]);
7258 if (comm->bInterCGBondeds)
7260 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7261 "two-body bonded interactions", "(-rdd)",
7262 max(comm->cutoff, comm->cutoff_mbody));
7263 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7264 "multi-body bonded interactions", "(-rdd)",
7265 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7269 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7270 "virtual site constructions", "(-rcon)", limit);
7272 if (dd->constraint_comm)
7274 sprintf(buf, "atoms separated by up to %d constraints",
7276 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7277 buf, "(-rcon)", limit);
7279 fprintf(fplog, "\n");
/* Determine the DLB cell-size limits and the maximum number of
 * communication pulses per dimension (np_dlb), taking into account the
 * cut-off, the multi-body cut-off, dlb_scale and the GMX_DD_NPULSE
 * override. Also sets maxpulse, the per-dimension DLB minimum cell
 * sizes and bVacDLBNoLimit (no limit possible without PBC/cut-off).
 */
7285 static void set_cell_limits_dlb(gmx_domdec_t *dd,
7287 const t_inputrec *ir,
7288 const gmx_ddbox_t *ddbox)
7290 gmx_domdec_comm_t *comm;
7291 int d, dim, npulse, npulse_d_max, npulse_d;
7296 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7298 /* Determine the maximum number of comm. pulses in one dimension */
7300 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7302 /* Determine the maximum required number of grid pulses */
7303 if (comm->cellsize_limit >= comm->cutoff)
7305 /* Only a single pulse is required */
7308 else if (!bNoCutOff && comm->cellsize_limit > 0)
7310 /* We round down slightly here to avoid overhead due to the latency
7311 * of extra communication calls when the cut-off
7312 * would be only slightly longer than the cell size.
7313 * Later cellsize_limit is redetermined,
7314 * so we can not miss interactions due to this rounding.
7316 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
7320 /* There is no cell size limit */
7321 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7324 if (!bNoCutOff && npulse > 1)
7326 /* See if we can do with less pulses, based on dlb_scale */
7328 for (d = 0; d < dd->ndim; d++)
7331 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7332 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
7333 npulse_d_max = max(npulse_d_max, npulse_d);
7335 npulse = min(npulse, npulse_d_max);
7338 /* This env var can override npulse */
7339 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
7346 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7347 for (d = 0; d < dd->ndim; d++)
/* A pulse can reach at most one cell, so never more than nc-1 pulses */
7349 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7350 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7351 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7352 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7353 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7355 comm->bVacDLBNoLimit = FALSE;
7359 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7360 if (!comm->bVacDLBNoLimit)
7362 comm->cellsize_limit = max(comm->cellsize_limit,
7363 comm->cutoff/comm->maxpulse);
7365 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7366 /* Set the minimum cell size for each DD dimension */
7367 for (d = 0; d < dd->ndim; d++)
7369 if (comm->bVacDLBNoLimit ||
7370 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7372 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7376 comm->cellsize_min_dlb[dd->dim[d]] =
7377 comm->cutoff/comm->cd[d].np_dlb;
7380 if (comm->cutoff_mbody <= 0)
7382 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
/* If DLB is already on, apply the limits now (set_dlb_limits call
 * presumably in the elided body — TODO confirm) */
7384 if (comm->bDynLoadBal)
/* Return whether PBC has to be applied to bonded interactions:
 * TRUE when there is PBC, there are inter-charge-group bondeds, and
 * not all periodic dimensions are decomposed (middle condition elided
 * from this view).
 */
7390 gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7392 /* If each molecule is a single charge group
7393 * or we use domain decomposition for each periodic dimension,
7394 * we do not need to take pbc into account for the bonded interactions.
7396 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7399 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
/* Late DD setup, run after the thread count is known: allocate the
 * per-thread work data, initialize the PME DD communication (or check
 * that no PME ranks were requested without PME), compute the DLB cell
 * limits, print the settings summary, and size the global-to-local
 * atom hash table from the expected zone volume fraction.
 */
7402 void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7403 t_inputrec *ir, gmx_ddbox_t *ddbox)
7405 gmx_domdec_comm_t *comm;
7411 /* Initialize the thread data.
7412 * This can not be done in init_domain_decomposition,
7413 * as the numbers of threads is determined later.
7415 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7418 snew(comm->dth, comm->nth);
7421 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7423 init_ddpme(dd, &comm->ddpme[0], 0);
7424 if (comm->npmedecompdim >= 2)
7426 init_ddpme(dd, &comm->ddpme[1], 1);
7431 comm->npmenodes = 0;
7432 if (dd->pme_nodeid >= 0)
7434 gmx_fatal_collective(FARGS, NULL, dd,
7435 "Can not have separate PME ranks without PME electrostatics");
7441 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7443 if (comm->eDLB != edlbNO)
7445 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7448 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7449 if (comm->eDLB == edlbAUTO)
7453 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7455 print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
7458 if (ir->ePBC == epbcNONE)
7460 vol_frac = 1 - 1/(double)dd->nnodes;
7465 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7469 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
/* Size the ga2la hash by the expected number of home+zone atoms */
7471 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7473 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
/* Test whether the requested cut-off cutoff_req fits the current DD
 * grid. For each decomposed dimension the required number of pulses is
 * computed with a margin (tighter under pressure coupling); with DLB
 * the allocated pulse count (np_dlb) and the current local cell sizes
 * may limit it, in which case the limitation is summed over all ranks
 * and the cut-off is rejected. Returns TRUE when allowed (return
 * statements elided from this view).
 */
7476 static gmx_bool test_dd_cutoff(t_commrec *cr,
7477 t_state *state, t_inputrec *ir,
7488 set_ddbox(dd, FALSE, cr, ir, state->box,
7489 TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
7493 for (d = 0; d < dd->ndim; d++)
7497 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
7498 if (dynamic_dd_box(&ddbox, ir))
7500 inv_cell_size *= DD_PRES_SCALE_MARGIN;
7503 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
7505 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7506 dd->comm->cd[d].np_dlb > 0)
7508 if (np > dd->comm->cd[d].np_dlb)
7513 /* If a current local cell size is smaller than the requested
7514 * cut-off, we could still fix it, but this gets very complicated.
7515 * Without fixing here, we might actually need more checks.
7517 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7524 if (dd->comm->eDLB != edlbNO)
7526 /* If DLB is not active yet, we don't need to check the grid jumps.
7527 * Actually we shouldn't, because then the grid jump data is not set.
7529 if (dd->comm->bDynLoadBal &&
7530 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
/* Combine the local limitation flags over all ranks */
7535 gmx_sumi(1, &LocallyLimited, cr);
7537 if (LocallyLimited > 0)
/* Try to change the DD communication cut-off to cutoff_req.
 * Applies the new value only when test_dd_cutoff allows it; returns
 * whether the change was made.
 */
7546 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7549 gmx_bool bCutoffAllowed;
7551 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7555 cr->dd->comm->cutoff = cutoff_req;
7558 return bCutoffAllowed;
/* Enable the PME load balancing DLB cut-off limit and record the
 * current cut-off as the maximum the PME load balancing may use.
 */
7561 void change_dd_dlb_cutoff_limit(t_commrec *cr)
7563 gmx_domdec_comm_t *comm;
7565 comm = cr->dd->comm;
7567 /* Turn on the DLB limiting (might have been on already) */
7568 comm->bPMELoadBalDLBLimits = TRUE;
7570 /* Change the cut-off limit */
7571 comm->PMELoadBal_max_cutoff = comm->cutoff;
/* Merge the charge groups received in this communication pulse into
 * the zone arrays (index_gl, cg_cm, cgindex, cginfo).
 * Phase 1: shift the charge groups already stored for later zones to
 * make room, and correct the send indices of earlier pulses for the
 * shift. Phase 2: append the received cg's per zone and extend the
 * atom index (cgindex) using the atom counts from cginfo.
 */
7574 static void merge_cg_buffers(int ncell,
7575 gmx_domdec_comm_dim_t *cd, int pulse,
7577 int *index_gl, int *recv_i,
7578 rvec *cg_cm, rvec *recv_vr,
7580 cginfo_mb_t *cginfo_mb, int *cginfo)
7582 gmx_domdec_ind_t *ind, *ind_p;
7583 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7584 int shift, shift_at;
7586 ind = &cd->ind[pulse];
7588 /* First correct the already stored data */
7589 shift = ind->nrecv[ncell];
/* Walk cells backwards so later data is moved before being overwritten */
7590 for (cell = ncell-1; cell >= 0; cell--)
7592 shift -= ind->nrecv[cell];
7595 /* Move the cg's present from previous grid pulses */
7596 cg0 = ncg_cell[ncell+cell];
7597 cg1 = ncg_cell[ncell+cell+1];
7598 cgindex[cg1+shift] = cgindex[cg1];
7599 for (cg = cg1-1; cg >= cg0; cg--)
7601 index_gl[cg+shift] = index_gl[cg];
7602 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7603 cgindex[cg+shift] = cgindex[cg];
7604 cginfo[cg+shift] = cginfo[cg];
7606 /* Correct the already stored send indices for the shift */
7607 for (p = 1; p <= pulse; p++)
7609 ind_p = &cd->ind[p];
7611 for (c = 0; c < cell; c++)
7613 cg0 += ind_p->nsend[c];
7615 cg1 = cg0 + ind_p->nsend[cell];
7616 for (cg = cg0; cg < cg1; cg++)
7618 ind_p->index[cg] += shift;
7624 /* Merge in the communicated buffers */
7628 for (cell = 0; cell < ncell; cell++)
7630 cg1 = ncg_cell[ncell+cell+1] + shift;
7633 /* Correct the old cg indices */
7634 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7636 cgindex[cg+1] += shift_at;
7639 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7641 /* Copy this charge group from the buffer */
7642 index_gl[cg1] = recv_i[cg0];
7643 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7644 /* Add it to the cgindex */
7645 cg_gl = index_gl[cg1];
7646 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7647 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7648 cgindex[cg1+1] = cgindex[cg1] + nat;
7653 shift += ind->nrecv[cell];
7654 ncg_cell[ncell+cell+1] = cg1;
/* Store, per zone and per pulse, the first (cell2at0) and one-past-last
 * (cell2at1) atom index of the received charge groups, so communication
 * buffers can later be copied as contiguous atom blocks.
 */
7658 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7659 int nzone, int cg0, const int *cgindex)
7663 /* Store the atom block boundaries for easy copying of communication buffers
7666 for (zone = 0; zone < nzone; zone++)
7668 for (p = 0; p < cd->np; p++)
7670 cd->ind[p].cell2at0[zone] = cgindex[cg];
7671 cg += cd->ind[p].nrecv[zone];
7672 cd->ind[p].cell2at1[zone] = cgindex[cg];
/* Return whether charge group cg_gl is bonded-linked to any charge
 * group that is not local (bLocalCG FALSE), i.e. whether a bonded
 * partner still has to be communicated.
 */
7677 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7683 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7685 if (!bLocalCG[link->a[i]])
7694 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
/* Used by set_dd_corners/get_zone_pulse_cgs to hold, per decomposed
 * dimension, the boundary planes against which cg distances are
 * measured (struct/typedef delimiters elided from this view). */
7696 real c[DIM][4]; /* the corners for the non-bonded communication */
7697 real cr0; /* corner for rounding */
7698 real cr1[4]; /* corners for rounding */
7699 real bc[DIM]; /* corners for bounded communication */
7700 real bcr1; /* corner for rounding for bonded communication */
7703 /* Determine the corners of the domain(s) we are communicating with */
/* Fills a dd_corners_t with the lower cell boundaries (c, bc) and the
 * upper rounding corners (cr0, cr1, bcr1) for up to three decomposed
 * dimensions dim0/dim1/dim2. With staggered grids (bGridJump,
 * condition lines partly elided) the maxima over the zones seen
 * (zone_d1/zone_d2) are used instead of this rank's own boundaries.
 */
7705 set_dd_corners(const gmx_domdec_t *dd,
7706 int dim0, int dim1, int dim2,
7710 const gmx_domdec_comm_t *comm;
7711 const gmx_domdec_zones_t *zones;
7716 zones = &comm->zones;
7718 /* Keep the compiler happy */
7722 /* The first dimension is equal for all cells */
7723 c->c[0][0] = comm->cell_x0[dim0];
7726 c->bc[0] = c->c[0][0];
7731 /* This cell row is only seen from the first row */
7732 c->c[1][0] = comm->cell_x0[dim1];
7733 /* All rows can see this row */
7734 c->c[1][1] = comm->cell_x0[dim1];
7737 c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7740 /* For the multi-body distance we need the maximum */
7741 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7744 /* Set the upper-right corner for rounding */
7745 c->cr0 = comm->cell_x1[dim0];
7750 for (j = 0; j < 4; j++)
7752 c->c[2][j] = comm->cell_x0[dim2];
7756 /* Use the maximum of the i-cells that see a j-cell */
7757 for (i = 0; i < zones->nizone; i++)
7759 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7765 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7771 /* For the multi-body distance we need the maximum */
7772 c->bc[2] = comm->cell_x0[dim2];
7773 for (i = 0; i < 2; i++)
7775 for (j = 0; j < 2; j++)
7777 c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7783 /* Set the upper-right corner for rounding */
7784 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7785 * Only cell (0,0,0) can see cell 7 (1,1,1)
7787 c->cr1[0] = comm->cell_x1[dim1];
7788 c->cr1[3] = comm->cell_x1[dim1];
7791 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7794 /* For the multi-body distance we need the maximum */
7795 c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7802 /* Determine which cg's we need to send in this pulse from this zone */
/*
 * For each charge group in the index range [cg0, cg1) of zone 'zonei',
 * decide whether it has to be communicated in this pulse: either because
 * it lies within the non-bonded communication distance (r_comm2) of the
 * relevant zone corner, or - when bDistBonded - within the bonded
 * distance (r_bcomm2) for multi-body (bDistMB) or two-body (bDist2B)
 * bonded interactions.  Selected cg's are appended to ind->index (local
 * indices), *ibuf (global indices) and vbuf (positions, pbc-corrected on
 * the lower boundary).  The per-zone send count is returned via
 * nsend_z_ptr (see the final assignment below).
 *
 * NOTE(review): a number of lines are elided in this view of the file;
 * the code is kept byte-identical, only comments were added.
 */
7804 get_zone_pulse_cgs(gmx_domdec_t *dd,
7805                    int zonei, int zone,
7807                    const int *index_gl,
7809                    int dim, int dim_ind,
7810                    int dim0, int dim1, int dim2,
7811                    real r_comm2, real r_bcomm2,
7815                    real skew_fac2_d, real skew_fac_01,
7816                    rvec *v_d, rvec *v_0, rvec *v_1,
7817                    const dd_corners_t *c,
7819                    gmx_bool bDistBonded,
7825                    gmx_domdec_ind_t *ind,
7826                    int **ibuf, int *ibuf_nalloc,
7832     gmx_domdec_comm_t *comm;
7834     gmx_bool bDistMB_pulse;
7836     real r2, rb2, r, tric_sh;
7839     int nsend_z, nsend, nat;
     /* Screw pbc only applies along the x dimension */
7843     bScrew = (dd->bScrewPBC && dim == XX);
7845     bDistMB_pulse = (bDistMB && bDistBonded);
     /* Loop over the charge groups of this zone assigned to this call
      * (the caller splits [cg0,cg1) over OpenMP threads).
      */
7851     for (cg = cg0; cg < cg1; cg++)
7855         if (tric_dist[dim_ind] == 0)
7857             /* Rectangular direction, easy */
             /* r2 accumulates the squared distance to the zone corner,
              * rb2 the squared distance for the bonded check.
              */
7858             r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7865             r = cg_cm[cg][dim] - c->bc[dim_ind];
7871             /* Rounding gives at most a 16% reduction
7872              * in communicated atoms
             */
             /* Zone rounding: also measure against the corners of the
              * lower-dimensional zones (cr0 along dim0, cr1 along dim1).
              */
7874             if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7876                 r = cg_cm[cg][dim0] - c->cr0;
7877                 /* This is the first dimension, so always r >= 0 */
7884             if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7886                 r = cg_cm[cg][dim1] - c->cr1[zone];
7893                 r = cg_cm[cg][dim1] - c->bcr1;
7903             /* Triclinic direction, more complicated */
7906             /* Rounding, conservative as the skew_fac multiplication
7907              * will slightly underestimate the distance.
             */
7909             if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7911                 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
                 /* Subtract the triclinic shift contributions of the
                  * higher dimensions along dim0.
                  */
7912                 for (i = dim0+1; i < DIM; i++)
7914                     rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7916                 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7919                 rb[dim0] = rn[dim0];
7922                 /* Take care that the cell planes along dim0 might not
7923                  * be orthogonal to those along dim1 and dim2.
                 */
7925                 for (i = 1; i <= dim_ind; i++)
7928                     if (normal[dim0][dimd] > 0)
7930                         rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7933                         rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7938             if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7940                 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7942                 for (i = dim1+1; i < DIM; i++)
7944                     tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7946                 rn[dim1] += tric_sh;
7949                 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7950                 /* Take care of coupling of the distances
7951                  * to the planes along dim0 and dim1 through dim2.
                 */
7953                 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7954                 /* Take care that the cell planes along dim1
7955                  * might not be orthogonal to that along dim2.
                 */
7957                 if (normal[dim1][dim2] > 0)
7959                     rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7965                         cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7968                     rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7969                     /* Take care of coupling of the distances
7970                      * to the planes along dim0 and dim1 through dim2.
                     */
7972                     rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7973                     /* Take care that the cell planes along dim1
7974                      * might not be orthogonal to that along dim2.
                     */
7976                     if (normal[dim1][dim2] > 0)
7978                         rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7983             /* The distance along the communication direction */
7984             rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7986             for (i = dim+1; i < DIM; i++)
7988                 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7993             r2 += rn[dim]*rn[dim]*skew_fac2_d;
7994             /* Take care of coupling of the distances
7995              * to the planes along dim0 and dim1 through dim2.
             */
7997             if (dim_ind == 1 && zonei == 1)
7999                 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
8005                 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
8008                 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
8009                 /* Take care of coupling of the distances
8010                  * to the planes along dim0 and dim1 through dim2.
                 */
8012                 if (dim_ind == 1 && zonei == 1)
8014                     rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
         /* Selection condition (partially elided in this view): the cg is
          * sent when within the non-bonded cut-off, or within the bonded
          * cut-off while also having inter-cg bonded interactions with a
          * missing link on this rank (bBondComm case).
          */
8022              ((bDistMB && rb2 < r_bcomm2) ||
8023               (bDist2B && r2 < r_bcomm2)) &&
8025               (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
8026                missing_link(comm->cglink, index_gl[cg],
8029             /* Make an index to the local charge groups */
             /* Grow the send buffers as needed before appending */
8030             if (nsend+1 > ind->nalloc)
8032                 ind->nalloc = over_alloc_large(nsend+1);
8033                 srenew(ind->index, ind->nalloc);
8035             if (nsend+1 > *ibuf_nalloc)
8037                 *ibuf_nalloc = over_alloc_large(nsend+1);
8038                 srenew(*ibuf, *ibuf_nalloc);
8040             ind->index[nsend] = cg;
8041             (*ibuf)[nsend] = index_gl[cg];
8043             vec_rvec_check_alloc(vbuf, nsend+1);
8045             if (dd->ci[dim] == 0)
8047                 /* Correct cg_cm for pbc */
8048                 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
                 /* With screw pbc the shifted image is also mirrored
                  * in y and z.
                  */
8051                     vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
8052                     vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
8057                 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
             /* Count the atoms of this charge group */
8060             nat += cgindex[cg+1] - cgindex[cg];
     /* Report the number of cg's selected for this zone */
8066     *nsend_z_ptr = nsend_z;
/*
 * Set up the halo communication for all decomposition dimensions and
 * pulses: determine which charge groups each rank must send (threaded via
 * get_zone_pulse_cgs), exchange counts, global cg indices and cg centers
 * with the neighboring ranks, and extend the zone cg ranges, cgindex and
 * cginfo for the received groups.
 *
 * NOTE(review): many lines are elided in this view of the file; the code
 * is kept byte-identical, only comments were added.
 */
8069 static void setup_dd_communication(gmx_domdec_t *dd,
8070                                    matrix box, gmx_ddbox_t *ddbox,
8071                                    t_forcerec *fr, t_state *state, rvec **f)
8073     int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8074     int nzone, nzone_send, zone, zonei, cg0, cg1;
8075     int c, i, j, cg, cg_gl, nrcg;
8076     int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8077     gmx_domdec_comm_t *comm;
8078     gmx_domdec_zones_t *zones;
8079     gmx_domdec_comm_dim_t *cd;
8080     gmx_domdec_ind_t *ind;
8081     cginfo_mb_t *cginfo_mb;
8082     gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8083     real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8084     dd_corners_t corners;
8086     rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
8087     real skew_fac2_d, skew_fac_01;
8094         fprintf(debug, "Setting up DD communication\n");
     /* cg_cm selection per cutoff scheme (branch bodies elided here) */
8099     switch (fr->cutoff_scheme)
8108             gmx_incons("unimplemented");
     /* A dimension needs triclinic distance checks as soon as any of the
      * DD dimensions up to and including it is triclinic.
      */
8112     for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8114         dim = dd->dim[dim_ind];
8116         /* Check if we need to use triclinic distances */
8117         tric_dist[dim_ind] = 0;
8118         for (i = 0; i <= dim_ind; i++)
8120             if (ddbox->tric_dir[dd->dim[i]])
8122                 tric_dist[dim_ind] = 1;
8127     bBondComm = comm->bBondComm;
8129     /* Do we need to determine extra distances for multi-body bondeds? */
8130     bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8132     /* Do we need to determine extra distances for only two-body bondeds? */
8133     bDist2B = (bBondComm && !bDistMB);
     /* Squared cut-offs used in the distance checks */
8135     r_comm2  = sqr(comm->cutoff);
8136     r_bcomm2 = sqr(comm->cutoff_mbody);
8140         fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8143     zones = &comm->zones;
8146     dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8147     dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8149     set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8151     /* Triclinic stuff */
8152     normal = ddbox->normal;
8156         v_0 = ddbox->v[dim0];
8157         if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8159             /* Determine the coupling coefficient for the distances
8160              * to the cell planes along dim0 and dim1 through dim2.
8161              * This is required for correct rounding.
             */
8164                 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8167                 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8173         v_1 = ddbox->v[dim1];
8176     zone_cg_range = zones->cg_range;
8177     index_gl      = dd->index_gl;
8178     cgindex       = dd->cgindex;
8179     cginfo_mb     = fr->cginfo_mb;
     /* Zone 0 is the home zone; received cg's are appended after it */
8181     zone_cg_range[0]   = 0;
8182     zone_cg_range[1]   = dd->ncg_home;
8183     comm->zone_ncg1[0] = dd->ncg_home;
8184     pos_cg             = dd->ncg_home;
8186     nat_tot = dd->nat_home;
     /* Main loop: one backward communication per dimension and pulse */
8188     for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8190         dim = dd->dim[dim_ind];
8191         cd  = &comm->cd[dim_ind];
8193         if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8195             /* No pbc in this dimension, the first node should not comm. */
8203         v_d         = ddbox->v[dim];
8204         skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8206         cd->bInPlace = TRUE;
8207         for (p = 0; p < cd->np; p++)
8209             /* Only atoms communicated in the first pulse are used
8210              * for multi-body bonded interactions or for bBondComm.
             */
8212             bDistBonded = ((bDistMB || bDist2B) && p == 0);
8217             for (zone = 0; zone < nzone_send; zone++)
8219                 if (tric_dist[dim_ind] && dim_ind > 0)
8221                     /* Determine slightly more optimized skew_fac's
8223                      * This reduces the number of communicated atoms
8224                      * by about 10% for 3D DD of rhombic dodecahedra.
                     */
8226                     for (dimd = 0; dimd < dim; dimd++)
8228                         sf2_round[dimd] = 1;
8229                         if (ddbox->tric_dir[dimd])
8231                             for (i = dd->dim[dimd]+1; i < DIM; i++)
8233                                 /* If we are shifted in dimension i
8234                                  * and the cell plane is tilted forward
8235                                  * in dimension i, skip this coupling.
                                 */
8237                                 if (!(zones->shift[nzone+zone][i] &&
8238                                       ddbox->v[dimd][i][dimd] >= 0))
8241                                         sqr(ddbox->v[dimd][i][dimd]);
8244                             sf2_round[dimd] = 1/sf2_round[dimd];
8249                 zonei = zone_perm[dim_ind][zone];
8252                     /* Here we permutate the zones to obtain a convenient order
8253                      * for neighbor searching
                     */
8255                     cg0 = zone_cg_range[zonei];
8256                     cg1 = zone_cg_range[zonei+1];
8260                     /* Look only at the cg's received in the previous grid pulse
                     */
8262                     cg1 = zone_cg_range[nzone+zone+1];
8263                     cg0 = cg1 - cd->ind[p-1].nrecv[zone];
             /* Threaded selection of the cg's to send; thread 0 writes
              * directly into the communication buffers, other threads use
              * per-thread buffers that are appended below.
              */
8266 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8267                 for (th = 0; th < comm->nth; th++)
8269                     gmx_domdec_ind_t *ind_p;
8270                     int **ibuf_p, *ibuf_nalloc_p;
8272                     int *nsend_p, *nat_p;
8278                         /* Thread 0 writes in the comm buffers */
8280                         ibuf_p        = &comm->buf_int;
8281                         ibuf_nalloc_p = &comm->nalloc_int;
8282                         vbuf_p        = &comm->vbuf;
8285                         nsend_zone_p  = &ind->nsend[zone];
8289                         /* Other threads write into temp buffers */
8290                         ind_p         = &comm->dth[th].ind;
8291                         ibuf_p        = &comm->dth[th].ibuf;
8292                         ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8293                         vbuf_p        = &comm->dth[th].vbuf;
8294                         nsend_p       = &comm->dth[th].nsend;
8295                         nat_p         = &comm->dth[th].nat;
8296                         nsend_zone_p  = &comm->dth[th].nsend_zone;
8298                         comm->dth[th].nsend      = 0;
8299                         comm->dth[th].nat        = 0;
8300                         comm->dth[th].nsend_zone = 0;
                     /* Static partitioning of the cg range over threads */
8310                     cg0_th = cg0 + ((cg1 - cg0)* th   )/comm->nth;
8311                     cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8314                     /* Get the cg's for this pulse in this zone */
8315                     get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8317                                        dim, dim_ind, dim0, dim1, dim2,
8320                                        normal, skew_fac2_d, skew_fac_01,
8321                                        v_d, v_0, v_1, &corners, sf2_round,
8322                                        bDistBonded, bBondComm,
8326                                        ibuf_p, ibuf_nalloc_p,
8332                 /* Append data of threads>=1 to the communication buffers */
8333                 for (th = 1; th < comm->nth; th++)
8335                     dd_comm_setup_work_t *dth;
8338                     dth = &comm->dth[th];
8340                     ns1 = nsend + dth->nsend_zone;
8341                     if (ns1 > ind->nalloc)
8343                         ind->nalloc = over_alloc_dd(ns1);
8344                         srenew(ind->index, ind->nalloc);
8346                     if (ns1 > comm->nalloc_int)
8348                         comm->nalloc_int = over_alloc_dd(ns1);
8349                         srenew(comm->buf_int, comm->nalloc_int);
8351                     if (ns1 > comm->vbuf.nalloc)
8353                         comm->vbuf.nalloc = over_alloc_dd(ns1);
8354                         srenew(comm->vbuf.v, comm->vbuf.nalloc);
8357                     for (i = 0; i < dth->nsend_zone; i++)
8359                         ind->index[nsend]    = dth->ind.index[i];
8360                         comm->buf_int[nsend] = dth->ibuf[i];
8361                         copy_rvec(dth->vbuf.v[i],
8362                                   comm->vbuf.v[nsend]);
8366                     ind->nsend[zone] += dth->nsend_zone;
8369             /* Clear the counts in case we do not have pbc */
8370             for (zone = nzone_send; zone < nzone; zone++)
8372                 ind->nsend[zone] = 0;
             /* The last two entries carry the cg and atom totals */
8374             ind->nsend[nzone]   = nsend;
8375             ind->nsend[nzone+1] = nat;
8376             /* Communicate the number of cg's and atoms to receive */
8377             dd_sendrecv_int(dd, dim_ind, dddirBackward,
8378                             ind->nsend, nzone+2,
8379                             ind->nrecv, nzone+2);
8381             /* The rvec buffer is also required for atom buffers of size nsend
8382              * in dd_move_x and dd_move_f.
             */
8384             vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8388                 /* We can receive in place if only the last zone is not empty */
8389                 for (zone = 0; zone < nzone-1; zone++)
8391                     if (ind->nrecv[zone] > 0)
8393                         cd->bInPlace = FALSE;
8398                     /* The int buffer is only required here for the cg indices */
8399                     if (ind->nrecv[nzone] > comm->nalloc_int2)
8401                         comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8402                         srenew(comm->buf_int2, comm->nalloc_int2);
8404                     /* The rvec buffer is also required for atom buffers
8405                      * of size nrecv in dd_move_x and dd_move_f.
                     */
8407                     i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8408                     vec_rvec_check_alloc(&comm->vbuf2, i);
8412             /* Make space for the global cg indices */
8413             if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8414                 || dd->cg_nalloc == 0)
8416                 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8417                 srenew(index_gl, dd->cg_nalloc);
8418                 srenew(cgindex, dd->cg_nalloc+1);
8420             /* Communicate the global cg indices */
8423                 recv_i = index_gl + pos_cg;
8427                 recv_i = comm->buf_int2;
8429             dd_sendrecv_int(dd, dim_ind, dddirBackward,
8430                             comm->buf_int, nsend,
8431                             recv_i,        ind->nrecv[nzone]);
8433             /* Make space for cg_cm */
8434             dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8435             if (fr->cutoff_scheme == ecutsGROUP)
8443             /* Communicate cg_cm */
8446                 recv_vr = cg_cm + pos_cg;
8450                 recv_vr = comm->vbuf2.v;
8452             dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8453                              comm->vbuf.v, nsend,
8454                              recv_vr,      ind->nrecv[nzone]);
8456             /* Make the charge group index */
             /* In-place case: extend cgindex/cginfo for the received cg's */
8459                 zone = (p == 0 ? 0 : nzone - 1);
8460                 while (zone < nzone)
8462                     for (cg = 0; cg < ind->nrecv[zone]; cg++)
8464                         cg_gl              = index_gl[pos_cg];
8465                         fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8466                         nrcg               = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
8467                         cgindex[pos_cg+1]  = cgindex[pos_cg] + nrcg;
8470                             /* Update the charge group presence,
8471                              * so we can use it in the next pass of the loop.
                             */
8473                             comm->bLocalCG[cg_gl] = TRUE;
8479                         comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8482                     zone_cg_range[nzone+zone] = pos_cg;
8487                 /* This part of the code is never executed with bBondComm. */
8488                 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8489                                  index_gl, recv_i, cg_cm, recv_vr,
8490                                  cgindex, fr->cginfo_mb, fr->cginfo);
8491                 pos_cg += ind->nrecv[nzone];
8493             nat_tot += ind->nrecv[nzone+1];
8497         /* Store the atom block for easy copying of communication buffers */
8498         make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
     /* Publish the (possibly reallocated) index arrays and totals */
8502     dd->index_gl = index_gl;
8503     dd->cgindex  = cgindex;
8505     dd->ncg_tot          = zone_cg_range[zones->n];
8506     dd->nat_tot          = nat_tot;
8507     comm->nat[ddnatHOME] = dd->nat_home;
8508     for (i = ddnatZONE; i < ddnatNR; i++)
8510         comm->nat[i] = dd->nat_tot;
8515         /* We don't need to update cginfo, since that was alrady done above.
8516          * So we pass NULL for the forcerec.
         */
8518         dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8519                       NULL, comm->bLocalCG);
8524         fprintf(debug, "Finished setting up DD communication, zones:");
8525         for (c = 0; c < zones->n; c++)
8527             fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8529         fprintf(debug, "\n");
/*
 * Set the charge-group boundaries of the i-zones from the zone cg ranges:
 * the end of each i-zone's own cg's and the [jcg0, jcg1) range of j-cg's
 * it interacts with (via the izone j0/j1 zone limits).
 * NOTE(review): some lines are elided in this view; code kept byte-identical.
 */
8533 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8537     for (c = 0; c < zones->nizone; c++)
8539         zones->izone[c].cg1  = zones->cg_range[c+1];
8540         zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8541         zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
/*
 * Determine the spatial extent (x0/x1 per dimension) of the DD zones in
 * [zone_start, zone_end), taking staggered grids (bGridJump), triclinic
 * cells and the bonded cut-off into account, and compute the bounding
 * boxes (bb_x0/bb_x1) used for neighbor-search gridding.
 * NOTE(review): many lines are elided in this view; code kept byte-identical,
 * only comments added.
 */
8545 static void set_zones_size(gmx_domdec_t *dd,
8546                            matrix box, const gmx_ddbox_t *ddbox,
8547                            int zone_start, int zone_end)
8549     gmx_domdec_comm_t *comm;
8550     gmx_domdec_zones_t *zones;
8552     int z, zi, zj0, zj1, d, dim;
8555     real size_j, add_tric;
8560     zones = &comm->zones;
8562     /* Do we need to determine extra distances for multi-body bondeds? */
8563     bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8565     for (z = zone_start; z < zone_end; z++)
8567         /* Copy cell limits to zone limits.
8568          * Valid for non-DD dims and non-shifted dims.
         */
8570         copy_rvec(comm->cell_x0, zones->size[z].x0);
8571         copy_rvec(comm->cell_x1, zones->size[z].x1);
8574     for (d = 0; d < dd->ndim; d++)
8578         for (z = 0; z < zones->n; z++)
8580             /* With a staggered grid we have different sizes
8581              * for non-shifted dimensions.
             */
8583             if (dd->bGridJump && zones->shift[z][dim] == 0)
                 /* d == 1: limits from the 1D staggering data,
                  * d == 2: limits from the 2D staggering data.
                  */
8587                     zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8588                     zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8592                     zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8593                     zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
         /* Cut-off distances, scaled for triclinic (skewed) dimensions */
8599         rcmbs = comm->cutoff_mbody;
8600         if (ddbox->tric_dir[dim])
8602             rcs   /= ddbox->skew_fac[dim];
8603             rcmbs /= ddbox->skew_fac[dim];
8606         /* Set the lower limit for the shifted zone dimensions */
8607         for (z = zone_start; z < zone_end; z++)
8609             if (zones->shift[z][dim] > 0)
8612                 if (!dd->bGridJump || d == 0)
8614                     zones->size[z].x0[dim] = comm->cell_x1[dim];
8615                     zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8619                     /* Here we take the lower limit of the zone from
8620                      * the lowest domain of the zone below.
                     */
8624                         zones->size[z].x0[dim] =
8625                             comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8631                             zones->size[z].x0[dim] =
8632                                 zones->size[zone_perm[2][z-4]].x0[dim];
8636                             zones->size[z].x0[dim] =
8637                                 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8640                     /* A temporary limit, is updated below */
8641                     zones->size[z].x1[dim] = zones->size[z].x0[dim];
                 /* Multi-body bondeds: extend the upper limit using the
                  * non-shifted i-zones plus the bonded cut-off.
                  */
8645                         for (zi = 0; zi < zones->nizone; zi++)
8647                             if (zones->shift[zi][dim] == 0)
8649                                 /* This takes the whole zone into account.
8650                                  * With multiple pulses this will lead
8651                                  * to a larger zone then strictly necessary.
                                 */
8653                                 zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8654                                                              zones->size[zi].x1[dim]+rcmbs);
8662         /* Loop over the i-zones to set the upper limit of each
8665         for (zi = 0; zi < zones->nizone; zi++)
8667             if (zones->shift[zi][dim] == 0)
8669                 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8671                     if (zones->shift[z][dim] > 0)
8673                         zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8674                                                      zones->size[zi].x1[dim]+rcs);
8681     for (z = zone_start; z < zone_end; z++)
8683         /* Initialization only required to keep the compiler happy */
8684         rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8687         /* To determine the bounding box for a zone we need to find
8688          * the extreme corners of 4, 2 or 1 corners.
         */
8690         nc = 1 << (ddbox->npbcdim - 1);
8692         for (c = 0; c < nc; c++)
8694             /* Set up a zone corner at x=0, ignoring trilinic couplings */
             /* Bit 0 of c selects low/high y, bit 1 selects low/high z */
8698                 corner[YY] = zones->size[z].x0[YY];
8702                 corner[YY] = zones->size[z].x1[YY];
8706                 corner[ZZ] = zones->size[z].x0[ZZ];
8710                 corner[ZZ] = zones->size[z].x1[ZZ];
8712             if (dd->ndim == 1 && box[ZZ][YY] != 0)
8714                 /* With 1D domain decomposition the cg's are not in
8715                  * the triclinic box, but triclinic x-y and rectangular y-z.
8716                  * Shift y back, so it will later end up at 0.
                 */
8718                 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
8720             /* Apply the triclinic couplings */
8721             assert(ddbox->npbcdim <= DIM);
8722             for (i = YY; i < ddbox->npbcdim; i++)
8724                 for (j = XX; j < i; j++)
8726                     corner[j] += corner[i]*box[i][j]/box[i][i];
             /* First corner initializes min/max; the rest update them */
8731                 copy_rvec(corner, corner_min);
8732                 copy_rvec(corner, corner_max);
8736                 for (i = 0; i < DIM; i++)
8738                     corner_min[i] = min(corner_min[i], corner[i]);
8739                     corner_max[i] = max(corner_max[i], corner[i]);
8743         /* Copy the extreme cornes without offset along x */
8744         for (i = 0; i < DIM; i++)
8746             zones->size[z].bb_x0[i] = corner_min[i];
8747             zones->size[z].bb_x1[i] = corner_max[i];
8749         /* Add the offset along x */
8750         zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
8751         zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
     /* Home-zone cg density, used for ns grid sizing */
8754     if (zone_start == 0)
8757         for (dim = 0; dim < DIM; dim++)
8759             vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8761         zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8766         for (z = zone_start; z < zone_end; z++)
8768             fprintf(debug, "zone %d    %6.3f - %6.3f  %6.3f - %6.3f  %6.3f - %6.3f\n",
8770                     zones->size[z].x0[XX], zones->size[z].x1[XX],
8771                     zones->size[z].x0[YY], zones->size[z].x1[YY],
8772                     zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
8773             fprintf(debug, "zone %d bb %6.3f - %6.3f  %6.3f - %6.3f  %6.3f - %6.3f\n",
8775                     zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
8776                     zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
8777                     zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
/*
 * qsort comparator for gmx_cgsort_t: order by ns grid cell index (nsc)
 * first, then by global topology index (ind_gl) to break ties, which
 * makes the sort order reproducible.
 * NOTE(review): some lines (e.g. the tie check) are elided in this view;
 * code kept byte-identical.
 */
8782 static int comp_cgsort(const void *a, const void *b)
8786     gmx_cgsort_t *cga, *cgb;
8787     cga = (gmx_cgsort_t *)a;
8788     cgb = (gmx_cgsort_t *)b;
8790     comp = cga->nsc - cgb->nsc;
8793         comp = cga->ind_gl - cgb->ind_gl;
/*
 * Permute an int array according to the sort order: gather a[sort[i].ind]
 * into buf, then copy back (copy-back lines elided in this view).
 * buf must hold at least n elements.
 */
8799 static void order_int_cg(int n, const gmx_cgsort_t *sort,
8804     /* Order the data */
8805     for (i = 0; i < n; i++)
8807         buf[i] = a[sort[i].ind];
8810     /* Copy back to the original array */
8811     for (i = 0; i < n; i++)
/*
 * Permute an rvec array according to the sort order, using buf as
 * scratch: gather v[sort[i].ind] into buf, then copy back into v.
 * buf must hold at least n rvecs.
 */
8817 static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8822     /* Order the data */
8823     for (i = 0; i < n; i++)
8825         copy_rvec(v[sort[i].ind], buf[i]);
8828     /* Copy back to the original array */
8829     for (i = 0; i < n; i++)
8831         copy_rvec(buf[i], v[i]);
/*
 * Permute a per-atom rvec array according to the charge-group sort order.
 * cgindex maps cg index -> first atom index; when cgindex is NULL every
 * cg has one atom and the cheaper per-cg routine is used instead.
 */
8835 static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8838     int a, atot, cg, cg0, cg1, i;
8840     if (cgindex == NULL)
8842         /* Avoid the useless loop of the atoms within a cg */
8843         order_vec_cg(ncg, sort, v, buf);
8848     /* Order the data */
     /* Gather the atoms of each cg, in sorted cg order, into buf */
8850     for (cg = 0; cg < ncg; cg++)
8852         cg0 = cgindex[sort[cg].ind];
8853         cg1 = cgindex[sort[cg].ind+1];
8854         for (i = cg0; i < cg1; i++)
8856             copy_rvec(v[i], buf[a]);
8862     /* Copy back to the original array */
8863     for (a = 0; a < atot; a++)
8865         copy_rvec(buf[a], v[a]);
/*
 * Merge two cg sort lists into sort1: sort2 is already ordered, sort_new
 * is first qsorted, then the two are merged in (nsc, ind_gl) order.
 * This is cheaper than a full qsort when most cg's kept their order.
 */
8869 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8870                          int nsort_new, gmx_cgsort_t *sort_new,
8871                          gmx_cgsort_t *sort1)
8875     /* The new indices are not very ordered, so we qsort them */
8876     gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8878     /* sort2 is already ordered, so now we can merge the two arrays */
     /* Standard two-way merge: exhaust one list, or take the smaller head */
8882     while (i2 < nsort2 || i_new < nsort_new)
8886             sort1[i1++] = sort_new[i_new++];
8888         else if (i_new == nsort_new)
8890             sort1[i1++] = sort2[i2++];
8892         else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8893                  (sort2[i2].nsc == sort_new[i_new].nsc &&
8894                   sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8896             sort1[i1++] = sort2[i2++];
8900             sort1[i1++] = sort_new[i_new++];
/*
 * Determine the new order of the home charge groups for the group
 * cut-off scheme, based on the ns grid cell indices.  When the previous
 * count ncg_home_old is known, cg's that stayed in their grid cell are
 * already ordered and only the moved/new ones are sorted and merged
 * (ordered_sort); otherwise a full qsort is done.  Returns the number of
 * cg's that remain home (return statement elided in this view).
 * NOTE(review): lines are elided here; code kept byte-identical.
 */
8905 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8907     gmx_domdec_sort_t *sort;
8908     gmx_cgsort_t *cgsort, *sort_i;
8909     int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8910     int sort_last, sort_skip;
8912     sort = dd->comm->sort;
     /* a[i] is the ns grid cell index of home cg i */
8914     a = fr->ns.grid->cell_index;
     /* Cell indices >= moved flag cg's that moved to another rank */
8916     moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
8918     if (ncg_home_old >= 0)
8920         /* The charge groups that remained in the same ns grid cell
8921          * are completely ordered. So we can sort efficiently by sorting
8922          * the charge groups that did move into the stationary list.
         */
8927         for (i = 0; i < dd->ncg_home; i++)
8929             /* Check if this cg did not move to another node */
8932                 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8934                     /* This cg is new on this node or moved ns grid cell */
8935                     if (nsort_new >= sort->sort_new_nalloc)
8937                         sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8938                         srenew(sort->sort_new, sort->sort_new_nalloc);
8940                     sort_i = &(sort->sort_new[nsort_new++]);
8944                     /* This cg did not move */
8945                     sort_i = &(sort->sort2[nsort2++]);
8947                 /* Sort on the ns grid cell indices
8948                  * and the global topology index.
8949                  * index_gl is irrelevant with cell ns,
8950                  * but we set it here anyhow to avoid a conditional.
                 */
8953                 sort_i->ind_gl = dd->index_gl[i];
8960             fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8963         /* Sort efficiently */
8964         ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8969         cgsort = sort->sort;
8971         for (i = 0; i < dd->ncg_home; i++)
8973             /* Sort on the ns grid cell indices
8974              * and the global topology index
             */
8976             cgsort[i].nsc    = a[i];
8977             cgsort[i].ind_gl = dd->index_gl[i];
             /* Only cg's that did not move count as new home cg's */
8979             if (cgsort[i].nsc < moved)
8986             fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8988         /* Determine the order of the charge groups using qsort */
8989         gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
/*
 * Determine the home cg order for the Verlet (nbnxn) cut-off scheme by
 * copying the atom order from the nbnxn grid.  Returns the number of
 * home atoms kept (return statement elided in this view).
 */
8995 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
8998     int ncg_new, i, *a, na;
9000     sort = dd->comm->sort->sort;
9002     nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
     /* Copy the grid order into the sort array (filtering elided here) */
9005     for (i = 0; i < na; i++)
9009             sort[ncg_new].ind = a[i];
/*
 * Sort the local state on charge-group position: compute the new order
 * (scheme dependent), then permute all distributed state vectors, cg_cm,
 * the global cg index, cginfo and cgindex accordingly.  This enables
 * exact restarts and improves memory locality.
 * NOTE(review): lines are elided in this view; code kept byte-identical,
 * only comments added.
 */
9017 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
9020     gmx_domdec_sort_t *sort;
9021     gmx_cgsort_t *cgsort, *sort_i;
9023     int ncg_new, i, *ibuf, cgsize;
9026     sort = dd->comm->sort;
9028     if (dd->ncg_home > sort->sort_nalloc)
9030         sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9031         srenew(sort->sort, sort->sort_nalloc);
9032         srenew(sort->sort2, sort->sort_nalloc);
9034     cgsort = sort->sort;
     /* The sort order is scheme specific */
9036     switch (fr->cutoff_scheme)
9039             ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9042             ncg_new = dd_sort_order_nbnxn(dd, fr);
9045             gmx_incons("unimplemented");
9049     /* We alloc with the old size, since cgindex is still old */
9050     vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9051     vbuf = dd->comm->vbuf.v;
9055         cgindex = dd->cgindex;
9062     /* Remove the charge groups which are no longer at home here */
9063     dd->ncg_home = ncg_new;
9066         fprintf(debug, "Set the new home charge group count to %d\n",
9070     /* Reorder the state */
     /* Only distributed entries present in state->flags are permuted */
9071     for (i = 0; i < estNR; i++)
9073         if (EST_DISTR(i) && (state->flags & (1<<i)))
9078                     order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9081                     order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9084                     order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9087                     order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9091                 case estDISRE_INITF:
9092                 case estDISRE_RM3TAV:
9093                 case estORIRE_INITF:
9095                     /* No ordering required */
9098                     gmx_incons("Unknown state entry encountered in dd_sort_state");
9103     if (fr->cutoff_scheme == ecutsGROUP)
9106         order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9109     if (dd->ncg_home+1 > sort->ibuf_nalloc)
9111         sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9112         srenew(sort->ibuf, sort->ibuf_nalloc);
9115     /* Reorder the global cg index */
9116     order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9117     /* Reorder the cginfo */
9118     order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9119     /* Rebuild the local cg index */
     /* Prefix-sum the sorted cg sizes into a fresh cgindex */
9123         for (i = 0; i < dd->ncg_home; i++)
9125             cgsize    = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9126             ibuf[i+1] = ibuf[i] + cgsize;
9128         for (i = 0; i < dd->ncg_home+1; i++)
9130             dd->cgindex[i] = ibuf[i];
9135         for (i = 0; i < dd->ncg_home+1; i++)
9140     /* Set the home atom number */
9141     dd->nat_home = dd->cgindex[dd->ncg_home];
9143     if (fr->cutoff_scheme == ecutsVERLET)
9145         /* The atoms are now exactly in grid order, update the grid order */
9146         nbnxn_set_atomorder(fr->nbv->nbs);
9150         /* Copy the sorted ns cell indices back to the ns grid struct */
9151         for (i = 0; i < dd->ncg_home; i++)
9153             fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9155         fr->ns.grid->nr = dd->ncg_home;
/*
 * Accumulate the per-category communicated atom counts (the increments
 * between consecutive ddnat levels) into the running sums used by
 * print_dd_statistics.
 */
9159 static void add_dd_statistics(gmx_domdec_t *dd)
9161     gmx_domdec_comm_t *comm;
9166     for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9168         comm->sum_nat[ddnat-ddnatZONE] +=
9169             comm->nat[ddnat] - comm->nat[ddnat-1];
/*
 * Reset all DD statistics accumulators (per-category atom sums, load
 * counters) so that total-run counting starts fresh.
 * NOTE(review): some reset lines are elided in this view.
 */
9174 void reset_dd_statistics_counters(gmx_domdec_t *dd)
9176     gmx_domdec_comm_t *comm;
9181     /* Reset all the statistics and counters for total run counting */
9182     for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9184         comm->sum_nat[ddnat-ddnatZONE] = 0;
9188     comm->load_step = 0;
9191     clear_ivec(comm->load_lim);
/*
 * Sum the DD communication statistics over all ranks and print the
 * average number of atoms communicated per step per category (force,
 * vsites, LINCS constraints) to the log file, followed by the DD load
 * balance report for dynamics runs.
 * NOTE(review): several lines are elided in this view; code kept
 * byte-identical.
 */
9196 void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9198     gmx_domdec_comm_t *comm;
9202     comm = cr->dd->comm;
     /* Reduce the per-rank sums over the whole simulation */
9204     gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9211     fprintf(fplog, "\n    D O M A I N   D E C O M P O S I T I O N   S T A T I S T I C S\n\n");
9213     for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
         /* Average atoms communicated per partitioning step */
9215         av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9220                 " av. #atoms communicated per step for force:  %d x %.1f\n",
9224                 if (cr->dd->vsite_comm)
                 /* Factor: 3 passes with PME/Ewald electrostatics, else 2 */
9227                         " av. #atoms communicated per step for vsites: %d x %.1f\n",
9228                         (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9233                 if (cr->dd->constraint_comm)
9236                         " av. #atoms communicated per step for LINCS:  %d x %.1f\n",
9237                         1 + ir->nLincsIter, av);
9241                 gmx_incons(" Unknown type for DD statistics");
9244     fprintf(fplog, "\n");
9246     if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9248         print_dd_load_av(fplog, cr->dd);
9252 void dd_partition_system(FILE *fplog,
9255 gmx_bool bMasterState,
9257 t_state *state_global,
9258 gmx_mtop_t *top_global,
9260 t_state *state_local,
9263 gmx_localtop_t *top_local,
9266 gmx_shellfc_t shellfc,
9267 gmx_constr_t constr,
9269 gmx_wallcycle_t wcycle,
9273 gmx_domdec_comm_t *comm;
9274 gmx_ddbox_t ddbox = {0};
9276 gmx_int64_t step_pcoupl;
9277 rvec cell_ns_x0, cell_ns_x1;
9278 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9279 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9280 gmx_bool bRedist, bSortCG, bResortAll;
9281 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9288 bBoxChanged = (bMasterState || DEFORM(*ir));
9289 if (ir->epc != epcNO)
9291 /* With nstpcouple > 1 pressure coupling happens.
9292 * one step after calculating the pressure.
9293 * Box scaling happens at the end of the MD step,
9294 * after the DD partitioning.
9295 * We therefore have to do DLB in the first partitioning
9296 * after an MD step where P-coupling occured.
9297 * We need to determine the last step in which p-coupling occurred.
9298 * MRS -- need to validate this for vv?
9303 step_pcoupl = step - 1;
9307 step_pcoupl = ((step - 1)/n)*n + 1;
9309 if (step_pcoupl >= comm->partition_step)
9315 bNStGlobalComm = (step % nstglobalcomm == 0);
9317 if (!comm->bDynLoadBal)
9323 /* Should we do dynamic load balacing this step?
9324 * Since it requires (possibly expensive) global communication,
9325 * we might want to do DLB less frequently.
9327 if (bBoxChanged || ir->epc != epcNO)
9329 bDoDLB = bBoxChanged;
9333 bDoDLB = bNStGlobalComm;
9337 /* Check if we have recorded loads on the nodes */
9338 if (comm->bRecordLoad && dd_load_count(comm))
9340 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9342 /* Check if we should use DLB at the second partitioning
9343 * and every 100 partitionings,
9344 * so the extra communication cost is negligible.
9346 n = max(100, nstglobalcomm);
9347 bCheckDLB = (comm->n_load_collect == 0 ||
9348 comm->n_load_have % n == n-1);
9355 /* Print load every nstlog, first and last step to the log file */
9356 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9357 comm->n_load_collect == 0 ||
9359 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9361 /* Avoid extra communication due to verbose screen output
9362 * when nstglobalcomm is set.
9364 if (bDoDLB || bLogLoad || bCheckDLB ||
9365 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9367 get_load_distribution(dd, wcycle);
9372 dd_print_load(fplog, dd, step-1);
9376 dd_print_load_verbose(dd);
9379 comm->n_load_collect++;
9383 /* Since the timings are node dependent, the master decides */
9387 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
9390 fprintf(debug, "step %s, imb loss %f\n",
9391 gmx_step_str(step, sbuf),
9392 dd_force_imb_perf_loss(dd));
9395 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9398 turn_on_dlb(fplog, cr, step);
9403 comm->n_load_have++;
9406 cgs_gl = &comm->cgs_gl;
9411 /* Clear the old state */
9412 clear_dd_indices(dd, 0, 0);
9415 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9416 TRUE, cgs_gl, state_global->x, &ddbox);
9418 get_cg_distribution(fplog, step, dd, cgs_gl,
9419 state_global->box, &ddbox, state_global->x);
9421 dd_distribute_state(dd, cgs_gl,
9422 state_global, state_local, f);
9424 dd_make_local_cgs(dd, &top_local->cgs);
9426 /* Ensure that we have space for the new distribution */
9427 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9429 if (fr->cutoff_scheme == ecutsGROUP)
9431 calc_cgcm(fplog, 0, dd->ncg_home,
9432 &top_local->cgs, state_local->x, fr->cg_cm);
9435 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9437 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9439 else if (state_local->ddp_count != dd->ddp_count)
9441 if (state_local->ddp_count > dd->ddp_count)
9443 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9446 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9448 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9451 /* Clear the old state */
9452 clear_dd_indices(dd, 0, 0);
9454 /* Build the new indices */
9455 rebuild_cgindex(dd, cgs_gl->index, state_local);
9456 make_dd_indices(dd, cgs_gl->index, 0);
9457 ncgindex_set = dd->ncg_home;
9459 if (fr->cutoff_scheme == ecutsGROUP)
9461 /* Redetermine the cg COMs */
9462 calc_cgcm(fplog, 0, dd->ncg_home,
9463 &top_local->cgs, state_local->x, fr->cg_cm);
9466 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
9468 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9470 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9471 TRUE, &top_local->cgs, state_local->x, &ddbox);
9473 bRedist = comm->bDynLoadBal;
9477 /* We have the full state, only redistribute the cgs */
9479 /* Clear the non-home indices */
9480 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9483 /* Avoid global communication for dim's without pbc and -gcom */
9484 if (!bNStGlobalComm)
9486 copy_rvec(comm->box0, ddbox.box0 );
9487 copy_rvec(comm->box_size, ddbox.box_size);
9489 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9490 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9495 /* For dim's without pbc and -gcom */
9496 copy_rvec(ddbox.box0, comm->box0 );
9497 copy_rvec(ddbox.box_size, comm->box_size);
9499 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9502 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9504 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9507 /* Check if we should sort the charge groups */
9508 if (comm->nstSortCG > 0)
9510 bSortCG = (bMasterState ||
9511 (bRedist && (step % comm->nstSortCG == 0)));
9518 ncg_home_old = dd->ncg_home;
9523 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9525 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9527 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9529 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9532 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9534 &comm->cell_x0, &comm->cell_x1,
9535 dd->ncg_home, fr->cg_cm,
9536 cell_ns_x0, cell_ns_x1, &grid_density);
9540 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9543 switch (fr->cutoff_scheme)
9546 copy_ivec(fr->ns.grid->n, ncells_old);
9547 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9548 state_local->box, cell_ns_x0, cell_ns_x1,
9549 fr->rlistlong, grid_density);
9552 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
9555 gmx_incons("unimplemented");
9557 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9558 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9562 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9564 /* Sort the state on charge group position.
9565 * This enables exact restarts from this step.
9566 * It also improves performance by about 15% with larger numbers
9567 * of atoms per node.
9570 /* Fill the ns grid with the home cell,
9571 * so we can sort with the indices.
9573 set_zones_ncg_home(dd);
9575 switch (fr->cutoff_scheme)
9578 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9580 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9582 comm->zones.size[0].bb_x0,
9583 comm->zones.size[0].bb_x1,
9585 comm->zones.dens_zone0,
9588 ncg_moved, bRedist ? comm->moved : NULL,
9589 fr->nbv->grp[eintLocal].kernel_type,
9590 fr->nbv->grp[eintLocal].nbat);
9592 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
9595 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9596 0, dd->ncg_home, fr->cg_cm);
9598 copy_ivec(fr->ns.grid->n, ncells_new);
9601 gmx_incons("unimplemented");
9604 bResortAll = bMasterState;
9606 /* Check if we can use the old order and ns grid cell indices
9607 * of the charge groups to sort the charge groups efficiently.
9609 if (ncells_new[XX] != ncells_old[XX] ||
9610 ncells_new[YY] != ncells_old[YY] ||
9611 ncells_new[ZZ] != ncells_old[ZZ])
9618 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9619 gmx_step_str(step, sbuf), dd->ncg_home);
9621 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9622 bResortAll ? -1 : ncg_home_old);
9623 /* Rebuild all the indices */
9624 ga2la_clear(dd->ga2la);
9627 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9630 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9632 /* Set up the communication and communicate the coordinates */
9633 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9635 /* Set the indices */
9636 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9638 /* Set the charge group boundaries for neighbor searching */
9639 set_cg_boundaries(&comm->zones);
9641 if (fr->cutoff_scheme == ecutsVERLET)
9643 set_zones_size(dd, state_local->box, &ddbox,
9644 bSortCG ? 1 : 0, comm->zones.n);
9647 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9650 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9651 -1,state_local->x,state_local->box);
9654 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9656 /* Extract a local topology from the global topology */
9657 for (i = 0; i < dd->ndim; i++)
9659 np[dd->dim[i]] = comm->cd[i].np;
9661 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9662 comm->cellsize_min, np,
9664 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9665 vsite, top_global, top_local);
9667 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9669 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9671 /* Set up the special atom communication */
9672 n = comm->nat[ddnatZONE];
9673 for (i = ddnatZONE+1; i < ddnatNR; i++)
9678 if (vsite && vsite->n_intercg_vsite)
9680 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9684 if (dd->bInterCGcons || dd->bInterCGsettles)
9686 /* Only for inter-cg constraints we need special code */
9687 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9688 constr, ir->nProjOrder,
9689 top_local->idef.il);
9693 gmx_incons("Unknown special atom type setup");
9698 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9700 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9702 /* Make space for the extra coordinates for virtual site
9703 * or constraint communication.
9705 state_local->natoms = comm->nat[ddnatNR-1];
9706 if (state_local->natoms > state_local->nalloc)
9708 dd_realloc_state(state_local, f, state_local->natoms);
9711 if (fr->bF_NoVirSum)
9713 if (vsite && vsite->n_intercg_vsite)
9715 nat_f_novirsum = comm->nat[ddnatVSITE];
9719 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9721 nat_f_novirsum = dd->nat_tot;
9725 nat_f_novirsum = dd->nat_home;
9734 /* Set the number of atoms required for the force calculation.
9735 * Forces need to be constrained when using a twin-range setup
9736 * or with energy minimization. For simple simulations we could
9737 * avoid some allocation, zeroing and copying, but this is
9738 * probably not worth the complications and checking.
9740 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9741 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9743 /* We make all the mdatoms up to nat_tot_con.
9744 * We could save some work by only setting invmass
9745 * between nat_tot and nat_tot_con.
9747 /* This call also sets the new number of home particles to dd->nat_home */
9748 atoms2md(top_global, ir,
9749 comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
9751 /* Now we have the charges we can sort the FE interactions */
9752 dd_sort_local_top(dd, mdatoms, top_local);
9756 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9757 split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
9762 /* Make the local shell stuff, currently no communication is done */
9763 make_local_shells(cr, mdatoms, shellfc);
9766 if (ir->implicit_solvent)
9768 make_local_gb(cr, fr->born, ir->gb_algorithm);
9771 setup_bonded_threading(fr, &top_local->idef);
9773 if (!(cr->duty & DUTY_PME))
9775 /* Send the charges and/or c6/sigmas to our PME only node */
9776 gmx_pme_send_parameters(cr,
9778 mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9779 mdatoms->chargeA, mdatoms->chargeB,
9780 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9781 mdatoms->sigmaA, mdatoms->sigmaB,
9782 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9787 set_constraints(constr, top_local, ir, mdatoms, cr);
9790 if (ir->ePull != epullNO)
9792 /* Update the local pull groups */
9793 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9798 /* Update the local rotation groups */
9799 dd_make_local_rotation_groups(dd, ir->rot);
9802 if (ir->eSwapCoords != eswapNO)
9804 /* Update the local groups needed for ion swapping */
9805 dd_make_local_swap_groups(dd, ir->swap);
9808 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9809 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9811 add_dd_statistics(dd);
9813 /* Make sure we only count the cycles for this DD partitioning */
9814 clear_dd_cycle_counts(dd);
9816 /* Because the order of the atoms might have changed since
9817 * the last vsite construction, we need to communicate the constructing
9818 * atom coordinates again (for spreading the forces this MD step).
9820 dd_move_x_vsites(dd, state_local->box, state_local->x);
9822 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9824 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9826 dd_move_x(dd, state_local->box, state_local->x);
9827 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9828 -1, state_local->x, state_local->box);
9831 /* Store the partitioning step */
9832 comm->partition_step = step;
9834 /* Increase the DD partitioning counter */
9836 /* The state currently matches this DD partitioning count, store it */
9837 state_local->ddp_count = dd->ddp_count;
9840 /* The DD master node knows the complete cg distribution,
9841 * store the count so we can possibly skip the cg info communication.
9843 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9846 if (comm->DD_debug > 0)
9848 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9849 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9850 "after partitioning");