Bug Summary

File: gromacs/mdlib/domdec.c
Location: line 2513, column 5
Description: Value stored to 'bLocalCG' is never read
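The diagnostic refers to a dead store. As a quick orientation, the following minimal sketch (not taken from domdec.c; the function name and values are hypothetical, for illustration only) shows the pattern the analyzer's dead-store checker reports: a value is written to a local variable and then overwritten or discarded before it is ever read.

    /* Hypothetical dead-store example, not GROMACS code */
    static int dead_store_example(int n)
    {
        int bLocalCG;

        bLocalCG = n*2;    /* "Value stored to 'bLocalCG' is never read" */
        bLocalCG = n + 1;  /* only this value is actually used */

        return bLocalCG;
    }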

Annotated Source Code

1/*
2 * This file is part of the GROMACS molecular simulation package.
3 *
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
8 *
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
13 *
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 *
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
31 *
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
34 */
35
36#ifdef HAVE_CONFIG_H
37#include <config.h>
38#endif
39
40#include <stdio.h>
41#include <time.h>
42#include <math.h>
43#include <string.h>
44#include <stdlib.h>
45#include <assert.h>
46
47#include "typedefs.h"
48#include "gromacs/utility/smalloc.h"
49#include "gromacs/utility/fatalerror.h"
50#include "network.h"
51#include "gromacs/math/vec.h"
52#include "domdec.h"
53#include "domdec_network.h"
54#include "nrnb.h"
55#include "pbc.h"
56#include "chargegroup.h"
57#include "constr.h"
58#include "mdatoms.h"
59#include "names.h"
60#include "force.h"
61#include "pme.h"
62#include "mdrun.h"
63#include "nsgrid.h"
64#include "shellfc.h"
65#include "mtop_util.h"
66#include "gmx_ga2la.h"
67#include "macros.h"
68#include "nbnxn_search.h"
69#include "bondf.h"
70#include "gmx_omp_nthreads.h"
71#include "gpu_utils.h"
72
73#include "gromacs/utility/futil.h"
74#include "gromacs/fileio/gmxfio.h"
75#include "gromacs/fileio/pdbio.h"
76#include "gromacs/imd/imd.h"
77#include "gromacs/pulling/pull.h"
78#include "gromacs/pulling/pull_rotation.h"
79#include "gromacs/swap/swapcoords.h"
80#include "gromacs/timing/wallcycle.h"
81#include "gromacs/utility/basenetwork.h"
82#include "gromacs/utility/gmxmpi.h"
83#include "gromacs/utility/qsort_threadsafe.h"
84
85#define DDRANK(dd, rank)    (rank)
86#define DDMASTERRANK(dd)   (dd->masterrank)
87
88typedef struct gmx_domdec_master
89{
90 /* The cell boundaries */
91 real **cell_x;
92 /* The global charge group division */
93 int *ncg; /* Number of home charge groups for each node */
94 int *index; /* Index of nnodes+1 into cg */
95 int *cg; /* Global charge group index */
96 int *nat; /* Number of home atoms for each node. */
97 int *ibuf; /* Buffer for communication */
98 rvec *vbuf; /* Buffer for state scattering and gathering */
99} gmx_domdec_master_t;
100
101typedef struct
102{
103 /* The numbers of charge groups to send and receive for each cell
104 * that requires communication, the last entry contains the total
105 * number of atoms that needs to be communicated.
106 */
107 int nsend[DD_MAXIZONE+2];
108 int nrecv[DD_MAXIZONE+2];
109 /* The charge groups to send */
110 int *index;
111 int nalloc;
112 /* The atom range for non-in-place communication */
113 int cell2at0[DD_MAXIZONE];
114 int cell2at1[DD_MAXIZONE];
115} gmx_domdec_ind_t;
116
117typedef struct
118{
119 int np; /* Number of grid pulses in this dimension */
120 int np_dlb; /* For dlb, for use with edlbAUTO */
121 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
122 int np_nalloc;
123 gmx_bool bInPlace; /* Can we communicate in place? */
124} gmx_domdec_comm_dim_t;
125
126typedef struct
127{
128 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
129 real *cell_f; /* State var.: cell boundaries, box relative */
130 real *old_cell_f; /* Temp. var.: old cell size */
131 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
132 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
133 real *bound_min; /* Temp. var.: lower limit for cell boundary */
134 real *bound_max; /* Temp. var.: upper limit for cell boundary */
135 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
136 real *buf_ncd; /* Temp. var. */
137} gmx_domdec_root_t;
138
139#define DD_NLOAD_MAX 9
140
141/* Here floats are accurate enough, since these variables
142 * only influence the load balancing, not the actual MD results.
143 */
144typedef struct
145{
146 int nload;
147 float *load;
148 float sum;
149 float max;
150 float sum_m;
151 float cvol_min;
152 float mdf;
153 float pme;
154 int flags;
155} gmx_domdec_load_t;
156
157typedef struct
158{
159 int nsc;
160 int ind_gl;
161 int ind;
162} gmx_cgsort_t;
163
164typedef struct
165{
166 gmx_cgsort_t *sort;
167 gmx_cgsort_t *sort2;
168 int sort_nalloc;
169 gmx_cgsort_t *sort_new;
170 int sort_new_nalloc;
171 int *ibuf;
172 int ibuf_nalloc;
173} gmx_domdec_sort_t;
174
175typedef struct
176{
177 rvec *v;
178 int nalloc;
179} vec_rvec_t;
180
181/* This enum determines the order of the coordinates.
182 * ddnatHOME and ddnatZONE should be first and second,
183 * the others can be ordered as wanted.
184 */
185enum {
186 ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR
187};
188
189enum {
190 edlbAUTO, edlbNO, edlbYES, edlbNR
191};
192const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
193
194typedef struct
195{
196 int dim; /* The dimension */
197 gmx_bool dim_match; /* Tells if DD and PME dims match */
198 int nslab; /* The number of PME slabs in this dimension */
199 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
200 int *pp_min; /* The minimum pp node location, size nslab */
201 int *pp_max; /* The maximum pp node location,size nslab */
202 int maxshift; /* The maximum shift for coordinate redistribution in PME */
203} gmx_ddpme_t;
204
205typedef struct
206{
207 real min0; /* The minimum bottom of this zone */
208 real max1; /* The maximum top of this zone */
209 real min1; /* The minimum top of this zone */
210 real mch0; /* The maximum bottom communication height for this zone */
211 real mch1; /* The maximum top communication height for this zone */
212 real p1_0; /* The bottom value of the first cell in this zone */
213 real p1_1; /* The top value of the first cell in this zone */
214} gmx_ddzone_t;
215
216typedef struct
217{
218 gmx_domdec_ind_t ind;
219 int *ibuf;
220 int ibuf_nalloc;
221 vec_rvec_t vbuf;
222 int nsend;
223 int nat;
224 int nsend_zone;
225} dd_comm_setup_work_t;
226
227typedef struct gmx_domdec_comm
228{
229 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
230 * unless stated otherwise.
231 */
232
233 /* The number of decomposition dimensions for PME, 0: no PME */
234 int npmedecompdim;
235 /* The number of nodes doing PME (PP/PME or only PME) */
236 int npmenodes;
237 int npmenodes_x;
238 int npmenodes_y;
239 /* The communication setup including the PME only nodes */
240 gmx_bool bCartesianPP_PME;
241 ivec ntot;
242 int cartpmedim;
243 int *pmenodes; /* size npmenodes */
244 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
245 * but with bCartesianPP_PME */
246 gmx_ddpme_t ddpme[2];
247
248 /* The DD particle-particle nodes only */
249 gmx_bool bCartesianPP;
250 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
251
252 /* The global charge groups */
253 t_block cgs_gl;
254
255 /* Should we sort the cgs */
256 int nstSortCG;
257 gmx_domdec_sort_t *sort;
258
259 /* Are there charge groups? */
260 gmx_bool bCGs;
261
262 /* Are there bonded and multi-body interactions between charge groups? */
263 gmx_bool bInterCGBondeds;
264 gmx_bool bInterCGMultiBody;
265
266 /* Data for the optional bonded interaction atom communication range */
267 gmx_bool bBondComm;
268 t_blocka *cglink;
269 char *bLocalCG;
270
271 /* The DLB option */
272 int eDLB;
273 /* Are we actually using DLB? */
274 gmx_bool bDynLoadBal;
275
276 /* Cell sizes for static load balancing, first index cartesian */
277 real **slb_frac;
278
279 /* The width of the communicated boundaries */
280 real cutoff_mbody;
281 real cutoff;
282 /* The minimum cell size (including triclinic correction) */
283 rvec cellsize_min;
284 /* For dlb, for use with edlbAUTO */
285 rvec cellsize_min_dlb;
286 /* The lower limit for the DD cell size with DLB */
287 real cellsize_limit;
288 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
289 gmx_bool bVacDLBNoLimit;
290
291 /* With PME load balancing we set limits on DLB */
292 gmx_bool bPMELoadBalDLBLimits;
293 /* DLB needs to take into account that we want to allow this maximum
294 * cut-off (for PME load balancing), this could limit cell boundaries.
295 */
296 real PMELoadBal_max_cutoff;
297
298 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
299 ivec tric_dir;
300 /* box0 and box_size are required with dim's without pbc and -gcom */
301 rvec box0;
302 rvec box_size;
303
304 /* The cell boundaries */
305 rvec cell_x0;
306 rvec cell_x1;
307
308 /* The old location of the cell boundaries, to check cg displacements */
309 rvec old_cell_x0;
310 rvec old_cell_x1;
311
312 /* The communication setup and charge group boundaries for the zones */
313 gmx_domdec_zones_t zones;
314
315 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
316 * cell boundaries of neighboring cells for dynamic load balancing.
317 */
318 gmx_ddzone_t zone_d1[2];
319 gmx_ddzone_t zone_d2[2][2];
320
321 /* The coordinate/force communication setup and indices */
322 gmx_domdec_comm_dim_t cd[DIM];
323 /* The maximum number of cells to communicate with in one dimension */
324 int maxpulse;
325
326 /* Which cg distribution is stored on the master node */
327 int master_cg_ddp_count;
328
329 /* The number of cg's received from the direct neighbors */
330 int zone_ncg1[DD_MAXZONE];
331
332 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
333 int nat[ddnatNR];
334
335 /* Array for signalling if atoms have moved to another domain */
336 int *moved;
337 int moved_nalloc;
338
339 /* Communication buffer for general use */
340 int *buf_int;
341 int nalloc_int;
342
343 /* Communication buffer for general use */
344 vec_rvec_t vbuf;
345
346 /* Temporary storage for thread parallel communication setup */
347 int nth;
348 dd_comm_setup_work_t *dth;
349
350 /* Communication buffers only used with multiple grid pulses */
351 int *buf_int2;
352 int nalloc_int2;
353 vec_rvec_t vbuf2;
354
355 /* Communication buffers for local redistribution */
356 int **cggl_flag;
357 int cggl_flag_nalloc[DIM*2];
358 rvec **cgcm_state;
359 int cgcm_state_nalloc[DIM*2];
360
361 /* Cell sizes for dynamic load balancing */
362 gmx_domdec_root_t **root;
363 real *cell_f_row;
364 real cell_f0[DIM];
365 real cell_f1[DIM];
366 real cell_f_max0[DIM];
367 real cell_f_min1[DIM];
368
369 /* Stuff for load communication */
370 gmx_bool bRecordLoad;
371 gmx_domdec_load_t *load;
372 int nrank_gpu_shared;
373#ifdef GMX_MPI
374 MPI_Comm *mpi_comm_load;
375 MPI_Comm mpi_comm_gpu_shared;
376#endif
377
378 /* Maximum DLB scaling per load balancing step in percent */
379 int dlb_scale_lim;
380
381 /* Cycle counters */
382 float cycl[ddCyclNr];
383 int cycl_n[ddCyclNr];
384 float cycl_max[ddCyclNr];
385 /* Flop counter (0=no, 1=yes, 2=with (eFlop-1)*5% noise) */
386 int eFlop;
387 double flop;
388 int flop_n;
389 /* How often did we have load measurements */
390 int n_load_have;
391 /* How often have we collected the load measurements */
392 int n_load_collect;
393
394 /* Statistics */
395 double sum_nat[ddnatNR-ddnatZONE];
396 int ndecomp;
397 int nload;
398 double load_step;
399 double load_sum;
400 double load_max;
401 ivec load_lim;
402 double load_mdf;
403 double load_pme;
404
405 /* The last partition step */
406 gmx_int64_t partition_step;
407
408 /* Debugging */
409 int nstDDDump;
410 int nstDDDumpGrid;
411 int DD_debug;
412} gmx_domdec_comm_t;
413
414/* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
415#define DD_CGIBS 2
416
417/* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
418#define DD_FLAG_NRCG  65535
419#define DD_FLAG_FW(d) (1<<(16+(d)*2))
420#define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
421
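/* Editorial aside (not part of the scanned source): with the flag layout above,
 * the low 16 bits of a cggl_flag entry hold the charge-group atom count (mask
 * with DD_FLAG_NRCG), while bits 16+2*d and 16+2*d+1 mark a forward or backward
 * move along DD dimension d. For example, a charge group of 3 atoms moving
 * forward along dimension 1 would be encoded as
 *     flag = 3 | DD_FLAG_FW(1)        ( = 3 | (1<<18) )
 * so that (flag & DD_FLAG_NRCG) == 3 and (flag & DD_FLAG_FW(1)) != 0.
 */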
422/* Zone permutation required to obtain consecutive charge groups
423 * for neighbor searching.
424 */
425static const int zone_perm[3][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {3, 0, 1, 2} };
426
427/* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
428 * components see only j zones with that component 0.
429 */
430
431/* The DD zone order */
432static const ivec dd_zo[DD_MAXZONE] =
433{{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
434
435/* The 3D setup */
436#define dd_z3n 8
437#define dd_zp3n 4
438static const ivec dd_zp3[dd_zp3n] = {{0, 0, 8}, {1, 3, 6}, {2, 5, 6}, {3, 5, 7}};
439
440/* The 2D setup */
441#define dd_z2n 4
442#define dd_zp2n 2
443static const ivec dd_zp2[dd_zp2n] = {{0, 0, 4}, {1, 3, 4}};
444
445/* The 1D setup */
446#define dd_z1n 2
447#define dd_zp1n 1
448static const ivec dd_zp1[dd_zp1n] = {{0, 0, 2}};
449
450/* Factors used to avoid problems due to rounding issues */
451#define DD_CELL_MARGIN       1.0001
452#define DD_CELL_MARGIN2      1.00005
453/* Factor to account for pressure scaling during nstlist steps */
454#define DD_PRES_SCALE_MARGIN 1.02
455
456/* Allowed performance loss before we DLB or warn */
457#define DD_PERF_LOSS 0.05
458
459#define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
460
461/* Use separate MPI send and receive commands
462 * when nnodes <= GMX_DD_NNODES_SENDRECV.
463 * This saves memory (and some copying for small nnodes).
464 * For high parallelization scatter and gather calls are used.
465 */
466#define GMX_DD_NNODES_SENDRECV 4
467
468
469/*
470 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
471
472 static void index2xyz(ivec nc,int ind,ivec xyz)
473 {
474 xyz[XX] = ind % nc[XX];
475 xyz[YY] = (ind / nc[XX]) % nc[YY];
476 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
477 }
478 */
479
480/* This order is required to minimize the coordinate communication in PME
481 * which uses decomposition in the x direction.
482 */
483#define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
484
485static void ddindex2xyz(ivec nc, int ind, ivec xyz)
486{
487 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
488 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
489 xyz[ZZ] = ind % nc[ZZ];
490}
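/* Editorial aside (not part of the scanned source): dd_index() and
 * ddindex2xyz() are inverse mappings, with x varying slowest and z fastest,
 * which keeps the ranks needed by PME's x decomposition contiguous.
 * Worked example for nc = {2, 3, 4} and xyz = {1, 2, 3}:
 *     dd_index(nc, xyz) = ((1*3 + 2)*4) + 3 = 23
 *     ddindex2xyz(nc, 23, xyz):  xyz[XX] = 23/(3*4)   = 1
 *                                xyz[YY] = (23/4) % 3 = 2
 *                                xyz[ZZ] = 23 % 4     = 3
 */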
491
492static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
493{
494 int ddindex;
495 int ddnodeid = -1;
496
497 ddindex = dd_index(dd->nc, c);
498 if (dd->comm->bCartesianPP_PME)
499 {
500 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
501 }
502 else if (dd->comm->bCartesianPP)
503 {
504#ifdef GMX_MPI
505 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
506#endif
507 }
508 else
509 {
510 ddnodeid = ddindex;
511 }
512
513 return ddnodeid;
514}
515
516static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox, t_inputrec *ir)
517{
518 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
519}
520
521int ddglatnr(gmx_domdec_t *dd, int i)
522{
523 int atnr;
524
525 if (dd == NULL((void*)0))
526 {
527 atnr = i + 1;
528 }
529 else
530 {
531 if (i >= dd->comm->nat[ddnatNR-1])
532 {
533 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
534 }
535 atnr = dd->gatindex[i] + 1;
536 }
537
538 return atnr;
539}
540
541t_block *dd_charge_groups_global(gmx_domdec_t *dd)
542{
543 return &dd->comm->cgs_gl;
544}
545
546static void vec_rvec_init(vec_rvec_t *v)
547{
548 v->nalloc = 0;
549 v->v = NULL((void*)0);
550}
551
552static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
553{
554 if (n > v->nalloc)
555 {
556 v->nalloc = over_alloc_dd(n);
557 srenew(v->v, v->nalloc);
558 }
559}
560
561void dd_store_state(gmx_domdec_t *dd, t_state *state)
562{
563 int i;
564
565 if (state->ddp_count != dd->ddp_count)
566 {
567 gmx_incons("The state does not match the domain decomposition state");
568 }
569
570 state->ncg_gl = dd->ncg_home;
571 if (state->ncg_gl > state->cg_gl_nalloc)
572 {
573 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
574 srenew(state->cg_gl, state->cg_gl_nalloc);
575 }
576 for (i = 0; i < state->ncg_gl; i++)
577 {
578 state->cg_gl[i] = dd->index_gl[i];
579 }
580
581 state->ddp_count_cg_gl = dd->ddp_count;
582}
583
584gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
585{
586 return &dd->comm->zones;
587}
588
589void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
590 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
591{
592 gmx_domdec_zones_t *zones;
593 int izone, d, dim;
594
595 zones = &dd->comm->zones;
596
597 izone = 0;
598 while (icg >= zones->izone[izone].cg1)
599 {
600 izone++;
601 }
602
603 if (izone == 0)
604 {
605 *jcg0 = icg;
606 }
607 else if (izone < zones->nizone)
608 {
609 *jcg0 = zones->izone[izone].jcg0;
610 }
611 else
612 {
613 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
614 icg, izone, zones->nizone);
615 }
616
617 *jcg1 = zones->izone[izone].jcg1;
618
619 for (d = 0; d < dd->ndim; d++)
620 {
621 dim = dd->dim[d];
622 shift0[dim] = zones->izone[izone].shift0[dim];
623 shift1[dim] = zones->izone[izone].shift1[dim];
624 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
625 {
626 /* A conservative approach, this can be optimized */
627 shift0[dim] -= 1;
628 shift1[dim] += 1;
629 }
630 }
631}
632
633int dd_natoms_vsite(gmx_domdec_t *dd)
634{
635 return dd->comm->nat[ddnatVSITE];
636}
637
638void dd_get_constraint_range(gmx_domdec_t *dd, int *at_start, int *at_end)
639{
640 *at_start = dd->comm->nat[ddnatCON-1];
641 *at_end = dd->comm->nat[ddnatCON];
642}
643
644void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
645{
646 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
647 int *index, *cgindex;
648 gmx_domdec_comm_t *comm;
649 gmx_domdec_comm_dim_t *cd;
650 gmx_domdec_ind_t *ind;
651 rvec shift = {0, 0, 0}, *buf, *rbuf;
652 gmx_bool bPBC, bScrew;
653
654 comm = dd->comm;
655
656 cgindex = dd->cgindex;
657
658 buf = comm->vbuf.v;
659
660 nzone = 1;
661 nat_tot = dd->nat_home;
662 for (d = 0; d < dd->ndim; d++)
663 {
664 bPBC = (dd->ci[dd->dim[d]] == 0);
665 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX0);
666 if (bPBC)
667 {
668 copy_rvec(box[dd->dim[d]], shift);
669 }
670 cd = &comm->cd[d];
671 for (p = 0; p < cd->np; p++)
672 {
673 ind = &cd->ind[p];
674 index = ind->index;
675 n = 0;
676 if (!bPBC)
677 {
678 for (i = 0; i < ind->nsend[nzone]; i++)
679 {
680 at0 = cgindex[index[i]];
681 at1 = cgindex[index[i]+1];
682 for (j = at0; j < at1; j++)
683 {
684 copy_rvec(x[j], buf[n]);
685 n++;
686 }
687 }
688 }
689 else if (!bScrew)
690 {
691 for (i = 0; i < ind->nsend[nzone]; i++)
692 {
693 at0 = cgindex[index[i]];
694 at1 = cgindex[index[i]+1];
695 for (j = at0; j < at1; j++)
696 {
697 /* We need to shift the coordinates */
698 rvec_add(x[j], shift, buf[n]);
699 n++;
700 }
701 }
702 }
703 else
704 {
705 for (i = 0; i < ind->nsend[nzone]; i++)
706 {
707 at0 = cgindex[index[i]];
708 at1 = cgindex[index[i]+1];
709 for (j = at0; j < at1; j++)
710 {
711 /* Shift x */
712 buf[n][XX] = x[j][XX] + shift[XX];
713 /* Rotate y and z.
714 * This operation requires a special shift force
715 * treatment, which is performed in calc_vir.
716 */
717 buf[n][YY] = box[YY][YY] - x[j][YY];
718 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
719 n++;
720 }
721 }
722 }
723
724 if (cd->bInPlace)
725 {
726 rbuf = x + nat_tot;
727 }
728 else
729 {
730 rbuf = comm->vbuf2.v;
731 }
732 /* Send and receive the coordinates */
733 dd_sendrecv_rvec(dd, d, dddirBackward,
734 buf, ind->nsend[nzone+1],
735 rbuf, ind->nrecv[nzone+1]);
736 if (!cd->bInPlace)
737 {
738 j = 0;
739 for (zone = 0; zone < nzone; zone++)
740 {
741 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
742 {
743 copy_rvec(rbuf[j], x[i]);
744 j++;
745 }
746 }
747 }
748 nat_tot += ind->nrecv[nzone+1];
749 }
750 nzone += nzone;
751 }
752}
753
754void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
755{
756 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
757 int *index, *cgindex;
758 gmx_domdec_comm_t *comm;
759 gmx_domdec_comm_dim_t *cd;
760 gmx_domdec_ind_t *ind;
761 rvec *buf, *sbuf;
762 ivec vis;
763 int is;
764 gmx_bool bPBC, bScrew;
765
766 comm = dd->comm;
767
768 cgindex = dd->cgindex;
769
770 buf = comm->vbuf.v;
771
772 n = 0;
773 nzone = comm->zones.n/2;
774 nat_tot = dd->nat_tot;
775 for (d = dd->ndim-1; d >= 0; d--)
776 {
777 bPBC = (dd->ci[dd->dim[d]] == 0);
778 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX0);
779 if (fshift == NULL((void*)0) && !bScrew)
780 {
781 bPBC = FALSE0;
782 }
783 /* Determine which shift vector we need */
784 clear_ivec(vis);
785 vis[dd->dim[d]] = 1;
786 is = IVEC2IS(vis);
787
788 cd = &comm->cd[d];
789 for (p = cd->np-1; p >= 0; p--)
790 {
791 ind = &cd->ind[p];
792 nat_tot -= ind->nrecv[nzone+1];
793 if (cd->bInPlace)
794 {
795 sbuf = f + nat_tot;
796 }
797 else
798 {
799 sbuf = comm->vbuf2.v;
800 j = 0;
801 for (zone = 0; zone < nzone; zone++)
802 {
803 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
804 {
805 copy_rvec(f[i], sbuf[j]);
806 j++;
807 }
808 }
809 }
810 /* Communicate the forces */
811 dd_sendrecv_rvec(dd, d, dddirForward,
812 sbuf, ind->nrecv[nzone+1],
813 buf, ind->nsend[nzone+1]);
814 index = ind->index;
815 /* Add the received forces */
816 n = 0;
817 if (!bPBC)
818 {
819 for (i = 0; i < ind->nsend[nzone]; i++)
820 {
821 at0 = cgindex[index[i]];
822 at1 = cgindex[index[i]+1];
823 for (j = at0; j < at1; j++)
824 {
825 rvec_inc(f[j], buf[n]);
826 n++;
827 }
828 }
829 }
830 else if (!bScrew)
831 {
832 for (i = 0; i < ind->nsend[nzone]; i++)
833 {
834 at0 = cgindex[index[i]];
835 at1 = cgindex[index[i]+1];
836 for (j = at0; j < at1; j++)
837 {
838 rvec_inc(f[j], buf[n]);
839 /* Add this force to the shift force */
840 rvec_inc(fshift[is], buf[n]);
841 n++;
842 }
843 }
844 }
845 else
846 {
847 for (i = 0; i < ind->nsend[nzone]; i++)
848 {
849 at0 = cgindex[index[i]];
850 at1 = cgindex[index[i]+1];
851 for (j = at0; j < at1; j++)
852 {
853 /* Rotate the force */
853 f[j][XX] += buf[n][XX];
854 f[j][YY] -= buf[n][YY];
855 f[j][ZZ] -= buf[n][ZZ];
857 if (fshift)
858 {
859 /* Add this force to the shift force */
860 rvec_inc(fshift[is], buf[n]);
861 }
862 n++;
863 }
864 }
865 }
866 }
867 nzone /= 2;
868 }
869}
870
871void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
872{
873 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
874 int *index, *cgindex;
875 gmx_domdec_comm_t *comm;
876 gmx_domdec_comm_dim_t *cd;
877 gmx_domdec_ind_t *ind;
878 real *buf, *rbuf;
879
880 comm = dd->comm;
881
882 cgindex = dd->cgindex;
883
884 buf = &comm->vbuf.v[0][0];
885
886 nzone = 1;
887 nat_tot = dd->nat_home;
888 for (d = 0; d < dd->ndim; d++)
889 {
890 cd = &comm->cd[d];
891 for (p = 0; p < cd->np; p++)
892 {
893 ind = &cd->ind[p];
894 index = ind->index;
895 n = 0;
896 for (i = 0; i < ind->nsend[nzone]; i++)
897 {
898 at0 = cgindex[index[i]];
899 at1 = cgindex[index[i]+1];
900 for (j = at0; j < at1; j++)
901 {
902 buf[n] = v[j];
903 n++;
904 }
905 }
906
907 if (cd->bInPlace)
908 {
909 rbuf = v + nat_tot;
910 }
911 else
912 {
913 rbuf = &comm->vbuf2.v[0][0];
914 }
915 /* Send and receive the coordinates */
916 dd_sendrecv_real(dd, d, dddirBackward,
917 buf, ind->nsend[nzone+1],
918 rbuf, ind->nrecv[nzone+1]);
919 if (!cd->bInPlace)
920 {
921 j = 0;
922 for (zone = 0; zone < nzone; zone++)
923 {
924 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
925 {
926 v[i] = rbuf[j];
927 j++;
928 }
929 }
930 }
931 nat_tot += ind->nrecv[nzone+1];
932 }
933 nzone += nzone;
934 }
935}
936
937void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
938{
939 int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
940 int *index, *cgindex;
941 gmx_domdec_comm_t *comm;
942 gmx_domdec_comm_dim_t *cd;
943 gmx_domdec_ind_t *ind;
944 real *buf, *sbuf;
945
946 comm = dd->comm;
947
948 cgindex = dd->cgindex;
949
950 buf = &comm->vbuf.v[0][0];
951
952 n = 0;
953 nzone = comm->zones.n/2;
954 nat_tot = dd->nat_tot;
955 for (d = dd->ndim-1; d >= 0; d--)
956 {
957 cd = &comm->cd[d];
958 for (p = cd->np-1; p >= 0; p--)
959 {
960 ind = &cd->ind[p];
961 nat_tot -= ind->nrecv[nzone+1];
962 if (cd->bInPlace)
963 {
964 sbuf = v + nat_tot;
965 }
966 else
967 {
968 sbuf = &comm->vbuf2.v[0][0];
969 j = 0;
970 for (zone = 0; zone < nzone; zone++)
971 {
972 for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
973 {
974 sbuf[j] = v[i];
975 j++;
976 }
977 }
978 }
979 /* Communicate the forces */
980 dd_sendrecv_real(dd, d, dddirForward,
981 sbuf, ind->nrecv[nzone+1],
982 buf, ind->nsend[nzone+1]);
983 index = ind->index;
984 /* Add the received forces */
985 n = 0;
986 for (i = 0; i < ind->nsend[nzone]; i++)
987 {
988 at0 = cgindex[index[i]];
989 at1 = cgindex[index[i]+1];
990 for (j = at0; j < at1; j++)
991 {
992 v[j] += buf[n];
993 n++;
994 }
995 }
996 }
997 nzone /= 2;
998 }
999}
1000
1001static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
1002{
1003 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
1004 d, i, j,
1005 zone->min0, zone->max1,
1006 zone->mch0, zone->mch1,
1007 zone->p1_0, zone->p1_1);
1008}
1009
1010
1011#define DDZONECOMM_MAXZONE 5
1012#define DDZONECOMM_BUFSIZE 3
1013
1014static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
1015 int ddimind, int direction,
1016 gmx_ddzone_t *buf_s, int n_s,
1017 gmx_ddzone_t *buf_r, int n_r)
1018{
1019#define ZBS DDZONECOMM_BUFSIZE
1020 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
1021 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
1022 int i;
1023
1024 for (i = 0; i < n_s; i++)
1025 {
1026 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
1027 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
1028 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
1029 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
1030 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
1031 vbuf_s[i*ZBS+1][2] = 0;
1032 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
1033 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
1034 vbuf_s[i*ZBS+2][2] = 0;
1035 }
1036
1037 dd_sendrecv_rvec(dd, ddimind, direction,
1038 vbuf_s, n_s*ZBS,
1039 vbuf_r, n_r*ZBS);
1040
1041 for (i = 0; i < n_r; i++)
1042 {
1043 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
1044 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
1045 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
1046 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
1047 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
1048 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
1049 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
1050 }
1051
1052#undef ZBS
1053}
1054
1055static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
1056 rvec cell_ns_x0, rvec cell_ns_x1)
1057{
1058 int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
1059 gmx_ddzone_t *zp;
1060 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
1061 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
1062 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
1063 rvec extr_s[2], extr_r[2];
1064 rvec dh;
1065 real dist_d, c = 0, det;
1066 gmx_domdec_comm_t *comm;
1067 gmx_bool bPBC, bUse;
1068
1069 comm = dd->comm;
1070
1071 for (d = 1; d < dd->ndim; d++)
1072 {
1073 dim = dd->dim[d];
1074 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
1075 zp->min0 = cell_ns_x0[dim];
1076 zp->max1 = cell_ns_x1[dim];
1077 zp->min1 = cell_ns_x1[dim];
1078 zp->mch0 = cell_ns_x0[dim];
1079 zp->mch1 = cell_ns_x1[dim];
1080 zp->p1_0 = cell_ns_x0[dim];
1081 zp->p1_1 = cell_ns_x1[dim];
1082 }
1083
1084 for (d = dd->ndim-2; d >= 0; d--)
1085 {
1086 dim = dd->dim[d];
1087 bPBC = (dim < ddbox->npbcdim);
1088
1089 /* Use an rvec to store two reals */
1090 extr_s[d][0] = comm->cell_f0[d+1];
1091 extr_s[d][1] = comm->cell_f1[d+1];
1092 extr_s[d][2] = comm->cell_f1[d+1];
1093
1094 pos = 0;
1095 /* Store the extremes in the backward sending buffer,
1096 * so they get updated separately from the forward communication.
1097 */
1098 for (d1 = d; d1 < dd->ndim-1; d1++)
1099 {
1100 /* We invert the order to be able to use the same loop for buf_e */
1101 buf_s[pos].min0 = extr_s[d1][1];
1102 buf_s[pos].max1 = extr_s[d1][0];
1103 buf_s[pos].min1 = extr_s[d1][2];
1104 buf_s[pos].mch0 = 0;
1105 buf_s[pos].mch1 = 0;
1106 /* Store the cell corner of the dimension we communicate along */
1107 buf_s[pos].p1_0 = comm->cell_x0[dim];
1108 buf_s[pos].p1_1 = 0;
1109 pos++;
1110 }
1111
1112 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1113 pos++;
1114
1115 if (dd->ndim == 3 && d == 0)
1116 {
1117 buf_s[pos] = comm->zone_d2[0][1];
1118 pos++;
1119 buf_s[pos] = comm->zone_d1[0];
1120 pos++;
1121 }
1122
1123 /* We only need to communicate the extremes
1124 * in the forward direction
1125 */
1126 npulse = comm->cd[d].np;
1127 if (bPBC)
1128 {
1129 /* Take the minimum to avoid double communication */
1130 npulse_min = min(npulse, dd->nc[dim]-1-npulse);
1131 }
1132 else
1133 {
1134 /* Without PBC we should really not communicate over
1135 * the boundaries, but implementing that complicates
1136 * the communication setup and therefore we simply
1137 * do all communication, but ignore some data.
1138 */
1139 npulse_min = npulse;
1140 }
1141 for (p = 0; p < npulse_min; p++)
1142 {
1143 /* Communicate the extremes forward */
1144 bUse = (bPBC || dd->ci[dim] > 0);
1145
1146 dd_sendrecv_rvec(dd, d, dddirForward,
1147 extr_s+d, dd->ndim-d-1,
1148 extr_r+d, dd->ndim-d-1);
1149
1150 if (bUse)
1151 {
1152 for (d1 = d; d1 < dd->ndim-1; d1++)
1153 {
1154 extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
1155 extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
1156 extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
1157 }
1158 }
1159 }
1160
1161 buf_size = pos;
1162 for (p = 0; p < npulse; p++)
1163 {
1164 /* Communicate all the zone information backward */
1165 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1166
1167 dd_sendrecv_ddzone(dd, d, dddirBackward,
1168 buf_s, buf_size,
1169 buf_r, buf_size);
1170
1171 clear_rvec(dh);
1172 if (p > 0)
1173 {
1174 for (d1 = d+1; d1 < dd->ndim; d1++)
1175 {
1176 /* Determine the decrease of maximum required
1177 * communication height along d1 due to the distance along d,
1178 * this avoids a lot of useless atom communication.
1179 */
1180 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1181
1182 if (ddbox->tric_dir[dim])
1183 {
1184 /* c is the off-diagonal coupling between the cell planes
1185 * along directions d and d1.
1186 */
1187 c = ddbox->v[dim][dd->dim[d1]][dim];
1188 }
1189 else
1190 {
1191 c = 0;
1192 }
1193 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1194 if (det > 0)
1195 {
1196 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1197 }
1198 else
1199 {
1200 /* A negative value signals out of range */
1201 dh[d1] = -1;
1202 }
1203 }
1204 }
1205
1206 /* Accumulate the extremes over all pulses */
1207 for (i = 0; i < buf_size; i++)
1208 {
1209 if (p == 0)
1210 {
1211 buf_e[i] = buf_r[i];
1212 }
1213 else
1214 {
1215 if (bUse)
1216 {
1217 buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
1218 buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
1219 buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
1220 }
1221
1222 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1223 {
1224 d1 = 1;
1225 }
1226 else
1227 {
1228 d1 = d + 1;
1229 }
1230 if (bUse && dh[d1] >= 0)
1231 {
1232 buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
1233 buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
1234 }
1235 }
1236 /* Copy the received buffer to the send buffer,
1237 * to pass the data through with the next pulse.
1238 */
1239 buf_s[i] = buf_r[i];
1240 }
1241 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1242 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1243 {
1244 /* Store the extremes */
1245 pos = 0;
1246
1247 for (d1 = d; d1 < dd->ndim-1; d1++)
1248 {
1249 extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
1250 extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
1251 extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
1252 pos++;
1253 }
1254
1255 if (d == 1 || (d == 0 && dd->ndim == 3))
1256 {
1257 for (i = d; i < 2; i++)
1258 {
1259 comm->zone_d2[1-d][i] = buf_e[pos];
1260 pos++;
1261 }
1262 }
1263 if (d == 0)
1264 {
1265 comm->zone_d1[1] = buf_e[pos];
1266 pos++;
1267 }
1268 }
1269 }
1270 }
1271
1272 if (dd->ndim >= 2)
1273 {
1274 dim = dd->dim[1];
1275 for (i = 0; i < 2; i++)
1276 {
1277 if (debug)
1278 {
1279 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1280 }
1281 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1282 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1283 }
1284 }
1285 if (dd->ndim >= 3)
1286 {
1287 dim = dd->dim[2];
1288 for (i = 0; i < 2; i++)
1289 {
1290 for (j = 0; j < 2; j++)
1291 {
1292 if (debug)
1293 {
1294 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1295 }
1296 cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1297 cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1298 }
1299 }
1300 }
1301 for (d = 1; d < dd->ndim; d++)
1302 {
1303 comm->cell_f_max0[d] = extr_s[d-1][0];
1304 comm->cell_f_min1[d] = extr_s[d-1][1];
1305 if (debug)
1306 {
1307 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1308 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1309 }
1310 }
1311}
1312
1313static void dd_collect_cg(gmx_domdec_t *dd,
1314 t_state *state_local)
1315{
1316 gmx_domdec_master_t *ma = NULL((void*)0);
1317 int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL((void*)0), nat_home = 0;
1318 t_block *cgs_gl;
1319
1320 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1321 {
1322 /* The master has the correct distribution */
1323 return;
1324 }
1325
1326 if (state_local->ddp_count == dd->ddp_count)
1327 {
1328 ncg_home = dd->ncg_home;
1329 cg = dd->index_gl;
1330 nat_home = dd->nat_home;
1331 }
1332 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1333 {
1334 cgs_gl = &dd->comm->cgs_gl;
1335
1336 ncg_home = state_local->ncg_gl;
1337 cg = state_local->cg_gl;
1338 nat_home = 0;
1339 for (i = 0; i < ncg_home; i++)
1340 {
1341 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1342 }
1343 }
1344 else
1345 {
1346 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1347 }
1348
1349 buf2[0] = dd->ncg_home;
1350 buf2[1] = dd->nat_home;
1351 if (DDMASTER(dd)((dd)->rank == (dd)->masterrank))
1352 {
1353 ma = dd->ma;
1354 ibuf = ma->ibuf;
1355 }
1356 else
1357 {
1358 ibuf = NULL((void*)0);
1359 }
1360 /* Collect the charge group and atom counts on the master */
1361 dd_gather(dd, 2*sizeof(int), buf2, ibuf);
1362
1363 if (DDMASTER(dd)((dd)->rank == (dd)->masterrank))
1364 {
1365 ma->index[0] = 0;
1366 for (i = 0; i < dd->nnodes; i++)
1367 {
1368 ma->ncg[i] = ma->ibuf[2*i];
1369 ma->nat[i] = ma->ibuf[2*i+1];
1370 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1371
1372 }
1373 /* Make byte counts and indices */
1374 for (i = 0; i < dd->nnodes; i++)
1375 {
1376 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1377 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1378 }
1379 if (debug)
1380 {
1381 fprintf(debug, "Initial charge group distribution: ");
1382 for (i = 0; i < dd->nnodes; i++)
1383 {
1384 fprintf(debug, " %d", ma->ncg[i]);
1385 }
1386 fprintf(debug, "\n");
1387 }
1388 }
1389
1390 /* Collect the charge group indices on the master */
1391 dd_gatherv(dd,
1392 dd->ncg_home*sizeof(int), dd->index_gl,
1393 DDMASTER(dd) ? ma->ibuf : NULL,
1394 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1395 DDMASTER(dd) ? ma->cg : NULL);
1396
1397 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1398}
1399
1400static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1401 rvec *lv, rvec *v)
1402{
1403 gmx_domdec_master_t *ma;
1404 int n, i, c, a, nalloc = 0;
1405 rvec *buf = NULL((void*)0);
1406 t_block *cgs_gl;
1407
1408 ma = dd->ma;
1409
1410 if (!DDMASTER(dd)((dd)->rank == (dd)->masterrank))
1411 {
1412#ifdef GMX_MPI
1413 MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1414 dd->rank, dd->mpi_comm_all);
1415#endif
1416 }
1417 else
1418 {
1419 /* Copy the master coordinates to the global array */
1420 cgs_gl = &dd->comm->cgs_gl;
1421
1422 n = DDMASTERRANK(dd)(dd->masterrank);
1423 a = 0;
1424 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1425 {
1426 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1427 {
1428 copy_rvec(lv[a++], v[c]);
1429 }
1430 }
1431
1432 for (n = 0; n < dd->nnodes; n++)
1433 {
1434 if (n != dd->rank)
1435 {
1436 if (ma->nat[n] > nalloc)
1437 {
1438 nalloc = over_alloc_dd(ma->nat[n]);
1439 srenew(buf, nalloc);
1440 }
1441#ifdef GMX_MPI
1442 MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
1443 n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1444#endif
1445 a = 0;
1446 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1447 {
1448 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1449 {
1450 copy_rvec(buf[a++], v[c]);
1451 }
1452 }
1453 }
1454 }
1455 sfree(buf);
1456 }
1457}
1458
1459static void get_commbuffer_counts(gmx_domdec_t *dd,
1460 int **counts, int **disps)
1461{
1462 gmx_domdec_master_t *ma;
1463 int n;
1464
1465 ma = dd->ma;
1466
1467 /* Make the rvec count and displacement arrays */
1468 *counts = ma->ibuf;
1469 *disps = ma->ibuf + dd->nnodes;
1470 for (n = 0; n < dd->nnodes; n++)
1471 {
1472 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1473 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1474 }
1475}
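/* Editorial aside (not part of the scanned source): these are the per-rank
 * byte counts and displacements later handed to dd_gatherv/dd_scatterv.
 * Worked example with nnodes = 3, nat = {10, 12, 8} and sizeof(rvec) = 12
 * (single precision):
 *     counts = {120, 144, 96}
 *     disps  = {0, 120, 264}
 */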
1476
1477static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1478 rvec *lv, rvec *v)
1479{
1480 gmx_domdec_master_t *ma;
1481 int *rcounts = NULL((void*)0), *disps = NULL((void*)0);
1482 int n, i, c, a;
1483 rvec *buf = NULL((void*)0);
1484 t_block *cgs_gl;
1485
1486 ma = dd->ma;
1487
1488 if (DDMASTER(dd)((dd)->rank == (dd)->masterrank))
1489 {
1490 get_commbuffer_counts(dd, &rcounts, &disps);
1491
1492 buf = ma->vbuf;
1493 }
1494
1495 dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv, rcounts, disps, buf);
1496
1497 if (DDMASTER(dd)((dd)->rank == (dd)->masterrank))
1498 {
1499 cgs_gl = &dd->comm->cgs_gl;
1500
1501 a = 0;
1502 for (n = 0; n < dd->nnodes; n++)
1503 {
1504 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1505 {
1506 for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
1507 {
1508 copy_rvec(buf[a++], v[c]);
1509 }
1510 }
1511 }
1512 }
1513}
1514
1515void dd_collect_vec(gmx_domdec_t *dd,
1516 t_state *state_local, rvec *lv, rvec *v)
1517{
1518 gmx_domdec_master_t *ma;
1519 int n, i, c, a, nalloc = 0;
1520 rvec *buf = NULL((void*)0);
1521
1522 dd_collect_cg(dd, state_local);
1523
1524 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV4)
1525 {
1526 dd_collect_vec_sendrecv(dd, lv, v);
1527 }
1528 else
1529 {
1530 dd_collect_vec_gatherv(dd, lv, v);
1531 }
1532}
1533
1534
1535void dd_collect_state(gmx_domdec_t *dd,
1536 t_state *state_local, t_state *state)
1537{
1538 int est, i, j, nh;
1539
1540 nh = state->nhchainlength;
1541
1542 if (DDMASTER(dd)((dd)->rank == (dd)->masterrank))
1543 {
1544 for (i = 0; i < efptNR; i++)
1545 {
1546 state->lambda[i] = state_local->lambda[i];
1547 }
1548 state->fep_state = state_local->fep_state;
1549 state->veta = state_local->veta;
1550 state->vol0 = state_local->vol0;
1551 copy_mat(state_local->box, state->box);
1552 copy_mat(state_local->boxv, state->boxv);
1553 copy_mat(state_local->svir_prev, state->svir_prev);
1554 copy_mat(state_local->fvir_prev, state->fvir_prev);
1555 copy_mat(state_local->pres_prev, state->pres_prev);
1556
1557 for (i = 0; i < state_local->ngtc; i++)
1558 {
1559 for (j = 0; j < nh; j++)
1560 {
1561 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1562 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1563 }
1564 state->therm_integral[i] = state_local->therm_integral[i];
1565 }
1566 for (i = 0; i < state_local->nnhpres; i++)
1567 {
1568 for (j = 0; j < nh; j++)
1569 {
1570 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1571 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1572 }
1573 }
1574 }
1575 for (est = 0; est < estNR; est++)
1576 {
1577 if (EST_DISTR(est) && (state_local->flags & (1<<est)))
1578 {
1579 switch (est)
1580 {
1581 case estX:
1582 dd_collect_vec(dd, state_local, state_local->x, state->x);
1583 break;
1584 case estV:
1585 dd_collect_vec(dd, state_local, state_local->v, state->v);
1586 break;
1587 case estSDX:
1588 dd_collect_vec(dd, state_local, state_local->sd_X, state->sd_X);
1589 break;
1590 case estCGP:
1591 dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
1592 break;
1593 case estDISRE_INITF:
1594 case estDISRE_RM3TAV:
1595 case estORIRE_INITF:
1596 case estORIRE_DTAV:
1597 break;
1598 default:
1599 gmx_incons("Unknown state entry encountered in dd_collect_state");
1600 }
1601 }
1602 }
1603}
1604
1605static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
1606{
1607 int est;
1608
1609 if (debug)
1610 {
1611 fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
1612 }
1613
1614 state->nalloc = over_alloc_dd(nalloc);
1615
1616 for (est = 0; est < estNR; est++)
1617 {
1618 if (EST_DISTR(est) && (state->flags & (1<<est)))
1619 {
1620 switch (est)
1621 {
1622 case estX:
1623 srenew(state->x, state->nalloc);
1624 break;
1625 case estV:
1626 srenew(state->v, state->nalloc);
1627 break;
1628 case estSDX:
1629 srenew(state->sd_X, state->nalloc);
1630 break;
1631 case estCGP:
1632 srenew(state->cg_p, state->nalloc);
1633 break;
1634 case estDISRE_INITF:
1635 case estDISRE_RM3TAV:
1636 case estORIRE_INITF:
1637 case estORIRE_DTAV:
1638 /* No reallocation required */
1639 break;
1640 default:
1641 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1642 }
1643 }
1644 }
1645
1646 if (f != NULL((void*)0))
1647 {
1648 srenew(*f, state->nalloc);
1649 }
1650}
1651
1652static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
1653 int nalloc)
1654{
1655 if (nalloc > fr->cg_nalloc)
1656 {
1657 if (debug)
1658 {
1659 fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
1660 }
1661 fr->cg_nalloc = over_alloc_dd(nalloc);
1662 srenew(fr->cginfo, fr->cg_nalloc);
1663 if (fr->cutoff_scheme == ecutsGROUP)
1664 {
1665 srenew(fr->cg_cm, fr->cg_nalloc);
1666 }
1667 }
1668 if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
1669 {
1670 /* We don't use charge groups, we use x in state to set up
1671 * the atom communication.
1672 */
1673 dd_realloc_state(state, f, nalloc);
1674 }
1675}
1676
1677static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
1678 rvec *v, rvec *lv)
1679{
1680 gmx_domdec_master_t *ma;
1681 int n, i, c, a, nalloc = 0;
1682 rvec *buf = NULL((void*)0);
1683
1684 if (DDMASTER(dd)((dd)->rank == (dd)->masterrank))
1685 {
1686 ma = dd->ma;
1687
1688 for (n = 0; n < dd->nnodes; n++)
1689 {
1690 if (n != dd->rank)
1691 {
1692 if (ma->nat[n] > nalloc)
1693 {
1694 nalloc = over_alloc_dd(ma->nat[n]);
1695 srenew(buf, nalloc);
1696 }
1697 /* Use lv as a temporary buffer */
1698 a = 0;
1699 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1700 {
1701 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1702 {
1703 copy_rvec(v[c], buf[a++]);
1704 }
1705 }
1706 if (a != ma->nat[n])
1707 {
1708 gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
1709 a, ma->nat[n]);
1710 }
1711
1712#ifdef GMX_MPI
1713 MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
1714 DDRANK(dd, n), n, dd->mpi_comm_all);
1715#endif
1716 }
1717 }
1718 sfree(buf);
1719 n = DDMASTERRANK(dd);
1720 a = 0;
1721 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1722 {
1723 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1724 {
1725 copy_rvec(v[c], lv[a++]);
1726 }
1727 }
1728 }
1729 else
1730 {
1731#ifdef GMX_MPI
1732 MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
1733 MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
1734#endif
1735 }
1736}
1737
1738static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
1739 rvec *v, rvec *lv)
1740{
1741 gmx_domdec_master_t *ma;
1742 int *scounts = NULL((void*)0), *disps = NULL((void*)0);
1743 int n, i, c, a, nalloc = 0;
1744 rvec *buf = NULL((void*)0);
1745
1746 if (DDMASTER(dd)((dd)->rank == (dd)->masterrank))
1747 {
1748 ma = dd->ma;
1749
1750 get_commbuffer_counts(dd, &scounts, &disps);
1751
1752 buf = ma->vbuf;
1753 a = 0;
1754 for (n = 0; n < dd->nnodes; n++)
1755 {
1756 for (i = ma->index[n]; i < ma->index[n+1]; i++)
1757 {
1758 for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
1759 {
1760 copy_rvec(v[c], buf[a++]);
1761 }
1762 }
1763 }
1764 }
1765
1766 dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
1767}
1768
1769static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs, rvec *v, rvec *lv)
1770{
1771 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV4)
1772 {
1773 dd_distribute_vec_sendrecv(dd, cgs, v, lv);
1774 }
1775 else
1776 {
1777 dd_distribute_vec_scatterv(dd, cgs, v, lv);
1778 }
1779}
1780
1781static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
1782{
1783 int i;
1784 dd_bcast(dd, sizeof(int), &dfhist->bEquil);
1785 dd_bcast(dd, sizeof(int), &dfhist->nlambda);
1786 dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
1787
1788 if (dfhist->nlambda > 0)
1789 {
1790 int nlam = dfhist->nlambda;
1791 dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
1792 dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
1793 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
1794 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
1795 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
1796 dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
1797
1798 for (i = 0; i < nlam; i++)
1799 {
1800 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
1801 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
1802 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
1803 dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
1804 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
1805 dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
1806 }
1807 }
1808}
1809
1810static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
1811 t_state *state, t_state *state_local,
1812 rvec **f)
1813{
1814 int i, j, nh;
1815
1816 nh = state->nhchainlength;
1817
1818 if (DDMASTER(dd)((dd)->rank == (dd)->masterrank))
1819 {
1820 for (i = 0; i < efptNR; i++)
1821 {
1822 state_local->lambda[i] = state->lambda[i];
1823 }
1824 state_local->fep_state = state->fep_state;
1825 state_local->veta = state->veta;
1826 state_local->vol0 = state->vol0;
1827 copy_mat(state->box, state_local->box);
1828 copy_mat(state->box_rel, state_local->box_rel);
1829 copy_mat(state->boxv, state_local->boxv);
1830 copy_mat(state->svir_prev, state_local->svir_prev);
1831 copy_mat(state->fvir_prev, state_local->fvir_prev);
1832 copy_df_history(&state_local->dfhist, &state->dfhist);
1833 for (i = 0; i < state_local->ngtc; i++)
1834 {
1835 for (j = 0; j < nh; j++)
1836 {
1837 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1838 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1839 }
1840 state_local->therm_integral[i] = state->therm_integral[i];
1841 }
1842 for (i = 0; i < state_local->nnhpres; i++)
1843 {
1844 for (j = 0; j < nh; j++)
1845 {
1846 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1847 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1848 }
1849 }
1850 }
1851 dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
1852 dd_bcast(dd, sizeof(int), &state_local->fep_state);
1853 dd_bcast(dd, sizeof(real), &state_local->veta);
1854 dd_bcast(dd, sizeof(real), &state_local->vol0);
1855 dd_bcast(dd, sizeof(state_local->box), state_local->box);
1856 dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
1857 dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
1858 dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
1859 dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
1860 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
1861 dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
1862 dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
1863 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
1864 dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
1865
1866 /* communicate df_history -- required for restarting from checkpoint */
1867 dd_distribute_dfhist(dd, &state_local->dfhist);
1868
1869 if (dd->nat_home > state_local->nalloc)
1870 {
1871 dd_realloc_state(state_local, f, dd->nat_home);
1872 }
1873 for (i = 0; i < estNR; i++)
1874 {
1875 if (EST_DISTR(i) && (state_local->flags & (1<<i)))
1876 {
1877 switch (i)
1878 {
1879 case estX:
1880 dd_distribute_vec(dd, cgs, state->x, state_local->x);
1881 break;
1882 case estV:
1883 dd_distribute_vec(dd, cgs, state->v, state_local->v);
1884 break;
1885 case estSDX:
1886 dd_distribute_vec(dd, cgs, state->sd_X, state_local->sd_X);
1887 break;
1888 case estCGP:
1889 dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
1890 break;
1891 case estDISRE_INITF:
1892 case estDISRE_RM3TAV:
1893 case estORIRE_INITF:
1894 case estORIRE_DTAV:
1895 /* Not implemented yet */
1896 break;
1897 default:
1898 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1899 }
1900 }
1901 }
1902}
1903
1904static char dim2char(int dim)
1905{
1906 char c = '?';
1907
1908 switch (dim)
1909 {
1910 case XX: c = 'X'; break;
1911 case YY: c = 'Y'; break;
1912 case ZZ: c = 'Z'; break;
1913 default: gmx_fatal(FARGS, "Unknown dim %d", dim);
1914 }
1915
1916 return c;
1917}
1918
1919static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1920 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1921{
1922 rvec grid_s[2], *grid_r = NULL((void*)0), cx, r;
1923 char fname[STRLEN4096], format[STRLEN4096], buf[22];
1924 FILE *out;
1925 int a, i, d, z, y, x;
1926 matrix tric;
1927 real vol;
1928
1929 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1930 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1931
1932 if (DDMASTER(dd)((dd)->rank == (dd)->masterrank))
1933 {
1934 snew(grid_r, 2*dd->nnodes);
1935 }
1936
1937 dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd)((dd)->rank == (dd)->masterrank) ? grid_r[0] : NULL((void*)0));
1938
1939 if (DDMASTER(dd)((dd)->rank == (dd)->masterrank))
1940 {
1941 for (d = 0; d < DIM3; d++)
1942 {
1943 for (i = 0; i < DIM3; i++)
1944 {
1945 if (d == i)
1946 {
1947 tric[d][i] = 1;
1948 }
1949 else
1950 {
1951 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1952 {
1953 tric[d][i] = box[i][d]/box[i][i];
1954 }
1955 else
1956 {
1957 tric[d][i] = 0;
1958 }
1959 }
1960 }
1961 }
1962 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1963 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
1964 out = gmx_fio_fopen(fname, "w");
1965 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1966 a = 1;
1967 for (i = 0; i < dd->nnodes; i++)
1968 {
1969 vol = dd->nnodes/(box[XX0][XX0]*box[YY1][YY1]*box[ZZ2][ZZ2]);
1970 for (d = 0; d < DIM3; d++)
1971 {
1972 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1973 }
1974 for (z = 0; z < 2; z++)
1975 {
1976 for (y = 0; y < 2; y++)
1977 {
1978 for (x = 0; x < 2; x++)
1979 {
1980 cx[XX0] = grid_r[i*2+x][XX0];
1981 cx[YY1] = grid_r[i*2+y][YY1];
1982 cx[ZZ2] = grid_r[i*2+z][ZZ2];
1983 mvmul(tric, cx, r);
1984 fprintf(out, format, "ATOM", a++, "CA", "GLY", ' ', 1+i,
1985 ' ', 10*r[XX0], 10*r[YY1], 10*r[ZZ2], 1.0, vol);
1986 }
1987 }
1988 }
1989 for (d = 0; d < DIM3; d++)
1990 {
1991 for (x = 0; x < 4; x++)
1992 {
1993 switch (d)
1994 {
1995 case 0: y = 1 + i*8 + 2*x; break;
1996 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1997 case 2: y = 1 + i*8 + x; break;
1998 }
1999 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
2000 }
2001 }
2002 }
2003 gmx_fio_fclose(out);
2004 sfree(grid_r)save_free("grid_r", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c"
, 2004, (grid_r))
;
2005 }
2006}
2007
2008void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
2009 gmx_mtop_t *mtop, t_commrec *cr,
2010 int natoms, rvec x[], matrix box)
2011{
2012 char fname[STRLEN4096], format[STRLEN4096], format4[STRLEN4096], buf[22];
2013 FILE *out;
2014 int i, ii, resnr, c;
2015 char *atomname, *resname;
2016 real b;
2017 gmx_domdec_t *dd;
2018
2019 dd = cr->dd;
2020 if (natoms == -1)
2021 {
2022 natoms = dd->comm->nat[ddnatVSITE];
2023 }
2024
2025 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
2026
2027 sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
2028 sprintf(format4, "%s%s\n", get_pdbformat4(), "%6.2f%6.2f");
2029
2030 out = gmx_fio_fopen(fname, "w");
2031
2032 fprintf(out, "TITLE %s\n", title);
2033 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
2034 for (i = 0; i < natoms; i++)
2035 {
2036 ii = dd->gatindex[i];
2037 gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
2038 if (i < dd->comm->nat[ddnatZONE])
2039 {
2040 c = 0;
2041 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
2042 {
2043 c++;
2044 }
2045 b = c;
2046 }
2047 else if (i < dd->comm->nat[ddnatVSITE])
2048 {
2049 b = dd->comm->zones.n;
2050 }
2051 else
2052 {
2053 b = dd->comm->zones.n + 1;
2054 }
2055 fprintf(out, strlen(atomname) < 4 ? format : format4,
2056 "ATOM", (ii+1)%100000,
2057 atomname, resname, ' ', resnr%10000, ' ',
2058 10*x[i][XX0], 10*x[i][YY1], 10*x[i][ZZ2], 1.0, b);
2059 }
2060 fprintf(out, "TER\n");
2061
2062 gmx_fio_fclose(out);
2063}
2064
2065real dd_cutoff_mbody(gmx_domdec_t *dd)
2066{
2067 gmx_domdec_comm_t *comm;
2068 int di;
2069 real r;
2070
2071 comm = dd->comm;
2072
2073 r = -1;
2074 if (comm->bInterCGBondeds)
2075 {
2076 if (comm->cutoff_mbody > 0)
2077 {
2078 r = comm->cutoff_mbody;
2079 }
2080 else
2081 {
2082 /* cutoff_mbody=0 means we do not have DLB */
2083 r = comm->cellsize_min[dd->dim[0]];
2084 for (di = 1; di < dd->ndim; di++)
2085 {
2086 r = min(r, comm->cellsize_min[dd->dim[di]])(((r) < (comm->cellsize_min[dd->dim[di]])) ? (r) : (
comm->cellsize_min[dd->dim[di]]) )
;
2087 }
2088 if (comm->bBondComm)
2089 {
2090 r = max(r, comm->cutoff_mbody)(((r) > (comm->cutoff_mbody)) ? (r) : (comm->cutoff_mbody
) )
;
2091 }
2092 else
2093 {
2094 r = min(r, comm->cutoff)(((r) < (comm->cutoff)) ? (r) : (comm->cutoff) );
2095 }
2096 }
2097 }
2098
2099 return r;
2100}
2101
2102real dd_cutoff_twobody(gmx_domdec_t *dd)
2103{
2104 real r_mb;
2105
2106 r_mb = dd_cutoff_mbody(dd);
2107
2108 return max(dd->comm->cutoff, r_mb)(((dd->comm->cutoff) > (r_mb)) ? (dd->comm->cutoff
) : (r_mb) )
;
2109}
2110
2111
2112static void dd_cart_coord2pmecoord(gmx_domdec_t *dd, ivec coord, ivec coord_pme)
2113{
2114 int nc, ntot;
2115
2116 nc = dd->nc[dd->comm->cartpmedim];
2117 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2118 copy_ivec(coord, coord_pme);
2119 coord_pme[dd->comm->cartpmedim] =
2120 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
2121}
2122
2123static int low_ddindex2pmeindex(int ndd, int npme, int ddindex)
2124{
2125 /* Here we assign a PME node to communicate with this DD node
2126 * by assuming that the major index of both is x.
2127 * We add cr->npmenodes/2 to obtain an even distribution.
2128 */
2129 return (ddindex*npme + npme/2)/ndd;
2130}
2131
2132static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
2133{
2134 return low_ddindex2pmeindex(dd->nnodes, dd->comm->npmenodes, ddindex);
2135}
2136
2137static int cr_ddindex2pmeindex(const t_commrec *cr, int ddindex)
2138{
2139 return low_ddindex2pmeindex(cr->dd->nnodes, cr->npmenodes, ddindex);
2140}
2141
2142static int *dd_pmenodes(t_commrec *cr)
2143{
2144 int *pmenodes;
2145 int n, i, p0, p1;
2146
2147 snew(pmenodes, cr->npmenodes)(pmenodes) = save_calloc("pmenodes", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c"
, 2147, (cr->npmenodes), sizeof(*(pmenodes)))
;
2148 n = 0;
2149 for (i = 0; i < cr->dd->nnodes; i++)
2150 {
2151 p0 = cr_ddindex2pmeindex(cr, i);
2152 p1 = cr_ddindex2pmeindex(cr, i+1);
2153 if (i+1 == cr->dd->nnodes || p1 > p0)
2154 {
2155 if (debug)
2156 {
2157 fprintf(debug, "pmenode[%d] = %d\n", n, i+1+n);
2158 }
2159 pmenodes[n] = i + 1 + n;
2160 n++;
2161 }
2162 }
2163
2164 return pmenodes;
2165}
2166
2167static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
2168{
2169 gmx_domdec_t *dd;
2170 ivec coords, coords_pme, nc;
2171 int slab;
2172
2173 dd = cr->dd;
2174 /*
2175 if (dd->comm->bCartesian) {
2176 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2177 dd_coords2pmecoords(dd,coords,coords_pme);
2178 copy_ivec(dd->ntot,nc);
2179 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2180 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2181
2182 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2183 } else {
2184 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2185 }
2186 */
2187 coords[XX0] = x;
2188 coords[YY1] = y;
2189 coords[ZZ2] = z;
2190 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords)((((coords)[0]*(dd->nc)[1] + (coords)[1])*(dd->nc)[2]) +
(coords)[2])
);
2191
2192 return slab;
2193}
2194
2195static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
2196{
2197 gmx_domdec_comm_t *comm;
2198 ivec coords;
2199 int ddindex, nodeid = -1;
2200
2201 comm = cr->dd->comm;
2202
2203 coords[XX0] = x;
2204 coords[YY1] = y;
2205 coords[ZZ2] = z;
2206 if (comm->bCartesianPP_PME)
2207 {
2208#ifdef GMX_MPI
2209 MPI_Cart_ranktMPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
2210#endif
2211 }
2212 else
2213 {
2214 ddindex = dd_index(cr->dd->nc, coords)((((coords)[0]*(cr->dd->nc)[1] + (coords)[1])*(cr->dd
->nc)[2]) + (coords)[2])
;
2215 if (comm->bCartesianPP)
2216 {
2217 nodeid = comm->ddindex2simnodeid[ddindex];
2218 }
2219 else
2220 {
2221 if (comm->pmenodes)
2222 {
2223 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
2224 }
2225 else
2226 {
2227 nodeid = ddindex;
2228 }
2229 }
2230 }
2231
2232 return nodeid;
2233}
2234
2235static int dd_simnode2pmenode(t_commrec *cr, int sim_nodeid)
2236{
2237 gmx_domdec_t *dd;
2238 gmx_domdec_comm_t *comm;
2239 ivec coord, coord_pme;
2240 int i;
2241 int pmenode = -1;
2242
2243 dd = cr->dd;
2244 comm = dd->comm;
2245
2246 /* This assumes a uniform x domain decomposition grid cell size */
2247 if (comm->bCartesianPP_PME)
2248 {
2249#ifdef GMX_MPI
2250 MPI_Cart_coordstMPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM3, coord);
2251 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2252 {
2253 /* This is a PP node */
2254 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2255 MPI_Cart_ranktMPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
2256 }
2257#endif
2258 }
2259 else if (comm->bCartesianPP)
2260 {
2261 if (sim_nodeid < dd->nnodes)
2262 {
2263 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2264 }
2265 }
2266 else
2267 {
2268 /* This assumes DD cells with identical x coordinates
2269 * are numbered sequentially.
2270 */
2271 if (dd->comm->pmenodes == NULL((void*)0))
2272 {
2273 if (sim_nodeid < dd->nnodes)
2274 {
2275 /* The DD index equals the nodeid */
2276 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
2277 }
2278 }
2279 else
2280 {
2281 i = 0;
2282 while (sim_nodeid > dd->comm->pmenodes[i])
2283 {
2284 i++;
2285 }
2286 if (sim_nodeid < dd->comm->pmenodes[i])
2287 {
2288 pmenode = dd->comm->pmenodes[i];
2289 }
2290 }
2291 }
2292
2293 return pmenode;
2294}
2295
2296void get_pme_nnodes(const gmx_domdec_t *dd,
2297 int *npmenodes_x, int *npmenodes_y)
2298{
2299 if (dd != NULL((void*)0))
2300 {
2301 *npmenodes_x = dd->comm->npmenodes_x;
2302 *npmenodes_y = dd->comm->npmenodes_y;
2303 }
2304 else
2305 {
2306 *npmenodes_x = 1;
2307 *npmenodes_y = 1;
2308 }
2309}
2310
2311gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
2312{
2313 gmx_bool bPMEOnlyNode;
2314
2315 if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes >
1))
)
2316 {
2317 bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
2318 }
2319 else
2320 {
2321 bPMEOnlyNode = FALSE0;
2322 }
2323
2324 return bPMEOnlyNode;
2325}
2326
2327void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
2328 int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
2329{
2330 gmx_domdec_t *dd;
2331 int x, y, z;
2332 ivec coord, coord_pme;
2333
2334 dd = cr->dd;
2335
2336 snew(*my_ddnodes, (dd->nnodes+cr->npmenodes-1)/cr->npmenodes)(*my_ddnodes) = save_calloc("*my_ddnodes", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c"
, 2336, ((dd->nnodes+cr->npmenodes-1)/cr->npmenodes)
, sizeof(*(*my_ddnodes)))
;
2337
2338 *nmy_ddnodes = 0;
2339 for (x = 0; x < dd->nc[XX0]; x++)
2340 {
2341 for (y = 0; y < dd->nc[YY1]; y++)
2342 {
2343 for (z = 0; z < dd->nc[ZZ2]; z++)
2344 {
2345 if (dd->comm->bCartesianPP_PME)
2346 {
2347 coord[XX0] = x;
2348 coord[YY1] = y;
2349 coord[ZZ2] = z;
2350 dd_cart_coord2pmecoord(dd, coord, coord_pme);
2351 if (dd->ci[XX0] == coord_pme[XX0] &&
2352 dd->ci[YY1] == coord_pme[YY1] &&
2353 dd->ci[ZZ2] == coord_pme[ZZ2])
2354 {
2355 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2356 }
2357 }
2358 else
2359 {
2360 /* The slab corresponds to the nodeid in the PME group */
2361 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
2362 {
2363 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr, x, y, z);
2364 }
2365 }
2366 }
2367 }
2368 }
2369
2370 /* The last PP-only node is the peer node */
2371 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2372
2373 if (debug)
2374 {
2375 fprintf(debug, "Receive coordinates from PP nodes:");
2376 for (x = 0; x < *nmy_ddnodes; x++)
2377 {
2378 fprintf(debug, " %d", (*my_ddnodes)[x]);
2379 }
2380 fprintf(debug, "\n");
2381 }
2382}
2383
2384static gmx_bool receive_vir_ener(t_commrec *cr)
2385{
2386 gmx_domdec_comm_t *comm;
2387 int pmenode, coords[DIM3], rank;
2388 gmx_bool bReceive;
2389
2390 bReceive = TRUE1;
2391 if (cr->npmenodes < cr->dd->nnodes)
2392 {
2393 comm = cr->dd->comm;
2394 if (comm->bCartesianPP_PME)
2395 {
2396 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2397#ifdef GMX_MPI
2398 MPI_Cart_coordstMPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM3, coords);
2399 coords[comm->cartpmedim]++;
2400 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2401 {
2402 MPI_Cart_ranktMPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
2403 if (dd_simnode2pmenode(cr, rank) == pmenode)
2404 {
2405 /* This is not the last PP node for pmenode */
2406 bReceive = FALSE0;
2407 }
2408 }
2409#endif
2410 }
2411 else
2412 {
2413 pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
2414 if (cr->sim_nodeid+1 < cr->nnodes &&
2415 dd_simnode2pmenode(cr, cr->sim_nodeid+1) == pmenode)
2416 {
2417 /* This is not the last PP node for pmenode */
2418 bReceive = FALSE0;
2419 }
2420 }
2421 }
2422
2423 return bReceive;
2424}
2425
2426static void set_zones_ncg_home(gmx_domdec_t *dd)
2427{
2428 gmx_domdec_zones_t *zones;
2429 int i;
2430
2431 zones = &dd->comm->zones;
2432
2433 zones->cg_range[0] = 0;
2434 for (i = 1; i < zones->n+1; i++)
2435 {
2436 zones->cg_range[i] = dd->ncg_home;
2437 }
2438 /* zone_ncg1[0] should always be equal to ncg_home */
2439 dd->comm->zone_ncg1[0] = dd->ncg_home;
2440}
2441
2442static void rebuild_cgindex(gmx_domdec_t *dd,
2443 const int *gcgs_index, t_state *state)
2444{
2445 int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
2446
2447 ind = state->cg_gl;
2448 dd_cg_gl = dd->index_gl;
2449 cgindex = dd->cgindex;
2450 nat = 0;
2451 cgindex[0] = nat;
2452 for (i = 0; i < state->ncg_gl; i++)
2453 {
2454 cgindex[i] = nat;
2455 cg_gl = ind[i];
2456 dd_cg_gl[i] = cg_gl;
2457 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2458 }
2459 cgindex[i] = nat;
2460
2461 dd->ncg_home = state->ncg_gl;
2462 dd->nat_home = nat;
2463
2464 set_zones_ncg_home(dd);
2465}
2466
2467static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
2468{
2469 while (cg >= cginfo_mb->cg_end)
2470 {
2471 cginfo_mb++;
2472 }
2473
2474 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2475}
2476
2477static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
2478 t_forcerec *fr, char *bLocalCG)
2479{
2480 cginfo_mb_t *cginfo_mb;
2481 int *cginfo;
2482 int cg;
2483
2484 if (fr != NULL((void*)0))
2485 {
2486 cginfo_mb = fr->cginfo_mb;
2487 cginfo = fr->cginfo;
2488
2489 for (cg = cg0; cg < cg1; cg++)
2490 {
2491 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
2492 }
2493 }
2494
2495 if (bLocalCG != NULL((void*)0))
2496 {
2497 for (cg = cg0; cg < cg1; cg++)
2498 {
2499 bLocalCG[index_gl[cg]] = TRUE1;
2500 }
2501 }
2502}
2503
2504static void make_dd_indices(gmx_domdec_t *dd,
2505 const int *gcgs_index, int cg_start)
2506{
2507 int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
2508 int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
2509 gmx_ga2la_t *ga2la;
2510 char *bLocalCG;
2511 gmx_bool bCGs;
2512
2513 bLocalCG = dd->comm->bLocalCG;
Value stored to 'bLocalCG' is never read
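/* Note on the dead-store report above: within make_dd_indices the local
 * bLocalCG is assigned here but never read again in this function; the
 * bLocalCG flags themselves are maintained elsewhere (set in dd_set_cginfo,
 * cleared in clear_dd_indices). A minimal sketch of a fix, not necessarily
 * the patch adopted upstream, is simply to remove the 'char *bLocalCG;'
 * declaration and this assignment from make_dd_indices.
 */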
2514
2515 if (dd->nat_tot > dd->gatindex_nalloc)
2516 {
2517 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2518 srenew(dd->gatindex, dd->gatindex_nalloc)(dd->gatindex) = save_realloc("dd->gatindex", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c"
, 2518, (dd->gatindex), (dd->gatindex_nalloc), sizeof(*
(dd->gatindex)))
;
2519 }
2520
2521 nzone = dd->comm->zones.n;
2522 zone2cg = dd->comm->zones.cg_range;
2523 zone_ncg1 = dd->comm->zone_ncg1;
2524 index_gl = dd->index_gl;
2525 gatindex = dd->gatindex;
2526 bCGs = dd->comm->bCGs;
2527
2528 if (zone2cg[1] != dd->ncg_home)
2529 {
2530 gmx_incons("dd->ncg_zone is not up to date")_gmx_error("incons", "dd->ncg_zone is not up to date", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c"
, 2530)
;
2531 }
2532
2533 /* Make the local to global and global to local atom index */
2534 a = dd->cgindex[cg_start];
2535 for (zone = 0; zone < nzone; zone++)
2536 {
2537 if (zone == 0)
2538 {
2539 cg0 = cg_start;
2540 }
2541 else
2542 {
2543 cg0 = zone2cg[zone];
2544 }
2545 cg1 = zone2cg[zone+1];
2546 cg1_p1 = cg0 + zone_ncg1[zone];
2547
2548 for (cg = cg0; cg < cg1; cg++)
2549 {
2550 zone1 = zone;
2551 if (cg >= cg1_p1)
2552 {
2553 /* Signal that this cg is from more than one pulse away */
2554 zone1 += nzone;
2555 }
2556 cg_gl = index_gl[cg];
2557 if (bCGs)
2558 {
2559 for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
2560 {
2561 gatindex[a] = a_gl;
2562 ga2la_set(dd->ga2la, a_gl, a, zone1);
2563 a++;
2564 }
2565 }
2566 else
2567 {
2568 gatindex[a] = cg_gl;
2569 ga2la_set(dd->ga2la, cg_gl, a, zone1);
2570 a++;
2571 }
2572 }
2573 }
2574}
2575
2576static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
2577 const char *where)
2578{
2579 int ncg, i, ngl, nerr;
2580
2581 nerr = 0;
2582 if (bLocalCG == NULL((void*)0))
2583 {
2584 return nerr;
2585 }
2586 for (i = 0; i < dd->ncg_tot; i++)
2587 {
2588 if (!bLocalCG[dd->index_gl[i]])
2589 {
2590 fprintf(stderrstderr,
2591 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
2592 nerr++;
2593 }
2594 }
2595 ngl = 0;
2596 for (i = 0; i < ncg_sys; i++)
2597 {
2598 if (bLocalCG[i])
2599 {
2600 ngl++;
2601 }
2602 }
2603 if (ngl != dd->ncg_tot)
2604 {
2605 fprintf(stderrstderr, "DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
2606 nerr++;
2607 }
2608
2609 return nerr;
2610}
2611
2612static void check_index_consistency(gmx_domdec_t *dd,
2613 int natoms_sys, int ncg_sys,
2614 const char *where)
2615{
2616 int nerr, ngl, i, a, cell;
2617 int *have;
2618
2619 nerr = 0;
2620
2621 if (dd->comm->DD_debug > 1)
2622 {
2623 snew(have, natoms_sys)(have) = save_calloc("have", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c"
, 2623, (natoms_sys), sizeof(*(have)))
;
2624 for (a = 0; a < dd->nat_tot; a++)
2625 {
2626 if (have[dd->gatindex[a]] > 0)
2627 {
2628 fprintf(stderrstderr, "DD node %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
2629 }
2630 else
2631 {
2632 have[dd->gatindex[a]] = a + 1;
2633 }
2634 }
2635 sfree(have)save_free("have", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c"
, 2635, (have))
;
2636 }
2637
2638 snew(have, dd->nat_tot)(have) = save_calloc("have", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c"
, 2638, (dd->nat_tot), sizeof(*(have)))
;
2639
2640 ngl = 0;
2641 for (i = 0; i < natoms_sys; i++)
2642 {
2643 if (ga2la_get(dd->ga2la, i, &a, &cell))
2644 {
2645 if (a >= dd->nat_tot)
2646 {
2647 fprintf(stderrstderr, "DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
2648 nerr++;
2649 }
2650 else
2651 {
2652 have[a] = 1;
2653 if (dd->gatindex[a] != i)
2654 {
2655 fprintf(stderrstderr, "DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
2656 nerr++;
2657 }
2658 }
2659 ngl++;
2660 }
2661 }
2662 if (ngl != dd->nat_tot)
2663 {
2664 fprintf(stderrstderr,
2665 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2666 dd->rank, where, ngl, dd->nat_tot);
2667 }
2668 for (a = 0; a < dd->nat_tot; a++)
2669 {
2670 if (have[a] == 0)
2671 {
2672 fprintf(stderrstderr,
2673 "DD node %d, %s: local atom %d, global %d has no global index\n",
2674 dd->rank, where, a+1, dd->gatindex[a]+1);
2675 }
2676 }
2677 sfree(have)save_free("have", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c"
, 2677, (have))
;
2678
2679 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
2680
2681 if (nerr > 0)
2682 {
2683 gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c",
2683
, "DD node %d, %s: %d atom/cg index inconsistencies",
2684 dd->rank, where, nerr);
2685 }
2686}
2687
2688static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
2689{
2690 int i;
2691 char *bLocalCG;
2692
2693 if (a_start == 0)
2694 {
2695 /* Clear the whole list without searching */
2696 ga2la_clear(dd->ga2la);
2697 }
2698 else
2699 {
2700 for (i = a_start; i < dd->nat_tot; i++)
2701 {
2702 ga2la_del(dd->ga2la, dd->gatindex[i]);
2703 }
2704 }
2705
2706 bLocalCG = dd->comm->bLocalCG;
2707 if (bLocalCG)
2708 {
2709 for (i = cg_start; i < dd->ncg_tot; i++)
2710 {
2711 bLocalCG[dd->index_gl[i]] = FALSE0;
2712 }
2713 }
2714
2715 dd_clear_local_vsite_indices(dd);
2716
2717 if (dd->constraints)
2718 {
2719 dd_clear_local_constraint_indices(dd);
2720 }
2721}
2722
2723/* This function should be used for moving the domain boundaries during DLB,
2724 * for obtaining the minimum cell size. It checks the initially set limit
2725 * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
2726 * and, possibly, a longer cut-off limit set for PME load balancing.
2727 */
2728static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
2729{
2730 real cellsize_min;
2731
2732 cellsize_min = comm->cellsize_min[dim];
2733
2734 if (!comm->bVacDLBNoLimit)
2735 {
2736 /* The cut-off might have changed, e.g. by PME load balancing,
2737 * from the value used to set comm->cellsize_min, so check it.
2738 */
2739 cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb)(((cellsize_min) > (comm->cutoff/comm->cd[dim_ind].np_dlb
)) ? (cellsize_min) : (comm->cutoff/comm->cd[dim_ind].np_dlb
) )
;
2740
2741 if (comm->bPMELoadBalDLBLimits)
2742 {
2743 /* Check for the cut-off limit set by the PME load balancing */
2744 cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb)(((cellsize_min) > (comm->PMELoadBal_max_cutoff/comm->
cd[dim_ind].np_dlb)) ? (cellsize_min) : (comm->PMELoadBal_max_cutoff
/comm->cd[dim_ind].np_dlb) )
;
2745 }
2746 }
2747
2748 return cellsize_min;
2749}
2750
2751static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
2752 int dim_ind)
2753{
2754 real grid_jump_limit;
2755
2756 /* The distance between the boundaries of cells at distance
2757 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2758 * and by the fact that cells should not be shifted by more than
2759 * half their size, such that cg's only shift by one cell
2760 * at redecomposition.
2761 */
2762 grid_jump_limit = comm->cellsize_limit;
2763 if (!comm->bVacDLBNoLimit)
2764 {
2765 if (comm->bPMELoadBalDLBLimits)
2766 {
2767 cutoff = max(cutoff, comm->PMELoadBal_max_cutoff)(((cutoff) > (comm->PMELoadBal_max_cutoff)) ? (cutoff) :
(comm->PMELoadBal_max_cutoff) )
;
2768 }
2769 grid_jump_limit = max(grid_jump_limit,(((grid_jump_limit) > (cutoff/comm->cd[dim_ind].np)) ? (
grid_jump_limit) : (cutoff/comm->cd[dim_ind].np) )
2770 cutoff/comm->cd[dim_ind].np)(((grid_jump_limit) > (cutoff/comm->cd[dim_ind].np)) ? (
grid_jump_limit) : (cutoff/comm->cd[dim_ind].np) )
;
2771 }
2772
2773 return grid_jump_limit;
2774}
2775
2776static gmx_bool check_grid_jump(gmx_int64_t step,
2777 gmx_domdec_t *dd,
2778 real cutoff,
2779 gmx_ddbox_t *ddbox,
2780 gmx_bool bFatal)
2781{
2782 gmx_domdec_comm_t *comm;
2783 int d, dim;
2784 real limit, bfac;
2785 gmx_bool bInvalid;
2786
2787 bInvalid = FALSE0;
2788
2789 comm = dd->comm;
2790
2791 for (d = 1; d < dd->ndim; d++)
2792 {
2793 dim = dd->dim[d];
2794 limit = grid_jump_limit(comm, cutoff, d);
2795 bfac = ddbox->box_size[dim];
2796 if (ddbox->tric_dir[dim])
2797 {
2798 bfac *= ddbox->skew_fac[dim];
2799 }
2800 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2801 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2802 {
2803 bInvalid = TRUE1;
2804
2805 if (bFatal)
2806 {
2807 char buf[22];
2808
2809 /* This error should never be triggered under normal
2810 * circumstances, but you never know ...
2811 */
2812 gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c",
2812
, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
2813 gmx_step_str(step, buf),
2814 dim2char(dim), dd->ci[XX0], dd->ci[YY1], dd->ci[ZZ2]);
2815 }
2816 }
2817 }
2818
2819 return bInvalid;
2820}
2821
2822static int dd_load_count(gmx_domdec_comm_t *comm)
2823{
2824 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2825}
2826
2827static float dd_force_load(gmx_domdec_comm_t *comm)
2828{
2829 float load;
2830
2831 if (comm->eFlop)
2832 {
2833 load = comm->flop;
2834 if (comm->eFlop > 1)
2835 {
2836 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX2147483647 - 0.05);
2837 }
2838 }
2839 else
2840 {
2841 load = comm->cycl[ddCyclF];
2842 if (comm->cycl_n[ddCyclF] > 1)
2843 {
2844 /* Subtract the maximum of the last n cycle counts
2845 * to get rid of possible high counts due to other sources,
2846 * for instance system activity, that would otherwise
2847 * affect the dynamic load balancing.
2848 */
2849 load -= comm->cycl_max[ddCyclF];
2850 }
2851
2852#ifdef GMX_MPI
2853 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2854 {
2855 float gpu_wait, gpu_wait_sum;
2856
2857 gpu_wait = comm->cycl[ddCyclWaitGPU];
2858 if (comm->cycl_n[ddCyclF] > 1)
2859 {
2860 /* We should remove the WaitGPU time of the same MD step
2861 * as the one with the maximum F time, since the F time
2862 * and the wait time are not independent.
2863 * Furthermore, the step for the max F time should be chosen
2864 * the same on all ranks that share the same GPU.
2865 * But to keep the code simple, we remove the average instead.
2866 * The main reason for artificially long times at some steps
2867 * is spurious CPU activity or MPI time, so we don't expect
2868 * that changes in the GPU wait time matter a lot here.
2869 */
2870 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2871 }
2872 /* Sum the wait times over the ranks that share the same GPU */
2873 MPI_AllreducetMPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOATTMPI_FLOAT, MPI_SUMTMPI_SUM,
2874 comm->mpi_comm_gpu_shared);
2875 /* Replace the wait time by the average over the ranks */
2876 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
2877 }
2878#endif
2879 }
2880
2881 return load;
2882}
2883
2884static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
2885{
2886 gmx_domdec_comm_t *comm;
2887 int i;
2888
2889 comm = dd->comm;
2890
2891 snew(*dim_f, dd->nc[dim]+1)(*dim_f) = save_calloc("*dim_f", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c"
, 2891, (dd->nc[dim]+1), sizeof(*(*dim_f)))
;
2892 (*dim_f)[0] = 0;
2893 for (i = 1; i < dd->nc[dim]; i++)
2894 {
2895 if (comm->slb_frac[dim])
2896 {
2897 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2898 }
2899 else
2900 {
2901 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2902 }
2903 }
2904 (*dim_f)[dd->nc[dim]] = 1;
2905}
2906
2907static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
2908{
2909 int pmeindex, slab, nso, i;
2910 ivec xyz;
2911
2912 if (dimind == 0 && dd->dim[0] == YY1 && dd->comm->npmenodes_x == 1)
2913 {
2914 ddpme->dim = YY1;
2915 }
2916 else
2917 {
2918 ddpme->dim = dimind;
2919 }
2920 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2921
2922 ddpme->nslab = (ddpme->dim == 0 ?
2923 dd->comm->npmenodes_x :
2924 dd->comm->npmenodes_y);
2925
2926 if (ddpme->nslab <= 1)
2927 {
2928 return;
2929 }
2930
2931 nso = dd->comm->npmenodes/ddpme->nslab;
2932 /* Determine for each PME slab the PP location range for dimension dim */
2933 snew(ddpme->pp_min, ddpme->nslab)(ddpme->pp_min) = save_calloc("ddpme->pp_min", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c"
, 2933, (ddpme->nslab), sizeof(*(ddpme->pp_min)))
;
2934 snew(ddpme->pp_max, ddpme->nslab)(ddpme->pp_max) = save_calloc("ddpme->pp_max", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c"
, 2934, (ddpme->nslab), sizeof(*(ddpme->pp_max)))
;
2935 for (slab = 0; slab < ddpme->nslab; slab++)
2936 {
2937 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2938 ddpme->pp_max[slab] = 0;
2939 }
2940 for (i = 0; i < dd->nnodes; i++)
2941 {
2942 ddindex2xyz(dd->nc, i, xyz);
2943 /* For y only use our y/z slab.
2944 * This assumes that the PME x grid size matches the DD grid size.
2945 */
2946 if (dimind == 0 || xyz[XX0] == dd->ci[XX0])
2947 {
2948 pmeindex = ddindex2pmeindex(dd, i);
2949 if (dimind == 0)
2950 {
2951 slab = pmeindex/nso;
2952 }
2953 else
2954 {
2955 slab = pmeindex % ddpme->nslab;
2956 }
2957 ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind])(((ddpme->pp_min[slab]) < (xyz[dimind])) ? (ddpme->pp_min
[slab]) : (xyz[dimind]) )
;
2958 ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind])(((ddpme->pp_max[slab]) > (xyz[dimind])) ? (ddpme->pp_max
[slab]) : (xyz[dimind]) )
;
2959 }
2960 }
2961
2962 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
2963}
2964
2965int dd_pme_maxshift_x(gmx_domdec_t *dd)
2966{
2967 if (dd->comm->ddpme[0].dim == XX0)
2968 {
2969 return dd->comm->ddpme[0].maxshift;
2970 }
2971 else
2972 {
2973 return 0;
2974 }
2975}
2976
2977int dd_pme_maxshift_y(gmx_domdec_t *dd)
2978{
2979 if (dd->comm->ddpme[0].dim == YY1)
2980 {
2981 return dd->comm->ddpme[0].maxshift;
2982 }
2983 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY1)
2984 {
2985 return dd->comm->ddpme[1].maxshift;
2986 }
2987 else
2988 {
2989 return 0;
2990 }
2991}
2992
2993static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
2994 gmx_bool bUniform, gmx_ddbox_t *ddbox, real *cell_f)
2995{
2996 gmx_domdec_comm_t *comm;
2997 int nc, ns, s;
2998 int *xmin, *xmax;
2999 real range, pme_boundary;
3000 int sh;
3001
3002 comm = dd->comm;
3003 nc = dd->nc[ddpme->dim];
3004 ns = ddpme->nslab;
3005
3006 if (!ddpme->dim_match)
3007 {
3008 /* PP decomposition is not along dim: the worst situation */
3009 sh = ns/2;
3010 }
3011 else if (ns <= 3 || (bUniform && ns == nc))
3012 {
3013 /* The optimal situation */
3014 sh = 1;
3015 }
3016 else
3017 {
3018 /* We need to check, for all PME nodes, which PP nodes they
3019 * could possibly need to communicate with.
3020 */
3021 xmin = ddpme->pp_min;
3022 xmax = ddpme->pp_max;
3023 /* Allow for atoms to be maximally 2/3 times the cut-off
3024 * out of their DD cell. This is a reasonable balance between
3025 * performance and support for most charge-group/cut-off
3026 * combinations.
3027 */
3028 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
3029 /* Avoid extra communication when we are exactly at a boundary */
3030 range *= 0.999;
3031
3032 sh = 1;
3033 for (s = 0; s < ns; s++)
3034 {
3035 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
3036 pme_boundary = (real)s/ns;
3037 while (sh+1 < ns &&
3038 ((s-(sh+1) >= 0 &&
3039 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
3040 (s-(sh+1) < 0 &&
3041 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
3042 {
3043 sh++;
3044 }
3045 pme_boundary = (real)(s+1)/ns;
3046 while (sh+1 < ns &&
3047 ((s+(sh+1) < ns &&
3048 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
3049 (s+(sh+1) >= ns &&
3050 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
3051 {
3052 sh++;
3053 }
3054 }
3055 }
3056
3057 ddpme->maxshift = sh;
3058
3059 if (debug)
3060 {
3061 fprintf(debug, "PME slab communication range for dim %d is %d\n",
3062 ddpme->dim, ddpme->maxshift);
3063 }
3064}
3065
3066static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3067{
3068 int d, dim;
3069
3070 for (d = 0; d < dd->ndim; d++)
3071 {
3072 dim = dd->dim[d];
3073 if (dim < ddbox->nboundeddim &&
3074 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
3075 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN1.0001)
3076 {
3077 gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c",
3077
, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
3078 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3079 dd->nc[dim], dd->comm->cellsize_limit);
3080 }
3081 }
3082}
3083
3084enum {
3085 setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY
3086};
3087
3088/* Set the domain boundaries. Use for static (or no) load balancing,
3089 * and also for the starting state for dynamic load balancing.
3090 * setmode determines if and where the boundaries are stored; use the enum above.
3091 * Returns the number of communication pulses in npulse.
3092 */
3093static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
3094 int setmode, ivec npulse)
3095{
3096 gmx_domdec_comm_t *comm;
3097 int d, j;
3098 rvec cellsize_min;
3099 real *cell_x, cell_dx, cellsize;
3100
3101 comm = dd->comm;
3102
3103 for (d = 0; d < DIM3; d++)
3104 {
3105 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
3106 npulse[d] = 1;
3107 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL((void*)0))
3108 {
3109 /* Uniform grid */
3110 cell_dx = ddbox->box_size[d]/dd->nc[d];
3111 switch (setmode)
3112 {
3113 case setcellsizeslbMASTER:
3114 for (j = 0; j < dd->nc[d]+1; j++)
3115 {
3116 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
3117 }
3118 break;
3119 case setcellsizeslbLOCAL:
3120 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
3121 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
3122 break;
3123 default:
3124 break;
3125 }
3126 cellsize = cell_dx*ddbox->skew_fac[d];
3127 while (cellsize*npulse[d] < comm->cutoff)
3128 {
3129 npulse[d]++;
3130 }
3131 cellsize_min[d] = cellsize;
3132 }
3133 else
3134 {
3135 /* Statically load balanced grid */
3136 /* Even when we are not doing a master distribution, we determine
3137 * all cell borders in a loop to obtain values identical
3138 * to the master distribution case and to determine npulse.
3139 */
3140 if (setmode == setcellsizeslbMASTER)
3141 {
3142 cell_x = dd->ma->cell_x[d];
3143 }
3144 else
3145 {
3146 snew(cell_x, dd->nc[d]+1)(cell_x) = save_calloc("cell_x", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c"
, 3146, (dd->nc[d]+1), sizeof(*(cell_x)))
;
3147 }
3148 cell_x[0] = ddbox->box0[d];
3149 for (j = 0; j < dd->nc[d]; j++)
3150 {
3151 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
3152 cell_x[j+1] = cell_x[j] + cell_dx;
3153 cellsize = cell_dx*ddbox->skew_fac[d];
3154 while (cellsize*npulse[d] < comm->cutoff &&
3155 npulse[d] < dd->nc[d]-1)
3156 {
3157 npulse[d]++;
3158 }
3159 cellsize_min[d] = min(cellsize_min[d], cellsize)(((cellsize_min[d]) < (cellsize)) ? (cellsize_min[d]) : (cellsize
) )
;
3160 }
3161 if (setmode == setcellsizeslbLOCAL)
3162 {
3163 comm->cell_x0[d] = cell_x[dd->ci[d]];
3164 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
3165 }
3166 if (setmode != setcellsizeslbMASTER)
3167 {
3168 sfree(cell_x)save_free("cell_x", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c"
, 3168, (cell_x))
;
3169 }
3170 }
3171 /* The following limitation avoids a cell receiving some of its own
3172 * home charge groups back over the periodic boundary.
3173 * Duplicated charge groups cause trouble with the global indices.
3174 */
3175 if (d < ddbox->npbcdim &&
3176 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
3177 {
3178 char error_string[STRLEN4096];
3179
3180 sprintf(error_string,
3181 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3182 dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
3183 comm->cutoff,
3184 dd->nc[d], dd->nc[d],
3185 dd->nnodes > dd->nc[d] ? "cells" : "processors");
3186
3187 if (setmode == setcellsizeslbLOCAL)
3188 {
3189 gmx_fatal_collective(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c",
3189
, NULL((void*)0), dd, error_string);
3190 }
3191 else
3192 {
3193 gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c",
3193
, error_string);
3194 }
3195 }
3196 }
3197
3198 if (!comm->bDynLoadBal)
3199 {
3200 copy_rvec(cellsize_min, comm->cellsize_min);
3201 }
3202
3203 for (d = 0; d < comm->npmedecompdim; d++)
3204 {
3205 set_pme_maxshift(dd, &comm->ddpme[d],
3206 comm->slb_frac[dd->dim[d]] == NULL((void*)0), ddbox,
3207 comm->ddpme[d].slb_dim_f);
3208 }
3209}
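For the uniform-grid branch above, the npulse loop effectively computes the smallest integer npulse with cellsize*npulse >= cutoff. A minimal standalone sketch with hypothetical numbers (not from the source):

#include <stdio.h>

int main(void)
{
    double cellsize = 6.0/4;   /* 4 cells over a 6 nm box: 1.5 nm per cell */
    double cutoff   = 2.0;     /* assumed 2.0 nm cut-off */
    int    npulse   = 1;

    while (cellsize*npulse < cutoff)
    {
        npulse++;
    }
    printf("npulse = %d\n", npulse);   /* prints npulse = 2 */

    return 0;
}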
3210
3211
3212static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
3213 int d, int dim, gmx_domdec_root_t *root,
3214 gmx_ddbox_t *ddbox,
3215 gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
3216{
3217 gmx_domdec_comm_t *comm;
3218 int ncd, i, j, nmin, nmin_old;
3219 gmx_bool bLimLo, bLimHi;
3220 real *cell_size;
3221 real fac, halfway, cellsize_limit_f_i, region_size;
3222 gmx_bool bPBC, bLastHi = FALSE0;
3223 int nrange[] = {range[0], range[1]};
3224
3225 region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
3226
3227 comm = dd->comm;
3228
3229 ncd = dd->nc[dim];
3230
3231 bPBC = (dim < ddbox->npbcdim);
3232
3233 cell_size = root->buf_ncd;
3234
3235 if (debug)
3236 {
3237 fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
3238 }
3239
3240 /* First we need to check that the scaling does not make cells
3241 * smaller than the smallest allowed size.
3242 * We need to do this iteratively, since if a cell is too small,
3243 * it needs to be enlarged, which makes all the other cells smaller,
3244 * which could in turn make another cell smaller than allowed.
3245 */
3246 for (i = range[0]; i < range[1]; i++)
3247 {
3248 root->bCellMin[i] = FALSE0;
3249 }
3250 nmin = 0;
3251 do
3252 {
3253 nmin_old = nmin;
3254 /* We need the total for normalization */
3255 fac = 0;
3256 for (i = range[0]; i < range[1]; i++)
3257 {
3258 if (root->bCellMin[i] == FALSE0)
3259 {
3260 fac += cell_size[i];
3261 }
3262 }
3263 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* subtracting cells already set to cellsize_limit_f */
3264 /* Determine the cell boundaries */
3265 for (i = range[0]; i < range[1]; i++)
3266 {
3267 if (root->bCellMin[i] == FALSE0)
3268 {
3269 cell_size[i] *= fac;
3270 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3271 {
3272 cellsize_limit_f_i = 0;
3273 }
3274 else
3275 {
3276 cellsize_limit_f_i = cellsize_limit_f;
3277 }
3278 if (cell_size[i] < cellsize_limit_f_i)
3279 {
3280 root->bCellMin[i] = TRUE1;
3281 cell_size[i] = cellsize_limit_f_i;
3282 nmin++;
3283 }
3284 }
3285 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
3286 }
3287 }
3288 while (nmin > nmin_old);
3289
3290 i = range[1]-1;
3291 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3292 /* For this check we should not use DD_CELL_MARGIN,
3293 * but a slightly smaller factor,
3294 * since rounding could get us below the limit.
3295 */
3296 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN21.00005/DD_CELL_MARGIN1.0001)
3297 {
3298 char buf[22];
3299 gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c",
3299
, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3300 gmx_step_str(step, buf),
3301 dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
3302 ncd, comm->cellsize_min[dim]);
3303 }
3304
3305 root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
3306
3307 if (!bUniform)
3308 {
3309 /* Check that the boundary has not moved more than halfway into
3310 * each of the cells it bounds, as this could cause problems,
3311 * especially when the differences between cell sizes are large.
3312 * If changes are applied, they will not make cells smaller
3313 * than the cut-off, as we check all the boundaries which
3314 * might be affected by a change and if the old state was ok,
3315 * the cells will at most be shrunk back to their old size.
3316 */
3317 for (i = range[0]+1; i < range[1]; i++)
3318 {
3319 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3320 if (root->cell_f[i] < halfway)
3321 {
3322 root->cell_f[i] = halfway;
3323 /* Check if the change also causes shifts of the next boundaries */
3324 for (j = i+1; j < range[1]; j++)
3325 {
3326 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3327 {
3328 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3329 }
3330 }
3331 }
3332 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3333 if (root->cell_f[i] > halfway)
3334 {
3335 root->cell_f[i] = halfway;
3336 /* Check if the change also causes shifts of the next boundaries */
3337 for (j = i-1; j >= range[0]+1; j--)
3338 {
3339 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3340 {
3341 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3342 }
3343 }
3344 }
3345 }
3346 }
3347
3348 /* nrange is defined as the [lower, upper) range for a new call to enforce_limits */
3349 /* Find the highest violation of LimLo (a) and the first following violation of LimHi (b),
3350 * then call enforce_limits for (oldb,a) and (a,b). In the next step: (b,nexta).
3351 * oldb and nexta can be the range boundaries; nrange is used for a and b. */
3352 if (d > 0)
3353 {
3354 /* Take care of the staggering of the cell boundaries */
3355 if (bUniform)
3356 {
3357 for (i = range[0]; i < range[1]; i++)
3358 {
3359 root->cell_f_max0[i] = root->cell_f[i];
3360 root->cell_f_min1[i] = root->cell_f[i+1];
3361 }
3362 }
3363 else
3364 {
3365 for (i = range[0]+1; i < range[1]; i++)
3366 {
3367 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3368 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3369 if (bLimLo && bLimHi)
3370 {
3371 /* Both limits violated, try the best we can */
3372 * For this case we split the original range (range) in two parts and take care of the other limitations in the next iteration. */
3373 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3374 nrange[0] = range[0];
3375 nrange[1] = i;
3376 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3377
3378 nrange[0] = i;
3379 nrange[1] = range[1];
3380 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3381
3382 return;
3383 }
3384 else if (bLimLo)
3385 {
3386 /* root->cell_f[i] = root->bound_min[i]; */
3387 nrange[1] = i; /* only store the violation location; there could be a following LimLo violation with a higher index */
3388 bLastHi = FALSE0;
3389 }
3390 else if (bLimHi && !bLastHi)
3391 {
3392 bLastHi = TRUE1;
3393 if (nrange[1] < range[1]) /* found a LimLo before */
3394 {
3395 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3396 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3397 nrange[0] = nrange[1];
3398 }
3399 root->cell_f[i] = root->bound_max[i];
3400 nrange[1] = i;
3401 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3402 nrange[0] = i;
3403 nrange[1] = range[1];
3404 }
3405 }
3406 if (nrange[1] < range[1]) /* a LimLo was found last */
3407 {
3408 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3409 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3410 nrange[0] = nrange[1];
3411 nrange[1] = range[1];
3412 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3413 }
3414 else if (nrange[0] > range[0]) /* found at least one LimHi */
3415 {
3416 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3417 }
3418 }
3419 }
3420}
3421
3422
3423static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3424 int d, int dim, gmx_domdec_root_t *root,
3425 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3426 gmx_bool bUniform, gmx_int64_t step)
3427{
3428 gmx_domdec_comm_t *comm;
3429 int ncd, d1, i, j, pos;
3430 real *cell_size;
3431 real load_aver, load_i, imbalance, change, change_max, sc;
3432 real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
3433 real change_limit;
3434 real relax = 0.5;
3435 gmx_bool bPBC;
3436 int range[] = { 0, 0 };
3437
3438 comm = dd->comm;
3439
3440 /* Convert the maximum change from the input percentage to a fraction */
3441 change_limit = comm->dlb_scale_lim*0.01;
3442
3443 ncd = dd->nc[dim];
3444
3445 bPBC = (dim < ddbox->npbcdim);
3446
3447 cell_size = root->buf_ncd;
3448
3449 /* Store the original boundaries */
3450 for (i = 0; i < ncd+1; i++)
3451 {
3452 root->old_cell_f[i] = root->cell_f[i];
3453 }
3454 if (bUniform)
3455 {
3456 for (i = 0; i < ncd; i++)
3457 {
3458 cell_size[i] = 1.0/ncd;
3459 }
3460 }
3461 else if (dd_load_count(comm))
3462 {
3463 load_aver = comm->load[d].sum_m/ncd;
3464 change_max = 0;
3465 for (i = 0; i < ncd; i++)
3466 {
3467 /* Determine the relative imbalance of cell i */
3468 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3469 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3470 /* Determine the change of the cell size using underrelaxation */
3471 change = -relax*imbalance;
3472 change_max = max(change_max, max(change, -change))(((change_max) > ((((change) > (-change)) ? (change) : (
-change) ))) ? (change_max) : ((((change) > (-change)) ? (
change) : (-change) )) )
;
3473 }
3474 /* Limit the amount of scaling.
3475 * We need to use the same rescaling for all cells in one row,
3476 * otherwise the load balancing might not converge.
3477 */
3478 sc = relax;
3479 if (change_max > change_limit)
3480 {
3481 sc *= change_limit/change_max;
3482 }
3483 for (i = 0; i < ncd; i++)
3484 {
3485 /* Determine the relative imbalance of cell i */
3486 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3487 imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
3488 /* Determine the change of the cell size using underrelaxation */
3489 change = -sc*imbalance;
3490 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3491 }
3492 }
3493
3494 cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
3495 cellsize_limit_f *= DD_CELL_MARGIN1.0001;
3496 dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
3497 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN1.0001;
3498 if (ddbox->tric_dir[dim])
3499 {
3500 cellsize_limit_f /= ddbox->skew_fac[dim];
3501 dist_min_f /= ddbox->skew_fac[dim];
3502 }
3503 if (bDynamicBox && d > 0)
3504 {
3505 dist_min_f *= DD_PRES_SCALE_MARGIN1.02;
3506 }
3507 if (d > 0 && !bUniform)
3508 {
3509 /* Make sure that the grid is not shifted too much */
3510 for (i = 1; i < ncd; i++)
3511 {
3512 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3513 {
3514 gmx_incons("Inconsistent DD boundary staggering limits!")_gmx_error("incons", "Inconsistent DD boundary staggering limits!"
, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c", 3514
)
;
3515 }
3516 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3517 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3518 if (space > 0)
3519 {
3520 root->bound_min[i] += 0.5*space;
3521 }
3522 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3523 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3524 if (space < 0)
3525 {
3526 root->bound_max[i] += 0.5*space;
3527 }
3528 if (debug)
3529 {
3530 fprintf(debug,
3531 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3532 d, i,
3533 root->cell_f_max0[i-1] + dist_min_f,
3534 root->bound_min[i], root->cell_f[i], root->bound_max[i],
3535 root->cell_f_min1[i] - dist_min_f);
3536 }
3537 }
3538 }
3539 range[1] = ncd;
3540 root->cell_f[0] = 0;
3541 root->cell_f[ncd] = 1;
3542 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3543
3544
3545 /* After the checks above, the cells should obey the cut-off
3546 * restrictions, but it does not hurt to check.
3547 */
3548 for (i = 0; i < ncd; i++)
3549 {
3550 if (debug)
3551 {
3552 fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
3553 dim, i, root->cell_f[i], root->cell_f[i+1]);
3554 }
3555
3556 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3557 root->cell_f[i+1] - root->cell_f[i] <
3558 cellsize_limit_f/DD_CELL_MARGIN1.0001)
3559 {
3560 char buf[22];
3561 fprintf(stderrstderr,
3562 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3563 gmx_step_str(step, buf), dim2char(dim), i,
3564 (root->cell_f[i+1] - root->cell_f[i])
3565 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3566 }
3567 }
3568
3569 pos = ncd + 1;
3570 /* Store the cell boundaries of the lower dimensions at the end */
3571 for (d1 = 0; d1 < d; d1++)
3572 {
3573 root->cell_f[pos++] = comm->cell_f0[d1];
3574 root->cell_f[pos++] = comm->cell_f1[d1];
3575 }
3576
3577 if (d < comm->npmedecompdim)
3578 {
3579 /* The master determines the maximum shift for
3580 * the coordinate communication between separate PME nodes.
3581 */
3582 set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
3583 }
3584 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3585 if (d >= 1)
3586 {
3587 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3588 }
3589}
3590
3591static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3592 gmx_ddbox_t *ddbox, int dimind)
3593{
3594 gmx_domdec_comm_t *comm;
3595 int dim;
3596
3597 comm = dd->comm;
3598
3599 /* Set the cell dimensions */
3600 dim = dd->dim[dimind];
3601 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3602 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3603 if (dim >= ddbox->nboundeddim)
3604 {
3605 comm->cell_x0[dim] += ddbox->box0[dim];
3606 comm->cell_x1[dim] += ddbox->box0[dim];
3607 }
3608}
3609
3610static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3611 int d, int dim, real *cell_f_row,
3612 gmx_ddbox_t *ddbox)
3613{
3614 gmx_domdec_comm_t *comm;
3615 int d1, dim1, pos;
3616
3617 comm = dd->comm;
3618
3619#ifdef GMX_MPI
3620 /* Each node would only need to know two fractions,
3621 * but it is probably cheaper to broadcast the whole array.
3622 */
3623 MPI_BcasttMPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)((dd)->nc[(dd)->dim[(d)]]+1+(d)*2+1+(d))*sizeof(real), MPI_BYTETMPI_BYTE,
3624 0, comm->mpi_comm_load[d]);
3625#endif
3626 /* Copy the fractions for this dimension from the buffer */
3627 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3628 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3629 /* The whole array was communicated, so set the buffer position */
3630 pos = dd->nc[dim] + 1;
3631 for (d1 = 0; d1 <= d; d1++)
3632 {
3633 if (d1 < d)
3634 {
3635 /* Copy the cell fractions of the lower dimensions */
3636 comm->cell_f0[d1] = cell_f_row[pos++];
3637 comm->cell_f1[d1] = cell_f_row[pos++];
3638 }
3639 relative_to_absolute_cell_bounds(dd, ddbox, d1);
3640 }
3641 /* Convert the communicated shift from float to int */
3642 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3643 if (d >= 1)
3644 {
3645 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3646 }
3647}
3648
3649static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3650 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3651 gmx_bool bUniform, gmx_int64_t step)
3652{
3653 gmx_domdec_comm_t *comm;
3654 int d, dim, d1;
3655 gmx_bool bRowMember, bRowRoot;
3656 real *cell_f_row;
3657
3658 comm = dd->comm;
3659
3660 for (d = 0; d < dd->ndim; d++)
3661 {
3662 dim = dd->dim[d];
3663 bRowMember = TRUE1;
3664 bRowRoot = TRUE1;
3665 for (d1 = d; d1 < dd->ndim; d1++)
3666 {
3667 if (dd->ci[dd->dim[d1]] > 0)
3668 {
3669 if (d1 != d)
3670 {
3671 bRowMember = FALSE0;
3672 }
3673 bRowRoot = FALSE0;
3674 }
3675 }
3676 if (bRowMember)
3677 {
3678 if (bRowRoot)
3679 {
3680 set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
3681 ddbox, bDynamicBox, bUniform, step);
3682 cell_f_row = comm->root[d]->cell_f;
3683 }
3684 else
3685 {
3686 cell_f_row = comm->cell_f_row;
3687 }
3688 distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
3689 }
3690 }
3691}
3692
3693static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3694{
3695 int d;
3696
3697 /* This function assumes the box is static and should therefore
3698 * not be called when the box has changed since the last
3699 * call to dd_partition_system.
3700 */
3701 for (d = 0; d < dd->ndim; d++)
3702 {
3703 relative_to_absolute_cell_bounds(dd, ddbox, d);
3704 }
3705}
3706
3707
3708
3709static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3710 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3711 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3712 gmx_wallcycle_t wcycle)
3713{
3714 gmx_domdec_comm_t *comm;
3715 int dim;
3716
3717 comm = dd->comm;
3718
3719 if (bDoDLB)
3720 {
3721 wallcycle_start(wcycle, ewcDDCOMMBOUND);
3722 set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
3723 wallcycle_stop(wcycle, ewcDDCOMMBOUND);
3724 }
3725 else if (bDynamicBox)
3726 {
3727 set_dd_cell_sizes_dlb_nochange(dd, ddbox);
3728 }
3729
3730 /* Set the dimensions for which no DD is used */
3731 for (dim = 0; dim < DIM3; dim++)
3732 {
3733 if (dd->nc[dim] == 1)
3734 {
3735 comm->cell_x0[dim] = 0;
3736 comm->cell_x1[dim] = ddbox->box_size[dim];
3737 if (dim >= ddbox->nboundeddim)
3738 {
3739 comm->cell_x0[dim] += ddbox->box0[dim];
3740 comm->cell_x1[dim] += ddbox->box0[dim];
3741 }
3742 }
3743 }
3744}
3745
3746static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
3747{
3748 int d, np, i;
3749 gmx_domdec_comm_dim_t *cd;
3750
3751 for (d = 0; d < dd->ndim; d++)
3752 {
3753 cd = &dd->comm->cd[d];
3754 np = npulse[dd->dim[d]];
3755 if (np > cd->np_nalloc)
3756 {
3757 if (debug)
3758 {
3759 fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
3760 dim2char(dd->dim[d]), np);
3761 }
3762 if (DDMASTER(dd)((dd)->rank == (dd)->masterrank) && cd->np_nalloc > 0)
3763 {
3764 fprintf(stderrstderr, "\nIncreasing the number of cells to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
3765 }
3766 srenew(cd->ind, np)(cd->ind) = save_realloc("cd->ind", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c"
, 3766, (cd->ind), (np), sizeof(*(cd->ind)))
;
3767 for (i = cd->np_nalloc; i < np; i++)
3768 {
3769 cd->ind[i].index = NULL((void*)0);
3770 cd->ind[i].nalloc = 0;
3771 }
3772 cd->np_nalloc = np;
3773 }
3774 cd->np = np;
3775 }
3776}
3777
3778
3779static void set_dd_cell_sizes(gmx_domdec_t *dd,
3780 gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
3781 gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
3782 gmx_wallcycle_t wcycle)
3783{
3784 gmx_domdec_comm_t *comm;
3785 int d;
3786 ivec npulse;
3787
3788 comm = dd->comm;
3789
3790 /* Copy the old cell boundaries for the cg displacement check */
3791 copy_rvec(comm->cell_x0, comm->old_cell_x0);
3792 copy_rvec(comm->cell_x1, comm->old_cell_x1);
3793
3794 if (comm->bDynLoadBal)
3795 {
3796 if (DDMASTER(dd)((dd)->rank == (dd)->masterrank))
3797 {
3798 check_box_size(dd, ddbox);
3799 }
3800 set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
3801 }
3802 else
3803 {
3804 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
3805 realloc_comm_ind(dd, npulse);
3806 }
3807
3808 if (debug)
3809 {
3810 for (d = 0; d < DIM3; d++)
3811 {
3812 fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
3813 d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
3814 }
3815 }
3816}
3817
3818static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3819 gmx_ddbox_t *ddbox,
3820 rvec cell_ns_x0, rvec cell_ns_x1,
3821 gmx_int64_t step)
3822{
3823 gmx_domdec_comm_t *comm;
3824 int dim_ind, dim;
3825
3826 comm = dd->comm;
3827
3828 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
3829 {
3830 dim = dd->dim[dim_ind];
3831
3832 /* Without PBC we don't have restrictions on the outer cells */
3833 if (!(dim >= ddbox->npbcdim &&
3834 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3835 comm->bDynLoadBal &&
3836 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3837 comm->cellsize_min[dim])
3838 {
3839 char buf[22];
3840 gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c",
3840
, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3841 gmx_step_str(step, buf), dim2char(dim),
3842 comm->cell_x1[dim] - comm->cell_x0[dim],
3843 ddbox->skew_fac[dim],
3844 dd->comm->cellsize_min[dim],
3845 dd->ci[XX0], dd->ci[YY1], dd->ci[ZZ2]);
3846 }
3847 }
3848
3849 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM3)
3850 {
3851 /* Communicate the boundaries and update cell_ns_x0/1 */
3852 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
3853 if (dd->bGridJump && dd->ndim > 1)
3854 {
3855 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE1);
3856 }
3857 }
3858}
3859
3860static void make_tric_corr_matrix(int npbcdim, matrix box, matrix tcm)
3861{
3862 if (YY1 < npbcdim)
3863 {
3864 tcm[YY1][XX0] = -box[YY1][XX0]/box[YY1][YY1];
3865 }
3866 else
3867 {
3868 tcm[YY1][XX0] = 0;
3869 }
3870 if (ZZ2 < npbcdim)
3871 {
3872 tcm[ZZ2][XX0] = -(box[ZZ2][YY1]*tcm[YY1][XX0] + box[ZZ2][XX0])/box[ZZ2][ZZ2];
3873 tcm[ZZ2][YY1] = -box[ZZ2][YY1]/box[ZZ2][ZZ2];
3874 }
3875 else
3876 {
3877 tcm[ZZ2][XX0] = 0;
3878 tcm[ZZ2][YY1] = 0;
3879 }
3880}
3881
3882static void check_screw_box(matrix box)
3883{
3884 /* Mathematical limitation */
3885 if (box[YY1][XX0] != 0 || box[ZZ2][XX0] != 0)
3886 {
3887 gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c",
3887
, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3888 }
3889
3890 /* Limitation due to the asymmetry of the eighth shell method */
3891 if (box[ZZ2][YY1] != 0)
3892 {
3893 gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/domdec.c",
3893
, "pbc=screw with non-zero box_zy is not supported");
3894 }
3895}
3896
3897static void distribute_cg(FILE *fplog, gmx_int64_t step,
3898 matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
3899 gmx_domdec_t *dd)
3900{
3901 gmx_domdec_master_t *ma;
3902 int **tmp_ind = NULL((void*)0), *tmp_nalloc = NULL((void*)0);
3903 int i, icg, j, k, k0, k1, d, npbcdim;
3904 matrix tcm;
3905 rvec box_size, cg_cm;
3906 ivec ind;
3907 real nrcg, inv_ncg, pos_d;
3908 atom_id *cgindex;
3909 gmx_bool bUnbounded, bScrew;
3910
3911 ma = dd->ma;
3912
3913 if (tmp_ind == NULL)
3914 {
3915 snew(tmp_nalloc, dd->nnodes);
3916 snew(tmp_ind, dd->nnodes);
3917 for (i = 0; i < dd->nnodes; i++)
3918 {
3919 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3920 snew(tmp_ind[i], tmp_nalloc[i]);
3921 }
3922 }
3923
3924 /* Clear the count */
3925 for (i = 0; i < dd->nnodes; i++)
3926 {
3927 ma->ncg[i] = 0;
3928 ma->nat[i] = 0;
3929 }
3930
3931 make_tric_corr_matrix(dd->npbcdim, box, tcm);
3932
3933 cgindex = cgs->index;
3934
3935 /* Compute the center of geometry for all charge groups */
3936 for (icg = 0; icg < cgs->nr; icg++)
3937 {
3938 k0 = cgindex[icg];
3939 k1 = cgindex[icg+1];
3940 nrcg = k1 - k0;
3941 if (nrcg == 1)
3942 {
3943 copy_rvec(pos[k0], cg_cm);
3944 }
3945 else
3946 {
3947 inv_ncg = 1.0/nrcg;
3948
3949 clear_rvec(cg_cm);
3950 for (k = k0; (k < k1); k++)
3951 {
3952 rvec_inc(cg_cm, pos[k]);
3953 }
3954 for (d = 0; (d < DIM3); d++)
3955 {
3956 cg_cm[d] *= inv_ncg;
3957 }
3958 }
3959 /* Put the charge group in the box and determine the cell index */
3960 for (d = DIM3-1; d >= 0; d--)
3961 {
3962 pos_d = cg_cm[d];
3963 if (d < dd->npbcdim)
3964 {
3965 bScrew = (dd->bScrewPBC && d == XX0);
3966 if (tric_dir[d] && dd->nc[d] > 1)
3967 {
3968 /* Use triclinic coordinates for this dimension */
3969 for (j = d+1; j < DIM3; j++)
3970 {
3971 pos_d += cg_cm[j]*tcm[j][d];
3972 }
3973 }
3974 while (pos_d >= box[d][d])
3975 {
3976 pos_d -= box[d][d];
3977 rvec_dec(cg_cm, box[d]);
3978 if (bScrew)
3979 {
3980 cg_cm[YY1] = box[YY1][YY1] - cg_cm[YY1];
3981 cg_cm[ZZ2] = box[ZZ2][ZZ2] - cg_cm[ZZ2];
3982 }
3983 for (k = k0; (k < k1); k++)
3984 {
3985 rvec_dec(pos[k], box[d]);
3986 if (bScrew)
3987 {
3988 pos[k][YY1] = box[YY1][YY1] - pos[k][YY1];
3989 pos[k][ZZ2] = box[ZZ2][ZZ2] - pos[k][ZZ2];
3990 }
3991 }
3992 }
3993 while (pos_d < 0)
3994 {
3995 pos_d += box[d][d];
3996 rvec_inc(cg_cm, box[d]);
3997 if (bScrew)
3998 {
3999 cg_cm[YY1] = box[YY1][YY1] - cg_cm[YY1];
4000 cg_cm[ZZ2] = box[ZZ2][ZZ2] - cg_cm[ZZ2];
4001 }
4002 for (k = k0; (k < k1); k++)
4003 {
4004 rvec_inc(pos[k], box[d]);
4005 if (bScrew)
4006 {
4007 pos[k][YY1] = box[YY1][YY1] - pos[k][YY1];
4008 pos[k][ZZ2] = box[ZZ2][ZZ2] - pos[k][ZZ2];
4009 }
4010 }
4011 }
4012 }
4013 /* This could be done more efficiently */
4014 ind[d] = 0;
4015 while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
4016 {
4017 ind[d]++;
4018 }
4019 }
4020 i = dd_index(dd->nc, ind);
4021 if (ma->ncg[i] == tmp_nalloc[i])
4022 {
4023 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
4024 srenew(tmp_ind[i], tmp_nalloc[i]);
4025 }
4026 tmp_ind[i][ma->ncg[i]] = icg;
4027 ma->ncg[i]++;
4028 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
4029 }
4030
4031 k1 = 0;
4032 for (i = 0; i < dd->nnodes; i++)
4033 {
4034 ma->index[i] = k1;
4035 for (k = 0; k < ma->ncg[i]; k++)
4036 {
4037 ma->cg[k1++] = tmp_ind[i][k];
4038 }
4039 }
4040 ma->index[dd->nnodes] = k1;
4041
4042 for (i = 0; i < dd->nnodes; i++)
4043 {
4044 sfree(tmp_ind[i]);
4045 }
4046 sfree(tmp_ind);
4047 sfree(tmp_nalloc);
4048
4049 if (fplog)
4050 {
4051 char buf[22];
4052 fprintf(fplog, "Charge group distribution at step %s:",
4053 gmx_step_str(step, buf));
4054 for (i = 0; i < dd->nnodes; i++)
4055 {
4056 fprintf(fplog, " %d", ma->ncg[i]);
4057 }
4058 fprintf(fplog, "\n");
4059 }
4060}
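The destination rank computed in distribute_cg() comes from flattening the per-dimension cell indices with dd_index(). A small sketch of that flattening, with a hypothetical grid as the worked example:

/* Sketch (not part of domdec.c): dd_index flattens a 3D cell coordinate
 * in row-major order (x slowest, z fastest), matching the expansion
 * shown above. For a hypothetical 4x3x2 grid, cell (2,1,0) maps to
 * (2*3 + 1)*2 + 0 = 14.
 */
static int dd_index_sketch(const ivec nc, const ivec ind)
{
    return (ind[XX]*nc[YY] + ind[YY])*nc[ZZ] + ind[ZZ];
}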
4061
4062static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
4063 t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
4064 rvec pos[])
4065{
4066 gmx_domdec_master_t *ma = NULL((void*)0);
4067 ivec npulse;
4068 int i, cg_gl;
4069 int *ibuf, buf2[2] = { 0, 0 };
4070 gmx_bool bMaster = DDMASTER(dd)((dd)->rank == (dd)->masterrank);
4071
4072 if (bMaster)
4073 {
4074 ma = dd->ma;
4075
4076 if (dd->bScrewPBC)
4077 {
4078 check_screw_box(box);
4079 }
4080
4081 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
4082
4083 distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
4084 for (i = 0; i < dd->nnodes; i++)
4085 {
4086 ma->ibuf[2*i] = ma->ncg[i];
4087 ma->ibuf[2*i+1] = ma->nat[i];
4088 }
4089 ibuf = ma->ibuf;
4090 }
4091 else
4092 {
4093 ibuf = NULL((void*)0);
4094 }
4095 dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
4096
4097 dd->ncg_home = buf2[0];
4098 dd->nat_home = buf2[1];
4099 dd->ncg_tot = dd->ncg_home;
4100 dd->nat_tot = dd->nat_home;
4101 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
4102 {
4103 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
4104 srenew(dd->index_gl, dd->cg_nalloc);
4105 srenew(dd->cgindex, dd->cg_nalloc+1);
4106 }
4107 if (bMaster)
4108 {
4109 for (i = 0; i < dd->nnodes; i++)
4110 {
4111 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
4112 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
4113 }
4114 }
4115
4116 dd_scatterv(dd,
4117 DDMASTER(dd)((dd)->rank == (dd)->masterrank) ? ma->ibuf : NULL((void*)0),
4118 DDMASTER(dd)((dd)->rank == (dd)->masterrank) ? ma->ibuf+dd->nnodes : NULL((void*)0),
4119 DDMASTER(dd)((dd)->rank == (dd)->masterrank) ? ma->cg : NULL((void*)0),
4120 dd->ncg_home*sizeof(int), dd->index_gl);
4121
4122 /* Determine the home charge group sizes */
4123 dd->cgindex[0] = 0;
4124 for (i = 0; i < dd->ncg_home; i++)
4125 {
4126 cg_gl = dd->index_gl[i];
4127 dd->cgindex[i+1] =
4128 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
4129 }
4130
4131 if (debug)
4132 {
4133 fprintf(debug, "Home charge groups:\n");
4134 for (i = 0; i < dd->ncg_home; i++)
4135 {
4136 fprintf(debug, " %d", dd->index_gl[i]);
4137 if (i % 10 == 9)
4138 {
4139 fprintf(debug, "\n");
4140 }
4141 }
4142 fprintf(debug, "\n");
4143 }
4144}
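The scatter above hands every rank its slice of the global charge-group index array; ma->ibuf carries per-rank byte counts and byte displacements built from ma->ncg and ma->index. In plain MPI terms it corresponds roughly to the call sketched below; dd_scatterv is the GROMACS wrapper, and the exact root and communicator used here are assumptions for illustration:

/* Sketch (not part of domdec.c): the equivalent variable-count scatter,
 * with counts and displacements expressed in bytes as prepared in
 * ma->ibuf above.
 *
 *   MPI_Scatterv(ma->cg, sendcounts, displs, MPI_BYTE,
 *                dd->index_gl, dd->ncg_home*sizeof(int), MPI_BYTE,
 *                DDMASTERRANK(dd), dd->mpi_comm_all);
 */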
4145
4146static int compact_and_copy_vec_at(int ncg, int *move,
4147 int *cgindex,
4148 int nvec, int vec,
4149 rvec *src, gmx_domdec_comm_t *comm,
4150 gmx_bool bCompact)
4151{
4152 int m, icg, i, i0, i1, nrcg;
4153 int home_pos;
4154 int pos_vec[DIM3*2];
4155
4156 home_pos = 0;
4157
4158 for (m = 0; m < DIM3*2; m++)
4159 {
4160 pos_vec[m] = 0;
4161 }
4162
4163 i0 = 0;
4164 for (icg = 0; icg < ncg; icg++)
4165 {
4166 i1 = cgindex[icg+1];
4167 m = move[icg];
4168 if (m == -1)
4169 {
4170 if (bCompact)
4171 {
4172 /* Compact the home array in place */
4173 for (i = i0; i < i1; i++)
4174 {
4175 copy_rvec(src[i], src[home_pos++]);
4176 }
4177 }
4178 }
4179 else
4180 {
4181 /* Copy to the communication buffer */
4182 nrcg = i1 - i0;
4183 pos_vec[m] += 1 + vec*nrcg;
4184 for (i = i0; i < i1; i++)
4185 {
4186 copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
4187 }
4188 pos_vec[m] += (nvec - vec - 1)*nrcg;
4189 }
4190 if (!bCompact)
4191 {
4192 home_pos += i1 - i0;
4193 }
4194 i0 = i1;
4195 }
4196
4197 return home_pos;
4198}
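The send buffer cgcm_state[m] interleaves, per moved charge group, one rvec holding the cg center followed by nvec blocks of nrcg atom vectors; compact_and_copy_vec_at() is called once per vector and uses pos_vec[m] to skip the slots owned by the cg center and by the other vectors. A sketch of the resulting layout and offset arithmetic; cg_state_offset is a hypothetical helper used only for illustration:

/* Sketch (not part of domdec.c): per-cg layout of comm->cgcm_state[m]
 *
 *   [ cg_cm ][ x: nrcg rvecs ][ v: nrcg rvecs ] ... (nvec blocks)
 */
static int cg_state_offset(int cg_base, int vec, int nrcg)
{
    /* first atom slot of vector 'vec' inside the block of one charge group */
    return cg_base + 1 + vec*nrcg;
}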
4199
4200static int compact_and_copy_vec_cg(int ncg, int *move,
4201 int *cgindex,
4202 int nvec, rvec *src, gmx_domdec_comm_t *comm,
4203 gmx_bool bCompact)
4204{
4205 int m, icg, i0, i1, nrcg;
4206 int home_pos;
4207 int pos_vec[DIM3*2];
4208
4209 home_pos = 0;
4210
4211 for (m = 0; m < DIM3*2; m++)
4212 {
4213 pos_vec[m] = 0;
4214 }
4215
4216 i0 = 0;
4217 for (icg = 0; icg < ncg; icg++)
4218 {
4219 i1 = cgindex[icg+1];
4220 m = move[icg];
4221 if (m == -1)
4222 {
4223 if (bCompact)
4224 {
4225 /* Compact the home array in place */
4226 copy_rvec(src[icg], src[home_pos++]);
4227 }
4228 }
4229 else
4230 {
4231 nrcg = i1 - i0;
4232 /* Copy to the communication buffer */
4233 copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
4234 pos_vec[m] += 1 + nrcg*nvec;
4235 }
4236 i0 = i1;
4237 }
4238 if (!bCompact)
4239 {
4240 home_pos = ncg;
4241 }
4242
4243 return home_pos;
4244}
4245
4246static int compact_ind(int ncg, int *move,
4247 int *index_gl, int *cgindex,
4248 int *gatindex,
4249 gmx_ga2la_t ga2la, char *bLocalCG,
4250 int *cginfo)
4251{
4252 int cg, nat, a0, a1, a, a_gl;
4253 int home_pos;
4254
4255 home_pos = 0;
4256 nat = 0;
4257 for (cg = 0; cg < ncg; cg++)
4258 {
4259 a0 = cgindex[cg];
4260 a1 = cgindex[cg+1];
4261 if (move[cg] == -1)
4262 {
4263 /* Compact the home arrays in place.
4264 * Anything that can be done here avoids access to global arrays.
4265 */
4266 cgindex[home_pos] = nat;
4267 for (a = a0; a < a1; a++)
4268 {
4269 a_gl = gatindex[a];
4270 gatindex[nat] = a_gl;
4271 /* The cell number stays 0, so we don't need to set it */
4272 ga2la_change_la(ga2la, a_gl, nat);
4273 nat++;
4274 }
4275 index_gl[home_pos] = index_gl[cg];
4276 cginfo[home_pos] = cginfo[cg];
4277 /* The charge group remains local, so bLocalCG does not change */
4278 home_pos++;
4279 }
4280 else
4281 {
4282 /* Clear the global indices */
4283 for (a = a0; a < a1; a++)
4284 {
4285 ga2la_del(ga2la, gatindex[a]);
4286 }
4287 if (bLocalCG)
4288 {
4289 bLocalCG[index_gl[cg]] = FALSE0;
4290 }
4291 }
4292 }
4293 cgindex[home_pos] = nat;
4294
4295 return home_pos;
4296}
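After compact_ind() the first home_pos entries of cgindex, index_gl and gatindex describe only the charge groups that stayed home, and the global-to-local lookup has been rewritten to the compacted indices. A brief sketch of the per-atom bookkeeping, using only the ga2la calls already present above:

/* Sketch (not part of domdec.c): per-atom bookkeeping in compact_ind()
 *
 *   kept atom  : ga2la_change_la(ga2la, global_index, new_local_index);
 *   moved atom : ga2la_del(ga2la, global_index);
 *
 * so lookups through ga2la stay consistent with the compacted arrays.
 */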
4297
4298static void clear_and_mark_ind(int ncg, int *move,
4299 int *index_gl, int *cgindex, int *gatindex,
4300 gmx_ga2la_t ga2la, char *bLocalCG,
4301 int *cell_index)
4302{
4303 int cg, a0, a1, a;
4304
4305 for (cg = 0; cg < ncg; cg++)
4306 {
4307 if (move[cg] >= 0)
4308 {
4309 a0 = cgindex[cg];
4310 a1 = cgindex[cg+1];
4311 /* Clear the global indices */
4312 for (a = a0; a < a1; a++)
4313 {
4314 ga2la_del(ga2la, gatindex[a]);
4315 }
4316 if (bLocalCG)
4317 {
4318 bLocalCG[index_gl[cg]] = FALSE0;
4319 }
4320 /* Signal that this cg has moved using the ns cell index.
4321 * Here we set it to -1. fill_grid will change it
4322 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4323 */
4324 cell_index[cg] = -1;
4325 }
4326 }
4327}
4328
4329static void print_cg_move(FILE *fplog,
4330 gmx_domdec_t *dd,
4331 gmx_int64_t step, int cg, int dim, int dir,
4332 gmx_bool bHaveLimitdAndCMOld, real limitd,
4333 rvec cm_old, rvec cm_new, real pos_d)
4334{
4335 gmx_domdec_comm_t *comm;
4336 char buf[22];
4337
4338 comm = dd->comm;
4339
4340 fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
4341 if (bHaveLimitdAndCMOld)
4342 {
4343 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4344 ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
4345 }
4346 else
4347 {
4348 fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition in direction %c\n",
4349 ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
4350 }
4351 fprintf(fplog, "distance out of cell %f\n",
4352 dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4353 if (bHaveLimitdAndCMOld)
4354 {
4355 fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
4356 cm_old[XX0], cm_old[YY1], cm_old[ZZ2]);
4357 }
4358 fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
4359 cm_new[XX0], cm_new[YY1], cm_new[ZZ2]);
4360 fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
4361 dim2char(dim),
4362 comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
4363 fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
4364 dim2char(dim),
4365 comm->cell_x0[dim], comm->cell_x1[dim]);
4366}
4367
4368static void cg_move_error(FILE *fplog,
4369 gmx_domdec_t *dd,
4370 gmx_int64_t step, int cg, int dim, int dir,
4371 gmx_bool bHaveLimitdAndCMOld, real limitd,
4372 rvec cm_old, rvec cm_new, real pos_d)
4373{
4374 if (fplog)
4375 {
4376 print_cg_move(fplog, dd, step, cg, dim, dir,
4377 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4378 }
4379 print_cg_move(stderrstderr, dd, step, cg, dim, dir,
4380 bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
4381 gmx_fatal(FARGS,
4382 "A charge group moved too far between two domain decomposition steps\n"
4383 "This usually means that your system is not well equilibrated");
4384}
4385
4386static void rotate_state_atom(t_state *state, int a)
4387{
4388 int est;
4389
4390 for (est = 0; est < estNR; est++)
4391 {
4392 if (EST_DISTR(est) && (state->flags & (1<<est)))
4393 {
4394 switch (est)
4395 {
4396 case estX:
4397 /* Rotate the complete state; for a rectangular box only */
4398 state->x[a][YY1] = state->box[YY1][YY1] - state->x[a][YY1];
4399 state->x[a][ZZ2] = state->box[ZZ2][ZZ2] - state->x[a][ZZ2];
4400 break;
4401 case estV:
4402 state->v[a][YY1] = -state->v[a][YY1];
4403 state->v[a][ZZ2] = -state->v[a][ZZ2];
4404 break;
4405 case estSDX:
4406 state->sd_X[a][YY1] = -state->sd_X[a][YY1];
4407 state->sd_X[a][ZZ2] = -state->sd_X[a][ZZ2];
4408 break;
4409 case estCGP:
4410 state->cg_p[a][YY1] = -state->cg_p[a][YY1];
4411 state->cg_p[a][ZZ2] = -state->cg_p[a][ZZ2];
4412 break;
4413 case estDISRE_INITF:
4414 case estDISRE_RM3TAV:
4415 case estORIRE_INITF:
4416 case estORIRE_DTAV:
4417 /* These are distances, so not affected by rotation */
4418 break;
4419 default:
4420 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4421 }
4422 }
4423 }
4424}
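rotate_state_atom() applies the screw-pbc symmetry operation: a shift by one box vector along x is combined with a 180-degree rotation about the x-axis, so positions are reflected within the (rectangular) box while velocity-like state vectors simply change sign in y and z. A worked form of the mapping, restating what the switch above does:

/* Sketch (not part of domdec.c): screw-pbc mapping for a shift along x
 *   x' = x +/- box[XX][XX]
 *   y' = box[YY][YY] - y      v_y' = -v_y
 *   z' = box[ZZ][ZZ] - z      v_z' = -v_z
 * (distance-like entries such as estDISRE_* are left untouched)
 */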
4425
4426static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
4427{
4428 if (natoms > comm->moved_nalloc)
4429 {
4430 /* Contents should be preserved here */
4431 comm->moved_nalloc = over_alloc_dd(natoms);
4432 srenew(comm->moved, comm->moved_nalloc);
4433 }
4434
4435 return comm->moved;
4436}
4437
4438static void calc_cg_move(FILE *fplog, gmx_int64_t step,
4439 gmx_domdec_t *dd,
4440 t_state *state,
4441 ivec tric_dir, matrix tcm,
4442 rvec cell_x0, rvec cell_x1,
4443 rvec limitd, rvec limit0, rvec limit1,
4444 const int *cgindex,
4445 int cg_start, int cg_end,
4446 rvec *cg_cm,
4447 int *move)
4448{
4449 int npbcdim;
4450 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4451 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4452 int flag;
4453 gmx_bool bScrew;
4454 ivec dev;
4455 real inv_ncg, pos_d;
4456 rvec cm_new;
4457
4458 npbcdim = dd->npbcdim;
4459
4460 for (cg = cg_start; cg < cg_end; cg++)
4461 {
4462 k0 = cgindex[cg];
4463 k1 = cgindex[cg+1];
4464 nrcg = k1 - k0;
4465 if (nrcg == 1)
4466 {
4467 copy_rvec(state->x[k0], cm_new);
4468 }
4469 else
4470 {
4471 inv_ncg = 1.0/nrcg;
4472
4473 clear_rvec(cm_new);
4474 for (k = k0; (k < k1); k++)
4475 {
4476 rvec_inc(cm_new, state->x[k]);
4477 }
4478 for (d = 0; (d < DIM3); d++)
4479 {
4480 cm_new[d] = inv_ncg*cm_new[d];
4481 }
4482 }
4483
4484 clear_ivec(dev);
4485 /* Do pbc and check DD cell boundary crossings */
4486 for (d = DIM3-1; d >= 0; d--)
4487 {
4488 if (dd->nc[d] > 1)
4489 {
4490 bScrew = (dd->bScrewPBC && d == XX0);
4491 /* Determine the location of this cg in lattice coordinates */
4492 pos_d = cm_new[d];
4493 if (tric_dir[d])
4494 {
4495 for (d2 = d+1; d2 < DIM3; d2++)
4496 {
4497 pos_d += cm_new[d2]*tcm[d2][d];
4498 }
4499 }
4500 /* Put the charge group in the triclinic unit-cell */
4501 if (pos_d >= cell_x1[d])
4502 {
4503 if (pos_d >= limit1[d])
4504 {
4505 cg_move_error(fplog, dd, step, cg, d, 1, TRUE1, limitd[d],
4506 cg_cm[cg], cm_new, pos_d);
4507 }
4508 dev[d] = 1;
4509 if (dd->ci[d] == dd->nc[d] - 1)
4510 {
4511 rvec_dec(cm_new, state->box[d]);
4512 if (bScrew)
4513 {
4514 cm_new[YY1] = state->box[YY1][YY1] - cm_new[YY1];
4515 cm_new[ZZ2] = state->box[ZZ2][ZZ2] - cm_new[ZZ2];
4516 }
4517 for (k = k0; (k < k1); k++)
4518 {
4519 rvec_dec(state->x[k], state->box[d]);
4520 if (bScrew)
4521 {
4522 rotate_state_atom(state, k);
4523 }
4524 }
4525 }
4526 }
4527 else if (pos_d < cell_x0[d])
4528 {
4529 if (pos_d < limit0[d])
4530 {
4531 cg_move_error(fplog, dd, step, cg, d, -1, TRUE1, limitd[d],
4532 cg_cm[cg], cm_new, pos_d);
4533 }
4534 dev[d] = -1;
4535 if (dd->ci[d] == 0)
4536 {
4537 rvec_inc(cm_new, state->box[d]);
4538 if (bScrew)
4539 {
4540 cm_new[YY1] = state->box[YY1][YY1] - cm_new[YY1];
4541 cm_new[ZZ2] = state->box[ZZ2][ZZ2] - cm_new[ZZ2];
4542 }
4543 for (k = k0; (k < k1); k++)
4544 {
4545 rvec_inc(state->x[k], state->box[d]);
4546 if (bScrew)
4547 {
4548 rotate_state_atom(state, k);
4549 }
4550 }
4551 }
4552 }
4553 }
4554 else if (d < npbcdim)
4555 {
4556 /* Put the charge group in the rectangular unit-cell */
4557 while (cm_new[d] >= state->box[d][d])
4558 {
4559 rvec_dec(cm_new, state->box[d]);
4560 for (k = k0; (k < k1); k++)
4561 {
4562 rvec_dec(state->x[k], state->box[d]);
4563 }
4564 }
4565 while (cm_new[d] < 0)
4566 {
4567 rvec_inc(cm_new, state->box[d]);
4568 for (k = k0; (k < k1); k++)
4569 {
4570 rvec_inc(state->x[k], state->box[d]);
4571 }
4572 }
4573 }
4574 }
4575
4576 copy_rvec(cm_new, cg_cm[cg]);
4577
4578 /* Determine where this cg should go */
4579 flag = 0;
4580 mc = -1;
4581 for (d = 0; d < dd->ndim; d++)
4582 {
4583 dim = dd->dim[d];
4584 if (dev[dim] == 1)
4585 {
4586 flag |= DD_FLAG_FW(d)(1<<(16+(d)*2));
4587 if (mc == -1)
4588 {
4589 mc = d*2;
4590 }
4591 }
4592 else if (dev[dim] == -1)
4593 {
4594 flag |= DD_FLAG_BW(d)(1<<(16+(d)*2+1));
4595 if (mc == -1)
4596 {
4597 if (dd->nc[dim] > 2)
4598 {
4599 mc = d*2 + 1;
4600 }
4601 else
4602 {
4603 mc = d*2;
4604 }
4605 }
4606 }
4607 }
4608 /* Temporarily store the flag in move */
4609 move[cg] = mc + flag;
4610 }
4611}
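move[cg] as set above temporarily packs two things into one int: the target communication buffer index mc in the low bits and the DD_FLAG_FW/DD_FLAG_BW direction flags from bit 16 upward; dd_redistribute_cg() separates them again with the DD_FLAG_NRCG mask. A minimal sketch of that unpacking, using only macros visible in this file; unpack_move_sketch is a hypothetical helper:

/* Sketch (not part of domdec.c): splitting the packed value again,
 * as done at the start of the per-cg loop in dd_redistribute_cg().
 */
static void unpack_move_sketch(int packed, int *mc, int *flag)
{
    *flag = packed & ~DD_FLAG_NRCG;  /* forward/backward bits per DD dim */
    *mc   = packed &  DD_FLAG_NRCG;  /* index of the target send buffer  */
}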
4612
4613static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
4614 gmx_domdec_t *dd, ivec tric_dir,
4615 t_state *state, rvec **f,
4616 t_forcerec *fr,
4617 gmx_bool bCompact,
4618 t_nrnb *nrnb,
4619 int *ncg_stay_home,
4620 int *ncg_moved)
4621{
4622 int *move;
4623 int npbcdim;
4624 int ncg[DIM3*2], nat[DIM3*2];
4625 int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
4626 int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
4627 int sbuf[2], rbuf[2];
4628 int home_pos_cg, home_pos_at, buf_pos;
4629 int flag;
4630 gmx_bool bV = FALSE0, bSDX = FALSE0, bCGP = FALSE0;
4631 gmx_bool bScrew;
4632 ivec dev;
4633 real inv_ncg, pos_d;
4634 matrix tcm;
4635 rvec *cg_cm = NULL((void*)0), cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
4636 atom_id *cgindex;
4637 cginfo_mb_t *cginfo_mb;
4638 gmx_domdec_comm_t *comm;
4639 int *moved;
4640 int nthread, thread;
4641
4642 if (dd->bScrewPBC)
4643 {
4644 check_screw_box(state->box);
4645 }
4646
4647 comm = dd->comm;
4648 if (fr->cutoff_scheme == ecutsGROUP)
4649 {
4650 cg_cm = fr->cg_cm;
4651 }
4652
4653 for (i = 0; i < estNR; i++)
4654 {
4655 if (EST_DISTR(i))
4656 {
4657 switch (i)
4658 {
4659 case estX: /* Always present */ break;
4660 case estV: bV = (state->flags & (1<<i)); break;
4661 case estSDX: bSDX = (state->flags & (1<<i)); break;
4662 case estCGP: bCGP = (state->flags & (1<<i)); break;
4663 case estLD_RNG:
4664 case estLD_RNGI:
4665 case estDISRE_INITF:
4666 case estDISRE_RM3TAV:
4667 case estORIRE_INITF:
4668 case estORIRE_DTAV:
4669 /* No processing required */
4670 break;
4671 default:
4672 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4673 }
4674 }
4675 }
4676
4677 if (dd->ncg_tot > comm->nalloc_int)
4678 {
4679 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4680 srenew(comm->buf_int, comm->nalloc_int);
4681 }
4682 move = comm->buf_int;
4683
4684 /* Clear the count */
4685 for (c = 0; c < dd->ndim*2; c++)
4686 {
4687 ncg[c] = 0;
4688 nat[c] = 0;
4689 }
4690
4691 npbcdim = dd->npbcdim;
4692
4693 for (d = 0; (d < DIM3); d++)
4694 {
4695 limitd[d] = dd->comm->cellsize_min[d];
4696 if (d >= npbcdim && dd->ci[d] == 0)
4697 {
4698 cell_x0[d] = -GMX_FLOAT_MAX3.40282346E+38;
4699 }
4700 else
4701 {
4702 cell_x0[d] = comm->cell_x0[d];
4703 }
4704 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4705 {
4706 cell_x1[d] = GMX_FLOAT_MAX3.40282346E+38;
4707 }
4708 else
4709 {
4710 cell_x1[d] = comm->cell_x1[d];
4711 }
4712 if (d < npbcdim)
4713 {
4714 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4715 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4716 }
4717 else
4718 {
4719 /* We check after communication if a charge group moved
4720 * more than one cell. Set the pre-comm check limit to float_max.
4721 */
4722 limit0[d] = -GMX_FLOAT_MAX3.40282346E+38;
4723 limit1[d] = GMX_FLOAT_MAX3.40282346E+38;
4724 }
4725 }
4726
4727 make_tric_corr_matrix(npbcdim, state->box, tcm);
4728
4729 cgindex = dd->cgindex;
4730
4731 nthread = gmx_omp_nthreads_get(emntDomdec);
4732
4733 /* Compute the center of geometry for all home charge groups
4734 * and put them in the box and determine where they should go.
4735 */
4736#pragma omp parallel for num_threads(nthread) schedule(static)
4737 for (thread = 0; thread < nthread; thread++)
4738 {
4739 calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
4740 cell_x0, cell_x1, limitd, limit0, limit1,
4741 cgindex,
4742 ( thread *dd->ncg_home)/nthread,
4743 ((thread+1)*dd->ncg_home)/nthread,
4744 fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
4745 move);
4746 }
4747
4748 for (cg = 0; cg < dd->ncg_home; cg++)
4749 {
4750 if (move[cg] >= 0)
4751 {
4752 mc = move[cg];
4753 flag = mc & ~DD_FLAG_NRCG65535;
4754 mc = mc & DD_FLAG_NRCG65535;
4755 move[cg] = mc;
4756
4757 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4758 {
4759 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4760 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4761 }
4762 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS2 ] = dd->index_gl[cg];
4763 /* We store the cg size in the lower 16 bits
4764 * and the place where the charge group should go
4765 * in the next 6 bits. This saves some communication volume.
4766 */
4767 nrcg = cgindex[cg+1] - cgindex[cg];
4768 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS2+1] = nrcg | flag;
4769 ncg[mc] += 1;
4770 nat[mc] += nrcg;
4771 }
4772 }
4773
4774 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home)(nrnb)->n[eNR_CGCM] += dd->nat_home;
4775 inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home)(nrnb)->n[eNR_RESETX] += dd->ncg_home;
4776
4777 *ncg_moved = 0;
4778 for (i = 0; i < dd->ndim*2; i++)
4779 {
4780 *ncg_moved += ncg[i];
4781 }
4782
4783 nvec = 1;
4784 if (bV)
4785 {
4786 nvec++;
4787 }
4788 if (bSDX)
4789 {
4790 nvec++;
4791 }
4792 if (bCGP)
4793 {
4794 nvec++;
4795 }
4796
4797 /* Make sure the communication buffers are large enough */
4798 for (mc = 0; mc < dd->ndim*2; mc++)
4799 {
4800 nvr = ncg[mc] + nat[mc]*nvec;
4801 if (nvr > comm->cgcm_state_nalloc[mc])
4802 {
4803 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4804 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
4805 }
4806 }
4807
4808 switch (fr->cutoff_scheme)
4809 {
4810 case ecutsGROUP:
4811 /* Recalculating cg_cm might be cheaper than communicating,
4812 * but that could give rise to rounding issues.
4813 */
4814 home_pos_cg =
4815 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4816 nvec, cg_cm, comm, bCompact);
4817 break;
4818 case ecutsVERLET:
4819 /* Without charge groups we send the moved atom coordinates
4820 * over twice. This is so the code below can be used without
4821 * many conditionals, both with and without charge groups.
4822 */
4823 home_pos_cg =
4824 compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
4825 nvec, state->x, comm, FALSE0);
4826 if (bCompact)
4827 {
4828 home_pos_cg -= *ncg_moved;
4829 }
4830 break;
4831 default:
4832 gmx_incons("unimplemented");
4833 home_pos_cg = 0;
4834 }
4835
4836 vec = 0;
4837 home_pos_at =
4838 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4839 nvec, vec++, state->x, comm, bCompact);
4840 if (bV)
4841 {
4842 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4843 nvec, vec++, state->v, comm, bCompact);
4844 }
4845 if (bSDX)
4846 {
4847 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4848 nvec, vec++, state->sd_X, comm, bCompact);
4849 }
4850 if (bCGP)
4851 {
4852 compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
4853 nvec, vec++, state->cg_p, comm, bCompact);
4854 }
4855
4856 if (bCompact)
4857 {
4858 compact_ind(dd->ncg_home, move,
4859 dd->index_gl, dd->cgindex, dd->gatindex,
4860 dd->ga2la, comm->bLocalCG,
4861 fr->cginfo);
4862 }
4863 else
4864 {
4865 if (fr->cutoff_scheme == ecutsVERLET)
4866 {
4867 moved = get_moved(comm, dd->ncg_home);
4868
4869 for (k = 0; k < dd->ncg_home; k++)
4870 {
4871 moved[k] = 0;
4872 }
4873 }
4874 else
4875 {
4876 moved = fr->ns.grid->cell_index;
4877 }
4878
4879 clear_and_mark_ind(dd->ncg_home, move,
4880 dd->index_gl, dd->cgindex, dd->gatindex,
4881 dd->ga2la, comm->bLocalCG,
4882 moved);
4883 }
4884
4885 cginfo_mb = fr->cginfo_mb;
4886
4887 *ncg_stay_home = home_pos_cg;
4888 for (d = 0; d < dd->ndim; d++)
4889 {
4890 dim = dd->dim[d];
4891 ncg_recv = 0;
4892 nat_recv = 0;
4893 nvr = 0;
4894 for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
4895 {
4896 cdd = d*2 + dir;
4897 /* Communicate the cg and atom counts */
4898 sbuf[0] = ncg[cdd];
4899 sbuf[1] = nat[cdd];
4900 if (debug)
4901 {
4902 fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
4903 d, dir, sbuf[0], sbuf[1]);
4904 }
4905 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4906
4907 if ((ncg_recv+rbuf[0])*DD_CGIBS2 > comm->nalloc_int)
4908 {
4909 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS2);
4910 srenew(comm->buf_int, comm->nalloc_int);
4911 }
4912
4913 /* Communicate the charge group indices, sizes and flags */
4914 dd_sendrecv_int(dd, d, dir,
4915 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS2,
4916 comm->buf_int+ncg_recv*DD_CGIBS2, rbuf[0]*DD_CGIBS2);
4917
4918 nvs = ncg[cdd] + nat[cdd]*nvec;
4919 i = rbuf[0] + rbuf[1] *nvec;
4920 vec_rvec_check_alloc(&comm->vbuf, nvr+i);
4921
4922 /* Communicate cgcm and state */
4923 dd_sendrecv_rvec(dd, d, dir,
4924 comm->cgcm_state[cdd], nvs,
4925 comm->vbuf.v+nvr, i);
4926 ncg_recv += rbuf[0];
4927 nat_recv += rbuf[1];
4928 nvr += i;
4929 }
4930
4931 /* Process the received charge groups */
4932 buf_pos = 0;
4933 for (cg = 0; cg < ncg_recv; cg++)
4934 {
4935 flag = comm->buf_int[cg*DD_CGIBS2+1];
4936
4937 if (dim >= npbcdim && dd->nc[dim] > 2)
4938 {
4939 /* No pbc in this dim and more than one domain boundary.
4940 * We do a separate check that a charge group did not move too far.
4941 */
4942 if (((flag & DD_FLAG_FW(d)(1<<(16+(d)*2))) &&
4943 comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
4944 ((flag & DD_FLAG_BW(d)(1<<(16+(d)*2+1))) &&
4945 comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
4946 {
4947 cg_move_error(fplog, dd, step, cg, dim,
4948 (flag & DD_FLAG_FW(d)(1<<(16+(d)*2))) ? 1 : 0,
4949 FALSE0, 0,
4950 comm->vbuf.v[buf_pos],
4951 comm->vbuf.v[buf_pos],
4952 comm->vbuf.v[buf_pos][dim]);
4953 }
4954 }
4955
4956 mc = -1;
4957 if (d < dd->ndim-1)
4958 {
4959 /* Check which direction this cg should go */
4960 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
4961 {
4962 if (dd->bGridJump)
4963 {
4964 /* The cell boundaries for dimension d2 are not equal
4965 * for each cell row of the lower dimension(s),
4966 * therefore we might need to redetermine where
4967 * this cg should go.
4968 */
4969 dim2 = dd->dim[d2];
4970 /* If this cg crosses the box boundary in dimension d2
4971 * we can use the communicated flag, so we do not
4972 * have to worry about pbc.
4973 */
4974 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4975 (flag & DD_FLAG_FW(d2)(1<<(16+(d2)*2)))) ||
4976 (dd->ci[dim2] == 0 &&
4977 (flag & DD_FLAG_BW(d2)(1<<(16+(d2)*2+1))))))
4978 {
4979 /* Clear the two flags for this dimension */
4980 flag &= ~(DD_FLAG_FW(d2)(1<<(16+(d2)*2)) | DD_FLAG_BW(d2)(1<<(16+(d2)*2+1)));
4981 /* Determine the location of this cg
4982 * in lattice coordinates
4983 */
4984 pos_d = comm->vbuf.v[buf_pos][dim2];
4985 if (tric_dir[dim2])
4986 {
4987 for (d3 = dim2+1; d3 < DIM3; d3++)
4988 {
4989 pos_d +=
4990 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4991 }
4992 }
4993 /* Check that we are not at the box edge.
4994 * pbc is only handled in the first step above,
4995 * but this check could move a cg over pbc while
4996 * the first step did not, due to different rounding.
4997 */
4998 if (pos_d >= cell_x1[dim2] &&
4999 dd->ci[dim2] != dd->nc[dim2]-1)
5000 {
5001 flag |= DD_FLAG_FW(d2)(1<<(16+(d2)*2));
5002 }
5003 else if (pos_d < cell_x0[dim2] &&
5004 dd->ci[dim2] != 0)
5005 {
5006 flag |= DD_FLAG_BW(d2)(1<<(16+(d2)*2+1));
5007 }
5008 comm->buf_int[cg*DD_CGIBS2+1] = flag;
5009 }
5010 }
5011 /* Set to which neighboring cell this cg should go */
5012 if (flag & DD_FLAG_FW(d2)(1<<(16+(d2)*2)))
5013 {
5014 mc = d2*2;
5015 }
5016 else if (flag & DD_FLAG_BW(d2)(1<<(16+(d2)*2+1)))
5017 {
5018 if (dd->nc[dd->dim[d2]] > 2)
5019 {
5020 mc = d2*2+1;
5021 }
5022 else
5023 {
5024 mc = d2*2;
5025 }
5026 }
5027 }
5028 }
5029
5030 nrcg = flag & DD_FLAG_NRCG65535;
5031 if (mc == -1)
5032 {
5033 if (home_pos_cg+1 > dd->cg_nalloc)
5034 {
5035 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
5036 srenew(dd->index_gl, dd->cg_nalloc);
5037 srenew(dd->cgindex, dd->cg_nalloc+1);
5038 }
5039 /* Set the global charge group index and size */
5040 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS2];
5041 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
5042 /* Copy the state from the buffer */
5043 dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
5044 if (fr->cutoff_scheme == ecutsGROUP)
5045 {
5046 cg_cm = fr->cg_cm;
5047 copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
5048 }
5049 buf_pos++;
5050
5051 /* Set the cginfo */
5052 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
5053 dd->index_gl[home_pos_cg]);
5054 if (comm->bLocalCG)
5055 {
5056 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE1;
5057 }
5058
5059 if (home_pos_at+nrcg > state->nalloc)
5060 {
5061 dd_realloc_state(state, f, home_pos_at+nrcg);
5062 }
5063 for (i = 0; i < nrcg; i++)
5064 {
5065 copy_rvec(comm->vbuf.v[buf_pos++],
5066 state->x[home_pos_at+i]);
5067 }
5068 if (bV)
5069 {
5070 for (i = 0; i < nrcg; i++)
5071 {
5072 copy_rvec(comm->vbuf.v[buf_pos++],
5073 state->v[home_pos_at+i]);
5074 }
5075 }
5076 if (bSDX)
5077 {
5078 for (i = 0; i < nrcg; i++)
5079 {
5080 copy_rvec(comm->vbuf.v[buf_pos++],
5081 state->sd_X[home_pos_at+i]);
5082 }
5083 }
5084 if (bCGP)
5085 {
5086 for (i = 0; i < nrcg; i++)
5087 {
5088 copy_rvec(comm->vbuf.v[buf_pos++],
5089 state->cg_p[home_pos_at+i]);
5090 }
5091 }
5092 home_pos_cg += 1;
5093 home_pos_at += nrcg;
5094 }
5095 else
5096 {
5097 /* Reallocate the buffers if necessary */
5098 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
5099 {
5100 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
5101 srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
5102 }
5103 nvr = ncg[mc] + nat[mc]*nvec;
5104 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
5105 {
5106 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
5107 srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
5108 }
5109 /* Copy from the receive to the send buffers */
5110 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS2,
5111 comm->buf_int + cg*DD_CGIBS2,
5112 DD_CGIBS2*sizeof(int));
5113 memcpy(comm->cgcm_state[mc][nvr],
5114 comm->vbuf.v[buf_pos],
5115 (1+nrcg*nvec)*sizeof(rvec));
5116 buf_pos += 1 + nrcg*nvec;
5117 ncg[mc] += 1;
5118 nat[mc] += nrcg;
5119 }
5120 }
5121 }
5122
5123 /* With sorting (!bCompact) the indices are now only partially up to date
5124 * and ncg_home and nat_home are not the real count, since there are
5125 * "holes" in the arrays for the charge groups that moved to neighbors.
5126 */
5127 if (fr->cutoff_scheme == ecutsVERLET)
5128 {
5129 moved = get_moved(comm, home_pos_cg);
5130
5131 for (i = dd->ncg_home; i < home_pos_cg; i++)
5132 {
5133 moved[i] = 0;
5134 }
5135 }
5136 dd->ncg_home = home_pos_cg;
5137 dd->nat_home = home_pos_at;
5138
5139 if (debug)
5140 {
5141 fprintf(debug,
5142 "Finished repartitioning: cgs moved out %d, new home %d\n",
5143 *ncg_moved, dd->ncg_home-*ncg_moved);
5144
5145 }
5146}
5147
5148void dd_cycles_add(gmx_domdec_t *dd, float cycles, int ddCycl)
5149{
5150 dd->comm->cycl[ddCycl] += cycles;
5151 dd->comm->cycl_n[ddCycl]++;
5152 if (cycles > dd->comm->cycl_max[ddCycl])
5153 {
5154 dd->comm->cycl_max[ddCycl] = cycles;
5155 }
5156}
5157
5158static double force_flop_count(t_nrnb *nrnb)
5159{
5160 int i;
5161 double sum;
5162 const char *name;
5163
5164 sum = 0;
5165 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
5166 {
5167 /* To get closer to the real timings, we halve the count
5168 * for the normal loops and halve it again for water loops.
5169 */
5170 name = nrnb_str(i);
5171 if (strstr(name, "W3") != NULL((void*)0) || strstr(name, "W4") != NULL((void*)0))
5172 {
5173 sum += nrnb->n[i]*0.25*cost_nrnb(i);
5174 }
5175 else
5176 {
5177 sum += nrnb->n[i]*0.50*cost_nrnb(i);
5178 }
5179 }
5180 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
5181 {
5182 name = nrnb_str(i);
5183 if (strstr(name, "W3") != NULL((void*)0) || strstr(name, "W4") != NULL((void*)0))
5184 {
5185 sum += nrnb->n[i]*cost_nrnb(i);
5186 }
5187 }
5188 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
5189 {
5190 sum += nrnb->n[i]*cost_nrnb(i);
5191 }
5192
5193 return sum;
5194}
5195
5196void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
5197{
5198 if (dd->comm->eFlop)
5199 {
5200 dd->comm->flop -= force_flop_count(nrnb);
5201 }
5202}
5203void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
5204{
5205 if (dd->comm->eFlop)
5206 {
5207 dd->comm->flop += force_flop_count(nrnb);
5208 dd->comm->flop_n++;
5209 }
5210}
5211
5212static void clear_dd_cycle_counts(gmx_domdec_t *dd)
5213{
5214 int i;
5215
5216 for (i = 0; i < ddCyclNr; i++)
5217 {
5218 dd->comm->cycl[i] = 0;
5219 dd->comm->cycl_n[i] = 0;
5220 dd->comm->cycl_max[i] = 0;
5221 }
5222 dd->comm->flop = 0;
5223 dd->comm->flop_n = 0;
5224}
5225
5226static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
5227{
5228 gmx_domdec_comm_t *comm;
5229 gmx_domdec_load_t *load;
5230 gmx_domdec_root_t *root = NULL((void*)0);
5231 int d, dim, cid, i, pos;
5232 float cell_frac = 0, sbuf[DD_NLOAD_MAX9];
5233 gmx_bool bSepPME;
5234
5235 if (debug)
5236 {
5237 fprintf(debug, "get_load_distribution start\n");
5238 }
5239
5240 wallcycle_start(wcycle, ewcDDCOMMLOAD);
5241
5242 comm = dd->comm;
5243
5244 bSepPME = (dd->pme_nodeid >= 0);
5245
5246 for (d = dd->ndim-1; d >= 0; d--)
5247 {
5248 dim = dd->dim[d];
5249 /* Check if we participate in the communication in this dimension */
5250 if (d == dd->ndim-1 ||
5251 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
5252 {
5253 load = &comm->load[d];
5254 if (dd->bGridJump)
5255 {
5256 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
5257 }
5258 pos = 0;
5259 if (d == dd->ndim-1)
5260 {
5261 sbuf[pos++] = dd_force_load(comm);
5262 sbuf[pos++] = sbuf[0];
5263 if (dd->bGridJump)
5264 {
5265 sbuf[pos++] = sbuf[0];
5266 sbuf[pos++] = cell_frac;
5267 if (d > 0)
5268 {
5269 sbuf[pos++] = comm->cell_f_max0[d];
5270 sbuf[pos++] = comm->cell_f_min1[d];
5271 }
5272 }
5273 if (bSepPME)
5274 {
5275 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
5276 sbuf[pos++] = comm->cycl[ddCyclPME];
5277 }
5278 }
5279 else
5280 {
5281 sbuf[pos++] = comm->load[d+1].sum;
5282 sbuf[pos++] = comm->load[d+1].max;
5283 if (dd->bGridJump)
5284 {
5285 sbuf[pos++] = comm->load[d+1].sum_m;
5286 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
5287 sbuf[pos++] = comm->load[d+1].flags;
5288 if (d > 0)
5289 {
5290 sbuf[pos++] = comm->cell_f_max0[d];
5291 sbuf[pos++] = comm->cell_f_min1[d];
5292 }
5293 }
5294 if (bSepPME)
5295 {
5296 sbuf[pos++] = comm->load[d+1].mdf;
5297 sbuf[pos++] = comm->load[d+1].pme;
5298 }
5299 }
5300 load->nload = pos;
5301 /* Communicate a row in DD direction d.
5302 * The communicators are set up such that the root always has rank 0.
5303 */
5304#ifdef GMX_MPI
5305 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
5306 load->load, load->nload*sizeof(float), MPI_BYTE,
5307 0, comm->mpi_comm_load[d]);
5308#endif
5309 if (dd->ci[dim] == dd->master_ci[dim])
5310 {
5311 /* We are the root, process this row */
5312 if (comm->bDynLoadBal)
5313 {
5314 root = comm->root[d];
5315 }
5316 load->sum = 0;
5317 load->max = 0;
5318 load->sum_m = 0;
5319 load->cvol_min = 1;
5320 load->flags = 0;
5321 load->mdf = 0;
5322 load->pme = 0;
5323 pos = 0;
5324 for (i = 0; i < dd->nc[dim]; i++)
5325 {
5326 load->sum += load->load[pos++];
5327 load->max = max(load->max, load->load[pos]);
5328 pos++;
5329 if (dd->bGridJump)
5330 {
5331 if (root->bLimited)
5332 {
5333 /* This direction could not be load balanced properly,
5334 * therefore we need to use the maximum instead of the average load.
5335 */
5336 load->sum_m = max(load->sum_m, load->load[pos]);
5337 }
5338 else
5339 {
5340 load->sum_m += load->load[pos];
5341 }
5342 pos++;
5343 load->cvol_min = min(load->cvol_min, load->load[pos]);
5344 pos++;
5345 if (d < dd->ndim-1)
5346 {
5347 load->flags = (int)(load->load[pos++] + 0.5);
5348 }
5349 if (d > 0)
5350 {
5351 root->cell_f_max0[i] = load->load[pos++];
5352 root->cell_f_min1[i] = load->load[pos++];
5353 }
5354 }
5355 if (bSepPME)
5356 {
5357 load->mdf = max(load->mdf, load->load[pos]);
5358 pos++;
5359 load->pme = max(load->pme, load->load[pos]);
5360 pos++;
5361 }
5362 }
5363 if (comm->bDynLoadBal && root->bLimited)
5364 {
5365 load->sum_m *= dd->nc[dim];
5366 load->flags |= (1<<d);
5367 }
5368 }
5369 }
5370 }
5371
5372 if (DDMASTER(dd)((dd)->rank == (dd)->masterrank))
5373 {
5374 comm->nload += dd_load_count(comm);
5375 comm->load_step += comm->cycl[ddCyclStep];
5376 comm->load_sum += comm->load[0].sum;
5377 comm->load_max += comm->load[0].max;
5378 if (comm->bDynLoadBal)
5379 {
5380 for (d = 0; d < dd->ndim; d++)
5381 {
5382 if (comm->load[0].flags & (1<<d))
5383 {
5384 comm->load_lim[d]++;
5385 }
5386 }
5387 }
5388 if (bSepPME)
5389 {
5390 comm->load_mdf += comm->load[0].mdf;
5391 comm->load_pme += comm->load[0].pme;
5392 }
5393 }
5394
5395 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
5396
5397 if (debug)
5398 {
5399 fprintf(debug, "get_load_distribution finished\n");
5400 }
5401}
5402
5403static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5404{
5405 /* Return the relative performance loss on the total run time
5406 * due to the force calculation load imbalance.
5407 */
5408 if (dd->comm->nload > 0)
5409 {
5410 return
5411 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5412 (dd->comm->load_step*dd->nnodes);
5413 }
5414 else
5415 {
5416 return 0;
5417 }
5418}
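The loss estimate above is the accumulated waiting time (maximum minus average force load, summed over ranks) divided by the total step time. A small worked example with hypothetical numbers:

/* Sketch (not part of domdec.c): 4 PP ranks, load_sum = 320 (average 80),
 * load_max = 100, load_step = 200 cycles per rank:
 *
 *   loss = (load_max*nnodes - load_sum)/(load_step*nnodes)
 *        = (100*4 - 320)/(200*4) = 80/800 = 0.10
 *
 * i.e. 10 % of the run time is lost waiting for the most loaded domain,
 * while dd_f_imbal() below would report load_max*nnodes/load_sum - 1 = 25 %.
 */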
5419
5420static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
5421{
5422 char buf[STRLEN4096];
5423 int npp, npme, nnodes, d, limp;
5424 float imbal, pme_f_ratio, lossf, lossp = 0;
5425 gmx_bool bLim;
5426 gmx_domdec_comm_t *comm;
5427
5428 comm = dd->comm;
5429 if (DDMASTER(dd)((dd)->rank == (dd)->masterrank) && comm->nload > 0)
5430 {
5431 npp = dd->nnodes;
5432 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5433 nnodes = npp + npme;
5434 imbal = comm->load_max*npp/comm->load_sum - 1;
5435 lossf = dd_force_imb_perf_loss(dd);
5436 sprintf(buf, " Average load imbalance: %.1f %%\n", imbal*100);
5437 fprintf(fplog, "%s", buf);
5438 fprintf(stderrstderr, "\n");
5439 fprintf(stderrstderr, "%s", buf);
5440 sprintf(buf, " Part of the total run time spent waiting due to load imbalance: %.1f %%\n", lossf*100);
5441 fprintf(fplog, "%s", buf);
5442 fprintf(stderrstderr, "%s", buf);
5443 bLim = FALSE0;
5444 if (comm->bDynLoadBal)
5445 {
5446 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5447 for (d = 0; d < dd->ndim; d++)
5448 {
5449 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5450 sprintf(buf+strlen(buf), " %c %d %%", dim2char(dd->dim[d]), limp);
5451 if (limp >= 50)
5452 {
5453 bLim = TRUE1;
5454 }
5455 }
5456 sprintf(buf+strlen(buf), "\n");
5457 fprintf(fplog, "%s", buf);
5458 fprintf(stderrstderr, "%s", buf);
5459 }
5460 if (npme > 0)
5461 {
5462 pme_f_ratio = comm->load_pme/comm->load_mdf;
5463 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5464 if (lossp <= 0)
5465 {
5466 lossp *= (float)npme/(float)nnodes;
5467 }
5468 else
5469 {
5470 lossp *= (float)npp/(float)nnodes;
5471 }
5472 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pme_f_ratio);
5473 fprintf(fplog, "%s", buf);
5474 fprintf(stderrstderr, "%s", buf);
5475 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossp)*100);
5476 fprintf(fplog, "%s", buf);
5477 fprintf(stderrstderr, "%s", buf);
5478 }
5479 fprintf(fplog, "\n");
5480 fprintf(stderrstderr, "\n");
5481
5482 if (lossf >= DD_PERF_LOSS0.05)
5483 {
5484 sprintf(buf,
5485 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
5486 " in the domain decomposition.\n", lossf*100);
5487 if (!comm->bDynLoadBal)
5488 {
5489 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
5490 }
5491 else if (bLim)
5492 {
5493 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5494 }
5495 fprintf(fplog, "%s\n", buf);
5496 fprintf(stderrstderr, "%s\n", buf);
5497 }
5498 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS0.05)
5499 {
5500 sprintf(buf,
5501 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5502 " had %s work to do than the PP nodes.\n"
5503 " You might want to %s the number of PME nodes\n"
5504 " or %s the cut-off and the grid spacing.\n",
5505 fabs(lossp*100),
5506 (lossp < 0) ? "less" : "more",
5507 (lossp < 0) ? "decrease" : "increase",
5508 (lossp < 0) ? "decrease" : "increase");
5509 fprintf(fplog, "%s\n", buf);
5510 fprintf(stderrstderr, "%s\n", buf);
5511 }
5512 }
5513}
5514
5515static float dd_vol_min(gmx_domdec_t *dd)
5516{
5517 return dd->comm->load[0].cvol_min*dd->nnodes;
5518}
5519
5520static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5521{
5522 return dd->comm->load[0].flags;
5523}
5524
5525static float dd_f_imbal(gmx_domdec_t *dd)
5526{
5527 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5528}
5529
5530float dd_pme_f_ratio(gmx_domdec_t *dd)
5531{
5532 if (dd->comm->cycl_n[ddCyclPME] > 0)
5533 {
5534 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5535 }
5536 else
5537 {
5538 return -1.0;
5539 }
5540}
5541
5542static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
5543{
5544 int flags, d;
5545 char buf[22];
5546
5547 flags = dd_load_flags(dd);
5548 if (flags)
5549 {
5550 fprintf(fplog,
5551 "DD load balancing is limited by minimum cell size in dimension");
5552 for (d = 0; d < dd->ndim; d++)
5553 {
5554 if (flags & (1<<d))
5555 {
5556 fprintf(fplog, " %c", dim2char(dd->dim[d]));
5557 }
5558 }
5559 fprintf(fplog, "\n");
5560 }
5561 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
5562 if (dd->comm->bDynLoadBal)
5563 {
5564 fprintf(fplog, " vol min/aver %5.3f%c",
5565 dd_vol_min(dd), flags ? '!' : ' ');
5566 }
5567 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
5568 if (dd->comm->cycl_n[ddCyclPME])
5569 {
5570 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
5571 }
5572 fprintf(fplog, "\n\n");
5573}
5574
5575static void dd_print_load_verbose(gmx_domdec_t *dd)
5576{
5577 if (dd->comm->bDynLoadBal)
5578 {
5579 fprintf(stderrstderr, "vol %4.2f%c ",
5580 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
5581 }
5582 fprintf(stderrstderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
5583 if (dd->comm->cycl_n[ddCyclPME])
5584 {
5585 fprintf(stderrstderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
5586 }
5587}
5588
5589#ifdef GMX_MPI
5590static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
5591{
5592 MPI_Comm c_row;
5593 int dim, i, rank;
5594 ivec loc_c;
5595 gmx_domdec_root_t *root;
5596 gmx_bool bPartOfGroup = FALSE0;
5597
5598 dim = dd->dim[dim_ind];
5599 copy_ivec(loc, loc_c);
5600 for (i = 0; i < dd->nc[dim]; i++)
5601 {
5602 loc_c[dim] = i;
5603 rank = dd_index(dd->nc, loc_c);
5604 if (rank == dd->rank)
5605 {
5606 /* This process is part of the group */
5607 bPartOfGroup = TRUE1;
5608 }
5609 }
5610 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
5611 &c_row);
5612 if (bPartOfGroup)
5613 {
5614 dd->comm->mpi_comm_load[dim_ind] = c_row;
5615 if (dd->comm->eDLB != edlbNO)
5616 {
5617 if (dd->ci[dim] == dd->master_ci[dim])
5618 {
5619 /* This is the root process of this row */
5620 snew(dd->comm->root[dim_ind], 1);
5621 root = dd->comm->root[dim_ind];
5622 snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
5623 snew(root->old_cell_f, dd->nc[dim]+1);
5624 snew(root->bCellMin, dd->nc[dim]);
5625 if (dim_ind > 0)
5626 {
5627 snew(root->cell_f_max0, dd->nc[dim]);
5628 snew(root->cell_f_min1, dd->nc[dim]);
5629 snew(root->bound_min, dd->nc[dim]);
5630 snew(root->bound_max, dd->nc[dim]);
5631 }
5632 snew(root->buf_ncd, dd->nc[dim]);
5633 }
5634 else
5635 {
5636 /* This is not a root process, we only need to receive cell_f */
5637 snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
5638 }
5639 }
5640 if (dd->ci[dim] == dd->master_ci[dim])
5641 {
5642 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
5643 }
5644 }
5645}
5646#endif
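make_load_communicator() carves one communicator per DD grid row out of mpi_comm_all; the sketch below spells out which ranks end up together, using only quantities already defined above:

/* Sketch (not part of domdec.c): for dimension index dim_ind with
 * dim = dd->dim[dim_ind], the ranks whose grid coordinates equal loc
 * in every other dimension form one row of dd->nc[dim] ranks. Only
 * those ranks pass a defined color to MPI_Comm_split (all others pass
 * MPI_UNDEFINED and receive MPI_COMM_NULL), and using dd->rank as the
 * key preserves the original ordering, so the row root can later
 * gather the load data in get_load_distribution().
 */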
5647
5648void dd_setup_dlb_resource_sharing(t_commrec gmx_unused__attribute__ ((unused)) *cr,
5649 const gmx_hw_info_t gmx_unused__attribute__ ((unused)) *hwinfo,
5650 const gmx_hw_opt_t gmx_unused__attribute__ ((unused)) *hw_opt)
5651{
5652#ifdef GMX_MPI
5653 int physicalnode_id_hash;
5654 int gpu_id;
5655 gmx_domdec_t *dd;
5656 MPI_Comm mpi_comm_pp_physicalnode;
5657
5658 if (!(cr->duty & DUTY_PP(1<<0)) ||
5659 hw_opt->gpu_opt.ncuda_dev_use == 0)
5660 {
5661 /* Only PP nodes (currently) use GPUs.
5662 * If we don't have GPUs, there are no resources to share.
5663 */
5664 return;
5665 }
5666
5667 physicalnode_id_hash = gmx_physicalnode_id_hash();
5668
5669 gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
5670
5671 dd = cr->dd;
5672
5673 if (debug)
5674 {
5675 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
5676 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
5677 dd->rank, physicalnode_id_hash, gpu_id);
5678 }
5679 /* Split the PP communicator over the physical nodes */
5680 /* TODO: See if we should store this (before), as it is also used
5681 * for the nodecomm summation.
5682 */
5683 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
5684 &mpi_comm_pp_physicalnode);
5685 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
5686 &dd->comm->mpi_comm_gpu_shared);
5687 MPI_Comm_free(&mpi_comm_pp_physicalnode);
5688 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
5689
5690 if (debug)
5691 {
5692 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
5693 }
5694
5695 /* Note that some ranks could share a GPU, while others don't */
5696
5697 if (dd->comm->nrank_gpu_shared == 1)
5698 {
5699 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
5700 }
5701#endif
5702}
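The resource-sharing setup above uses two nested MPI_Comm_split calls: the PP ranks are first grouped by a hash of their physical node id and then, within each node, by the GPU id they were assigned, so mpi_comm_gpu_shared ends up containing exactly the ranks that share one GPU. A minimal self-contained sketch of the same two-level split; node_hash and gpu_id are hypothetical ints standing in for gmx_physicalnode_id_hash() and get_gpu_device_id():

/* Sketch (not part of domdec.c): two-level split by node, then by GPU. */
#include <mpi.h>

static void split_by_node_then_gpu(MPI_Comm comm_all, int rank,
                                   int node_hash, int gpu_id,
                                   MPI_Comm *comm_gpu_shared)
{
    MPI_Comm comm_node;

    MPI_Comm_split(comm_all, node_hash, rank, &comm_node);    /* ranks on the same node */
    MPI_Comm_split(comm_node, gpu_id, rank, comm_gpu_shared); /* ranks sharing one GPU  */
    MPI_Comm_free(&comm_node);
}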
5703
5704static void make_load_communicators(gmx_domdec_t gmx_unused__attribute__ ((unused)) *dd)
5705{
5706#ifdef GMX_MPI
5707 int dim0, dim1, i, j;
5708 ivec loc;
5709
5710 if (debug)
5711 {
5712 fprintf(debug, "Making load communicators\n");
5713 }
5714
5715 snew(dd->comm->load, dd->ndim);
5716 snew(dd->comm->mpi_comm_load, dd->ndim);
5717
5718 clear_ivec(loc);
5719 make_load_communicator(dd, 0, loc);
5720 if (dd->ndim > 1)
5721 {
5722 dim0 = dd->dim[0];
5723 for (i = 0; i < dd->nc[dim0]; i++)
5724 {
5725 loc[dim0] = i;
5726 make_load_communicator(dd, 1, loc);
5727 }
5728 }
5729 if (dd->ndim > 2)
5730 {
5731 dim0 = dd->dim[0];
5732 for (i = 0; i < dd->nc[dim0]; i++)
5733 {
5734 loc[dim0] = i;
5735 dim1 = dd->dim[1];
5736 for (j = 0; j < dd->nc[dim1]; j++)
5737 {
5738 loc[dim1] = j;
5739 make_load_communicator(dd, 2, loc);
5740 }
5741 }
5742 }
5743
5744 if (debug)
5745 {
5746 fprintf(debug, "Finished making load communicators\n");
5747 }
5748#endif
5749}
5750
5751void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
5752{
5753 gmx_bool bZYX;
5754 int d, dim, i, j, m;
5755 ivec tmp, s;
5756 int nzone, nzonep;
5757 ivec dd_zp[DD_MAXIZONE4];
5758 gmx_domdec_zones_t *zones;
5759 gmx_domdec_ns_ranges_t *izone;
5760
5761 for (d = 0; d < dd->ndim; d++)
5762 {
5763 dim = dd->dim[d];
5764 copy_ivec(dd->ci, tmp);
5765 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5766 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
5767 copy_ivec(dd->ci, tmp);
5768 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5769 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
5770 if (debug)
5771 {
5772 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5773 dd->rank, dim,
5774 dd->neighbor[d][0],
5775 dd->neighbor[d][1]);
5776 }
5777 }
5778
5779 if (fplog)
5780 {
5781 fprintf(fplog, "\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5782 dd->ndim,
5783 dd->nc[XX0], dd->nc[YY1], dd->nc[ZZ2],
5784 dd->ci[XX0], dd->ci[YY1], dd->ci[ZZ2]);
5785 }
5786 switch (dd->ndim)
5787 {
5788 case 3:
5789 nzone = dd_z3n8;
5790 nzonep = dd_zp3n4;
5791 for (i = 0; i < nzonep; i++)
5792 {
5793 copy_ivec(dd_zp3[i], dd_zp[i]);
5794 }
5795 break;
5796 case 2:
5797 nzone = dd_z2n4;
5798 nzonep = dd_zp2n2;
5799 for (i = 0; i < nzonep; i++)
5800 {
5801 copy_ivec(dd_zp2[i], dd_zp[i]);
5802 }
5803 break;
5804 case 1:
5805 nzone = dd_z1n2;
5806 nzonep = dd_zp1n1;
5807 for (i = 0; i < nzonep; i++)
5808 {
5809 copy_ivec(dd_zp1[i], dd_zp[i]);
5810 }
5811 break;
5812 default:
5813 gmx_fatal(FARGS, "Can only do 1, 2 or 3D domain decomposition");
5814 nzone = 0;
5815 nzonep = 0;
5816 }
5817
5818 zones = &dd->comm->zones;
5819
5820 for (i = 0; i < nzone; i++)
5821 {
5822 m = 0;
5823 clear_ivec(zones->shift[i]);
5824 for (d = 0; d < dd->ndim; d++)
5825 {
5826 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5827 }
5828 }
5829
5830 zones->n = nzone;
5831 for (i = 0; i < nzone; i++)
5832 {
5833 for (d = 0; d < DIM3; d++)
5834 {
5835 s[d] = dd->ci[d] - zones->shift[i][d];
5836 if (s[d] < 0)
5837 {
5838 s[d] += dd->nc[d];
5839 }
5840 else if (s[d] >= dd->nc[d])
5841 {
5842 s[d] -= dd->nc[d];
5843 }
5844 }
5845 }
5846 zones->nizone = nzonep;
5847 for (i = 0; i < zones->nizone; i++)
5848 {
5849 if (dd_zp[i][0] != i)
5850 {
5851 gmx_fatal(FARGS, "Internal inconsistency in the dd grid setup");
5852 }
5853 izone = &zones->izone[i];
5854 izone->j0 = dd_zp[i][1];
5855 izone->j1 = dd_zp[i][2];
5856 for (dim = 0; dim < DIM3; dim++)
5857 {
5858 if (dd->nc[dim] == 1)
5859 {
5860 /* All shifts should be allowed */
5861 izone->shift0[dim] = -1;
5862 izone->shift1[dim] = 1;
5863 }
5864 else
5865 {
5866 /*
5867 izone->shift0[d] = 0;
5868 izone->shift1[d] = 0;
5869 for(j=izone->j0; j<izone->j1; j++) {
5870 if (dd->shift[j][d] > dd->shift[i][d])
5871 izone->shift0[d] = -1;
5872 if (dd->shift[j][d] < dd->shift[i][d])
5873 izone->shift1[d] = 1;
5874 }
5875 */
5876
5877 int shift_diff;
5878
5879 /* Assume the shifts are not more than 1 cell */
5880 izone->shift0[dim] = 1;
5881 izone->shift1[dim] = -1;
5882 for (j = izone->j0; j < izone->j1; j++)
5883 {
5884 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5885 if (shift_diff < izone->shift0[dim])
5886 {
5887 izone->shift0[dim] = shift_diff;
5888 }
5889 if (shift_diff > izone->shift1[dim])
5890 {
5891 izone->shift1[dim] = shift_diff;
5892 }
5893 }
5894 }
5895 }
5896 }
5897
5898 if (dd->comm->eDLB != edlbNO)
5899 {
5900 snew(dd->comm->root, dd->ndim);
5901 }
5902
5903 if (dd->comm->bRecordLoad)
5904 {
5905 make_load_communicators(dd);
5906 }
5907}
5908
5909static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused__attribute__ ((unused)) reorder)
5910{
5911 gmx_domdec_t *dd;
5912 gmx_domdec_comm_t *comm;
5913 int i, rank, *buf;
5914 ivec periods;
5915#ifdef GMX_MPI
5916 MPI_Comm comm_cart;
5917#endif
5918
5919 dd = cr->dd;
5920 comm = dd->comm;
5921
5922#ifdef GMX_MPI
5923 if (comm->bCartesianPP)
5924 {
5925 /* Set up cartesian communication for the particle-particle part */
5926 if (fplog)
5927 {
5928 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
5929 dd->nc[XX0], dd->nc[YY1], dd->nc[ZZ2]);
5930 }
5931
5932 for (i = 0; i < DIM3; i++)
5933 {
5934 periods[i] = TRUE1;
5935 }
5936 MPI_Cart_createtMPI_Cart_create(cr->mpi_comm_mygroup, DIM3, dd->nc, periods, reorder,
5937 &comm_cart);
5938 /* We overwrite the old communicator with the new cartesian one */
5939 cr->mpi_comm_mygroup = comm_cart;
5940 }
5941
5942 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5943 MPI_Comm_ranktMPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
5944
5945 if (comm->bCartesianPP_PME)
5946 {
5947 /* Since we want to use the original Cartesian setup for the simulation,
5948 * and not the one after the split, we need to make an index.
5949 */
5950 snew(comm->ddindex2ddnodeid, dd->nnodes);
5951 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
5952 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
5953 /* Get the rank of the DD master,
5954 * above we made sure that the master node is a PP node.
5955 */
5956 if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)))
5957 {
5958 rank = dd->rank;
5959 }
5960 else
5961 {
5962 rank = 0;
5963 }
5964 MPI_AllreducetMPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INTTMPI_INT, MPI_SUMTMPI_SUM, dd->mpi_comm_all);
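/* Only the master rank contributed a non-zero value above (all other ranks
 * contribute 0), so the MPI_SUM reduction yields the DD rank of the master. */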
5965 }
5966 else if (comm->bCartesianPP)
5967 {
5968 if (cr->npmenodes == 0)
5969 {
5970 /* The PP communicator is also
5971 * the communicator for this simulation
5972 */
5973 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5974 }
5975 cr->nodeid = dd->rank;
5976
5977 MPI_Cart_coordstMPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM3, dd->ci);
5978
5979 /* We need to make an index to go from the coordinates
5980 * to the nodeid of this simulation.
5981 */
5982 snew(comm->ddindex2simnodeid, dd->nnodes);
5983 snew(buf, dd->nnodes);
5984 if (cr->duty & DUTY_PP(1<<0))
5985 {
5986 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
5987 }
5988 /* Communicate the ddindex to simulation nodeid index */
5989 MPI_AllreducetMPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INTTMPI_INT, MPI_SUMTMPI_SUM,
5990 cr->mpi_comm_mysim);
5991 sfree(buf);
5992
5993 /* Determine the master coordinates and rank.
5994 * The DD master should be the same node as the master of this sim.
5995 */
5996 for (i = 0; i < dd->nnodes; i++)
5997 {
5998 if (comm->ddindex2simnodeid[i] == 0)
5999 {
6000 ddindex2xyz(dd->nc, i, dd->master_ci);
6001 MPI_Cart_ranktMPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
6002 }
6003 }
6004 if (debug)
6005 {
6006 fprintf(debug, "The master rank is %d\n", dd->masterrank);
6007 }
6008 }
6009 else
6010 {
6011 /* No Cartesian communicators */
6012 /* We use the rank in dd->comm->all as DD index */
6013 ddindex2xyz(dd->nc, dd->rank, dd->ci);
6014 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
6015 dd->masterrank = 0;
6016 clear_ivec(dd->master_ci);
6017 }
6018#endif
6019
6020 if (fplog)
6021 {
6022 fprintf(fplog,
6023 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6024 dd->rank, dd->ci[XX0], dd->ci[YY1], dd->ci[ZZ2]);
6025 }
6026 if (debug)
6027 {
6028 fprintf(debug,
6029 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
6030 dd->rank, dd->ci[XX0], dd->ci[YY1], dd->ci[ZZ2]);
6031 }
6032}
6033
6034static void receive_ddindex2simnodeid(t_commrec *cr)
6035{
6036 gmx_domdec_t *dd;
6037
6038 gmx_domdec_comm_t *comm;
6039 int *buf;
6040
6041 dd = cr->dd;
6042 comm = dd->comm;
6043
6044#ifdef GMX_MPI
6045 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
6046 {
6047 snew(comm->ddindex2simnodeid, dd->nnodes);
6048 snew(buf, dd->nnodes);
6049 if (cr->duty & DUTY_PP(1<<0))
6050 {
6051 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
6052 }
6053#ifdef GMX_MPI
6054 /* Communicate the ddindex to simulation nodeid index */
6055 MPI_AllreducetMPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INTTMPI_INT, MPI_SUMTMPI_SUM,
6056 cr->mpi_comm_mysim);
6057#endif
6058 sfree(buf);
6059 }
6060#endif
6061}
6062
6063static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
6064 int ncg, int natoms)
6065{
6066 gmx_domdec_master_t *ma;
6067 int i;
6068
6069 snew(ma, 1);
6070
6071 snew(ma->ncg, dd->nnodes);
6072 snew(ma->index, dd->nnodes+1);
6073 snew(ma->cg, ncg);
6074 snew(ma->nat, dd->nnodes);
6075 snew(ma->ibuf, dd->nnodes*2);
6076 snew(ma->cell_x, DIM);
6077 for (i = 0; i < DIM3; i++)
6078 {
6079 snew(ma->cell_x[i], dd->nc[i]+1);
6080 }
6081
6082 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV4)
6083 {
6084 ma->vbuf = NULL((void*)0);
6085 }
6086 else
6087 {
6088 snew(ma->vbuf, natoms);
6089 }
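/* With at most GMX_DD_NNODES_SENDRECV ranks no collection buffer is needed;
 * for more ranks a buffer with room for all atom vectors is allocated,
 * e.g. for gathering/scattering the global state on the master rank. */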
6090
6091 return ma;
6092}
6093
6094static void split_communicator(FILE *fplog, t_commrec *cr, int gmx_unused__attribute__ ((unused)) dd_node_order,
6095 int gmx_unused__attribute__ ((unused)) reorder)
6096{
6097 gmx_domdec_t *dd;
6098 gmx_domdec_comm_t *comm;
6099 int i, rank;
6100 gmx_bool bDiv[DIM3];
6101 ivec periods;
6102#ifdef GMX_MPI
6103 MPI_Comm comm_cart;
6104#endif
6105
6106 dd = cr->dd;
6107 comm = dd->comm;
6108
6109 if (comm->bCartesianPP)
6110 {
6111 for (i = 1; i < DIM3; i++)
6112 {
6113 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
6114 }
6115 if (bDiv[YY1] || bDiv[ZZ2])
6116 {
6117 comm->bCartesianPP_PME = TRUE1;
6118 /* If we have 2D PME decomposition, which is always in x+y,
6119 * we stack the PME only nodes in z.
6120 * Otherwise we choose the direction that provides the thinnest slab
6121 * of PME only nodes as this will have the least effect
6122 * on the PP communication.
6123 * But for the PME communication the opposite might be better.
6124 */
6125 if (bDiv[ZZ2] && (comm->npmenodes_y > 1 ||
6126 !bDiv[YY1] ||
6127 dd->nc[YY1] > dd->nc[ZZ2]))
6128 {
6129 comm->cartpmedim = ZZ2;
6130 }
6131 else
6132 {
6133 comm->cartpmedim = YY1;
6134 }
6135 comm->ntot[comm->cartpmedim]
6136 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
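/* Worked example: a 4x4x2 PP grid (32 ranks) with 16 PME-only ranks and
 * cartpmedim = ZZ gives ntot[ZZ] += (16*2)/32 = 1, i.e. a 4x4x3 Cartesian
 * grid with one extra z-slab of PME-only ranks. */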
6137 }
6138 else if (fplog)
6139 {
6140 fprintf(fplog, "#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX0], dd->nc[YY1], dd->nc[XX0], dd->nc[ZZ2]);
6141 fprintf(fplog,
6142 "Will not use a Cartesian communicator for PP <-> PME\n\n");
6143 }
6144 }
6145
6146#ifdef GMX_MPI
6147 if (comm->bCartesianPP_PME)
6148 {
6149 if (fplog)
6150 {
6151 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX0], comm->ntot[YY1], comm->ntot[ZZ2]);
6152 }
6153
6154 for (i = 0; i < DIM3; i++)
6155 {
6156 periods[i] = TRUE1;
6157 }
6158 MPI_Cart_createtMPI_Cart_create(cr->mpi_comm_mysim, DIM3, comm->ntot, periods, reorder,
6159 &comm_cart);
6160
6161 MPI_Comm_ranktMPI_Comm_rank(comm_cart, &rank);
6162 if (MASTERNODE(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)) && rank != 0)
6163 {
6164 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
6165 }
6166
6167 /* With this assignment we lose the link to the original communicator,
6168 * which will usually be MPI_COMM_WORLD, unless we have multisim.
6169 */
6170 cr->mpi_comm_mysim = comm_cart;
6171 cr->sim_nodeid = rank;
6172
6173 MPI_Cart_coordstMPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM3, dd->ci);
6174
6175 if (fplog)
6176 {
6177 fprintf(fplog, "Cartesian nodeid %d, coordinates %d %d %d\n\n",
6178 cr->sim_nodeid, dd->ci[XX0], dd->ci[YY1], dd->ci[ZZ2]);
6179 }
6180
6181 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
6182 {
6183 cr->duty = DUTY_PP(1<<0);
6184 }
6185 if (cr->npmenodes == 0 ||
6186 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
6187 {
6188 cr->duty = DUTY_PME(1<<1);
6189 }
6190
6191 /* Split the sim communicator into PP and PME only nodes */
6192 MPI_Comm_splittMPI_Comm_split(cr->mpi_comm_mysim,
6193 cr->duty,
6194 dd_index(comm->ntot, dd->ci),
6195 &cr->mpi_comm_mygroup);
6196 }
6197 else
6198 {
6199 switch (dd_node_order)
6200 {
6201 case ddnoPP_PME:
6202 if (fplog)
6203 {
6204 fprintf(fplog, "Order of the nodes: PP first, PME last\n");
6205 }
6206 break;
6207 case ddnoINTERLEAVE:
6208 /* Interleave the PP-only and PME-only nodes,
6209 * as on clusters with dual-core machines this will double
6210 * the communication bandwidth of the PME processes
6211 * and thus speed up the PP <-> PME and inter PME communication.
6212 */
6213 if (fplog)
6214 {
6215 fprintf(fplog, "Interleaving PP and PME nodes\n");
6216 }
6217 comm->pmenodes = dd_pmenodes(cr);
6218 break;
6219 case ddnoCARTESIAN:
6220 break;
6221 default:
6222 gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
6223 }
6224
6225 if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
6226 {
6227 cr->duty = DUTY_PME(1<<1);
6228 }
6229 else
6230 {
6231 cr->duty = DUTY_PP(1<<0);
6232 }
6233
6234 /* Split the sim communicator into PP and PME only nodes */
6235 MPI_Comm_splittMPI_Comm_split(cr->mpi_comm_mysim,
6236 cr->duty,
6237 cr->nodeid,
6238 &cr->mpi_comm_mygroup);
6239 MPI_Comm_ranktMPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
6240 }
6241#endif
6242
6243 if (fplog)
6244 {
6245 fprintf(fplog, "This is a %s only node\n\n",
6246 (cr->duty & DUTY_PP(1<<0)) ? "particle-particle" : "PME-mesh");
6247 }
6248}
6249
6250void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
6251{
6252 gmx_domdec_t *dd;
6253 gmx_domdec_comm_t *comm;
6254 int CartReorder;
6255
6256 dd = cr->dd;
6257 comm = dd->comm;
6258
6259 copy_ivec(dd->nc, comm->ntot);
6260
6261 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
6262 comm->bCartesianPP_PME = FALSE0;
6263
6264 /* Reorder the nodes by default. This might change the MPI ranks.
6265 * Real reordering is only supported on very few architectures;
6266 * Blue Gene is one of them.
6267 */
6268 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL((void*)0));
6269
6270 if (cr->npmenodes > 0)
6271 {
6272 /* Split the communicator into a PP and PME part */
6273 split_communicator(fplog, cr, dd_node_order, CartReorder);
6274 if (comm->bCartesianPP_PME)
6275 {
6276 /* We (possibly) reordered the nodes in split_communicator,
6277 * so it is no longer required in make_pp_communicator.
6278 */
6279 CartReorder = FALSE0;
6280 }
6281 }
6282 else
6283 {
6284 /* All nodes do PP and PME */
6285#ifdef GMX_MPI
6286 /* We do not require separate communicators */
6287 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
6288#endif
6289 }
6290
6291 if (cr->duty & DUTY_PP(1<<0))
6292 {
6293 /* Copy or make a new PP communicator */
6294 make_pp_communicator(fplog, cr, CartReorder);
6295 }
6296 else
6297 {
6298 receive_ddindex2simnodeid(cr);
6299 }
6300
6301 if (!(cr->duty & DUTY_PME(1<<1)))
6302 {
6303 /* Set up the communication to our PME node */
6304 dd->pme_nodeid = dd_simnode2pmenode(cr, cr->sim_nodeid);
6305 dd->pme_receive_vir_ener = receive_vir_ener(cr);
6306 if (debug)
6307 {
6308 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
6309 dd->pme_nodeid, dd->pme_receive_vir_ener);
6310 }
6311 }
6312 else
6313 {
6314 dd->pme_nodeid = -1;
6315 }
6316
6317 if (DDMASTER(dd)((dd)->rank == (dd)->masterrank))
6318 {
6319 dd->ma = init_gmx_domdec_master_t(dd,
6320 comm->cgs_gl.nr,
6321 comm->cgs_gl.index[comm->cgs_gl.nr]);
6322 }
6323}
6324
6325static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
6326{
6327 real *slb_frac, tot;
6328 int i, n;
6329 double dbl;
6330
6331 slb_frac = NULL((void*)0);
6332 if (nc > 1 && size_string != NULL((void*)0))
6333 {
6334 if (fplog)
6335 {
6336 fprintf(fplog, "Using static load balancing for the %s direction\n",
6337 dir);
6338 }
6339 snew(slb_frac, nc);
6340 tot = 0;
6341 for (i = 0; i < nc; i++)
6342 {
6343 dbl = 0;
6344 sscanf(size_string, "%lf%n", &dbl, &n);
6345 if (dbl == 0)
6346 {
6347 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
6348 }
6349 slb_frac[i] = dbl;
6350 size_string += n;
6351 tot += slb_frac[i];
6352 }
6353 /* Normalize */
6354 if (fplog)
6355 {
6356 fprintf(fplog, "Relative cell sizes:");
6357 }
6358 for (i = 0; i < nc; i++)
6359 {
6360 slb_frac[i] /= tot;
6361 if (fplog)
6362 {
6363 fprintf(fplog, " %5.3f", slb_frac[i]);
6364 }
6365 }
6366 if (fplog)
6367 {
6368 fprintf(fplog, "\n");
6369 }
6370 }
6371
6372 return slb_frac;
6373}
6374
6375static int multi_body_bondeds_count(gmx_mtop_t *mtop)
6376{
6377 int n, nmol, ftype;
6378 gmx_mtop_ilistloop_t iloop;
6379 t_ilist *il;
6380
6381 n = 0;
6382 iloop = gmx_mtop_ilistloop_init(mtop);
6383 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
6384 {
6385 for (ftype = 0; ftype < F_NRE; ftype++)
6386 {
6387 if ((interaction_function[ftype].flags & IF_BOND1) &&
6388 NRAL(ftype)(interaction_function[(ftype)].nratoms) > 2)
6389 {
6390 n += nmol*il[ftype].nr/(1 + NRAL(ftype)(interaction_function[(ftype)].nratoms));
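/* Each entry in il[ftype] occupies 1 + NRAL(ftype) integers (the type index
 * plus the atom indices), so e.g. for angles (NRAL = 3) nr/4 interactions
 * are counted per molecule copy. */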
6391 }
6392 }
6393 }
6394
6395 return n;
6396}
6397
6398static int dd_getenv(FILE *fplog, const char *env_var, int def)
6399{
6400 char *val;
6401 int nst;
6402
6403 nst = def;
6404 val = getenv(env_var);
6405 if (val)
6406 {
6407 if (sscanf(val, "%d", &nst) <= 0)
6408 {
6409 nst = 1;
6410 }
6411 if (fplog)
6412 {
6413 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
6414 env_var, val, nst);
6415 }
6416 }
6417
6418 return nst;
6419}
6420
6421static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
6422{
6423 if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)))
6424 {
6425 fprintf(stderrstderr, "\n%s\n", warn_string);
6426 }
6427 if (fplog)
6428 {
6429 fprintf(fplog, "\n%s\n", warn_string);
6430 }
6431}
6432
6433static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
6434 t_inputrec *ir, FILE *fplog)
6435{
6436 if (ir->ePBC == epbcSCREW &&
6437 (dd->nc[XX0] == 1 || dd->nc[YY1] > 1 || dd->nc[ZZ2] > 1))
6438 {
6439 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
6440 }
6441
6442 if (ir->ns_type == ensSIMPLE)
6443 {
6444 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
6445 }
6446
6447 if (ir->nstlist == 0)
6448 {
6449 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
6450 }
6451
6452 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6453 {
6454 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6455 }
6456}
6457
6458static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
6459{
6460 int di, d;
6461 real r;
6462
6463 r = ddbox->box_size[XX0];
6464 for (di = 0; di < dd->ndim; di++)
6465 {
6466 d = dd->dim[di];
6467 /* Check using the initial average cell size */
6468 r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6469 }
6470
6471 return r;
6472}
6473
6474static int check_dlb_support(FILE *fplog, t_commrec *cr,
6475 const char *dlb_opt, gmx_bool bRecordLoad,
6476 unsigned long Flags, t_inputrec *ir)
6477{
6478 gmx_domdec_t *dd;
6479 int eDLB = -1;
6480 char buf[STRLEN4096];
6481
6482 switch (dlb_opt[0])
6483 {
6484 case 'a': eDLB = edlbAUTO; break;
6485 case 'n': eDLB = edlbNO; break;
6486 case 'y': eDLB = edlbYES; break;
6487 default: gmx_incons("Unknown dlb_opt");
6488 }
6489
6490 if (Flags & MD_RERUN(1<<4))
6491 {
6492 return edlbNO;
6493 }
6494
6495 if (!EI_DYNAMICS(ir->eI))
6496 {
6497 if (eDLB == edlbYES)
6498 {
6499 sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
6500 dd_warning(cr, fplog, buf);
6501 }
6502
6503 return edlbNO;
6504 }
6505
6506 if (!bRecordLoad)
6507 {
6508 dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6509
6510 return edlbNO;
6511 }
6512
6513 if (Flags & MD_REPRODUCIBLE(1<<13))
6514 {
6515 switch (eDLB)
6516 {
6517 case edlbNO:
6518 break;
6519 case edlbAUTO:
6520 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
6521 eDLB = edlbNO;
6522 break;
6523 case edlbYES:
6524 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6525 break;
6526 default:
6527 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
6528 break;
6529 }
6530 }
6531
6532 return eDLB;
6533}
6534
6535static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
6536{
6537 int dim;
6538
6539 dd->ndim = 0;
6540 if (getenv("GMX_DD_ORDER_ZYX") != NULL((void*)0))
6541 {
6542 /* Decomposition order z,y,x */
6543 if (fplog)
6544 {
6545 fprintf(fplog, "Using domain decomposition order z, y, x\n");
6546 }
6547 for (dim = DIM3-1; dim >= 0; dim--)
6548 {
6549 if (dd->nc[dim] > 1)
6550 {
6551 dd->dim[dd->ndim++] = dim;
6552 }
6553 }
6554 }
6555 else
6556 {
6557 /* Decomposition order x,y,z */
6558 for (dim = 0; dim < DIM3; dim++)
6559 {
6560 if (dd->nc[dim] > 1)
6561 {
6562 dd->dim[dd->ndim++] = dim;
6563 }
6564 }
6565 }
6566}
6567
6568static gmx_domdec_comm_t *init_dd_comm()
6569{
6570 gmx_domdec_comm_t *comm;
6571 int i;
6572
6573 snew(comm, 1);
6574 snew(comm->cggl_flag, DIM*2);
6575 snew(comm->cgcm_state, DIM*2);
6576 for (i = 0; i < DIM3*2; i++)
6577 {
6578 comm->cggl_flag_nalloc[i] = 0;
6579 comm->cgcm_state_nalloc[i] = 0;
6580 }
6581
6582 comm->nalloc_int = 0;
6583 comm->buf_int = NULL((void*)0);
6584
6585 vec_rvec_init(&comm->vbuf);
6586
6587 comm->n_load_have = 0;
6588 comm->n_load_collect = 0;
6589
6590 for (i = 0; i < ddnatNR-ddnatZONE; i++)
6591 {
6592 comm->sum_nat[i] = 0;
6593 }
6594 comm->ndecomp = 0;
6595 comm->nload = 0;
6596 comm->load_step = 0;
6597 comm->load_sum = 0;
6598 comm->load_max = 0;
6599 clear_ivec(comm->load_lim);
6600 comm->load_mdf = 0;
6601 comm->load_pme = 0;
6602
6603 return comm;
6604}
6605
6606gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
6607 unsigned long Flags,
6608 ivec nc,
6609 real comm_distance_min, real rconstr,
6610 const char *dlb_opt, real dlb_scale,
6611 const char *sizex, const char *sizey, const char *sizez,
6612 gmx_mtop_t *mtop, t_inputrec *ir,
6613 matrix box, rvec *x,
6614 gmx_ddbox_t *ddbox,
6615 int *npme_x, int *npme_y)
6616{
6617 gmx_domdec_t *dd;
6618 gmx_domdec_comm_t *comm;
6619 int recload;
6620 int d, i, j;
6621 real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
6622 gmx_bool bC;
6623 char buf[STRLEN4096];
6624
6625 if (fplog)
6626 {
6627 fprintf(fplog,
6628 "\nInitializing Domain Decomposition on %d nodes\n", cr->nnodes);
6629 }
6630
6631 snew(dd, 1);
6632
6633 dd->comm = init_dd_comm();
6634 comm = dd->comm;
6635 snew(comm->cggl_flag, DIM*2);
6636 snew(comm->cgcm_state, DIM*2);
6637
6638 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6639 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6640
6641 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
6642 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
6643 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
6644 recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
6645 comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
6646 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
6647 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
6648 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
6649
6650 dd->pme_recv_f_alloc = 0;
6651 dd->pme_recv_f_buf = NULL((void*)0);
6652
6653 if (dd->bSendRecv2 && fplog)
6654 {
6655 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6656 }
6657 if (comm->eFlop)
6658 {
6659 if (fplog)
6660 {
6661 fprintf(fplog, "Will load balance based on FLOP count\n");
6662 }
6663 if (comm->eFlop > 1)
6664 {
6665 srand(1+cr->nodeid);
6666 }
6667 comm->bRecordLoad = TRUE1;
6668 }
6669 else
6670 {
6671 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6672
6673 }
6674
6675 /* Initialize the GPU share count to 0; it might change later */
6676 comm->nrank_gpu_shared = 0;
6677
6678 comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
6679
6680 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6681 if (fplog)
6682 {
6683 fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
6684 }
6685 dd->bGridJump = comm->bDynLoadBal;
6686 comm->bPMELoadBalDLBLimits = FALSE0;
6687
6688 if (comm->nstSortCG)
6689 {
6690 if (fplog)
6691 {
6692 if (comm->nstSortCG == 1)
6693 {
6694 fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
6695 }
6696 else
6697 {
6698 fprintf(fplog, "Will sort the charge groups every %d steps\n",
6699 comm->nstSortCG);
6700 }
6701 }
6702 snew(comm->sort, 1);
6703 }
6704 else
6705 {
6706 if (fplog)
6707 {
6708 fprintf(fplog, "Will not sort the charge groups\n");
6709 }
6710 }
6711
6712 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
6713
6714 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
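/* More charge groups than molecules means at least one molecule consists of
 * several charge groups, so bonded interactions can cross charge-group
 * boundaries and may need to be communicated. */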
6715 if (comm->bInterCGBondeds)
6716 {
6717 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6718 }
6719 else
6720 {
6721 comm->bInterCGMultiBody = FALSE0;
6722 }
6723
6724 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6725 dd->bInterCGsettles = inter_charge_group_settles(mtop);
6726
6727 if (ir->rlistlong == 0)
6728 {
6729 /* Set the cut-off to some very large value,
6730 * so we don't need if statements everywhere in the code.
6731 * We use sqrt, since the cut-off is squared in some places.
6732 */
6733 comm->cutoff = GMX_CUTOFF_INF1E+18;
6734 }
6735 else
6736 {
6737 comm->cutoff = ir->rlistlong;
6738 }
6739 comm->cutoff_mbody = 0;
6740
6741 comm->cellsize_limit = 0;
6742 comm->bBondComm = FALSE0;
6743
6744 if (comm->bInterCGBondeds)
6745 {
6746 if (comm_distance_min > 0)
6747 {
6748 comm->cutoff_mbody = comm_distance_min;
6749 if (Flags & MD_DDBONDCOMM(1<<11))
6750 {
6751 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6752 }
6753 else
6754 {
6755 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6756 }
6757 r_bonded_limit = comm->cutoff_mbody;
6758 }
6759 else if (ir->bPeriodicMols)
6760 {
6761 /* Can not easily determine the required cut-off */
6762 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6763 comm->cutoff_mbody = comm->cutoff/2;
6764 r_bonded_limit = comm->cutoff_mbody;
6765 }
6766 else
6767 {
6768 if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)))
6769 {
6770 dd_bonded_cg_distance(fplog, mtop, ir, x, box,
6771 Flags & MD_DDBONDCHECK(1<<10), &r_2b, &r_mb);
6772 }
6773 gmx_bcast(sizeof(r_2b), &r_2b, cr);
6774 gmx_bcast(sizeof(r_mb), &r_mb, cr);
6775
6776 /* We use an initial margin of 10% for the minimum cell size,
6777 * except when we are just below the non-bonded cut-off.
6778 */
6779 if (Flags & MD_DDBONDCOMM(1<<11))
6780 {
6781 if (max(r_2b, r_mb)(((r_2b) > (r_mb)) ? (r_2b) : (r_mb) ) > comm->cutoff)
6782 {
6783 r_bonded = max(r_2b, r_mb)(((r_2b) > (r_mb)) ? (r_2b) : (r_mb) );
6784 r_bonded_limit = 1.1*r_bonded;
6785 comm->bBondComm = TRUE1;
6786 }
6787 else
6788 {
6789 r_bonded = r_mb;
6790 r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
6791 }
6792 /* We determine cutoff_mbody later */
6793 }
6794 else
6795 {
6796 /* No special bonded communication,
6797 * simply increase the DD cut-off.
6798 */
6799 r_bonded_limit = 1.1*max(r_2b, r_mb)(((r_2b) > (r_mb)) ? (r_2b) : (r_mb) );
6800 comm->cutoff_mbody = r_bonded_limit;
6801 comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
6802 }
6803 }
6804 comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
6805 if (fplog)
6806 {
6807 fprintf(fplog,
6808 "Minimum cell size due to bonded interactions: %.3f nm\n",
6809 comm->cellsize_limit);
6810 }
6811 }
6812
6813 if (dd->bInterCGcons && rconstr <= 0)
6814 {
6815 /* There is a cell size limit due to the constraints (P-LINCS) */
6816 rconstr = constr_r_max(fplog, mtop, ir);
6817 if (fplog)
6818 {
6819 fprintf(fplog,
6820 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6821 rconstr);
6822 if (rconstr > comm->cellsize_limit)
6823 {
6824 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
6825 }
6826 }
6827 }
6828 else if (rconstr > 0 && fplog)
6829 {
6830 /* Here we do not check for dd->bInterCGcons,
6831 * because one can also set a cell size limit for virtual sites only
6832 * and at this point we don't know yet if there are intercg v-sites.
6833 */
6834 fprintf(fplog,
6835 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6836 rconstr);
6837 }
6838 comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
6839
6840 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6841
6842 if (nc[XX0] > 0)
6843 {
6844 copy_ivec(nc, dd->nc);
6845 set_dd_dim(fplog, dd);
6846 set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);
6847
6848 if (cr->npmenodes == -1)
6849 {
6850 cr->npmenodes = 0;
6851 }
6852 acs = average_cellsize_min(dd, ddbox);
6853 if (acs < comm->cellsize_limit)
6854 {
6855 if (fplog)
6856 {
6857 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
6858 }
6859 gmx_fatal_collective(FARGS, cr, NULL,
6860 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6861 acs, comm->cellsize_limit);
6862 }
6863 }
6864 else
6865 {
6866 set_ddbox_cr(cr, NULL((void*)0), ir, box, &comm->cgs_gl, x, ddbox);
6867
6868 /* We need to choose the optimal DD grid and possibly PME nodes */
6869 limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
6870 comm->eDLB != edlbNO, dlb_scale,
6871 comm->cellsize_limit, comm->cutoff,
6872 comm->bInterCGBondeds);
6873
6874 if (dd->nc[XX0] == 0)
6875 {
6876 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6877 sprintf(buf, "Change the number of nodes or mdrun option %s%s%s",
6878 !bC ? "-rdd" : "-rcon",
6879 comm->eDLB != edlbNO ? " or -dds" : "",
6880 bC ? " or your LINCS settings" : "");
6881
6882 gmx_fatal_collective(FARGS, cr, NULL,
6883 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6884 "%s\n"
6885 "Look in the log file for details on the domain decomposition",
6886 cr->nnodes-cr->npmenodes, limit, buf);
6887 }
6888 set_dd_dim(fplog, dd);
6889 }
6890
6891 if (fplog)
6892 {
6893 fprintf(fplog,
6894 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6895 dd->nc[XX0], dd->nc[YY1], dd->nc[ZZ2], cr->npmenodes);
6896 }
6897
6898 dd->nnodes = dd->nc[XX0]*dd->nc[YY1]*dd->nc[ZZ2];
6899 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6900 {
6901 gmx_fatal_collective(FARGS, cr, NULL,
6902 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6903 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
6904 }
6905 if (cr->npmenodes > dd->nnodes)
6906 {
6907 gmx_fatal_collective(FARGS, cr, NULL,
6908 "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.", cr->npmenodes, dd->nnodes);
6909 }
6910 if (cr->npmenodes > 0)
6911 {
6912 comm->npmenodes = cr->npmenodes;
6913 }
6914 else
6915 {
6916 comm->npmenodes = dd->nnodes;
6917 }
6918
6919 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
6920 {
6921 /* The following choices should match those
6922 * in comm_cost_est in domdec_setup.c.
6923 * Note that here the checks have to take into account
6924 * that the decomposition might occur in a different order than xyz
6925 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6926 * in which case they will not match those in comm_cost_est,
6927 * but since that is mainly for testing purposes that's fine.
6928 */
6929 if (dd->ndim >= 2 && dd->dim[0] == XX0 && dd->dim[1] == YY1 &&
6930 comm->npmenodes > dd->nc[XX0] && comm->npmenodes % dd->nc[XX0] == 0 &&
6931 getenv("GMX_PMEONEDD") == NULL((void*)0))
6932 {
6933 comm->npmedecompdim = 2;
6934 comm->npmenodes_x = dd->nc[XX0];
6935 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6936 }
6937 else
6938 {
6939 /* In case nc is 1 in both x and y we could still choose to
6940 * decompose pme in y instead of x, but we use x for simplicity.
6941 */
6942 comm->npmedecompdim = 1;
6943 if (dd->dim[0] == YY1)
6944 {
6945 comm->npmenodes_x = 1;
6946 comm->npmenodes_y = comm->npmenodes;
6947 }
6948 else
6949 {
6950 comm->npmenodes_x = comm->npmenodes;
6951 comm->npmenodes_y = 1;
6952 }
6953 }
6954 if (fplog)
6955 {
6956 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
6957 comm->npmenodes_x, comm->npmenodes_y, 1);
6958 }
6959 }
6960 else
6961 {
6962 comm->npmedecompdim = 0;
6963 comm->npmenodes_x = 0;
6964 comm->npmenodes_y = 0;
6965 }
6966
6967 /* Technically we don't need both of these,
6968 * but it simplifies the code not to have to recalculate them.
6969 */
6970 *npme_x = comm->npmenodes_x;
6971 *npme_y = comm->npmenodes_y;
6972
6973 snew(comm->slb_frac, DIM);
6974 if (comm->eDLB == edlbNO)
6975 {
6976 comm->slb_frac[XX0] = get_slb_frac(fplog, "x", dd->nc[XX0], sizex);
6977 comm->slb_frac[YY1] = get_slb_frac(fplog, "y", dd->nc[YY1], sizey);
6978 comm->slb_frac[ZZ2] = get_slb_frac(fplog, "z", dd->nc[ZZ2], sizez);
6979 }
6980
6981 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6982 {
6983 if (comm->bBondComm || comm->eDLB != edlbNO)
6984 {
6985 /* Set the bonded communication distance to halfway
6986 * the minimum and the maximum,
6987 * since the extra communication cost is nearly zero.
6988 */
6989 acs = average_cellsize_min(dd, ddbox);
6990 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6991 if (comm->eDLB != edlbNO)
6992 {
6993 /* Check if this does not limit the scaling */
6994 comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
6995 }
6996 if (!comm->bBondComm)
6997 {
6998 /* Without bBondComm do not go beyond the n.b. cut-off */
6999 comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
7000 if (comm->cellsize_limit >= comm->cutoff)
7001 {
7002 /* We don't lose a lot of efficiency
7003 * when increasing it to the n.b. cut-off.
7004 * It can even be slightly faster, because we need
7005 * fewer checks for the communication setup.
7006 */
7007 comm->cutoff_mbody = comm->cutoff;
7008 }
7009 }
7010 /* Check if we did not end up below our original limit */
7011 comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
7012
7013 if (comm->cutoff_mbody > comm->cellsize_limit)
7014 {
7015 comm->cellsize_limit = comm->cutoff_mbody;
7016 }
7017 }
7018 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
7019 }
7020
7021 if (debug)
7022 {
7023 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
7024 "cellsize limit %f\n",
7025 comm->bBondComm, comm->cellsize_limit);
7026 }
7027
7028 if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)))
7029 {
7030 check_dd_restrictions(cr, dd, ir, fplog);
7031 }
7032
7033 comm->partition_step = INT_MIN(-2147483647 -1);
7034 dd->ddp_count = 0;
7035
7036 clear_dd_cycle_counts(dd);
7037
7038 return dd;
7039}
7040
7041static void set_dlb_limits(gmx_domdec_t *dd)
7042
7043{
7044 int d;
7045
7046 for (d = 0; d < dd->ndim; d++)
7047 {
7048 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
7049 dd->comm->cellsize_min[dd->dim[d]] =
7050 dd->comm->cellsize_min_dlb[dd->dim[d]];
7051 }
7052}
7053
7054
7055static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
7056{
7057 gmx_domdec_t *dd;
7058 gmx_domdec_comm_t *comm;
7059 real cellsize_min;
7060 int d, nc, i;
7061 char buf[STRLEN4096];
7062
7063 dd = cr->dd;
7064 comm = dd->comm;
7065
7066 if (fplog)
7067 {
7068 fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
7069 }
7070
7071 cellsize_min = comm->cellsize_min[dd->dim[0]];
7072 for (d = 1; d < dd->ndim; d++)
7073 {
7074 cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
7075 }
7076
7077 if (cellsize_min < comm->cellsize_limit*1.05)
7078 {
7079 dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
7080
7081 /* Change DLB from "auto" to "no". */
7082 comm->eDLB = edlbNO;
7083
7084 return;
7085 }
7086
7087 dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
7088 comm->bDynLoadBal = TRUE1;
7089 dd->bGridJump = TRUE1;
7090
7091 set_dlb_limits(dd);
7092
7093 /* We can set the required cell size info here,
7094 * so we do not need to communicate this.
7095 * The grid is completely uniform.
7096 */
7097 for (d = 0; d < dd->ndim; d++)
7098 {
7099 if (comm->root[d])
7100 {
7101 comm->load[d].sum_m = comm->load[d].sum;
7102
7103 nc = dd->nc[dd->dim[d]];
7104 for (i = 0; i < nc; i++)
7105 {
7106 comm->root[d]->cell_f[i] = i/(real)nc;
7107 if (d > 0)
7108 {
7109 comm->root[d]->cell_f_max0[i] = i /(real)nc;
7110 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
7111 }
7112 }
7113 comm->root[d]->cell_f[nc] = 1.0;
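/* E.g. for nc = 4 this produces the uniform boundaries
 * cell_f = {0, 0.25, 0.5, 0.75, 1.0}. */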
7114 }
7115 }
7116}
7117
7118static char *init_bLocalCG(gmx_mtop_t *mtop)
7119{
7120 int ncg, cg;
7121 char *bLocalCG;
7122
7123 ncg = ncg_mtop(mtop);
7124 snew(bLocalCG, ncg);
7125 for (cg = 0; cg < ncg; cg++)
7126 {
7127 bLocalCG[cg] = FALSE0;
7128 }
7129
7130 return bLocalCG;
7131}
7132
7133void dd_init_bondeds(FILE *fplog,
7134 gmx_domdec_t *dd, gmx_mtop_t *mtop,
7135 gmx_vsite_t *vsite,
7136 t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
7137{
7138 gmx_domdec_comm_t *comm;
7139 gmx_bool bBondComm;
7140 int d;
7141
7142 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
7143
7144 comm = dd->comm;
7145
7146 if (comm->bBondComm)
7147 {
7148 /* Communicate atoms beyond the cut-off for bonded interactions */
7149 comm = dd->comm;
7150
7151 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
7152
7153 comm->bLocalCG = init_bLocalCG(mtop);
7154 }
7155 else
7156 {
7157 /* Only communicate atoms based on cut-off */
7158 comm->cglink = NULL((void*)0);
7159 comm->bLocalCG = NULL((void*)0);
7160 }
7161}
7162
7163static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
7164 t_inputrec *ir,
7165 gmx_bool bDynLoadBal, real dlb_scale,
7166 gmx_ddbox_t *ddbox)
7167{
7168 gmx_domdec_comm_t *comm;
7169 int d;
7170 ivec np;
7171 real limit, shrink;
7172 char buf[64];
7173
7174 if (fplog == NULL((void*)0))
7175 {
7176 return;
7177 }
7178
7179 comm = dd->comm;
7180
7181 if (bDynLoadBal)
7182 {
7183 fprintf(fplog, "The maximum number of communication pulses is:");
7184 for (d = 0; d < dd->ndim; d++)
7185 {
7186 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
7187 }
7188 fprintf(fplog, "\n");
7189 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
7190 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
7191 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
7192 for (d = 0; d < DIM3; d++)
7193 {
7194 if (dd->nc[d] > 1)
7195 {
7196 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
7197 {
7198 shrink = 0;
7199 }
7200 else
7201 {
7202 shrink =
7203 comm->cellsize_min_dlb[d]/
7204 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
7205 }
7206 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
7207 }
7208 }
7209 fprintf(fplog, "\n");
7210 }
7211 else
7212 {
7213 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
7214 fprintf(fplog, "The initial number of communication pulses is:");
7215 for (d = 0; d < dd->ndim; d++)
7216 {
7217 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
7218 }
7219 fprintf(fplog, "\n");
7220 fprintf(fplog, "The initial domain decomposition cell size is:");
7221 for (d = 0; d < DIM3; d++)
7222 {
7223 if (dd->nc[d] > 1)
7224 {
7225 fprintf(fplog, " %c %.2f nm",
7226 dim2char(d), dd->comm->cellsize_min[d]);
7227 }
7228 }
7229 fprintf(fplog, "\n\n");
7230 }
7231
7232 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
7233 {
7234 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
7235 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7236 "non-bonded interactions", "", comm->cutoff);
7237
7238 if (bDynLoadBal)
7239 {
7240 limit = dd->comm->cellsize_limit;
7241 }
7242 else
7243 {
7244 if (dynamic_dd_box(ddbox, ir))
7245 {
7246 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
7247 }
7248 limit = dd->comm->cellsize_min[XX0];
7249 for (d = 1; d < DIM3; d++)
7250 {
7251 limit = min(limit, dd->comm->cellsize_min[d]);
7252 }
7253 }
7254
7255 if (comm->bInterCGBondeds)
7256 {
7257 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7258 "two-body bonded interactions", "(-rdd)",
7259 max(comm->cutoff, comm->cutoff_mbody));
7260 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7261 "multi-body bonded interactions", "(-rdd)",
7262 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
7263 }
7264 if (dd->vsite_comm)
7265 {
7266 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7267 "virtual site constructions", "(-rcon)", limit);
7268 }
7269 if (dd->constraint_comm)
7270 {
7271 sprintf(buf, "atoms separated by up to %d constraints",
7272 1+ir->nProjOrder);
7273 fprintf(fplog, "%40s %-7s %6.3f nm\n",
7274 buf, "(-rcon)", limit);
7275 }
7276 fprintf(fplog, "\n");
7277 }
7278
7279 fflush(fplog);
7280}
7281
7282static void set_cell_limits_dlb(gmx_domdec_t *dd,
7283 real dlb_scale,
7284 const t_inputrec *ir,
7285 const gmx_ddbox_t *ddbox)
7286{
7287 gmx_domdec_comm_t *comm;
7288 int d, dim, npulse, npulse_d_max, npulse_d;
7289 gmx_bool bNoCutOff;
7290
7291 comm = dd->comm;
7292
7293 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
7294
7295 /* Determine the maximum number of comm. pulses in one dimension */
7296
7297 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7298
7299 /* Determine the maximum required number of grid pulses */
7300 if (comm->cellsize_limit >= comm->cutoff)
7301 {
7302 /* Only a single pulse is required */
7303 npulse = 1;
7304 }
7305 else if (!bNoCutOff && comm->cellsize_limit > 0)
7306 {
7307 /* We round down slightly here to avoid overhead due to the latency
7308 * of extra communication calls when the cut-off
7309 * would be only slightly longer than the cell size.
7310 * Later cellsize_limit is redetermined,
7311 * so we can not miss interactions due to this rounding.
7312 */
7313 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
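/* E.g. cutoff = 1.01 nm with cellsize_limit = 0.5 nm gives
 * (int)(0.96 + 2.02) = 2 pulses: the 2% overshoot of the cut-off over
 * two cell widths does not trigger a third pulse. */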
7314 }
7315 else
7316 {
7317 /* There is no cell size limit */
7318 npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
7319 }
7320
7321 if (!bNoCutOff && npulse > 1)
7322 {
7323 /* See if we can do with less pulses, based on dlb_scale */
7324 npulse_d_max = 0;
7325 for (d = 0; d < dd->ndim; d++)
7326 {
7327 dim = dd->dim[d];
7328 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
7329 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
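/* E.g. nc[dim] = 4, cutoff = 1.0 nm, box_size[dim] = 8 nm, skew_fac = 1 and
 * dlb_scale = 0.8 gives npulse_d = (int)(1 + 4*1.0/(8*0.8)) = 1. */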
7330 npulse_d_max = max(npulse_d_max, npulse_d);
7331 }
7332 npulse = min(npulse, npulse_d_max)(((npulse) < (npulse_d_max)) ? (npulse) : (npulse_d_max) );
7333 }
7334
7335 /* This env var can override npulse */
7336 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
7337 if (d > 0)
7338 {
7339 npulse = d;
7340 }
7341
7342 comm->maxpulse = 1;
7343 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
7344 for (d = 0; d < dd->ndim; d++)
7345 {
7346 comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
7347 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
7348 snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
7349 comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
7350 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
7351 {
7352 comm->bVacDLBNoLimit = FALSE0;
7353 }
7354 }
7355
7356 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7357 if (!comm->bVacDLBNoLimit)
7358 {
7359 comm->cellsize_limit = max(comm->cellsize_limit,
7360 comm->cutoff/comm->maxpulse);
7361 }
7362 comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
7363 /* Set the minimum cell size for each DD dimension */
7364 for (d = 0; d < dd->ndim; d++)
7365 {
7366 if (comm->bVacDLBNoLimit ||
7367 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
7368 {
7369 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
7370 }
7371 else
7372 {
7373 comm->cellsize_min_dlb[dd->dim[d]] =
7374 comm->cutoff/comm->cd[d].np_dlb;
7375 }
7376 }
7377 if (comm->cutoff_mbody <= 0)
7378 {
7379 comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
7380 }
7381 if (comm->bDynLoadBal)
7382 {
7383 set_dlb_limits(dd);
7384 }
7385}
7386
7387gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd, int ePBC)
7388{
7389 /* If each molecule is a single charge group
7390 * or we use domain decomposition for each periodic dimension,
7391 * we do not need to take pbc into account for the bonded interactions.
7392 */
7393 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
7394 !(dd->nc[XX0] > 1 &&
7395 dd->nc[YY1] > 1 &&
7396 (dd->nc[ZZ2] > 1 || ePBC == epbcXY)));
7397}
7398
7399void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
7400 t_inputrec *ir, gmx_ddbox_t *ddbox)
7401{
7402 gmx_domdec_comm_t *comm;
7403 int natoms_tot;
7404 real vol_frac;
7405
7406 comm = dd->comm;
7407
7408 /* Initialize the thread data.
7409 * This can not be done in init_domain_decomposition,
7410 * as the number of threads is determined later.
7411 */
7412 comm->nth = gmx_omp_nthreads_get(emntDomdec);
7413 if (comm->nth > 1)
7414 {
7415 snew(comm->dth, comm->nth);
7416 }
7417
7418 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
7419 {
7420 init_ddpme(dd, &comm->ddpme[0], 0);
7421 if (comm->npmedecompdim >= 2)
7422 {
7423 init_ddpme(dd, &comm->ddpme[1], 1);
7424 }
7425 }
7426 else
7427 {
7428 comm->npmenodes = 0;
7429 if (dd->pme_nodeid >= 0)
7430 {
7431 gmx_fatal_collective(FARGS, NULL, dd,
7432 "Can not have separate PME nodes without PME electrostatics");
7433 }
7434 }
7435
7436 if (debug)
7437 {
7438 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
7439 }
7440 if (comm->eDLB != edlbNO)
7441 {
7442 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
7443 }
7444
7445 print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
7446 if (comm->eDLB == edlbAUTO)
7447 {
7448 if (fplog)
7449 {
7450 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
7451 }
7452 print_dd_settings(fplog, dd, ir, TRUE1, dlb_scale, ddbox);
7453 }
7454
7455 if (ir->ePBC == epbcNONE)
7456 {
7457 vol_frac = 1 - 1/(double)dd->nnodes;
7458 }
7459 else
7460 {
7461 vol_frac =
7462 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
7463 }
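/* E.g. without PBC and 8 DD ranks this gives vol_frac = 1 - 1/8 = 0.875,
 * so the global-to-local lookup below is sized for an estimate of roughly
 * 87.5% of all atoms per rank. */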
7464 if (debug)
7465 {
7466 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
7467 }
7468 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
7469
7470 dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
7471}
7472
7473static gmx_bool test_dd_cutoff(t_commrec *cr,
7474 t_state *state, t_inputrec *ir,
7475 real cutoff_req)
7476{
7477 gmx_domdec_t *dd;
7478 gmx_ddbox_t ddbox;
7479 int d, dim, np;
7480 real inv_cell_size;
7481 int LocallyLimited;
7482
7483 dd = cr->dd;
7484
7485 set_ddbox(dd, FALSE0, cr, ir, state->box,
7486 TRUE1, &dd->comm->cgs_gl, state->x, &ddbox);
7487
7488 LocallyLimited = 0;
7489
7490 for (d = 0; d < dd->ndim; d++)
7491 {
7492 dim = dd->dim[d];
7493
7494 inv_cell_size = DD_CELL_MARGIN1.0001*dd->nc[dim]/ddbox.box_size[dim];
7495 if (dynamic_dd_box(&ddbox, ir))
7496 {
7497 inv_cell_size *= DD_PRES_SCALE_MARGIN1.02;
7498 }
7499
7500 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
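/* E.g. 1.5 nm cells with skew_fac = 1 and cutoff_req = 2.0 nm give
 * np = 1 + (int)(2.0*1.0001/1.5) = 2 communication pulses. */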
7501
7502 if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
7503 dd->comm->cd[d].np_dlb > 0)
7504 {
7505 if (np > dd->comm->cd[d].np_dlb)
7506 {
7507 return FALSE0;
7508 }
7509
7510 /* If a current local cell size is smaller than the requested
7511 * cut-off, we could still fix it, but this gets very complicated.
7512 * Without fixing here, we might actually need more checks.
7513 */
7514 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
7515 {
7516 LocallyLimited = 1;
7517 }
7518 }
7519 }
7520
7521 if (dd->comm->eDLB != edlbNO)
7522 {
7523 /* If DLB is not active yet, we don't need to check the grid jumps.
7524 * Actually we shouldn't, because then the grid jump data is not set.
7525 */
7526 if (dd->comm->bDynLoadBal &&
7527 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE0))
7528 {
7529 LocallyLimited = 1;
7530 }
7531
7532 gmx_sumi(1, &LocallyLimited, cr);
7533
7534 if (LocallyLimited > 0)
7535 {
7536 return FALSE0;
7537 }
7538 }
7539
7540 return TRUE1;
7541}
7542
7543gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, t_inputrec *ir,
7544 real cutoff_req)
7545{
7546 gmx_bool bCutoffAllowed;
7547
7548 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
7549
7550 if (bCutoffAllowed)
7551 {
7552 cr->dd->comm->cutoff = cutoff_req;
7553 }
7554
7555 return bCutoffAllowed;
7556}
7557
7558void change_dd_dlb_cutoff_limit(t_commrec *cr)
7559{
7560 gmx_domdec_comm_t *comm;
7561
7562 comm = cr->dd->comm;
7563
7564 /* Turn on the DLB limiting (might have been on already) */
7565 comm->bPMELoadBalDLBLimits = TRUE1;
7566
7567 /* Change the cut-off limit */
7568 comm->PMELoadBal_max_cutoff = comm->cutoff;
7569}
7570
7571static void merge_cg_buffers(int ncell,
7572 gmx_domdec_comm_dim_t *cd, int pulse,
7573 int *ncg_cell,
7574 int *index_gl, int *recv_i,
7575 rvec *cg_cm, rvec *recv_vr,
7576 int *cgindex,
7577 cginfo_mb_t *cginfo_mb, int *cginfo)
7578{
7579 gmx_domdec_ind_t *ind, *ind_p;
7580 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
7581 int shift, shift_at;
7582
7583 ind = &cd->ind[pulse];
7584
7585 /* First correct the already stored data */
7586 shift = ind->nrecv[ncell];
7587 for (cell = ncell-1; cell >= 0; cell--)
7588 {
7589 shift -= ind->nrecv[cell];
7590 if (shift > 0)
7591 {
7592 /* Move the cg's present from previous grid pulses */
7593 cg0 = ncg_cell[ncell+cell];
7594 cg1 = ncg_cell[ncell+cell+1];
7595 cgindex[cg1+shift] = cgindex[cg1];
7596 for (cg = cg1-1; cg >= cg0; cg--)
7597 {
7598 index_gl[cg+shift] = index_gl[cg];
7599 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
7600 cgindex[cg+shift] = cgindex[cg];
7601 cginfo[cg+shift] = cginfo[cg];
7602 }
7603 /* Correct the already stored send indices for the shift */
7604 for (p = 1; p <= pulse; p++)
7605 {
7606 ind_p = &cd->ind[p];
7607 cg0 = 0;
7608 for (c = 0; c < cell; c++)
7609 {
7610 cg0 += ind_p->nsend[c];
7611 }
7612 cg1 = cg0 + ind_p->nsend[cell];
7613 for (cg = cg0; cg < cg1; cg++)
7614 {
7615 ind_p->index[cg] += shift;
7616 }
7617 }
7618 }
7619 }
7620
7621 /* Merge in the communicated buffers */
7622 shift = 0;
7623 shift_at = 0;
7624 cg0 = 0;
7625 for (cell = 0; cell < ncell; cell++)
7626 {
7627 cg1 = ncg_cell[ncell+cell+1] + shift;
7628 if (shift_at > 0)
7629 {
7630 /* Correct the old cg indices */
7631 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
7632 {
7633 cgindex[cg+1] += shift_at;
7634 }
7635 }
7636 for (cg = 0; cg < ind->nrecv[cell]; cg++)
7637 {
7638 /* Copy this charge group from the buffer */
7639 index_gl[cg1] = recv_i[cg0];
7640 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
7641 /* Add it to the cgindex */
7642 cg_gl = index_gl[cg1];
7643 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
7644 nat = GET_CGINFO_NATOMS(cginfo[cg1])(((cginfo[cg1])>>25) & 63);
7645 cgindex[cg1+1] = cgindex[cg1] + nat;
7646 cg0++;
7647 cg1++;
7648 shift_at += nat;
7649 }
7650 shift += ind->nrecv[cell];
7651 ncg_cell[ncell+cell+1] = cg1;
7652 }
7653}
7654
7655static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7656 int nzone, int cg0, const int *cgindex)
7657{
7658 int cg, zone, p;
7659
7660 /* Store the atom block boundaries for easy copying of communication buffers
7661 */
7662 cg = cg0;
7663 for (zone = 0; zone < nzone; zone++)
7664 {
7665 for (p = 0; p < cd->np; p++)
7666 {
7667 cd->ind[p].cell2at0[zone] = cgindex[cg];
7668 cg += cd->ind[p].nrecv[zone];
7669 cd->ind[p].cell2at1[zone] = cgindex[cg];
7670 }
7671 }
7672}
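
Since cgindex[] is a prefix sum of the atoms per charge group, the atom block of one pulse in one zone follows directly from its first charge group and the number of received groups. A minimal sketch with hypothetical names (not part of the sources):

/* Atom range [at0, at1) of a block of nrecv charge groups starting at cg_start */
static void atom_range_of_pulse(const int *cgindex, int cg_start, int nrecv,
                                int *at0, int *at1)
{
    *at0 = cgindex[cg_start];          /* first atom of the block */
    *at1 = cgindex[cg_start + nrecv];  /* one past the last atom  */
}
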
7673
7674static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
7675{
7676 int i;
7677 gmx_bool bMiss;
7678
7679 bMiss = FALSE0;
7680 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
7681 {
7682 if (!bLocalCG[link->a[i]])
7683 {
7684 bMiss = TRUE1;
7685 }
7686 }
7687
7688 return bMiss;
7689}
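
missing_link() walks a t_blocka-style structure: index[] holds, per charge group, a range into the flat array a[] of linked charge groups. The simplified sketch below uses hypothetical, reduced types and returns early (which the original does not), but performs the same traversal:

/* CSR-like stand-in for t_blocka, for illustration only */
typedef struct {
    int *index;  /* size ncg+1: range starts into a[]   */
    int *a;      /* concatenated lists of linked groups */
} blocka_demo_t;

/* Return 1 if any group linked to cg is not flagged as local */
static int any_link_not_local(const blocka_demo_t *link, int cg, const char *bLocal)
{
    int i;

    for (i = link->index[cg]; i < link->index[cg+1]; i++)
    {
        if (!bLocal[link->a[i]])
        {
            return 1;
        }
    }
    return 0;
}
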
7690
7691/* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7692typedef struct {
7693 real c[DIM3][4]; /* the corners for the non-bonded communication */
7694 real cr0; /* corner for rounding */
7695 real cr1[4]; /* corners for rounding */
7696 real bc[DIM3]; /* corners for bounded communication */
7697 real bcr1; /* corner for rounding for bonded communication */
7698} dd_corners_t;
7699
7700/* Determine the corners of the domain(s) we are communicating with */
7701static void
7702set_dd_corners(const gmx_domdec_t *dd,
7703 int dim0, int dim1, int dim2,
7704 gmx_bool bDistMB,
7705 dd_corners_t *c)
7706{
7707 const gmx_domdec_comm_t *comm;
7708 const gmx_domdec_zones_t *zones;
7709 int i, j;
7710
7711 comm = dd->comm;
7712
7713 zones = &comm->zones;
7714
7715 /* Keep the compiler happy */
7716 c->cr0 = 0;
7717 c->bcr1 = 0;
7718
7719 /* The first dimension is equal for all cells */
7720 c->c[0][0] = comm->cell_x0[dim0];
7721 if (bDistMB)
7722 {
7723 c->bc[0] = c->c[0][0];
7724 }
7725 if (dd->ndim >= 2)
7726 {
7727 dim1 = dd->dim[1];
7728 /* This cell row is only seen from the first row */
7729 c->c[1][0] = comm->cell_x0[dim1];
7730 /* All rows can see this row */
7731 c->c[1][1] = comm->cell_x0[dim1];
7732 if (dd->bGridJump)
7733 {
7734             c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
7735 if (bDistMB)
7736 {
7737 /* For the multi-body distance we need the maximum */
7738                 c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
7739 }
7740 }
7741 /* Set the upper-right corner for rounding */
7742 c->cr0 = comm->cell_x1[dim0];
7743
7744 if (dd->ndim >= 3)
7745 {
7746 dim2 = dd->dim[2];
7747 for (j = 0; j < 4; j++)
7748 {
7749 c->c[2][j] = comm->cell_x0[dim2];
7750 }
7751 if (dd->bGridJump)
7752 {
7753 /* Use the maximum of the i-cells that see a j-cell */
7754 for (i = 0; i < zones->nizone; i++)
7755 {
7756 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
7757 {
7758 if (j >= 4)
7759 {
7760                             c->c[2][j-4] =
7761                                 max(c->c[2][j-4],
7762                                     comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7763 }
7764 }
7765 }
7766 if (bDistMB)
7767 {
7768 /* For the multi-body distance we need the maximum */
7769 c->bc[2] = comm->cell_x0[dim2];
7770 for (i = 0; i < 2; i++)
7771 {
7772 for (j = 0; j < 2; j++)
7773 {
7774                         c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
7775 }
7776 }
7777 }
7778 }
7779
7780 /* Set the upper-right corner for rounding */
7781 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7782 * Only cell (0,0,0) can see cell 7 (1,1,1)
7783 */
7784 c->cr1[0] = comm->cell_x1[dim1];
7785 c->cr1[3] = comm->cell_x1[dim1];
7786 if (dd->bGridJump)
7787 {
7788                 c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
7789 if (bDistMB)
7790 {
7791 /* For the multi-body distance we need the maximum */
7792                     c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
7793 }
7794 }
7795 }
7796 }
7797}
7798
7799/* Determine which cg's we need to send in this pulse from this zone */
7800static void
7801get_zone_pulse_cgs(gmx_domdec_t *dd,
7802 int zonei, int zone,
7803 int cg0, int cg1,
7804 const int *index_gl,
7805 const int *cgindex,
7806 int dim, int dim_ind,
7807 int dim0, int dim1, int dim2,
7808 real r_comm2, real r_bcomm2,
7809 matrix box,
7810 ivec tric_dist,
7811 rvec *normal,
7812 real skew_fac2_d, real skew_fac_01,
7813 rvec *v_d, rvec *v_0, rvec *v_1,
7814 const dd_corners_t *c,
7815 rvec sf2_round,
7816 gmx_bool bDistBonded,
7817 gmx_bool bBondComm,
7818 gmx_bool bDist2B,
7819 gmx_bool bDistMB,
7820 rvec *cg_cm,
7821 int *cginfo,
7822 gmx_domdec_ind_t *ind,
7823 int **ibuf, int *ibuf_nalloc,
7824 vec_rvec_t *vbuf,
7825 int *nsend_ptr,
7826 int *nat_ptr,
7827 int *nsend_z_ptr)
7828{
7829 gmx_domdec_comm_t *comm;
7830 gmx_bool bScrew;
7831 gmx_bool bDistMB_pulse;
7832 int cg, i;
7833 real r2, rb2, r, tric_sh;
7834 rvec rn, rb;
7835 int dimd;
7836 int nsend_z, nsend, nat;
7837
7838 comm = dd->comm;
7839
7840 bScrew = (dd->bScrewPBC && dim == XX0);
7841
7842 bDistMB_pulse = (bDistMB && bDistBonded);
7843
7844 nsend_z = 0;
7845 nsend = *nsend_ptr;
7846 nat = *nat_ptr;
7847
7848 for (cg = cg0; cg < cg1; cg++)
7849 {
7850 r2 = 0;
7851 rb2 = 0;
7852 if (tric_dist[dim_ind] == 0)
7853 {
7854 /* Rectangular direction, easy */
7855 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
7856 if (r > 0)
7857 {
7858 r2 += r*r;
7859 }
7860 if (bDistMB_pulse)
7861 {
7862 r = cg_cm[cg][dim] - c->bc[dim_ind];
7863 if (r > 0)
7864 {
7865 rb2 += r*r;
7866 }
7867 }
7868 /* Rounding gives at most a 16% reduction
7869 * in communicated atoms
7870 */
7871 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7872 {
7873 r = cg_cm[cg][dim0] - c->cr0;
7874 /* This is the first dimension, so always r >= 0 */
7875 r2 += r*r;
7876 if (bDistMB_pulse)
7877 {
7878 rb2 += r*r;
7879 }
7880 }
7881 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7882 {
7883 r = cg_cm[cg][dim1] - c->cr1[zone];
7884 if (r > 0)
7885 {
7886 r2 += r*r;
7887 }
7888 if (bDistMB_pulse)
7889 {
7890 r = cg_cm[cg][dim1] - c->bcr1;
7891 if (r > 0)
7892 {
7893 rb2 += r*r;
7894 }
7895 }
7896 }
7897 }
7898 else
7899 {
7900 /* Triclinic direction, more complicated */
7901 clear_rvec(rn);
7902 clear_rvec(rb);
7903 /* Rounding, conservative as the skew_fac multiplication
7904 * will slightly underestimate the distance.
7905 */
7906 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7907 {
7908 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
7909 for (i = dim0+1; i < DIM3; i++)
7910 {
7911 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7912 }
7913 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7914 if (bDistMB_pulse)
7915 {
7916 rb[dim0] = rn[dim0];
7917 rb2 = r2;
7918 }
7919 /* Take care that the cell planes along dim0 might not
7920 * be orthogonal to those along dim1 and dim2.
7921 */
7922 for (i = 1; i <= dim_ind; i++)
7923 {
7924 dimd = dd->dim[i];
7925 if (normal[dim0][dimd] > 0)
7926 {
7927 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7928 if (bDistMB_pulse)
7929 {
7930 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7931 }
7932 }
7933 }
7934 }
7935 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7936 {
7937 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
7938 tric_sh = 0;
7939 for (i = dim1+1; i < DIM3; i++)
7940 {
7941 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7942 }
7943 rn[dim1] += tric_sh;
7944 if (rn[dim1] > 0)
7945 {
7946 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7947 /* Take care of coupling of the distances
7948 * to the planes along dim0 and dim1 through dim2.
7949 */
7950 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7951 /* Take care that the cell planes along dim1
7952 * might not be orthogonal to that along dim2.
7953 */
7954 if (normal[dim1][dim2] > 0)
7955 {
7956 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7957 }
7958 }
7959 if (bDistMB_pulse)
7960 {
7961 rb[dim1] +=
7962 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
7963 if (rb[dim1] > 0)
7964 {
7965 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7966 /* Take care of coupling of the distances
7967 * to the planes along dim0 and dim1 through dim2.
7968 */
7969 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7970 /* Take care that the cell planes along dim1
7971 * might not be orthogonal to that along dim2.
7972 */
7973 if (normal[dim1][dim2] > 0)
7974 {
7975 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7976 }
7977 }
7978 }
7979 }
7980 /* The distance along the communication direction */
7981 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
7982 tric_sh = 0;
7983 for (i = dim+1; i < DIM3; i++)
7984 {
7985 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7986 }
7987 rn[dim] += tric_sh;
7988 if (rn[dim] > 0)
7989 {
7990 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7991 /* Take care of coupling of the distances
7992 * to the planes along dim0 and dim1 through dim2.
7993 */
7994 if (dim_ind == 1 && zonei == 1)
7995 {
7996 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
7997 }
7998 }
7999 if (bDistMB_pulse)
8000 {
8001 clear_rvec(rb);
8002 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
8003 if (rb[dim] > 0)
8004 {
8005 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
8006 /* Take care of coupling of the distances
8007 * to the planes along dim0 and dim1 through dim2.
8008 */
8009 if (dim_ind == 1 && zonei == 1)
8010 {
8011 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
8012 }
8013 }
8014 }
8015 }
8016
8017 if (r2 < r_comm2 ||
8018 (bDistBonded &&
8019 ((bDistMB && rb2 < r_bcomm2) ||
8020 (bDist2B && r2 < r_bcomm2)) &&
8021 (!bBondComm ||
8022 (GET_CGINFO_BOND_INTER(cginfo[cg])( (cginfo[cg]) & (1<<22)) &&
8023 missing_link(comm->cglink, index_gl[cg],
8024 comm->bLocalCG)))))
8025 {
8026 /* Make an index to the local charge groups */
8027 if (nsend+1 > ind->nalloc)
8028 {
8029 ind->nalloc = over_alloc_large(nsend+1)(int)(1.19*(nsend+1) + 1000);
8030                 srenew(ind->index, ind->nalloc);
8031 }
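            /* Worked example (illustrative, hypothetical numbers): over_alloc_large(n)
             * expands to (int)(1.19*n + 1000), i.e. geometric growth with a constant
             * offset. A request for 5000 entries thus reserves
             * (int)(1.19*5000 + 1000) = 6950, keeping the number of reallocations
             * logarithmic in the final size.
             */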
8032 if (nsend+1 > *ibuf_nalloc)
8033 {
8034 *ibuf_nalloc = over_alloc_large(nsend+1)(int)(1.19*(nsend+1) + 1000);
8035                 srenew(*ibuf, *ibuf_nalloc);
8036 }
8037 ind->index[nsend] = cg;
8038 (*ibuf)[nsend] = index_gl[cg];
8039 nsend_z++;
8040 vec_rvec_check_alloc(vbuf, nsend+1);
8041
8042 if (dd->ci[dim] == 0)
8043 {
8044 /* Correct cg_cm for pbc */
8045 rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
8046 if (bScrew)
8047 {
8048 vbuf->v[nsend][YY1] = box[YY1][YY1] - vbuf->v[nsend][YY1];
8049 vbuf->v[nsend][ZZ2] = box[ZZ2][ZZ2] - vbuf->v[nsend][ZZ2];
8050 }
8051 }
8052 else
8053 {
8054 copy_rvec(cg_cm[cg], vbuf->v[nsend]);
8055 }
8056 nsend++;
8057 nat += cgindex[cg+1] - cgindex[cg];
8058 }
8059 }
8060
8061 *nsend_ptr = nsend;
8062 *nat_ptr = nat;
8063 *nsend_z_ptr = nsend_z;
8064}
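
For the rectangular case, the core of the selection criterion above is: accumulate, over the decomposition dimensions, the squared distance by which the charge group lies beyond the zone corner, and send the group when that is below the squared cut-off. A self-contained sketch under that assumption (hypothetical helper, plain float instead of real/rvec, no rounding or bonded terms):

/* Return 1 if x is within cutoff2 of the zone corner along the given dims */
static int within_comm_range(const float x[3], const float corner[3],
                             const int *dims, int ndim, float cutoff2)
{
    float r, r2 = 0.0f;
    int   d;

    for (d = 0; d < ndim; d++)
    {
        r = x[dims[d]] - corner[dims[d]];
        if (r > 0)
        {
            r2 += r*r;   /* only distance beyond the corner contributes */
        }
    }
    return (r2 < cutoff2);
}
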
8065
8066static void setup_dd_communication(gmx_domdec_t *dd,
8067 matrix box, gmx_ddbox_t *ddbox,
8068 t_forcerec *fr, t_state *state, rvec **f)
8069{
8070 int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
8071 int nzone, nzone_send, zone, zonei, cg0, cg1;
8072 int c, i, j, cg, cg_gl, nrcg;
8073 int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
8074 gmx_domdec_comm_t *comm;
8075 gmx_domdec_zones_t *zones;
8076 gmx_domdec_comm_dim_t *cd;
8077 gmx_domdec_ind_t *ind;
8078 cginfo_mb_t *cginfo_mb;
8079 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
8080 real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
8081 dd_corners_t corners;
8082 ivec tric_dist;
8083 rvec *cg_cm, *normal, *v_d, *v_0 = NULL((void*)0), *v_1 = NULL((void*)0), *recv_vr;
8084 real skew_fac2_d, skew_fac_01;
8085 rvec sf2_round;
8086 int nsend, nat;
8087 int th;
8088
8089 if (debug)
8090 {
8091 fprintf(debug, "Setting up DD communication\n");
8092 }
8093
8094 comm = dd->comm;
8095
8096 switch (fr->cutoff_scheme)
8097 {
8098 case ecutsGROUP:
8099 cg_cm = fr->cg_cm;
8100 break;
8101 case ecutsVERLET:
8102 cg_cm = state->x;
8103 break;
8104 default:
8105             gmx_incons("unimplemented");
8106 cg_cm = NULL((void*)0);
8107 }
8108
8109 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8110 {
8111 dim = dd->dim[dim_ind];
8112
8113 /* Check if we need to use triclinic distances */
8114 tric_dist[dim_ind] = 0;
8115 for (i = 0; i <= dim_ind; i++)
8116 {
8117 if (ddbox->tric_dir[dd->dim[i]])
8118 {
8119 tric_dist[dim_ind] = 1;
8120 }
8121 }
8122 }
8123
8124 bBondComm = comm->bBondComm;
8125
8126 /* Do we need to determine extra distances for multi-body bondeds? */
8127 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8128
8129 /* Do we need to determine extra distances for only two-body bondeds? */
8130 bDist2B = (bBondComm && !bDistMB);
8131
8132 r_comm2 = sqr(comm->cutoff);
8133 r_bcomm2 = sqr(comm->cutoff_mbody);
8134
8135 if (debug)
8136 {
8137 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
8138 }
8139
8140 zones = &comm->zones;
8141
8142 dim0 = dd->dim[0];
8143 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
8144 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
8145
8146 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
8147
8148 /* Triclinic stuff */
8149 normal = ddbox->normal;
8150 skew_fac_01 = 0;
8151 if (dd->ndim >= 2)
8152 {
8153 v_0 = ddbox->v[dim0];
8154 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
8155 {
8156 /* Determine the coupling coefficient for the distances
8157 * to the cell planes along dim0 and dim1 through dim2.
8158 * This is required for correct rounding.
8159 */
8160 skew_fac_01 =
8161 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
8162 if (debug)
8163 {
8164 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
8165 }
8166 }
8167 }
8168 if (dd->ndim >= 3)
8169 {
8170 v_1 = ddbox->v[dim1];
8171 }
8172
8173 zone_cg_range = zones->cg_range;
8174 index_gl = dd->index_gl;
8175 cgindex = dd->cgindex;
8176 cginfo_mb = fr->cginfo_mb;
8177
8178 zone_cg_range[0] = 0;
8179 zone_cg_range[1] = dd->ncg_home;
8180 comm->zone_ncg1[0] = dd->ncg_home;
8181 pos_cg = dd->ncg_home;
8182
8183 nat_tot = dd->nat_home;
8184 nzone = 1;
8185 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
8186 {
8187 dim = dd->dim[dim_ind];
8188 cd = &comm->cd[dim_ind];
8189
8190 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
8191 {
8192 /* No pbc in this dimension, the first node should not comm. */
8193 nzone_send = 0;
8194 }
8195 else
8196 {
8197 nzone_send = nzone;
8198 }
8199
8200 v_d = ddbox->v[dim];
8201 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
8202
8203 cd->bInPlace = TRUE1;
8204 for (p = 0; p < cd->np; p++)
8205 {
8206 /* Only atoms communicated in the first pulse are used
8207 * for multi-body bonded interactions or for bBondComm.
8208 */
8209 bDistBonded = ((bDistMB || bDist2B) && p == 0);
8210
8211 ind = &cd->ind[p];
8212 nsend = 0;
8213 nat = 0;
8214 for (zone = 0; zone < nzone_send; zone++)
8215 {
8216 if (tric_dist[dim_ind] && dim_ind > 0)
8217 {
8218 /* Determine slightly more optimized skew_fac's
8219 * for rounding.
8220 * This reduces the number of communicated atoms
8221 * by about 10% for 3D DD of rhombic dodecahedra.
8222 */
8223 for (dimd = 0; dimd < dim; dimd++)
8224 {
8225 sf2_round[dimd] = 1;
8226 if (ddbox->tric_dir[dimd])
8227 {
8228 for (i = dd->dim[dimd]+1; i < DIM3; i++)
8229 {
8230 /* If we are shifted in dimension i
8231 * and the cell plane is tilted forward
8232 * in dimension i, skip this coupling.
8233 */
8234 if (!(zones->shift[nzone+zone][i] &&
8235 ddbox->v[dimd][i][dimd] >= 0))
8236 {
8237 sf2_round[dimd] +=
8238 sqr(ddbox->v[dimd][i][dimd]);
8239 }
8240 }
8241 sf2_round[dimd] = 1/sf2_round[dimd];
8242 }
8243 }
8244 }
8245
8246 zonei = zone_perm[dim_ind][zone];
8247 if (p == 0)
8248 {
8249                     /* Here we permute the zones to obtain a convenient order
8250 * for neighbor searching
8251 */
8252 cg0 = zone_cg_range[zonei];
8253 cg1 = zone_cg_range[zonei+1];
8254 }
8255 else
8256 {
8257 /* Look only at the cg's received in the previous grid pulse
8258 */
8259 cg1 = zone_cg_range[nzone+zone+1];
8260 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
8261 }
8262
8263#pragma omp parallel for num_threads(comm->nth) schedule(static)
8264 for (th = 0; th < comm->nth; th++)
8265 {
8266 gmx_domdec_ind_t *ind_p;
8267 int **ibuf_p, *ibuf_nalloc_p;
8268 vec_rvec_t *vbuf_p;
8269 int *nsend_p, *nat_p;
8270 int *nsend_zone_p;
8271 int cg0_th, cg1_th;
8272
8273 if (th == 0)
8274 {
8275 /* Thread 0 writes in the comm buffers */
8276 ind_p = ind;
8277 ibuf_p = &comm->buf_int;
8278 ibuf_nalloc_p = &comm->nalloc_int;
8279 vbuf_p = &comm->vbuf;
8280 nsend_p = &nsend;
8281 nat_p = &nat;
8282 nsend_zone_p = &ind->nsend[zone];
8283 }
8284 else
8285 {
8286 /* Other threads write into temp buffers */
8287 ind_p = &comm->dth[th].ind;
8288 ibuf_p = &comm->dth[th].ibuf;
8289 ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
8290 vbuf_p = &comm->dth[th].vbuf;
8291 nsend_p = &comm->dth[th].nsend;
8292 nat_p = &comm->dth[th].nat;
8293 nsend_zone_p = &comm->dth[th].nsend_zone;
8294
8295 comm->dth[th].nsend = 0;
8296 comm->dth[th].nat = 0;
8297 comm->dth[th].nsend_zone = 0;
8298 }
8299
8300 if (comm->nth == 1)
8301 {
8302 cg0_th = cg0;
8303 cg1_th = cg1;
8304 }
8305 else
8306 {
8307 cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
8308 cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
8309 }
8310
8311 /* Get the cg's for this pulse in this zone */
8312 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
8313 index_gl, cgindex,
8314 dim, dim_ind, dim0, dim1, dim2,
8315 r_comm2, r_bcomm2,
8316 box, tric_dist,
8317 normal, skew_fac2_d, skew_fac_01,
8318 v_d, v_0, v_1, &corners, sf2_round,
8319 bDistBonded, bBondComm,
8320 bDist2B, bDistMB,
8321 cg_cm, fr->cginfo,
8322 ind_p,
8323 ibuf_p, ibuf_nalloc_p,
8324 vbuf_p,
8325 nsend_p, nat_p,
8326 nsend_zone_p);
8327 }
8328
8329 /* Append data of threads>=1 to the communication buffers */
8330 for (th = 1; th < comm->nth; th++)
8331 {
8332 dd_comm_setup_work_t *dth;
8333 int i, ns1;
8334
8335 dth = &comm->dth[th];
8336
8337 ns1 = nsend + dth->nsend_zone;
8338 if (ns1 > ind->nalloc)
8339 {
8340 ind->nalloc = over_alloc_dd(ns1);
8341                         srenew(ind->index, ind->nalloc);
8342 }
8343 if (ns1 > comm->nalloc_int)
8344 {
8345 comm->nalloc_int = over_alloc_dd(ns1);
8346                         srenew(comm->buf_int, comm->nalloc_int);
8347 }
8348 if (ns1 > comm->vbuf.nalloc)
8349 {
8350 comm->vbuf.nalloc = over_alloc_dd(ns1);
8351                         srenew(comm->vbuf.v, comm->vbuf.nalloc);
8352 }
8353
8354 for (i = 0; i < dth->nsend_zone; i++)
8355 {
8356 ind->index[nsend] = dth->ind.index[i];
8357 comm->buf_int[nsend] = dth->ibuf[i];
8358 copy_rvec(dth->vbuf.v[i],
8359 comm->vbuf.v[nsend]);
8360 nsend++;
8361 }
8362 nat += dth->nat;
8363 ind->nsend[zone] += dth->nsend_zone;
8364 }
8365 }
8366 /* Clear the counts in case we do not have pbc */
8367 for (zone = nzone_send; zone < nzone; zone++)
8368 {
8369 ind->nsend[zone] = 0;
8370 }
8371 ind->nsend[nzone] = nsend;
8372 ind->nsend[nzone+1] = nat;
8373 /* Communicate the number of cg's and atoms to receive */
8374 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8375 ind->nsend, nzone+2,
8376 ind->nrecv, nzone+2);
8377
8378 /* The rvec buffer is also required for atom buffers of size nsend
8379 * in dd_move_x and dd_move_f.
8380 */
8381 vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
8382
8383 if (p > 0)
8384 {
8385 /* We can receive in place if only the last zone is not empty */
8386 for (zone = 0; zone < nzone-1; zone++)
8387 {
8388 if (ind->nrecv[zone] > 0)
8389 {
8390 cd->bInPlace = FALSE0;
8391 }
8392 }
8393 if (!cd->bInPlace)
8394 {
8395 /* The int buffer is only required here for the cg indices */
8396 if (ind->nrecv[nzone] > comm->nalloc_int2)
8397 {
8398 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
8399                     srenew(comm->buf_int2, comm->nalloc_int2);
8400 }
8401 /* The rvec buffer is also required for atom buffers
8402 * of size nrecv in dd_move_x and dd_move_f.
8403 */
8404                 i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
8405 vec_rvec_check_alloc(&comm->vbuf2, i);
8406 }
8407 }
8408
8409 /* Make space for the global cg indices */
8410 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
8411 || dd->cg_nalloc == 0)
8412 {
8413 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
8414             srenew(index_gl, dd->cg_nalloc);
8415             srenew(cgindex, dd->cg_nalloc+1);
8416 }
8417 /* Communicate the global cg indices */
8418 if (cd->bInPlace)
8419 {
8420 recv_i = index_gl + pos_cg;
8421 }
8422 else
8423 {
8424 recv_i = comm->buf_int2;
8425 }
8426 dd_sendrecv_int(dd, dim_ind, dddirBackward,
8427 comm->buf_int, nsend,
8428 recv_i, ind->nrecv[nzone]);
8429
8430 /* Make space for cg_cm */
8431 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
8432 if (fr->cutoff_scheme == ecutsGROUP)
8433 {
8434 cg_cm = fr->cg_cm;
8435 }
8436 else
8437 {
8438 cg_cm = state->x;
8439 }
8440 /* Communicate cg_cm */
8441 if (cd->bInPlace)
8442 {
8443 recv_vr = cg_cm + pos_cg;
8444 }
8445 else
8446 {
8447 recv_vr = comm->vbuf2.v;
8448 }
8449 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
8450 comm->vbuf.v, nsend,
8451 recv_vr, ind->nrecv[nzone]);
8452
8453 /* Make the charge group index */
8454 if (cd->bInPlace)
8455 {
8456 zone = (p == 0 ? 0 : nzone - 1);
8457 while (zone < nzone)
8458 {
8459 for (cg = 0; cg < ind->nrecv[zone]; cg++)
8460 {
8461 cg_gl = index_gl[pos_cg];
8462 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
8463 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg])(((fr->cginfo[pos_cg])>>25) & 63);
8464 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
8465 if (bBondComm)
8466 {
8467 /* Update the charge group presence,
8468 * so we can use it in the next pass of the loop.
8469 */
8470 comm->bLocalCG[cg_gl] = TRUE1;
8471 }
8472 pos_cg++;
8473 }
8474 if (p == 0)
8475 {
8476 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
8477 }
8478 zone++;
8479 zone_cg_range[nzone+zone] = pos_cg;
8480 }
8481 }
8482 else
8483 {
8484 /* This part of the code is never executed with bBondComm. */
8485 merge_cg_buffers(nzone, cd, p, zone_cg_range,
8486 index_gl, recv_i, cg_cm, recv_vr,
8487 cgindex, fr->cginfo_mb, fr->cginfo);
8488 pos_cg += ind->nrecv[nzone];
8489 }
8490 nat_tot += ind->nrecv[nzone+1];
8491 }
8492 if (!cd->bInPlace)
8493 {
8494 /* Store the atom block for easy copying of communication buffers */
8495 make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
8496 }
8497 nzone += nzone;
8498 }
8499 dd->index_gl = index_gl;
8500 dd->cgindex = cgindex;
8501
8502 dd->ncg_tot = zone_cg_range[zones->n];
8503 dd->nat_tot = nat_tot;
8504 comm->nat[ddnatHOME] = dd->nat_home;
8505 for (i = ddnatZONE; i < ddnatNR; i++)
8506 {
8507 comm->nat[i] = dd->nat_tot;
8508 }
8509
8510 if (!bBondComm)
8511 {
8512     /* We don't need to update cginfo, since that was already done above.
8513 * So we pass NULL for the forcerec.
8514 */
8515 dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
8516 NULL((void*)0), comm->bLocalCG);
8517 }
8518
8519 if (debug)
8520 {
8521 fprintf(debug, "Finished setting up DD communication, zones:");
8522 for (c = 0; c < zones->n; c++)
8523 {
8524 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
8525 }
8526 fprintf(debug, "\n");
8527 }
8528}
8529
8530static void set_cg_boundaries(gmx_domdec_zones_t *zones)
8531{
8532 int c;
8533
8534 for (c = 0; c < zones->nizone; c++)
8535 {
8536 zones->izone[c].cg1 = zones->cg_range[c+1];
8537 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
8538 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
8539 }
8540}
8541
8542static void set_zones_size(gmx_domdec_t *dd,
8543 matrix box, const gmx_ddbox_t *ddbox,
8544 int zone_start, int zone_end)
8545{
8546 gmx_domdec_comm_t *comm;
8547 gmx_domdec_zones_t *zones;
8548 gmx_bool bDistMB;
8549 int z, zi, zj0, zj1, d, dim;
8550 real rcs, rcmbs;
8551 int i, j;
8552 real size_j, add_tric;
8553 real vol;
8554
8555 comm = dd->comm;
8556
8557 zones = &comm->zones;
8558
8559 /* Do we need to determine extra distances for multi-body bondeds? */
8560 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
8561
8562 for (z = zone_start; z < zone_end; z++)
8563 {
8564 /* Copy cell limits to zone limits.
8565 * Valid for non-DD dims and non-shifted dims.
8566 */
8567 copy_rvec(comm->cell_x0, zones->size[z].x0);
8568 copy_rvec(comm->cell_x1, zones->size[z].x1);
8569 }
8570
8571 for (d = 0; d < dd->ndim; d++)
8572 {
8573 dim = dd->dim[d];
8574
8575 for (z = 0; z < zones->n; z++)
8576 {
8577 /* With a staggered grid we have different sizes
8578 * for non-shifted dimensions.
8579 */
8580 if (dd->bGridJump && zones->shift[z][dim] == 0)
8581 {
8582 if (d == 1)
8583 {
8584 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
8585 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
8586 }
8587 else if (d == 2)
8588 {
8589 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
8590 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
8591 }
8592 }
8593 }
8594
8595 rcs = comm->cutoff;
8596 rcmbs = comm->cutoff_mbody;
8597 if (ddbox->tric_dir[dim])
8598 {
8599 rcs /= ddbox->skew_fac[dim];
8600 rcmbs /= ddbox->skew_fac[dim];
8601 }
8602
8603 /* Set the lower limit for the shifted zone dimensions */
8604 for (z = zone_start; z < zone_end; z++)
8605 {
8606 if (zones->shift[z][dim] > 0)
8607 {
8608 dim = dd->dim[d];
8609 if (!dd->bGridJump || d == 0)
8610 {
8611 zones->size[z].x0[dim] = comm->cell_x1[dim];
8612 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
8613 }
8614 else
8615 {
8616 /* Here we take the lower limit of the zone from
8617 * the lowest domain of the zone below.
8618 */
8619 if (z < 4)
8620 {
8621 zones->size[z].x0[dim] =
8622 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
8623 }
8624 else
8625 {
8626 if (d == 1)
8627 {
8628 zones->size[z].x0[dim] =
8629 zones->size[zone_perm[2][z-4]].x0[dim];
8630 }
8631 else
8632 {
8633 zones->size[z].x0[dim] =
8634 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
8635 }
8636 }
8637 /* A temporary limit, is updated below */
8638 zones->size[z].x1[dim] = zones->size[z].x0[dim];
8639
8640 if (bDistMB)
8641 {
8642 for (zi = 0; zi < zones->nizone; zi++)
8643 {
8644 if (zones->shift[zi][dim] == 0)
8645 {
8646 /* This takes the whole zone into account.
8647 * With multiple pulses this will lead
8648                              * to a larger zone than strictly necessary.
8649 */
8650                             zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8651                                                           zones->size[zi].x1[dim]+rcmbs);
8652 }
8653 }
8654 }
8655 }
8656 }
8657 }
8658
8659 /* Loop over the i-zones to set the upper limit of each
8660 * j-zone they see.
8661 */
8662 for (zi = 0; zi < zones->nizone; zi++)
8663 {
8664 if (zones->shift[zi][dim] == 0)
8665 {
8666 for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
8667 {
8668 if (zones->shift[z][dim] > 0)
8669 {
8670                     zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
8671                                                   zones->size[zi].x1[dim]+rcs);
8672 }
8673 }
8674 }
8675 }
8676 }
8677
8678 for (z = zone_start; z < zone_end; z++)
8679 {
8680 /* Initialization only required to keep the compiler happy */
8681 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
8682 int nc, c;
8683
8684 /* To determine the bounding box for a zone we need to find
8685          * the extremes over 4, 2 or 1 corners.
8686 */
8687 nc = 1 << (ddbox->npbcdim - 1);
8688
8689 for (c = 0; c < nc; c++)
8690 {
8691             /* Set up a zone corner at x=0, ignoring triclinic couplings */
8692 corner[XX0] = 0;
8693 if ((c & 1) == 0)
8694 {
8695 corner[YY1] = zones->size[z].x0[YY1];
8696 }
8697 else
8698 {
8699 corner[YY1] = zones->size[z].x1[YY1];
8700 }
8701 if ((c & 2) == 0)
8702 {
8703 corner[ZZ2] = zones->size[z].x0[ZZ2];
8704 }
8705 else
8706 {
8707 corner[ZZ2] = zones->size[z].x1[ZZ2];
8708 }
8709 if (dd->ndim == 1 && box[ZZ2][YY1] != 0)
8710 {
8711 /* With 1D domain decomposition the cg's are not in
8712                      * the triclinic box, but in a box that is triclinic in x-y and rectangular in y-z.
8713 * Shift y back, so it will later end up at 0.
8714 */
8715 corner[YY1] -= corner[ZZ2]*box[ZZ2][YY1]/box[ZZ2][ZZ2];
8716 }
8717 /* Apply the triclinic couplings */
8718 assert(ddbox->npbcdim <= DIM)((void) (0));
8719 for (i = YY1; i < ddbox->npbcdim; i++)
8720 {
8721 for (j = XX0; j < i; j++)
8722 {
8723 corner[j] += corner[i]*box[i][j]/box[i][i];
8724 }
8725 }
8726 if (c == 0)
8727 {
8728 copy_rvec(corner, corner_min);
8729 copy_rvec(corner, corner_max);
8730 }
8731 else
8732 {
8733 for (i = 0; i < DIM3; i++)
8734 {
8735                     corner_min[i] = min(corner_min[i], corner[i]);
8736                     corner_max[i] = max(corner_max[i], corner[i]);
8737 }
8738 }
8739 }
8740         /* Copy the extreme corners without offset along x */
8741 for (i = 0; i < DIM3; i++)
8742 {
8743 zones->size[z].bb_x0[i] = corner_min[i];
8744 zones->size[z].bb_x1[i] = corner_max[i];
8745 }
8746 /* Add the offset along x */
8747 zones->size[z].bb_x0[XX0] += zones->size[z].x0[XX0];
8748 zones->size[z].bb_x1[XX0] += zones->size[z].x1[XX0];
8749 }
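    /* Worked example (illustrative, hypothetical numbers): for a triclinic box
     * with box[ZZ][YY] = 2 and box[ZZ][ZZ] = 10, a corner with corner[ZZ] = 5
     * picks up a shear of 5*2/10 = 1 along y. Because this shift differs per
     * corner, the bounding box has to take the extremes over all
     * 2^(npbcdim-1) corners, as done in the loop above.
     */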
8750
8751 if (zone_start == 0)
8752 {
8753 vol = 1;
8754 for (dim = 0; dim < DIM3; dim++)
8755 {
8756 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
8757 }
8758 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
8759 }
8760
8761 if (debug)
8762 {
8763 for (z = zone_start; z < zone_end; z++)
8764 {
8765 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8766 z,
8767 zones->size[z].x0[XX0], zones->size[z].x1[XX0],
8768 zones->size[z].x0[YY1], zones->size[z].x1[YY1],
8769 zones->size[z].x0[ZZ2], zones->size[z].x1[ZZ2]);
8770 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8771 z,
8772 zones->size[z].bb_x0[XX0], zones->size[z].bb_x1[XX0],
8773 zones->size[z].bb_x0[YY1], zones->size[z].bb_x1[YY1],
8774 zones->size[z].bb_x0[ZZ2], zones->size[z].bb_x1[ZZ2]);
8775 }
8776 }
8777}
8778
8779static int comp_cgsort(const void *a, const void *b)
8780{
8781 int comp;
8782
8783 gmx_cgsort_t *cga, *cgb;
8784 cga = (gmx_cgsort_t *)a;
8785 cgb = (gmx_cgsort_t *)b;
8786
8787 comp = cga->nsc - cgb->nsc;
8788 if (comp == 0)
8789 {
8790 comp = cga->ind_gl - cgb->ind_gl;
8791 }
8792
8793 return comp;
8794}
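
comp_cgsort() is a two-key comparator: the primary key is the ns grid cell and ties are broken on the global index, which makes the qsort order deterministic. A tiny runnable demonstration on a simplified stand-in struct (hypothetical names, not part of the sources):

#include <stdio.h>
#include <stdlib.h>

typedef struct { int nsc; int ind_gl; } sort_demo_t;

/* Order on the grid cell first, then on the global index */
static int comp_demo(const void *a, const void *b)
{
    const sort_demo_t *ca = (const sort_demo_t *)a;
    const sort_demo_t *cb = (const sort_demo_t *)b;
    int comp = ca->nsc - cb->nsc;

    return (comp != 0) ? comp : ca->ind_gl - cb->ind_gl;
}

int main(void)
{
    sort_demo_t s[] = { {2, 7}, {1, 9}, {2, 3} };
    int         i;

    qsort(s, 3, sizeof(s[0]), comp_demo);
    /* Prints: 1/9 2/3 2/7 */
    for (i = 0; i < 3; i++)
    {
        printf("%d/%d ", s[i].nsc, s[i].ind_gl);
    }
    printf("\n");

    return 0;
}
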
8795
8796static void order_int_cg(int n, const gmx_cgsort_t *sort,
8797 int *a, int *buf)
8798{
8799 int i;
8800
8801 /* Order the data */
8802 for (i = 0; i < n; i++)
8803 {
8804 buf[i] = a[sort[i].ind];
8805 }
8806
8807 /* Copy back to the original array */
8808 for (i = 0; i < n; i++)
8809 {
8810 a[i] = buf[i];
8811 }
8812}
8813
8814static void order_vec_cg(int n, const gmx_cgsort_t *sort,
8815 rvec *v, rvec *buf)
8816{
8817 int i;
8818
8819 /* Order the data */
8820 for (i = 0; i < n; i++)
8821 {
8822 copy_rvec(v[sort[i].ind], buf[i]);
8823 }
8824
8825 /* Copy back to the original array */
8826 for (i = 0; i < n; i++)
8827 {
8828 copy_rvec(buf[i], v[i]);
8829 }
8830}
8831
8832static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
8833 rvec *v, rvec *buf)
8834{
8835 int a, atot, cg, cg0, cg1, i;
8836
8837 if (cgindex == NULL((void*)0))
8838 {
8839         /* Avoid the useless loop over the atoms within a cg */
8840 order_vec_cg(ncg, sort, v, buf);
8841
8842 return;
8843 }
8844
8845 /* Order the data */
8846 a = 0;
8847 for (cg = 0; cg < ncg; cg++)
8848 {
8849 cg0 = cgindex[sort[cg].ind];
8850 cg1 = cgindex[sort[cg].ind+1];
8851 for (i = cg0; i < cg1; i++)
8852 {
8853 copy_rvec(v[i], buf[a]);
8854 a++;
8855 }
8856 }
8857 atot = a;
8858
8859 /* Copy back to the original array */
8860 for (a = 0; a < atot; a++)
8861 {
8862 copy_rvec(buf[a], v[a]);
8863 }
8864}
8865
8866static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
8867 int nsort_new, gmx_cgsort_t *sort_new,
8868 gmx_cgsort_t *sort1)
8869{
8870 int i1, i2, i_new;
8871
8872 /* The new indices are not very ordered, so we qsort them */
8873 gmx_qsort_threadsafegmx_qsort(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
8874
8875 /* sort2 is already ordered, so now we can merge the two arrays */
8876 i1 = 0;
8877 i2 = 0;
8878 i_new = 0;
8879 while (i2 < nsort2 || i_new < nsort_new)
8880 {
8881 if (i2 == nsort2)
8882 {
8883 sort1[i1++] = sort_new[i_new++];
8884 }
8885 else if (i_new == nsort_new)
8886 {
8887 sort1[i1++] = sort2[i2++];
8888 }
8889 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
8890 (sort2[i2].nsc == sort_new[i_new].nsc &&
8891 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
8892 {
8893 sort1[i1++] = sort2[i2++];
8894 }
8895 else
8896 {
8897 sort1[i1++] = sort_new[i_new++];
8898 }
8899 }
8900}
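
ordered_sort() is a textbook two-way merge: the stationary list is already ordered, the new list is qsorted first, and the merge preserves the combined (nsc, ind_gl) order. The same merge on plain, already sorted int arrays, as a hedged sketch with hypothetical names:

/* Merge two ascending arrays s1 (n1 entries) and s2 (n2 entries) into out */
static void merge_sorted(const int *s1, int n1, const int *s2, int n2, int *out)
{
    int i1 = 0, i2 = 0, k = 0;

    while (i1 < n1 || i2 < n2)
    {
        if (i2 == n2 || (i1 < n1 && s1[i1] <= s2[i2]))
        {
            out[k++] = s1[i1++];
        }
        else
        {
            out[k++] = s2[i2++];
        }
    }
}
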
8901
8902static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
8903{
8904 gmx_domdec_sort_t *sort;
8905 gmx_cgsort_t *cgsort, *sort_i;
8906 int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
8907 int sort_last, sort_skip;
8908
8909 sort = dd->comm->sort;
8910
8911 a = fr->ns.grid->cell_index;
8912
8913 moved = NSGRID_SIGNAL_MOVED_FAC4*fr->ns.grid->ncells;
8914
8915 if (ncg_home_old >= 0)
8916 {
8917 /* The charge groups that remained in the same ns grid cell
8918          * are completely ordered. So we can sort efficiently by sorting
8919          * only the charge groups that moved and merging them into the stationary list.
8920 */
8921 ncg_new = 0;
8922 nsort2 = 0;
8923 nsort_new = 0;
8924 for (i = 0; i < dd->ncg_home; i++)
8925 {
8926 /* Check if this cg did not move to another node */
8927 if (a[i] < moved)
8928 {
8929 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
8930 {
8931 /* This cg is new on this node or moved ns grid cell */
8932 if (nsort_new >= sort->sort_new_nalloc)
8933 {
8934 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
8935                     srenew(sort->sort_new, sort->sort_new_nalloc);
8936 }
8937 sort_i = &(sort->sort_new[nsort_new++]);
8938 }
8939 else
8940 {
8941 /* This cg did not move */
8942 sort_i = &(sort->sort2[nsort2++]);
8943 }
8944 /* Sort on the ns grid cell indices
8945 * and the global topology index.
8946 * index_gl is irrelevant with cell ns,
8947 * but we set it here anyhow to avoid a conditional.
8948 */
8949 sort_i->nsc = a[i];
8950 sort_i->ind_gl = dd->index_gl[i];
8951 sort_i->ind = i;
8952 ncg_new++;
8953 }
8954 }
8955 if (debug)
8956 {
8957 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
8958 nsort2, nsort_new);
8959 }
8960 /* Sort efficiently */
8961 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
8962 sort->sort);
8963 }
8964 else
8965 {
8966 cgsort = sort->sort;
8967 ncg_new = 0;
8968 for (i = 0; i < dd->ncg_home; i++)
8969 {
8970 /* Sort on the ns grid cell indices
8971 * and the global topology index
8972 */
8973 cgsort[i].nsc = a[i];
8974 cgsort[i].ind_gl = dd->index_gl[i];
8975 cgsort[i].ind = i;
8976 if (cgsort[i].nsc < moved)
8977 {
8978 ncg_new++;
8979 }
8980 }
8981 if (debug)
8982 {
8983 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
8984 }
8985 /* Determine the order of the charge groups using qsort */
8986 gmx_qsort_threadsafegmx_qsort(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
8987 }
8988
8989 return ncg_new;
8990}
8991
8992static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
8993{
8994 gmx_cgsort_t *sort;
8995 int ncg_new, i, *a, na;
8996
8997 sort = dd->comm->sort->sort;
8998
8999 nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
9000
9001 ncg_new = 0;
9002 for (i = 0; i < na; i++)
9003 {
9004 if (a[i] >= 0)
9005 {
9006 sort[ncg_new].ind = a[i];
9007 ncg_new++;
9008 }
9009 }
9010
9011 return ncg_new;
9012}
9013
9014static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
9015 int ncg_home_old)
9016{
9017 gmx_domdec_sort_t *sort;
9018 gmx_cgsort_t *cgsort, *sort_i;
9019 int *cgindex;
9020 int ncg_new, i, *ibuf, cgsize;
9021 rvec *vbuf;
9022
9023 sort = dd->comm->sort;
9024
9025 if (dd->ncg_home > sort->sort_nalloc)
9026 {
9027 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
9028         srenew(sort->sort, sort->sort_nalloc);
9029         srenew(sort->sort2, sort->sort_nalloc);
9030 }
9031 cgsort = sort->sort;
9032
9033 switch (fr->cutoff_scheme)
9034 {
9035 case ecutsGROUP:
9036 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
9037 break;
9038 case ecutsVERLET:
9039 ncg_new = dd_sort_order_nbnxn(dd, fr);
9040 break;
9041 default:
9042             gmx_incons("unimplemented");
9043 ncg_new = 0;
9044 }
9045
9046 /* We alloc with the old size, since cgindex is still old */
9047 vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
9048 vbuf = dd->comm->vbuf.v;
9049
9050 if (dd->comm->bCGs)
9051 {
9052 cgindex = dd->cgindex;
9053 }
9054 else
9055 {
9056 cgindex = NULL((void*)0);
9057 }
9058
9059 /* Remove the charge groups which are no longer at home here */
9060 dd->ncg_home = ncg_new;
9061 if (debug)
9062 {
9063 fprintf(debug, "Set the new home charge group count to %d\n",
9064 dd->ncg_home);
9065 }
9066
9067 /* Reorder the state */
9068 for (i = 0; i < estNR; i++)
9069 {
9070         if (EST_DISTR(i) && (state->flags & (1<<i)))
9071 {
9072 switch (i)
9073 {
9074 case estX:
9075 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
9076 break;
9077 case estV:
9078 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
9079 break;
9080 case estSDX:
9081 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->sd_X, vbuf);
9082 break;
9083 case estCGP:
9084 order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
9085 break;
9086 case estLD_RNG:
9087 case estLD_RNGI:
9088 case estDISRE_INITF:
9089 case estDISRE_RM3TAV:
9090 case estORIRE_INITF:
9091 case estORIRE_DTAV:
9092 /* No ordering required */
9093 break;
9094 default:
9095                     gmx_incons("Unknown state entry encountered in dd_sort_state");
9096 break;
9097 }
9098 }
9099 }
9100 if (fr->cutoff_scheme == ecutsGROUP)
9101 {
9102 /* Reorder cgcm */
9103 order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
9104 }
9105
9106 if (dd->ncg_home+1 > sort->ibuf_nalloc)
9107 {
9108 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
9109         srenew(sort->ibuf, sort->ibuf_nalloc);
9110 }
9111 ibuf = sort->ibuf;
9112 /* Reorder the global cg index */
9113 order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
9114 /* Reorder the cginfo */
9115 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
9116 /* Rebuild the local cg index */
9117 if (dd->comm->bCGs)
9118 {
9119 ibuf[0] = 0;
9120 for (i = 0; i < dd->ncg_home; i++)
9121 {
9122 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
9123 ibuf[i+1] = ibuf[i] + cgsize;
9124 }
9125 for (i = 0; i < dd->ncg_home+1; i++)
9126 {
9127 dd->cgindex[i] = ibuf[i];
9128 }
9129 }
9130 else
9131 {
9132 for (i = 0; i < dd->ncg_home+1; i++)
9133 {
9134 dd->cgindex[i] = i;
9135 }
9136 }
9137 /* Set the home atom number */
9138 dd->nat_home = dd->cgindex[dd->ncg_home];
9139
9140 if (fr->cutoff_scheme == ecutsVERLET)
9141 {
9142 /* The atoms are now exactly in grid order, update the grid order */
9143 nbnxn_set_atomorder(fr->nbv->nbs);
9144 }
9145 else
9146 {
9147 /* Copy the sorted ns cell indices back to the ns grid struct */
9148 for (i = 0; i < dd->ncg_home; i++)
9149 {
9150 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
9151 }
9152 fr->ns.grid->nr = dd->ncg_home;
9153 }
9154}
9155
9156static void add_dd_statistics(gmx_domdec_t *dd)
9157{
9158 gmx_domdec_comm_t *comm;
9159 int ddnat;
9160
9161 comm = dd->comm;
9162
9163 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9164 {
9165 comm->sum_nat[ddnat-ddnatZONE] +=
9166 comm->nat[ddnat] - comm->nat[ddnat-1];
9167 }
9168 comm->ndecomp++;
9169}
9170
9171void reset_dd_statistics_counters(gmx_domdec_t *dd)
9172{
9173 gmx_domdec_comm_t *comm;
9174 int ddnat;
9175
9176 comm = dd->comm;
9177
9178 /* Reset all the statistics and counters for total run counting */
9179 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9180 {
9181 comm->sum_nat[ddnat-ddnatZONE] = 0;
9182 }
9183 comm->ndecomp = 0;
9184 comm->nload = 0;
9185 comm->load_step = 0;
9186 comm->load_sum = 0;
9187 comm->load_max = 0;
9188 clear_ivec(comm->load_lim);
9189 comm->load_mdf = 0;
9190 comm->load_pme = 0;
9191}
9192
9193void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
9194{
9195 gmx_domdec_comm_t *comm;
9196 int ddnat;
9197 double av;
9198
9199 comm = cr->dd->comm;
9200
9201 gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
9202
9203 if (fplog == NULL((void*)0))
9204 {
9205 return;
9206 }
9207
9208 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
9209
9210 for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
9211 {
9212 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
9213 switch (ddnat)
9214 {
9215 case ddnatZONE:
9216 fprintf(fplog,
9217 " av. #atoms communicated per step for force: %d x %.1f\n",
9218 2, av);
9219 break;
9220 case ddnatVSITE:
9221 if (cr->dd->vsite_comm)
9222 {
9223 fprintf(fplog,
9224 " av. #atoms communicated per step for vsites: %d x %.1f\n",
9225                                 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
9226 av);
9227 }
9228 break;
9229 case ddnatCON:
9230 if (cr->dd->constraint_comm)
9231 {
9232 fprintf(fplog,
9233 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9234 1 + ir->nLincsIter, av);
9235 }
9236 break;
9237 default:
9238                 gmx_incons(" Unknown type for DD statistics");
9239 }
9240 }
9241 fprintf(fplog, "\n");
9242
9243     if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
9244 {
9245 print_dd_load_av(fplog, cr->dd);
9246 }
9247}
9248
9249void dd_partition_system(FILE *fplog,
9250 gmx_int64_t step,
9251 t_commrec *cr,
9252 gmx_bool bMasterState,
9253 int nstglobalcomm,
9254 t_state *state_global,
9255 gmx_mtop_t *top_global,
9256 t_inputrec *ir,
9257 t_state *state_local,
9258 rvec **f,
9259 t_mdatoms *mdatoms,
9260 gmx_localtop_t *top_local,
9261 t_forcerec *fr,
9262 gmx_vsite_t *vsite,
9263 gmx_shellfc_t shellfc,
9264 gmx_constr_t constr,
9265 t_nrnb *nrnb,
9266 gmx_wallcycle_t wcycle,
9267 gmx_bool bVerbose)
9268{
9269 gmx_domdec_t *dd;
9270 gmx_domdec_comm_t *comm;
9271 gmx_ddbox_t ddbox = {0};
9272 t_block *cgs_gl;
9273 gmx_int64_t step_pcoupl;
9274 rvec cell_ns_x0, cell_ns_x1;
9275 int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
9276 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
9277 gmx_bool bRedist, bSortCG, bResortAll;
9278 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
9279 real grid_density;
9280 char sbuf[22];
9281
9282 dd = cr->dd;
9283 comm = dd->comm;
9284
9285     bBoxChanged = (bMasterState || DEFORM(*ir));
9286 if (ir->epc != epcNO)
9287 {
9288         /* With nstpcouple > 1 pressure coupling happens
9289          * one step after calculating the pressure.
9290 * Box scaling happens at the end of the MD step,
9291 * after the DD partitioning.
9292 * We therefore have to do DLB in the first partitioning
9293          * after an MD step where P-coupling occurred.
9294 * We need to determine the last step in which p-coupling occurred.
9295 * MRS -- need to validate this for vv?
9296 */
9297 n = ir->nstpcouple;
9298 if (n == 1)
9299 {
9300 step_pcoupl = step - 1;
9301 }
9302 else
9303 {
9304 step_pcoupl = ((step - 1)/n)*n + 1;
9305 }
9306 if (step_pcoupl >= comm->partition_step)
9307 {
9308 bBoxChanged = TRUE1;
9309 }
9310 }
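    /* Worked example (illustrative, hypothetical numbers): the code above takes
     * the most recent step of the form k*n + 1. With nstpcouple n = 10 and
     * step = 25 this gives ((25-1)/10)*10 + 1 = 21, so bBoxChanged is set for
     * the first partitioning at or after that P-coupling step.
     */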
9311
9312 bNStGlobalComm = (step % nstglobalcomm == 0);
9313
9314 if (!comm->bDynLoadBal)
9315 {
9316 bDoDLB = FALSE0;
9317 }
9318 else
9319 {
9320         /* Should we do dynamic load balancing this step?
9321 * Since it requires (possibly expensive) global communication,
9322 * we might want to do DLB less frequently.
9323 */
9324 if (bBoxChanged || ir->epc != epcNO)
9325 {
9326 bDoDLB = bBoxChanged;
9327 }
9328 else
9329 {
9330 bDoDLB = bNStGlobalComm;
9331 }
9332 }
9333
9334 /* Check if we have recorded loads on the nodes */
9335 if (comm->bRecordLoad && dd_load_count(comm))
9336 {
9337 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
9338 {
9339 /* Check if we should use DLB at the second partitioning
9340 * and every 100 partitionings,
9341 * so the extra communication cost is negligible.
9342 */
9343 n = max(100, nstglobalcomm)(((100) > (nstglobalcomm)) ? (100) : (nstglobalcomm) );
9344 bCheckDLB = (comm->n_load_collect == 0 ||
9345 comm->n_load_have % n == n-1);
9346 }
9347 else
9348 {
9349 bCheckDLB = FALSE0;
9350 }
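        /* Note (illustrative): with n = max(100, nstglobalcomm) the check above
         * fires at the first load collection (n_load_collect == 0) and then
         * whenever n_load_have % n == n-1, i.e. roughly once every n
         * collections, so its global communication cost stays negligible.
         */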
9351
9352         /* Print the load to the log file at every nstlog step and at the first and last step */
9353 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
9354 comm->n_load_collect == 0 ||
9355 (ir->nsteps >= 0 &&
9356 (step + ir->nstlist > ir->init_step + ir->nsteps)));
9357
9358 /* Avoid extra communication due to verbose screen output
9359 * when nstglobalcomm is set.
9360 */
9361 if (bDoDLB || bLogLoad || bCheckDLB ||
9362 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
9363 {
9364 get_load_distribution(dd, wcycle);
9365 if (DDMASTER(dd)((dd)->rank == (dd)->masterrank))
9366 {
9367 if (bLogLoad)
9368 {
9369 dd_print_load(fplog, dd, step-1);
9370 }
9371 if (bVerbose)
9372 {
9373 dd_print_load_verbose(dd);
9374 }
9375 }
9376 comm->n_load_collect++;
9377
9378 if (bCheckDLB)
9379 {
9380 /* Since the timings are node dependent, the master decides */
9381 if (DDMASTER(dd)((dd)->rank == (dd)->masterrank))
9382 {
9383 bTurnOnDLB =
9384 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS0.05);
9385 if (debug)
9386 {
9387 fprintf(debug, "step %s, imb loss %f\n",
9388 gmx_step_str(step, sbuf),
9389 dd_force_imb_perf_loss(dd));
9390 }
9391 }
9392 dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
9393 if (bTurnOnDLB)
9394 {
9395 turn_on_dlb(fplog, cr, step);
9396 bDoDLB = TRUE1;
9397 }
9398 }
9399 }
9400 comm->n_load_have++;
9401 }
9402
9403 cgs_gl = &comm->cgs_gl;
9404
9405 bRedist = FALSE0;
9406 if (bMasterState)
9407 {
9408 /* Clear the old state */
9409 clear_dd_indices(dd, 0, 0);
9410 ncgindex_set = 0;
9411
9412 set_ddbox(dd, bMasterState, cr, ir, state_global->box,
9413 TRUE1, cgs_gl, state_global->x, &ddbox);
9414
9415 get_cg_distribution(fplog, step, dd, cgs_gl,
9416 state_global->box, &ddbox, state_global->x);
9417
9418 dd_distribute_state(dd, cgs_gl,
9419 state_global, state_local, f);
9420
9421 dd_make_local_cgs(dd, &top_local->cgs);
9422
9423 /* Ensure that we have space for the new distribution */
9424 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
9425
9426 if (fr->cutoff_scheme == ecutsGROUP)
9427 {
9428 calc_cgcm(fplog, 0, dd->ncg_home,
9429 &top_local->cgs, state_local->x, fr->cg_cm);
9430 }
9431
9432 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home)(nrnb)->n[eNR_CGCM] += dd->nat_home;
9433
9434 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9435 }
9436 else if (state_local->ddp_count != dd->ddp_count)
9437 {
9438 if (state_local->ddp_count > dd->ddp_count)
9439 {
9440             gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
9441 }
9442
9443 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
9444 {
9445             gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
9446 }
9447
9448 /* Clear the old state */
9449 clear_dd_indices(dd, 0, 0);
9450
9451 /* Build the new indices */
9452 rebuild_cgindex(dd, cgs_gl->index, state_local);
9453 make_dd_indices(dd, cgs_gl->index, 0);
9454 ncgindex_set = dd->ncg_home;
9455
9456 if (fr->cutoff_scheme == ecutsGROUP)
9457 {
9458 /* Redetermine the cg COMs */
9459 calc_cgcm(fplog, 0, dd->ncg_home,
9460 &top_local->cgs, state_local->x, fr->cg_cm);
9461 }
9462
9463 inc_nrnb(nrnb, eNR_CGCM, dd->nat_home)(nrnb)->n[eNR_CGCM] += dd->nat_home;
9464
9465 dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
9466
9467 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9468 TRUE1, &top_local->cgs, state_local->x, &ddbox);
9469
9470 bRedist = comm->bDynLoadBal;
9471 }
9472 else
9473 {
9474 /* We have the full state, only redistribute the cgs */
9475
9476 /* Clear the non-home indices */
9477 clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
9478 ncgindex_set = 0;
9479
9480 /* Avoid global communication for dim's without pbc and -gcom */
9481 if (!bNStGlobalComm)
9482 {
9483 copy_rvec(comm->box0, ddbox.box0 );
9484 copy_rvec(comm->box_size, ddbox.box_size);
9485 }
9486 set_ddbox(dd, bMasterState, cr, ir, state_local->box,
9487 bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
9488
9489 bBoxChanged = TRUE1;
9490 bRedist = TRUE1;
9491 }
9492 /* For dim's without pbc and -gcom */
9493 copy_rvec(ddbox.box0, comm->box0 );
9494 copy_rvec(ddbox.box_size, comm->box_size);
9495
9496 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
9497 step, wcycle);
9498
9499 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
9500 {
9501 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
9502 }
9503
9504 /* Check if we should sort the charge groups */
9505 if (comm->nstSortCG > 0)
9506 {
9507 bSortCG = (bMasterState ||
9508 (bRedist && (step % comm->nstSortCG == 0)));
9509 }
9510 else
9511 {
9512 bSortCG = FALSE0;
9513 }
9514
9515 ncg_home_old = dd->ncg_home;
9516
9517 ncg_moved = 0;
9518 if (bRedist)
9519 {
9520 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
9521
9522 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
9523 state_local, f, fr,
9524 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
9525
9526 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
9527 }
9528
9529 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
9530 dd, &ddbox,
9531 &comm->cell_x0, &comm->cell_x1,
9532 dd->ncg_home, fr->cg_cm,
9533 cell_ns_x0, cell_ns_x1, &grid_density);
9534
9535 if (bBoxChanged)
9536 {
9537 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
9538 }
9539
9540 switch (fr->cutoff_scheme)
9541 {
9542 case ecutsGROUP:
9543 copy_ivec(fr->ns.grid->n, ncells_old);
9544 grid_first(fplog, fr->ns.grid, dd, &ddbox,
9545 state_local->box, cell_ns_x0, cell_ns_x1,
9546 fr->rlistlong, grid_density);
9547 break;
9548 case ecutsVERLET:
9549 nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX0], &ncells_old[YY1]);
9550 break;
9551 default:
9552             gmx_incons("unimplemented");
9553 }
9554 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9555 copy_ivec(ddbox.tric_dir, comm->tric_dir);
9556
9557 if (bSortCG)
9558 {
9559 wallcycle_sub_start(wcycle, ewcsDD_GRID);
9560
9561 /* Sort the state on charge group position.
9562 * This enables exact restarts from this step.
9563 * It also improves performance by about 15% with larger numbers
9564 * of atoms per node.
9565 */
9566
9567 /* Fill the ns grid with the home cell,
9568 * so we can sort with the indices.
9569 */
9570 set_zones_ncg_home(dd);
9571
9572 switch (fr->cutoff_scheme)
9573 {
9574 case ecutsVERLET:
9575 set_zones_size(dd, state_local->box, &ddbox, 0, 1);
9576
9577 nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
9578 0,
9579 comm->zones.size[0].bb_x0,
9580 comm->zones.size[0].bb_x1,
9581 0, dd->ncg_home,
9582 comm->zones.dens_zone0,
9583 fr->cginfo,
9584 state_local->x,
9585 ncg_moved, bRedist ? comm->moved : NULL((void*)0),
9586 fr->nbv->grp[eintLocal].kernel_type,
9587 fr->nbv->grp[eintLocal].nbat);
9588
9589 nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX0], &ncells_new[YY1]);
9590 break;
9591 case ecutsGROUP:
9592 fill_grid(&comm->zones, fr->ns.grid, dd->ncg_home,
9593 0, dd->ncg_home, fr->cg_cm);
9594
9595 copy_ivec(fr->ns.grid->n, ncells_new);
9596 break;
9597 default:
9598                 gmx_incons("unimplemented");
9599 }
9600
9601 bResortAll = bMasterState;
9602
9603         /* Check if we can use the old order and ns grid cell indices
9604 * of the charge groups to sort the charge groups efficiently.
9605 */
9606 if (ncells_new[XX0] != ncells_old[XX0] ||
9607 ncells_new[YY1] != ncells_old[YY1] ||
9608 ncells_new[ZZ2] != ncells_old[ZZ2])
9609 {
9610 bResortAll = TRUE1;
9611 }
9612
9613 if (debug)
9614 {
9615 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
9616 gmx_step_str(step, sbuf), dd->ncg_home);
9617 }
9618 dd_sort_state(dd, fr->cg_cm, fr, state_local,
9619 bResortAll ? -1 : ncg_home_old);
9620 /* Rebuild all the indices */
9621 ga2la_clear(dd->ga2la);
9622 ncgindex_set = 0;
9623
9624 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
9625 }
9626
9627 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
9628
9629     /* Set up the communication and communicate the coordinates */
9630 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
9631
9632 /* Set the indices */
9633 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
9634
9635 /* Set the charge group boundaries for neighbor searching */
9636 set_cg_boundaries(&comm->zones);
9637
9638 if (fr->cutoff_scheme == ecutsVERLET)
9639 {
9640 set_zones_size(dd, state_local->box, &ddbox,
9641 bSortCG ? 1 : 0, comm->zones.n);
9642 }
9643
9644 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
9645
9646 /*
9647 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9648 -1,state_local->x,state_local->box);
9649 */
9650
9651 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
9652
9653 /* Extract a local topology from the global topology */
9654 for (i = 0; i < dd->ndim; i++)
9655 {
9656 np[dd->dim[i]] = comm->cd[i].np;
9657 }
9658 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
9659 comm->cellsize_min, np,
9660 fr,
9661 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
9662 vsite, top_global, top_local);
9663
9664 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
9665
9666 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
9667
9668 /* Set up the special atom communication */
9669 n = comm->nat[ddnatZONE];
9670 for (i = ddnatZONE+1; i < ddnatNR; i++)
9671 {
9672 switch (i)
9673 {
9674 case ddnatVSITE:
9675 if (vsite && vsite->n_intercg_vsite)
9676 {
9677 n = dd_make_local_vsites(dd, n, top_local->idef.il);
9678 }
9679 break;
9680 case ddnatCON:
9681 if (dd->bInterCGcons || dd->bInterCGsettles)
9682 {
9683                     /* Only for inter-cg constraints do we need special code */
9684 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
9685 constr, ir->nProjOrder,
9686 top_local->idef.il);
9687 }
9688 break;
9689 default:
9690                 gmx_incons("Unknown special atom type setup");
9691 }
9692 comm->nat[i] = n;
9693 }
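Each iteration of this loop appends the atoms needed for one kind of special communication to the running count n, so comm->nat[] is cumulative: nat[ddnatZONE] <= nat[ddnatVSITE] <= nat[ddnatCON], and the final entry comm->nat[ddnatNR-1] (used just below) is the total number of atoms the local state must be able to hold.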
9694
9695 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
9696
9697 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
9698
9699 /* Make space for the extra coordinates for virtual site
9700 * or constraint communication.
9701 */
9702 state_local->natoms = comm->nat[ddnatNR-1];
9703 if (state_local->natoms > state_local->nalloc)
9704 {
9705 dd_realloc_state(state_local, f, state_local->natoms);
9706 }
9707
9708 if (fr->bF_NoVirSum)
9709 {
9710 if (vsite && vsite->n_intercg_vsite)
9711 {
9712 nat_f_novirsum = comm->nat[ddnatVSITE];
9713 }
9714 else
9715 {
9716             if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
9717 {
9718 nat_f_novirsum = dd->nat_tot;
9719 }
9720 else
9721 {
9722 nat_f_novirsum = dd->nat_home;
9723 }
9724 }
9725 }
9726 else
9727 {
9728 nat_f_novirsum = 0;
9729 }
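The EEL_FULL() test above is true for the full-electrostatics Coulomb types: PME, PME-Switch, PME-User, PME-User-Switch, P3M-AD, Ewald and Poisson. A sketch of that shape (the real definition may group the PME variants through a helper macro):

    #define EEL_FULL(eel) ((eel) == eelPME || (eel) == eelPMESWITCH ||         \
                           (eel) == eelPMEUSER || (eel) == eelPMEUSERSWITCH || \
                           (eel) == eelP3M_AD || (eel) == eelEWALD ||          \
                           (eel) == eelPOISSON)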
9730
9731 /* Set the number of atoms required for the force calculation.
9732 * Forces need to be constrained when using a twin-range setup
9733 * or with energy minimization. For simple simulations we could
9734 * avoid some allocation, zeroing and copying, but this is
9735      * probably not worth the complications and checking.
9736 */
9737 forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
9738 dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
9739
9740     /* We make all the mdatoms up to nat_tot_con.
9741 * We could save some work by only setting invmass
9742 * between nat_tot and nat_tot_con.
9743 */
9744 /* This call also sets the new number of home particles to dd->nat_home */
9745 atoms2md(top_global, ir,
9746 comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
9747
9748 /* Now we have the charges we can sort the FE interactions */
9749 dd_sort_local_top(dd, mdatoms, top_local);
9750
9751     if (vsite != NULL)
9752 {
9753 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9754         split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
9755 }
9756
9757 if (shellfc)
9758 {
9759         /* Make the local shell data; currently no communication is done */
9760 make_local_shells(cr, mdatoms, shellfc);
9761 }
9762
9763 if (ir->implicit_solvent)
9764 {
9765 make_local_gb(cr, fr->born, ir->gb_algorithm);
9766 }
9767
9768 setup_bonded_threading(fr, &top_local->idef);
9769
9770     if (!(cr->duty & DUTY_PME))
9771 {
9772         /* Send the charges and/or c6/sigmas to our PME-only node */
9773 gmx_pme_send_parameters(cr, mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
9774 mdatoms->chargeA, mdatoms->chargeB,
9775 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
9776 mdatoms->sigmaA, mdatoms->sigmaB,
9777 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
9778 }
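DUTY_PME is a bit flag in cr->duty (value (1<<1) in this version); a rank that does not carry the PME duty uses gmx_pme_send_parameters() here to ship its (possibly perturbed) charges, sqrt(C6) and sigma parameters to its PME-only rank, together with the maximum PME grid shifts in x and y.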
9779
9780 if (constr)
9781 {
9782 set_constraints(constr, top_local, ir, mdatoms, cr);
9783 }
9784
9785 if (ir->ePull != epullNO)
9786 {
9787 /* Update the local pull groups */
9788 dd_make_local_pull_groups(dd, ir->pull, mdatoms);
9789 }
9790
9791 if (ir->bRot)
9792 {
9793 /* Update the local rotation groups */
9794 dd_make_local_rotation_groups(dd, ir->rot);
9795 }
9796
9797 if (ir->eSwapCoords != eswapNO)
9798 {
9799 /* Update the local groups needed for ion swapping */
9800 dd_make_local_swap_groups(dd, ir->swap);
9801 }
9802
9803 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
9804 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
9805
9806 add_dd_statistics(dd);
9807
9808 /* Make sure we only count the cycles for this DD partitioning */
9809 clear_dd_cycle_counts(dd);
9810
9811 /* Because the order of the atoms might have changed since
9812 * the last vsite construction, we need to communicate the constructing
9813 * atom coordinates again (for spreading the forces this MD step).
9814 */
9815 dd_move_x_vsites(dd, state_local->box, state_local->x);
9816
9817 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
9818
9819 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
9820 {
9821 dd_move_x(dd, state_local->box, state_local->x);
9822 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
9823 -1, state_local->x, state_local->box);
9824 }
9825
9826 /* Store the partitioning step */
9827 comm->partition_step = step;
9828
9829 /* Increase the DD partitioning counter */
9830 dd->ddp_count++;
9831 /* The state currently matches this DD partitioning count, store it */
9832 state_local->ddp_count = dd->ddp_count;
9833 if (bMasterState)
9834 {
9835         /* The DD master node knows the complete cg distribution;
9836          * store the count so we can possibly skip the cg info communication.
9837          */
9838 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
9839 }
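dd->ddp_count and state_local->ddp_count let later code verify that a state vector still matches the current decomposition before its local indices are trusted. A minimal illustrative check, not taken verbatim from this file:

    /* hypothetical guard a caller could use before relying on local indices */
    if (state_local->ddp_count != dd->ddp_count)
    {
        gmx_incons("state_local does not match the current DD partitioning");
    }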
9840
9841 if (comm->DD_debug > 0)
9842 {
9843 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9844 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
9845 "after partitioning");
9846 }
9847}