-/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
- *
+/*
+ * This file is part of the GROMACS molecular simulation package.
*
- * This file is part of Gromacs Copyright (c) 1991-2008
- * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
+ * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * And Hey:
- * Gnomes, ROck Monsters And Chili Sauce
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
-#include <stdio.h>
-#include <time.h>
+#include "gromacs/legacyheaders/domdec.h"
+
+#include "config.h"
+
+#include <assert.h>
#include <math.h>
-#include <string.h>
+#include <stdio.h>
#include <stdlib.h>
-#include "typedefs.h"
-#include "smalloc.h"
-#include "gmx_fatal.h"
-#include "gmx_fatal_collective.h"
-#include "vec.h"
-#include "domdec.h"
-#include "domdec_network.h"
-#include "nrnb.h"
-#include "pbc.h"
-#include "chargegroup.h"
-#include "constr.h"
-#include "mdatoms.h"
-#include "names.h"
-#include "force.h"
-#include "pme.h"
-#include "pull.h"
-#include "pull_rotation.h"
-#include "mdrun.h"
-#include "nsgrid.h"
-#include "shellfc.h"
-#include "mtop_util.h"
-#include "gmx_ga2la.h"
-#include "gmx_sort.h"
-#include "macros.h"
-#include "nbnxn_search.h"
-#include "bondf.h"
-#include "gmx_omp_nthreads.h"
-#include "gpu_utils.h"
-
-#include "gromacs/fileio/futil.h"
+#include <string.h>
+#include <time.h>
+
+#include "gromacs/bonded/bonded.h"
#include "gromacs/fileio/gmxfio.h"
#include "gromacs/fileio/pdbio.h"
+#include "gromacs/imd/imd.h"
+#include "gromacs/legacyheaders/bonded-threading.h"
+#include "gromacs/legacyheaders/chargegroup.h"
+#include "gromacs/legacyheaders/constr.h"
+#include "gromacs/legacyheaders/domdec_network.h"
+#include "gromacs/legacyheaders/force.h"
+#include "gromacs/legacyheaders/gmx_ga2la.h"
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/gpu_utils.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/legacyheaders/mdatoms.h"
+#include "gromacs/legacyheaders/mdrun.h"
+#include "gromacs/legacyheaders/names.h"
+#include "gromacs/legacyheaders/network.h"
+#include "gromacs/legacyheaders/nrnb.h"
+#include "gromacs/legacyheaders/nsgrid.h"
+#include "gromacs/legacyheaders/pme.h"
+#include "gromacs/legacyheaders/shellfc.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_search.h"
+#include "gromacs/pbcutil/ishift.h"
+#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/pulling/pull.h"
+#include "gromacs/pulling/pull_rotation.h"
+#include "gromacs/swap/swapcoords.h"
#include "gromacs/timing/wallcycle.h"
+#include "gromacs/topology/mtop_util.h"
+#include "gromacs/utility/basenetwork.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/futil.h"
#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/qsort_threadsafe.h"
+#include "gromacs/utility/smalloc.h"
#define DDRANK(dd, rank) (rank)
#define DDMASTERRANK(dd) (dd->masterrank)
double load_pme;
/* The last partition step */
- gmx_large_int_t partition_step;
+ gmx_int64_t partition_step;
/* Debugging */
int nstDDDump;
/* Factor to account for pressure scaling during nstlist steps */
#define DD_PRES_SCALE_MARGIN 1.02
-/* Allowed performance loss before we DLB or warn */
-#define DD_PERF_LOSS 0.05
+/* Turn on DLB when the load imbalance causes this amount of total loss.
+ * There is a bit of overhead with DLB and it's difficult to achieve
+ * a load imbalance of less than 2% with DLB.
+ */
+#define DD_PERF_LOSS_DLB_ON 0.02
+
+/* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
+#define DD_PERF_LOSS_WARN 0.05
#define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
{
gmx_domdec_master_t *ma = NULL;
int buf2[2], *ibuf, i, ncg_home = 0, *cg = NULL, nat_home = 0;
- t_block *cgs_gl;
if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
{
if (state_local->ddp_count == dd->ddp_count)
{
+ /* The local state and DD are in sync, use the DD indices */
ncg_home = dd->ncg_home;
cg = dd->index_gl;
nat_home = dd->nat_home;
}
else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
{
+ /* The DD is out of sync with the local state, but we have stored
+ * the cg indices with the local state, so we can use those.
+ */
+ t_block *cgs_gl;
+
cgs_gl = &dd->comm->cgs_gl;
ncg_home = state_local->ncg_gl;
gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
}
- buf2[0] = dd->ncg_home;
- buf2[1] = dd->nat_home;
+ buf2[0] = ncg_home;
+ buf2[1] = nat_home;
if (DDMASTER(dd))
{
ma = dd->ma;
/* Collect the charge group indices on the master */
dd_gatherv(dd,
- dd->ncg_home*sizeof(int), dd->index_gl,
+ ncg_home*sizeof(int), cg,
DDMASTER(dd) ? ma->ibuf : NULL,
DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
DDMASTER(dd) ? ma->cg : NULL);
case estCGP:
dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
break;
- case estLD_RNG:
- if (state->nrngi == 1)
- {
- if (DDMASTER(dd))
- {
- for (i = 0; i < state_local->nrng; i++)
- {
- state->ld_rng[i] = state_local->ld_rng[i];
- }
- }
- }
- else
- {
- dd_gather(dd, state_local->nrng*sizeof(state->ld_rng[0]),
- state_local->ld_rng, state->ld_rng);
- }
- break;
- case estLD_RNGI:
- if (state->nrngi == 1)
- {
- if (DDMASTER(dd))
- {
- state->ld_rngi[0] = state_local->ld_rngi[0];
- }
- }
- else
- {
- dd_gather(dd, sizeof(state->ld_rngi[0]),
- state_local->ld_rngi, state->ld_rngi);
- }
- break;
case estDISRE_INITF:
case estDISRE_RM3TAV:
case estORIRE_INITF:
case estCGP:
srenew(state->cg_p, state->nalloc);
break;
- case estLD_RNG:
- case estLD_RNGI:
case estDISRE_INITF:
case estDISRE_RM3TAV:
case estORIRE_INITF:
case estCGP:
dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
break;
- case estLD_RNG:
- if (state->nrngi == 1)
- {
- dd_bcastc(dd,
- state_local->nrng*sizeof(state_local->ld_rng[0]),
- state->ld_rng, state_local->ld_rng);
- }
- else
- {
- dd_scatter(dd,
- state_local->nrng*sizeof(state_local->ld_rng[0]),
- state->ld_rng, state_local->ld_rng);
- }
- break;
- case estLD_RNGI:
- if (state->nrngi == 1)
- {
- dd_bcastc(dd, sizeof(state_local->ld_rngi[0]),
- state->ld_rngi, state_local->ld_rngi);
- }
- else
- {
- dd_scatter(dd, sizeof(state_local->ld_rngi[0]),
- state->ld_rngi, state_local->ld_rngi);
- }
- break;
case estDISRE_INITF:
case estDISRE_RM3TAV:
case estORIRE_INITF:
return c;
}
-static void write_dd_grid_pdb(const char *fn, gmx_large_int_t step,
+static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
{
rvec grid_s[2], *grid_r = NULL, cx, r;
- char fname[STRLEN], format[STRLEN], buf[22];
+ char fname[STRLEN], buf[22];
FILE *out;
int a, i, d, z, y, x;
matrix tric;
snew(grid_r, 2*dd->nnodes);
}
- dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd) ? grid_r[0] : NULL);
+ dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : NULL);
if (DDMASTER(dd))
{
}
}
sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
- sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
out = gmx_fio_fopen(fname, "w");
gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
a = 1;
cx[YY] = grid_r[i*2+y][YY];
cx[ZZ] = grid_r[i*2+z][ZZ];
mvmul(tric, cx, r);
- fprintf(out, format, "ATOM", a++, "CA", "GLY", ' ', 1+i,
- ' ', 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol);
+ gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
+ 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
}
}
}
}
}
-void write_dd_pdb(const char *fn, gmx_large_int_t step, const char *title,
+void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
gmx_mtop_t *mtop, t_commrec *cr,
int natoms, rvec x[], matrix box)
{
- char fname[STRLEN], format[STRLEN], format4[STRLEN], buf[22];
+ char fname[STRLEN], buf[22];
FILE *out;
int i, ii, resnr, c;
char *atomname, *resname;
sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
- sprintf(format, "%s%s\n", get_pdbformat(), "%6.2f%6.2f");
- sprintf(format4, "%s%s\n", get_pdbformat4(), "%6.2f%6.2f");
-
out = gmx_fio_fopen(fname, "w");
fprintf(out, "TITLE %s\n", title);
{
b = dd->comm->zones.n + 1;
}
- fprintf(out, strlen(atomname) < 4 ? format : format4,
- "ATOM", (ii+1)%100000,
- atomname, resname, ' ', resnr%10000, ' ',
- 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b);
+ gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
+ 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
}
fprintf(out, "TER\n");
if (debug)
{
- fprintf(debug, "Receive coordinates from PP nodes:");
+ fprintf(debug, "Receive coordinates from PP ranks:");
for (x = 0; x < *nmy_ddnodes; x++)
{
fprintf(debug, " %d", (*my_ddnodes)[x]);
if (!bLocalCG[dd->index_gl[i]])
{
fprintf(stderr,
- "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
+ "DD rank %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
nerr++;
}
}
}
if (ngl != dd->ncg_tot)
{
- fprintf(stderr, "DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
+ fprintf(stderr, "DD rank %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
nerr++;
}
{
if (have[dd->gatindex[a]] > 0)
{
- fprintf(stderr, "DD node %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
+ fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
}
else
{
{
if (a >= dd->nat_tot)
{
- fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
+ fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
nerr++;
}
else
have[a] = 1;
if (dd->gatindex[a] != i)
{
- fprintf(stderr, "DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
+ fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
nerr++;
}
}
if (ngl != dd->nat_tot)
{
fprintf(stderr,
- "DD node %d, %s: %d global atom indices, %d local atoms\n",
+ "DD rank %d, %s: %d global atom indices, %d local atoms\n",
dd->rank, where, ngl, dd->nat_tot);
}
for (a = 0; a < dd->nat_tot; a++)
if (have[a] == 0)
{
fprintf(stderr,
- "DD node %d, %s: local atom %d, global %d has no global index\n",
+ "DD rank %d, %s: local atom %d, global %d has no global index\n",
dd->rank, where, a+1, dd->gatindex[a]+1);
}
}
if (nerr > 0)
{
- gmx_fatal(FARGS, "DD node %d, %s: %d atom/cg index inconsistencies",
+ gmx_fatal(FARGS, "DD rank %d, %s: %d atom/cg index inconsistencies",
dd->rank, where, nerr);
}
}
return grid_jump_limit;
}
-static gmx_bool check_grid_jump(gmx_large_int_t step,
+static gmx_bool check_grid_jump(gmx_int64_t step,
gmx_domdec_t *dd,
real cutoff,
gmx_ddbox_t *ddbox,
/* This error should never be triggered under normal
* circumstances, but you never know ...
*/
- gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
+ gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
gmx_step_str(step, buf),
dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
}
}
}
+enum {
+ setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY
+};
+
+/* Set the domain boundaries. Use for static (or no) load balancing,
+ * and also for the starting state for dynamic load balancing.
+ * setmode determines if and where the boundaries are stored, use enum above.
+ * Returns the number of communication pulses in npulse.
+ */
static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
- gmx_bool bMaster, ivec npulse)
+ int setmode, ivec npulse)
{
gmx_domdec_comm_t *comm;
int d, j;
{
/* Uniform grid */
cell_dx = ddbox->box_size[d]/dd->nc[d];
- if (bMaster)
+ switch (setmode)
{
- for (j = 0; j < dd->nc[d]+1; j++)
- {
- dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
- }
- }
- else
- {
- comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
- comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
+ case setcellsizeslbMASTER:
+ for (j = 0; j < dd->nc[d]+1; j++)
+ {
+ dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
+ }
+ break;
+ case setcellsizeslbLOCAL:
+ comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
+ comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
+ break;
+ default:
+ break;
}
cellsize = cell_dx*ddbox->skew_fac[d];
- while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
+ while (cellsize*npulse[d] < comm->cutoff)
{
npulse[d]++;
}
* all cell borders in a loop to obtain identical values
* to the master distribution case and to determine npulse.
*/
- if (bMaster)
+ if (setmode == setcellsizeslbMASTER)
{
cell_x = dd->ma->cell_x[d];
}
}
cellsize_min[d] = min(cellsize_min[d], cellsize);
}
- if (!bMaster)
+ if (setmode == setcellsizeslbLOCAL)
{
comm->cell_x0[d] = cell_x[dd->ci[d]];
comm->cell_x1[d] = cell_x[dd->ci[d]+1];
+ }
+ if (setmode != setcellsizeslbMASTER)
+ {
sfree(cell_x);
}
}
if (d < ddbox->npbcdim &&
dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
{
- gmx_fatal_collective(FARGS, NULL, dd,
- "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
- dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
- comm->cutoff,
- dd->nc[d], dd->nc[d],
- dd->nnodes > dd->nc[d] ? "cells" : "processors");
+ char error_string[STRLEN];
+
+ sprintf(error_string,
+ "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
+ dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
+ comm->cutoff,
+ dd->nc[d], dd->nc[d],
+ dd->nnodes > dd->nc[d] ? "cells" : "ranks");
+
+ if (setmode == setcellsizeslbLOCAL)
+ {
+            gmx_fatal_collective(FARGS, NULL, dd, "%s", error_string);
+ }
+ else
+ {
+            gmx_fatal(FARGS, "%s", error_string);
+ }
}
}
static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
int d, int dim, gmx_domdec_root_t *root,
gmx_ddbox_t *ddbox,
- gmx_bool bUniform, gmx_large_int_t step, real cellsize_limit_f, int range[])
+ gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
{
gmx_domdec_comm_t *comm;
int ncd, i, j, nmin, nmin_old;
static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
int d, int dim, gmx_domdec_root_t *root,
gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
- gmx_bool bUniform, gmx_large_int_t step)
+ gmx_bool bUniform, gmx_int64_t step)
{
gmx_domdec_comm_t *comm;
int ncd, d1, i, j, pos;
static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
- gmx_bool bUniform, gmx_large_int_t step)
+ gmx_bool bUniform, gmx_int64_t step)
{
gmx_domdec_comm_t *comm;
int d, dim, d1;
static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
- gmx_bool bUniform, gmx_bool bDoDLB, gmx_large_int_t step,
+ gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
gmx_wallcycle_t wcycle)
{
gmx_domdec_comm_t *comm;
static void set_dd_cell_sizes(gmx_domdec_t *dd,
gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
- gmx_bool bUniform, gmx_bool bDoDLB, gmx_large_int_t step,
+ gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
gmx_wallcycle_t wcycle)
{
gmx_domdec_comm_t *comm;
}
else
{
- set_dd_cell_sizes_slb(dd, ddbox, FALSE, npulse);
+ set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
realloc_comm_ind(dd, npulse);
}
static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
gmx_ddbox_t *ddbox,
rvec cell_ns_x0, rvec cell_ns_x1,
- gmx_large_int_t step)
+ gmx_int64_t step)
{
gmx_domdec_comm_t *comm;
int dim_ind, dim;
}
}
-static void distribute_cg(FILE *fplog, gmx_large_int_t step,
+static void distribute_cg(FILE *fplog, gmx_int64_t step,
matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
gmx_domdec_t *dd)
{
}
}
-static void get_cg_distribution(FILE *fplog, gmx_large_int_t step, gmx_domdec_t *dd,
+static void get_cg_distribution(FILE *fplog, gmx_int64_t step, gmx_domdec_t *dd,
t_block *cgs, matrix box, gmx_ddbox_t *ddbox,
rvec pos[])
{
int i, cg_gl;
int *ibuf, buf2[2] = { 0, 0 };
gmx_bool bMaster = DDMASTER(dd);
+
if (bMaster)
{
ma = dd->ma;
check_screw_box(box);
}
- set_dd_cell_sizes_slb(dd, ddbox, TRUE, npulse);
+ set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
distribute_cg(fplog, step, box, ddbox->tric_dir, cgs, pos, dd);
for (i = 0; i < dd->nnodes; i++)
static void print_cg_move(FILE *fplog,
gmx_domdec_t *dd,
- gmx_large_int_t step, int cg, int dim, int dir,
+ gmx_int64_t step, int cg, int dim, int dir,
gmx_bool bHaveLimitdAndCMOld, real limitd,
rvec cm_old, rvec cm_new, real pos_d)
{
static void cg_move_error(FILE *fplog,
gmx_domdec_t *dd,
- gmx_large_int_t step, int cg, int dim, int dir,
+ gmx_int64_t step, int cg, int dim, int dir,
gmx_bool bHaveLimitdAndCMOld, real limitd,
rvec cm_old, rvec cm_new, real pos_d)
{
return comm->moved;
}
-static void calc_cg_move(FILE *fplog, gmx_large_int_t step,
+static void calc_cg_move(FILE *fplog, gmx_int64_t step,
gmx_domdec_t *dd,
t_state *state,
ivec tric_dir, matrix tcm,
}
}
-static void dd_redistribute_cg(FILE *fplog, gmx_large_int_t step,
+static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
gmx_domdec_t *dd, ivec tric_dir,
t_state *state, rvec **f,
t_forcerec *fr,
fprintf(fplog, "\n");
fprintf(stderr, "\n");
- if (lossf >= DD_PERF_LOSS)
+ if (lossf >= DD_PERF_LOSS_WARN)
{
sprintf(buf,
"NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
fprintf(fplog, "%s\n", buf);
fprintf(stderr, "%s\n", buf);
}
- if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
+ if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS_WARN)
{
sprintf(buf,
- "NOTE: %.1f %% performance was lost because the PME nodes\n"
- " had %s work to do than the PP nodes.\n"
- " You might want to %s the number of PME nodes\n"
+ "NOTE: %.1f %% performance was lost because the PME ranks\n"
+ " had %s work to do than the PP ranks.\n"
+ " You might want to %s the number of PME ranks\n"
" or %s the cut-off and the grid spacing.\n",
fabs(lossp*100),
(lossp < 0) ? "less" : "more",
}
}
-static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_large_int_t step)
+static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
{
int flags, d;
char buf[22];
physicalnode_id_hash = gmx_physicalnode_id_hash();
- gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->nodeid);
+ gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
dd = cr->dd;
if (fplog)
{
fprintf(fplog,
- "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
+ "Domain decomposition rank %d, coordinates %d %d %d\n\n",
dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
}
if (debug)
{
fprintf(debug,
- "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
+ "Domain decomposition rank %d, coordinates %d %d %d\n\n",
dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
}
}
}
else if (fplog)
{
- fprintf(fplog, "#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
+ fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
fprintf(fplog,
"Will not use a Cartesian communicator for PP <-> PME\n\n");
}
if (fplog)
{
- fprintf(fplog, "Cartesian nodeid %d, coordinates %d %d %d\n\n",
+ fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
}
case ddnoPP_PME:
if (fplog)
{
- fprintf(fplog, "Order of the nodes: PP first, PME last\n");
+ fprintf(fplog, "Order of the ranks: PP first, PME last\n");
}
break;
case ddnoINTERLEAVE:
*/
if (fplog)
{
- fprintf(fplog, "Interleaving PP and PME nodes\n");
+ fprintf(fplog, "Interleaving PP and PME ranks\n");
}
comm->pmenodes = dd_pmenodes(cr);
break;
if (fplog)
{
- fprintf(fplog, "This is a %s only node\n\n",
+ fprintf(fplog, "This rank does only %s work.\n\n",
(cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
}
}
return n;
}
-static int dd_nst_env(FILE *fplog, const char *env_var, int def)
+static int dd_getenv(FILE *fplog, const char *env_var, int def)
{
char *val;
int nst;
if (ir->ns_type == ensSIMPLE)
{
- gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
+ gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
}
if (ir->nstlist == 0)
if (fplog)
{
fprintf(fplog,
- "\nInitializing Domain Decomposition on %d nodes\n", cr->nnodes);
+ "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
}
snew(dd, 1);
dd->npbcdim = ePBC2npbcdim(ir->ePBC);
dd->bScrewPBC = (ir->ePBC == epbcSCREW);
- dd->bSendRecv2 = dd_nst_env(fplog, "GMX_DD_SENDRECV2", 0);
- comm->dlb_scale_lim = dd_nst_env(fplog, "GMX_DLB_MAX", 10);
- comm->eFlop = dd_nst_env(fplog, "GMX_DLB_FLOP", 0);
- recload = dd_nst_env(fplog, "GMX_DD_LOAD", 1);
- comm->nstSortCG = dd_nst_env(fplog, "GMX_DD_SORT", 1);
- comm->nstDDDump = dd_nst_env(fplog, "GMX_DD_DUMP", 0);
- comm->nstDDDumpGrid = dd_nst_env(fplog, "GMX_DD_DUMP_GRID", 0);
- comm->DD_debug = dd_nst_env(fplog, "GMX_DD_DEBUG", 0);
+ dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
+ comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
+ comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
+ recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
+ comm->nstSortCG = dd_getenv(fplog, "GMX_DD_NST_SORT_CHARGE_GROUPS", 1);
+ comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
+ comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
+ comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
dd->pme_recv_f_alloc = 0;
dd->pme_recv_f_buf = NULL;
if (dd->nc[XX] == 0)
{
bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
- sprintf(buf, "Change the number of nodes or mdrun option %s%s%s",
+ sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
!bC ? "-rdd" : "-rcon",
comm->eDLB != edlbNO ? " or -dds" : "",
bC ? " or your LINCS settings" : "");
gmx_fatal_collective(FARGS, cr, NULL,
- "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
+ "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
"%s\n"
"Look in the log file for details on the domain decomposition",
cr->nnodes-cr->npmenodes, limit, buf);
if (fplog)
{
fprintf(fplog,
- "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
+ "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
}
if (cr->nnodes - dd->nnodes != cr->npmenodes)
{
gmx_fatal_collective(FARGS, cr, NULL,
- "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
+ "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
}
if (cr->npmenodes > dd->nnodes)
{
gmx_fatal_collective(FARGS, cr, NULL,
- "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.", cr->npmenodes, dd->nnodes);
+ "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
}
if (cr->npmenodes > 0)
{
comm->npmenodes = dd->nnodes;
}
- if (EEL_PME(ir->coulombtype))
+ if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
{
/* The following choices should match those
* in comm_cost_est in domdec_setup.c.
}
-static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_large_int_t step)
+static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
{
gmx_domdec_t *dd;
gmx_domdec_comm_t *comm;
}
else
{
- set_dd_cell_sizes_slb(dd, ddbox, FALSE, np);
+ set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
fprintf(fplog, "The initial number of communication pulses is:");
for (d = 0; d < dd->ndim; d++)
{
}
/* This env var can override npulse */
- d = dd_nst_env(debug, "GMX_DD_NPULSE", 0);
+ d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
if (d > 0)
{
npulse = d;
snew(comm->dth, comm->nth);
}
- if (EEL_PME(ir->coulombtype))
+ if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
{
init_ddpme(dd, &comm->ddpme[0], 0);
if (comm->npmedecompdim >= 2)
if (dd->pme_nodeid >= 0)
{
gmx_fatal_collective(FARGS, NULL, dd,
- "Can not have separate PME nodes without PME electrostatics");
+ "Can not have separate PME ranks without PME electrostatics");
}
}
corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
}
/* Apply the triclinic couplings */
+ assert(ddbox->npbcdim <= DIM);
for (i = YY; i < ddbox->npbcdim; i++)
{
for (j = XX; j < i; j++)
int i1, i2, i_new;
/* The new indices are not very ordered, so we qsort them */
- qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
+ gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
/* sort2 is already ordered, so now we can merge the two arrays */
i1 = 0;
fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
}
/* Determine the order of the charge groups using qsort */
- qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
+ gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
}
return ncg_new;
}
void dd_partition_system(FILE *fplog,
- gmx_large_int_t step,
+ gmx_int64_t step,
t_commrec *cr,
gmx_bool bMasterState,
int nstglobalcomm,
gmx_domdec_comm_t *comm;
gmx_ddbox_t ddbox = {0};
t_block *cgs_gl;
- gmx_large_int_t step_pcoupl;
+ gmx_int64_t step_pcoupl;
rvec cell_ns_x0, cell_ns_x1;
int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
if (DDMASTER(dd))
{
bTurnOnDLB =
- (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
+ (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
if (debug)
{
fprintf(debug, "step %s, imb loss %f\n",
*/
/* This call also sets the new number of home particles to dd->nat_home */
atoms2md(top_global, ir,
- comm->nat[ddnatCON], dd->gatindex, 0, dd->nat_home, mdatoms);
+ comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
/* Now we have the charges we can sort the FE interactions */
dd_sort_local_top(dd, mdatoms, top_local);
if (vsite != NULL)
{
/* Now we have updated mdatoms, we can do the last vsite bookkeeping */
- split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
+ split_vsites_over_threads(top_local->idef.il, top_local->idef.iparams,
+ mdatoms, FALSE, vsite);
}
if (shellfc)
if (!(cr->duty & DUTY_PME))
{
- /* Send the charges to our PME only node */
- gmx_pme_send_q(cr, mdatoms->nChargePerturbed,
- mdatoms->chargeA, mdatoms->chargeB,
- dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
+        /* Send the charges and/or c6/sigmas to our PME-only rank */
+ gmx_pme_send_parameters(cr,
+ fr->ic,
+ mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
+ mdatoms->chargeA, mdatoms->chargeB,
+ mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
+ mdatoms->sigmaA, mdatoms->sigmaB,
+ dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
}
if (constr)
dd_make_local_rotation_groups(dd, ir->rot);
}
+ if (ir->eSwapCoords != eswapNO)
+ {
+ /* Update the local groups needed for ion swapping */
+ dd_make_local_swap_groups(dd, ir->swap);
+ }
+
+ /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
+ dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
add_dd_statistics(dd);