/* Abstract type for PME that is defined only in the routine that use them. */
struct gmx_pme_t;
struct nonbonded_verlet_t;
+struct bonded_threading_t;
/* Structure describing the data in a single table */
typedef struct
/* Forward declaration of type for managing Ewald tables */
struct gmx_ewald_tab_t;
-typedef struct f_thread_t f_thread_t;
+typedef struct ewald_corr_thread_t ewald_corr_thread_t;
typedef struct {
interaction_const_t *ic;
real userreal3;
real userreal4;
- /* Thread local force and energy data */
- /* FIXME move to bonded_thread_data_t */
- int nthreads;
- int red_ashift;
- int red_nblock;
- f_thread_t *f_t;
+ /* Pointer to struct for managing threading of bonded force calculation */
+ struct bonded_threading_t *bonded_threading;
- /* Maximum thread count for uniform distribution of bondeds over threads */
- int bonded_max_nthread_uniform;
-
- /* Exclusion load distribution over the threads */
- int *excl_load;
+ /* Ewald correction thread local virial and energy data */
+ int nthread_ewc;
+ ewald_corr_thread_t *ewc_t;
+ /* Ewald charge correction load distribution over the threads */
+ int *excl_load;
} t_forcerec;
/* Important: Starting with Gromacs-4.6, the values of c6 and c12 in the nbfp array have
#include "gromacs/listed-forces/bonded.h"
#include "gromacs/listed-forces/position-restraints.h"
#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/forcerec-threading.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/simd/simd.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/utility/smalloc.h"
+#include "listed-internal.h"
#include "pairs.h"
namespace
t_fcdata *fcd, int *global_atom_index,
int force_flags)
{
- gmx_bool bCalcEnerVir;
- int i;
- real dvdl[efptNR]; /* The dummy array is to have a place to store the dhdl at other values
- of lambda, which will be thrown away in the end*/
- const t_pbc *pbc_null;
- int thread;
+ struct bonded_threading_t *bt;
+ gmx_bool bCalcEnerVir;
+ int i;
+ /* The dummy array is to have a place to store the dhdl at other values
+ of lambda, which will be thrown away in the end */
+ real dvdl[efptNR];
+ const t_pbc *pbc_null;
+ int thread;
+
+ bt = fr->bonded_threading;
- assert(fr->nthreads == idef->nthreads);
+ assert(bt->nthreads == idef->nthreads);
bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
}
wallcycle_sub_start(wcycle, ewcsLISTED);
-#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
- for (thread = 0; thread < fr->nthreads; thread++)
+#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
+ for (thread = 0; thread < bt->nthreads; thread++)
{
int ftype;
real *epot, v;
}
else
{
- zero_thread_forces(&fr->f_t[thread], fr->natoms_force,
- fr->red_nblock, 1<<fr->red_ashift);
-
- ft = fr->f_t[thread].f;
- fshift = fr->f_t[thread].fshift;
- epot = fr->f_t[thread].ener;
- grpp = &fr->f_t[thread].grpp;
- dvdlt = fr->f_t[thread].dvdl;
+ zero_thread_forces(&bt->f_t[thread], fr->natoms_force,
+ bt->red_nblock, 1<<bt->red_ashift);
+
+ ft = bt->f_t[thread].f;
+ fshift = bt->f_t[thread].fshift;
+ epot = bt->f_t[thread].ener;
+ grpp = &bt->f_t[thread].grpp;
+ dvdlt = bt->f_t[thread].dvdl;
}
/* Loop over all bonded force types to calculate the bonded forces */
for (ftype = 0; (ftype < F_NRE); ftype++)
}
wallcycle_sub_stop(wcycle, ewcsLISTED);
- if (fr->nthreads > 1)
+ if (bt->nthreads > 1)
{
wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
reduce_thread_forces(fr->natoms_force, f, fr->fshift,
enerd->term, &enerd->grpp, dvdl,
- fr->nthreads, fr->f_t,
- fr->red_nblock, 1<<fr->red_ashift,
+ bt->nthreads, bt->f_t,
+ bt->red_nblock, 1<<bt->red_ashift,
bCalcEnerVir,
force_flags & GMX_FORCE_DHDL);
wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_LISTED_FORCES_LISTED_INTERNAL_H
#define GMX_LISTED_FORCES_LISTED_INTERNAL_H
+#include "gromacs/legacyheaders/types/forcerec.h"
+#include "gromacs/math/vectypes.h"
+#include "gromacs/topology/idef.h"
+#include "gromacs/utility/bitmask.h"
+
+/*! \internal \brief struct with output for bonded forces, used per thread */
+typedef struct
+{
+ rvec *f; /**< Force array */
+ int f_nalloc; /**< Allocation size of f */
+ gmx_bitmask_t red_mask; /**< Mask for marking which parts of f are filled */
+ rvec *fshift; /**< Shift force array, size SHIFTS */
+ real ener[F_NRE]; /**< Energy array */
+ gmx_grppairener_t grpp; /**< Group pair energy data for pairs */
+ real dvdl[efptNR]; /**< Free-energy dV/dl output */
+}
+f_thread_t;
+
+/*! \internal \brief struct contain all data for bonded force threading */
+struct bonded_threading_t
+{
+ /* Thread local force and energy data */
+ int nthreads; /**< Number of threads to be used for bondeds */
+ int red_ashift; /**< Size of force reduction blocks in bits */
+ int red_nblock; /**< The number of force blocks to reduce */
+ f_thread_t *f_t; /**< Force/enegry data per thread, size nthreads */
+
+ /* There are two different ways to distribute the bonded force calculation
+ * over the threads. We dedice which to use based on the number of threads.
+ */
+ int bonded_max_nthread_uniform; /**< Maximum thread count for uniform distribution of bondeds over threads */
+};
+
+
/*! \brief Returns the global topology atom number belonging to local
* atom index i.
*
#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
#include "gromacs/listed-forces/listed-forces.h"
-#include "gromacs/mdlib/forcerec-threading.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/smalloc.h"
#include "gromacs/utility/stringutil.h"
+#include "listed-internal.h"
+
/*! \brief struct for passing all data required for a function type */
typedef struct {
int ftype; /**< the function type index */
void setup_bonded_threading(t_forcerec *fr, t_idef *idef)
{
- int t;
- int ctot, c, b;
+ bonded_threading_t *bt;
+ int t;
+ int ctot, c, b;
+
+ bt = fr->bonded_threading;
- assert(fr->nthreads >= 1);
+ assert(bt->nthreads >= 1);
/* Divide the bonded interaction over the threads */
divide_bondeds_over_threads(idef,
- fr->nthreads,
- fr->bonded_max_nthread_uniform);
+ bt->nthreads,
+ bt->bonded_max_nthread_uniform);
- if (fr->nthreads == 1)
+ if (bt->nthreads == 1)
{
- fr->red_nblock = 0;
+ bt->red_nblock = 0;
return;
}
- fr->red_ashift = 6;
- while (fr->natoms_force > (int)(maxBlockBits*(1U<<fr->red_ashift)))
+ bt->red_ashift = 6;
+ while (fr->natoms_force > (int)(maxBlockBits*(1U<<bt->red_ashift)))
{
- fr->red_ashift++;
+ bt->red_ashift++;
}
if (debug)
{
fprintf(debug, "bonded force buffer block atom shift %d bits\n",
- fr->red_ashift);
+ bt->red_ashift);
}
/* Determine to which blocks each thread's bonded force calculation
* contributes. Store this is a mask for each thread.
*/
-#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
- for (t = 1; t < fr->nthreads; t++)
+#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
+ for (t = 1; t < bt->nthreads; t++)
{
- calc_bonded_reduction_mask(&fr->f_t[t].red_mask,
- idef, fr->red_ashift, t, fr->nthreads);
+ calc_bonded_reduction_mask(&bt->f_t[t].red_mask,
+ idef, bt->red_ashift, t, bt->nthreads);
}
/* Determine the maximum number of blocks we need to reduce over */
- fr->red_nblock = 0;
+ bt->red_nblock = 0;
ctot = 0;
- for (t = 0; t < fr->nthreads; t++)
+ for (t = 0; t < bt->nthreads; t++)
{
c = 0;
for (b = 0; b < maxBlockBits; b++)
{
- if (bitmask_is_set(fr->f_t[t].red_mask, b))
+ if (bitmask_is_set(bt->f_t[t].red_mask, b))
{
- fr->red_nblock = std::max(fr->red_nblock, b+1);
+ bt->red_nblock = std::max(bt->red_nblock, b+1);
c++;
}
}
if (debug)
{
#if BITMASK_SIZE <= 64 //move into bitmask when it is C++
- std::string flags = gmx::formatString("%x", fr->f_t[t].red_mask);
+ std::string flags = gmx::formatString("%x", bt->f_t[t].red_mask);
#else
- std::string flags = gmx::formatAndJoin(fr->f_t[t].red_mask,
- fr->f_t[t].red_mask+BITMASK_ALEN,
+ std::string flags = gmx::formatAndJoin(bt->f_t[t].red_mask,
+ bt->f_t[t].red_mask+BITMASK_ALEN,
"", gmx::StringFormatter("%x"));
#endif
fprintf(debug, "thread %d flags %s count %d\n",
if (debug)
{
fprintf(debug, "Number of blocks to reduce: %d of size %d\n",
- fr->red_nblock, 1<<fr->red_ashift);
+ bt->red_nblock, 1<<bt->red_ashift);
fprintf(debug, "Reduction density %.2f density/#thread %.2f\n",
- ctot*(1<<fr->red_ashift)/(double)fr->natoms_force,
- ctot*(1<<fr->red_ashift)/(double)(fr->natoms_force*fr->nthreads));
+ ctot*(1<<bt->red_ashift)/(double)fr->natoms_force,
+ ctot*(1<<bt->red_ashift)/(double)(fr->natoms_force*bt->nthreads));
}
}
-void init_bonded_threading(FILE *fplog, t_forcerec *fr, int nenergrp)
+void init_bonded_threading(FILE *fplog, int nenergrp,
+ struct bonded_threading_t **bt_ptr)
{
+ bonded_threading_t *bt;
+
+ snew(bt, 1);
+
/* These thread local data structures are used for bondeds only */
- fr->nthreads = gmx_omp_nthreads_get(emntBonded);
+ bt->nthreads = gmx_omp_nthreads_get(emntBonded);
- if (fr->nthreads > 1)
+ if (bt->nthreads > 1)
{
int t;
- snew(fr->f_t, fr->nthreads);
-#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
- for (t = 0; t < fr->nthreads; t++)
+ snew(bt->f_t, bt->nthreads);
+#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
+ for (t = 0; t < bt->nthreads; t++)
{
/* Thread 0 uses the global force and energy arrays */
if (t > 0)
{
int i;
- fr->f_t[t].f = NULL;
- fr->f_t[t].f_nalloc = 0;
- snew(fr->f_t[t].fshift, SHIFTS);
- fr->f_t[t].grpp.nener = nenergrp*nenergrp;
+ bt->f_t[t].f = NULL;
+ bt->f_t[t].f_nalloc = 0;
+ snew(bt->f_t[t].fshift, SHIFTS);
+ bt->f_t[t].grpp.nener = nenergrp*nenergrp;
for (i = 0; i < egNR; i++)
{
- snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
+ snew(bt->f_t[t].grpp.ener[i], bt->f_t[t].grpp.nener);
}
}
}
if ((ptr = getenv("GMX_BONDED_NTHREAD_UNIFORM")) != NULL)
{
- sscanf(ptr, "%d", &fr->bonded_max_nthread_uniform);
+ sscanf(ptr, "%d", &bt->bonded_max_nthread_uniform);
if (fplog != NULL)
{
fprintf(fplog, "\nMax threads for uniform bonded distribution set to %d by env.var.\n",
- fr->bonded_max_nthread_uniform);
+ bt->bonded_max_nthread_uniform);
}
}
else
{
- fr->bonded_max_nthread_uniform = max_nthread_uniform;
+ bt->bonded_max_nthread_uniform = max_nthread_uniform;
}
}
+
+ *bt_ptr = bt;
}
*/
void setup_bonded_threading(t_forcerec *fr, t_idef *idef);
-/*! \brief Initialize the bonded threading data structures */
-void init_bonded_threading(FILE *fplog, t_forcerec *fr, int nenergrp);
+/*! \brief Initialize the bonded threading data structures
+ *
+ * Allocates and initializes a bonded threading data structure.
+ * A pointer to this struct is returned as \p *bb_ptr.
+ */
+void init_bonded_threading(FILE *fplog, int nenergrp,
+ struct bonded_threading_t **bt_ptr);
#ifdef __cplusplus
}
}
}
-static void reduce_thread_forces(int n, rvec *f,
- tensor vir_q, tensor vir_lj,
- real *Vcorr_q, real *Vcorr_lj,
- real *dvdl_q, real *dvdl_lj,
- int nthreads, f_thread_t *f_t)
+static void reduce_thread_energies(tensor vir_q, tensor vir_lj,
+ real *Vcorr_q, real *Vcorr_lj,
+ real *dvdl_q, real *dvdl_lj,
+ int nthreads,
+ ewald_corr_thread_t *ewc_t)
{
- int t, i;
- int nthreads_loop gmx_unused;
+ int t;
- // cppcheck-suppress unreadVariable
- nthreads_loop = gmx_omp_nthreads_get(emntBonded);
- /* This reduction can run over any number of threads */
-#pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static)
- for (i = 0; i < n; i++)
- {
- for (t = 1; t < nthreads; t++)
- {
- rvec_inc(f[i], f_t[t].f[i]);
- }
- }
for (t = 1; t < nthreads; t++)
{
- *Vcorr_q += f_t[t].Vcorr_q;
- *Vcorr_lj += f_t[t].Vcorr_lj;
- *dvdl_q += f_t[t].dvdl[efptCOUL];
- *dvdl_lj += f_t[t].dvdl[efptVDW];
- m_add(vir_q, f_t[t].vir_q, vir_q);
- m_add(vir_lj, f_t[t].vir_lj, vir_lj);
+ *Vcorr_q += ewc_t[t].Vcorr_q;
+ *Vcorr_lj += ewc_t[t].Vcorr_lj;
+ *dvdl_q += ewc_t[t].dvdl[efptCOUL];
+ *dvdl_lj += ewc_t[t].dvdl[efptVDW];
+ m_add(vir_q, ewc_t[t].vir_q, vir_q);
+ m_add(vir_lj, ewc_t[t].vir_lj, vir_lj);
}
}
gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
}
- nthreads = gmx_omp_nthreads_get(emntBonded);
+ nthreads = fr->nthread_ewc;
#pragma omp parallel for num_threads(nthreads) schedule(static)
for (t = 0; t < nthreads; t++)
{
- int i;
- rvec *fnv;
tensor *vir_q, *vir_lj;
real *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
if (t == 0)
{
- fnv = fr->f_novirsum;
vir_q = &fr->vir_el_recip;
vir_lj = &fr->vir_lj_recip;
Vcorrt_q = &Vcorr_q;
}
else
{
- fnv = fr->f_t[t].f;
- vir_q = &fr->f_t[t].vir_q;
- vir_lj = &fr->f_t[t].vir_lj;
- Vcorrt_q = &fr->f_t[t].Vcorr_q;
- Vcorrt_lj = &fr->f_t[t].Vcorr_lj;
- dvdlt_q = &fr->f_t[t].dvdl[efptCOUL];
- dvdlt_lj = &fr->f_t[t].dvdl[efptVDW];
- for (i = 0; i < fr->natoms_force; i++)
- {
- clear_rvec(fnv[i]);
- }
+ vir_q = &fr->ewc_t[t].vir_q;
+ vir_lj = &fr->ewc_t[t].vir_lj;
+ Vcorrt_q = &fr->ewc_t[t].Vcorr_q;
+ Vcorrt_lj = &fr->ewc_t[t].Vcorr_lj;
+ dvdlt_q = &fr->ewc_t[t].dvdl[efptCOUL];
+ dvdlt_lj = &fr->ewc_t[t].dvdl[efptVDW];
clear_mat(*vir_q);
clear_mat(*vir_lj);
}
*dvdlt_q = 0;
*dvdlt_lj = 0;
+ /* Threading is only supported with the Verlet cut-off
+ * scheme and then only single particle forces (no
+ * exclusion forces) are calculated, so we can store
+ * the forces in the normal, single fr->f_novirsum array.
+ */
ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1],
cr, t, fr,
md->chargeA, md->chargeB,
excl, x, bSB ? boxs : box, mu_tot,
ir->ewald_geometry,
ir->epsilon_surface,
- fnv, *vir_q, *vir_lj,
+ fr->f_novirsum, *vir_q, *vir_lj,
Vcorrt_q, Vcorrt_lj,
lambda[efptCOUL], lambda[efptVDW],
dvdlt_q, dvdlt_lj);
}
if (nthreads > 1)
{
- reduce_thread_forces(fr->natoms_force, fr->f_novirsum,
- fr->vir_el_recip, fr->vir_lj_recip,
- &Vcorr_q, &Vcorr_lj,
- &dvdl_long_range_correction_q,
- &dvdl_long_range_correction_lj,
- nthreads, fr->f_t);
+ reduce_thread_energies(fr->vir_el_recip, fr->vir_lj_recip,
+ &Vcorr_q, &Vcorr_lj,
+ &dvdl_long_range_correction_q,
+ &dvdl_long_range_correction_lj,
+ nthreads, fr->ewc_t);
}
wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_MDLIB_FORCEREC_THREADING_H
#define GMX_MDLIB_FORCEREC_THREADING_H
-#include "gromacs/utility/bitmask.h"
-
#ifdef __cplusplus
extern "C" {
#endif
-struct f_thread_t {
- rvec *f;
- int f_nalloc;
- gmx_bitmask_t red_mask; /* Mask for marking which parts of f are filled */
- rvec *fshift;
- real ener[F_NRE];
- gmx_grppairener_t grpp;
+struct ewald_corr_thread_t {
real Vcorr_q;
real Vcorr_lj;
real dvdl[efptNR];
}
/* Initialize the thread working data for bonded interactions */
- init_bonded_threading(fp, fr, mtop->groups.grps[egcENER].nr);
+ init_bonded_threading(fp, mtop->groups.grps[egcENER].nr,
+ &fr->bonded_threading);
- snew(fr->excl_load, fr->nthreads+1);
+ fr->nthread_ewc = gmx_omp_nthreads_get(emntBonded);
+ snew(fr->ewc_t, fr->nthread_ewc);
+ snew(fr->excl_load, fr->nthread_ewc + 1);
/* fr->ic is used both by verlet and group kernels (to some extent) now */
init_interaction_const(fp, &fr->ic, fr);
fr->excl_load[0] = 0;
n = 0;
i = 0;
- for (t = 1; t <= fr->nthreads; t++)
+ for (t = 1; t <= fr->nthread_ewc; t++)
{
- ntarget = (ntot*t)/fr->nthreads;
+ ntarget = (ntot*t)/fr->nthread_ewc;
while (i < top->excls.nr && n < ntarget)
{
for (j = ind[i]; j < ind[i+1]; j++)