# To help us fund GROMACS development, we humbly ask that you cite
# the research papers on the package. Check out http://www.gromacs.org.
-file(GLOB EWALD_SOURCES *.c *cpp)
+file(GLOB EWALD_SOURCES *.cpp)
set(LIBGROMACS_SOURCES ${LIBGROMACS_SOURCES} ${EWALD_SOURCES} PARENT_SCOPE)
if (BUILD_TESTING)
#include <math.h>
-#include "gromacs/ewald/pme-internal.h"
-#include "gromacs/legacyheaders/macros.h"
+#include <algorithm>
+
#include "gromacs/math/vec.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/smalloc.h"
+#include "pme-internal.h"
+
static void make_dft_mod(real *mod, real *data, int ndata)
{
int i, j;
void make_bspline_moduli(splinevec bsp_mod,
int nx, int ny, int nz, int order)
{
- int nmax = max(nx, max(ny, nz));
+ int nmax = std::max(nx, std::max(ny, nz));
real *data, *ddata, *bsp_data;
int i, k, l;
real div;
#ifndef GMX_EWALD_CALCULATE_SPLINE_MODULI_H
#define GMX_EWALD_CALCULATE_SPLINE_MODULI_H
-#include "gromacs/ewald/pme-internal.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "pme-internal.h"
/* Calculate plain SPME B-spline interpolation */
void make_bspline_moduli(splinevec bsp_mod,
void make_p3m_bspline_moduli(splinevec bsp_mod,
int nx, int ny, int nz, int order);
-#ifdef __cplusplus
-}
-#endif
-
#endif
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
+/*! \internal \file
+ *
+ * \brief This file contains function definitions necessary for
+ * computing energies and forces for the plain-Ewald long-ranged part,
+ * and the correction for overall system charge for all Ewald-family
+ * methods.
+ *
+ * \author David van der Spoel <david.vanderspoel@icm.uu.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_ewald
+ */
#include "gmxpre.h"
#include "ewald.h"
#include <math.h>
#include <stdio.h>
-#include "gromacs/legacyheaders/macros.h"
+#include <cstdlib>
+
+#include <algorithm>
+
#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/legacyheaders/types/inputrec.h"
#include "gromacs/math/gmxcomplex.h"
void init_ewald_tab(struct gmx_ewald_tab_t **et, const t_inputrec *ir, FILE *fp)
{
- int n;
-
snew(*et, 1);
if (fp)
{
(*et)->nx = ir->nkx+1;
(*et)->ny = ir->nky+1;
(*et)->nz = ir->nkz+1;
- (*et)->kmax = max((*et)->nx, max((*et)->ny, (*et)->nz));
+ (*et)->kmax = std::max((*et)->nx, std::max((*et)->ny, (*et)->nz));
(*et)->eir = NULL;
(*et)->tab_xy = NULL;
(*et)->tab_qxyz = NULL;
}
-/* the other routines are in complex.h */
-static t_complex conjmul(t_complex a, t_complex b)
-{
- t_complex c;
-
- c.re = a.re*b.re + a.im*b.im;
- c.im = a.im*b.re - a.re*b.im;
-
- return c;
-}
-
-static void tabulate_eir(int natom, rvec x[], int kmax, cvec **eir, rvec lll)
+//! Make tables for the structure factor parts
+static void tabulateStructureFactors(int natom, rvec x[], int kmax, cvec **eir, rvec lll)
{
int i, j, m;
clear_mat(lrvir);
calc_lll(box, lll);
- /* make tables for the structure factor parts */
- tabulate_eir(natoms, x, et->kmax, et->eir, lll);
+ tabulateStructureFactors(natoms, x, et->kmax, et->eir, lll);
for (q = 0; q < (bFreeEnergy ? 2 : 1); q++)
{
{
for (n = 0; n < natoms; n++)
{
- et->tab_xy[n] = conjmul(et->eir[ix][n][XX], et->eir[-iy][n][YY]);
+ et->tab_xy[n] = cmul(et->eir[ix][n][XX], conjugate(et->eir[-iy][n][YY]));
}
}
for (iz = lowiz; iz < et->nz; iz++)
{
for (n = 0; n < natoms; n++)
{
- et->tab_qxyz[n] = rcmul(charge[n], conjmul(et->tab_xy[n],
- et->eir[-iz][n][ZZ]));
+ et->tab_qxyz[n] = rcmul(charge[n], cmul(et->tab_xy[n],
+ conjugate(et->eir[-iz][n][ZZ])));
}
}
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
+/*! \libinternal \defgroup module_ewald Ewald-family treatments of long-ranged forces
+ * \ingroup group_mdrun
+ *
+ * \brief Computes energies and forces for long-ranged interactions
+ * using the Ewald decomposition. Includes plain Ewald, PME, P3M for
+ * Coulomb, PME for Lennard-Jones, load-balancing for PME, and
+ * supporting code.
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \author Erik Lindahl <erik@kth.se>
+ * \author Roland Schulz <roland@rschulz.eu>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \author Christan Wennberg <cwennberg@kth.se>
+ */
+
+/*! \libinternal \file
+ *
+ * \brief This file contains function declarations necessary for
+ * computing energies and forces for the plain-Ewald long-ranged part,
+ * and the correction for overall system charge for all Ewald-family
+ * methods.
+ *
+ * \author David van der Spoel <david.vanderspoel@icm.uu.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_ewald
+ */
#ifndef GMX_EWALD_EWALD_H
#define GMX_EWALD_EWALD_H
#include "gromacs/math/vectypes.h"
#include "gromacs/utility/real.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
/* Forward declaration of type for managing Ewald tables */
struct gmx_ewald_tab_t;
ewald_charge_correction(t_commrec *cr, t_forcerec *fr, real lambda, matrix box,
real *dvdlambda, tensor vir);
-#ifdef __cplusplus
-}
-#endif
-
#endif
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
double Vexcl_q, dvdl_excl_q, dvdl_excl_lj; /* Necessary for precision */
double Vexcl_lj;
real one_4pi_eps;
- real v, vc, qiA, qiB, dr2, rinv, enercorr;
+ real v, vc, qiA, qiB, dr2, rinv;
real Vself_q[2], Vself_lj[2], Vdipole[2], rinv2, ewc_q = fr->ewaldcoeff_q, ewcdr;
real ewc_lj = fr->ewaldcoeff_lj, ewc_lj2 = ewc_lj * ewc_lj;
real c6Ai = 0, c6Bi = 0, c6A = 0, c6B = 0, ewcdr2, ewcdr4, c6L = 0, rinv6;
rvec df, dx, mutot[2], dipcorrA, dipcorrB;
- tensor dxdf_q, dxdf_lj;
- real vol = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
+ tensor dxdf_q = {{0}}, dxdf_lj = {{0}};
+ real vol = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
real L1_q, L1_lj, dipole_coeff, qqA, qqB, qqL, vr0_q, vr0_lj = 0;
gmx_bool bMolPBC = fr->bMolPBC;
gmx_bool bDoingLBRule = (fr->ljpme_combination_rule == eljpmeLB);
+ gmx_bool bNeedLongRangeCorrection;
/* This routine can be made faster by using tables instead of analytical interactions
* However, that requires a thorough verification that they are correct in all cases.
fprintf(debug, "mutot = %8.3f %8.3f %8.3f\n",
mutot[0][XX], mutot[0][YY], mutot[0][ZZ]);
}
- clear_mat(dxdf_q);
- if (EVDW_PME(fr->vdwtype))
- {
- clear_mat(dxdf_lj);
- }
- if ((calc_excl_corr || dipole_coeff != 0) && !bHaveChargeOrTypePerturbed)
+ bNeedLongRangeCorrection = (calc_excl_corr || dipole_coeff != 0);
+ if (bNeedLongRangeCorrection && !bHaveChargeOrTypePerturbed)
{
for (i = start; (i < end); i++)
{
}
if (qqA != 0.0 || c6A != 0.0)
{
- real fscal;
-
- fscal = 0;
rvec_sub(x[i], x[k], dx);
if (bMolPBC)
{
rinv2 = rinv*rinv;
if (qqA != 0.0)
{
- real dr;
+ real dr, fscal;
dr = 1.0/rinv;
ewcdr = ewc_q*dr;
if (c6A != 0.0)
{
+ real fscal;
+
rinv6 = rinv2*rinv2*rinv2;
ewcdr2 = ewc_lj2*dr2;
ewcdr4 = ewcdr2*ewcdr2;
}
}
}
- else if (calc_excl_corr || dipole_coeff != 0)
+ else if (bNeedLongRangeCorrection)
{
for (i = start; (i < end); i++)
{
{
real fscal;
- fscal = 0;
qqL = L1_q*qqA + lambda_q*qqB;
if (EVDW_PME(fr->vdwtype))
{
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
+/*! \libinternal \file
+ *
+ * \brief This file contains function declarations necessary for
+ * computing energies and forces for the PME long-ranged part (Coulomb
+ * and LJ).
+ *
+ * \author Erik Lindahl <erik@kth.se>
+ * \author Berk Hess <hess@kth.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_ewald
+ */
#ifndef GMX_EWALD_LONG_RANGE_CORRECTION_H
#define GMX_EWALD_LONG_RANGE_CORRECTION_H
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/real.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
/*! \brief Calculate long-range Ewald correction terms.
*
* For the group cutoff scheme (only), calculates the correction to
real lambda_q, real lambda_lj,
real *dvdlambda_q, real *dvdlambda_lj);
-#ifdef __cplusplus
-}
-#endif
-
#endif
#include "gmxpre.h"
-#include "pme-gather.h"
-
-#include "gromacs/ewald/pme-internal.h"
-#include "gromacs/ewald/pme-simd.h"
-#include "gromacs/ewald/pme-spline-work.h"
#include "gromacs/math/vec.h"
#include "gromacs/simd/simd.h"
#include "gromacs/utility/smalloc.h"
+#include "pme-internal.h"
+#include "pme-simd.h"
+#include "pme-spline-work.h"
+
#define DO_FSPLINE(order) \
for (ithx = 0; (ithx < order); ithx++) \
{ \
real scale)
{
/* sum forces for local particles */
- int nn, n, ithx, ithy, ithz, i0, j0, k0;
- int index_x, index_xy;
- int nx, ny, nz, pny, pnz;
- int * idxptr;
- real tx, ty, dx, dy, coefficient;
- real fx, fy, fz, gval;
- real fxy1, fz1;
- real *thx, *thy, *thz, *dthx, *dthy, *dthz;
- int norder;
- real rxx, ryx, ryy, rzx, rzy, rzz;
- int order;
+ int nn, n, ithx, ithy, ithz, i0, j0, k0;
+ int index_x, index_xy;
+ int nx, ny, nz, pny, pnz;
+ int *idxptr;
+ real tx, ty, dx, dy, coefficient;
+ real fx, fy, fz, gval;
+ real fxy1, fz1;
+ real *thx, *thy, *thz, *dthx, *dthy, *dthz;
+ int norder;
+ real rxx, ryx, ryy, rzx, rzy, rzz;
+ int order;
#ifdef PME_SIMD4_SPREAD_GATHER
// cppcheck-suppress unreadVariable cppcheck seems not to analyze code from pme-simd4.h
#define PME_GATHER_F_SIMD4_ALIGNED
#define PME_ORDER 4
#endif
-#include "gromacs/ewald/pme-simd4.h"
+#include "pme-simd4.h"
#else
DO_FSPLINE(4);
#endif
#ifdef PME_SIMD4_SPREAD_GATHER
#define PME_GATHER_F_SIMD4_ALIGNED
#define PME_ORDER 5
-#include "gromacs/ewald/pme-simd4.h"
+#include "pme-simd4.h"
#else
DO_FSPLINE(5);
#endif
#ifndef GMX_EWALD_PME_GATHER_H
#define GMX_EWALD_PME_GATHER_H
-#include "gromacs/ewald/pme-internal.h"
#include "gromacs/utility/real.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "pme-internal.h"
void
gather_f_bsplines(struct gmx_pme_t *pme, real *grid,
gather_energy_bsplines(struct gmx_pme_t *pme, real *grid,
pme_atomcomm_t *atc);
-#ifdef __cplusplus
-}
-#endif
-
#endif
#include "config.h"
-#include "gromacs/ewald/pme-internal.h"
-#include "gromacs/ewald/pme-simd.h"
+#include <cstdlib>
+
#include "gromacs/ewald/pme.h"
-#include "gromacs/legacyheaders/macros.h"
#include "gromacs/math/vec.h"
#include "gromacs/timing/cyclecounter.h"
#include "gromacs/utility/smalloc.h"
#include "gromacs/utility/futil.h"
#endif
+#include "pme-simd.h"
+
/* GMX_CACHE_SEP should be a multiple of the SIMD and SIMD4 register size
* to preserve alignment.
*/
{
ivec local_fft_ndata, local_fft_offset, local_fft_size;
ivec local_pme_size;
- int i, ix, iy, iz;
+ int ix, iy, iz;
int pmeidx, fftidx;
/* Dimensions should be identical for A/B grid, so we just use A here */
void wrap_periodic_pmegrid(struct gmx_pme_t *pme, real *pmegrid)
{
- int nx, ny, nz, pnx, pny, pnz, ny_x, overlap, ix, iy, iz;
+ int nx, ny, nz, pny, pnz, ny_x, overlap, ix, iy, iz;
nx = pme->nkx;
ny = pme->nky;
nz = pme->nkz;
- pnx = pme->pmegrid_nx;
pny = pme->pmegrid_ny;
pnz = pme->pmegrid_nz;
void unwrap_periodic_pmegrid(struct gmx_pme_t *pme, real *pmegrid)
{
- int nx, ny, nz, pnx, pny, pnz, ny_x, overlap, ix;
+ int nx, ny, nz, pny, pnz, ny_x, overlap, ix;
nx = pme->nkx;
ny = pme->nky;
nz = pme->nkz;
- pnx = pme->pmegrid_nx;
pny = pme->pmegrid_ny;
pnz = pme->pmegrid_nz;
env = getenv("GMX_PME_THREAD_DIVISION");
if (env != NULL)
{
- sscanf(env, "%d %d %d", &nsub[XX], &nsub[YY], &nsub[ZZ]);
+ sscanf(env, "%20d %20d %20d", &nsub[XX], &nsub[YY], &nsub[ZZ]);
}
if (nsub[XX]*nsub[YY]*nsub[ZZ] != nthread)
int overlap_x,
int overlap_y)
{
- ivec n, n_base, g0, g1;
+ ivec n, n_base;
int t, x, y, z, d, i, tfac;
int max_comm_lines = -1;
#include "config.h"
-#include "gromacs/ewald/pme-internal.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/real.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "pme-internal.h"
#ifdef GMX_MPI
void
void
dump_local_fftgrid(struct gmx_pme_t *pme, const real *fftgrid);
-#ifdef __cplusplus
-}
-#endif
-
#endif
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
+/*! \internal \file
+ *
+ * \brief This file contains function declarations necessary for
+ * computing energies and forces for the PME long-ranged part (Coulomb
+ * and LJ).
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_ewald
+ */
/* TODO This file is a temporary holding area for stuff local to the
* PME code, before it acquires some more normal ewald/file.c and
#include "gromacs/timing/walltime_accounting.h"
#include "gromacs/utility/gmxmpi.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+//@{
+//! Grid indices for A state for charge and Lennard-Jones C6
+#define PME_GRID_QA 0
+#define PME_GRID_C6A 2
+//@}
-#define PME_GRID_QA 0 /* Gridindex for A-state for Q */
-#define PME_GRID_C6A 2 /* Gridindex for A-state for LJ */
+//@{
+/*! \brief Flags that indicate the number of PME grids in use */
#define DO_Q 2 /* Electrostatic grids have index q<2 */
#define DO_Q_AND_LJ 4 /* non-LB LJ grids have index 2 <= q < 4 */
#define DO_Q_AND_LJ_LB 9 /* With LB rules we need a total of 2+7 grids */
+//@}
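+/* A sketch of the resulting grid layout, inferred from the index definitions
+ * above: grids 0-1 hold the A/B-state charge grids, grids 2-3 hold the
+ * A/B-state C6 grids for geometric-rule LJ-PME, and with Lorentz-Berthelot
+ * rules grids 2-8 hold the seven component grids of the binomial expansion
+ * of ((sigma_i+sigma_j)/2)^6, giving the 2+7 total noted above.
+ */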
-/* Pascal triangle coefficients scaled with (1/2)^6 for LJ-PME with LB-rules */
+/*! \brief Pascal triangle coefficients scaled with (1/2)^6 for LJ-PME with LB-rules */
static const real lb_scale_factor[] = {
1.0/64, 6.0/64, 15.0/64, 20.0/64,
15.0/64, 6.0/64, 1.0/64
};
-/* Pascal triangle coefficients used in solve_pme_lj_yzx, only need to do 4 calculations due to symmetry */
+/*! \brief Pascal triangle coefficients used in solve_pme_lj_yzx, only need to do 4 calculations due to symmetry */
static const real lb_scale_factor_symm[] = { 2.0/64, 12.0/64, 30.0/64, 20.0/64 };
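+/* Worked check of the values above (illustrative only): the full set are the
+ * binomial coefficients C(6,k)/2^6 for k = 0..6, i.e. 1, 6, 15, 20, 15, 6, 1
+ * over 64. The symmetric set folds each mirrored pair into one term,
+ * 1+1 = 2, 6+6 = 12, 15+15 = 30, plus the unpaired central 20, again over 64.
+ */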
-/* We only define a maximum to be able to use local arrays without allocation.
+/*! \brief We only define a maximum to be able to use local arrays without allocation.
* An order larger than 12 should never be needed, even for test cases.
* If needed it can be changed here.
*/
#define PME_ORDER_MAX 12
+/*! \brief As gmx_pme_init, but takes most settings, except the grid, from pme_src */
int gmx_pme_reinit(struct gmx_pme_t **pmedata,
t_commrec * cr,
struct gmx_pme_t * pme_src,
const t_inputrec * ir,
ivec grid_size);
-/* As gmx_pme_init, but takes most settings, except the grid, from pme_src */
/* The following three routines are for PME/PP node splitting in pme_pp.c */
-/* Abstract type for PME <-> PP communication */
+/*! \brief Abstract type for PME <-> PP communication */
typedef struct gmx_pme_pp *gmx_pme_pp_t;
-/* Internal datastructures */
+/* Temporary suppression until these structs become opaque and don't live in
+ * a header that is included by other headers. Also, until then I have no
+ * idea what some of the names mean. */
+
+//! @cond Doxygen_Suppress
+
+/*! \brief Data structure for grid communication */
typedef struct {
int send_index0;
int send_nindex;
int recv_size; /* Receive buffer width, used with OpenMP */
} pme_grid_comm_t;
+/*! \brief Data structure for grid overlap communication */
typedef struct {
#ifdef GMX_MPI
MPI_Comm mpi_comm;
real *recvbuf;
} pme_overlap_t;
+/*! \brief Data structure for organizing particle allocation to threads */
typedef struct {
int *n; /* Cumulative counts of the number of particles per thread */
int nalloc; /* Allocation size of i */
int *i; /* Particle indices ordered on thread index (n) */
} thread_plist_t;
+/*! \brief Helper typedef for spline vectors */
typedef real *splinevec[DIM];
+/*! \brief Data structure for beta-spline interpolation */
typedef struct {
int *thread_one;
int n;
real *ptr_dtheta_z;
} splinedata_t;
+/*! \brief Data structure for coordinating transfer between PP and PME ranks */
typedef struct {
int dimind; /* The index of the dimension, 0=x, 1=y */
int nslab;
splinedata_t *spline;
} pme_atomcomm_t;
+/*! \brief Data structure for a single PME grid */
typedef struct {
ivec ci; /* The spatial location of this grid */
ivec n; /* The used size of *grid, including order-1 */
real *grid; /* The grid local thread, size n */
} pmegrid_t;
+/*! \brief Data structures for PME grids */
typedef struct {
pmegrid_t grid; /* The full node grid (non thread-local) */
int nthread; /* The number of threads operating on this grid */
ivec nthread_comm; /* The number of threads to communicate with */
} pmegrids_t;
+/*! \brief Data structure for spline-interpolation working buffers */
struct pme_spline_work;
+/*! \brief Data structure for working buffers */
struct pme_solve_work_t;
+/*! \brief Master PME data structure */
typedef struct gmx_pme_t {
int ndecompdim; /* The number of decomposition dimensions */
int nodeid; /* Our nodeid in mpi->mpi_comm */
real * sum_qgrid_dd_tmp;
} t_gmx_pme_t;
-void gmx_pme_check_restrictions(int pme_order,
- int nkx, int nky, int nkz,
- int nnodes_major,
- int nnodes_minor,
- gmx_bool bUseThreads,
- gmx_bool bFatal,
- gmx_bool *bValidSettings);
-/* Check restrictions on pme_order and the PME grid nkx,nky,nkz.
+//! @endcond
+
+/*! \brief Check restrictions on pme_order and the PME grid nkx,nky,nkz.
+ *
* With bFatal=TRUE, a fatal error is generated on violation,
* bValidSettings=NULL can be passed.
* With bFatal=FALSE, *bValidSettings reports the validity of the settings.
 * If bUseThreads is unknown at the time of calling, pass TRUE for conservative
* checking.
*/
+void gmx_pme_check_restrictions(int pme_order,
+ int nkx, int nky, int nkz,
+ int nnodes_major,
+ int nnodes_minor,
+ gmx_bool bUseThreads,
+ gmx_bool bFatal,
+ gmx_bool *bValidSettings);
+/*! \brief Initialize the PME-only side of the PME <-> PP communication */
gmx_pme_pp_t gmx_pme_pp_init(t_commrec *cr);
-/* Initialize the PME-only side of the PME <-> PP communication */
+/*! \brief Tell our PME-only node to switch to a new grid size */
void gmx_pme_send_switchgrid(t_commrec *cr, ivec grid_size, real ewaldcoeff_q, real ewaldcoeff_lj);
-/* Tell our PME-only node to switch to a new grid size */
-/* Return values for gmx_pme_recv_q_x */
+/*! \brief Return values for gmx_pme_recv_q_x */
enum {
pmerecvqxX, /* calculate PME mesh interactions for new x */
pmerecvqxFINISH, /* the simulation should finish, we should quit */
pmerecvqxRESETCOUNTERS /* reset the cycle and flop counters */
};
-int gmx_pme_recv_coeffs_coords(gmx_pme_pp_t pme_pp,
+/*! \brief Called by PME-only ranks to receive coefficients and coordinates
+ *
+ * The return value is used to control further processing, with meanings:
+ * pmerecvqxX: all parameters set, chargeA and chargeB can be NULL
+ * pmerecvqxFINISH: no parameters set
+ * pmerecvqxSWITCHGRID: only grid_size and *ewaldcoeff are set
+ * pmerecvqxRESETCOUNTERS: *step is set
+ */
+int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp *pme_pp,
int *natoms,
real **chargeA, real **chargeB,
real **sqrt_c6A, real **sqrt_c6B,
gmx_bool *bEnerVir, int *pme_flags,
gmx_int64_t *step,
ivec grid_size, real *ewaldcoeff_q, real *ewaldcoeff_lj);
-;
-/* With return value:
- * pmerecvqxX: all parameters set, chargeA and chargeB can be NULL
- * pmerecvqxFINISH: no parameters set
- * pmerecvqxSWITCHGRID: only grid_size and *ewaldcoeff are set
- * pmerecvqxRESETCOUNTERS: *step is set
- */
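+/* Rough usage sketch (an illustration assuming a simplified caller, with most
+ * arguments elided): a PME-only rank keeps calling this and dispatches on the
+ * result until told to finish, e.g.
+ *
+ *   int ret;
+ *   do
+ *   {
+ *       ret = gmx_pme_recv_coeffs_coords(pme_pp, &natoms, ...);
+ *       if (ret == pmerecvqxSWITCHGRID)
+ *       {
+ *           // re-initialize PME for grid_size, *ewaldcoeff_q, *ewaldcoeff_lj
+ *       }
+ *       else if (ret == pmerecvqxRESETCOUNTERS)
+ *       {
+ *           // reset the cycle and flop counters at *step
+ *       }
+ *   }
+ *   while (ret != pmerecvqxX && ret != pmerecvqxFINISH);
+ *   // pmerecvqxX: do the PME mesh work for this step; pmerecvqxFINISH: quit
+ */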
-void gmx_pme_send_force_vir_ener(gmx_pme_pp_t pme_pp,
+/*! \brief Send the PME mesh force, virial and energy to the PP-only nodes */
+void gmx_pme_send_force_vir_ener(struct gmx_pme_pp *pme_pp,
rvec *f, matrix vir_q, real energy_q,
matrix vir_lj, real energy_lj,
real dvdlambda_q, real dvdlambda_lj,
float cycles);
-/* Send the PME mesh force, virial and energy to the PP-only nodes */
-
-#ifdef __cplusplus
-}
-#endif
#endif
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
+/*! \internal \file
+ *
+ * \brief This file contains function definitions necessary for
+ * managing automatic load balance of PME calculations (Coulomb and
+ * LJ).
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \ingroup module_ewald
+ */
#include "gmxpre.h"
#include "pme-load-balancing.h"
#include "config.h"
+#include <cmath>
+
+#include <algorithm>
+
#include "gromacs/domdec/domdec.h"
-#include "gromacs/ewald/pme-internal.h"
#include "gromacs/legacyheaders/calcgrid.h"
#include "gromacs/legacyheaders/force.h"
-#include "gromacs/legacyheaders/macros.h"
#include "gromacs/legacyheaders/md_logging.h"
#include "gromacs/legacyheaders/network.h"
#include "gromacs/legacyheaders/sim_util.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/smalloc.h"
-/* Parameters and setting for one PP-PME setup */
-typedef struct {
- real rcut_coulomb; /* Coulomb cut-off */
- real rlist; /* pair-list cut-off */
- real rlistlong; /* LR pair-list cut-off */
- int nstcalclr; /* frequency of evaluating long-range forces for group scheme */
- real spacing; /* (largest) PME grid spacing */
- ivec grid; /* the PME grid dimensions */
- real grid_efficiency; /* ineffiency factor for non-uniform grids <= 1 */
- real ewaldcoeff_q; /* Electrostatic Ewald coefficient */
- real ewaldcoeff_lj; /* LJ Ewald coefficient, only for the call to send_switchgrid */
- struct gmx_pme_t *pmedata; /* the data structure used in the PME code */
- int count; /* number of times this setup has been timed */
- double cycles; /* the fastest time for this setup in cycles */
-} pme_setup_t;
-
-/* In the initial scan, step by grids that are at least a factor 0.8 coarser */
-#define PME_LB_GRID_SCALE_FAC 0.8
-/* In the initial scan, try to skip grids with uneven x/y/z spacing,
+#include "pme-internal.h"
+
+/*! \brief Parameters and settings for one PP-PME setup */
+struct pme_setup_t {
+ real rcut_coulomb; /**< Coulomb cut-off */
+ real rlist; /**< pair-list cut-off */
+ real rlistlong; /**< LR pair-list cut-off */
+ int nstcalclr; /**< frequency of evaluating long-range forces for group scheme */
+ real spacing; /**< (largest) PME grid spacing */
+ ivec grid; /**< the PME grid dimensions */
+ real grid_efficiency; /**< inefficiency factor for non-uniform grids <= 1 */
+ real ewaldcoeff_q; /**< Electrostatic Ewald coefficient */
+ real ewaldcoeff_lj; /**< LJ Ewald coefficient, only for the call to send_switchgrid */
+ struct gmx_pme_t *pmedata; /**< the data structure used in the PME code */
+ int count; /**< number of times this setup has been timed */
+ double cycles; /**< the fastest time for this setup in cycles */
+};
+
+/*! \brief In the initial scan, step by grids that are at least a factor 0.8 coarser */
+const real gridScaleFactor = 0.8;
+
+/*! \brief In the initial scan, try to skip grids with uneven x/y/z spacing,
* checking if the "efficiency" is more than 5% worse than the previous grid.
*/
-#define PME_LB_GRID_EFFICIENCY_REL_FAC 1.05
-/* Rerun up till 12% slower setups than the fastest up till now */
-#define PME_LB_SLOW_FAC 1.12
-/* If setups get more than 2% faster, do another round to avoid
+const real relativeEfficiencyFactor = 1.05;
+/*! \brief Rerun setups only if they are no more than 12% slower than the fastest setup so far */
+const real maxRelativeSlowdownAccepted = 1.12;
+/*! \brief If setups get more than 2% faster, do another round to avoid
* choosing a slower setup due to acceleration or fluctuations.
*/
-#define PME_LB_ACCEL_TOL 1.02
+const real maxFluctuationAccepted = 1.02;
-enum {
+/*! \brief Enumeration whose values describe the effect limiting the load balancing */
+enum epmelb {
epmelblimNO, epmelblimBOX, epmelblimDD, epmelblimPMEGRID, epmelblimNR
};
+/*! \brief Descriptive strings matching ::epmelb */
const char *pmelblim_str[epmelblimNR] =
{ "no", "box size", "domain decompostion", "PME grid restriction" };
-struct pme_load_balancing {
- int nstage; /* the current maximum number of stages */
-
- real cut_spacing; /* the minimum cutoff / PME grid spacing ratio */
- real rcut_vdw; /* Vdw cutoff (does not change) */
- real rcut_coulomb_start; /* Initial electrostatics cutoff */
- int nstcalclr_start; /* Initial electrostatics cutoff */
- real rbuf_coulomb; /* the pairlist buffer size */
- real rbuf_vdw; /* the pairlist buffer size */
- matrix box_start; /* the initial simulation box */
- int n; /* the count of setup as well as the allocation size */
- pme_setup_t *setup; /* the PME+cutoff setups */
- int cur; /* the current setup */
- int fastest; /* fastest setup up till now */
- int start; /* start of setup range to consider in stage>0 */
- int end; /* end of setup range to consider in stage>0 */
- int elimited; /* was the balancing limited, uses enum above */
- int cutoff_scheme; /* Verlet or group cut-offs */
-
- int stage; /* the current stage */
+struct pme_load_balancing_t {
+ int nstage; /**< the current maximum number of stages */
+
+ real cut_spacing; /**< the minimum cutoff / PME grid spacing ratio */
+ real rcut_vdw; /**< Vdw cutoff (does not change) */
+ real rcut_coulomb_start; /**< Initial electrostatics cutoff */
+ int nstcalclr_start; /**< Initial frequency of evaluating long-range forces for the group scheme */
+ real rbuf_coulomb; /**< the pairlist buffer size */
+ real rbuf_vdw; /**< the pairlist buffer size */
+ matrix box_start; /**< the initial simulation box */
+ int n; /**< the count of setup as well as the allocation size */
+ pme_setup_t *setup; /**< the PME+cutoff setups */
+ int cur; /**< the current setup */
+ int fastest; /**< fastest setup up till now */
+ int start; /**< start of setup range to consider in stage>0 */
+ int end; /**< end of setup range to consider in stage>0 */
+ int elimited; /**< was the balancing limited, uses enum above */
+ int cutoff_scheme; /**< Verlet or group cut-offs */
+
+ int stage; /**< the current stage */
};
-void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
+void pme_loadbal_init(pme_load_balancing_t **pme_lb_p,
const t_inputrec *ir, matrix box,
const interaction_const_t *ic,
struct gmx_pme_t *pmedata)
{
- pme_load_balancing_t pme_lb;
- real spm, sp;
- int d;
+ pme_load_balancing_t *pme_lb;
+ real spm, sp;
+ int d;
snew(pme_lb, 1);
*pme_lb_p = pme_lb;
}
-static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
+/*! \brief Try to increase the cutoff during load balancing */
+static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t *pme_lb,
int pme_order,
const gmx_domdec_t *dd)
{
{
tmpr_coulomb = set->rcut_coulomb + pme_lb->rbuf_coulomb;
tmpr_vdw = pme_lb->rcut_vdw + pme_lb->rbuf_vdw;
- set->rlist = min(tmpr_coulomb, tmpr_vdw);
- set->rlistlong = max(tmpr_coulomb, tmpr_vdw);
+ set->rlist = std::min(tmpr_coulomb, tmpr_vdw);
+ set->rlistlong = std::max(tmpr_coulomb, tmpr_vdw);
/* Set the long-range update frequency */
if (set->rlist == set->rlistlong)
return TRUE;
}
+/*! \brief Print the PME grid */
static void print_grid(FILE *fp_err, FILE *fp_log,
const char *pre,
const char *desc,
}
}
-static int pme_loadbal_end(pme_load_balancing_t pme_lb)
+/*! \brief Return the index of the last setup used in PME load balancing */
+static int pme_loadbal_end(pme_load_balancing_t *pme_lb)
{
/* In the initial stage only n is set; end is not set yet */
if (pme_lb->end > 0)
}
}
+/*! \brief Print descriptive string about what limits PME load balancing */
static void print_loadbal_limited(FILE *fp_err, FILE *fp_log,
gmx_int64_t step,
- pme_load_balancing_t pme_lb)
+ pme_load_balancing_t *pme_lb)
{
char buf[STRLEN], sbuf[22];
}
}
-static void switch_to_stage1(pme_load_balancing_t pme_lb)
+/*! \brief Switch load balancing to stage 1
+ *
+ * In this stage, only reasonably fast setups are run again. */
+static void switch_to_stage1(pme_load_balancing_t *pme_lb)
{
pme_lb->start = 0;
while (pme_lb->start+1 < pme_lb->n &&
(pme_lb->setup[pme_lb->start].count == 0 ||
pme_lb->setup[pme_lb->start].cycles >
- pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC))
+ pme_lb->setup[pme_lb->fastest].cycles*maxRelativeSlowdownAccepted))
{
pme_lb->start++;
}
pme_lb->end = pme_lb->n;
if (pme_lb->setup[pme_lb->end-1].count > 0 &&
pme_lb->setup[pme_lb->end-1].cycles >
- pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC)
+ pme_lb->setup[pme_lb->fastest].cycles*maxRelativeSlowdownAccepted)
{
pme_lb->end--;
}
pme_lb->cur = pme_lb->start - 1;
}
-gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
+gmx_bool pme_load_balance(pme_load_balancing_t *pme_lb,
t_commrec *cr,
FILE *fp_err,
FILE *fp_log,
}
else
{
- if (cycles*PME_LB_ACCEL_TOL < set->cycles &&
+ if (cycles*maxFluctuationAccepted < set->cycles &&
pme_lb->stage == pme_lb->nstage - 1)
{
/* The performance went up a lot (due to e.g. DD load balancing).
"Increased the number stages to %d"
" and ignoring the previous performance\n",
set->grid[XX], set->grid[YY], set->grid[ZZ],
- cycles*1e-6, set->cycles*1e-6, PME_LB_ACCEL_TOL,
+ cycles*1e-6, set->cycles*1e-6, maxFluctuationAccepted,
pme_lb->nstage);
}
}
- set->cycles = min(set->cycles, cycles);
+ set->cycles = std::min(set->cycles, cycles);
}
if (set->cycles < pme_lb->setup[pme_lb->fastest].cycles)
cycles_fast = pme_lb->setup[pme_lb->fastest].cycles;
/* Check in stage 0 if we should stop scanning grids.
- * Stop when the time is more than SLOW_FAC longer than the fastest.
+ * Stop when the time is more than a factor maxRelativeSlowdownAccepted longer than the fastest.
*/
if (pme_lb->stage == 0 && pme_lb->cur > 0 &&
- cycles > pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC)
+ cycles > pme_lb->setup[pme_lb->fastest].cycles*maxRelativeSlowdownAccepted)
{
pme_lb->n = pme_lb->cur + 1;
/* Done with scanning, go to stage 1 */
!(pme_lb->setup[pme_lb->cur].grid[XX]*
pme_lb->setup[pme_lb->cur].grid[YY]*
pme_lb->setup[pme_lb->cur].grid[ZZ] <
- gridsize_start*PME_LB_GRID_SCALE_FAC
+ gridsize_start*gridScaleFactor
&&
pme_lb->setup[pme_lb->cur].grid_efficiency <
- pme_lb->setup[pme_lb->cur-1].grid_efficiency*PME_LB_GRID_EFFICIENCY_REL_FAC));
+ pme_lb->setup[pme_lb->cur-1].grid_efficiency*relativeEfficiencyFactor));
}
if (pme_lb->stage > 0 && pme_lb->end == 1)
}
while (pme_lb->stage == pme_lb->nstage - 1 &&
pme_lb->setup[pme_lb->cur].count > 0 &&
- pme_lb->setup[pme_lb->cur].cycles > cycles_fast*PME_LB_SLOW_FAC);
+ pme_lb->setup[pme_lb->cur].cycles > cycles_fast*maxRelativeSlowdownAccepted);
if (pme_lb->stage == pme_lb->nstage)
{
ic->ewaldcoeff_lj = set->ewaldcoeff_lj;
if (ic->vdw_modifier == eintmodPOTSHIFT)
{
- real crc2;
+ real crc2;
- ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
- ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0);
+ ic->dispersion_shift.cpot = -std::pow(static_cast<double>(ic->rvdw), -6.0);
+ ic->repulsion_shift.cpot = -std::pow(static_cast<double>(ic->rvdw), -12.0);
ic->sh_invrc6 = -ic->dispersion_shift.cpot;
crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
- ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
+ ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*std::pow(static_cast<double>(ic->rvdw), -6.0);
}
}
return TRUE;
}
-void restart_pme_loadbal(pme_load_balancing_t pme_lb, int n)
+void restart_pme_loadbal(pme_load_balancing_t *pme_lb, int n)
{
pme_lb->nstage += n;
}
+/*! \brief Return the product of the number of PME grid points in each dimension */
static int pme_grid_points(const pme_setup_t *setup)
{
return setup->grid[XX]*setup->grid[YY]*setup->grid[ZZ];
}
+/*! \brief Return the largest short-range list cut-off radius */
static real pme_loadbal_rlist(const pme_setup_t *setup)
{
/* With the group cut-off scheme we can have twin-range either
}
}
+/*! \brief Print one load-balancing setting */
static void print_pme_loadbal_setting(FILE *fplog,
- char *name,
+ const char *name,
const pme_setup_t *setup)
{
fprintf(fplog,
setup->spacing, 1/setup->ewaldcoeff_q);
}
-static void print_pme_loadbal_settings(pme_load_balancing_t pme_lb,
- t_commrec *cr,
- FILE *fplog,
- gmx_bool bNonBondedOnGPU)
+/*! \brief Print all load-balancing settings */
+static void print_pme_loadbal_settings(pme_load_balancing_t *pme_lb,
+ t_commrec *cr,
+ FILE *fplog,
+ gmx_bool bNonBondedOnGPU)
{
- double pp_ratio, grid_ratio;
+ double pp_ratio, grid_ratio;
+ real pp_ratio_temporary;
- pp_ratio = pow(pme_loadbal_rlist(&pme_lb->setup[pme_lb->cur])/pme_loadbal_rlist(&pme_lb->setup[0]), 3.0);
- grid_ratio = pme_grid_points(&pme_lb->setup[pme_lb->cur])/
+ pp_ratio_temporary = pme_loadbal_rlist(&pme_lb->setup[pme_lb->cur])/pme_loadbal_rlist(&pme_lb->setup[0]);
+ pp_ratio = std::pow(static_cast<double>(pp_ratio_temporary), 3.0);
+ grid_ratio = pme_grid_points(&pme_lb->setup[pme_lb->cur])/
(double)pme_grid_points(&pme_lb->setup[0]);
fprintf(fplog, "\n");
}
}
-void pme_loadbal_done(pme_load_balancing_t pme_lb,
- t_commrec *cr, FILE *fplog,
- gmx_bool bNonBondedOnGPU)
+void pme_loadbal_done(pme_load_balancing_t *pme_lb,
+ t_commrec *cr,
+ FILE *fplog,
+ gmx_bool bNonBondedOnGPU)
{
if (fplog != NULL && (pme_lb->cur > 0 || pme_lb->elimited != epmelblimNO))
{
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
+/*! \libinternal \file
+ *
+ * \brief This file contains function declarations necessary for
+ * managing automatic load balance of PME calculations (Coulomb and
+ * LJ).
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \inlibraryapi
+ * \ingroup module_ewald
+ */
#ifndef GMX_EWALD_PME_LOAD_BALANCING_H
#define GMX_EWALD_PME_LOAD_BALANCING_H
#include "gromacs/legacyheaders/types/interaction_const.h"
#include "gromacs/legacyheaders/types/state.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct pme_load_balancing *pme_load_balancing_t;
+/*! \brief Object to manage PME load balancing */
+struct pme_load_balancing_t;
-/* Initialze the PP-PME load balacing data and infrastructure */
-void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
- const t_inputrec *ir, matrix box,
+/*! \brief Initialize the PP-PME load balancing data and infrastructure */
+void pme_loadbal_init(pme_load_balancing_t **pme_lb_p,
+ const t_inputrec *ir,
+ matrix box,
const interaction_const_t *ic,
- struct gmx_pme_t *pmedata);
+ struct gmx_pme_t *pmedata);
-/* Try to adjust the PME grid and Coulomb cut-off.
+/*! \brief Try to adjust the PME grid and Coulomb cut-off.
+ *
* The adjustment is done to generate a different non-bonded PP and PME load.
* With separate PME nodes (PP and PME on different processes) or with
* a GPU (PP on GPU, PME on CPU), PP and PME run on different resources
* times and acquiring enough statistics, the best performing setup is chosen.
* Here we try to take into account fluctuations and changes due to external
* factors as well as DD load balancing.
- * Returns TRUE the load balancing continues, FALSE is the balancing is done.
+ *
+ * \return TRUE if the load balancing continues, FALSE if the balancing is done.
*/
-gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
+gmx_bool pme_load_balance(pme_load_balancing_t *pme_lb,
t_commrec *cr,
FILE *fp_err,
FILE *fp_log,
struct gmx_pme_t ** pmedata,
gmx_int64_t step);
-/* Restart the PME load balancing discarding all timings gathered up till now */
-void restart_pme_loadbal(pme_load_balancing_t pme_lb, int n);
+/*! \brief Restart the PME load balancing discarding all timings gathered up till now */
+void restart_pme_loadbal(pme_load_balancing_t *pme_lb, int n);
-/* Finish the PME load balancing and print the settings when fplog!=NULL */
-void pme_loadbal_done(pme_load_balancing_t pme_lb,
+/*! \brief Finish the PME load balancing and print the settings when fplog!=NULL */
+void pme_loadbal_done(pme_load_balancing_t *pme_lb,
t_commrec *cr, FILE *fplog,
gmx_bool bNonBondedOnGPU);
-#ifdef __cplusplus
-}
-#endif
-
#endif
#include <stdlib.h>
#include <string.h>
-#include "gromacs/ewald/pme-internal.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/fft/parallel_3dfft.h"
#include "gromacs/fileio/pdbio.h"
-#include "gromacs/legacyheaders/macros.h"
#include "gromacs/legacyheaders/network.h"
#include "gromacs/legacyheaders/nrnb.h"
#include "gromacs/legacyheaders/txtdump.h"
#include "gromacs/utility/gmxomp.h"
#include "gromacs/utility/smalloc.h"
+#include "pme-internal.h"
+
static void reset_pmeonly_counters(gmx_wallcycle_t wcycle,
gmx_walltime_accounting_t walltime_accounting,
t_nrnb *nrnb, t_inputrec *ir,
int count;
gmx_bool bEnerVir;
int pme_flags;
- gmx_int64_t step, step_rel;
+ gmx_int64_t step;
ivec grid_switch;
/* This data will only use with PME tuning, i.e. switching PME grids */
break;
}
- step_rel = step - ir->init_step;
-
if (count == 0)
{
wallcycle_start(wcycle, ewcRUN);
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
+/*! \internal \file
+ *
+ * \brief This file contains function definitions necessary for
+ * managing the offload of long-ranged PME work to a separate MPI rank,
+ * for computing energies and forces (Coulomb and LJ).
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \ingroup module_ewald
+ */
#include "gmxpre.h"
#include <string.h>
#include "gromacs/domdec/domdec.h"
-#include "gromacs/ewald/pme-internal.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/legacyheaders/network.h"
#include "gromacs/legacyheaders/sighandler.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/smalloc.h"
+#include "pme-internal.h"
+
+/*! \brief MPI Tags used to separate communication of different types of quantities */
enum {
eCommType_ChargeA, eCommType_ChargeB, eCommType_SQRTC6A, eCommType_SQRTC6B,
eCommType_SigmaA, eCommType_SigmaB, eCommType_NR, eCommType_COORD,
eCommType_CNB
};
-/* Some parts of the code(gmx_pme_send_q, gmx_pme_recv_q_x) assume
+//@{
+/*! \brief Flags used to coordinate PP-PME communication and computation phases
+ *
+ * Some parts of the code (gmx_pme_send_q, gmx_pme_recv_q_x) assume
 * that the first six flags are exactly in this order.
* If more PP_PME_...-flags are to be introduced be aware of some of
* the PME-specific flags in pme.h. Currently, they are also passed
#define PME_PP_SIGSTOP (1<<0)
#define PME_PP_SIGSTOPNSS (1<<1)
+//@}
-typedef struct gmx_pme_pp {
+/*! \brief Master PP-PME communication data structure */
+struct gmx_pme_pp {
#ifdef GMX_MPI
- MPI_Comm mpi_comm_mysim;
+ MPI_Comm mpi_comm_mysim; /**< MPI communicator for this simulation */
#endif
- int nnode; /* The number of PP node to communicate with */
- int *node; /* The PP node ranks */
- int node_peer; /* The peer PP node rank */
- int *nat; /* The number of atom for each PP node */
- int flags_charge; /* The flags sent along with the last charges */
+ int nnode; /**< The number of PP nodes to communicate with */
+ int *node; /**< The PP node ranks */
+ int node_peer; /**< The peer PP node rank */
+ int *nat; /**< The number of atoms for each PP node */
+ int flags_charge; /**< The flags sent along with the last charges */
+ //@{
+ /**< Vectors of A- and B-state parameters used to transfer vectors to PME ranks */
real *chargeA;
real *chargeB;
real *sqrt_c6A;
real *sqrt_c6B;
real *sigmaA;
real *sigmaB;
- rvec *x;
- rvec *f;
- int nalloc;
+ //@}
+ rvec *x; /**< Vector of atom coordinates to transfer to PME ranks */
+ rvec *f; /**< Vector of atom forces received from PME ranks */
+ int nalloc; /**< Allocation size of transfer vectors (>= \p nat) */
#ifdef GMX_MPI
+ //@{
+ /**< Vectors of MPI objects used in non-blocking communication between multiple PP ranks per PME rank */
MPI_Request *req;
MPI_Status *stat;
+ //@}
#endif
-} t_gmx_pme_pp;
+};
+/*! \brief Helper struct for PP-PME communication of parameters */
typedef struct gmx_pme_comm_n_box {
- int natoms;
- matrix box;
- int maxshift_x;
- int maxshift_y;
- real lambda_q;
- real lambda_lj;
- int flags;
- gmx_int64_t step;
- ivec grid_size; /* For PME grid tuning */
- real ewaldcoeff_q; /* For PME grid tuning */
+ int natoms; /**< Number of atoms */
+ matrix box; /**< Box */
+ int maxshift_x; /**< Maximum shift in x direction */
+ int maxshift_y; /**< Maximum shift in y direction */
+ real lambda_q; /**< Free-energy lambda for electrostatics */
+ real lambda_lj; /**< Free-energy lambda for Lennard-Jones */
+ int flags; /**< Control flags */
+ gmx_int64_t step; /**< MD integration step number */
+ //@{
+ /*! \brief Used in PME grid tuning */
+ ivec grid_size;
+ real ewaldcoeff_q;
real ewaldcoeff_lj;
+ //@}
} gmx_pme_comm_n_box_t;
+/*! \brief Helper struct for PP-PME communication of virial and energy */
typedef struct {
+ //@{
+ /*! \brief Virial, energy, and derivative of potential w.r.t. lambda for charge and Lennard-Jones */
matrix vir_q;
matrix vir_lj;
real energy_q;
real energy_lj;
real dvdlambda_q;
real dvdlambda_lj;
- float cycles;
- gmx_stop_cond_t stop_cond;
+ //@}
+ float cycles; /**< Counter of CPU cycles used */
+ gmx_stop_cond_t stop_cond; /**< Flag used in responding to an external signal to terminate */
} gmx_pme_comm_vir_ene_t;
-
-
-
-gmx_pme_pp_t gmx_pme_pp_init(t_commrec gmx_unused *cr)
+gmx_pme_pp_t gmx_pme_pp_init(t_commrec *cr)
{
struct gmx_pme_pp *pme_pp;
- int rank;
snew(pme_pp, 1);
#ifdef GMX_MPI
+ int rank;
+
pme_pp->mpi_comm_mysim = cr->mpi_comm_mysim;
MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);
get_pme_ddnodes(cr, rank, &pme_pp->nnode, &pme_pp->node, &pme_pp->node_peer);
snew(pme_pp->stat, eCommType_NR*pme_pp->nnode);
pme_pp->nalloc = 0;
pme_pp->flags_charge = 0;
+#else
+ GMX_UNUSED_VALUE(cr);
#endif
return pme_pp;
}
-/* This should be faster with a real non-blocking MPI implementation */
+/*! \brief Block to wait for communication to PME ranks to complete
+ *
+ * This should be faster with a real non-blocking MPI implementation */
/* #define GMX_PME_DELAYED_WAIT */
static void gmx_pme_send_coeffs_coords_wait(gmx_domdec_t gmx_unused *dd)
#endif
}
+/*! \brief Send data to PME ranks */
static void gmx_pme_send_coeffs_coords(t_commrec *cr, int flags,
real gmx_unused *chargeA, real gmx_unused *chargeB,
real gmx_unused *c6A, real gmx_unused *c6B,
#endif
}
-int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp *pme_pp,
- int *natoms,
- real **chargeA,
- real **chargeB,
- real **sqrt_c6A,
- real **sqrt_c6B,
- real **sigmaA,
- real **sigmaB,
- matrix gmx_unused box,
- rvec **x,
- rvec **f,
- int gmx_unused *maxshift_x,
- int gmx_unused *maxshift_y,
- gmx_bool gmx_unused *bFreeEnergy_q,
- gmx_bool gmx_unused *bFreeEnergy_lj,
- real gmx_unused *lambda_q,
- real gmx_unused *lambda_lj,
- gmx_bool gmx_unused *bEnerVir,
- int *pme_flags,
- gmx_int64_t gmx_unused *step,
- ivec gmx_unused grid_size,
- real gmx_unused *ewaldcoeff_q,
- real gmx_unused *ewaldcoeff_lj)
+int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp *pme_pp,
+ int *natoms,
+ real **chargeA,
+ real **chargeB,
+ real **sqrt_c6A,
+ real **sqrt_c6B,
+ real **sigmaA,
+ real **sigmaB,
+ matrix box,
+ rvec **x,
+ rvec **f,
+ int *maxshift_x,
+ int *maxshift_y,
+ gmx_bool *bFreeEnergy_q,
+ gmx_bool *bFreeEnergy_lj,
+ real *lambda_q,
+ real *lambda_lj,
+ gmx_bool *bEnerVir,
+ int *pme_flags,
+ gmx_int64_t *step,
+ ivec grid_size,
+ real *ewaldcoeff_q,
+ real *ewaldcoeff_lj)
{
- gmx_pme_comm_n_box_t cnb;
- int nat = 0, q, messages, sender;
- real *charge_pp;
-
- messages = 0;
+ int nat = 0, status;
- /* avoid compiler warning about unused variable without MPI support */
- cnb.flags = 0;
*pme_flags = 0;
#ifdef GMX_MPI
+ gmx_pme_comm_n_box_t cnb;
+ int messages;
+
+ cnb.flags = 0;
+ messages = 0;
do
{
+
/* Receive the send count, box and time step from the peer PP node */
MPI_Recv(&cnb, sizeof(cnb), MPI_BYTE,
pme_pp->node_peer, eCommType_CNB,
if (cnb.flags & (PP_PME_CHARGE | PP_PME_SQRTC6 | PP_PME_SIGMA))
{
/* Receive the send counts from the other PP nodes */
- for (sender = 0; sender < pme_pp->nnode; sender++)
+ for (int sender = 0; sender < pme_pp->nnode; sender++)
{
if (pme_pp->node[sender] == pme_pp->node_peer)
{
messages = 0;
nat = 0;
- for (sender = 0; sender < pme_pp->nnode; sender++)
+ for (int sender = 0; sender < pme_pp->nnode; sender++)
{
nat += pme_pp->nat[sender];
}
*maxshift_y = cnb.maxshift_y;
/* Receive the charges in place */
- for (q = 0; q < eCommType_NR; q++)
+ for (int q = 0; q < eCommType_NR; q++)
{
+ real *charge_pp;
+
if (!(cnb.flags & (PP_PME_CHARGE<<q)))
{
continue;
default: gmx_incons("Wrong eCommType");
}
nat = 0;
- for (sender = 0; sender < pme_pp->nnode; sender++)
+ for (int sender = 0; sender < pme_pp->nnode; sender++)
{
if (pme_pp->nat[sender] > 0)
{
/* Receive the coordinates in place */
nat = 0;
- for (sender = 0; sender < pme_pp->nnode; sender++)
+ for (int sender = 0; sender < pme_pp->nnode; sender++)
{
if (pme_pp->nat[sender] > 0)
{
messages = 0;
}
while (!(cnb.flags & (PP_PME_COORD | PP_PME_FINISH)));
+ status = ((cnb.flags & PP_PME_FINISH) ? pmerecvqxFINISH : pmerecvqxX);
*step = cnb.step;
+#else
+ GMX_UNUSED_VALUE(box);
+ GMX_UNUSED_VALUE(maxshift_x);
+ GMX_UNUSED_VALUE(maxshift_y);
+ GMX_UNUSED_VALUE(bFreeEnergy_q);
+ GMX_UNUSED_VALUE(bFreeEnergy_lj);
+ GMX_UNUSED_VALUE(lambda_q);
+ GMX_UNUSED_VALUE(lambda_lj);
+ GMX_UNUSED_VALUE(bEnerVir);
+ GMX_UNUSED_VALUE(step);
+ GMX_UNUSED_VALUE(grid_size);
+ GMX_UNUSED_VALUE(ewaldcoeff_q);
+ GMX_UNUSED_VALUE(ewaldcoeff_lj);
+
+ status = pmerecvqxX;
#endif
*natoms = nat;
*x = pme_pp->x;
*f = pme_pp->f;
- return ((cnb.flags & PP_PME_FINISH) ? pmerecvqxFINISH : pmerecvqxX);
+ return status;
}
-
+/*! \brief Receive virial and energy from PME rank */
static void receive_virial_energy(t_commrec *cr,
matrix vir_q, real *energy_q,
matrix vir_lj, real *energy_lj,
real dvdlambda_q, real dvdlambda_lj,
float cycles)
{
+#ifdef GMX_MPI
gmx_pme_comm_vir_ene_t cve;
- int messages, ind_start, ind_end, receiver;
-
+ int messages, ind_start, ind_end;
cve.cycles = cycles;
/* Now the evaluated forces have to be transferred to the PP nodes */
messages = 0;
ind_end = 0;
- for (receiver = 0; receiver < pme_pp->nnode; receiver++)
+ for (int receiver = 0; receiver < pme_pp->nnode; receiver++)
{
ind_start = ind_end;
ind_end = ind_start + pme_pp->nat[receiver];
-#ifdef GMX_MPI
if (MPI_Isend(f[ind_start], (ind_end-ind_start)*sizeof(rvec), MPI_BYTE,
pme_pp->node[receiver], 0,
pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]) != 0)
{
gmx_comm("MPI_Isend failed in do_pmeonly");
}
-#endif
}
/* send virial and energy to our last PP node */
fprintf(debug, "PME rank sending to PP rank %d: virial and energy\n",
pme_pp->node_peer);
}
-#ifdef GMX_MPI
MPI_Isend(&cve, sizeof(cve), MPI_BYTE,
pme_pp->node_peer, 1,
pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
/* Wait for the forces to arrive */
MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
+#else
+ gmx_call("MPI not enabled");
+ GMX_UNUSED_VALUE(pme_pp);
+ GMX_UNUSED_VALUE(f);
+ GMX_UNUSED_VALUE(vir_q);
+ GMX_UNUSED_VALUE(energy_q);
+ GMX_UNUSED_VALUE(vir_lj);
+ GMX_UNUSED_VALUE(energy_lj);
+ GMX_UNUSED_VALUE(dvdlambda_q);
+ GMX_UNUSED_VALUE(dvdlambda_lj);
+ GMX_UNUSED_VALUE(cycles);
#endif
}
#include "config.h"
-#include "gromacs/ewald/pme-internal.h"
-#include "gromacs/ewald/pme-simd.h"
-#include "gromacs/legacyheaders/macros.h"
+#include <algorithm>
+
#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/math/vec.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/smalloc.h"
+#include "pme-internal.h"
+#include "pme-simd.h"
+
static void pme_calc_pidx(int start, int end,
matrix recipbox, rvec x[],
pme_atomcomm_t *atc, int *count)
static void pme_realloc_splinedata(splinedata_t *spline, pme_atomcomm_t *atc)
{
- int i, d;
+ int i;
srenew(spline->ind, atc->nalloc);
/* Initialize the index to identity so it works without threads */
void pme_realloc_atomcomm_things(pme_atomcomm_t *atc)
{
- int nalloc_old, i, j, nalloc_tpl;
+ int nalloc_old, i;
/* We have to avoid a NULL pointer for atc->x to avoid
* possible fatal errors in MPI routines.
if (atc->n > atc->nalloc || atc->nalloc == 0)
{
nalloc_old = atc->nalloc;
- atc->nalloc = over_alloc_dd(max(atc->n, 1));
+ atc->nalloc = over_alloc_dd(std::max(atc->n, 1));
if (atc->nslab > 1)
{
commnode = atc->node_dest;
buf_index = atc->buf_index;
- nnodes_comm = min(2*atc->maxshift, atc->nslab-1);
+ nnodes_comm = std::min(2*atc->maxshift, atc->nslab-1);
nsend = 0;
for (i = 0; i < nnodes_comm; i++)
commnode = atc->node_dest;
buf_index = atc->buf_index;
- nnodes_comm = min(2*atc->maxshift, atc->nslab-1);
+ nnodes_comm = std::min(2*atc->maxshift, atc->nslab-1);
local_pos = atc->count[atc->nodeid];
buf_pos = 0;
#ifndef GMX_EWALD_PME_REDISTRIBUTE_H
#define GMX_EWALD_PME_REDISTRIBUTE_H
-#include "gromacs/ewald/pme-internal.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "pme-internal.h"
void
pme_realloc_atomcomm_things(pme_atomcomm_t *atc);
do_redist_pos_coeffs(struct gmx_pme_t *pme, t_commrec *cr, int start, int homenr,
gmx_bool bFirst, rvec x[], real *data);
-#ifdef __cplusplus
-}
-#endif
-
#endif
#include <math.h>
-#include "gromacs/ewald/pme-internal.h"
#include "gromacs/fft/parallel_3dfft.h"
#include "gromacs/math/vec.h"
#include "gromacs/simd/simd.h"
#include "gromacs/simd/simd_math.h"
#include "gromacs/utility/smalloc.h"
+#include "pme-internal.h"
+
#ifdef GMX_SIMD_HAVE_REAL
/* Turn on arbitrary width SIMD intrinsics for PME solve */
# define PME_SIMD_SOLVE
gmx_inline static void calc_exponentials_q(int gmx_unused start, int end, real f, real *d_aligned, real *r_aligned, real *e_aligned)
{
{
- const gmx_simd_real_t two = gmx_simd_set1_r(2.0);
gmx_simd_real_t f_simd;
- gmx_simd_real_t lu;
gmx_simd_real_t tmp_d1, d_inv, tmp_r, tmp_e;
int kx;
f_simd = gmx_simd_set1_r(f);
/* do recip sum over local cells in grid */
/* y major, z middle, x minor or continuous */
t_complex *p0;
- int kx, ky, kz, maxkx, maxky, maxkz;
+ int kx, ky, kz, maxkx, maxky;
int nx, ny, nz, iyz0, iyz1, iyz, iy, iz, kxstart, kxend;
real mx, my, mz;
real factor = M_PI*M_PI/(ewaldcoeff*ewaldcoeff);
maxkx = (nx+1)/2;
maxky = (ny+1)/2;
- maxkz = nz/2+1;
work = &pme->solve_work[thread];
mhx = work->mhx;
/* do recip sum over local cells in grid */
/* y major, z middle, x minor or continuous */
int ig, gcount;
- int kx, ky, kz, maxkx, maxky, maxkz;
+ int kx, ky, kz, maxkx, maxky;
int nx, ny, nz, iy, iyz0, iyz1, iyz, iz, kxstart, kxend;
real mx, my, mz;
real factor = M_PI*M_PI/(ewaldcoeff*ewaldcoeff);
real rxx, ryx, ryy, rzx, rzy, rzz;
real *mhx, *mhy, *mhz, *m2, *denom, *tmp1, *tmp2;
real mhxk, mhyk, mhzk, m2k;
- real mk;
struct pme_solve_work_t *work;
real corner_fac;
ivec complex_order;
maxkx = (nx+1)/2;
maxky = (ny+1)/2;
- maxkz = nz/2+1;
work = &pme->solve_work[thread];
mhx = work->mhx;
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/real.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
struct pme_solve_work_t;
struct gmx_pme_t;
real ewaldcoeff, real vol,
gmx_bool bEnerVir, int nthread, int thread);
-#ifdef __cplusplus
-}
-#endif
-
#endif
#include "pme-spline-work.h"
-#include "gromacs/ewald/pme-simd.h"
#include "gromacs/simd/simd.h"
#include "gromacs/utility/real.h"
#include "gromacs/utility/smalloc.h"
+#include "pme-simd.h"
+
struct pme_spline_work *make_pme_spline_work(int gmx_unused order)
{
struct pme_spline_work *work;
#ifndef GMX_EWALD_PME_SPLINE_WORK_H
#define GMX_EWALD_PME_SPLINE_WORK_H
-#include "gromacs/ewald/pme-simd.h"
#include "gromacs/simd/simd.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "pme-simd.h"
struct pme_spline_work
{
struct pme_spline_work *make_pme_spline_work(int order);
-#ifdef __cplusplus
-}
-#endif
-
#endif
#include <algorithm>
-#include "gromacs/ewald/pme-internal.h"
-#include "gromacs/ewald/pme-simd.h"
-#include "gromacs/ewald/pme-spline-work.h"
-#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/ewald/pme.h"
#include "gromacs/simd/simd.h"
#include "gromacs/utility/smalloc.h"
+#include "pme-internal.h"
+#include "pme-simd.h"
+#include "pme-spline-work.h"
+
/* TODO consider split of pme-spline from this file */
static void calc_interpolation_idx(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
#define PME_SPREAD_SIMD4_ALIGNED
#define PME_ORDER 4
#endif
-#include "gromacs/ewald/pme-simd4.h"
+#include "pme-simd4.h"
#else
DO_BSPLINE(4);
#endif
#ifdef PME_SIMD4_SPREAD_GATHER
#define PME_SPREAD_SIMD4_ALIGNED
#define PME_ORDER 5
-#include "gromacs/ewald/pme-simd4.h"
+#include "pme-simd4.h"
#else
DO_BSPLINE(5);
#endif
#ifndef GMX_EWALD_PME_SPREAD_H
#define GMX_EWALD_PME_SPREAD_H
-#include "gromacs/ewald/pme-internal.h"
#include "gromacs/utility/real.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "pme-internal.h"
void
spread_on_grid(struct gmx_pme_t *pme,
gmx_bool bCalcSplines, gmx_bool bSpread,
real *fftgrid, gmx_bool bDoSplines, int grid_index);
-#ifdef __cplusplus
-}
-#endif
-
#endif
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
+/*! \internal \file
+ *
+ * \brief This file contains function definitions necessary for
+ * computing energies and forces for the PME long-ranged part (Coulomb
+ * and LJ).
+ *
+ * \author Erik Lindahl <erik@kth.se>
+ * \author Berk Hess <hess@kth.se>
+ * \ingroup module_ewald
+ */
/* IMPORTANT FOR DEVELOPERS:
*
* Triclinic pme stuff isn't entirely trivial, and we've experienced
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
-#include "gromacs/ewald/calculate-spline-moduli.h"
-#include "gromacs/ewald/pme-gather.h"
-#include "gromacs/ewald/pme-grid.h"
-#include "gromacs/ewald/pme-internal.h"
-#include "gromacs/ewald/pme-redistribute.h"
-#include "gromacs/ewald/pme-solve.h"
-#include "gromacs/ewald/pme-spline-work.h"
-#include "gromacs/ewald/pme-spread.h"
-#include "gromacs/fft/fft.h"
#include "gromacs/fft/parallel_3dfft.h"
-#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/fileio/pdbio.h"
+#include "gromacs/legacyheaders/network.h"
#include "gromacs/legacyheaders/nrnb.h"
#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/legacyheaders/types/enums.h"
#include "gromacs/utility/real.h"
#include "gromacs/utility/smalloc.h"
-#ifdef GMX_DOUBLE
-#define mpi_type MPI_DOUBLE
-#else
-#define mpi_type MPI_FLOAT
-#endif
+#include "calculate-spline-moduli.h"
+#include "pme-gather.h"
+#include "pme-grid.h"
+#include "pme-internal.h"
+#include "pme-redistribute.h"
+#include "pme-solve.h"
+#include "pme-spline-work.h"
+#include "pme-spread.h"
-/* GMX_CACHE_SEP should be a multiple of the SIMD and SIMD4 register size
- * to preserve alignment.
+/*! \brief Number of bytes in a cache line.
+ *
+ * Must also be a multiple of the SIMD and SIMD4 register size, to
+ * preserve alignment.
*/
-#define GMX_CACHE_SEP 64
-
+const int gmxCacheLineSize = 64;
+//! Set up coordinate communication
static void setup_coordinate_communication(pme_atomcomm_t *atc)
{
int nslab, n, i;
int gmx_pme_destroy(FILE *log, struct gmx_pme_t **pmedata)
{
- int thread, i;
+ int i;
if (NULL != log)
{
return 0;
}
+/*! \brief Round \p n up to the next multiple of \p f */
static int mult_up(int n, int f)
{
return ((n + f - 1)/f)*f;
}
-// TODO change this name to reflect what it actually does
-static double pme_load_imbalance(struct gmx_pme_t *pme)
+/*! \brief Return an estimate of the load imbalance from the PME grid not being a good match for the number of PME ranks */
+static double estimate_pme_load_imbalance(struct gmx_pme_t *pme)
{
int nma, nmi;
double n1, n2, n3;
return (n1 + n2 + 3*n3)/(double)(6*pme->nkx*pme->nky*pme->nkz);
}
+/*! \brief Initialize atom communication data structure */
static void init_atomcomm(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
int dimind, gmx_bool bSpread)
{
- int nk, k, s, thread;
+ int thread;
atc->dimind = dimind;
atc->nslab = 1;
{
if (atc->nthread > 1)
{
- snew(atc->thread_plist[thread].n, atc->nthread+2*GMX_CACHE_SEP);
- atc->thread_plist[thread].n += GMX_CACHE_SEP;
+ snew(atc->thread_plist[thread].n, atc->nthread+2*gmxCacheLineSize);
+ atc->thread_plist[thread].n += gmxCacheLineSize;
}
snew(atc->spline[thread].thread_one, pme->nthread);
atc->spline[thread].thread_one[thread] = 1;
}
}
+/*! \brief Initialize the data structure for grid-overlap communication */
static void
init_overlap_comm(pme_overlap_t * ol,
int norder,
int ndata,
int commplainsize)
{
- int lbnd, rbnd, maxlr, b, i;
- int exten;
- int nn, nk;
+ int b, i;
pme_grid_comm_t *pgc;
gmx_bool bCont;
int fft_start, fft_end, send_index1, recv_index1;
fft_end += ndata;
}
send_index1 = ol->s2g1[nodeid];
- send_index1 = min(send_index1, fft_end);
+ send_index1 = std::min(send_index1, fft_end);
pgc->send_index0 = fft_start;
- pgc->send_nindex = max(0, send_index1 - pgc->send_index0);
+ pgc->send_nindex = std::max(0, send_index1 - pgc->send_index0);
ol->send_size += pgc->send_nindex;
/* We always start receiving to the first index of our slab */
{
recv_index1 -= ndata;
}
- recv_index1 = min(recv_index1, fft_end);
+ recv_index1 = std::min(recv_index1, fft_end);
pgc->recv_index0 = fft_start;
- pgc->recv_nindex = max(0, recv_index1 - pgc->recv_index0);
+ pgc->recv_nindex = std::max(0, recv_index1 - pgc->recv_index0);
}
#ifdef GMX_MPI
return;
}
+/*! \brief Return \p enumerator divided by \p denominator, rounded up to the next integer */
static int div_round_up(int enumerator, int denominator)
{
return (enumerator + denominator - 1)/denominator;
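/* Illustrative, not part of this change: div_round_up(10, 4) == 3 and
 * div_round_up(8, 4) == 2. */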
double imbal;
#ifdef GMX_MPI
- MPI_Type_contiguous(DIM, mpi_type, &(pme->rvec_mpi));
+ MPI_Type_contiguous(DIM, GMX_MPI_REAL, &(pme->rvec_mpi));
MPI_Type_commit(&(pme->rvec_mpi));
#endif
* (unless the coefficient distribution is inhomogeneous).
*/
- imbal = pme_load_imbalance(pme);
+ imbal = estimate_pme_load_imbalance(pme);
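/* Illustrative reading, assumed from the formula rather than stated here:
 * the estimate above is roughly the ratio of actual per-rank grid work to
 * an ideal even split, so 1.2 corresponds to about 20% extra work, which
 * triggers the warning below. */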
if (imbal >= 1.2 && pme->nodeid_major == 0 && pme->nodeid_minor == 0)
{
fprintf(stderr,
*V = gather_energy_bsplines(pme, grid->grid.grid, atc);
}
+/*! \brief Calculate initial Lorentz-Berthelot coefficients for LJ-PME */
static void
calc_initial_lb_coeffs(struct gmx_pme_t *pme, real *local_c6, real *local_sigma)
{
}
}
+/*! \brief Calculate next Lorentz-Berthelot coefficients for LJ-PME */
static void
calc_next_lb_coeffs(struct gmx_pme_t *pme, real *local_sigma)
{
int gmx_pme_do(struct gmx_pme_t *pme,
int start, int homenr,
rvec x[], rvec f[],
- real *chargeA, real *chargeB,
- real *c6A, real *c6B,
- real *sigmaA, real *sigmaB,
- matrix box, t_commrec *cr,
+ real chargeA[], real chargeB[],
+ real c6A[], real c6B[],
+ real sigmaA[], real sigmaB[],
+ matrix box, t_commrec *cr,
int maxshift_x, int maxshift_y,
t_nrnb *nrnb, gmx_wallcycle_t wcycle,
- matrix vir_q, real ewaldcoeff_q,
+ matrix vir_q, real ewaldcoeff_q,
matrix vir_lj, real ewaldcoeff_lj,
real *energy_q, real *energy_lj,
- real lambda_q, real lambda_lj,
+ real lambda_q, real lambda_lj,
real *dvdlambda_q, real *dvdlambda_lj,
int flags)
{
- int d, i, j, k, ntot, npme, grid_index, max_grid_index;
- int nx, ny, nz;
- int n_d, local_ny;
+ int d, i, j, npme, grid_index, max_grid_index;
+ int n_d;
pme_atomcomm_t *atc = NULL;
pmegrids_t *pmegrid = NULL;
real *grid = NULL;
- real *ptr;
- rvec *x_d, *f_d;
+ rvec *f_d;
real *coefficient = NULL;
real energy_AB[4];
matrix vir_AB[4];
if (pme->nodeid == 0)
{
- ntot = pme->nkx*pme->nky*pme->nkz;
- npme = ntot*log((real)ntot)/log(2.0);
+ real ntot = pme->nkx*pme->nky*pme->nkz;
+ npme = static_cast<int>(ntot*log(ntot)/log(2.0));
inc_nrnb(nrnb, eNR_FFT, 2*npme);
}
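/* Illustrative check of the estimate above, not part of this change: for a
 * 32x32x32 grid, ntot = 32768 and log2(ntot) = 15, so one 3D FFT is counted
 * as roughly 4.9e5 operations; the factor 2 presumably covers the forward
 * plus backward transform. */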
/* Unpack structure */
pmegrid = &pme->pmegrid[grid_index];
fftgrid = pme->fftgrid[grid_index];
- cfftgrid = pme->cfftgrid[grid_index];
pfft_setup = pme->pfft_setup[grid_index];
calc_next_lb_coeffs(pme, local_sigma);
grid = pmegrid->grid.grid;
/* Unpack structure */
pmegrid = &pme->pmegrid[grid_index];
fftgrid = pme->fftgrid[grid_index];
- cfftgrid = pme->cfftgrid[grid_index];
pfft_setup = pme->pfft_setup[grid_index];
grid = pmegrid->grid.grid;
calc_next_lb_coeffs(pme, local_sigma);
if (pme->nodeid == 0)
{
- ntot = pme->nkx*pme->nky*pme->nkz;
- npme = ntot*log((real)ntot)/log(2.0);
+ real ntot = pme->nkx*pme->nky*pme->nkz;
+ npme = static_cast<int>(ntot*log(ntot)/log(2.0));
inc_nrnb(nrnb, eNR_FFT, 2*npme);
}
wallcycle_start(wcycle, ewcPME_SPREADGATHER);
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
+/*! \libinternal \file
+ *
+ * \brief This file contains function declarations necessary for
+ * computing energies and forces for the PME long-ranged part (Coulomb
+ * and LJ).
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \inlibraryapi
+ * \ingroup module_ewald
+ */
#ifndef GMX_EWALD_PME_H
#define GMX_EWALD_PME_H
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/real.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
enum {
GMX_SUM_GRID_FORWARD, GMX_SUM_GRID_BACKWARD
};
+/*! \brief Initialize \p pmedata
+ *
+ * \return 0 indicates all is well; non-zero is an error code.
+ */
int gmx_pme_init(struct gmx_pme_t **pmedata, t_commrec *cr,
int nnodes_major, int nnodes_minor,
t_inputrec *ir, int homenr,
gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj,
gmx_bool bReproducible, int nthread);
-/* Initialize the pme data structures resepectively.
- * Return value 0 indicates all well, non zero is an error code.
- */
-int gmx_pme_destroy(FILE *log, struct gmx_pme_t **pmedata);
-/* Destroy the pme data structures resepectively.
- * Return value 0 indicates all well, non zero is an error code.
+/*! \brief Destroy the pme data structures.
+ *
+ * \return 0 indicates all is well; non-zero is an error code.
*/
+int gmx_pme_destroy(FILE *log, struct gmx_pme_t **pmedata);
+/*! \name Flag values that control what gmx_pme_do() will calculate
+ *
+ * These can be combined with bitwise-OR if more than one thing is required.
+ */
+//@{
#define GMX_PME_SPREAD (1<<0)
#define GMX_PME_SOLVE (1<<1)
#define GMX_PME_CALC_F (1<<2)
#define GMX_PME_DO_LJ (1<<14)
#define GMX_PME_DO_ALL_F (GMX_PME_SPREAD | GMX_PME_SOLVE | GMX_PME_CALC_F)
+//@}
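/* Illustrative usage sketch, not part of this header: a caller wanting
 * spreading, solving and force evaluation for both Coulomb and LJ might pass
 *     flags = GMX_PME_DO_ALL_F | GMX_PME_DO_LJ;
 * to gmx_pme_do(), where GMX_PME_DO_ALL_F is the bitwise-OR of
 * GMX_PME_SPREAD, GMX_PME_SOLVE and GMX_PME_CALC_F defined above. */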
+/*! \brief Do a PME calculation for the long range electrostatics and/or LJ.
+ *
+ * The meaning of \p flags is defined above, and determines which
+ * parts of the calculation are performed.
+ *
+ * \return 0 indicates all is well; non-zero is an error code.
+ */
int gmx_pme_do(struct gmx_pme_t *pme,
int start, int homenr,
rvec x[], rvec f[],
matrix vir_q, real ewaldcoeff_q,
matrix vir_lj, real ewaldcoeff_lj,
real *energy_q, real *energy_lj,
- real lambda_q, real lambda_lj,
+ real lambda_q, real lambda_lj,
real *dvdlambda_q, real *dvdlambda_lj,
int flags);
-/* Do a PME calculation for the long range electrostatics and/or LJ.
- * flags, defined above, determine which parts of the calculation are performed.
- * Return value 0 indicates all well, non zero is an error code.
- */
+/*! \brief Called on the nodes that do PME exclusively (as slaves) */
int gmx_pmeonly(struct gmx_pme_t *pme,
t_commrec *cr, t_nrnb *mynrnb,
gmx_wallcycle_t wcycle,
gmx_walltime_accounting_t walltime_accounting,
real ewaldcoeff_q, real ewaldcoeff_lj,
t_inputrec *ir);
-/* Called on the nodes that do PME exclusively (as slaves)
- */
-void gmx_pme_calc_energy(struct gmx_pme_t *pme, int n, rvec *x, real *q, real *V);
-/* Calculate the PME grid energy V for n charges with a potential
- * in the pme struct determined before with a call to gmx_pme_do
- * with at least GMX_PME_SPREAD and GMX_PME_SOLVE specified.
- * Note that the charges are not spread on the grid in the pme struct.
- * Currently does not work in parallel or with free energy.
+/*! \brief Calculate the PME grid energy V for n charges.
+ *
+ * The potential (stored in \p pme) must already have been computed with a
+ * call to gmx_pme_do() with at least GMX_PME_SPREAD and GMX_PME_SOLVE
+ * specified. Note that the charges are not spread on the grid in the
+ * pme struct. Currently does not work in parallel or with free
+ * energy.
*/
+void gmx_pme_calc_energy(struct gmx_pme_t *pme, int n, rvec *x, real *q, real *V);
+/*! \brief Send the charges and maxshift to our PME-only node. */
void gmx_pme_send_parameters(t_commrec *cr,
const interaction_const_t *ic,
gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj,
real *sqrt_c6A, real *sqrt_c6B,
real *sigmaA, real *sigmaB,
int maxshift_x, int maxshift_y);
-/* Send the charges and maxshift to out PME-only node. */
+/*! \brief Send the coordinates to our PME-only node and request a PME calculation */
void gmx_pme_send_coordinates(t_commrec *cr, matrix box, rvec *x,
gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj,
real lambda_q, real lambda_lj,
gmx_bool bEnerVir, int pme_flags,
gmx_int64_t step);
-/* Send the coordinates to our PME-only node and request a PME calculation */
+/*! \brief Tell our PME-only node to finish */
void gmx_pme_send_finish(t_commrec *cr);
-/* Tell our PME-only node to finish */
+/*! \brief Tell our PME-only node to reset all cycle and flop counters */
void gmx_pme_send_resetcounters(t_commrec *cr, gmx_int64_t step);
-/* Tell our PME-only node to reset all cycle and flop counters */
+/*! \brief PP nodes receive the long range forces from the PME nodes */
void gmx_pme_receive_f(t_commrec *cr,
rvec f[], matrix vir_q, real *energy_q,
matrix vir_lj, real *energy_lj,
real *dvdlambda_q, real *dvdlambda_lj,
float *pme_cycles);
-/* PP nodes receive the long range forces from the PME nodes */
-
-#ifdef __cplusplus
-}
-#endif
#endif
simulation stops. If equal to zero, don't
communicate any more between multisims.*/
/* PME load balancing data for GPU kernels */
- pme_load_balancing_t pme_loadbal = NULL;
- double cycles_pmes;
- gmx_bool bPMETuneTry = FALSE, bPMETuneRunning = FALSE;
+ pme_load_balancing_t *pme_loadbal = NULL;
+ double cycles_pmes;
+ gmx_bool bPMETuneTry = FALSE, bPMETuneRunning = FALSE;
/* Interactive MD */
gmx_bool bIMDstep = FALSE;