#include "gromacs/mdtypes/group.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/mdtypes/state.h"
#include "gromacs/pbcutil/boxutilities.h"
#include "gromacs/pbcutil/mshift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/pulling/pull.h"
#include "gromacs/random/tabulatednormaldistribution.h"
#include "gromacs/random/threefry.h"
+#include "gromacs/simd/simd.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxomp.h"
#include "gromacs/utility/smalloc.h"
+using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
+
/*For debugging, start at v(-dt/2) for velolcity verlet -- uncomment next line */
/*#define STARTFROMDT2*/
struct gmx_update_t
{
- gmx_stochd_t *sd;
+ gmx_stochd_t *sd;
/* xprime for constraint algorithms */
- rvec *xp;
- int xp_nalloc;
+ PaddedRVecVector xp;
/* Variables for the deform algorithm */
- gmx_int64_t deformref_step;
- matrix deformref_box;
+ gmx_int64_t deformref_step;
+ matrix deformref_box;
};
+static bool isTemperatureCouplingStep(gmx_int64_t step, const t_inputrec *ir)
+{
+ /* We should only couple after a step where energies were determined (for leapfrog versions)
+ or the step energies are determined, for velocity verlet versions */
+ int offset;
+ if (EI_VV(ir->eI))
+ {
+ offset = 0;
+ }
+ else
+ {
+ offset = 1;
+ }
+ return ir->etc != etcNO &&
+ (ir->nsttcouple == 1 ||
+ do_per_step(step + ir->nsttcouple - offset, ir->nsttcouple));
+}
-static void do_update_md(int start, int nrend,
- double dt, int nstpcouple,
- t_grp_tcstat *tcstat,
- double nh_vxi[],
- gmx_bool bNEMD, t_grp_acc *gstat, rvec accel[],
- ivec nFreeze[],
- real invmass[],
- unsigned short ptype[], unsigned short cFREEZE[],
- unsigned short cACC[], unsigned short cTC[],
- rvec x[], rvec xprime[], rvec v[],
- rvec f[], matrix M,
- gmx_bool bNH, gmx_bool bPR)
+static bool isPressureCouplingStep(gmx_int64_t step, const t_inputrec *ir)
{
- double imass, w_dt;
- int gf = 0, ga = 0, gt = 0;
- rvec vrel;
- real vn, vv, va, vb, vnrel;
- real lg, vxi = 0, u;
- int n, d;
+ GMX_ASSERT(ir->epc != epcMTTK, "MTTK pressure coupling is not handled here");
- if (bNH || bPR)
+ int offset;
+ if (ir->epc == epcBERENDSEN)
{
- /* Update with coupling to extended ensembles, used for
- * Nose-Hoover and Parrinello-Rahman coupling
- * Nose-Hoover uses the reversible leap-frog integrator from
- * Holian et al. Phys Rev E 52(3) : 2338, 1995
- */
- for (n = start; n < nrend; n++)
+ offset = 0;
+ }
+ else
+ {
+ offset = 1;
+ }
+ /* We should only couple after a step where pressures were determined */
+ return ir->epc != etcNO &&
+ (ir->nstpcouple == 1 ||
+ do_per_step(step + ir->nstpcouple - offset, ir->nstpcouple));
+}
+
+/*! \brief Sets the number of different temperature coupling values */
+enum class NumTempScaleValues
+{
+ single, //!< Single T-scaling value (either one group or all values =1)
+ multiple //!< Multiple T-scaling values, need to use T-group indices
+};
+
+/*! \brief Sets if to apply no or diagonal Parrinello-Rahman pressure scaling
+ *
+ * Note that this enum is only used in updateMDLeapfrogSimple(), which does
+ * not handle fully anistropic Parrinello-Rahman scaling, so we only have
+ * options \p no and \p diagonal here and no anistropic option.
+ */
+enum class ApplyParrinelloRahmanVScaling
+{
+ no, //!< Do not apply velocity scaling (not a PR-coupling run or step)
+ diagonal //!< Apply velocity scaling using a diagonal matrix
+};
+
+/*! \brief Integrate using leap-frog with T-scaling and optionally diagonal Parrinello-Rahman p-coupling
+ *
+ * \tparam numTempScaleValues The number of different T-couple values
+ * \tparam applyPRVScaling Apply Parrinello-Rahman velocity scaling
+ * \param[in] start Index of first atom to update
+ * \param[in] nrend Last atom to update: \p nrend - 1
+ * \param[in] dt The time step
+ * \param[in] dtPressureCouple Time step for pressure coupling
+ * \param[in] invMassPerDim 1/mass per atom and dimension
+ * \param[in] tcstat Temperature coupling information
+ * \param[in] cTC T-coupling group index per atom
+ * \param[in] pRVScaleMatrixDiagonal Parrinello-Rahman v-scale matrix diagonal
+ * \param[in] x Input coordinates
+ * \param[out] xprime Updated coordinates
+ * \param[inout] v Velocities
+ * \param[in] f Forces
+ *
+ * We expect this template to get good SIMD acceleration by most compilers,
+ * unlike the more complex general template.
+ * Note that we might get even better SIMD acceleration when we introduce
+ * aligned (and padded) memory, possibly with some hints for the compilers.
+ */
+template<NumTempScaleValues numTempScaleValues,
+ ApplyParrinelloRahmanVScaling applyPRVScaling>
+static void
+updateMDLeapfrogSimple(int start,
+ int nrend,
+ real dt,
+ real dtPressureCouple,
+ const rvec * gmx_restrict invMassPerDim,
+ const t_grp_tcstat * tcstat,
+ const unsigned short * cTC,
+ const rvec pRVScaleMatrixDiagonal,
+ const rvec * gmx_restrict x,
+ rvec * gmx_restrict xprime,
+ rvec * gmx_restrict v,
+ const rvec * gmx_restrict f)
+{
+ real lambdaGroup;
+
+ if (numTempScaleValues == NumTempScaleValues::single)
+ {
+ lambdaGroup = tcstat[0].lambda;
+ }
+
+ for (int a = start; a < nrend; a++)
+ {
+ if (numTempScaleValues == NumTempScaleValues::multiple)
{
- imass = invmass[n];
- if (cFREEZE)
- {
- gf = cFREEZE[n];
- }
- if (cACC)
- {
- ga = cACC[n];
- }
- if (cTC)
- {
- gt = cTC[n];
- }
- lg = tcstat[gt].lambda;
- if (bNH)
+ lambdaGroup = tcstat[cTC[a]].lambda;
+ }
+
+ for (int d = 0; d < DIM; d++)
+ {
+ /* Note that using rvec invMassPerDim results in more efficient
+ * SIMD code, but this increases the cache pressure.
+ * For large systems with PME on the CPU this slows down the
+ * (then already slow) update by 20%. If all data remains in cache,
+ * using rvec is much faster.
+ */
+ real vNew = lambdaGroup*v[a][d] + f[a][d]*invMassPerDim[a][d]*dt;
+
+ if (applyPRVScaling == ApplyParrinelloRahmanVScaling::diagonal)
{
- vxi = nh_vxi[gt];
+ vNew -= dtPressureCouple*pRVScaleMatrixDiagonal[d]*v[a][d];
}
- rvec_sub(v[n], gstat[ga].u, vrel);
+ v[a][d] = vNew;
+ xprime[a][d] = x[a][d] + vNew*dt;
+ }
+ }
+}
- for (d = 0; d < DIM; d++)
- {
- if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
- {
- vnrel = (lg*vrel[d] + dt*(imass*f[n][d] - 0.5*vxi*vrel[d]
- - nstpcouple*iprod(M[d], vrel)))/(1 + 0.5*vxi*dt);
- /* do not scale the mean velocities u */
- vn = gstat[ga].u[d] + accel[ga][d]*dt + vnrel;
- v[n][d] = vn;
- xprime[n][d] = x[n][d]+vn*dt;
- }
- else
+#if GMX_SIMD && GMX_SIMD_HAVE_REAL
+#define GMX_HAVE_SIMD_UPDATE 1
+#else
+#define GMX_HAVE_SIMD_UPDATE 0
+#endif
+
+#if GMX_HAVE_SIMD_UPDATE
+
+/*! \brief Load (aligned) the contents of GMX_SIMD_REAL_WIDTH rvec elements sequentially into 3 SIMD registers
+ *
+ * The loaded output is:
+ * \p r0: { r[index][XX], r[index][YY], ... }
+ * \p r1: { ... }
+ * \p r2: { ..., r[index+GMX_SIMD_REAL_WIDTH-1][YY], r[index+GMX_SIMD_REAL_WIDTH-1][ZZ] }
+ *
+ * \param[in] r Real to an rvec array, has to be aligned to SIMD register width
+ * \param[in] index Index of the first rvec triplet of reals to load
+ * \param[out] r0 Pointer to first SIMD register
+ * \param[out] r1 Pointer to second SIMD register
+ * \param[out] r2 Pointer to third SIMD register
+ */
+static inline void simdLoadRvecs(const rvec *r,
+ int index,
+ SimdReal *r0,
+ SimdReal *r1,
+ SimdReal *r2)
+{
+ const real *realPtr = r[index];
+
+ GMX_ASSERT(isSimdAligned(realPtr), "Pointer should be SIMD aligned");
+
+ *r0 = simdLoad(realPtr + 0*GMX_SIMD_REAL_WIDTH);
+ *r1 = simdLoad(realPtr + 1*GMX_SIMD_REAL_WIDTH);
+ *r2 = simdLoad(realPtr + 2*GMX_SIMD_REAL_WIDTH);
+}
+
+/*! \brief Store (aligned) 3 SIMD registers sequentially to GMX_SIMD_REAL_WIDTH rvec elements
+ *
+ * The stored output is:
+ * \p r[index] = { { r0[0], r0[1], ... }
+ * ...
+ * \p r[index+GMX_SIMD_REAL_WIDTH-1] = { ... , r2[GMX_SIMD_REAL_WIDTH-2], r2[GMX_SIMD_REAL_WIDTH-1] }
+ *
+ * \param[out] r Pointer to an rvec array, has to be aligned to SIMD register width
+ * \param[in] index Index of the first rvec triplet of reals to store to
+ * \param[in] r0 First SIMD register
+ * \param[in] r1 Second SIMD register
+ * \param[in] r2 Third SIMD register
+ */
+static inline void simdStoreRvecs(rvec *r,
+ int index,
+ SimdReal r0,
+ SimdReal r1,
+ SimdReal r2)
+{
+ real *realPtr = r[index];
+
+ GMX_ASSERT(isSimdAligned(realPtr), "Pointer should be SIMD aligned");
+
+ store(realPtr + 0*GMX_SIMD_REAL_WIDTH, r0);
+ store(realPtr + 1*GMX_SIMD_REAL_WIDTH, r1);
+ store(realPtr + 2*GMX_SIMD_REAL_WIDTH, r2);
+}
+
+/*! \brief Integrate using leap-frog with single group T-scaling and SIMD
+ *
+ * \param[in] start Index of first atom to update
+ * \param[in] nrend Last atom to update: \p nrend - 1
+ * \param[in] dt The time step
+ * \param[in] invMass 1/mass per atom
+ * \param[in] tcstat Temperature coupling information
+ * \param[in] x Input coordinates
+ * \param[out] xprime Updated coordinates
+ * \param[inout] v Velocities
+ * \param[in] f Forces
+ */
+static void
+updateMDLeapfrogSimpleSimd(int start,
+ int nrend,
+ real dt,
+ const real * gmx_restrict invMass,
+ const t_grp_tcstat * tcstat,
+ const rvec * gmx_restrict x,
+ rvec * gmx_restrict xprime,
+ rvec * gmx_restrict v,
+ const rvec * gmx_restrict f)
+{
+ SimdReal timestep(dt);
+ SimdReal lambdaSystem(tcstat[0].lambda);
+
+ /* We declare variables here, since code is often slower when declaring them inside the loop */
+
+ /* Note: We should implement a proper PaddedVector, so we don't need this check */
+ GMX_ASSERT(isSimdAligned(invMass), "invMass should be aligned");
+
+ for (int a = start; a < nrend; a += GMX_SIMD_REAL_WIDTH)
+ {
+ SimdReal invMass0, invMass1, invMass2;
+ expandScalarsToTriplets(simdLoad(invMass + a),
+ &invMass0, &invMass1, &invMass2);
+
+ SimdReal v0, v1, v2;
+ SimdReal f0, f1, f2;
+ simdLoadRvecs(v, a, &v0, &v1, &v2);
+ simdLoadRvecs(f, a, &f0, &f1, &f2);
+
+ v0 = fma(f0*invMass0, timestep, lambdaSystem*v0);
+ v1 = fma(f1*invMass1, timestep, lambdaSystem*v1);
+ v2 = fma(f2*invMass2, timestep, lambdaSystem*v2);
+
+ simdStoreRvecs(v, a, v0, v1, v2);
+
+ SimdReal x0, x1, x2;
+ simdLoadRvecs(x, a, &x0, &x1, &x2);
+
+ SimdReal xprime0 = fma(v0, timestep, x0);
+ SimdReal xprime1 = fma(v1, timestep, x1);
+ SimdReal xprime2 = fma(v2, timestep, x2);
+
+ simdStoreRvecs(xprime, a, xprime0, xprime1, xprime2);
+ }
+}
+
+#endif // GMX_HAVE_SIMD_UPDATE
+
+/*! \brief Sets the NEMD acceleration type */
+enum class AccelerationType
+{
+ none, group, cosine
+};
+
+/*! \brief Integrate using leap-frog with support for everything
+ *
+ * \tparam accelerationType Type of NEMD acceleration
+ * \param[in] start Index of first atom to update
+ * \param[in] nrend Last atom to update: \p nrend - 1
+ * \param[in] doNoseHoover If to apply Nose-Hoover T-coupling
+ * \param[in] dt The time step
+ * \param[in] dtPressureCouple Time step for pressure coupling, is 0 when no pressure coupling should be applied at this step
+ * \param[in] ir The input parameter record
+ * \param[in] md Atom properties
+ * \param[in] ekind Kinetic energy data
+ * \param[in] box The box dimensions
+ * \param[in] x Input coordinates
+ * \param[out] xprime Updated coordinates
+ * \param[inout] v Velocities
+ * \param[in] f Forces
+ * \param[in] nh_vxi Nose-Hoover velocity scaling factors
+ * \param[in] M Parrinello-Rahman scaling matrix
+ */
+template<AccelerationType accelerationType>
+static void
+updateMDLeapfrogGeneral(int start,
+ int nrend,
+ bool doNoseHoover,
+ real dt,
+ real dtPressureCouple,
+ const t_inputrec * ir,
+ const t_mdatoms * md,
+ const gmx_ekindata_t * ekind,
+ const matrix box,
+ const rvec * gmx_restrict x,
+ rvec * gmx_restrict xprime,
+ rvec * gmx_restrict v,
+ const rvec * gmx_restrict f,
+ const double * gmx_restrict nh_vxi,
+ const matrix M)
+{
+ /* This is a version of the leap-frog integrator that supports
+ * all combinations of T-coupling, P-coupling and NEMD.
+ * Nose-Hoover uses the reversible leap-frog integrator from
+ * Holian et al. Phys Rev E 52(3) : 2338, 1995
+ */
+
+ const unsigned short * cTC = md->cTC;
+ const t_grp_tcstat * tcstat = ekind->tcstat;
+
+ const unsigned short * cACC = md->cACC;
+ const rvec * accel = ir->opts.acc;
+ const t_grp_acc * grpstat = ekind->grpstat;
+
+ const rvec * gmx_restrict invMassPerDim = md->invMassPerDim;
+
+ /* Initialize group values, changed later when multiple groups are used */
+ int ga = 0;
+ int gt = 0;
+ real factorNH = 0;
+
+ for (int n = start; n < nrend; n++)
+ {
+ if (cTC)
+ {
+ gt = cTC[n];
+ }
+ real lg = tcstat[gt].lambda;
+
+ rvec vRel;
+ real cosineZ, vCosine;
+#ifdef __INTEL_COMPILER
+#pragma warning( disable : 280 )
+#endif
+ switch (accelerationType)
+ {
+ case AccelerationType::none:
+ copy_rvec(v[n], vRel);
+ break;
+ case AccelerationType::group:
+ if (cACC)
{
- v[n][d] = 0.0;
- xprime[n][d] = x[n][d];
+ ga = cACC[n];
}
+ /* Avoid scaling the group velocity */
+ rvec_sub(v[n], grpstat[ga].u, vRel);
+ break;
+ case AccelerationType::cosine:
+ cosineZ = std::cos(x[n][ZZ]*static_cast<real>(M_PI)/box[ZZ][ZZ]);
+ vCosine = cosineZ*ekind->cosacc.vcos;
+ /* Avoid scaling the cosine profile velocity */
+ copy_rvec(v[n], vRel);
+ vRel[XX] -= vCosine;
+ break;
+ }
+
+ if (doNoseHoover)
+ {
+ /* Here we account for multiple time stepping, by increasing
+ * the Nose-Hoover correction by nsttcouple
+ */
+ factorNH = 0.5*ir->nsttcouple*dt*nh_vxi[gt];
+ }
+
+ for (int d = 0; d < DIM; d++)
+ {
+ real vNew =
+ (lg*vRel[d] + (f[n][d]*invMassPerDim[n][d]*dt - factorNH*vRel[d]
+ - dtPressureCouple*iprod(M[d], vRel)))/(1 + factorNH);
+ switch (accelerationType)
+ {
+ case AccelerationType::none:
+ break;
+ case AccelerationType::group:
+ /* Add back the mean velocity and apply acceleration */
+ vNew += grpstat[ga].u[d] + accel[ga][d]*dt;
+ break;
+ case AccelerationType::cosine:
+ if (d == XX)
+ {
+ /* Add back the mean velocity and apply acceleration */
+ vNew += vCosine + cosineZ*ekind->cosacc.cos_accel*dt;
+ }
+ break;
}
+ v[n][d] = vNew;
+ xprime[n][d] = x[n][d] + vNew*dt;
}
}
- else if (cFREEZE != NULL ||
- nFreeze[0][XX] || nFreeze[0][YY] || nFreeze[0][ZZ] ||
- bNEMD)
+}
+
+/*! \brief Handles the Leap-frog MD x and v integration */
+static void do_update_md(int start,
+ int nrend,
+ gmx_int64_t step,
+ real dt,
+ const t_inputrec * ir,
+ const t_mdatoms * md,
+ const gmx_ekindata_t * ekind,
+ const matrix box,
+ const rvec * gmx_restrict x,
+ rvec * gmx_restrict xprime,
+ rvec * gmx_restrict v,
+ const rvec * gmx_restrict f,
+ const double * gmx_restrict nh_vxi,
+ const matrix M)
+{
+ GMX_ASSERT(nrend == start || xprime != x, "For SIMD optimization certain compilers need to have xprime != x");
+
+ /* Note: Berendsen pressure scaling is handled after do_update_md() */
+ bool doTempCouple = isTemperatureCouplingStep(step, ir);
+ bool doNoseHoover = (ir->etc == etcNOSEHOOVER && doTempCouple);
+ bool doParrinelloRahman = (ir->epc == epcPARRINELLORAHMAN && isPressureCouplingStep(step, ir));
+ bool doPROffDiagonal = (doParrinelloRahman && (M[YY][XX] != 0 || M[ZZ][XX] != 0 || M[ZZ][YY] != 0));
+
+ real dtPressureCouple = (doParrinelloRahman ? ir->nstpcouple*dt : 0);
+
+ /* NEMD (also cosine) acceleration is applied in updateMDLeapFrogGeneral */
+ bool doAcceleration = (ekind->bNEMD || ekind->cosacc.cos_accel != 0);
+
+ if (doNoseHoover || doPROffDiagonal || doAcceleration)
{
- /* Update with Berendsen/v-rescale coupling and freeze or NEMD */
- for (n = start; n < nrend; n++)
+ if (!doAcceleration)
{
- w_dt = invmass[n]*dt;
- if (cFREEZE)
- {
- gf = cFREEZE[n];
- }
- if (cACC)
+ updateMDLeapfrogGeneral<AccelerationType::none>
+ (start, nrend, doNoseHoover, dt, dtPressureCouple,
+ ir, md, ekind, box, x, xprime, v, f, nh_vxi, M);
+ }
+ else if (ekind->bNEMD)
+ {
+ updateMDLeapfrogGeneral<AccelerationType::group>
+ (start, nrend, doNoseHoover, dt, dtPressureCouple,
+ ir, md, ekind, box, x, xprime, v, f, nh_vxi, M);
+ }
+ else
+ {
+ updateMDLeapfrogGeneral<AccelerationType::cosine>
+ (start, nrend, doNoseHoover, dt, dtPressureCouple,
+ ir, md, ekind, box, x, xprime, v, f, nh_vxi, M);
+ }
+ }
+ else
+ {
+ /* We determine if we have a single T-coupling lambda value for all
+ * atoms. That allows for better SIMD acceleration in the template.
+ * If we do not do temperature coupling (in the run or this step),
+ * all scaling values are 1, so we effectively have a single value.
+ */
+ bool haveSingleTempScaleValue = (!doTempCouple || ekind->ngtc == 1);
+
+ /* Extract some pointers needed by all cases */
+ const unsigned short *cTC = md->cTC;
+ const t_grp_tcstat *tcstat = ekind->tcstat;
+ const rvec *invMassPerDim = md->invMassPerDim;
+
+ if (doParrinelloRahman)
+ {
+ GMX_ASSERT(!doPROffDiagonal, "updateMDLeapfrogSimple only support diagonal Parrinello-Rahman scaling matrices");
+
+ rvec diagM;
+ for (int d = 0; d < DIM; d++)
{
- ga = cACC[n];
+ diagM[d] = M[d][d];
}
- if (cTC)
+
+ if (haveSingleTempScaleValue)
{
- gt = cTC[n];
+ updateMDLeapfrogSimple
+ <NumTempScaleValues::single,
+ ApplyParrinelloRahmanVScaling::diagonal>
+ (start, nrend, dt, dtPressureCouple,
+ invMassPerDim, tcstat, cTC, diagM, x, xprime, v, f);
}
- lg = tcstat[gt].lambda;
-
- for (d = 0; d < DIM; d++)
+ else
{
- vn = v[n][d];
- if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
- {
- vv = lg*vn + f[n][d]*w_dt;
-
- /* do not scale the mean velocities u */
- u = gstat[ga].u[d];
- va = vv + accel[ga][d]*dt;
- vb = va + (1.0-lg)*u;
- v[n][d] = vb;
- xprime[n][d] = x[n][d]+vb*dt;
- }
- else
- {
- v[n][d] = 0.0;
- xprime[n][d] = x[n][d];
- }
+ updateMDLeapfrogSimple
+ <NumTempScaleValues::multiple,
+ ApplyParrinelloRahmanVScaling::diagonal>
+ (start, nrend, dt, dtPressureCouple,
+ invMassPerDim, tcstat, cTC, diagM, x, xprime, v, f);
}
}
- }
- else
- {
- /* Plain update with Berendsen/v-rescale coupling */
- for (n = start; n < nrend; n++)
+ else
{
- if ((ptype[n] != eptVSite) && (ptype[n] != eptShell))
+ if (haveSingleTempScaleValue)
{
- w_dt = invmass[n]*dt;
- if (cTC)
+ /* Note that modern compilers are pretty good at vectorizing
+ * updateMDLeapfrogSimple(). But the SIMD version will still
+ * be faster because invMass lowers the cache pressure
+ * compared to invMassPerDim.
+ */
+#if GMX_HAVE_SIMD_UPDATE
+ /* Check if we can use invmass instead of invMassPerDim */
+ if (!md->havePartiallyFrozenAtoms)
{
- gt = cTC[n];
+ updateMDLeapfrogSimpleSimd
+ (start, nrend, dt, md->invmass, tcstat, x, xprime, v, f);
}
- lg = tcstat[gt].lambda;
-
- for (d = 0; d < DIM; d++)
+ else
+#endif
{
- vn = lg*v[n][d] + f[n][d]*w_dt;
- v[n][d] = vn;
- xprime[n][d] = x[n][d] + vn*dt;
+ updateMDLeapfrogSimple
+ <NumTempScaleValues::single,
+ ApplyParrinelloRahmanVScaling::no>
+ (start, nrend, dt, dtPressureCouple,
+ invMassPerDim, tcstat, cTC, nullptr, x, xprime, v, f);
}
}
else
{
- for (d = 0; d < DIM; d++)
- {
- v[n][d] = 0.0;
- xprime[n][d] = x[n][d];
- }
+ updateMDLeapfrogSimple
+ <NumTempScaleValues::multiple,
+ ApplyParrinelloRahmanVScaling::no>
+ (start, nrend, dt, dtPressureCouple,
+ invMassPerDim, tcstat, cTC, nullptr, x, xprime, v, f);
}
}
}
}
-static void do_update_vv_vel(int start, int nrend, double dt,
+static void do_update_vv_vel(int start, int nrend, real dt,
rvec accel[], ivec nFreeze[], real invmass[],
unsigned short ptype[], unsigned short cFREEZE[],
- unsigned short cACC[], rvec v[], rvec f[],
+ unsigned short cACC[], rvec v[], const rvec f[],
gmx_bool bExtended, real veta, real alpha)
{
- double w_dt;
int gf = 0, ga = 0;
int n, d;
- double g, mv1, mv2;
+ real g, mv1, mv2;
if (bExtended)
{
g = 0.25*dt*veta*alpha;
- mv1 = exp(-g);
+ mv1 = std::exp(-g);
mv2 = gmx::series_sinhx(g);
}
else
}
for (n = start; n < nrend; n++)
{
- w_dt = invmass[n]*dt;
+ real w_dt = invmass[n]*dt;
if (cFREEZE)
{
gf = cFREEZE[n];
}
} /* do_update_vv_vel */
-static void do_update_vv_pos(int start, int nrend, double dt,
+static void do_update_vv_pos(int start, int nrend, real dt,
ivec nFreeze[],
unsigned short ptype[], unsigned short cFREEZE[],
- rvec x[], rvec xprime[], rvec v[],
+ const rvec x[], rvec xprime[], rvec v[],
gmx_bool bExtended, real veta)
{
int gf = 0;
int n, d;
- double g, mr1, mr2;
+ real g, mr1, mr2;
/* Would it make more sense if Parrinello-Rahman was put here? */
if (bExtended)
{
g = 0.5*dt*veta;
- mr1 = exp(g);
+ mr1 = std::exp(g);
mr2 = gmx::series_sinhx(g);
}
else
}
} /* do_update_vv_pos */
-static void do_update_visc(int start, int nrend,
- double dt, int nstpcouple,
- t_grp_tcstat *tcstat,
- double nh_vxi[],
- real invmass[],
- unsigned short ptype[], unsigned short cTC[],
- rvec x[], rvec xprime[], rvec v[],
- rvec f[], matrix M, matrix box, real
- cos_accel, real vcos,
- gmx_bool bNH, gmx_bool bPR)
-{
- double imass, w_dt;
- int gt = 0;
- real vn, vc;
- real lg, vxi = 0, vv;
- real fac, cosz;
- rvec vrel;
- int n, d;
-
- fac = 2*M_PI/(box[ZZ][ZZ]);
-
- if (bNH || bPR)
- {
- /* Update with coupling to extended ensembles, used for
- * Nose-Hoover and Parrinello-Rahman coupling
- */
- for (n = start; n < nrend; n++)
- {
- imass = invmass[n];
- if (cTC)
- {
- gt = cTC[n];
- }
- lg = tcstat[gt].lambda;
- cosz = cos(fac*x[n][ZZ]);
-
- copy_rvec(v[n], vrel);
-
- vc = cosz*vcos;
- vrel[XX] -= vc;
- if (bNH)
- {
- vxi = nh_vxi[gt];
- }
- for (d = 0; d < DIM; d++)
- {
- if ((ptype[n] != eptVSite) && (ptype[n] != eptShell))
- {
- vn = (lg*vrel[d] + dt*(imass*f[n][d] - 0.5*vxi*vrel[d]
- - nstpcouple*iprod(M[d], vrel)))/(1 + 0.5*vxi*dt);
- if (d == XX)
- {
- vn += vc + dt*cosz*cos_accel;
- }
- v[n][d] = vn;
- xprime[n][d] = x[n][d]+vn*dt;
- }
- else
- {
- xprime[n][d] = x[n][d];
- }
- }
- }
- }
- else
- {
- /* Classic version of update, used with berendsen coupling */
- for (n = start; n < nrend; n++)
- {
- w_dt = invmass[n]*dt;
- if (cTC)
- {
- gt = cTC[n];
- }
- lg = tcstat[gt].lambda;
- cosz = cos(fac*x[n][ZZ]);
-
- for (d = 0; d < DIM; d++)
- {
- vn = v[n][d];
-
- if ((ptype[n] != eptVSite) && (ptype[n] != eptShell))
- {
- if (d == XX)
- {
- vc = cosz*vcos;
- /* Do not scale the cosine velocity profile */
- vv = vc + lg*(vn - vc + f[n][d]*w_dt);
- /* Add the cosine accelaration profile */
- vv += dt*cosz*cos_accel;
- }
- else
- {
- vv = lg*(vn + f[n][d]*w_dt);
- }
- v[n][d] = vv;
- xprime[n][d] = x[n][d]+vv*dt;
- }
- else
- {
- v[n][d] = 0.0;
- xprime[n][d] = x[n][d];
- }
- }
- }
- }
-}
-
static gmx_stochd_t *init_stochd(const t_inputrec *ir)
{
gmx_stochd_t *sd;
{
if (opts->tau_t[gt] > 0)
{
- sdc[gt].em = exp(-ir->delta_t/opts->tau_t[gt]);
+ sdc[gt].em = std::exp(-ir->delta_t/opts->tau_t[gt]);
}
else
{
gmx_update_t *init_update(const t_inputrec *ir)
{
- gmx_update_t *upd;
-
- snew(upd, 1);
+ gmx_update_t *upd = new(gmx_update_t);
if (ir->eI == eiBD || EI_SD(ir->eI) || ir->etc == etcVRESCALE || ETC_ANDERSEN(ir->etc))
{
update_temperature_constants(upd, ir);
- upd->xp = NULL;
- upd->xp_nalloc = 0;
+ upd->xp.resize(0);
return upd;
}
-void update_realloc(gmx_update_t *upd, int state_nalloc)
+void update_realloc(gmx_update_t *upd, int natoms)
{
GMX_ASSERT(upd, "upd must be allocated before its fields can be reallocated");
- if (state_nalloc > upd->xp_nalloc)
- {
- upd->xp_nalloc = state_nalloc;
- /* We need to allocate one element extra, since we might use
- * (unaligned) 4-wide SIMD loads to access rvec entries. */
- srenew(upd->xp, upd->xp_nalloc + 1);
- }
+
+ upd->xp.resize(gmx::paddedRVecVectorSize(natoms));
}
static void do_update_sd1(gmx_stochd_t *sd,
- int start, int nrend, double dt,
+ int start, int nrend, real dt,
rvec accel[], ivec nFreeze[],
real invmass[], unsigned short ptype[],
unsigned short cFREEZE[], unsigned short cACC[],
unsigned short cTC[],
- rvec x[], rvec xprime[], rvec v[], rvec f[],
+ const rvec x[], rvec xprime[], rvec v[], const rvec f[],
gmx_bool bDoConstr,
gmx_bool bFirstHalfConstr,
gmx_int64_t step, int seed, int* gatindex)
}
}
-static void do_update_bd(int start, int nrend, double dt,
+static void do_update_bd(int start, int nrend, real dt,
ivec nFreeze[],
real invmass[], unsigned short ptype[],
unsigned short cFREEZE[], unsigned short cTC[],
- rvec x[], rvec xprime[], rvec v[],
- rvec f[], real friction_coefficient,
+ const rvec x[], rvec xprime[], rvec v[],
+ const rvec f[], real friction_coefficient,
real *rf, gmx_int64_t step, int seed,
int* gatindex)
{
}
static void dump_it_all(FILE gmx_unused *fp, const char gmx_unused *title,
- int gmx_unused natoms, rvec gmx_unused x[], rvec gmx_unused xp[],
- rvec gmx_unused v[], rvec gmx_unused f[])
+ int gmx_unused natoms,
+ const gmx_unused PaddedRVecVector *x,
+ const gmx_unused PaddedRVecVector *xp,
+ const gmx_unused PaddedRVecVector *v,
+ const gmx_unused PaddedRVecVector *f)
{
#ifdef DEBUG
if (fp)
{
fprintf(fp, "%s\n", title);
- pr_rvecs(fp, 0, "x", x, natoms);
- pr_rvecs(fp, 0, "xp", xp, natoms);
- pr_rvecs(fp, 0, "v", v, natoms);
- pr_rvecs(fp, 0, "f", f, natoms);
+ pr_rvecs(fp, 0, "x", as_rvec_array(x->data()), natoms);
+ pr_rvecs(fp, 0, "xp", as_rvec_array(xp->data()), natoms);
+ pr_rvecs(fp, 0, "v", as_rvec_array(v->data()), natoms);
+ if (f != NULL)
+ {
+ pr_rvecs(fp, 0, "f", as_rvec_array(f->data()), natoms);
+ }
}
#endif
}
/* Note that the times of x and v differ by half a step */
/* MRS -- would have to be changed for VV */
- cosz = cos(fac*x[n][ZZ]);
+ cosz = std::cos(fac*x[n][ZZ]);
/* Calculate the amplitude of the new velocity profile */
mvcos += 2*cosz*md->massT[n]*v[n][XX];
{
if (ekind->cosacc.cos_accel == 0)
{
- calc_ke_part_normal(state->v, opts, md, ekind, nrnb, bEkinAveVel);
+ calc_ke_part_normal(as_rvec_array(state->v.data()), opts, md, ekind, nrnb, bEkinAveVel);
}
else
{
- calc_ke_part_visc(state->box, state->x, state->v, opts, md, ekind, nrnb, bEkinAveVel);
+ calc_ke_part_visc(state->box, as_rvec_array(state->x.data()), as_rvec_array(state->v.data()), opts, md, ekind, nrnb, bEkinAveVel);
}
}
snew(ekinstate->ekinh, ekinstate->ekin_n);
snew(ekinstate->ekinf, ekinstate->ekin_n);
snew(ekinstate->ekinh_old, ekinstate->ekin_n);
- snew(ekinstate->ekinscalef_nhc, ekinstate->ekin_n);
- snew(ekinstate->ekinscaleh_nhc, ekinstate->ekin_n);
- snew(ekinstate->vscale_nhc, ekinstate->ekin_n);
+ ekinstate->ekinscalef_nhc.resize(ekinstate->ekin_n);
+ ekinstate->ekinscaleh_nhc.resize(ekinstate->ekin_n);
+ ekinstate->vscale_nhc.resize(ekinstate->ekin_n);
ekinstate->dekindl = 0;
ekinstate->mvcos = 0;
}
t_mdatoms *md)
{
- gmx_bool bTCouple = FALSE;
- real dttc;
- int i, offset;
+ bool doTemperatureCoupling = false;
/* if using vv with trotter decomposition methods, we do this elsewhere in the code */
- if (inputrec->etc != etcNO &&
- !(inputrecNvtTrotter(inputrec) || inputrecNptTrotter(inputrec) || inputrecNphTrotter(inputrec)))
+ if (!(EI_VV(inputrec->eI) &&
+ (inputrecNvtTrotter(inputrec) || inputrecNptTrotter(inputrec) || inputrecNphTrotter(inputrec))))
{
- /* We should only couple after a step where energies were determined (for leapfrog versions)
- or the step energies are determined, for velocity verlet versions */
-
- if (EI_VV(inputrec->eI))
- {
- offset = 0;
- }
- else
- {
- offset = 1;
- }
- bTCouple = (inputrec->nsttcouple == 1 ||
- do_per_step(step+inputrec->nsttcouple-offset,
- inputrec->nsttcouple));
+ doTemperatureCoupling = isTemperatureCouplingStep(step, inputrec);
}
- if (bTCouple)
+ if (doTemperatureCoupling)
{
- dttc = inputrec->nsttcouple*inputrec->delta_t;
+ real dttc = inputrec->nsttcouple*inputrec->delta_t;
switch (inputrec->etc)
{
case etcNO:
break;
case etcBERENDSEN:
- berendsen_tcoupl(inputrec, ekind, dttc);
+ berendsen_tcoupl(inputrec, ekind, dttc, state->therm_integral);
break;
case etcNOSEHOOVER:
nosehoover_tcoupl(&(inputrec->opts), ekind, dttc,
- state->nosehoover_xi, state->nosehoover_vxi, MassQ);
+ state->nosehoover_xi.data(), state->nosehoover_vxi.data(), MassQ);
break;
case etcVRESCALE:
vrescale_tcoupl(inputrec, step, ekind, dttc,
- state->therm_integral);
+ state->therm_integral.data());
break;
}
/* rescale in place here */
if (EI_VV(inputrec->eI))
{
- rescale_velocities(ekind, md, 0, md->homenr, state->v);
+ rescale_velocities(ekind, md, 0, md->homenr, as_rvec_array(state->v.data()));
}
}
else
{
/* Set the T scaling lambda to 1 to have no scaling */
- for (i = 0; (i < inputrec->opts.ngtc); i++)
+ for (int i = 0; (i < inputrec->opts.ngtc); i++)
{
ekind->tcstat[i].lambda = 1.0;
}
}
}
-void update_pcouple(FILE *fplog,
- gmx_int64_t step,
- t_inputrec *inputrec,
- t_state *state,
- matrix pcoupl_mu,
- matrix M,
- gmx_bool bInitStep)
+/*! \brief Computes the atom range for a thread to operate on, ensured SIMD aligned ranges
+ *
+ * \param[in] numThreads The number of threads to divide atoms over
+ * \param[in] threadIndex The thread to get the range for
+ * \param[in] numAtoms The total number of atoms (on this rank)
+ * \param[out] startAtom The start of the atom range
+ * \param[out] endAtom The end of the atom range, note that this is in general not a multiple of the SIMD width
+ */
+static void getThreadAtomRange(int numThreads, int threadIndex, int numAtoms,
+ int *startAtom, int *endAtom)
{
- gmx_bool bPCouple = FALSE;
- real dtpc = 0;
- int i;
-
- /* if using Trotter pressure, we do this in coupling.c, so we leave it false. */
- if (inputrec->epc != epcNO && (!(inputrecNptTrotter(inputrec) || inputrecNphTrotter(inputrec))))
- {
- /* We should only couple after a step where energies were determined */
- bPCouple = (inputrec->nstpcouple == 1 ||
- do_per_step(step+inputrec->nstpcouple-1,
- inputrec->nstpcouple));
- }
+#if GMX_HAVE_SIMD_UPDATE
+ constexpr int blockSize = GMX_SIMD_REAL_WIDTH;
+#else
+ constexpr int blockSize = 1;
+#endif
+ int numBlocks = (numAtoms + blockSize - 1)/blockSize;
- clear_mat(pcoupl_mu);
- for (i = 0; i < DIM; i++)
+ *startAtom = ((numBlocks* threadIndex )/numThreads)*blockSize;
+ *endAtom = ((numBlocks*(threadIndex + 1))/numThreads)*blockSize;
+ if (threadIndex == numThreads - 1)
{
- pcoupl_mu[i][i] = 1.0;
+ *endAtom = numAtoms;
}
+}
- clear_mat(M);
-
- if (bPCouple)
+void update_pcouple_before_coordinates(FILE *fplog,
+ gmx_int64_t step,
+ const t_inputrec *inputrec,
+ t_state *state,
+ matrix parrinellorahmanMu,
+ matrix M,
+ gmx_bool bInitStep)
+{
+ /* Berendsen P-coupling is completely handled after the coordinate update.
+ * Trotter P-coupling is handled by separate calls to trotter_update().
+ */
+ if (inputrec->epc == epcPARRINELLORAHMAN &&
+ isPressureCouplingStep(step, inputrec))
{
- dtpc = inputrec->nstpcouple*inputrec->delta_t;
+ real dtpc = inputrec->nstpcouple*inputrec->delta_t;
- switch (inputrec->epc)
- {
- /* We can always pcoupl, even if we did not sum the energies
- * the previous step, since state->pres_prev is only updated
- * when the energies have been summed.
- */
- case (epcNO):
- break;
- case (epcBERENDSEN):
- if (!bInitStep)
- {
- berendsen_pcoupl(fplog, step, inputrec, dtpc, state->pres_prev, state->box,
- pcoupl_mu);
- }
- break;
- case (epcPARRINELLORAHMAN):
- parrinellorahman_pcoupl(fplog, step, inputrec, dtpc, state->pres_prev,
- state->box, state->box_rel, state->boxv,
- M, pcoupl_mu, bInitStep);
- break;
- default:
- break;
- }
+ parrinellorahman_pcoupl(fplog, step, inputrec, dtpc, state->pres_prev,
+ state->box, state->box_rel, state->boxv,
+ M, parrinellorahmanMu, bInitStep);
}
}
t_state *state,
gmx_bool bMolPBC,
t_graph *graph,
- rvec force[], /* forces on home particles */
+ PaddedRVecVector *force, /* forces on home particles */
t_idef *idef,
tensor vir_part,
t_commrec *cr,
gmx_bool bCalcVir)
{
gmx_bool bLastStep, bLog = FALSE, bEner = FALSE, bDoConstr = FALSE;
- double dt;
- int start, homenr, nrend, i;
tensor vir_con;
int nth, th;
/* for now, SD update is here -- though it really seems like it
should be reformulated as a velocity verlet method, since it has two parts */
- start = 0;
- homenr = md->homenr;
- nrend = start+homenr;
+ int homenr = md->homenr;
- dt = inputrec->delta_t;
+ /* Cast delta_t from double to real to make the integrators faster.
+ * The only reason for having delta_t double is to get accurate values
+ * for t=delta_t*step when step is larger than float precision.
+ * For integration dt the accuracy of real suffices, since with
+ * integral += dt*integrand the increment is nearly always (much) smaller
+ * than the integral (and the integrand has real precision).
+ */
+ real dt = inputrec->delta_t;
/*
* Steps (7C, 8C)
wallcycle_start(wcycle, ewcCONSTR);
if (EI_VV(inputrec->eI) && bFirstHalf)
{
- constrain(NULL, bLog, bEner, constr, idef,
+ constrain(nullptr, bLog, bEner, constr, idef,
inputrec, cr, step, 1, 1.0, md,
- state->x, state->v, state->v,
+ as_rvec_array(state->x.data()), as_rvec_array(state->v.data()), as_rvec_array(state->v.data()),
bMolPBC, state->box,
state->lambda[efptBONDED], dvdlambda,
- NULL, bCalcVir ? &vir_con : NULL, nrnb, econqVeloc);
+ nullptr, bCalcVir ? &vir_con : nullptr, nrnb, econqVeloc);
}
else
{
- constrain(NULL, bLog, bEner, constr, idef,
+ constrain(nullptr, bLog, bEner, constr, idef,
inputrec, cr, step, 1, 1.0, md,
- state->x, upd->xp, NULL,
+ as_rvec_array(state->x.data()), as_rvec_array(upd->xp.data()), nullptr,
bMolPBC, state->box,
state->lambda[efptBONDED], dvdlambda,
- state->v, bCalcVir ? &vir_con : NULL, nrnb, econqCoord);
+ as_rvec_array(state->v.data()), bCalcVir ? &vir_con : nullptr, nrnb, econqCoord);
}
wallcycle_stop(wcycle, ewcCONSTR);
where();
dump_it_all(fplog, "After Shake",
- state->natoms, state->x, upd->xp, state->v, force);
+ state->natoms, &state->x, &upd->xp, &state->v, force);
if (bCalcVir)
{
try
{
int start_th, end_th;
-
- start_th = start + ((nrend-start)* th )/nth;
- end_th = start + ((nrend-start)*(th+1))/nth;
+ getThreadAtomRange(nth, th, homenr, &start_th, &end_th);
/* The second part of the SD integration */
do_update_sd1(upd->sd,
inputrec->opts.acc, inputrec->opts.nFreeze,
md->invmass, md->ptype,
md->cFREEZE, md->cACC, md->cTC,
- state->x, upd->xp, state->v, force,
+ as_rvec_array(state->x.data()), as_rvec_array(upd->xp.data()), as_rvec_array(state->v.data()), as_rvec_array(force->data()),
bDoConstr, FALSE,
step, inputrec->ld_seed,
- DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
+ DOMAINDECOMP(cr) ? cr->dd->gatindex : nullptr);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
}
/* Constrain the coordinates upd->xp for half a time step */
wallcycle_start(wcycle, ewcCONSTR);
- constrain(NULL, bLog, bEner, constr, idef,
+ constrain(nullptr, bLog, bEner, constr, idef,
inputrec, cr, step, 1, 0.5, md,
- state->x, upd->xp, NULL,
+ as_rvec_array(state->x.data()), as_rvec_array(upd->xp.data()), nullptr,
bMolPBC, state->box,
state->lambda[efptBONDED], dvdlambda,
- state->v, NULL, nrnb, econqCoord);
+ as_rvec_array(state->v.data()), nullptr, nrnb, econqCoord);
wallcycle_stop(wcycle, ewcCONSTR);
}
*/
wallcycle_start_nocount(wcycle, ewcUPDATE);
- if (md->cFREEZE != NULL && constr != NULL)
+ if (md->cFREEZE != nullptr && constr != nullptr)
{
/* If we have atoms that are frozen along some, but not all
* dimensions, the constraints will have moved them also along
}
if (partialFreezeAndConstraints)
{
- for (int i = start; i < nrend; i++)
+ for (int i = 0; i < homenr; i++)
{
int g = md->cFREEZE[i];
if (graph && (graph->nnodes > 0))
{
- unshift_x(graph, state->box, state->x, upd->xp);
+ unshift_x(graph, state->box, as_rvec_array(state->x.data()), as_rvec_array(upd->xp.data()));
if (TRICLINIC(state->box))
{
inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
}
else
{
+ /* The copy is performance sensitive, so use a bare pointer */
+ rvec *xp = as_rvec_array(upd->xp.data());
#ifndef __clang_analyzer__
// cppcheck-suppress unreadVariable
nth = gmx_omp_nthreads_get(emntUpdate);
#endif
#pragma omp parallel for num_threads(nth) schedule(static)
- for (i = start; i < nrend; i++)
+ for (int i = 0; i < homenr; i++)
{
// Trivial statement, does not throw
- copy_rvec(upd->xp[i], state->x[i]);
+ copy_rvec(xp[i], state->x[i]);
}
}
wallcycle_stop(wcycle, ewcUPDATE);
dump_it_all(fplog, "After unshift",
- state->natoms, state->x, upd->xp, state->v, force);
+ state->natoms, &state->x, &upd->xp, &state->v, force);
}
/* ############# END the update of velocities and positions ######### */
}
-void update_box(FILE *fplog,
- gmx_int64_t step,
- t_inputrec *inputrec, /* input record and box stuff */
- t_mdatoms *md,
- t_state *state,
- rvec force[], /* forces on home particles */
- matrix pcoupl_mu,
- t_nrnb *nrnb,
- gmx_update_t *upd)
+void update_pcouple_after_coordinates(FILE *fplog,
+ gmx_int64_t step,
+ const t_inputrec *inputrec,
+ const t_mdatoms *md,
+ const matrix pressure,
+ const matrix forceVirial,
+ const matrix constraintVirial,
+ const matrix parrinellorahmanMu,
+ t_state *state,
+ t_nrnb *nrnb,
+ gmx_update_t *upd)
{
- double dt;
- int start, homenr, i, n, m;
+ int start = 0;
+ int homenr = md->homenr;
- start = 0;
- homenr = md->homenr;
-
- dt = inputrec->delta_t;
+ /* Cast to real for faster code, no loss in precision (see comment above) */
+ real dt = inputrec->delta_t;
where();
case (epcNO):
break;
case (epcBERENDSEN):
- /* We should only scale after a step where the pressure (kinetic
- * energy and virial) was calculated. This happens after the
- * coordinate update, whereas the current routine is called before
- * that, so we scale when step % nstpcouple = 1 instead of 0.
- */
- if (inputrec->nstpcouple == 1 || (step % inputrec->nstpcouple == 1))
+ if (isPressureCouplingStep(step, inputrec))
{
- berendsen_pscale(inputrec, pcoupl_mu, state->box, state->box_rel,
- start, homenr, state->x, md->cFREEZE, nrnb);
+ real dtpc = inputrec->nstpcouple*dt;
+ matrix mu;
+ berendsen_pcoupl(fplog, step, inputrec, dtpc,
+ pressure, state->box,
+ forceVirial, constraintVirial,
+ mu, &state->baros_integral);
+ berendsen_pscale(inputrec, mu, state->box, state->box_rel,
+ start, homenr, as_rvec_array(state->x.data()),
+ md->cFREEZE, nrnb);
}
break;
case (epcPARRINELLORAHMAN):
- /* The box velocities were updated in do_pr_pcoupl in the update
- * iteration, but we dont change the box vectors until we get here
- * since we need to be able to shift/unshift above.
- */
- for (i = 0; i < DIM; i++)
+ if (isPressureCouplingStep(step, inputrec))
{
- for (m = 0; m <= i; m++)
+ /* The box velocities were updated in do_pr_pcoupl,
+ * but we dont change the box vectors until we get here
+ * since we need to be able to shift/unshift above.
+ */
+ real dtpc = inputrec->nstpcouple*dt;
+ for (int i = 0; i < DIM; i++)
{
- state->box[i][m] += dt*state->boxv[i][m];
+ for (int m = 0; m <= i; m++)
+ {
+ state->box[i][m] += dtpc*state->boxv[i][m];
+ }
}
- }
- preserve_box_shape(inputrec, state->box_rel, state->box);
+ preserve_box_shape(inputrec, state->box_rel, state->box);
- /* Scale the coordinates */
- for (n = start; (n < start+homenr); n++)
- {
- tmvmul_ur0(pcoupl_mu, state->x[n], state->x[n]);
+ /* Scale the coordinates */
+ for (int n = start; n < start + homenr; n++)
+ {
+ tmvmul_ur0(parrinellorahmanMu, state->x[n], state->x[n]);
+ }
}
break;
case (epcMTTK):
ln V_new = ln V_old + 3*dt*veta => V_new = V_old*exp(3*dt*veta) =>
Side length scales as exp(veta*dt) */
- msmul(state->box, exp(state->veta*dt), state->box);
+ msmul(state->box, std::exp(state->veta*dt), state->box);
/* Relate veta to boxv. veta = d(eta)/dT = (1/DIM)*1/V dV/dT.
o If we assume isotropic scaling, and box length scaling
if (inputrecDeform(inputrec))
{
- deform(upd, start, homenr, state->x, state->box, inputrec, step);
+ deform(upd, start, homenr, as_rvec_array(state->x.data()), state->box, inputrec, step);
}
where();
dump_it_all(fplog, "After update",
- state->natoms, state->x, upd->xp, state->v, force);
+ state->natoms, &state->x, &upd->xp, &state->v, nullptr);
}
void update_coords(FILE *fplog,
t_inputrec *inputrec, /* input record and box stuff */
t_mdatoms *md,
t_state *state,
- rvec *f, /* forces on home particles */
+ PaddedRVecVector *f, /* forces on home particles */
t_fcdata *fcd,
gmx_ekindata_t *ekind,
matrix M,
t_commrec *cr, /* these shouldn't be here -- need to think about it */
gmx_constr_t constr)
{
- gmx_bool bNH, bPR, bDoConstr = FALSE;
- double dt, alpha;
- int start, homenr, nrend;
- int nth, th;
-
- bDoConstr = (NULL != constr);
+ gmx_bool bDoConstr = (nullptr != constr);
/* Running the velocity half does nothing except for velocity verlet */
if ((UpdatePart == etrtVELOCITY1 || UpdatePart == etrtVELOCITY2) &&
gmx_incons("update_coords called for velocity without VV integrator");
}
- start = 0;
- homenr = md->homenr;
- nrend = start+homenr;
+ int homenr = md->homenr;
- dt = inputrec->delta_t;
+ /* Cast to real for faster code, no loss in precision (see comment above) */
+ real dt = inputrec->delta_t;
/* We need to update the NMR restraint history when time averaging is used */
if (state->flags & (1<<estDISRE_RM3TAV))
update_orires_history(fcd, &state->hist);
}
-
- bNH = inputrec->etc == etcNOSEHOOVER;
- bPR = ((inputrec->epc == epcPARRINELLORAHMAN) || (inputrec->epc == epcMTTK));
-
/* ############# START The update of velocities and positions ######### */
where();
dump_it_all(fplog, "Before update",
- state->natoms, state->x, upd->xp, state->v, f);
+ state->natoms, &state->x, &upd->xp, &state->v, f);
- nth = gmx_omp_nthreads_get(emntUpdate);
+ int nth = gmx_omp_nthreads_get(emntUpdate);
-#pragma omp parallel for num_threads(nth) schedule(static) private(alpha)
- for (th = 0; th < nth; th++)
+#pragma omp parallel for num_threads(nth) schedule(static)
+ for (int th = 0; th < nth; th++)
{
try
{
int start_th, end_th;
+ getThreadAtomRange(nth, th, homenr, &start_th, &end_th);
+
+#ifndef NDEBUG
+ /* Strictly speaking, we would only need this check with SIMD
+ * and for the actual SIMD width. But since the code currently
+ * always adds padding for GMX_REAL_MAX_SIMD_WIDTH, we check that.
+ */
+ size_t homenrSimdPadded = ((homenr + GMX_REAL_MAX_SIMD_WIDTH - 1)/GMX_REAL_MAX_SIMD_WIDTH)*GMX_REAL_MAX_SIMD_WIDTH;
+ GMX_ASSERT(state->x.size() >= homenrSimdPadded, "state->x needs to be padded for SIMD access");
+ GMX_ASSERT(upd->xp.size() >= homenrSimdPadded, "upd->xp needs to be padded for SIMD access");
+ GMX_ASSERT(state->v.size() >= homenrSimdPadded, "state->v needs to be padded for SIMD access");
+ GMX_ASSERT(f->size() >= homenrSimdPadded, "f needs to be padded for SIMD access");
+#endif
- start_th = start + ((nrend-start)* th )/nth;
- end_th = start + ((nrend-start)*(th+1))/nth;
+ const rvec *x_rvec = as_rvec_array(state->x.data());
+ rvec *xp_rvec = as_rvec_array(upd->xp.data());
+ rvec *v_rvec = as_rvec_array(state->v.data());
+ const rvec *f_rvec = as_rvec_array(f->data());
switch (inputrec->eI)
{
case (eiMD):
- if (ekind->cosacc.cos_accel == 0)
- {
- do_update_md(start_th, end_th,
- dt, inputrec->nstpcouple,
- ekind->tcstat, state->nosehoover_vxi,
- ekind->bNEMD, ekind->grpstat, inputrec->opts.acc,
- inputrec->opts.nFreeze,
- md->invmass, md->ptype,
- md->cFREEZE, md->cACC, md->cTC,
- state->x, upd->xp, state->v, f, M,
- bNH, bPR);
- }
- else
- {
- do_update_visc(start_th, end_th,
- dt, inputrec->nstpcouple,
- ekind->tcstat, state->nosehoover_vxi,
- md->invmass, md->ptype,
- md->cTC, state->x, upd->xp, state->v, f, M,
- state->box,
- ekind->cosacc.cos_accel,
- ekind->cosacc.vcos,
- bNH, bPR);
- }
+ do_update_md(start_th, end_th, step, dt,
+ inputrec, md, ekind, state->box,
+ x_rvec, xp_rvec, v_rvec, f_rvec,
+ state->nosehoover_vxi.data(), M);
break;
case (eiSD1):
/* With constraints, the SD1 update is done in 2 parts */
inputrec->opts.acc, inputrec->opts.nFreeze,
md->invmass, md->ptype,
md->cFREEZE, md->cACC, md->cTC,
- state->x, upd->xp, state->v, f,
+ x_rvec, xp_rvec, v_rvec, f_rvec,
bDoConstr, TRUE,
- step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
+ step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : nullptr);
break;
case (eiBD):
do_update_bd(start_th, end_th, dt,
inputrec->opts.nFreeze, md->invmass, md->ptype,
md->cFREEZE, md->cTC,
- state->x, upd->xp, state->v, f,
+ x_rvec, xp_rvec, v_rvec, f_rvec,
inputrec->bd_fric,
upd->sd->bd_rf,
- step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
+ step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : nullptr);
break;
case (eiVV):
case (eiVVAK):
- alpha = 1.0 + DIM/((double)inputrec->opts.nrdf[0]); /* assuming barostat coupled to group 0. */
+ {
+ gmx_bool bExtended = (inputrec->etc == etcNOSEHOOVER ||
+ inputrec->epc == epcPARRINELLORAHMAN ||
+ inputrec->epc == epcMTTK);
+
+ /* assuming barostat coupled to group 0 */
+ real alpha = 1.0 + DIM/static_cast<real>(inputrec->opts.nrdf[0]);
switch (UpdatePart)
{
case etrtVELOCITY1:
inputrec->opts.acc, inputrec->opts.nFreeze,
md->invmass, md->ptype,
md->cFREEZE, md->cACC,
- state->v, f,
- (bNH || bPR), state->veta, alpha);
+ v_rvec, f_rvec,
+ bExtended, state->veta, alpha);
break;
case etrtPOSITION:
do_update_vv_pos(start_th, end_th, dt,
inputrec->opts.nFreeze,
md->ptype, md->cFREEZE,
- state->x, upd->xp, state->v,
- (bNH || bPR), state->veta);
+ x_rvec, xp_rvec, v_rvec,
+ bExtended, state->veta);
break;
}
break;
+ }
default:
gmx_fatal(FARGS, "Don't know how to update coordinates");
break;
real rate = (ir->delta_t)/ir->opts.tau_t[0];
- if (ir->etc == etcANDERSEN && constr != NULL)
+ if (ir->etc == etcANDERSEN && constr != nullptr)
{
/* Currently, Andersen thermostat does not support constrained
systems. Functionality exists in the andersen_tcoupl