-/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
- *
- *
- * This source code is part of
- *
- * G R O M A C S
- *
- * GROningen MAchine for Chemical Simulations
+/*
+ * This file is part of the GROMACS molecular simulation package.
*
- * VERSION 3.2.0
- * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team,
- * check out http://www.gromacs.org for more information.
-
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
+ * Copyright (c) 2001-2004, The GROMACS development team.
+ * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
- * If you want to redistribute modifications, please consider that
- * scientific software is very special. Version control is crucial -
- * bugs must be traceable. We will be happy to consider code for
- * inclusion in the official distribution, but derived work must not
- * be called official GROMACS. Details are found in the README & COPYING
- * files - if they are missing, get the official version at www.gromacs.org.
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * To help us fund GROMACS development, we humbly ask that you cite
- * the papers on the package - you can find them in the top README file.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
- * For more info, check our website at http://www.gromacs.org
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
*
- * And Hey:
- * GROningen Mixture of Alchemy and Childrens' Stories
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <math.h>
+#include <assert.h>
#include "physics.h"
#include "vec.h"
-#include "maths.h"
+#include "gromacs/math/utilities.h"
#include "txtdump.h"
#include "bondf.h"
-#include "smalloc.h"
+#include "gromacs/utility/smalloc.h"
#include "pbc.h"
#include "ns.h"
#include "macros.h"
#include "orires.h"
#include "force.h"
#include "nonbonded.h"
+#include "restcbt.h"
-#if !defined GMX_DOUBLE && defined GMX_X86_SSE2
-#include "gmx_x86_simd_single.h"
-#define SSE_PROPER_DIHEDRALS
-#endif
+#include "gromacs/simd/simd.h"
+#include "gromacs/simd/simd_math.h"
+#include "gromacs/simd/vector_operations.h"
/* Find a better place for this? */
const int cmap_coeff_matrix[] = {
}
}
+#ifdef GMX_SIMD_HAVE_REAL
+
+/* SIMD PBC data structure, containing 1/boxdiag and the box vectors */
+typedef struct {
+ gmx_simd_real_t inv_bzz;
+ gmx_simd_real_t inv_byy;
+ gmx_simd_real_t inv_bxx;
+ gmx_simd_real_t bzx;
+ gmx_simd_real_t bzy;
+ gmx_simd_real_t bzz;
+ gmx_simd_real_t byx;
+ gmx_simd_real_t byy;
+ gmx_simd_real_t bxx;
+} pbc_simd_t;
+
+/* Set the SIMD pbc data from a normal t_pbc struct */
+static void set_pbc_simd(const t_pbc *pbc, pbc_simd_t *pbc_simd)
+{
+ rvec inv_bdiag;
+ int d;
+
+ /* Setting inv_bdiag to 0 effectively turns off PBC */
+ clear_rvec(inv_bdiag);
+ if (pbc != NULL)
+ {
+ for (d = 0; d < pbc->ndim_ePBC; d++)
+ {
+ inv_bdiag[d] = 1.0/pbc->box[d][d];
+ }
+ }
+
+ pbc_simd->inv_bzz = gmx_simd_set1_r(inv_bdiag[ZZ]);
+ pbc_simd->inv_byy = gmx_simd_set1_r(inv_bdiag[YY]);
+ pbc_simd->inv_bxx = gmx_simd_set1_r(inv_bdiag[XX]);
+
+ if (pbc != NULL)
+ {
+ pbc_simd->bzx = gmx_simd_set1_r(pbc->box[ZZ][XX]);
+ pbc_simd->bzy = gmx_simd_set1_r(pbc->box[ZZ][YY]);
+ pbc_simd->bzz = gmx_simd_set1_r(pbc->box[ZZ][ZZ]);
+ pbc_simd->byx = gmx_simd_set1_r(pbc->box[YY][XX]);
+ pbc_simd->byy = gmx_simd_set1_r(pbc->box[YY][YY]);
+ pbc_simd->bxx = gmx_simd_set1_r(pbc->box[XX][XX]);
+ }
+ else
+ {
+ pbc_simd->bzx = gmx_simd_setzero_r();
+ pbc_simd->bzy = gmx_simd_setzero_r();
+ pbc_simd->bzz = gmx_simd_setzero_r();
+ pbc_simd->byx = gmx_simd_setzero_r();
+ pbc_simd->byy = gmx_simd_setzero_r();
+ pbc_simd->bxx = gmx_simd_setzero_r();
+ }
+}
+
+/* Correct distance vector *dx,*dy,*dz for PBC using SIMD */
+static gmx_inline void
+pbc_dx_simd(gmx_simd_real_t *dx, gmx_simd_real_t *dy, gmx_simd_real_t *dz,
+ const pbc_simd_t *pbc)
+{
+ gmx_simd_real_t sh;
+
+ sh = gmx_simd_round_r(gmx_simd_mul_r(*dz, pbc->inv_bzz));
+ *dx = gmx_simd_fnmadd_r(sh, pbc->bzx, *dx);
+ *dy = gmx_simd_fnmadd_r(sh, pbc->bzy, *dy);
+ *dz = gmx_simd_fnmadd_r(sh, pbc->bzz, *dz);
+
+ sh = gmx_simd_round_r(gmx_simd_mul_r(*dy, pbc->inv_byy));
+ *dx = gmx_simd_fnmadd_r(sh, pbc->byx, *dx);
+ *dy = gmx_simd_fnmadd_r(sh, pbc->byy, *dy);
+
+ sh = gmx_simd_round_r(gmx_simd_mul_r(*dx, pbc->inv_bxx));
+ *dx = gmx_simd_fnmadd_r(sh, pbc->bxx, *dx);
+}
+
+#endif /* GMX_SIMD_HAVE_REAL */
+
/*
* Morse potential bond by Frank Everdij
*
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
const real one = 1.0;
const real two = 2.0;
const t_iatom forceatoms[], const t_iparams forceparams[],
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
- real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ real gmx_unused lambda, real gmx_unused *dvdlambda,
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
const real three = 3.0;
const real two = 2.0;
const t_iatom forceatoms[], const t_iparams forceparams[],
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
- real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
+ real gmx_unused lambda, real gmx_unused *dvdlambda,
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
int *global_atom_index)
{
const real half = 0.5;
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
int i, m, ki, ai, aj, type;
real dr, dr2, fbond, vbond, fij, vtot;
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
int i, m, ki, ai, aj, type;
real dr, dr2, fbond, vbond, fij, vtot;
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
int i, m, ki, ai, aj, type;
real dr, dr2, fbond, vbond, fij, vtot, ksh;
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
int i, m, ki, ai, aj, type;
real dr, dr2, fbond, vbond, fij, vtot, ksh, khyp, drcut, ddr, ddr3;
real water_pol(int nbonds,
const t_iatom forceatoms[], const t_iparams forceparams[],
- const rvec x[], rvec f[], rvec fshift[],
- const t_pbc *pbc, const t_graph *g,
- real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const rvec x[], rvec f[], rvec gmx_unused fshift[],
+ const t_pbc gmx_unused *pbc, const t_graph gmx_unused *g,
+ real gmx_unused lambda, real gmx_unused *dvdlambda,
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
/* This routine implements anisotropic polarizibility for water, through
* a shell connected to a dummy with spring constant that differ in the
* three spatial dimensions in the molecular frame.
*/
- int i, m, aO, aH1, aH2, aD, aS, type, type0;
+ int i, m, aO, aH1, aH2, aD, aS, type, type0, ki;
+ ivec dt;
rvec dOH1, dOH2, dHH, dOD, dDS, nW, kk, dx, kdx, proj;
#ifdef DEBUG
rvec df;
aS = forceatoms[i+5];
/* Compute vectors describing the water frame */
- rvec_sub(x[aH1], x[aO], dOH1);
- rvec_sub(x[aH2], x[aO], dOH2);
- rvec_sub(x[aH2], x[aH1], dHH);
- rvec_sub(x[aD], x[aO], dOD);
- rvec_sub(x[aS], x[aD], dDS);
+ pbc_rvec_sub(pbc, x[aH1], x[aO], dOH1);
+ pbc_rvec_sub(pbc, x[aH2], x[aO], dOH2);
+ pbc_rvec_sub(pbc, x[aH2], x[aH1], dHH);
+ pbc_rvec_sub(pbc, x[aD], x[aO], dOD);
+ ki = pbc_rvec_sub(pbc, x[aS], x[aD], dDS);
cprod(dOH1, dOH2, nW);
/* Compute inverse length of normal vector
kdx[YY] = kk[YY]*dx[YY];
kdx[ZZ] = kk[ZZ]*dx[ZZ];
vtot += iprod(dx, kdx);
+
+ if (g)
+ {
+ ivec_sub(SHIFT_IVEC(g, aS), SHIFT_IVEC(g, aD), dt);
+ ki = IVEC2IS(dt);
+ }
+
for (m = 0; (m < DIM); m++)
{
/* This is a tensor operation but written out for speed */
#ifdef DEBUG
df[m] = fij;
#endif
- f[aS][m] += fij;
- f[aD][m] -= fij;
+ f[aS][m] += fij;
+ f[aD][m] -= fij;
+ fshift[ki][m] += fij;
+ fshift[CENTRAL][m] -= fij;
}
#ifdef DEBUG
if (debug)
real thole_pol(int nbonds,
const t_iatom forceatoms[], const t_iparams forceparams[],
const rvec x[], rvec f[], rvec fshift[],
- const t_pbc *pbc, const t_graph *g,
- real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_pbc *pbc, const t_graph gmx_unused *g,
+ real gmx_unused lambda, real gmx_unused *dvdlambda,
+ const t_mdatoms *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
/* Interaction between two pairs of particles with opposite charge */
int i, type, a1, da1, a2, da2;
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
int i, ai, aj, ak, t1, t2, type;
rvec r_ij, r_kj;
return vtot;
}
+#ifdef GMX_SIMD_HAVE_REAL
+
+/* As angles, but using SIMD to calculate many dihedrals at once.
+ * This routines does not calculate energies and shift forces.
+ */
+static gmx_inline void
+angles_noener_simd(int nbonds,
+ const t_iatom forceatoms[], const t_iparams forceparams[],
+ const rvec x[], rvec f[],
+ const t_pbc *pbc, const t_graph gmx_unused *g,
+ real gmx_unused lambda,
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
+{
+ const int nfa1 = 4;
+ int i, iu, s, m;
+ int type, ai[GMX_SIMD_REAL_WIDTH], aj[GMX_SIMD_REAL_WIDTH];
+ int ak[GMX_SIMD_REAL_WIDTH];
+ real coeff_array[2*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *coeff;
+ real dr_array[2*DIM*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *dr;
+ real f_buf_array[6*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *f_buf;
+ gmx_simd_real_t k_S, theta0_S;
+ gmx_simd_real_t rijx_S, rijy_S, rijz_S;
+ gmx_simd_real_t rkjx_S, rkjy_S, rkjz_S;
+ gmx_simd_real_t one_S;
+ gmx_simd_real_t min_one_plus_eps_S;
+ gmx_simd_real_t rij_rkj_S;
+ gmx_simd_real_t nrij2_S, nrij_1_S;
+ gmx_simd_real_t nrkj2_S, nrkj_1_S;
+ gmx_simd_real_t cos_S, invsin_S;
+ gmx_simd_real_t theta_S;
+ gmx_simd_real_t st_S, sth_S;
+ gmx_simd_real_t cik_S, cii_S, ckk_S;
+ gmx_simd_real_t f_ix_S, f_iy_S, f_iz_S;
+ gmx_simd_real_t f_kx_S, f_ky_S, f_kz_S;
+ pbc_simd_t pbc_simd;
+
+ /* Ensure register memory alignment */
+ coeff = gmx_simd_align_r(coeff_array);
+ dr = gmx_simd_align_r(dr_array);
+ f_buf = gmx_simd_align_r(f_buf_array);
+
+ set_pbc_simd(pbc, &pbc_simd);
+
+ one_S = gmx_simd_set1_r(1.0);
+
+ /* The smallest number > -1 */
+ min_one_plus_eps_S = gmx_simd_set1_r(-1.0 + 2*GMX_REAL_EPS);
+
+ /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
+ for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1)
+ {
+ /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
+ * iu indexes into forceatoms, we should not let iu go beyond nbonds.
+ */
+ iu = i;
+ for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
+ {
+ type = forceatoms[iu];
+ ai[s] = forceatoms[iu+1];
+ aj[s] = forceatoms[iu+2];
+ ak[s] = forceatoms[iu+3];
+
+ coeff[s] = forceparams[type].harmonic.krA;
+ coeff[GMX_SIMD_REAL_WIDTH+s] = forceparams[type].harmonic.rA*DEG2RAD;
+
+ /* If you can't use pbc_dx_simd below for PBC, e.g. because
+ * you can't round in SIMD, use pbc_rvec_sub here.
+ */
+ /* Store the non PBC corrected distances packed and aligned */
+ for (m = 0; m < DIM; m++)
+ {
+ dr[s + m *GMX_SIMD_REAL_WIDTH] = x[ai[s]][m] - x[aj[s]][m];
+ dr[s + (DIM+m)*GMX_SIMD_REAL_WIDTH] = x[ak[s]][m] - x[aj[s]][m];
+ }
+
+ /* At the end fill the arrays with identical entries */
+ if (iu + nfa1 < nbonds)
+ {
+ iu += nfa1;
+ }
+ }
+
+ k_S = gmx_simd_load_r(coeff);
+ theta0_S = gmx_simd_load_r(coeff+GMX_SIMD_REAL_WIDTH);
+
+ rijx_S = gmx_simd_load_r(dr + 0*GMX_SIMD_REAL_WIDTH);
+ rijy_S = gmx_simd_load_r(dr + 1*GMX_SIMD_REAL_WIDTH);
+ rijz_S = gmx_simd_load_r(dr + 2*GMX_SIMD_REAL_WIDTH);
+ rkjx_S = gmx_simd_load_r(dr + 3*GMX_SIMD_REAL_WIDTH);
+ rkjy_S = gmx_simd_load_r(dr + 4*GMX_SIMD_REAL_WIDTH);
+ rkjz_S = gmx_simd_load_r(dr + 5*GMX_SIMD_REAL_WIDTH);
+
+ pbc_dx_simd(&rijx_S, &rijy_S, &rijz_S, &pbc_simd);
+ pbc_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, &pbc_simd);
+
+ rij_rkj_S = gmx_simd_iprod_r(rijx_S, rijy_S, rijz_S,
+ rkjx_S, rkjy_S, rkjz_S);
+
+ nrij2_S = gmx_simd_norm2_r(rijx_S, rijy_S, rijz_S);
+ nrkj2_S = gmx_simd_norm2_r(rkjx_S, rkjy_S, rkjz_S);
+
+ nrij_1_S = gmx_simd_invsqrt_r(nrij2_S);
+ nrkj_1_S = gmx_simd_invsqrt_r(nrkj2_S);
+
+ cos_S = gmx_simd_mul_r(rij_rkj_S, gmx_simd_mul_r(nrij_1_S, nrkj_1_S));
+
+ /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
+ * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
+ * This also ensures that rounding errors would cause the argument
+ * of gmx_simd_acos_r to be < -1.
+ * Note that we do not take precautions for cos(0)=1, so the outer
+ * atoms in an angle should not be on top of each other.
+ */
+ cos_S = gmx_simd_max_r(cos_S, min_one_plus_eps_S);
+
+ theta_S = gmx_simd_acos_r(cos_S);
+
+ invsin_S = gmx_simd_invsqrt_r(gmx_simd_sub_r(one_S, gmx_simd_mul_r(cos_S, cos_S)));
+
+ st_S = gmx_simd_mul_r(gmx_simd_mul_r(k_S, gmx_simd_sub_r(theta0_S, theta_S)),
+ invsin_S);
+ sth_S = gmx_simd_mul_r(st_S, cos_S);
+
+ cik_S = gmx_simd_mul_r(st_S, gmx_simd_mul_r(nrij_1_S, nrkj_1_S));
+ cii_S = gmx_simd_mul_r(sth_S, gmx_simd_mul_r(nrij_1_S, nrij_1_S));
+ ckk_S = gmx_simd_mul_r(sth_S, gmx_simd_mul_r(nrkj_1_S, nrkj_1_S));
+
+ f_ix_S = gmx_simd_mul_r(cii_S, rijx_S);
+ f_ix_S = gmx_simd_fnmadd_r(cik_S, rkjx_S, f_ix_S);
+ f_iy_S = gmx_simd_mul_r(cii_S, rijy_S);
+ f_iy_S = gmx_simd_fnmadd_r(cik_S, rkjy_S, f_iy_S);
+ f_iz_S = gmx_simd_mul_r(cii_S, rijz_S);
+ f_iz_S = gmx_simd_fnmadd_r(cik_S, rkjz_S, f_iz_S);
+ f_kx_S = gmx_simd_mul_r(ckk_S, rkjx_S);
+ f_kx_S = gmx_simd_fnmadd_r(cik_S, rijx_S, f_kx_S);
+ f_ky_S = gmx_simd_mul_r(ckk_S, rkjy_S);
+ f_ky_S = gmx_simd_fnmadd_r(cik_S, rijy_S, f_ky_S);
+ f_kz_S = gmx_simd_mul_r(ckk_S, rkjz_S);
+ f_kz_S = gmx_simd_fnmadd_r(cik_S, rijz_S, f_kz_S);
+
+ gmx_simd_store_r(f_buf + 0*GMX_SIMD_REAL_WIDTH, f_ix_S);
+ gmx_simd_store_r(f_buf + 1*GMX_SIMD_REAL_WIDTH, f_iy_S);
+ gmx_simd_store_r(f_buf + 2*GMX_SIMD_REAL_WIDTH, f_iz_S);
+ gmx_simd_store_r(f_buf + 3*GMX_SIMD_REAL_WIDTH, f_kx_S);
+ gmx_simd_store_r(f_buf + 4*GMX_SIMD_REAL_WIDTH, f_ky_S);
+ gmx_simd_store_r(f_buf + 5*GMX_SIMD_REAL_WIDTH, f_kz_S);
+
+ iu = i;
+ s = 0;
+ do
+ {
+ for (m = 0; m < DIM; m++)
+ {
+ f[ai[s]][m] += f_buf[s + m*GMX_SIMD_REAL_WIDTH];
+ f[aj[s]][m] -= f_buf[s + m*GMX_SIMD_REAL_WIDTH] + f_buf[s + (DIM+m)*GMX_SIMD_REAL_WIDTH];
+ f[ak[s]][m] += f_buf[s + (DIM+m)*GMX_SIMD_REAL_WIDTH];
+ }
+ s++;
+ iu += nfa1;
+ }
+ while (s < GMX_SIMD_REAL_WIDTH && iu < nbonds);
+ }
+}
+
+#endif /* GMX_SIMD_HAVE_REAL */
+
real linear_angles(int nbonds,
const t_iatom forceatoms[], const t_iparams forceparams[],
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
int i, m, ai, aj, ak, t1, t2, type;
rvec f_i, f_j, f_k;
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
int i, m, ai, aj, ak, t1, t2, type, ki;
rvec r_ij, r_kj, r_ik;
const t_iatom forceatoms[], const t_iparams forceparams[],
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
- real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ real gmx_unused lambda, real gmx_unused *dvdlambda,
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
int i, j, ai, aj, ak, t1, t2, type;
rvec r_ij, r_kj;
}
-#ifdef SSE_PROPER_DIHEDRALS
-
-/* x86 SIMD inner-product of 4 float vectors */
-#define GMX_MM_IPROD_PS(ax, ay, az, bx, by, bz) \
- _mm_add_ps(_mm_add_ps(_mm_mul_ps(ax, bx), _mm_mul_ps(ay, by)), _mm_mul_ps(az, bz))
-
-/* x86 SIMD norm^2 of 4 float vectors */
-#define GMX_MM_NORM2_PS(ax, ay, az) GMX_MM_IPROD_PS(ax, ay, az, ax, ay, az)
-
-/* x86 SIMD cross-product of 4 float vectors */
-#define GMX_MM_CPROD_PS(ax, ay, az, bx, by, bz, cx, cy, cz) \
- { \
- cx = _mm_sub_ps(_mm_mul_ps(ay, bz), _mm_mul_ps(az, by)); \
- cy = _mm_sub_ps(_mm_mul_ps(az, bx), _mm_mul_ps(ax, bz)); \
- cz = _mm_sub_ps(_mm_mul_ps(ax, by), _mm_mul_ps(ay, bx)); \
- }
-
-/* load 4 rvec's into 3 x86 SIMD float registers */
-#define load_rvec4(r0, r1, r2, r3, rx_SSE, ry_SSE, rz_SSE) \
- { \
- __m128 tmp; \
- rx_SSE = _mm_load_ps(r0); \
- ry_SSE = _mm_load_ps(r1); \
- rz_SSE = _mm_load_ps(r2); \
- tmp = _mm_load_ps(r3); \
- _MM_TRANSPOSE4_PS(rx_SSE, ry_SSE, rz_SSE, tmp); \
- }
-
-#define store_rvec4(rx_SSE, ry_SSE, rz_SSE, r0, r1, r2, r3) \
- { \
- __m128 tmp = _mm_setzero_ps(); \
- _MM_TRANSPOSE4_PS(rx_SSE, ry_SSE, rz_SSE, tmp); \
- _mm_store_ps(r0, rx_SSE); \
- _mm_store_ps(r1, ry_SSE); \
- _mm_store_ps(r2, rz_SSE); \
- _mm_store_ps(r3, tmp ); \
- }
-
-/* An rvec in a structure which can be allocated 16-byte aligned */
-typedef struct {
- rvec v;
- float f;
-} rvec_sse_t;
+#ifdef GMX_SIMD_HAVE_REAL
-/* As dih_angle above, but calculates 4 dihedral angles at once using SSE,
+/* As dih_angle above, but calculates 4 dihedral angles at once using SIMD,
* also calculates the pre-factor required for the dihedral force update.
- * Note that bv and buf should be 16-byte aligned.
+ * Note that bv and buf should be register aligned.
*/
-static void
-dih_angle_sse(const rvec *x,
- int ai[4], int aj[4], int ak[4], int al[4],
- const t_pbc *pbc,
- int t1[4], int t2[4], int t3[4],
- rvec_sse_t *bv,
- real *buf)
+static gmx_inline void
+dih_angle_simd(const rvec *x,
+ const int *ai, const int *aj, const int *ak, const int *al,
+ const pbc_simd_t *pbc,
+ real *dr,
+ gmx_simd_real_t *phi_S,
+ gmx_simd_real_t *mx_S, gmx_simd_real_t *my_S, gmx_simd_real_t *mz_S,
+ gmx_simd_real_t *nx_S, gmx_simd_real_t *ny_S, gmx_simd_real_t *nz_S,
+ gmx_simd_real_t *nrkj_m2_S,
+ gmx_simd_real_t *nrkj_n2_S,
+ real *p,
+ real *q)
{
- int s;
- __m128 rijx_SSE, rijy_SSE, rijz_SSE;
- __m128 rkjx_SSE, rkjy_SSE, rkjz_SSE;
- __m128 rklx_SSE, rkly_SSE, rklz_SSE;
- __m128 mx_SSE, my_SSE, mz_SSE;
- __m128 nx_SSE, ny_SSE, nz_SSE;
- __m128 cx_SSE, cy_SSE, cz_SSE;
- __m128 cn_SSE;
- __m128 s_SSE;
- __m128 phi_SSE;
- __m128 ipr_SSE;
- int signs;
- __m128 iprm_SSE, iprn_SSE;
- __m128 nrkj2_SSE, nrkj_1_SSE, nrkj_2_SSE, nrkj_SSE;
- __m128 nrkj_m2_SSE, nrkj_n2_SSE;
- __m128 p_SSE, q_SSE;
- __m128 fmin_SSE = _mm_set1_ps(GMX_FLOAT_MIN);
-
- for (s = 0; s < 4; s++)
+ int s, m;
+ gmx_simd_real_t rijx_S, rijy_S, rijz_S;
+ gmx_simd_real_t rkjx_S, rkjy_S, rkjz_S;
+ gmx_simd_real_t rklx_S, rkly_S, rklz_S;
+ gmx_simd_real_t cx_S, cy_S, cz_S;
+ gmx_simd_real_t cn_S;
+ gmx_simd_real_t s_S;
+ gmx_simd_real_t ipr_S;
+ gmx_simd_real_t iprm_S, iprn_S;
+ gmx_simd_real_t nrkj2_S, nrkj_1_S, nrkj_2_S, nrkj_S;
+ gmx_simd_real_t toler_S;
+ gmx_simd_real_t p_S, q_S;
+ gmx_simd_real_t nrkj2_min_S;
+ gmx_simd_real_t real_eps_S;
+
+ /* Used to avoid division by zero.
+ * We take into acount that we multiply the result by real_eps_S.
+ */
+ nrkj2_min_S = gmx_simd_set1_r(GMX_REAL_MIN/(2*GMX_REAL_EPS));
+
+ /* The value of the last significant bit (GMX_REAL_EPS is half of that) */
+ real_eps_S = gmx_simd_set1_r(2*GMX_REAL_EPS);
+
+ for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
{
- t1[s] = pbc_rvec_sub(pbc, x[ai[s]], x[aj[s]], bv[0+s].v);
- t2[s] = pbc_rvec_sub(pbc, x[ak[s]], x[aj[s]], bv[4+s].v);
- t3[s] = pbc_rvec_sub(pbc, x[ak[s]], x[al[s]], bv[8+s].v);
+ /* If you can't use pbc_dx_simd below for PBC, e.g. because
+ * you can't round in SIMD, use pbc_rvec_sub here.
+ */
+ for (m = 0; m < DIM; m++)
+ {
+ dr[s + (0*DIM + m)*GMX_SIMD_REAL_WIDTH] = x[ai[s]][m] - x[aj[s]][m];
+ dr[s + (1*DIM + m)*GMX_SIMD_REAL_WIDTH] = x[ak[s]][m] - x[aj[s]][m];
+ dr[s + (2*DIM + m)*GMX_SIMD_REAL_WIDTH] = x[ak[s]][m] - x[al[s]][m];
+ }
}
- load_rvec4(bv[0].v, bv[1].v, bv[2].v, bv[3].v, rijx_SSE, rijy_SSE, rijz_SSE);
- load_rvec4(bv[4].v, bv[5].v, bv[6].v, bv[7].v, rkjx_SSE, rkjy_SSE, rkjz_SSE);
- load_rvec4(bv[8].v, bv[9].v, bv[10].v, bv[11].v, rklx_SSE, rkly_SSE, rklz_SSE);
+ rijx_S = gmx_simd_load_r(dr + 0*GMX_SIMD_REAL_WIDTH);
+ rijy_S = gmx_simd_load_r(dr + 1*GMX_SIMD_REAL_WIDTH);
+ rijz_S = gmx_simd_load_r(dr + 2*GMX_SIMD_REAL_WIDTH);
+ rkjx_S = gmx_simd_load_r(dr + 3*GMX_SIMD_REAL_WIDTH);
+ rkjy_S = gmx_simd_load_r(dr + 4*GMX_SIMD_REAL_WIDTH);
+ rkjz_S = gmx_simd_load_r(dr + 5*GMX_SIMD_REAL_WIDTH);
+ rklx_S = gmx_simd_load_r(dr + 6*GMX_SIMD_REAL_WIDTH);
+ rkly_S = gmx_simd_load_r(dr + 7*GMX_SIMD_REAL_WIDTH);
+ rklz_S = gmx_simd_load_r(dr + 8*GMX_SIMD_REAL_WIDTH);
- GMX_MM_CPROD_PS(rijx_SSE, rijy_SSE, rijz_SSE,
- rkjx_SSE, rkjy_SSE, rkjz_SSE,
- mx_SSE, my_SSE, mz_SSE);
+ pbc_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc);
+ pbc_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc);
+ pbc_dx_simd(&rklx_S, &rkly_S, &rklz_S, pbc);
- GMX_MM_CPROD_PS(rkjx_SSE, rkjy_SSE, rkjz_SSE,
- rklx_SSE, rkly_SSE, rklz_SSE,
- nx_SSE, ny_SSE, nz_SSE);
+ gmx_simd_cprod_r(rijx_S, rijy_S, rijz_S,
+ rkjx_S, rkjy_S, rkjz_S,
+ mx_S, my_S, mz_S);
- GMX_MM_CPROD_PS(mx_SSE, my_SSE, mz_SSE,
- nx_SSE, ny_SSE, nz_SSE,
- cx_SSE, cy_SSE, cz_SSE);
+ gmx_simd_cprod_r(rkjx_S, rkjy_S, rkjz_S,
+ rklx_S, rkly_S, rklz_S,
+ nx_S, ny_S, nz_S);
- cn_SSE = gmx_mm_sqrt_ps(GMX_MM_NORM2_PS(cx_SSE, cy_SSE, cz_SSE));
+ gmx_simd_cprod_r(*mx_S, *my_S, *mz_S,
+ *nx_S, *ny_S, *nz_S,
+ &cx_S, &cy_S, &cz_S);
- s_SSE = GMX_MM_IPROD_PS(mx_SSE, my_SSE, mz_SSE, nx_SSE, ny_SSE, nz_SSE);
+ cn_S = gmx_simd_sqrt_r(gmx_simd_norm2_r(cx_S, cy_S, cz_S));
- phi_SSE = gmx_mm_atan2_ps(cn_SSE, s_SSE);
- _mm_store_ps(buf+16, phi_SSE);
+ s_S = gmx_simd_iprod_r(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S);
- ipr_SSE = GMX_MM_IPROD_PS(rijx_SSE, rijy_SSE, rijz_SSE,
- nx_SSE, ny_SSE, nz_SSE);
+ /* Determine the dihedral angle, the sign might need correction */
+ *phi_S = gmx_simd_atan2_r(cn_S, s_S);
- signs = _mm_movemask_ps(ipr_SSE);
+ ipr_S = gmx_simd_iprod_r(rijx_S, rijy_S, rijz_S,
+ *nx_S, *ny_S, *nz_S);
- for (s = 0; s < 4; s++)
- {
- if (signs & (1<<s))
- {
- buf[16+s] = -buf[16+s];
- }
- }
-
- iprm_SSE = GMX_MM_NORM2_PS(mx_SSE, my_SSE, mz_SSE);
- iprn_SSE = GMX_MM_NORM2_PS(nx_SSE, ny_SSE, nz_SSE);
+ iprm_S = gmx_simd_norm2_r(*mx_S, *my_S, *mz_S);
+ iprn_S = gmx_simd_norm2_r(*nx_S, *ny_S, *nz_S);
- /* store_rvec4 messes with the input, don't use it after this! */
- store_rvec4(mx_SSE, my_SSE, mz_SSE, bv[0].v, bv[1].v, bv[2].v, bv[3].v);
- store_rvec4(nx_SSE, ny_SSE, nz_SSE, bv[4].v, bv[5].v, bv[6].v, bv[7].v);
-
- nrkj2_SSE = GMX_MM_NORM2_PS(rkjx_SSE, rkjy_SSE, rkjz_SSE);
+ nrkj2_S = gmx_simd_norm2_r(rkjx_S, rkjy_S, rkjz_S);
/* Avoid division by zero. When zero, the result is multiplied by 0
* anyhow, so the 3 max below do not affect the final result.
*/
- nrkj2_SSE = _mm_max_ps(nrkj2_SSE, fmin_SSE);
- nrkj_1_SSE = gmx_mm_invsqrt_ps(nrkj2_SSE);
- nrkj_2_SSE = _mm_mul_ps(nrkj_1_SSE, nrkj_1_SSE);
- nrkj_SSE = _mm_mul_ps(nrkj2_SSE, nrkj_1_SSE);
-
- iprm_SSE = _mm_max_ps(iprm_SSE, fmin_SSE);
- iprn_SSE = _mm_max_ps(iprn_SSE, fmin_SSE);
- nrkj_m2_SSE = _mm_mul_ps(nrkj_SSE, gmx_mm_inv_ps(iprm_SSE));
- nrkj_n2_SSE = _mm_mul_ps(nrkj_SSE, gmx_mm_inv_ps(iprn_SSE));
+ nrkj2_S = gmx_simd_max_r(nrkj2_S, nrkj2_min_S);
+ nrkj_1_S = gmx_simd_invsqrt_r(nrkj2_S);
+ nrkj_2_S = gmx_simd_mul_r(nrkj_1_S, nrkj_1_S);
+ nrkj_S = gmx_simd_mul_r(nrkj2_S, nrkj_1_S);
- _mm_store_ps(buf+0, nrkj_m2_SSE);
- _mm_store_ps(buf+4, nrkj_n2_SSE);
+ toler_S = gmx_simd_mul_r(nrkj2_S, real_eps_S);
- p_SSE = GMX_MM_IPROD_PS(rijx_SSE, rijy_SSE, rijz_SSE,
- rkjx_SSE, rkjy_SSE, rkjz_SSE);
- p_SSE = _mm_mul_ps(p_SSE, nrkj_2_SSE);
-
- q_SSE = GMX_MM_IPROD_PS(rklx_SSE, rkly_SSE, rklz_SSE,
- rkjx_SSE, rkjy_SSE, rkjz_SSE);
- q_SSE = _mm_mul_ps(q_SSE, nrkj_2_SSE);
-
- _mm_store_ps(buf+8, p_SSE);
- _mm_store_ps(buf+12, q_SSE);
+ /* Here the plain-C code uses a conditional, but we can't do that in SIMD.
+ * So we take a max with the tolerance instead. Since we multiply with
+ * m or n later, the max does not affect the results.
+ */
+ iprm_S = gmx_simd_max_r(iprm_S, toler_S);
+ iprn_S = gmx_simd_max_r(iprn_S, toler_S);
+ *nrkj_m2_S = gmx_simd_mul_r(nrkj_S, gmx_simd_inv_r(iprm_S));
+ *nrkj_n2_S = gmx_simd_mul_r(nrkj_S, gmx_simd_inv_r(iprn_S));
+
+ /* Set sign of phi_S with the sign of ipr_S; phi_S is currently positive */
+ *phi_S = gmx_simd_xor_sign_r(*phi_S, ipr_S);
+ p_S = gmx_simd_iprod_r(rijx_S, rijy_S, rijz_S,
+ rkjx_S, rkjy_S, rkjz_S);
+ p_S = gmx_simd_mul_r(p_S, nrkj_2_S);
+
+ q_S = gmx_simd_iprod_r(rklx_S, rkly_S, rklz_S,
+ rkjx_S, rkjy_S, rkjz_S);
+ q_S = gmx_simd_mul_r(q_S, nrkj_2_S);
+
+ gmx_simd_store_r(p, p_S);
+ gmx_simd_store_r(q, q_S);
}
-#endif /* SSE_PROPER_DIHEDRALS */
+#endif /* GMX_SIMD_HAVE_REAL */
void do_dih_fup(int i, int j, int k, int l, real ddphi,
}
/* As do_dih_fup_noshiftf above, but with pre-calculated pre-factors */
-static void
-do_dih_fup_noshiftf_precalc(int i, int j, int k, int l, real ddphi,
- real nrkj_m2, real nrkj_n2,
+static gmx_inline void
+do_dih_fup_noshiftf_precalc(int i, int j, int k, int l,
real p, real q,
- rvec m, rvec n, rvec f[])
+ real f_i_x, real f_i_y, real f_i_z,
+ real mf_l_x, real mf_l_y, real mf_l_z,
+ rvec f[])
{
rvec f_i, f_j, f_k, f_l;
- rvec uvec, vvec, svec, dx_jl;
- real a, b, toler;
- ivec jt, dt_ij, dt_kj, dt_lj;
-
- a = -ddphi*nrkj_m2;
- svmul(a, m, f_i);
- b = ddphi*nrkj_n2;
- svmul(b, n, f_l);
+ rvec uvec, vvec, svec;
+
+ f_i[XX] = f_i_x;
+ f_i[YY] = f_i_y;
+ f_i[ZZ] = f_i_z;
+ f_l[XX] = -mf_l_x;
+ f_l[YY] = -mf_l_y;
+ f_l[ZZ] = -mf_l_z;
svmul(p, f_i, uvec);
svmul(q, f_l, vvec);
rvec_sub(uvec, vvec, svec);
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
int i, type, ai, aj, ak, al;
int t1, t2, t3;
pdihs_noener(int nbonds,
const t_iatom forceatoms[], const t_iparams forceparams[],
const rvec x[], rvec f[],
- const t_pbc *pbc, const t_graph *g,
+ const t_pbc gmx_unused *pbc, const t_graph gmx_unused *g,
real lambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
int i, type, ai, aj, ak, al;
int t1, t2, t3;
}
-#ifdef SSE_PROPER_DIHEDRALS
+#ifdef GMX_SIMD_HAVE_REAL
-/* As pdihs_noner above, but using SSE to calculate 4 dihedrals at once */
+/* As pdihs_noner above, but using SIMD to calculate many dihedrals at once */
static void
-pdihs_noener_sse(int nbonds,
- const t_iatom forceatoms[], const t_iparams forceparams[],
- const rvec x[], rvec f[],
- const t_pbc *pbc, const t_graph *g,
- real lambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+pdihs_noener_simd(int nbonds,
+ const t_iatom forceatoms[], const t_iparams forceparams[],
+ const rvec x[], rvec f[],
+ const t_pbc *pbc, const t_graph gmx_unused *g,
+ real gmx_unused lambda,
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
- int i, i4, s;
- int type, ai[4], aj[4], ak[4], al[4];
- int t1[4], t2[4], t3[4];
- int mult[4];
- real cp[4], mdphi[4];
- real ddphi;
- rvec_sse_t rs_array[13], *rs;
- real buf_array[24], *buf;
- __m128 mdphi_SSE, sin_SSE, cos_SSE;
-
- /* Ensure 16-byte alignment */
- rs = (rvec_sse_t *)(((size_t)(rs_array +1)) & (~((size_t)15)));
- buf = (float *)(((size_t)(buf_array+3)) & (~((size_t)15)));
-
- for (i = 0; (i < nbonds); i += 20)
+ const int nfa1 = 5;
+ int i, iu, s;
+ int type, ai[GMX_SIMD_REAL_WIDTH], aj[GMX_SIMD_REAL_WIDTH], ak[GMX_SIMD_REAL_WIDTH], al[GMX_SIMD_REAL_WIDTH];
+ real ddphi;
+ real dr_array[3*DIM*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *dr;
+ real buf_array[7*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *buf;
+ real *cp, *phi0, *mult, *phi, *p, *q, *sf_i, *msf_l;
+ gmx_simd_real_t phi0_S, phi_S;
+ gmx_simd_real_t mx_S, my_S, mz_S;
+ gmx_simd_real_t nx_S, ny_S, nz_S;
+ gmx_simd_real_t nrkj_m2_S, nrkj_n2_S;
+ gmx_simd_real_t cp_S, mdphi_S, mult_S;
+ gmx_simd_real_t sin_S, cos_S;
+ gmx_simd_real_t mddphi_S;
+ gmx_simd_real_t sf_i_S, msf_l_S;
+ pbc_simd_t pbc_simd;
+
+ /* Ensure SIMD register alignment */
+ dr = gmx_simd_align_r(dr_array);
+ buf = gmx_simd_align_r(buf_array);
+
+ /* Extract aligned pointer for parameters and variables */
+ cp = buf + 0*GMX_SIMD_REAL_WIDTH;
+ phi0 = buf + 1*GMX_SIMD_REAL_WIDTH;
+ mult = buf + 2*GMX_SIMD_REAL_WIDTH;
+ p = buf + 3*GMX_SIMD_REAL_WIDTH;
+ q = buf + 4*GMX_SIMD_REAL_WIDTH;
+ sf_i = buf + 5*GMX_SIMD_REAL_WIDTH;
+ msf_l = buf + 6*GMX_SIMD_REAL_WIDTH;
+
+ set_pbc_simd(pbc, &pbc_simd);
+
+ /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
+ for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1)
{
- /* Collect atoms quadruplets for 4 dihedrals */
- i4 = i;
- for (s = 0; s < 4; s++)
- {
- ai[s] = forceatoms[i4+1];
- aj[s] = forceatoms[i4+2];
- ak[s] = forceatoms[i4+3];
- al[s] = forceatoms[i4+4];
+ /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
+ * iu indexes into forceatoms, we should not let iu go beyond nbonds.
+ */
+ iu = i;
+ for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
+ {
+ type = forceatoms[iu];
+ ai[s] = forceatoms[iu+1];
+ aj[s] = forceatoms[iu+2];
+ ak[s] = forceatoms[iu+3];
+ al[s] = forceatoms[iu+4];
+
+ cp[s] = forceparams[type].pdihs.cpA;
+ phi0[s] = forceparams[type].pdihs.phiA*DEG2RAD;
+ mult[s] = forceparams[type].pdihs.mult;
+
/* At the end fill the arrays with identical entries */
- if (i4 + 5 < nbonds)
+ if (iu + nfa1 < nbonds)
{
- i4 += 5;
+ iu += nfa1;
}
}
- /* Caclulate 4 dihedral angles at once */
- dih_angle_sse(x, ai, aj, ak, al, pbc, t1, t2, t3, rs, buf);
+ /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
+ dih_angle_simd(x, ai, aj, ak, al, &pbc_simd,
+ dr,
+ &phi_S,
+ &mx_S, &my_S, &mz_S,
+ &nx_S, &ny_S, &nz_S,
+ &nrkj_m2_S,
+ &nrkj_n2_S,
+ p, q);
+
+ cp_S = gmx_simd_load_r(cp);
+ phi0_S = gmx_simd_load_r(phi0);
+ mult_S = gmx_simd_load_r(mult);
+
+ mdphi_S = gmx_simd_sub_r(gmx_simd_mul_r(mult_S, phi_S), phi0_S);
+
+ /* Calculate GMX_SIMD_REAL_WIDTH sines at once */
+ gmx_simd_sincos_r(mdphi_S, &sin_S, &cos_S);
+ mddphi_S = gmx_simd_mul_r(gmx_simd_mul_r(cp_S, mult_S), sin_S);
+ sf_i_S = gmx_simd_mul_r(mddphi_S, nrkj_m2_S);
+ msf_l_S = gmx_simd_mul_r(mddphi_S, nrkj_n2_S);
+
+ /* After this m?_S will contain f[i] */
+ mx_S = gmx_simd_mul_r(sf_i_S, mx_S);
+ my_S = gmx_simd_mul_r(sf_i_S, my_S);
+ mz_S = gmx_simd_mul_r(sf_i_S, mz_S);
+
+ /* After this m?_S will contain -f[l] */
+ nx_S = gmx_simd_mul_r(msf_l_S, nx_S);
+ ny_S = gmx_simd_mul_r(msf_l_S, ny_S);
+ nz_S = gmx_simd_mul_r(msf_l_S, nz_S);
+
+ gmx_simd_store_r(dr + 0*GMX_SIMD_REAL_WIDTH, mx_S);
+ gmx_simd_store_r(dr + 1*GMX_SIMD_REAL_WIDTH, my_S);
+ gmx_simd_store_r(dr + 2*GMX_SIMD_REAL_WIDTH, mz_S);
+ gmx_simd_store_r(dr + 3*GMX_SIMD_REAL_WIDTH, nx_S);
+ gmx_simd_store_r(dr + 4*GMX_SIMD_REAL_WIDTH, ny_S);
+ gmx_simd_store_r(dr + 5*GMX_SIMD_REAL_WIDTH, nz_S);
+
+ iu = i;
+ s = 0;
+ do
+ {
+ do_dih_fup_noshiftf_precalc(ai[s], aj[s], ak[s], al[s],
+ p[s], q[s],
+ dr[ XX *GMX_SIMD_REAL_WIDTH+s],
+ dr[ YY *GMX_SIMD_REAL_WIDTH+s],
+ dr[ ZZ *GMX_SIMD_REAL_WIDTH+s],
+ dr[(DIM+XX)*GMX_SIMD_REAL_WIDTH+s],
+ dr[(DIM+YY)*GMX_SIMD_REAL_WIDTH+s],
+ dr[(DIM+ZZ)*GMX_SIMD_REAL_WIDTH+s],
+ f);
+ s++;
+ iu += nfa1;
+ }
+ while (s < GMX_SIMD_REAL_WIDTH && iu < nbonds);
+ }
+}
- i4 = i;
- for (s = 0; s < 4; s++)
+/* This is mostly a copy of pdihs_noener_simd above, but with using
+ * the RB potential instead of a harmonic potential.
+ * This function can replace rbdihs() when no energy and virial are needed.
+ */
+static void
+rbdihs_noener_simd(int nbonds,
+ const t_iatom forceatoms[], const t_iparams forceparams[],
+ const rvec x[], rvec f[],
+ const t_pbc *pbc, const t_graph gmx_unused *g,
+ real gmx_unused lambda,
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
+{
+ const int nfa1 = 5;
+ int i, iu, s, j;
+ int type, ai[GMX_SIMD_REAL_WIDTH], aj[GMX_SIMD_REAL_WIDTH], ak[GMX_SIMD_REAL_WIDTH], al[GMX_SIMD_REAL_WIDTH];
+ real ddphi;
+ real dr_array[3*DIM*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *dr;
+ real buf_array[(NR_RBDIHS + 4)*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *buf;
+ real *parm, *phi, *p, *q, *sf_i, *msf_l;
+
+ gmx_simd_real_t phi_S;
+ gmx_simd_real_t ddphi_S, cosfac_S;
+ gmx_simd_real_t mx_S, my_S, mz_S;
+ gmx_simd_real_t nx_S, ny_S, nz_S;
+ gmx_simd_real_t nrkj_m2_S, nrkj_n2_S;
+ gmx_simd_real_t parm_S, c_S;
+ gmx_simd_real_t sin_S, cos_S;
+ gmx_simd_real_t sf_i_S, msf_l_S;
+ pbc_simd_t pbc_simd;
+
+ gmx_simd_real_t pi_S = gmx_simd_set1_r(M_PI);
+ gmx_simd_real_t one_S = gmx_simd_set1_r(1.0);
+
+ /* Ensure SIMD register alignment */
+ dr = gmx_simd_align_r(dr_array);
+ buf = gmx_simd_align_r(buf_array);
+
+ /* Extract aligned pointer for parameters and variables */
+ parm = buf;
+ p = buf + (NR_RBDIHS + 0)*GMX_SIMD_REAL_WIDTH;
+ q = buf + (NR_RBDIHS + 1)*GMX_SIMD_REAL_WIDTH;
+ sf_i = buf + (NR_RBDIHS + 2)*GMX_SIMD_REAL_WIDTH;
+ msf_l = buf + (NR_RBDIHS + 3)*GMX_SIMD_REAL_WIDTH;
+
+ set_pbc_simd(pbc, &pbc_simd);
+
+ /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
+ for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1)
+ {
+ /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
+ * iu indexes into forceatoms, we should not let iu go beyond nbonds.
+ */
+ iu = i;
+ for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
{
- if (i4 < nbonds)
+ type = forceatoms[iu];
+ ai[s] = forceatoms[iu+1];
+ aj[s] = forceatoms[iu+2];
+ ak[s] = forceatoms[iu+3];
+ al[s] = forceatoms[iu+4];
+
+ /* We don't need the first parameter, since that's a constant
+ * which only affects the energies, not the forces.
+ */
+ for (j = 1; j < NR_RBDIHS; j++)
{
- /* Calculate the coefficient and angle deviation */
- type = forceatoms[i4];
- dopdihs_mdphi(forceparams[type].pdihs.cpA,
- forceparams[type].pdihs.cpB,
- forceparams[type].pdihs.phiA,
- forceparams[type].pdihs.phiB,
- forceparams[type].pdihs.mult,
- buf[16+s], lambda, &cp[s], &buf[16+s]);
- mult[s] = forceparams[type].pdihs.mult;
+ parm[j*GMX_SIMD_REAL_WIDTH + s] =
+ forceparams[type].rbdihs.rbcA[j];
}
- else
+
+ /* At the end fill the arrays with identical entries */
+ if (iu + nfa1 < nbonds)
{
- buf[16+s] = 0;
+ iu += nfa1;
}
- i4 += 5;
}
- /* Calculate 4 sines at once */
- mdphi_SSE = _mm_load_ps(buf+16);
- gmx_mm_sincos_ps(mdphi_SSE, &sin_SSE, &cos_SSE);
- _mm_store_ps(buf+16, sin_SSE);
+ /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
+ dih_angle_simd(x, ai, aj, ak, al, &pbc_simd,
+ dr,
+ &phi_S,
+ &mx_S, &my_S, &mz_S,
+ &nx_S, &ny_S, &nz_S,
+ &nrkj_m2_S,
+ &nrkj_n2_S,
+ p, q);
- i4 = i;
+ /* Change to polymer convention */
+ phi_S = gmx_simd_sub_r(phi_S, pi_S);
+
+ gmx_simd_sincos_r(phi_S, &sin_S, &cos_S);
+
+ ddphi_S = gmx_simd_setzero_r();
+ c_S = one_S;
+ cosfac_S = one_S;
+ for (j = 1; j < NR_RBDIHS; j++)
+ {
+ parm_S = gmx_simd_load_r(parm + j*GMX_SIMD_REAL_WIDTH);
+ ddphi_S = gmx_simd_fmadd_r(gmx_simd_mul_r(c_S, parm_S), cosfac_S, ddphi_S);
+ cosfac_S = gmx_simd_mul_r(cosfac_S, cos_S);
+ c_S = gmx_simd_add_r(c_S, one_S);
+ }
+
+ /* Note that here we do not use the minus sign which is present
+ * in the normal RB code. This is corrected for through (m)sf below.
+ */
+ ddphi_S = gmx_simd_mul_r(ddphi_S, sin_S);
+
+ sf_i_S = gmx_simd_mul_r(ddphi_S, nrkj_m2_S);
+ msf_l_S = gmx_simd_mul_r(ddphi_S, nrkj_n2_S);
+
+ /* After this m?_S will contain f[i] */
+ mx_S = gmx_simd_mul_r(sf_i_S, mx_S);
+ my_S = gmx_simd_mul_r(sf_i_S, my_S);
+ mz_S = gmx_simd_mul_r(sf_i_S, mz_S);
+
+ /* After this m?_S will contain -f[l] */
+ nx_S = gmx_simd_mul_r(msf_l_S, nx_S);
+ ny_S = gmx_simd_mul_r(msf_l_S, ny_S);
+ nz_S = gmx_simd_mul_r(msf_l_S, nz_S);
+
+ gmx_simd_store_r(dr + 0*GMX_SIMD_REAL_WIDTH, mx_S);
+ gmx_simd_store_r(dr + 1*GMX_SIMD_REAL_WIDTH, my_S);
+ gmx_simd_store_r(dr + 2*GMX_SIMD_REAL_WIDTH, mz_S);
+ gmx_simd_store_r(dr + 3*GMX_SIMD_REAL_WIDTH, nx_S);
+ gmx_simd_store_r(dr + 4*GMX_SIMD_REAL_WIDTH, ny_S);
+ gmx_simd_store_r(dr + 5*GMX_SIMD_REAL_WIDTH, nz_S);
+
+ iu = i;
s = 0;
do
{
- ddphi = -cp[s]*mult[s]*buf[16+s];
-
- do_dih_fup_noshiftf_precalc(ai[s], aj[s], ak[s], al[s], ddphi,
- buf[ 0+s], buf[ 4+s],
- buf[ 8+s], buf[12+s],
- rs[0+s].v, rs[4+s].v,
+ do_dih_fup_noshiftf_precalc(ai[s], aj[s], ak[s], al[s],
+ p[s], q[s],
+ dr[ XX *GMX_SIMD_REAL_WIDTH+s],
+ dr[ YY *GMX_SIMD_REAL_WIDTH+s],
+ dr[ ZZ *GMX_SIMD_REAL_WIDTH+s],
+ dr[(DIM+XX)*GMX_SIMD_REAL_WIDTH+s],
+ dr[(DIM+YY)*GMX_SIMD_REAL_WIDTH+s],
+ dr[(DIM+ZZ)*GMX_SIMD_REAL_WIDTH+s],
f);
s++;
- i4 += 5;
+ iu += nfa1;
}
- while (s < 4 && i4 < nbonds);
+ while (s < GMX_SIMD_REAL_WIDTH && iu < nbonds);
}
}
-#endif /* SSE_PROPER_DIHEDRALS */
+#endif /* GMX_SIMD_HAVE_REAL */
real idihs(int nbonds,
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
int i, type, ai, aj, ak, al;
int t1, t2, t3;
/* Box relative coordinates are stored for dimensions with pbc */
posA *= pbc->box[m][m];
posB *= pbc->box[m][m];
+ assert(npbcdim <= DIM);
for (d = m+1; d < npbcdim; d++)
{
posA += pos0A[d]*pbc->box[d][m];
clear_rvec(com_sc);
for (m = 0; m < npbcdim; m++)
{
+ assert(npbcdim <= DIM);
for (d = m; d < npbcdim; d++)
{
com_sc[m] += com[d]*pbc->box[d][m];
clear_rvec(comB_sc);
for (m = 0; m < npbcdim; m++)
{
+ assert(npbcdim <= DIM);
for (d = m; d < npbcdim; d++)
{
comA_sc[m] += comA[d]*pbc->box[d][m];
vtot += 0.5*kk*dx[m]*dx[m];
*dvdlambda +=
0.5*(pr->posres.fcB[m] - pr->posres.fcA[m])*dx[m]*dx[m]
- -fm*dpdl[m];
+ + fm*dpdl[m];
/* Here we correct for the pbc_dx which included rdist */
if (bForceValid)
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
return low_angres(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
lambda, dvdlambda, FALSE);
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
return low_angres(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
lambda, dvdlambda, TRUE);
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
real vtot = 0;
int ai, aj, ak, al, i, k, type, t1, t2, t3;
}
-real unimplemented(int nbonds,
- const t_iatom forceatoms[], const t_iparams forceparams[],
- const rvec x[], rvec f[], rvec fshift[],
- const t_pbc *pbc, const t_graph *g,
- real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+real unimplemented(int gmx_unused nbonds,
+ const t_iatom gmx_unused forceatoms[], const t_iparams gmx_unused forceparams[],
+ const rvec gmx_unused x[], rvec gmx_unused f[], rvec gmx_unused fshift[],
+ const t_pbc gmx_unused *pbc, const t_graph gmx_unused *g,
+ real gmx_unused lambda, real gmx_unused *dvdlambda,
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
gmx_impl("*** you are using a not implemented function");
return 0.0; /* To make the compiler happy */
}
+real restrangles(int nbonds,
+ const t_iatom forceatoms[], const t_iparams forceparams[],
+ const rvec x[], rvec f[], rvec fshift[],
+ const t_pbc *pbc, const t_graph *g,
+ real gmx_unused lambda, real gmx_unused *dvdlambda,
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
+{
+ int i, d, ai, aj, ak, type, m;
+ int t1, t2;
+ rvec r_ij, r_kj;
+ real v, vtot;
+ ivec jt, dt_ij, dt_kj;
+ rvec f_i, f_j, f_k;
+ real prefactor, ratio_ante, ratio_post;
+ rvec delta_ante, delta_post, vec_temp;
+
+ vtot = 0.0;
+ for (i = 0; (i < nbonds); )
+ {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ak = forceatoms[i++];
+
+ t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
+ pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
+ t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_post);
+
+
+ /* This function computes factors needed for restricted angle potential.
+ * The restricted angle potential is used in coarse-grained simulations to avoid singularities
+ * when three particles align and the dihedral angle and dihedral potential
+ * cannot be calculated. This potential is calculated using the formula:
+ real restrangles(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real gmx_unused lambda,real gmx_unused *dvdlambda,
+ const t_mdatoms gmx_unused *md,t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
+ {
+ int i, d, ai, aj, ak, type, m;
+ int t1, t2;
+ rvec r_ij,r_kj;
+ real v, vtot;
+ ivec jt,dt_ij,dt_kj;
+ rvec f_i, f_j, f_k;
+ real prefactor, ratio_ante, ratio_post;
+ rvec delta_ante, delta_post, vec_temp;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); )
+ {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ak = forceatoms[i++];
+
+ * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta} \frac{(\cos\theta_i - \cos\theta_0)^2}
+ * {\sin^2\theta_i}\f] ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
+ * For more explanations see comments file "restcbt.h". */
+
+ compute_factors_restangles(type, forceparams, delta_ante, delta_post,
+ &prefactor, &ratio_ante, &ratio_post, &v);
+
+ /* Forces are computed per component */
+ for (d = 0; d < DIM; d++)
+ {
+ f_i[d] = prefactor * (ratio_ante * delta_ante[d] - delta_post[d]);
+ f_j[d] = prefactor * ((ratio_post + 1.0) * delta_post[d] - (ratio_ante + 1.0) * delta_ante[d]);
+ f_k[d] = prefactor * (delta_ante[d] - ratio_post * delta_post[d]);
+ }
+
+ /* Computation of potential energy */
+
+ vtot += v;
+
+ /* Update forces */
+
+ for (m = 0; (m < DIM); m++)
+ {
+ f[ai][m] += f_i[m];
+ f[aj][m] += f_j[m];
+ f[ak][m] += f_k[m];
+ }
+
+ if (g)
+ {
+ copy_ivec(SHIFT_IVEC(g, aj), jt);
+ ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
+ ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
+ t1 = IVEC2IS(dt_ij);
+ t2 = IVEC2IS(dt_kj);
+ }
+
+ rvec_inc(fshift[t1], f_i);
+ rvec_inc(fshift[CENTRAL], f_j);
+ rvec_inc(fshift[t2], f_k);
+ }
+ return vtot;
+}
+
+
+real restrdihs(int nbonds,
+ const t_iatom forceatoms[], const t_iparams forceparams[],
+ const rvec x[], rvec f[], rvec fshift[],
+ const t_pbc *pbc, const t_graph *g,
+ real gmx_unused lambda, real gmx_unused *dvlambda,
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
+{
+ int i, d, type, ai, aj, ak, al;
+ rvec f_i, f_j, f_k, f_l;
+ rvec dx_jl;
+ ivec jt, dt_ij, dt_kj, dt_lj;
+ int t1, t2, t3;
+ real v, vtot;
+ rvec delta_ante, delta_crnt, delta_post, vec_temp;
+ real factor_phi_ai_ante, factor_phi_ai_crnt, factor_phi_ai_post;
+ real factor_phi_aj_ante, factor_phi_aj_crnt, factor_phi_aj_post;
+ real factor_phi_ak_ante, factor_phi_ak_crnt, factor_phi_ak_post;
+ real factor_phi_al_ante, factor_phi_al_crnt, factor_phi_al_post;
+ real prefactor_phi;
+
+
+ vtot = 0.0;
+ for (i = 0; (i < nbonds); )
+ {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ak = forceatoms[i++];
+ al = forceatoms[i++];
+
+ t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
+ pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
+ t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
+ t3 = pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
+ pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
+
+ /* This function computes factors needed for restricted angle potential.
+ * The restricted angle potential is used in coarse-grained simulations to avoid singularities
+ * when three particles align and the dihedral angle and dihedral potential cannot be calculated.
+ * This potential is calculated using the formula:
+ * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta}
+ * \frac{(\cos\theta_i - \cos\theta_0)^2}{\sin^2\theta_i}\f]
+ * ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
+ * For more explanations see comments file "restcbt.h" */
+
+ compute_factors_restrdihs(type, forceparams,
+ delta_ante, delta_crnt, delta_post,
+ &factor_phi_ai_ante, &factor_phi_ai_crnt, &factor_phi_ai_post,
+ &factor_phi_aj_ante, &factor_phi_aj_crnt, &factor_phi_aj_post,
+ &factor_phi_ak_ante, &factor_phi_ak_crnt, &factor_phi_ak_post,
+ &factor_phi_al_ante, &factor_phi_al_crnt, &factor_phi_al_post,
+ &prefactor_phi, &v);
+
+
+ /* Computation of forces per component */
+ for (d = 0; d < DIM; d++)
+ {
+ f_i[d] = prefactor_phi * (factor_phi_ai_ante * delta_ante[d] + factor_phi_ai_crnt * delta_crnt[d] + factor_phi_ai_post * delta_post[d]);
+ f_j[d] = prefactor_phi * (factor_phi_aj_ante * delta_ante[d] + factor_phi_aj_crnt * delta_crnt[d] + factor_phi_aj_post * delta_post[d]);
+ f_k[d] = prefactor_phi * (factor_phi_ak_ante * delta_ante[d] + factor_phi_ak_crnt * delta_crnt[d] + factor_phi_ak_post * delta_post[d]);
+ f_l[d] = prefactor_phi * (factor_phi_al_ante * delta_ante[d] + factor_phi_al_crnt * delta_crnt[d] + factor_phi_al_post * delta_post[d]);
+ }
+ /* Computation of the energy */
+
+ vtot += v;
+
+
+
+ /* Updating the forces */
+
+ rvec_inc(f[ai], f_i);
+ rvec_inc(f[aj], f_j);
+ rvec_inc(f[ak], f_k);
+ rvec_inc(f[al], f_l);
+
+
+ /* Updating the fshift forces for the pressure coupling */
+ if (g)
+ {
+ copy_ivec(SHIFT_IVEC(g, aj), jt);
+ ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
+ ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
+ ivec_sub(SHIFT_IVEC(g, al), jt, dt_lj);
+ t1 = IVEC2IS(dt_ij);
+ t2 = IVEC2IS(dt_kj);
+ t3 = IVEC2IS(dt_lj);
+ }
+ else if (pbc)
+ {
+ t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
+ }
+ else
+ {
+ t3 = CENTRAL;
+ }
+
+ rvec_inc(fshift[t1], f_i);
+ rvec_inc(fshift[CENTRAL], f_j);
+ rvec_inc(fshift[t2], f_k);
+ rvec_inc(fshift[t3], f_l);
+
+ }
+
+ return vtot;
+}
+
+
+real cbtdihs(int nbonds,
+ const t_iatom forceatoms[], const t_iparams forceparams[],
+ const rvec x[], rvec f[], rvec fshift[],
+ const t_pbc *pbc, const t_graph *g,
+ real gmx_unused lambda, real gmx_unused *dvdlambda,
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
+{
+ int type, ai, aj, ak, al, i, d;
+ int t1, t2, t3;
+ real v, vtot;
+ rvec vec_temp;
+ rvec f_i, f_j, f_k, f_l;
+ ivec jt, dt_ij, dt_kj, dt_lj;
+ rvec dx_jl;
+ rvec delta_ante, delta_crnt, delta_post;
+ rvec f_phi_ai, f_phi_aj, f_phi_ak, f_phi_al;
+ rvec f_theta_ante_ai, f_theta_ante_aj, f_theta_ante_ak;
+ rvec f_theta_post_aj, f_theta_post_ak, f_theta_post_al;
+
+
+
+
+ vtot = 0.0;
+ for (i = 0; (i < nbonds); )
+ {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ak = forceatoms[i++];
+ al = forceatoms[i++];
+
+
+ t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
+ pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
+ t2 = pbc_rvec_sub(pbc, x[ak], x[aj], vec_temp);
+ pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
+ t3 = pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
+ pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
+
+ /* \brief Compute factors for CBT potential
+ * The combined bending-torsion potential goes to zero in a very smooth manner, eliminating the numerical
+ * instabilities, when three coarse-grained particles align and the dihedral angle and standard
+ * dihedral potentials cannot be calculated. The CBT potential is calculated using the formula:
+ * \f[V_{\rm CBT}(\theta_{i-1}, \theta_i, \phi_i) = k_{\phi} \sin^3\theta_{i-1} \sin^3\theta_{i}
+ * \sum_{n=0}^4 { a_n \cos^n\phi_i}\f] ({eq:CBT} and ref \cite{MonicaGoga2013} from the manual).
+ * It contains in its expression not only the dihedral angle \f$\phi\f$
+ * but also \f[\theta_{i-1}\f] (theta_ante bellow) and \f[\theta_{i}\f] (theta_post bellow)
+ * --- the adjacent bending angles.
+ * For more explanations see comments file "restcbt.h". */
+
+ compute_factors_cbtdihs(type, forceparams, delta_ante, delta_crnt, delta_post,
+ f_phi_ai, f_phi_aj, f_phi_ak, f_phi_al,
+ f_theta_ante_ai, f_theta_ante_aj, f_theta_ante_ak,
+ f_theta_post_aj, f_theta_post_ak, f_theta_post_al,
+ &v);
+
+
+ /* Acumulate the resuts per beads */
+ for (d = 0; d < DIM; d++)
+ {
+ f_i[d] = f_phi_ai[d] + f_theta_ante_ai[d];
+ f_j[d] = f_phi_aj[d] + f_theta_ante_aj[d] + f_theta_post_aj[d];
+ f_k[d] = f_phi_ak[d] + f_theta_ante_ak[d] + f_theta_post_ak[d];
+ f_l[d] = f_phi_al[d] + f_theta_post_al[d];
+ }
+
+ /* Compute the potential energy */
+
+ vtot += v;
+
+
+ /* Updating the forces */
+ rvec_inc(f[ai], f_i);
+ rvec_inc(f[aj], f_j);
+ rvec_inc(f[ak], f_k);
+ rvec_inc(f[al], f_l);
+
+
+ /* Updating the fshift forces for the pressure coupling */
+ if (g)
+ {
+ copy_ivec(SHIFT_IVEC(g, aj), jt);
+ ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
+ ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
+ ivec_sub(SHIFT_IVEC(g, al), jt, dt_lj);
+ t1 = IVEC2IS(dt_ij);
+ t2 = IVEC2IS(dt_kj);
+ t3 = IVEC2IS(dt_lj);
+ }
+ else if (pbc)
+ {
+ t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
+ }
+ else
+ {
+ t3 = CENTRAL;
+ }
+
+ rvec_inc(fshift[t1], f_i);
+ rvec_inc(fshift[CENTRAL], f_j);
+ rvec_inc(fshift[t2], f_k);
+ rvec_inc(fshift[t3], f_l);
+ }
+
+ return vtot;
+}
+
real rbdihs(int nbonds,
const t_iatom forceatoms[], const t_iparams forceparams[],
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
const real c0 = 0.0, c1 = 1.0, c2 = 2.0, c3 = 3.0, c4 = 4.0, c5 = 5.0;
int type, ai, aj, ak, al, i, j;
const gmx_cmap_t *cmap_grid,
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
- real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ real gmx_unused lambda, real gmx_unused *dvdlambda,
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
int i, j, k, n, idx;
int ai, aj, ak, al, am;
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
int i, m, ki, ai, aj, type;
real dr2, fbond, vbond, fij, vtot;
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
int i, ai, aj, ak, type, m, t1, t2;
rvec r_ij, r_kj;
const t_iatom forceatoms[], const t_iparams forceparams[],
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
- real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ real gmx_unused lambda, real gmx_unused *dvdlambda,
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
/* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
* pp. 842-847
const t_iatom forceatoms[], const t_iparams forceparams[],
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
- real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ real gmx_unused lambda, real gmx_unused *dvdlambda,
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
/* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
* pp. 842-847
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata *fcd,
+ int gmx_unused *global_atom_index)
{
int i, m, ki, ai, aj, type, table;
real dr, dr2, fbond, vbond, fij, vtot;
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata *fcd,
+ int gmx_unused *global_atom_index)
{
int i, ai, aj, ak, t1, t2, type, table;
rvec r_ij, r_kj;
const rvec x[], rvec f[], rvec fshift[],
const t_pbc *pbc, const t_graph *g,
real lambda, real *dvdlambda,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index)
+ const t_mdatoms gmx_unused *md, t_fcdata *fcd,
+ int gmx_unused *global_atom_index)
{
int i, type, ai, aj, ak, al, table;
int t1, t2, t3;
return vtot;
}
+/* Return if this is a potential calculated in bondfree.c,
+ * i.e. an interaction that actually calculates a potential and
+ * works on multiple atoms (not e.g. a connection or a position restraint).
+ */
+static gmx_inline gmx_bool ftype_is_bonded_potential(int ftype)
+{
+ return
+ (interaction_function[ftype].flags & IF_BOND) &&
+ !(ftype == F_CONNBONDS || ftype == F_POSRES || ftype == F_FBPOSRES) &&
+ (ftype < F_GB12 || ftype > F_GB14);
+}
+
+static void divide_bondeds_over_threads(t_idef *idef, int nthreads)
+{
+ int ftype;
+ int nat1;
+ int t;
+ int il_nr_thread;
+
+ idef->nthreads = nthreads;
+
+ if (F_NRE*(nthreads+1) > idef->il_thread_division_nalloc)
+ {
+ idef->il_thread_division_nalloc = F_NRE*(nthreads+1);
+ snew(idef->il_thread_division, idef->il_thread_division_nalloc);
+ }
+
+ for (ftype = 0; ftype < F_NRE; ftype++)
+ {
+ if (ftype_is_bonded_potential(ftype))
+ {
+ nat1 = interaction_function[ftype].nratoms + 1;
+
+ for (t = 0; t <= nthreads; t++)
+ {
+ /* Divide the interactions equally over the threads.
+ * When the different types of bonded interactions
+ * are distributed roughly equally over the threads,
+ * this should lead to well localized output into
+ * the force buffer on each thread.
+ * If this is not the case, a more advanced scheme
+ * (not implemented yet) will do better.
+ */
+ il_nr_thread = (((idef->il[ftype].nr/nat1)*t)/nthreads)*nat1;
+
+ /* Ensure that distance restraint pairs with the same label
+ * end up on the same thread.
+ * This is slighlty tricky code, since the next for iteration
+ * may have an initial il_nr_thread lower than the final value
+ * in the previous iteration, but this will anyhow be increased
+ * to the approriate value again by this while loop.
+ */
+ while (ftype == F_DISRES &&
+ il_nr_thread > 0 &&
+ il_nr_thread < idef->il[ftype].nr &&
+ idef->iparams[idef->il[ftype].iatoms[il_nr_thread]].disres.label ==
+ idef->iparams[idef->il[ftype].iatoms[il_nr_thread-nat1]].disres.label)
+ {
+ il_nr_thread += nat1;
+ }
+
+ idef->il_thread_division[ftype*(nthreads+1)+t] = il_nr_thread;
+ }
+ }
+ }
+}
+
static unsigned
calc_bonded_reduction_mask(const t_idef *idef,
int shift,
for (ftype = 0; ftype < F_NRE; ftype++)
{
- if (interaction_function[ftype].flags & IF_BOND &&
- !(ftype == F_CONNBONDS || ftype == F_POSRES) &&
- (ftype<F_GB12 || ftype>F_GB14))
+ if (ftype_is_bonded_potential(ftype))
{
nb = idef->il[ftype].nr;
if (nb > 0)
/* Divide this interaction equally over the threads.
* This is not stored: should match division in calc_bonds.
*/
- nb0 = (((nb/nat1)* t )/nt)*nat1;
- nb1 = (((nb/nat1)*(t+1))/nt)*nat1;
+ nb0 = idef->il_thread_division[ftype*(nt+1)+t];
+ nb1 = idef->il_thread_division[ftype*(nt+1)+t+1];
for (i = nb0; i < nb1; i += nat1)
{
return mask;
}
-void init_bonded_thread_force_reduction(t_forcerec *fr,
- const t_idef *idef)
+void setup_bonded_threading(t_forcerec *fr, t_idef *idef)
{
#define MAX_BLOCK_BITS 32
int t;
int ctot, c, b;
- if (fr->nthreads <= 1)
+ assert(fr->nthreads >= 1);
+
+ /* Divide the bonded interaction over the threads */
+ divide_bondeds_over_threads(idef, fr->nthreads);
+
+ if (fr->nthreads == 1)
{
fr->red_nblock = 0;
rvec x[], rvec f[], rvec fshift[],
t_forcerec *fr,
const t_pbc *pbc, const t_graph *g,
- gmx_enerdata_t *enerd, gmx_grppairener_t *grpp,
+ gmx_grppairener_t *grpp,
t_nrnb *nrnb,
real *lambda, real *dvdl,
const t_mdatoms *md, t_fcdata *fcd,
gmx_bool bCalcEnerVir,
int *global_atom_index, gmx_bool bPrintSepPot)
{
- int ind, nat1, nbonds, efptFTYPE;
+ int nat1, nbonds, efptFTYPE;
real v = 0;
t_iatom *iatoms;
int nb0, nbn;
efptFTYPE = efptBONDED;
}
- if (interaction_function[ftype].flags & IF_BOND &&
- !(ftype == F_CONNBONDS || ftype == F_POSRES))
- {
- ind = interaction_function[ftype].nrnb_ind;
- nat1 = interaction_function[ftype].nratoms + 1;
- nbonds = idef->il[ftype].nr/nat1;
- iatoms = idef->il[ftype].iatoms;
+ nat1 = interaction_function[ftype].nratoms + 1;
+ nbonds = idef->il[ftype].nr/nat1;
+ iatoms = idef->il[ftype].iatoms;
- nb0 = ((nbonds* thread )/(fr->nthreads))*nat1;
- nbn = ((nbonds*(thread+1))/(fr->nthreads))*nat1 - nb0;
+ nb0 = idef->il_thread_division[ftype*(idef->nthreads+1)+thread];
+ nbn = idef->il_thread_division[ftype*(idef->nthreads+1)+thread+1] - nb0;
- if (!IS_LISTED_LJ_C(ftype))
+ if (!IS_LISTED_LJ_C(ftype))
+ {
+ if (ftype == F_CMAP)
{
- if (ftype == F_CMAP)
- {
- v = cmap_dihs(nbn, iatoms+nb0,
- idef->iparams, &idef->cmap_grid,
- (const rvec*)x, f, fshift,
- pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
- md, fcd, global_atom_index);
- }
- else if (ftype == F_PDIHS &&
- !bCalcEnerVir && fr->efep == efepNO)
- {
- /* No energies, shift forces, dvdl */
-#ifndef SSE_PROPER_DIHEDRALS
- pdihs_noener
+ v = cmap_dihs(nbn, iatoms+nb0,
+ idef->iparams, &idef->cmap_grid,
+ (const rvec*)x, f, fshift,
+ pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
+ md, fcd, global_atom_index);
+ }
+#ifdef GMX_SIMD_HAVE_REAL
+ else if (ftype == F_ANGLES &&
+ !bCalcEnerVir && fr->efep == efepNO)
+ {
+ /* No energies, shift forces, dvdl */
+ angles_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
+ idef->iparams,
+ (const rvec*)x, f,
+ pbc, g, lambda[efptFTYPE], md, fcd,
+ global_atom_index);
+ v = 0;
+ }
+#endif
+ else if (ftype == F_PDIHS &&
+ !bCalcEnerVir && fr->efep == efepNO)
+ {
+ /* No energies, shift forces, dvdl */
+#ifdef GMX_SIMD_HAVE_REAL
+ pdihs_noener_simd
#else
- pdihs_noener_sse
+ pdihs_noener
#endif
- (nbn, idef->il[ftype].iatoms+nb0,
- idef->iparams,
- (const rvec*)x, f,
- pbc, g, lambda[efptFTYPE], md, fcd,
- global_atom_index);
- v = 0;
- }
- else
- {
- v = interaction_function[ftype].ifunc(nbn, iatoms+nb0,
- idef->iparams,
- (const rvec*)x, f, fshift,
- pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
- md, fcd, global_atom_index);
- }
- if (bPrintSepPot)
- {
- fprintf(fplog, " %-23s #%4d V %12.5e dVdl %12.5e\n",
- interaction_function[ftype].longname,
- nbonds/nat1, v, lambda[efptFTYPE]);
- }
+ (nbn, idef->il[ftype].iatoms+nb0,
+ idef->iparams,
+ (const rvec*)x, f,
+ pbc, g, lambda[efptFTYPE], md, fcd,
+ global_atom_index);
+ v = 0;
}
+#ifdef GMX_SIMD_HAVE_REAL
+ else if (ftype == F_RBDIHS &&
+ !bCalcEnerVir && fr->efep == efepNO)
+ {
+ /* No energies, shift forces, dvdl */
+ rbdihs_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
+ idef->iparams,
+ (const rvec*)x, f,
+ pbc, g, lambda[efptFTYPE], md, fcd,
+ global_atom_index);
+ v = 0;
+ }
+#endif
else
{
- v = do_nonbonded_listed(ftype, nbn, iatoms+nb0, idef->iparams, (const rvec*)x, f, fshift,
- pbc, g, lambda, dvdl, md, fr, grpp, global_atom_index);
-
- enerd->dvdl_nonlin[efptCOUL] += dvdl[efptCOUL];
- enerd->dvdl_nonlin[efptVDW] += dvdl[efptVDW];
-
- if (bPrintSepPot)
- {
- fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n",
- interaction_function[ftype].longname,
- interaction_function[F_LJ14].longname, nbonds/nat1, dvdl[efptVDW]);
- fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n",
- interaction_function[ftype].longname,
- interaction_function[F_COUL14].longname, nbonds/nat1, dvdl[efptCOUL]);
- }
+ v = interaction_function[ftype].ifunc(nbn, iatoms+nb0,
+ idef->iparams,
+ (const rvec*)x, f, fshift,
+ pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
+ md, fcd, global_atom_index);
}
- if (ind != -1 && thread == 0)
+ if (bPrintSepPot)
{
- inc_nrnb(nrnb, ind, nbonds);
+ fprintf(fplog, " %-23s #%4d V %12.5e dVdl %12.5e\n",
+ interaction_function[ftype].longname,
+ nbonds, v, lambda[efptFTYPE]);
}
}
-
- return v;
-}
-
-/* WARNING! THIS FUNCTION MUST EXACTLY TRACK THE calc
- function, or horrible things will happen when doing free energy
- calculations! In a good coding world, this would not be a
- different function, but for speed reasons, it needs to be made a
- separate function. TODO for 5.0 - figure out a way to reorganize
- to reduce duplication.
- */
-
-static real calc_one_bond_foreign(FILE *fplog, int ftype, const t_idef *idef,
- rvec x[], rvec f[], t_forcerec *fr,
- const t_pbc *pbc, const t_graph *g,
- gmx_grppairener_t *grpp, t_nrnb *nrnb,
- real *lambda, real *dvdl,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index, gmx_bool bPrintSepPot)
-{
- int ind, nat1, nbonds, efptFTYPE, nbonds_np;
- real v = 0;
- t_iatom *iatoms;
-
- if (IS_RESTRAINT_TYPE(ftype))
- {
- efptFTYPE = efptRESTRAINT;
- }
else
{
- efptFTYPE = efptBONDED;
+ v = do_nonbonded_listed(ftype, nbn, iatoms+nb0, idef->iparams, (const rvec*)x, f, fshift,
+ pbc, g, lambda, dvdl, md, fr, grpp, global_atom_index);
+
+ if (bPrintSepPot)
+ {
+ fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n",
+ interaction_function[ftype].longname,
+ interaction_function[F_LJ14].longname, nbonds, dvdl[efptVDW]);
+ fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n",
+ interaction_function[ftype].longname,
+ interaction_function[F_COUL14].longname, nbonds, dvdl[efptCOUL]);
+ }
}
- if (ftype < F_GB12 || ftype > F_GB14)
+ if (thread == 0)
{
- if (interaction_function[ftype].flags & IF_BOND &&
- !(ftype == F_CONNBONDS || ftype == F_POSRES || ftype == F_FBPOSRES))
- {
- ind = interaction_function[ftype].nrnb_ind;
- nat1 = interaction_function[ftype].nratoms+1;
- nbonds_np = idef->il[ftype].nr_nonperturbed;
- nbonds = idef->il[ftype].nr - nbonds_np;
- iatoms = idef->il[ftype].iatoms + nbonds_np;
- if (nbonds > 0)
- {
- if (!IS_LISTED_LJ_C(ftype))
- {
- if (ftype == F_CMAP)
- {
- v = cmap_dihs(nbonds, iatoms,
- idef->iparams, &idef->cmap_grid,
- (const rvec*)x, f, fr->fshift,
- pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd,
- global_atom_index);
- }
- else
- {
- v = interaction_function[ftype].ifunc(nbonds, iatoms,
- idef->iparams,
- (const rvec*)x, f, fr->fshift,
- pbc, g, lambda[efptFTYPE], &dvdl[efptFTYPE],
- md, fcd, global_atom_index);
- }
- }
- else
- {
- v = do_nonbonded_listed(ftype, nbonds, iatoms,
- idef->iparams,
- (const rvec*)x, f, fr->fshift,
- pbc, g, lambda, dvdl,
- md, fr, grpp, global_atom_index);
- }
- if (ind != -1)
- {
- inc_nrnb(nrnb, ind, nbonds/nat1);
- }
- }
- }
+ inc_nrnb(nrnb, interaction_function[ftype].nrnb_ind, nbonds);
}
+
return v;
}
real *lambda,
const t_mdatoms *md,
t_fcdata *fcd, int *global_atom_index,
- t_atomtypes *atype, gmx_genborn_t *born,
+ t_atomtypes gmx_unused *atype, gmx_genborn_t gmx_unused *born,
int force_flags,
- gmx_bool bPrintSepPot, gmx_large_int_t step)
+ gmx_bool bPrintSepPot, gmx_int64_t step)
{
gmx_bool bCalcEnerVir;
int i;
char buf[22];
int thread;
+ assert(fr->nthreads == idef->nthreads);
+
bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
for (i = 0; i < efptNR; i++)
}
if (bPrintSepPot)
{
- fprintf(fplog, "Step %s: bonded V and dVdl for this node\n",
+ fprintf(fplog, "Step %s: bonded V and dVdl for this rank\n",
gmx_step_str(step, buf));
}
}
if (idef->il[F_DISRES].nr)
{
- calc_disres_R_6(ms, idef->il[F_DISRES].nr,
+ calc_disres_R_6(idef->il[F_DISRES].nr,
idef->il[F_DISRES].iatoms,
idef->iparams, (const rvec*)x, pbc_null,
fcd, hist);
+#ifdef GMX_MPI
+ if (fcd->disres.nsystems > 1)
+ {
+ gmx_sum_sim(2*fcd->disres.nres, fcd->disres.Rt_6, ms);
+ }
+#endif
}
#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
for (thread = 0; thread < fr->nthreads; thread++)
{
- int ftype, nbonds, ind, nat1;
+ int ftype;
real *epot, v;
/* thread stuff */
rvec *ft, *fshift;
real *dvdlt;
gmx_grppairener_t *grpp;
- int nb0, nbn;
if (thread == 0)
{
/* Loop over all bonded force types to calculate the bonded forces */
for (ftype = 0; (ftype < F_NRE); ftype++)
{
- if (idef->il[ftype].nr > 0 &&
- (interaction_function[ftype].flags & IF_BOND) &&
- (ftype < F_GB12 || ftype > F_GB14) &&
- !(ftype == F_CONNBONDS || ftype == F_POSRES))
+ if (idef->il[ftype].nr > 0 && ftype_is_bonded_potential(ftype))
{
v = calc_one_bond(fplog, thread, ftype, idef, x,
- ft, fshift, fr, pbc_null, g, enerd, grpp,
+ ft, fshift, fr, pbc_null, g, grpp,
nrnb, lambda, dvdlt,
md, fcd, bCalcEnerVir,
global_atom_index, bPrintSepPot);
- epot[ftype] += v;
+ epot[ftype] += v;
}
}
}
t_fcdata *fcd,
int *global_atom_index)
{
- int i, ftype, nbonds_np, nbonds, ind, nat;
- real v, dr, dr2;
+ int i, ftype, nr_nonperturbed, nr;
+ real v;
real dvdl_dum[efptNR];
- rvec *f, *fshift_orig;
+ rvec *f, *fshift;
const t_pbc *pbc_null;
- t_iatom *iatom_fe;
+ t_idef idef_fe;
if (fr->bMolPBC)
{
pbc_null = NULL;
}
+ /* Copy the whole idef, so we can modify the contents locally */
+ idef_fe = *idef;
+ idef_fe.nthreads = 1;
+ snew(idef_fe.il_thread_division, F_NRE*(idef_fe.nthreads+1));
+
+ /* We already have the forces, so we use temp buffers here */
snew(f, fr->natoms_force);
- /* We want to preserve the fshift array in forcerec */
- fshift_orig = fr->fshift;
- snew(fr->fshift, SHIFTS);
+ snew(fshift, SHIFTS);
- /* Loop over all bonded force types to calculate the bonded forces */
+ /* Loop over all bonded force types to calculate the bonded energies */
for (ftype = 0; (ftype < F_NRE); ftype++)
{
- v = calc_one_bond_foreign(fplog, ftype, idef, x,
- f, fr, pbc_null, g, grpp, nrnb, lambda, dvdl_dum,
- md, fcd, global_atom_index, FALSE);
- epot[ftype] += v;
+ if (ftype_is_bonded_potential(ftype))
+ {
+ /* Set the work range of thread 0 to the perturbed bondeds only */
+ nr_nonperturbed = idef->il[ftype].nr_nonperturbed;
+ nr = idef->il[ftype].nr;
+ idef_fe.il_thread_division[ftype*2+0] = nr_nonperturbed;
+ idef_fe.il_thread_division[ftype*2+1] = nr;
+
+ /* This is only to get the flop count correct */
+ idef_fe.il[ftype].nr = nr - nr_nonperturbed;
+
+ if (nr - nr_nonperturbed > 0)
+ {
+ v = calc_one_bond(fplog, 0, ftype, &idef_fe,
+ x, f, fshift, fr, pbc_null, g,
+ grpp, nrnb, lambda, dvdl_dum,
+ md, fcd, TRUE,
+ global_atom_index, FALSE);
+ epot[ftype] += v;
+ }
+ }
}
- sfree(fr->fshift);
- fr->fshift = fshift_orig;
+ sfree(fshift);
sfree(f);
+
+ sfree(idef_fe.il_thread_division);
}