}
}
+static inline real inverseNorm(const rvec x)
+{
+ return gmx::invsqrt(iprod(x, x));
+}
+
/* Vsite construction routines */
static void constr_vsite2(const rvec xi, const rvec xj, rvec x, real a, const t_pbc *pbc)
/* TOTAL: 10 flops */
}
+static void constr_vsite2FD(const rvec xi, const rvec xj, rvec x, real a,
+ const t_pbc *pbc)
+{
+ rvec xij;
+ pbc_rvec_sub(pbc, xj, xi, xij);
+ /* 3 flops */
+
+ const real b = a*inverseNorm(xij);
+ /* 6 + 10 flops */
+
+ x[XX] = xi[XX] + b*xij[XX];
+ x[YY] = xi[YY] + b*xij[YY];
+ x[ZZ] = xi[ZZ] + b*xij[ZZ];
+ /* 6 Flops */
+
+ /* TOTAL: 25 flops */
+}
+
static void constr_vsite3(const rvec xi, const rvec xj, const rvec xk, rvec x, real a, real b,
const t_pbc *pbc)
{
temp[ZZ] = xij[ZZ] + a*xjk[ZZ];
/* 6 flops */
- c = b*gmx::invsqrt(iprod(temp, temp));
+ c = b*inverseNorm(temp);
/* 6 + 10 flops */
x[XX] = xi[XX] + c*temp[XX];
pbc_rvec_sub(pbc, xk, xj, xjk);
/* 6 flops */
- invdij = gmx::invsqrt(iprod(xij, xij));
+ invdij = inverseNorm(xij);
c1 = invdij * invdij * iprod(xij, xjk);
xp[XX] = xjk[XX] - c1*xij[XX];
xp[YY] = xjk[YY] - c1*xij[YY];
xp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
a1 = a*invdij;
- b1 = b*gmx::invsqrt(iprod(xp, xp));
+ b1 = b*inverseNorm(xp);
/* 45 */
x[XX] = xi[XX] + a1*xij[XX] + b1*xp[XX];
temp[ZZ] = xij[ZZ] + a*xjk[ZZ] + b*xjl[ZZ];
/* 12 flops */
- d = c*gmx::invsqrt(iprod(temp, temp));
+ d = c*inverseNorm(temp);
/* 6 + 10 flops */
x[XX] = xi[XX] + d*temp[XX];
cprod(rja, rjb, rm);
/* 9 flops */
- d = c*gmx::invsqrt(norm2(rm));
+ d = c*inverseNorm(rm);
/* 5+5+1 flops */
x[XX] = xi[XX] + d*rm[XX];
aj = ia[3];
constr_vsite2(x[ai], x[aj], x[avsite], a1, pbc_null2);
break;
+ case F_VSITE2FD:
+ aj = ia[3];
+ constr_vsite2FD(x[ai], x[aj], x[avsite], a1, pbc_null2);
+ break;
case F_VSITE3:
aj = ia[3];
ak = ia[4];
}
}
+static void spread_vsite2FD(const t_iatom ia[], real a,
+ const rvec x[],
+ rvec f[], rvec fshift[],
+ gmx_bool VirCorr, matrix dxdf,
+ const t_pbc *pbc, const t_graph *g)
+{
+ const int av = ia[1];
+ const int ai = ia[2];
+ const int aj = ia[3];
+ rvec fv;
+ copy_rvec(f[av], fv);
+
+ rvec xij;
+ int sji = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
+ /* 6 flops */
+
+ const real invDistance = inverseNorm(xij);
+ const real b = a*invDistance;
+ /* 4 + ?10? flops */
+
+ const real fproj = iprod(xij, fv)*invDistance*invDistance;
+
+ rvec fj;
+ fj[XX] = b*(fv[XX] - fproj*xij[XX]);
+ fj[YY] = b*(fv[YY] - fproj*xij[YY]);
+ fj[ZZ] = b*(fv[ZZ] - fproj*xij[ZZ]);
+ /* 9 */
+
+ /* b is already calculated in constr_vsite2FD
+ storing b somewhere will save flops. */
+
+ f[ai][XX] += fv[XX] - fj[XX];
+ f[ai][YY] += fv[YY] - fj[YY];
+ f[ai][ZZ] += fv[ZZ] - fj[ZZ];
+ f[aj][XX] += fj[XX];
+ f[aj][YY] += fj[YY];
+ f[aj][ZZ] += fj[ZZ];
+ /* 9 Flops */
+
+ if (fshift)
+ {
+ int svi;
+ if (g)
+ {
+ ivec di;
+ ivec_sub(SHIFT_IVEC(g, ia[1]), SHIFT_IVEC(g, ai), di);
+ svi = IVEC2IS(di);
+ ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
+ sji = IVEC2IS(di);
+ }
+ else if (pbc)
+ {
+ rvec xvi;
+ svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
+ }
+ else
+ {
+ svi = CENTRAL;
+ }
+
+ if (svi != CENTRAL || sji != CENTRAL)
+ {
+ rvec_dec(fshift[svi], fv);
+ fshift[CENTRAL][XX] += fv[XX] - fj[XX];
+ fshift[CENTRAL][YY] += fv[YY] - fj[YY];
+ fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ];
+ fshift[ sji][XX] += fj[XX];
+ fshift[ sji][YY] += fj[YY];
+ fshift[ sji][ZZ] += fj[ZZ];
+ }
+ }
+
+ if (VirCorr)
+ {
+ /* When VirCorr=TRUE, the virial for the current forces is not
+ * calculated from the redistributed forces. This means that
+ * the effect of non-linear virtual site constructions on the virial
+ * needs to be added separately. This contribution can be calculated
+ * in many ways, but the simplest and cheapest way is to use
+ * the first constructing atom ai as a reference position in space:
+ * subtract (xv-xi)*fv and add (xj-xi)*fj.
+ */
+ rvec xiv;
+
+ pbc_rvec_sub(pbc, x[av], x[ai], xiv);
+
+ for (int i = 0; i < DIM; i++)
+ {
+ for (int j = 0; j < DIM; j++)
+ {
+ /* As xix is a linear combination of j and k, use that here */
+ dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j];
+ }
+ }
+ }
+
+ /* TOTAL: 38 flops */
+}
+
static void spread_vsite3(const t_iatom ia[], real a, real b,
const rvec x[],
rvec f[], rvec fshift[],
gmx_bool VirCorr, matrix dxdf,
const t_pbc *pbc, const t_graph *g)
{
- real c, invl, fproj, a1;
+ real fproj, a1;
rvec xvi, xij, xjk, xix, fv, temp;
t_iatom av, ai, aj, ak;
int svi, sji, skj;
xix[ZZ] = xij[ZZ]+a*xjk[ZZ];
/* 6 flops */
- invl = gmx::invsqrt(iprod(xix, xix));
- c = b*invl;
+ const real invDistance = inverseNorm(xix);
+ const real c = b*invDistance;
/* 4 + ?10? flops */
- fproj = iprod(xix, fv)*invl*invl; /* = (xix . f)/(xix . xix) */
+ fproj = iprod(xix, fv)*invDistance*invDistance; /* = (xix . f)/(xix . xix) */
temp[XX] = c*(fv[XX]-fproj*xix[XX]);
temp[YY] = c*(fv[YY]-fproj*xix[YY]);
skj = pbc_rvec_sub(pbc, x[ak], x[aj], xjk);
/* 6 flops */
- invdij = gmx::invsqrt(iprod(xij, xij));
+ invdij = inverseNorm(xij);
invdij2 = invdij * invdij;
c1 = iprod(xij, xjk) * invdij2;
xperp[XX] = xjk[XX] - c1*xij[XX];
xperp[YY] = xjk[YY] - c1*xij[YY];
xperp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
/* xperp in plane ijk, perp. to ij */
- invdp = gmx::invsqrt(iprod(xperp, xperp));
+ invdp = inverseNorm(xperp);
a1 = a*invdij;
b1 = b*invdp;
/* 45 flops */
gmx_bool VirCorr, matrix dxdf,
const t_pbc *pbc, const t_graph *g)
{
- real d, invl, fproj, a1;
+ real fproj, a1;
rvec xvi, xij, xjk, xjl, xix, fv, temp;
int av, ai, aj, ak, al;
ivec di;
}
/* 12 flops */
- invl = gmx::invsqrt(iprod(xix, xix));
- d = c*invl;
+ const real invDistance = inverseNorm(xix);
+ const real d = c*invDistance;
/* 4 + ?10? flops */
copy_rvec(f[av], fv);
- fproj = iprod(xix, fv)*invl*invl; /* = (xix . f)/(xix . xix) */
+ fproj = iprod(xix, fv)*invDistance*invDistance; /* = (xix . f)/(xix . xix) */
for (m = 0; m < DIM; m++)
{
cprod(rja, rjb, rm);
/* 9 flops */
- invrm = gmx::invsqrt(norm2(rm));
+ invrm = inverseNorm(rm);
denom = invrm*invrm;
/* 5+5+2 flops */
case F_VSITE2:
spread_vsite2(ia, a1, x, f, fshift, pbc_null2, g);
break;
+ case F_VSITE2FD:
+ spread_vsite2FD(ia, a1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
+ break;
case F_VSITE3:
b1 = ip[tp].vsite.b;
spread_vsite3(ia, a1, b1, x, f, fshift, pbc_null2, g);
}
inc_nrnb(nrnb, eNR_VSITE2, vsite_count(idef->il, F_VSITE2));
+ inc_nrnb(nrnb, eNR_VSITE2FD, vsite_count(idef->il, F_VSITE2FD));
inc_nrnb(nrnb, eNR_VSITE3, vsite_count(idef->il, F_VSITE3));
inc_nrnb(nrnb, eNR_VSITE3FD, vsite_count(idef->il, F_VSITE3FD));
inc_nrnb(nrnb, eNR_VSITE3FAD, vsite_count(idef->il, F_VSITE3FAD));