#ifndef M_1_PI
#define M_1_PI 0.31830988618379067154
#endif
-
+
+#ifndef M_FLOAT_1_SQRTPI /* used in CUDA kernels */
+/* 1.0 / sqrt(M_PI) */
+#define M_FLOAT_1_SQRTPI 0.564189583547756f
+#endif
+
+#ifndef M_1_SQRTPI
+/* 1.0 / sqrt(M_PI) */
+#define M_1_SQRTPI 0.564189583547756
+#endif
+
+#ifndef M_2_SQRTPI
+/* 2.0 / sqrt(M_PI) */
+#define M_2_SQRTPI 1.128379167095513
+#endif
+
/* Suzuki-Yoshida Constants, for n=3 and n=5, for symplectic integration */
/* for n=1, w0 = 1 */
/* for n=3, w0 = w2 = 1/(2-2^-(1/3)), w1 = 1-2*w0 */
static double erf2(double x)
{
- return -(4*x/(sqrt(M_PI)))*exp(-x*x);
+ return -(2*x*M_2_SQRTPI)*exp(-x*x);
}
static double erf1(double x)
{
- return (2/sqrt(M_PI))*exp(-x*x);
+ return M_2_SQRTPI*exp(-x*x);
}
static void do_hard(FILE *fp,int pts_nm,double efac,double delta)
double *vr,double *fr)
{
double r2,r_6,r_12;
- double isp= 0.564189583547756;
double ewc;
ewc = calc_ewaldcoeff(rcoulomb,ewald_rtol);
r_12 = r_6*r_6;
*vc = erfc(ewc*r)/r;
- /* *vc2 = 2*erfc(ewc*r)/(r*r2)+4*exp(-(ewc*ewc*r2))*ewc*isp/r2+
- 4*ewc*ewc*ewc*exp(-(ewc*ewc*r2))*isp;*/
- *fc = 2*ewc*exp(-ewc*ewc*r2)*isp/r + erfc(ewc*r)/r2;
+ /* *vc2 = 2*erfc(ewc*r)/(r*r2)+2*exp(-(ewc*ewc*r2))*ewc*M_2_SQRTPI/r2+
+ 2*ewc*ewc*ewc*exp(-(ewc*ewc*r2))*M_2_SQRTPI;*/
+ *fc = ewc*exp(-ewc*ewc*r2)*M_2_SQRTPI/r + erfc(ewc*r)/r2;
*vd = -r_6;
*fd = -6.0*(*vd)/r;
real eps,eps2,VV,FF,F,Y,Geps,Heps2,Fp,fijC,r1t;
real *VFtab=fr->coulvdwtab;
int n0,n1,nnn;
-#else
- double isp=0.564189583547756;
#endif
gmx_bool bFreeEnergy = (chargeB != NULL);
gmx_bool bMolPBC = fr->bMolPBC;
one_4pi_eps = ONE_4PI_EPS0/fr->epsilon_r;
- vr0 = ewc*2/sqrt(M_PI);
+ vr0 = ewc*M_2_SQRTPI;
AA = excl->a;
Vexcl = 0;
#define R_ERF_R_INACC 0.1
#endif
if (ewcdr > R_ERF_R_INACC) {
- fscal = rinv2*(vc - 2.0*qqA*ewc*isp*exp(-ewcdr*ewcdr));
+ fscal = rinv2*(vc - qqA*ewc*M_2_SQRTPI*exp(-ewcdr*ewcdr));
} else {
/* Use a fourth order series expansion for small ewcdr */
fscal = ewc*ewc*qqA*vr0*(2.0/3.0 - 0.4*ewcdr*ewcdr);
v = gmx_erf(ewc*dr)*rinv;
vc = qqL*v;
Vexcl += vc;
- fscal = rinv2*(vc-2.0*qqL*ewc*isp*exp(-ewc*ewc*dr2));
+ fscal = rinv2*(vc-qqL*ewc*M_2_SQRTPI*exp(-ewc*ewc*dr2));
svmul(fscal,dx,df);
rvec_inc(f[k],df);
rvec_dec(f[i],df);
for(q=0; q<(bFreeEnergy ? 2 : 1); q++) {
if (calc_excl_corr) {
/* Self-energy correction */
- Vself[q] = ewc*one_4pi_eps*fr->q2sum[q]/sqrt(M_PI);
+ Vself[q] = ewc*one_4pi_eps*fr->q2sum[q]*M_1_SQRTPI;
}
/* Apply surface dipole correction:
int do_coultab,do_vdwtab,do_tab,tab_elemsize;
int n0,n1C,n1V,nnn;
real Y,F,G,H,Fp,Geps,Heps2,epsC,eps2C,epsV,eps2V,VV,FF;
- double isp=0.564189583547756;
int icoul,ivdw;
int nri;
int * iinr;
if (r != 0)
{
VV = gmx_erf(ewc*r)*rinv;
- FF = rinv*rinv*(VV - 2.0*ewc*isp*exp(-ewc*ewc*rsq));
+ FF = rinv*rinv*(VV - ewc*M_2_SQRTPI*exp(-ewc*ewc*rsq));
}
else
{
- VV = ewc*2.0/sqrt(M_PI);
+ VV = ewc*M_2_SQRTPI;
FF = 0;
}
b = calc_ewaldcoeff(ir->rcoulomb,ir->ewald_rtol);
rc = ir->rcoulomb;
br = b*rc;
- md_el = elfac*(2*b*exp(-br*br)/(sqrt(M_PI)*rc) + gmx_erfc(br)/(rc*rc));
- dd_el = elfac/(rc*rc)*(4*b*(1 + br*br)*exp(-br*br)/sqrt(M_PI) + 2*gmx_erfc(br)/rc);
+ md_el = elfac*(b*exp(-br*br)*M_2_SQRTPI/rc + gmx_erfc(br)/(rc*rc));
+ dd_el = elfac/(rc*rc)*(2*b*(1 + br*br)*exp(-br*br)*M_2_SQRTPI + 2*gmx_erfc(br)/rc);
}
else
{
* Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
*/
+#include "maths.h"
+/* Note that floating-point constants in CUDA code should be suffixed
+ * with f (e.g. 0.5f), to stop the compiler producing intermediate
+ * code that is in double precision.
+ */
+
#if __CUDA_ARCH__ >= 300
#define REDUCE_SHUFFLE
/* On Kepler pre-loading i-atom types to shmem gives a few %,
#ifdef EL_RF
E_el *= -nbparam.epsfac*0.5f*c_rf;
#else
- E_el *= -nbparam.epsfac*beta*0.56418958f; /* last factor 1/sqrt(pi) */
+ E_el *= -nbparam.epsfac*beta*M_FLOAT_1_SQRTPI; /* last factor 1/sqrt(pi) */
#endif
}
#endif
* Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
*/
+#include "maths.h"
+/* Note that floating-point constants in CUDA code should be suffixed
+ * with f (e.g. 0.5f), to stop the compiler producing intermediate
+ * code that is in double precision.
+ */
+
/*
Kernel launch parameters:
- #blocks = #pair lists, blockId = pair list Id
#ifdef EL_RF
E_el *= -nbparam.epsfac*0.5f*c_rf;
#else
- E_el *= -nbparam.epsfac*beta*0.56418958f; /* last factor 1/sqrt(pi) */
+ E_el *= -nbparam.epsfac*beta*M_FLOAT_1_SQRTPI; /* last factor 1/sqrt(pi) */
#endif
}
#endif
* And Hey:
* Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
*/
+
+/* Note that floating-point constants in CUDA code should be suffixed
+ * with f (e.g. 0.5f), to stop the compiler producing intermediate
+ * code that is in double precision.
+ */
+
#include "../../gmxlib/cuda_tools/vectype_ops.cuh"
#ifndef NBNXN_CUDA_KERNEL_UTILS_CUH
else
{
/* last factor 1/sqrt(pi) */
- vctot *= -facel*iconst->ewaldcoeff*0.564189583548;
+ vctot *= -facel*iconst->ewaldcoeff*M_1_SQRTPI;
}
}
#endif
#endif
#ifdef CALC_COUL_EWALD
- /* 0.5*beta*2/sqrt(pi) */
- Vc_sub_self = 0.5*ic->ewaldcoeff*1.128379167095513;
+ /* beta/sqrt(pi) */
+ Vc_sub_self = 0.5*ic->ewaldcoeff*M_2_SQRTPI;
#endif
for(ia=0; ia<UNROLLI; ia++)
/* Temporary parameters */
gmx_bool bSwitch,bShift;
double ewc=fr->ewaldcoeff;
- double isp= 0.564189583547756;
bSwitch = ((tp == etabLJ6Switch) || (tp == etabLJ12Switch) ||
(tp == etabCOULSwitch) ||
case etabEwald:
case etabEwaldSwitch:
Vtab = gmx_erfc(ewc*r)/r;
- Ftab = gmx_erfc(ewc*r)/r2+2*exp(-(ewc*ewc*r2))*ewc*isp/r;
+ Ftab = gmx_erfc(ewc*r)/r2+exp(-(ewc*ewc*r2))*ewc*M_2_SQRTPI/r;
break;
case etabEwaldUser:
case etabEwaldUserSwitch:
/* Only calculate minus the reciprocal space contribution */
Vtab = -gmx_erf(ewc*r)/r;
- Ftab = -gmx_erf(ewc*r)/r2+2*exp(-(ewc*ewc*r2))*ewc*isp/r;
+ Ftab = -gmx_erf(ewc*r)/r2+exp(-(ewc*ewc*r2))*ewc*M_2_SQRTPI/r;
break;
case etabRF:
case etabRF_ZERO:
erfarg=(x-a[3])/(a[4]*a[4]);
erfarg2=erfarg*erfarg;
erfval=gmx_erf(erfarg)/2;
- derf=(2.0/sqrt(M_PI))*(a[1]-a[2])/2*exp(-erfarg2)/(a[4]*a[4]);
+ derf=M_2_SQRTPI*(a[1]-a[2])/2*exp(-erfarg2)/(a[4]*a[4]);
*y=(a[1]+a[2])/2-(a[1]-a[2])*erfval;
dyda[1]=1/2-erfval;
dyda[2]=1/2+erfval;