Clean up some mathematical constants
authorMark Abraham <mark.j.abraham@gmail.com>
Thu, 22 Nov 2012 14:48:29 +0000 (15:48 +0100)
committerMark Abraham <mark.j.abraham@gmail.com>
Mon, 26 Nov 2012 14:24:28 +0000 (15:24 +0100)
Replaced some hard-coded floats with unified constants in include/maths.h,
and made most places refer to them. Also added note that care is needed
about float-sized constants in CUDA code.

Change-Id: Icf1ec0b81179792d5e6d4d4ab9b38af3a38d7770

12 files changed:
include/maths.h
src/contrib/gen_table.c
src/gmxlib/ewald_util.c
src/gmxlib/nonbonded/nb_free_energy.c
src/kernel/calc_verletbuf.c
src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh
src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_legacy.cuh
src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh
src/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c
src/mdlib/nbnxn_kernels/nbnxn_kernel_x86_simd_outer.h
src/mdlib/tables.c
src/tools/expfit.c

index e2c944bb04c78d1e827d0c3c1fa8298a61e8c8fe..a4fd55ded43197a4b2cbea603185e9fae4fee403 100644 (file)
@@ -64,7 +64,22 @@ extern "C" {
 #ifndef M_1_PI
 #define M_1_PI      0.31830988618379067154
 #endif
-    
+
+#ifndef M_FLOAT_1_SQRTPI /* used in CUDA kernels */
+/* 1.0 / sqrt(M_PI) */
+#define M_FLOAT_1_SQRTPI 0.564189583547756f
+#endif
+
+#ifndef M_1_SQRTPI
+/* 1.0 / sqrt(M_PI) */
+#define M_1_SQRTPI 0.564189583547756
+#endif
+
+#ifndef M_2_SQRTPI
+/* 2.0 / sqrt(M_PI) */
+#define M_2_SQRTPI  1.128379167095513
+#endif
+
 /* Suzuki-Yoshida Constants, for n=3 and n=5, for symplectic integration  */
 /* for n=1, w0 = 1 */
 /* for n=3, w0 = w2 = 1/(2-2^-(1/3)), w1 = 1-2*w0 */
index c0f0116409ff1a78936be9368babee20ae53fd19..1ae2912acef40caaef85b1232df7eabae101b3b4 100644 (file)
@@ -47,12 +47,12 @@ enum { mGuillot2001a, mAB1, mLjc, mMaaren, mGuillot_Maple, mHard_Wall, mGG, mGG_
 
 static double erf2(double x)
 {
-  return -(4*x/(sqrt(M_PI)))*exp(-x*x);
+  return -(2*x*M_2_SQRTPI)*exp(-x*x);
 }
 
 static double erf1(double x)
 {
-  return (2/sqrt(M_PI))*exp(-x*x);
+  return M_2_SQRTPI*exp(-x*x);
 }
 
 static void do_hard(FILE *fp,int pts_nm,double efac,double delta)
@@ -142,7 +142,6 @@ static void lo_do_ljc_pme(double r,
                          double *vr,double *fr)
 {
   double r2,r_6,r_12;
-  double isp= 0.564189583547756;
   double ewc;
 
   ewc  = calc_ewaldcoeff(rcoulomb,ewald_rtol);
@@ -152,9 +151,9 @@ static void lo_do_ljc_pme(double r,
   r_12 = r_6*r_6;
   
   *vc   = erfc(ewc*r)/r;
-  /* *vc2  = 2*erfc(ewc*r)/(r*r2)+4*exp(-(ewc*ewc*r2))*ewc*isp/r2+
-     4*ewc*ewc*ewc*exp(-(ewc*ewc*r2))*isp;*/
-  *fc  = 2*ewc*exp(-ewc*ewc*r2)*isp/r + erfc(ewc*r)/r2;
+  /* *vc2  = 2*erfc(ewc*r)/(r*r2)+2*exp(-(ewc*ewc*r2))*ewc*M_2_SQRTPI/r2+
+     2*ewc*ewc*ewc*exp(-(ewc*ewc*r2))*M_2_SQRTPI;*/
+  *fc  = ewc*exp(-ewc*ewc*r2)*M_2_SQRTPI/r + erfc(ewc*r)/r2;
 
   *vd  = -r_6;
   *fd  = -6.0*(*vd)/r;
index e688418f622982f703d3b1302f439b2e33487b3d..b88d86adcf4b98c865eb2bfab1b65807850b3fe9 100644 (file)
@@ -103,14 +103,12 @@ real ewald_LRcorrection(FILE *fplog,
   real    eps,eps2,VV,FF,F,Y,Geps,Heps2,Fp,fijC,r1t;
   real    *VFtab=fr->coulvdwtab;
   int     n0,n1,nnn;
-#else
-  double  isp=0.564189583547756;
 #endif
   gmx_bool    bFreeEnergy = (chargeB != NULL);
   gmx_bool    bMolPBC = fr->bMolPBC;
 
   one_4pi_eps = ONE_4PI_EPS0/fr->epsilon_r;
-  vr0 = ewc*2/sqrt(M_PI);
+  vr0 = ewc*M_2_SQRTPI;
 
   AA         = excl->a;
   Vexcl      = 0;
@@ -237,7 +235,7 @@ real ewald_LRcorrection(FILE *fplog,
 #define              R_ERF_R_INACC 0.1
 #endif
              if (ewcdr > R_ERF_R_INACC) {
-               fscal = rinv2*(vc - 2.0*qqA*ewc*isp*exp(-ewcdr*ewcdr));
+               fscal = rinv2*(vc - qqA*ewc*M_2_SQRTPI*exp(-ewcdr*ewcdr));
              } else {
                /* Use a fourth order series expansion for small ewcdr */
                fscal = ewc*ewc*qqA*vr0*(2.0/3.0 - 0.4*ewcdr*ewcdr);
@@ -302,7 +300,7 @@ real ewald_LRcorrection(FILE *fplog,
              v      = gmx_erf(ewc*dr)*rinv;
              vc     = qqL*v;
              Vexcl += vc;
-             fscal  = rinv2*(vc-2.0*qqL*ewc*isp*exp(-ewc*ewc*dr2));
+             fscal  = rinv2*(vc-qqL*ewc*M_2_SQRTPI*exp(-ewc*ewc*dr2));
              svmul(fscal,dx,df);
              rvec_inc(f[k],df);
              rvec_dec(f[i],df);
@@ -338,7 +336,7 @@ real ewald_LRcorrection(FILE *fplog,
     for(q=0; q<(bFreeEnergy ? 2 : 1); q++) {
       if (calc_excl_corr) {
         /* Self-energy correction */
-        Vself[q] = ewc*one_4pi_eps*fr->q2sum[q]/sqrt(M_PI);
+        Vself[q] = ewc*one_4pi_eps*fr->q2sum[q]*M_1_SQRTPI;
       }
       
       /* Apply surface dipole correction:
index a4bbe5b51247a78a77b70efb91e0eafcf640f854..fb7bbefbd31bb94b80a56d4c9e235a9e1306cee4 100644 (file)
@@ -79,7 +79,6 @@ gmx_nb_free_energy_kernel(t_nblist *                nlist,
     int           do_coultab,do_vdwtab,do_tab,tab_elemsize;
     int           n0,n1C,n1V,nnn;
     real          Y,F,G,H,Fp,Geps,Heps2,epsC,eps2C,epsV,eps2V,VV,FF;
-    double        isp=0.564189583547756;
     int           icoul,ivdw;
     int           nri;
     int *         iinr;
@@ -523,11 +522,11 @@ gmx_nb_free_energy_kernel(t_nblist *                nlist,
                 if (r != 0)
                 {
                     VV    = gmx_erf(ewc*r)*rinv;
-                    FF    = rinv*rinv*(VV - 2.0*ewc*isp*exp(-ewc*ewc*rsq));
+                    FF    = rinv*rinv*(VV - ewc*M_2_SQRTPI*exp(-ewc*ewc*rsq));
                 }
                 else
                 {
-                    VV    = ewc*2.0/sqrt(M_PI);
+                    VV    = ewc*M_2_SQRTPI;
                     FF    = 0;
                 }
 
index 9dbacd2016ab9b831258c2cf586513ce1efb8de3..86781abc5aa98808df6bd3d07928fb3654ee9f99 100644 (file)
@@ -591,8 +591,8 @@ void calc_verlet_buffer_size(const gmx_mtop_t *mtop,real boxvol,
         b  = calc_ewaldcoeff(ir->rcoulomb,ir->ewald_rtol);
         rc = ir->rcoulomb;
         br = b*rc;
-        md_el = elfac*(2*b*exp(-br*br)/(sqrt(M_PI)*rc) + gmx_erfc(br)/(rc*rc));
-        dd_el = elfac/(rc*rc)*(4*b*(1 + br*br)*exp(-br*br)/sqrt(M_PI) + 2*gmx_erfc(br)/rc);
+        md_el = elfac*(b*exp(-br*br)*M_2_SQRTPI/rc + gmx_erfc(br)/(rc*rc));
+        dd_el = elfac/(rc*rc)*(2*b*(1 + br*br)*exp(-br*br)*M_2_SQRTPI + 2*gmx_erfc(br)/rc);
     }
     else
     {
index 125161c9c413ed2c4986cea717199288ec4afaa7..11ab258d9eccd9dcee739bf623079dd7df5231b9 100644 (file)
  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
  */
 
+#include "maths.h"
+/* Note that floating-point constants in CUDA code should be suffixed
+ * with f (e.g. 0.5f), to stop the compiler producing intermediate
+ * code that is in double precision.
+ */
+
 #if __CUDA_ARCH__ >= 300
 #define REDUCE_SHUFFLE
 /* On Kepler pre-loading i-atom types to shmem gives a few %,
@@ -188,7 +194,7 @@ __global__ void NB_KERNEL_FUNC_NAME(k_nbnxn)
 #ifdef EL_RF
         E_el *= -nbparam.epsfac*0.5f*c_rf;
 #else
-        E_el *= -nbparam.epsfac*beta*0.56418958f; /* last factor 1/sqrt(pi) */
+        E_el *= -nbparam.epsfac*beta*M_FLOAT_1_SQRTPI; /* last factor 1/sqrt(pi) */
 #endif
     }
 #endif
index 2cb8ece468789775321695f67bf68eb11ba68c47..6891d13bb769999c31c23e295de6f74318f593d4 100644 (file)
  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
  */
 
+#include "maths.h"
+/* Note that floating-point constants in CUDA code should be suffixed
+ * with f (e.g. 0.5f), to stop the compiler producing intermediate
+ * code that is in double precision.
+ */
+
 /*
    Kernel launch parameters:
     - #blocks   = #pair lists, blockId = pair list Id
@@ -167,7 +173,7 @@ __global__ void NB_KERNEL_FUNC_NAME(k_nbnxn, _legacy)
 #ifdef EL_RF
         E_el *= -nbparam.epsfac*0.5f*c_rf;
 #else
-        E_el *= -nbparam.epsfac*beta*0.56418958f; /* last factor 1/sqrt(pi) */
+        E_el *= -nbparam.epsfac*beta*M_FLOAT_1_SQRTPI; /* last factor 1/sqrt(pi) */
 #endif
     }
 #endif
index 5233ddffc37abb21d83feb90db9adc2ffa48a705..610be4571d4a6504d9991f9761cdadd8c783f33c 100644 (file)
  * And Hey:
  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
  */
+
+/* Note that floating-point constants in CUDA code should be suffixed
+ * with f (e.g. 0.5f), to stop the compiler producing intermediate
+ * code that is in double precision.
+ */
+
 #include "../../gmxlib/cuda_tools/vectype_ops.cuh"
 
 #ifndef NBNXN_CUDA_KERNEL_UTILS_CUH
index ef2c42f9a0484e69eab4a3067e0a79ef193da3b8..918c92b039f3a47298a1ff0f313d5f315601ad60 100644 (file)
@@ -177,7 +177,7 @@ nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t     *nbl,
             else
             {
                 /* last factor 1/sqrt(pi) */
-                vctot *= -facel*iconst->ewaldcoeff*0.564189583548;
+                vctot *= -facel*iconst->ewaldcoeff*M_1_SQRTPI;
             }
         }
         
index 153e9f5b20cca110c9494f06b6b87b5399ea6824..91607eb4eeb9aecc6b5fc285d9db2424561e4f0e 100644 (file)
@@ -547,8 +547,8 @@ NBK_FUNC_NAME_S128_OR_S256(nbnxn_kernel,energrp)
 #endif
 #endif
 #ifdef CALC_COUL_EWALD
-            /* 0.5*beta*2/sqrt(pi) */
-            Vc_sub_self = 0.5*ic->ewaldcoeff*1.128379167095513;
+            /* beta/sqrt(pi) */
+            Vc_sub_self = 0.5*ic->ewaldcoeff*M_2_SQRTPI;
 #endif
 
             for(ia=0; ia<UNROLLI; ia++)
index 8b2228919c26dec9ac3cc314e661f0112e23d89f..e41f7c8db710bf942dadf01eec9e3c8c34e14b3f 100644 (file)
@@ -662,7 +662,6 @@ static void fill_table(t_tabledata *td,int tp,const t_forcerec *fr)
   /* Temporary parameters */
   gmx_bool bSwitch,bShift;
   double ewc=fr->ewaldcoeff;
-  double isp= 0.564189583547756;
    
   bSwitch = ((tp == etabLJ6Switch) || (tp == etabLJ12Switch) || 
             (tp == etabCOULSwitch) ||
@@ -811,13 +810,13 @@ static void fill_table(t_tabledata *td,int tp,const t_forcerec *fr)
     case etabEwald:
     case etabEwaldSwitch:
       Vtab  = gmx_erfc(ewc*r)/r;
-      Ftab  = gmx_erfc(ewc*r)/r2+2*exp(-(ewc*ewc*r2))*ewc*isp/r;
+      Ftab  = gmx_erfc(ewc*r)/r2+exp(-(ewc*ewc*r2))*ewc*M_2_SQRTPI/r;
       break;
     case etabEwaldUser:
     case etabEwaldUserSwitch:
       /* Only calculate minus the reciprocal space contribution */
       Vtab  = -gmx_erf(ewc*r)/r;
-      Ftab  = -gmx_erf(ewc*r)/r2+2*exp(-(ewc*ewc*r2))*ewc*isp/r;
+      Ftab  = -gmx_erf(ewc*r)/r2+exp(-(ewc*ewc*r2))*ewc*M_2_SQRTPI/r;
       break;
     case etabRF:
     case etabRF_ZERO:
index 605b297617ba670f67357c4661e4ebbadf38e8a1..331980151ca7b9f145116ff724585fe82249180f 100644 (file)
@@ -95,7 +95,7 @@ real derf;
  erfarg=(x-a[3])/(a[4]*a[4]);
         erfarg2=erfarg*erfarg;
         erfval=gmx_erf(erfarg)/2;
-        derf=(2.0/sqrt(M_PI))*(a[1]-a[2])/2*exp(-erfarg2)/(a[4]*a[4]);
+        derf=M_2_SQRTPI*(a[1]-a[2])/2*exp(-erfarg2)/(a[4]*a[4]);
         *y=(a[1]+a[2])/2-(a[1]-a[2])*erfval;
         dyda[1]=1/2-erfval;
         dyda[2]=1/2+erfval;