Clean up some mathematical constants

author Mark Abraham <mark.j.abraham@gmail.com>

Thu, 22 Nov 2012 14:48:29 +0000 (15:48 +0100)

committer Mark Abraham <mark.j.abraham@gmail.com>

Mon, 26 Nov 2012 14:24:28 +0000 (15:24 +0100)
author Mark Abraham <mark.j.abraham@gmail.com>
Thu, 22 Nov 2012 14:48:29 +0000 (15:48 +0100)
committer Mark Abraham <mark.j.abraham@gmail.com>
Mon, 26 Nov 2012 14:24:28 +0000 (15:24 +0100)
diff --git a/include/maths.h b/include/maths.h

index e2c944bb04c78d1e827d0c3c1fa8298a61e8c8fe..a4fd55ded43197a4b2cbea603185e9fae4fee403 100644 (file)
--- a/include/maths.h
+++ b/include/maths.h
@@ -64,7 +64,22 @@ extern "C" {
  #ifndef M_1_PI
  #define M_1_PI      0.31830988618379067154
  #endif
-    
+
+#ifndef M_FLOAT_1_SQRTPI /* used in CUDA kernels */
+/* 1.0 / sqrt(M_PI) */
+#define M_FLOAT_1_SQRTPI 0.564189583547756f
+#endif
+
+#ifndef M_1_SQRTPI
+/* 1.0 / sqrt(M_PI) */
+#define M_1_SQRTPI 0.564189583547756
+#endif
+
+#ifndef M_2_SQRTPI
+/* 2.0 / sqrt(M_PI) */
+#define M_2_SQRTPI  1.128379167095513
+#endif
+
  /* Suzuki-Yoshida Constants, for n=3 and n=5, for symplectic integration  */
  /* for n=1, w0 = 1 */
  /* for n=3, w0 = w2 = 1/(2-2^-(1/3)), w1 = 1-2*w0 */
diff --git a/src/contrib/gen_table.c b/src/contrib/gen_table.c

index c0f0116409ff1a78936be9368babee20ae53fd19..1ae2912acef40caaef85b1232df7eabae101b3b4 100644 (file)
--- a/src/contrib/gen_table.c
+++ b/src/contrib/gen_table.c
@@ -47,12 +47,12 @@ enum { mGuillot2001a, mAB1, mLjc, mMaaren, mGuillot_Maple, mHard_Wall, mGG, mGG_
  
  static double erf2(double x)
  {
-  return -(4*x/(sqrt(M_PI)))*exp(-x*x);
+  return -(2*x*M_2_SQRTPI)*exp(-x*x);
  }
  
  static double erf1(double x)
  {
-  return (2/sqrt(M_PI))*exp(-x*x);
+  return M_2_SQRTPI*exp(-x*x);
  }
  
  static void do_hard(FILE *fp,int pts_nm,double efac,double delta)
@@ -142,7 +142,6 @@ static void lo_do_ljc_pme(double r,
                           double *vr,double *fr)
  {
    double r2,r_6,r_12;
-  double isp= 0.564189583547756;
    double ewc;
  
    ewc  = calc_ewaldcoeff(rcoulomb,ewald_rtol);
@@ -152,9 +151,9 @@ static void lo_do_ljc_pme(double r,
    r_12 = r_6*r_6;
    
    *vc   = erfc(ewc*r)/r;
-  /* *vc2  = 2*erfc(ewc*r)/(r*r2)+4*exp(-(ewc*ewc*r2))*ewc*isp/r2+
-     4*ewc*ewc*ewc*exp(-(ewc*ewc*r2))*isp;*/
-  *fc  = 2*ewc*exp(-ewc*ewc*r2)*isp/r + erfc(ewc*r)/r2;
+  /* *vc2  = 2*erfc(ewc*r)/(r*r2)+2*exp(-(ewc*ewc*r2))*ewc*M_2_SQRTPI/r2+
+     2*ewc*ewc*ewc*exp(-(ewc*ewc*r2))*M_2_SQRTPI;*/
+  *fc  = ewc*exp(-ewc*ewc*r2)*M_2_SQRTPI/r + erfc(ewc*r)/r2;
  
    *vd  = -r_6;
    *fd  = -6.0*(*vd)/r;
diff --git a/src/gmxlib/ewald_util.c b/src/gmxlib/ewald_util.c

index e688418f622982f703d3b1302f439b2e33487b3d..b88d86adcf4b98c865eb2bfab1b65807850b3fe9 100644 (file)
--- a/src/gmxlib/ewald_util.c
+++ b/src/gmxlib/ewald_util.c
@@ -103,14 +103,12 @@ real ewald_LRcorrection(FILE *fplog,
    real    eps,eps2,VV,FF,F,Y,Geps,Heps2,Fp,fijC,r1t;
    real    *VFtab=fr->coulvdwtab;
    int     n0,n1,nnn;
-#else
-  double  isp=0.564189583547756;
  #endif
    gmx_bool    bFreeEnergy = (chargeB != NULL);
    gmx_bool    bMolPBC = fr->bMolPBC;
  
    one_4pi_eps = ONE_4PI_EPS0/fr->epsilon_r;
-  vr0 = ewc*2/sqrt(M_PI);
+  vr0 = ewc*M_2_SQRTPI;
  
    AA         = excl->a;
    Vexcl      = 0;
@@ -237,7 +235,7 @@ real ewald_LRcorrection(FILE *fplog,
  #define              R_ERF_R_INACC 0.1
  #endif
               if (ewcdr > R_ERF_R_INACC) {
-               fscal = rinv2*(vc - 2.0*qqA*ewc*isp*exp(-ewcdr*ewcdr));
+               fscal = rinv2*(vc - qqA*ewc*M_2_SQRTPI*exp(-ewcdr*ewcdr));
               } else {
                 /* Use a fourth order series expansion for small ewcdr */
                 fscal = ewc*ewc*qqA*vr0*(2.0/3.0 - 0.4*ewcdr*ewcdr);
@@ -302,7 +300,7 @@ real ewald_LRcorrection(FILE *fplog,
               v      = gmx_erf(ewc*dr)*rinv;
               vc     = qqL*v;
               Vexcl += vc;
-             fscal  = rinv2*(vc-2.0*qqL*ewc*isp*exp(-ewc*ewc*dr2));
+             fscal  = rinv2*(vc-qqL*ewc*M_2_SQRTPI*exp(-ewc*ewc*dr2));
               svmul(fscal,dx,df);
               rvec_inc(f[k],df);
               rvec_dec(f[i],df);
@@ -338,7 +336,7 @@ real ewald_LRcorrection(FILE *fplog,
      for(q=0; q<(bFreeEnergy ? 2 : 1); q++) {
        if (calc_excl_corr) {
          /* Self-energy correction */
-        Vself[q] = ewc*one_4pi_eps*fr->q2sum[q]/sqrt(M_PI);
+        Vself[q] = ewc*one_4pi_eps*fr->q2sum[q]*M_1_SQRTPI;
        }
        
        /* Apply surface dipole correction:
diff --git a/src/gmxlib/nonbonded/nb_free_energy.c b/src/gmxlib/nonbonded/nb_free_energy.c

index a4bbe5b51247a78a77b70efb91e0eafcf640f854..fb7bbefbd31bb94b80a56d4c9e235a9e1306cee4 100644 (file)
--- a/src/gmxlib/nonbonded/nb_free_energy.c
+++ b/src/gmxlib/nonbonded/nb_free_energy.c
@@ -79,7 +79,6 @@ gmx_nb_free_energy_kernel(t_nblist *                nlist,
      int           do_coultab,do_vdwtab,do_tab,tab_elemsize;
      int           n0,n1C,n1V,nnn;
      real          Y,F,G,H,Fp,Geps,Heps2,epsC,eps2C,epsV,eps2V,VV,FF;
-    double        isp=0.564189583547756;
      int           icoul,ivdw;
      int           nri;
      int *         iinr;
@@ -523,11 +522,11 @@ gmx_nb_free_energy_kernel(t_nblist *                nlist,
                  if (r != 0)
                  {
                      VV    = gmx_erf(ewc*r)*rinv;
-                    FF    = rinv*rinv*(VV - 2.0*ewc*isp*exp(-ewc*ewc*rsq));
+                    FF    = rinv*rinv*(VV - ewc*M_2_SQRTPI*exp(-ewc*ewc*rsq));
                  }
                  else
                  {
-                    VV    = ewc*2.0/sqrt(M_PI);
+                    VV    = ewc*M_2_SQRTPI;
                      FF    = 0;
                  }
  
diff --git a/src/kernel/calc_verletbuf.c b/src/kernel/calc_verletbuf.c

index 9dbacd2016ab9b831258c2cf586513ce1efb8de3..86781abc5aa98808df6bd3d07928fb3654ee9f99 100644 (file)
--- a/src/kernel/calc_verletbuf.c
+++ b/src/kernel/calc_verletbuf.c
@@ -591,8 +591,8 @@ void calc_verlet_buffer_size(const gmx_mtop_t *mtop,real boxvol,
          b  = calc_ewaldcoeff(ir->rcoulomb,ir->ewald_rtol);
          rc = ir->rcoulomb;
          br = b*rc;
-        md_el = elfac*(2*b*exp(-br*br)/(sqrt(M_PI)*rc) + gmx_erfc(br)/(rc*rc));
-        dd_el = elfac/(rc*rc)*(4*b*(1 + br*br)*exp(-br*br)/sqrt(M_PI) + 2*gmx_erfc(br)/rc);
+        md_el = elfac*(b*exp(-br*br)*M_2_SQRTPI/rc + gmx_erfc(br)/(rc*rc));
+        dd_el = elfac/(rc*rc)*(2*b*(1 + br*br)*exp(-br*br)*M_2_SQRTPI + 2*gmx_erfc(br)/rc);
      }
      else
      {
diff --git a/src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh b/src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh

index 125161c9c413ed2c4986cea717199288ec4afaa7..11ab258d9eccd9dcee739bf623079dd7df5231b9 100644 (file)
--- a/src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh
+++ b/src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh
@@ -33,6 +33,12 @@
   * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
   */
  
+#include "maths.h"
+/* Note that floating-point constants in CUDA code should be suffixed
+ * with f (e.g. 0.5f), to stop the compiler producing intermediate
+ * code that is in double precision.
+ */
+
  #if __CUDA_ARCH__ >= 300
  #define REDUCE_SHUFFLE
  /* On Kepler pre-loading i-atom types to shmem gives a few %,
@@ -188,7 +194,7 @@ __global__ void NB_KERNEL_FUNC_NAME(k_nbnxn)
  #ifdef EL_RF
          E_el *= -nbparam.epsfac*0.5f*c_rf;
  #else
-        E_el *= -nbparam.epsfac*beta*0.56418958f; /* last factor 1/sqrt(pi) */
+        E_el *= -nbparam.epsfac*beta*M_FLOAT_1_SQRTPI; /* last factor 1/sqrt(pi) */
  #endif
      }
  #endif
diff --git a/src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_legacy.cuh b/src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_legacy.cuh

index 2cb8ece468789775321695f67bf68eb11ba68c47..6891d13bb769999c31c23e295de6f74318f593d4 100644 (file)
--- a/src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_legacy.cuh
+++ b/src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_legacy.cuh
@@ -33,6 +33,12 @@
   * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
   */
  
+#include "maths.h"
+/* Note that floating-point constants in CUDA code should be suffixed
+ * with f (e.g. 0.5f), to stop the compiler producing intermediate
+ * code that is in double precision.
+ */
+
  /*
     Kernel launch parameters:
      - #blocks   = #pair lists, blockId = pair list Id
@@ -167,7 +173,7 @@ __global__ void NB_KERNEL_FUNC_NAME(k_nbnxn, _legacy)
  #ifdef EL_RF
          E_el *= -nbparam.epsfac*0.5f*c_rf;
  #else
-        E_el *= -nbparam.epsfac*beta*0.56418958f; /* last factor 1/sqrt(pi) */
+        E_el *= -nbparam.epsfac*beta*M_FLOAT_1_SQRTPI; /* last factor 1/sqrt(pi) */
  #endif
      }
  #endif
diff --git a/src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh b/src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh

index 5233ddffc37abb21d83feb90db9adc2ffa48a705..610be4571d4a6504d9991f9761cdadd8c783f33c 100644 (file)
--- a/src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh
+++ b/src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh
@@ -32,6 +32,12 @@
   * And Hey:
   * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
   */
+
+/* Note that floating-point constants in CUDA code should be suffixed
+ * with f (e.g. 0.5f), to stop the compiler producing intermediate
+ * code that is in double precision.
+ */
+
  #include "../../gmxlib/cuda_tools/vectype_ops.cuh"
  
  #ifndef NBNXN_CUDA_KERNEL_UTILS_CUH
diff --git a/src/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c b/src/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c

index ef2c42f9a0484e69eab4a3067e0a79ef193da3b8..918c92b039f3a47298a1ff0f313d5f315601ad60 100644 (file)
--- a/src/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c
+++ b/src/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c
@@ -177,7 +177,7 @@ nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t     *nbl,
              else
              {
                  /* last factor 1/sqrt(pi) */
-                vctot *= -facel*iconst->ewaldcoeff*0.564189583548;
+                vctot *= -facel*iconst->ewaldcoeff*M_1_SQRTPI;
              }
          }
          
diff --git a/src/mdlib/nbnxn_kernels/nbnxn_kernel_x86_simd_outer.h b/src/mdlib/nbnxn_kernels/nbnxn_kernel_x86_simd_outer.h

index 153e9f5b20cca110c9494f06b6b87b5399ea6824..91607eb4eeb9aecc6b5fc285d9db2424561e4f0e 100644 (file)
--- a/src/mdlib/nbnxn_kernels/nbnxn_kernel_x86_simd_outer.h
+++ b/src/mdlib/nbnxn_kernels/nbnxn_kernel_x86_simd_outer.h
@@ -547,8 +547,8 @@ NBK_FUNC_NAME_S128_OR_S256(nbnxn_kernel,energrp)
  #endif
  #endif
  #ifdef CALC_COUL_EWALD
-            /* 0.5*beta*2/sqrt(pi) */
-            Vc_sub_self = 0.5*ic->ewaldcoeff*1.128379167095513;
+            /* beta/sqrt(pi) */
+            Vc_sub_self = 0.5*ic->ewaldcoeff*M_2_SQRTPI;
  #endif
  
              for(ia=0; ia<UNROLLI; ia++)
diff --git a/src/mdlib/tables.c b/src/mdlib/tables.c

index 8b2228919c26dec9ac3cc314e661f0112e23d89f..e41f7c8db710bf942dadf01eec9e3c8c34e14b3f 100644 (file)
--- a/src/mdlib/tables.c
+++ b/src/mdlib/tables.c
@@ -662,7 +662,6 @@ static void fill_table(t_tabledata *td,int tp,const t_forcerec *fr)
    /* Temporary parameters */
    gmx_bool bSwitch,bShift;
    double ewc=fr->ewaldcoeff;
-  double isp= 0.564189583547756;
     
    bSwitch = ((tp == etabLJ6Switch) || (tp == etabLJ12Switch) || 
              (tp == etabCOULSwitch) ||
@@ -811,13 +810,13 @@ static void fill_table(t_tabledata *td,int tp,const t_forcerec *fr)
      case etabEwald:
      case etabEwaldSwitch:
        Vtab  = gmx_erfc(ewc*r)/r;
-      Ftab  = gmx_erfc(ewc*r)/r2+2*exp(-(ewc*ewc*r2))*ewc*isp/r;
+      Ftab  = gmx_erfc(ewc*r)/r2+exp(-(ewc*ewc*r2))*ewc*M_2_SQRTPI/r;
        break;
      case etabEwaldUser:
      case etabEwaldUserSwitch:
        /* Only calculate minus the reciprocal space contribution */
        Vtab  = -gmx_erf(ewc*r)/r;
-      Ftab  = -gmx_erf(ewc*r)/r2+2*exp(-(ewc*ewc*r2))*ewc*isp/r;
+      Ftab  = -gmx_erf(ewc*r)/r2+exp(-(ewc*ewc*r2))*ewc*M_2_SQRTPI/r;
        break;
      case etabRF:
      case etabRF_ZERO:
diff --git a/src/tools/expfit.c b/src/tools/expfit.c

index 605b297617ba670f67357c4661e4ebbadf38e8a1..331980151ca7b9f145116ff724585fe82249180f 100644 (file)
--- a/src/tools/expfit.c
+++ b/src/tools/expfit.c
@@ -95,7 +95,7 @@ real derf;
   erfarg=(x-a[3])/(a[4]*a[4]);
          erfarg2=erfarg*erfarg;
          erfval=gmx_erf(erfarg)/2;
-        derf=(2.0/sqrt(M_PI))*(a[1]-a[2])/2*exp(-erfarg2)/(a[4]*a[4]);
+        derf=M_2_SQRTPI*(a[1]-a[2])/2*exp(-erfarg2)/(a[4]*a[4]);
          *y=(a[1]+a[2])/2-(a[1]-a[2])*erfval;
          dyda[1]=1/2-erfval;
          dyda[2]=1/2+erfval;
author	Mark Abraham <mark.j.abraham@gmail.com>
	Thu, 22 Nov 2012 14:48:29 +0000 (15:48 +0100)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Mon, 26 Nov 2012 14:24:28 +0000 (15:24 +0100)
include/maths.h		patch \| blob \| history
src/contrib/gen_table.c		patch \| blob \| history
src/gmxlib/ewald_util.c		patch \| blob \| history
src/gmxlib/nonbonded/nb_free_energy.c		patch \| blob \| history
src/kernel/calc_verletbuf.c		patch \| blob \| history
src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh		patch \| blob \| history
src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_legacy.cuh		patch \| blob \| history
src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh		patch \| blob \| history
src/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c		patch \| blob \| history
src/mdlib/nbnxn_kernels/nbnxn_kernel_x86_simd_outer.h		patch \| blob \| history
src/mdlib/tables.c		patch \| blob \| history
src/tools/expfit.c		patch \| blob \| history