Fix OCL compiler warnings
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_utils.clh b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_utils.clh
index be24e7906ec7df56939bfc74e07ed7ae9e59c42b..753e0b897f44ace00344ea035b38632f8f9fdbb0 100644
@@ -33,6 +33,8 @@
  * the research papers on the package. Check out http://www.gromacs.org.
  */
 
+#define GMX_DOUBLE 0
+
 #include "gromacs/gpu_utils/vectype_ops.clh"
 #include "gromacs/gpu_utils/device_utils.clh"
 #include "gromacs/mdlib/nbnxn_consts.h"
 #define ONE_TWELVETH_F  0.08333333f
 
 
+#ifdef __GNUC__
+/* GCC, clang, and some ICC pretending to be GCC */
+#  define gmx_unused __attribute__ ((unused))
+#else
+#  define gmx_unused
+#endif
+
 // Data structures shared between OpenCL device code and OpenCL host code
 // TODO: review, improve
 // Replaced real by float for now, to avoid including any other header
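
For reference, a minimal sketch of how the new gmx_unused macro is meant to be used (the function below is hypothetical, not part of this patch). On GCC-compatible compilers the attribute silences unused-parameter warnings for arguments that are only read in some preprocessor branches; elsewhere it expands to nothing:

    /* Hypothetical sketch: tidxj is only read when _AMD_SOURCE_ is
     * not defined, so it is tagged gmx_unused to keep both branches
     * warning-free. */
    int halfWarpOffset(int gmx_unused tidxj)
    {
    #if defined _AMD_SOURCE_
        return 0;
    #else
        return tidxj & 4;    /* assumes a split cluster size of 4 */
    #endif
    }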
@@ -188,8 +197,7 @@ void preloadCj4Generic(__local int        *sm_cjPreload,
                        const __global int *gm_cj,
                        int                 tidxi,
                        int                 tidxj,
-                       bool                iMaskCond)
-
+                       bool gmx_unused     iMaskCond)
 {
     /* Pre-load cj into shared memory */
 #if defined _AMD_SOURCE_ //TODO: fix by setting c_nbnxnGpuClusterpairSplit properly
@@ -201,7 +209,6 @@ void preloadCj4Generic(__local int        *sm_cjPreload,
     const int c_clSize                   = CL_SIZE;
     const int c_nbnxnGpuClusterpairSplit = 2;
     const int c_splitClSize              = c_clSize/c_nbnxnGpuClusterpairSplit;
-
     if ((tidxj == 0 | tidxj == c_splitClSize) & (tidxi < c_nbnxnGpuJgroupSize))
     {
         sm_cjPreload[tidxi + tidxj * c_nbnxnGpuJgroupSize/c_splitClSize] = gm_cj[tidxi];
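
To make the preload selection above concrete: assuming CL_SIZE is 8 (so c_splitClSize is 4) and c_nbnxnGpuJgroupSize is 4, only the first row of each half-warp (tidxj == 0 or tidxj == c_splitClSize) with tidxi < 4 issues a load, and tidxj * c_nbnxnGpuJgroupSize/c_splitClSize places the two rows in slots 0-3 and 4-7 of sm_cjPreload, i.e. one private copy per half-warp. Using the bitwise | and & rather than || and && keeps the predicate branchless, which is safe here because the comparisons have no side effects.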
@@ -236,11 +243,11 @@ typedef __local int* CjType;
  * it's ready. This function does not call a barrier.
  */
 gmx_opencl_inline
-void preloadCj4(CjType             *cjs,
-                const __global int *gm_cj,
-                int                 tidxi,
-                int                 tidxj,
-                bool                iMaskCond)
+void preloadCj4(CjType gmx_unused             *cjs,
+                const __global int gmx_unused *gm_cj,
+                int                            tidxi,
+                int                            tidxj,
+                bool                           iMaskCond)
 {
 #if USE_SUBGROUP_PRELOAD
     *cjs = preloadCj4Subgroup(gm_cj);
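
Because preloadCj4() does not synchronize, a caller must publish the preloaded indices itself before the first read; a hypothetical call-site sketch (variable names illustrative, not from this file):

    CjType cjs;
    preloadCj4(&cjs, gm_cjBase, tidxi, tidxj, imask != 0u);
    barrier(CLK_LOCAL_MEM_FENCE);   /* make sm_cjPreload visible to the work-group */
    int cj = loadCjPreload(cjs, jm, tidxi, tidxj);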
@@ -252,10 +259,10 @@ void preloadCj4(CjType             *cjs,
 }
 
 gmx_opencl_inline
-int loadCjPreload(__local int*        sm_cjPreload,
-                  int                 jm,
-                  int                 tidxi,
-                  int                 tidxj)
+int loadCjPreload(__local int                   *sm_cjPreload,
+                  int                            jm,
+                  int gmx_unused                 tidxi,
+                  int gmx_unused                 tidxj)
 {
 #if defined _AMD_SOURCE_
     int       warpLoadOffset = 0; //TODO: fix by setting c_nbnxnGpuClusterpairSplit properly
@@ -263,8 +270,7 @@ int loadCjPreload(__local int*        sm_cjPreload,
     const int c_clSize                   = CL_SIZE;
     const int c_nbnxnGpuClusterpairSplit = 2;
     const int c_splitClSize              = c_clSize/c_nbnxnGpuClusterpairSplit;
-
-    int       warpLoadOffset = (tidxj & c_splitClSize) * c_nbnxnGpuJgroupSize/c_splitClSize;
+    int       warpLoadOffset             = (tidxj & c_splitClSize) * c_nbnxnGpuJgroupSize/c_splitClSize;
 #endif
     return sm_cjPreload[jm + warpLoadOffset];
 }
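
A worked example of the offset above, assuming CL_SIZE is 8 and c_nbnxnGpuJgroupSize is 4: c_splitClSize is 4, so tidxj & c_splitClSize evaluates to 0 for the first half-warp (tidxj 0-3) and 4 for the second (tidxj 4-7), while the factor c_nbnxnGpuJgroupSize/c_splitClSize is 1. warpLoadOffset is therefore 0 or 4, steering each half-warp to the private copy written for it by preloadCj4Generic() above.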
@@ -369,7 +375,7 @@ void calculate_potential_switch_F(cl_nbparam_params_t *nbparam,
                                   float                inv_r,
                                   float                r2,
                                   float               *F_invr,
-                                  float               *E_lj)
+                                  const float         *E_lj)
 {
     float r, r_switch;
     float sw, dsw;
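
Making E_lj a const pointer here is sound because the force-only switch path reads the energy without modifying it: with a switch function S(r) applied on r_switch <= r <= r_cut, the potential becomes V(r)*S(r), and by the product rule the scalar force gains a term in V itself, F_sw(r) = F(r)*S(r) - V(r)*S'(r), so *E_lj only feeds the force correction.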
@@ -430,14 +436,14 @@ void calculate_potential_switch_F_E(cl_nbparam_params_t *nbparam,
  *  geometric combination rule.
  */
 gmx_opencl_inline
-void calculate_lj_ewald_comb_geom_F(__constant float * nbfp_comb_climg2d,
-                                    int                typei,
-                                    int                typej,
-                                    float              r2,
-                                    float              inv_r2,
-                                    float              lje_coeff2,
-                                    float              lje_coeff6_6,
-                                    float             *F_invr)
+void calculate_lj_ewald_comb_geom_F(__constant const float *nbfp_comb_climg2d,
+                                    int                     typei,
+                                    int                     typej,
+                                    float                   r2,
+                                    float                   inv_r2,
+                                    float                   lje_coeff2,
+                                    float                   lje_coeff6_6,
+                                    float                  *F_invr)
 {
     float c6grid, inv_r6_nm, cr2, expmcr2, poly;
 
@@ -457,17 +463,17 @@ void calculate_lj_ewald_comb_geom_F(__constant float * nbfp_comb_climg2d,
  *  geometric combination rule.
  */
 gmx_opencl_inline
-void calculate_lj_ewald_comb_geom_F_E(__constant float    *nbfp_comb_climg2d,
-                                      cl_nbparam_params_t *nbparam,
-                                      int                  typei,
-                                      int                  typej,
-                                      float                r2,
-                                      float                inv_r2,
-                                      float                lje_coeff2,
-                                      float                lje_coeff6_6,
-                                      float                int_bit,
-                                      float               *F_invr,
-                                      float               *E_lj)
+void calculate_lj_ewald_comb_geom_F_E(__constant const float *nbfp_comb_climg2d,
+                                      cl_nbparam_params_t    *nbparam,
+                                      int                     typei,
+                                      int                     typej,
+                                      float                   r2,
+                                      float                   inv_r2,
+                                      float                   lje_coeff2,
+                                      float                   lje_coeff6_6,
+                                      float                   int_bit,
+                                      float                  *F_invr,
+                                      float                  *E_lj)
 {
     float c6grid, inv_r6_nm, cr2, expmcr2, poly, sh_mask;
 
@@ -493,18 +499,18 @@ void calculate_lj_ewald_comb_geom_F_E(__constant float    *nbfp_comb_climg2d,
  *  of this is pretty small and LB on the CPU is anyway very slow.
  */
 gmx_opencl_inline
-void calculate_lj_ewald_comb_LB_F_E(__constant float    *nbfp_comb_climg2d,
-                                    cl_nbparam_params_t *nbparam,
-                                    int                  typei,
-                                    int                  typej,
-                                    float                r2,
-                                    float                inv_r2,
-                                    float                lje_coeff2,
-                                    float                lje_coeff6_6,
-                                    float                int_bit,
-                                    bool                 with_E_lj,
-                                    float               *F_invr,
-                                    float               *E_lj)
+void calculate_lj_ewald_comb_LB_F_E(__constant const float *nbfp_comb_climg2d,
+                                    cl_nbparam_params_t    *nbparam,
+                                    int                     typei,
+                                    int                     typej,
+                                    float                   r2,
+                                    float                   inv_r2,
+                                    float                   lje_coeff2,
+                                    float                   lje_coeff6_6,
+                                    float                   int_bit,
+                                    bool                    with_E_lj,
+                                    float                  *F_invr,
+                                    float                  *E_lj)
 {
     float c6grid, inv_r6_nm, cr2, expmcr2, poly;
     float sigma, sigma2, epsilon;
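
By contrast, the Lorentz-Berthelot rule implemented by this function combines per-type parameters as sigma_ij = (sigma_i + sigma_j)/2 and epsilon_ij = sqrt(epsilon_i * epsilon_j), from which c6_ij = 4 * epsilon_ij * sigma_ij^6; the extra multiplies and square roots are what make this path noticeably more expensive than the geometric rule.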
@@ -540,9 +546,9 @@ void calculate_lj_ewald_comb_LB_F_E(__constant float    *nbfp_comb_climg2d,
  *  Original idea: from the OpenMM project
  */
 gmx_opencl_inline float
-interpolate_coulomb_force_r(__constant float *coulomb_tab_climg2d,
-                            float             r,
-                            float             scale)
+interpolate_coulomb_force_r(__constant const float *coulomb_tab_climg2d,
+                            float                   r,
+                            float                   scale)
 {
     float   normalized = scale * r;
     int     index      = (int) normalized;
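
The two context lines above begin a classic tabulated-force lookup; a minimal sketch of how such an interpolation typically completes (illustrative, not the exact body of this function):

    float fraction = normalized - index;   /* position inside the table cell */
    float left     = coulomb_tab_climg2d[index];
    float right    = coulomb_tab_climg2d[index + 1];
    return mix(left, right, fraction);     /* OpenCL built-in linear interpolation */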
@@ -835,10 +841,9 @@ void reduce_energy_pow2(volatile __local float  *buf,
                         volatile __global float *e_el,
                         unsigned int             tidx)
 {
-    int     i, j;
-    float   e1, e2;
+    int          j;
 
-    i = WARP_SIZE/2;
+    unsigned int i = WARP_SIZE/2;
 
     /* Can't just use i as the loop variable because then nvcc refuses to unroll. */
     for (j = WARP_SIZE_LOG2 - 1; j > 0; j--)
@@ -854,8 +859,8 @@ void reduce_energy_pow2(volatile __local float  *buf,
     /* last reduction step, writing to global mem */
     if (tidx == 0)
     {
-        e1 = buf[              tidx] + buf[              tidx + i];
-        e2 = buf[FBUF_STRIDE + tidx] + buf[FBUF_STRIDE + tidx + i];
+        float e1 = buf[              tidx] + buf[              tidx + i];
+        float e2 = buf[FBUF_STRIDE + tidx] + buf[FBUF_STRIDE + tidx + i];
 
         atomicAdd_g_f(e_lj, e1);
         atomicAdd_g_f(e_el, e2);
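
For reference, a minimal serial model of the power-of-two tree reduction above (hypothetical plain C, assuming WARP_SIZE is a power of two such as 32):

    #define WARP_SIZE 32

    float reduce_pow2_model(float *buf)
    {
        /* each pass halves the active span: 32 -> 16 -> 8 -> 4 -> 2 -> 1 */
        for (int i = WARP_SIZE/2; i >= 1; i >>= 1)
        {
            for (int tidx = 0; tidx < i; tidx++)
            {
                buf[tidx] += buf[tidx + i];
            }
        }
        return buf[0];   /* the kernel's thread 0 atomically adds this into global memory */
    }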