Fix OCL compiler warnings
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_utils.clh b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_utils.clh
index be24e7906ec7df56939bfc74e07ed7ae9e59c42b..753e0b897f44ace00344ea035b38632f8f9fdbb0 100644
@@ -33,6 +33,8 @@
  * the research papers on the package. Check out http://www.gromacs.org.
  */
 
+#define GMX_DOUBLE 0
+
 #include "gromacs/gpu_utils/vectype_ops.clh"
 #include "gromacs/gpu_utils/device_utils.clh"
 #include "gromacs/mdlib/nbnxn_consts.h"
 #define ONE_TWELVETH_F  0.08333333f
 
 
+#ifdef __GNUC__
+/* GCC, clang, and some ICC pretending to be GCC */
+#  define gmx_unused __attribute__ ((unused))
+#else
+#  define gmx_unused
+#endif
+
 // Data structures shared between OpenCL device code and OpenCL host code
 // TODO: review, improve
 // Replaced real by float for now, to avoid including any other header
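
For reference, a minimal sketch of how the new gmx_unused macro is meant to be used (the function below is hypothetical, not part of this patch). On GCC-compatible compilers the attribute silences unused-parameter warnings for arguments that are only read in some preprocessor branches; elsewhere it expands to nothing:

    /* Hypothetical sketch: tidxj is only read when _AMD_SOURCE_ is
     * not defined, so it is tagged gmx_unused to keep both branches
     * warning-free. */
    int halfWarpOffset(int gmx_unused tidxj)
    {
    #if defined _AMD_SOURCE_
        return 0;
    #else
        return tidxj & 4;    /* assumes a split cluster size of 4 */
    #endif
    }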
@@ -188,8 +197,7 @@ void preloadCj4Generic(__local int        *sm_cjPreload,
                        const __global int *gm_cj,
                        int                 tidxi,
                        int                 tidxj,
-                       bool                iMaskCond)
-
+                       bool gmx_unused     iMaskCond)
 {
     /* Pre-load cj into shared memory */
 #if defined _AMD_SOURCE_ //TODO: fix by setting c_nbnxnGpuClusterpairSplit properly
@@ -201,7 +209,6 @@ void preloadCj4Generic(__local int        *sm_cjPreload,
     const int c_clSize                   = CL_SIZE;
     const int c_nbnxnGpuClusterpairSplit = 2;
     const int c_splitClSize              = c_clSize/c_nbnxnGpuClusterpairSplit;
-
     if ((tidxj == 0 | tidxj == c_splitClSize) & (tidxi < c_nbnxnGpuJgroupSize))
     {
         sm_cjPreload[tidxi + tidxj * c_nbnxnGpuJgroupSize/c_splitClSize] = gm_cj[tidxi];
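
To make the preload selection above concrete: assuming CL_SIZE is 8 (so c_splitClSize is 4) and c_nbnxnGpuJgroupSize is 4, only the first row of each half-warp (tidxj == 0 or tidxj == c_splitClSize) with tidxi < 4 issues a load, and tidxj * c_nbnxnGpuJgroupSize/c_splitClSize places the two rows in slots 0-3 and 4-7 of sm_cjPreload, i.e. one private copy per half-warp. Using the bitwise | and & rather than || and && keeps the predicate branchless, which is safe here because the comparisons have no side effects.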
@@ -236,11 +243,11 @@ typedef __local int* CjType;
  * it's ready. This function does not call a barrier.
  */
 gmx_opencl_inline
-void preloadCj4(CjType             *cjs,
-                const __global int *gm_cj,
-                int                 tidxi,
-                int                 tidxj,
-                bool                iMaskCond)
+void preloadCj4(CjType gmx_unused             *cjs,
+                const __global int gmx_unused *gm_cj,
+                int                            tidxi,
+                int                            tidxj,
+                bool                           iMaskCond)
 {
 #if USE_SUBGROUP_PRELOAD
     *cjs = preloadCj4Subgroup(gm_cj);
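
Because preloadCj4() does not synchronize, a caller must publish the preloaded indices itself before the first read; a hypothetical call-site sketch (variable names illustrative, not from this file):

    CjType cjs;
    preloadCj4(&cjs, gm_cjBase, tidxi, tidxj, imask != 0u);
    barrier(CLK_LOCAL_MEM_FENCE);   /* make sm_cjPreload visible to the work-group */
    int cj = loadCjPreload(cjs, jm, tidxi, tidxj);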
@@ -252,10 +259,10 @@ void preloadCj4(CjType             *cjs,
 }
 
 gmx_opencl_inline
-int loadCjPreload(__local int*        sm_cjPreload,
-                  int                 jm,
-                  int                 tidxi,
-                  int                 tidxj)
+int loadCjPreload(__local int                   *sm_cjPreload,
+                  int                            jm,
+                  int gmx_unused                 tidxi,
+                  int gmx_unused                 tidxj)
 {
 #if defined _AMD_SOURCE_
     int       warpLoadOffset = 0; //TODO: fix by setting c_nbnxnGpuClusterpairSplit properly
@@ -263,8 +270,7 @@ int loadCjPreload(__local int*        sm_cjPreload,
     const int c_clSize                   = CL_SIZE;
     const int c_nbnxnGpuClusterpairSplit = 2;
     const int c_splitClSize              = c_clSize/c_nbnxnGpuClusterpairSplit;
-
-    int       warpLoadOffset = (tidxj & c_splitClSize) * c_nbnxnGpuJgroupSize/c_splitClSize;
+    int       warpLoadOffset             = (tidxj & c_splitClSize) * c_nbnxnGpuJgroupSize/c_splitClSize;
 #endif
     return sm_cjPreload[jm + warpLoadOffset];
 }
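
A worked example of the offset above, assuming CL_SIZE is 8 and c_nbnxnGpuJgroupSize is 4: c_splitClSize is 4, so tidxj & c_splitClSize evaluates to 0 for the first half-warp (tidxj 0-3) and 4 for the second (tidxj 4-7), while the factor c_nbnxnGpuJgroupSize/c_splitClSize is 1. warpLoadOffset is therefore 0 or 4, steering each half-warp to the private copy written for it by preloadCj4Generic() above.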
@@ -369,7 +375,7 @@ void calculate_potential_switch_F(cl_nbparam_params_t *nbparam,
                                   float                inv_r,
                                   float                r2,
                                   float               *F_invr,
-                                  float               *E_lj)
+                                  const float         *E_lj)
 {
     float r, r_switch;
     float sw, dsw;
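
Making E_lj a const pointer here is sound because the force-only switch path reads the energy without modifying it: with a switch function S(r) applied on r_switch <= r <= r_cut, the potential becomes V(r)*S(r), and by the product rule the scalar force gains a term in V itself, F_sw(r) = F(r)*S(r) - V(r)*S'(r), so *E_lj only feeds the force correction.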
@@ -430,14 +436,14 @@ void calculate_potential_switch_F_E(cl_nbparam_params_t *nbparam,
  *  geometric combination rule.
  */
 gmx_opencl_inline
-void calculate_lj_ewald_comb_geom_F(__constant float * nbfp_comb_climg2d,
-                                    int                typei,
-                                    int                typej,
-                                    float              r2,
-                                    float              inv_r2,
-                                    float              lje_coeff2,
-                                    float              lje_coeff6_6,
-                                    float             *F_invr)
+void calculate_lj_ewald_comb_geom_F(__constant const float *nbfp_comb_climg2d,
+                                    int                     typei,
+                                    int                     typej,
+                                    float                   r2,
+                                    float                   inv_r2,
+                                    float                   lje_coeff2,
+                                    float                   lje_coeff6_6,
+                                    float                  *F_invr)
 {
     float c6grid, inv_r6_nm, cr2, expmcr2, poly;
 
@@ -457,17 +463,17 @@ void calculate_lj_ewald_comb_geom_F(__constant float * nbfp_comb_climg2d,
  *  geometric combination rule.
  */
 gmx_opencl_inline
-void calculate_lj_ewald_comb_geom_F_E(__constant float    *nbfp_comb_climg2d,
-                                      cl_nbparam_params_t *nbparam,
-                                      int                  typei,
-                                      int                  typej,
-                                      float                r2,
-                                      float                inv_r2,
-                                      float                lje_coeff2,
-                                      float                lje_coeff6_6,
-                                      float                int_bit,
-                                      float               *F_invr,
-                                      float               *E_lj)
+void calculate_lj_ewald_comb_geom_F_E(__constant const float *nbfp_comb_climg2d,
+                                      cl_nbparam_params_t    *nbparam,
+                                      int                     typei,
+                                      int                     typej,
+                                      float                   r2,
+                                      float                   inv_r2,
+                                      float                   lje_coeff2,
+                                      float                   lje_coeff6_6,
+                                      float                   int_bit,
+                                      float                  *F_invr,
+                                      float                  *E_lj)
 {
     float c6grid, inv_r6_nm, cr2, expmcr2, poly, sh_mask;
 
@@ -493,18 +499,18 @@ void calculate_lj_ewald_comb_geom_F_E(__constant float    *nbfp_comb_climg2d,
  *  of this is pretty small and LB on the CPU is anyway very slow.
  */
 gmx_opencl_inline
-void calculate_lj_ewald_comb_LB_F_E(__constant float    *nbfp_comb_climg2d,
-                                    cl_nbparam_params_t *nbparam,
-                                    int                  typei,
-                                    int                  typej,
-                                    float                r2,
-                                    float                inv_r2,
-                                    float                lje_coeff2,
-                                    float                lje_coeff6_6,
-                                    float                int_bit,
-                                    bool                 with_E_lj,
-                                    float               *F_invr,
-                                    float               *E_lj)
+void calculate_lj_ewald_comb_LB_F_E(__constant const float *nbfp_comb_climg2d,
+                                    cl_nbparam_params_t    *nbparam,
+                                    int                     typei,
+                                    int                     typej,
+                                    float                   r2,
+                                    float                   inv_r2,
+                                    float                   lje_coeff2,
+                                    float                   lje_coeff6_6,
+                                    float                   int_bit,
+                                    bool                    with_E_lj,
+                                    float                  *F_invr,
+                                    float                  *E_lj)
 {
     float c6grid, inv_r6_nm, cr2, expmcr2, poly;
     float sigma, sigma2, epsilon;
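
By contrast, the Lorentz-Berthelot rule implemented by this function combines per-type parameters as sigma_ij = (sigma_i + sigma_j)/2 and epsilon_ij = sqrt(epsilon_i * epsilon_j), from which c6_ij = 4 * epsilon_ij * sigma_ij^6; the extra multiplies and square roots are what make this path noticeably more expensive than the geometric rule.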
@@ -540,9 +546,9 @@ void calculate_lj_ewald_comb_LB_F_E(__constant float    *nbfp_comb_climg2d,
  *  Original idea: from the OpenMM project
  */
 gmx_opencl_inline float
-interpolate_coulomb_force_r(__constant float *coulomb_tab_climg2d,
-                            float             r,
-                            float             scale)
+interpolate_coulomb_force_r(__constant const float *coulomb_tab_climg2d,
+                            float                   r,
+                            float                   scale)
 {
     float   normalized = scale * r;
     int     index      = (int) normalized;
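
The two context lines above begin a classic tabulated-force lookup; a minimal sketch of how such an interpolation typically completes (illustrative, not the exact body of this function):

    float fraction = normalized - index;   /* position inside the table cell */
    float left     = coulomb_tab_climg2d[index];
    float right    = coulomb_tab_climg2d[index + 1];
    return mix(left, right, fraction);     /* OpenCL built-in linear interpolation */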
@@ -835,10 +841,9 @@ void reduce_energy_pow2(volatile __local float  *buf,
                         volatile __global float *e_el,
                         unsigned int             tidx)
 {
-    int     i, j;
-    float   e1, e2;
+    int          j;
 
-    i = WARP_SIZE/2;
+    unsigned int i = WARP_SIZE/2;
 
     /* Can't just use i as the loop variable because then nvcc refuses to unroll. */
     for (j = WARP_SIZE_LOG2 - 1; j > 0; j--)
@@ -854,8 +859,8 @@ void reduce_energy_pow2(volatile __local float  *buf,
     /* last reduction step, writing to global mem */
     if (tidx == 0)
     {
-        e1 = buf[              tidx] + buf[              tidx + i];
-        e2 = buf[FBUF_STRIDE + tidx] + buf[FBUF_STRIDE + tidx + i];
+        float e1 = buf[              tidx] + buf[              tidx + i];
+        float e2 = buf[FBUF_STRIDE + tidx] + buf[FBUF_STRIDE + tidx + i];
 
         atomicAdd_g_f(e_lj, e1);
         atomicAdd_g_f(e_el, e2);
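
For reference, a minimal serial model of the power-of-two tree reduction above (hypothetical plain C, assuming WARP_SIZE is a power of two such as 32):

    #define WARP_SIZE 32

    float reduce_pow2_model(float *buf)
    {
        /* each pass halves the active span: 32 -> 16 -> 8 -> 4 -> 2 -> 1 */
        for (int i = WARP_SIZE/2; i >= 1; i >>= 1)
        {
            for (int tidx = 0; tidx < i; tidx++)
            {
                buf[tidx] += buf[tidx + i];
            }
        }
        return buf[0];   /* the kernel's thread 0 atomically adds this into global memory */
    }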