Fix OCL compiler warnings

author Roland Schulz <roland.schulz@intel.com>

Wed, 3 Oct 2018 00:08:14 +0000 (17:08 -0700)

committer Szilárd Páll <pall.szilard@gmail.com>

Mon, 8 Oct 2018 16:06:25 +0000 (18:06 +0200)
author Roland Schulz <roland.schulz@intel.com>
Wed, 3 Oct 2018 00:08:14 +0000 (17:08 -0700)
committer Szilárd Páll <pall.szilard@gmail.com>
Mon, 8 Oct 2018 16:06:25 +0000 (18:06 +0200)
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel.clh b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel.clh

index aba1c026bf278c13aa1e513e43824245e42a1181..c19fcf8dcd0cd7bb194ba7fb69a860f97bbd0a91 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel.clh
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel.clh
@@ -110,24 +110,24 @@ __kernel void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_opencl)
  #endif
  (
  #ifndef LJ_COMB
-    int ntypes,                               /* IN  */
-#endif
-    cl_nbparam_params_t nbparam_params,       /* IN  */
-    const __global float4 *restrict xq,       /* IN  */
-    __global float *restrict f,               /* OUT stores float3 values */
-    __global float *restrict e_lj,            /* OUT */
-    __global float *restrict e_el,            /* OUT */
-    __global float *restrict fshift,          /* OUT stores float3 values */
+    int ntypes,                                       /* IN  */
+#endif
+    cl_nbparam_params_t nbparam_params,               /* IN  */
+    const __global float4 *restrict xq,               /* IN  */
+    __global float *restrict f,                       /* OUT stores float3 values */
+    __global float *restrict gmx_unused e_lj,         /* OUT */
+    __global float *restrict gmx_unused e_el,         /* OUT */
+    __global float *restrict fshift,                  /* OUT stores float3 values */
  #ifdef LJ_COMB
-    const __global float2 *restrict lj_comb,  /* IN stores float2 values */
+    const __global float2 *restrict lj_comb,          /* IN stores float2 values */
  #else
-    const __global int *restrict atom_types,  /* IN  */
+    const __global int *restrict atom_types,          /* IN  */
  #endif
-    const __global float *restrict shift_vec, /* IN stores float3 values */
-    __constant float* nbfp_climg2d,           /* IN  */
-    __constant float* nbfp_comb_climg2d,      /* IN  */
-    __constant float* coulomb_tab_climg2d,    /* IN  */
-    const __global nbnxn_sci_t* pl_sci,       /* IN  */
+    const __global float *restrict shift_vec,         /* IN stores float3 values */
+    __constant float* gmx_unused nbfp_climg2d,        /* IN  */
+    __constant float* gmx_unused nbfp_comb_climg2d,   /* IN  */
+    __constant float* gmx_unused coulomb_tab_climg2d, /* IN  */
+    const __global nbnxn_sci_t* pl_sci,               /* IN  */
  #ifndef PRUNE_NBL
      const
  #endif
@@ -178,12 +178,12 @@ __kernel void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_opencl)
      const unsigned superClInteractionMask = ((1U << NCL_PER_SUPERCL) - 1U);
  
  #define LOCAL_OFFSET (xqib + NCL_PER_SUPERCL * CL_SIZE)
-    CjType cjs;
+    CjType cjs = 0;
  #if USE_CJ_PREFETCH
      /* shmem buffer for cj, for both warps separately */
      cjs = (__local int *)(LOCAL_OFFSET);
      #undef LOCAL_OFFSET
-    #define LOCAL_OFFSET cjs + 2 * c_nbnxnGpuJgroupSize
+    #define LOCAL_OFFSET (cjs + 2 * c_nbnxnGpuJgroupSize)
  #endif //USE_CJ_PREFETCH
  
  #ifdef IATYPE_SHMEM
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_pruneonly.clh b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_pruneonly.clh

index ce4b885b2f79a186afc69af8f04c599809152672..14d993c66b84e8847798afb0a2dcf4d5b38543e4 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_pruneonly.clh
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_pruneonly.clh
@@ -106,14 +106,13 @@ __kernel void nbnxn_kernel_prune_rolling_opencl
  
      // TODO pass this value at compile-time as a macro
      const int c_nbnxnGpuClusterpairSplit = 2;
-    const int c_splitClSize              = c_clSize/c_nbnxnGpuClusterpairSplit;
  
      /*! i-cluster interaction mask for a super-cluster with all c_numClPerSupercl=8 bits set */
      const unsigned superClInteractionMask = ((1U << c_numClPerSupercl) - 1U);
  
      #define LOCAL_OFFSET (xib + c_numClPerSupercl * c_clSize)
      /* shmem buffer for i cj pre-loading */
-    CjType cjs;
+    CjType cjs = 0;
  #if USE_CJ_PREFETCH
      cjs = (((__local int *)(LOCAL_OFFSET)) + tidxz * c_nbnxnGpuClusterpairSplit * c_nbnxnGpuJgroupSize);
      #undef LOCAL_OFFSET
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_utils.clh b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_utils.clh

index be24e7906ec7df56939bfc74e07ed7ae9e59c42b..753e0b897f44ace00344ea035b38632f8f9fdbb0 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_utils.clh
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_utils.clh
@@ -33,6 +33,8 @@
   * the research papers on the package. Check out http://www.gromacs.org.
   */
  
+#define GMX_DOUBLE 0
+
  #include "gromacs/gpu_utils/vectype_ops.clh"
  #include "gromacs/gpu_utils/device_utils.clh"
  #include "gromacs/mdlib/nbnxn_consts.h"
@@ -103,6 +105,13 @@
  #define ONE_TWELVETH_F  0.08333333f
  
  
+#ifdef __GNUC__
+/* GCC, clang, and some ICC pretending to be GCC */
+#  define gmx_unused __attribute__ ((unused))
+#else
+#  define gmx_unused
+#endif
+
  // Data structures shared between OpenCL device code and OpenCL host code
  // TODO: review, improve
  // Replaced real by float for now, to avoid including any other header
@@ -188,8 +197,7 @@ void preloadCj4Generic(__local int        *sm_cjPreload,
                         const __global int *gm_cj,
                         int                 tidxi,
                         int                 tidxj,
-                       bool                iMaskCond)
-
+                       bool gmx_unused     iMaskCond)
  {
      /* Pre-load cj into shared memory */
  #if defined _AMD_SOURCE_ //TODO: fix by setting c_nbnxnGpuClusterpairSplit properly
@@ -201,7 +209,6 @@ void preloadCj4Generic(__local int        *sm_cjPreload,
      const int c_clSize                   = CL_SIZE;
      const int c_nbnxnGpuClusterpairSplit = 2;
      const int c_splitClSize              = c_clSize/c_nbnxnGpuClusterpairSplit;
-
      if ((tidxj == 0 | tidxj == c_splitClSize) & (tidxi < c_nbnxnGpuJgroupSize))
      {
          sm_cjPreload[tidxi + tidxj * c_nbnxnGpuJgroupSize/c_splitClSize] = gm_cj[tidxi];
@@ -236,11 +243,11 @@ typedef __local int* CjType;
   * it's ready. This function does not call a barrier.
   */
  gmx_opencl_inline
-void preloadCj4(CjType             *cjs,
-                const __global int *gm_cj,
-                int                 tidxi,
-                int                 tidxj,
-                bool                iMaskCond)
+void preloadCj4(CjType gmx_unused             *cjs,
+                const __global int gmx_unused *gm_cj,
+                int                            tidxi,
+                int                            tidxj,
+                bool                           iMaskCond)
  {
  #if USE_SUBGROUP_PRELOAD
      *cjs = preloadCj4Subgroup(gm_cj);
@@ -252,10 +259,10 @@ void preloadCj4(CjType             *cjs,
  }
  
  gmx_opencl_inline
-int loadCjPreload(__local int*        sm_cjPreload,
-                  int                 jm,
-                  int                 tidxi,
-                  int                 tidxj)
+int loadCjPreload(__local int           *        sm_cjPreload,
+                  int                            jm,
+                  int gmx_unused                 tidxi,
+                  int gmx_unused                 tidxj)
  {
  #if defined _AMD_SOURCE_
      int       warpLoadOffset = 0; //TODO: fix by setting c_nbnxnGpuClusterpairSplit properly
@@ -263,8 +270,7 @@ int loadCjPreload(__local int*        sm_cjPreload,
      const int c_clSize                   = CL_SIZE;
      const int c_nbnxnGpuClusterpairSplit = 2;
      const int c_splitClSize              = c_clSize/c_nbnxnGpuClusterpairSplit;
-
-    int       warpLoadOffset = (tidxj & c_splitClSize) * c_nbnxnGpuJgroupSize/c_splitClSize;
+    int       warpLoadOffset             = (tidxj & c_splitClSize) * c_nbnxnGpuJgroupSize/c_splitClSize;
  #endif
      return sm_cjPreload[jm + warpLoadOffset];
  }
@@ -369,7 +375,7 @@ void calculate_potential_switch_F(cl_nbparam_params_t *nbparam,
                                    float                inv_r,
                                    float                r2,
                                    float               *F_invr,
-                                  float               *E_lj)
+                                  const float         *E_lj)
  {
      float r, r_switch;
      float sw, dsw;
@@ -430,14 +436,14 @@ void calculate_potential_switch_F_E(cl_nbparam_params_t *nbparam,
   *  geometric combination rule.
   */
  gmx_opencl_inline
-void calculate_lj_ewald_comb_geom_F(__constant float * nbfp_comb_climg2d,
-                                    int                typei,
-                                    int                typej,
-                                    float              r2,
-                                    float              inv_r2,
-                                    float              lje_coeff2,
-                                    float              lje_coeff6_6,
-                                    float             *F_invr)
+void calculate_lj_ewald_comb_geom_F(__constant const float *nbfp_comb_climg2d,
+                                    int                     typei,
+                                    int                     typej,
+                                    float                   r2,
+                                    float                   inv_r2,
+                                    float                   lje_coeff2,
+                                    float                   lje_coeff6_6,
+                                    float                  *F_invr)
  {
      float c6grid, inv_r6_nm, cr2, expmcr2, poly;
  
@@ -457,17 +463,17 @@ void calculate_lj_ewald_comb_geom_F(__constant float * nbfp_comb_climg2d,
   *  geometric combination rule.
   */
  gmx_opencl_inline
-void calculate_lj_ewald_comb_geom_F_E(__constant float    *nbfp_comb_climg2d,
-                                      cl_nbparam_params_t *nbparam,
-                                      int                  typei,
-                                      int                  typej,
-                                      float                r2,
-                                      float                inv_r2,
-                                      float                lje_coeff2,
-                                      float                lje_coeff6_6,
-                                      float                int_bit,
-                                      float               *F_invr,
-                                      float               *E_lj)
+void calculate_lj_ewald_comb_geom_F_E(__constant const float *nbfp_comb_climg2d,
+                                      cl_nbparam_params_t    *nbparam,
+                                      int                     typei,
+                                      int                     typej,
+                                      float                   r2,
+                                      float                   inv_r2,
+                                      float                   lje_coeff2,
+                                      float                   lje_coeff6_6,
+                                      float                   int_bit,
+                                      float                  *F_invr,
+                                      float                  *E_lj)
  {
      float c6grid, inv_r6_nm, cr2, expmcr2, poly, sh_mask;
  
@@ -493,18 +499,18 @@ void calculate_lj_ewald_comb_geom_F_E(__constant float    *nbfp_comb_climg2d,
   *  of this is pretty small and LB on the CPU is anyway very slow.
   */
  gmx_opencl_inline
-void calculate_lj_ewald_comb_LB_F_E(__constant float    *nbfp_comb_climg2d,
-                                    cl_nbparam_params_t *nbparam,
-                                    int                  typei,
-                                    int                  typej,
-                                    float                r2,
-                                    float                inv_r2,
-                                    float                lje_coeff2,
-                                    float                lje_coeff6_6,
-                                    float                int_bit,
-                                    bool                 with_E_lj,
-                                    float               *F_invr,
-                                    float               *E_lj)
+void calculate_lj_ewald_comb_LB_F_E(__constant const float *nbfp_comb_climg2d,
+                                    cl_nbparam_params_t    *nbparam,
+                                    int                     typei,
+                                    int                     typej,
+                                    float                   r2,
+                                    float                   inv_r2,
+                                    float                   lje_coeff2,
+                                    float                   lje_coeff6_6,
+                                    float                   int_bit,
+                                    bool                    with_E_lj,
+                                    float                  *F_invr,
+                                    float                  *E_lj)
  {
      float c6grid, inv_r6_nm, cr2, expmcr2, poly;
      float sigma, sigma2, epsilon;
@@ -540,9 +546,9 @@ void calculate_lj_ewald_comb_LB_F_E(__constant float    *nbfp_comb_climg2d,
   *  Original idea: from the OpenMM project
   */
  gmx_opencl_inline float
-interpolate_coulomb_force_r(__constant float *coulomb_tab_climg2d,
-                            float             r,
-                            float             scale)
+interpolate_coulomb_force_r(__constant const float *coulomb_tab_climg2d,
+                            float                   r,
+                            float                   scale)
  {
      float   normalized = scale * r;
      int     index      = (int) normalized;
@@ -835,10 +841,9 @@ void reduce_energy_pow2(volatile __local float  *buf,
                          volatile __global float *e_el,
                          unsigned int             tidx)
  {
-    int     i, j;
-    float   e1, e2;
+    int          j;
  
-    i = WARP_SIZE/2;
+    unsigned int i = WARP_SIZE/2;
  
      /* Can't just use i as loop variable because than nvcc refuses to unroll. */
      for (j = WARP_SIZE_LOG2 - 1; j > 0; j--)
@@ -854,8 +859,8 @@ void reduce_energy_pow2(volatile __local float  *buf,
      /* last reduction step, writing to global mem */
      if (tidx == 0)
      {
-        e1 = buf[              tidx] + buf[              tidx + i];
-        e2 = buf[FBUF_STRIDE + tidx] + buf[FBUF_STRIDE + tidx + i];
+        float e1 = buf[              tidx] + buf[              tidx + i];
+        float e2 = buf[FBUF_STRIDE + tidx] + buf[FBUF_STRIDE + tidx + i];
  
          atomicAdd_g_f(e_lj, e1);
          atomicAdd_g_f(e_el, e2);
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernels.cl b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernels.cl

index 0cdb06128fb3dc117d9a86d2395652fbf4b8a4b4..72edb2c9c50a6d1ba650661387cfa42f80f8957a 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernels.cl
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernels.cl
@@ -32,8 +32,6 @@
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
-#define __IN_OPENCL_KERNEL__
-
  /* Auxiliary kernels */
  __kernel void
  memset_f3(__global float3 *buf, const float value, const unsigned int Nbuf)
author	Roland Schulz <roland.schulz@intel.com>
	Wed, 3 Oct 2018 00:08:14 +0000 (17:08 -0700)
committer	Szilárd Páll <pall.szilard@gmail.com>
	Mon, 8 Oct 2018 16:06:25 +0000 (18:06 +0200)
src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel.clh		patch \| blob \| history
src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_pruneonly.clh		patch \| blob \| history
src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_utils.clh		patch \| blob \| history
src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernels.cl		patch \| blob \| history