From 2fed6b4c53a5b48fe748d93ab2960d9e77ecc42a Mon Sep 17 00:00:00 2001 From: Roland Schulz Date: Tue, 2 Oct 2018 17:08:14 -0700 Subject: [PATCH] Fix OCL compiler warnings Related #2661 Change-Id: I715553a40d2d913bd484f455d8e39e46632a8372 --- .../mdlib/nbnxn_ocl/nbnxn_ocl_kernel.clh | 34 +++--- .../nbnxn_ocl/nbnxn_ocl_kernel_pruneonly.clh | 3 +- .../nbnxn_ocl/nbnxn_ocl_kernel_utils.clh | 113 +++++++++--------- .../mdlib/nbnxn_ocl/nbnxn_ocl_kernels.cl | 2 - 4 files changed, 77 insertions(+), 75 deletions(-) diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel.clh b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel.clh index aba1c026bf..c19fcf8dcd 100644 --- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel.clh +++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel.clh @@ -110,24 +110,24 @@ __kernel void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_opencl) #endif ( #ifndef LJ_COMB - int ntypes, /* IN */ -#endif - cl_nbparam_params_t nbparam_params, /* IN */ - const __global float4 *restrict xq, /* IN */ - __global float *restrict f, /* OUT stores float3 values */ - __global float *restrict e_lj, /* OUT */ - __global float *restrict e_el, /* OUT */ - __global float *restrict fshift, /* OUT stores float3 values */ + int ntypes, /* IN */ +#endif + cl_nbparam_params_t nbparam_params, /* IN */ + const __global float4 *restrict xq, /* IN */ + __global float *restrict f, /* OUT stores float3 values */ + __global float *restrict gmx_unused e_lj, /* OUT */ + __global float *restrict gmx_unused e_el, /* OUT */ + __global float *restrict fshift, /* OUT stores float3 values */ #ifdef LJ_COMB - const __global float2 *restrict lj_comb, /* IN stores float2 values */ + const __global float2 *restrict lj_comb, /* IN stores float2 values */ #else - const __global int *restrict atom_types, /* IN */ + const __global int *restrict atom_types, /* IN */ #endif - const __global float *restrict shift_vec, /* IN stores float3 values */ - __constant float* nbfp_climg2d, /* IN */ - __constant float* nbfp_comb_climg2d, /* IN */ - __constant float* coulomb_tab_climg2d, /* IN */ - const __global nbnxn_sci_t* pl_sci, /* IN */ + const __global float *restrict shift_vec, /* IN stores float3 values */ + __constant float* gmx_unused nbfp_climg2d, /* IN */ + __constant float* gmx_unused nbfp_comb_climg2d, /* IN */ + __constant float* gmx_unused coulomb_tab_climg2d, /* IN */ + const __global nbnxn_sci_t* pl_sci, /* IN */ #ifndef PRUNE_NBL const #endif @@ -178,12 +178,12 @@ __kernel void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_opencl) const unsigned superClInteractionMask = ((1U << NCL_PER_SUPERCL) - 1U); #define LOCAL_OFFSET (xqib + NCL_PER_SUPERCL * CL_SIZE) - CjType cjs; + CjType cjs = 0; #if USE_CJ_PREFETCH /* shmem buffer for cj, for both warps separately */ cjs = (__local int *)(LOCAL_OFFSET); #undef LOCAL_OFFSET - #define LOCAL_OFFSET cjs + 2 * c_nbnxnGpuJgroupSize + #define LOCAL_OFFSET (cjs + 2 * c_nbnxnGpuJgroupSize) #endif //USE_CJ_PREFETCH #ifdef IATYPE_SHMEM diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_pruneonly.clh b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_pruneonly.clh index ce4b885b2f..14d993c66b 100644 --- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_pruneonly.clh +++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_pruneonly.clh @@ -106,14 +106,13 @@ __kernel void nbnxn_kernel_prune_rolling_opencl // TODO pass this value at compile-time as a macro const int c_nbnxnGpuClusterpairSplit = 2; - const int c_splitClSize = c_clSize/c_nbnxnGpuClusterpairSplit; /*! i-cluster interaction mask for a super-cluster with all c_numClPerSupercl=8 bits set */ const unsigned superClInteractionMask = ((1U << c_numClPerSupercl) - 1U); #define LOCAL_OFFSET (xib + c_numClPerSupercl * c_clSize) /* shmem buffer for i cj pre-loading */ - CjType cjs; + CjType cjs = 0; #if USE_CJ_PREFETCH cjs = (((__local int *)(LOCAL_OFFSET)) + tidxz * c_nbnxnGpuClusterpairSplit * c_nbnxnGpuJgroupSize); #undef LOCAL_OFFSET diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_utils.clh b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_utils.clh index be24e7906e..753e0b897f 100644 --- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_utils.clh +++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_utils.clh @@ -33,6 +33,8 @@ * the research papers on the package. Check out http://www.gromacs.org. */ +#define GMX_DOUBLE 0 + #include "gromacs/gpu_utils/vectype_ops.clh" #include "gromacs/gpu_utils/device_utils.clh" #include "gromacs/mdlib/nbnxn_consts.h" @@ -103,6 +105,13 @@ #define ONE_TWELVETH_F 0.08333333f +#ifdef __GNUC__ +/* GCC, clang, and some ICC pretending to be GCC */ +# define gmx_unused __attribute__ ((unused)) +#else +# define gmx_unused +#endif + // Data structures shared between OpenCL device code and OpenCL host code // TODO: review, improve // Replaced real by float for now, to avoid including any other header @@ -188,8 +197,7 @@ void preloadCj4Generic(__local int *sm_cjPreload, const __global int *gm_cj, int tidxi, int tidxj, - bool iMaskCond) - + bool gmx_unused iMaskCond) { /* Pre-load cj into shared memory */ #if defined _AMD_SOURCE_ //TODO: fix by setting c_nbnxnGpuClusterpairSplit properly @@ -201,7 +209,6 @@ void preloadCj4Generic(__local int *sm_cjPreload, const int c_clSize = CL_SIZE; const int c_nbnxnGpuClusterpairSplit = 2; const int c_splitClSize = c_clSize/c_nbnxnGpuClusterpairSplit; - if ((tidxj == 0 | tidxj == c_splitClSize) & (tidxi < c_nbnxnGpuJgroupSize)) { sm_cjPreload[tidxi + tidxj * c_nbnxnGpuJgroupSize/c_splitClSize] = gm_cj[tidxi]; @@ -236,11 +243,11 @@ typedef __local int* CjType; * it's ready. This function does not call a barrier. */ gmx_opencl_inline -void preloadCj4(CjType *cjs, - const __global int *gm_cj, - int tidxi, - int tidxj, - bool iMaskCond) +void preloadCj4(CjType gmx_unused *cjs, + const __global int gmx_unused *gm_cj, + int tidxi, + int tidxj, + bool iMaskCond) { #if USE_SUBGROUP_PRELOAD *cjs = preloadCj4Subgroup(gm_cj); @@ -252,10 +259,10 @@ void preloadCj4(CjType *cjs, } gmx_opencl_inline -int loadCjPreload(__local int* sm_cjPreload, - int jm, - int tidxi, - int tidxj) +int loadCjPreload(__local int * sm_cjPreload, + int jm, + int gmx_unused tidxi, + int gmx_unused tidxj) { #if defined _AMD_SOURCE_ int warpLoadOffset = 0; //TODO: fix by setting c_nbnxnGpuClusterpairSplit properly @@ -263,8 +270,7 @@ int loadCjPreload(__local int* sm_cjPreload, const int c_clSize = CL_SIZE; const int c_nbnxnGpuClusterpairSplit = 2; const int c_splitClSize = c_clSize/c_nbnxnGpuClusterpairSplit; - - int warpLoadOffset = (tidxj & c_splitClSize) * c_nbnxnGpuJgroupSize/c_splitClSize; + int warpLoadOffset = (tidxj & c_splitClSize) * c_nbnxnGpuJgroupSize/c_splitClSize; #endif return sm_cjPreload[jm + warpLoadOffset]; } @@ -369,7 +375,7 @@ void calculate_potential_switch_F(cl_nbparam_params_t *nbparam, float inv_r, float r2, float *F_invr, - float *E_lj) + const float *E_lj) { float r, r_switch; float sw, dsw; @@ -430,14 +436,14 @@ void calculate_potential_switch_F_E(cl_nbparam_params_t *nbparam, * geometric combination rule. */ gmx_opencl_inline -void calculate_lj_ewald_comb_geom_F(__constant float * nbfp_comb_climg2d, - int typei, - int typej, - float r2, - float inv_r2, - float lje_coeff2, - float lje_coeff6_6, - float *F_invr) +void calculate_lj_ewald_comb_geom_F(__constant const float *nbfp_comb_climg2d, + int typei, + int typej, + float r2, + float inv_r2, + float lje_coeff2, + float lje_coeff6_6, + float *F_invr) { float c6grid, inv_r6_nm, cr2, expmcr2, poly; @@ -457,17 +463,17 @@ void calculate_lj_ewald_comb_geom_F(__constant float * nbfp_comb_climg2d, * geometric combination rule. */ gmx_opencl_inline -void calculate_lj_ewald_comb_geom_F_E(__constant float *nbfp_comb_climg2d, - cl_nbparam_params_t *nbparam, - int typei, - int typej, - float r2, - float inv_r2, - float lje_coeff2, - float lje_coeff6_6, - float int_bit, - float *F_invr, - float *E_lj) +void calculate_lj_ewald_comb_geom_F_E(__constant const float *nbfp_comb_climg2d, + cl_nbparam_params_t *nbparam, + int typei, + int typej, + float r2, + float inv_r2, + float lje_coeff2, + float lje_coeff6_6, + float int_bit, + float *F_invr, + float *E_lj) { float c6grid, inv_r6_nm, cr2, expmcr2, poly, sh_mask; @@ -493,18 +499,18 @@ void calculate_lj_ewald_comb_geom_F_E(__constant float *nbfp_comb_climg2d, * of this is pretty small and LB on the CPU is anyway very slow. */ gmx_opencl_inline -void calculate_lj_ewald_comb_LB_F_E(__constant float *nbfp_comb_climg2d, - cl_nbparam_params_t *nbparam, - int typei, - int typej, - float r2, - float inv_r2, - float lje_coeff2, - float lje_coeff6_6, - float int_bit, - bool with_E_lj, - float *F_invr, - float *E_lj) +void calculate_lj_ewald_comb_LB_F_E(__constant const float *nbfp_comb_climg2d, + cl_nbparam_params_t *nbparam, + int typei, + int typej, + float r2, + float inv_r2, + float lje_coeff2, + float lje_coeff6_6, + float int_bit, + bool with_E_lj, + float *F_invr, + float *E_lj) { float c6grid, inv_r6_nm, cr2, expmcr2, poly; float sigma, sigma2, epsilon; @@ -540,9 +546,9 @@ void calculate_lj_ewald_comb_LB_F_E(__constant float *nbfp_comb_climg2d, * Original idea: from the OpenMM project */ gmx_opencl_inline float -interpolate_coulomb_force_r(__constant float *coulomb_tab_climg2d, - float r, - float scale) +interpolate_coulomb_force_r(__constant const float *coulomb_tab_climg2d, + float r, + float scale) { float normalized = scale * r; int index = (int) normalized; @@ -835,10 +841,9 @@ void reduce_energy_pow2(volatile __local float *buf, volatile __global float *e_el, unsigned int tidx) { - int i, j; - float e1, e2; + int j; - i = WARP_SIZE/2; + unsigned int i = WARP_SIZE/2; /* Can't just use i as loop variable because than nvcc refuses to unroll. */ for (j = WARP_SIZE_LOG2 - 1; j > 0; j--) @@ -854,8 +859,8 @@ void reduce_energy_pow2(volatile __local float *buf, /* last reduction step, writing to global mem */ if (tidx == 0) { - e1 = buf[ tidx] + buf[ tidx + i]; - e2 = buf[FBUF_STRIDE + tidx] + buf[FBUF_STRIDE + tidx + i]; + float e1 = buf[ tidx] + buf[ tidx + i]; + float e2 = buf[FBUF_STRIDE + tidx] + buf[FBUF_STRIDE + tidx + i]; atomicAdd_g_f(e_lj, e1); atomicAdd_g_f(e_el, e2); diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernels.cl b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernels.cl index 0cdb06128f..72edb2c9c5 100644 --- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernels.cl +++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernels.cl @@ -32,8 +32,6 @@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#define __IN_OPENCL_KERNEL__ - /* Auxiliary kernels */ __kernel void memset_f3(__global float3 *buf, const float value, const unsigned int Nbuf) -- 2.22.0