From: Szilárd Páll Date: Tue, 27 Oct 2020 14:13:45 +0000 (+0000) Subject: Fix clang-tidy v10 warnigns in OpenCL kernels X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=f73007c915985d63a9778b533edee4bc3ddab3bd;p=alexxy%2Fgromacs.git Fix clang-tidy v10 warnigns in OpenCL kernels Also made convert_sigma_epsilon_to_c6_c12() return a float2 with c6 and c12 instead of returning results in pointers. --- diff --git a/src/gromacs/ewald/pme_gather.clh b/src/gromacs/ewald/pme_gather.clh index 8c7106acf9..98cac0f282 100644 --- a/src/gromacs/ewald/pme_gather.clh +++ b/src/gromacs/ewald/pme_gather.clh @@ -49,8 +49,8 @@ * \author Aleksei Iupinov */ -#include "pme_gpu_types.h" #include "pme_gpu_calculate_splines.clh" +#include "pme_gpu_types.h" #ifndef COMPILE_GATHER_HELPERS_ONCE # define COMPILE_GATHER_HELPERS_ONCE @@ -442,9 +442,9 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_gather_kernel)(const struct PmeOpenCLKe if (numGrids == 2) { barrier(CLK_LOCAL_MEM_FENCE); - fx = 0.0f; - fy = 0.0f; - fz = 0.0f; + fx = 0.0F; + fy = 0.0F; + fz = 0.0F; chargeCheck = pme_gpu_check_atom_charge(gm_coefficientsB[atomIndexGlobal]); if (chargeCheck) { @@ -473,8 +473,8 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_gather_kernel)(const struct PmeOpenCLKe #pragma unroll for (int i = 0; i < numIter; i++) { - const int outputIndexLocal = i * iterThreads + threadLocalId; - const int outputIndexGlobal = get_group_id(XX) * blockForcesSize + outputIndexLocal; + const int outputIndexLocal = i * iterThreads + threadLocalId; + const int outputIndexGlobal = (int)get_group_id(XX) * blockForcesSize + outputIndexLocal; const float outputForceComponent = sm_forces[outputIndexLocal]; gm_forces[outputIndexGlobal] += outputForceComponent; } diff --git a/src/gromacs/ewald/pme_solve.clh b/src/gromacs/ewald/pme_solve.clh index 0c21512b6f..ae96f584d5 100644 --- a/src/gromacs/ewald/pme_solve.clh +++ b/src/gromacs/ewald/pme_solve.clh @@ -70,9 +70,9 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_solve_kernel)(const struct PmeOpenCLKer __global float2* __restrict__ gm_grid) { /* This kernel supports 2 different grid dimension orderings: YZX and XYZ */ - int majorDim; - int middleDim; - int minorDim; + int majorDim = 0; + int middleDim = 0; + int minorDim = 0; if (gridOrdering == YZX) { majorDim = YY; @@ -163,9 +163,9 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_solve_kernel)(const struct PmeOpenCLKer /* We should skip the k-space point (0,0,0) */ const bool notZeroPoint = (kMinor > 0) | (kMajor > 0) | (kMiddle > 0); - float mX; - float mY; - float mZ; + float mX = 0.0F; + float mY = 0.0F; + float mZ = 0.0F; if (gridOrdering == YZX) { mX = mMinor; diff --git a/src/gromacs/ewald/pme_spread.clh b/src/gromacs/ewald/pme_spread.clh index 3342120d51..9e887a9e6e 100644 --- a/src/gromacs/ewald/pme_spread.clh +++ b/src/gromacs/ewald/pme_spread.clh @@ -53,8 +53,8 @@ #include "gromacs/gpu_utils/vectype_ops.clh" -#include "pme_gpu_types.h" #include "pme_gpu_calculate_splines.clh" +#include "pme_gpu_types.h" /* * This define affects the spline calculation behaviour in the kernel. @@ -180,11 +180,10 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern /* Indices interpolation */ if (orderIndex == 0) { - int tableIndex; - int tInt; - float n; - float t; - const float3 x = vload3(atomIndexLocal, sm_coordinates); + int tableIndex = 0; + float n = 0.0F; + float t = 0.0F; + const float3 x = vload3(atomIndexLocal, sm_coordinates); /* Accessing fields in fshOffset/nXYZ/recipbox/... with dimIndex offset * puts them into local memory(!) instead of accessing the constant memory directly. @@ -225,7 +224,7 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern /* Fractional coordinates along box vectors, adding a positive shift to ensure t is positive for triclinic boxes */ t = (t + shift) * n; - tInt = (int)t; + const int tInt = (int)t; sm_fractCoords[sharedMemoryIndex] = t - (float)tInt; tableIndex += tInt; assert(tInt >= 0); @@ -241,7 +240,6 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern const int chargeCheck = pme_gpu_check_atom_charge(sm_coefficients[atomIndexLocal]); if (chargeCheck) { - float div; int o = orderIndex; // This is an index that is set once for PME_GPU_PARALLEL_SPLINE == 1 const float dr = sm_fractCoords[sharedMemoryIndex]; @@ -255,7 +253,7 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern # pragma unroll order for (int k = 3; k < order; k++) { - div = 1.0F / ((float)k - 1.0F); + const float div = 1.0F / ((float)k - 1.0F); *SPLINE_DATA_PTR(k - 1) = div * dr * SPLINE_DATA(k - 2); # pragma unroll for (int l = 1; l < (k - 1); l++) @@ -287,7 +285,7 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern gm_dtheta[thetaGlobalIndex] = dtheta; } - div = 1.0F / (order - 1.0F); + const float div = 1.0F / (order - 1.0F); *SPLINE_DATA_PTR(order - 1) = div * dr * SPLINE_DATA(order - 2); # pragma unroll for (int k = 1; k < (order - 1); k++) diff --git a/src/gromacs/nbnxm/opencl/CMakeLists.txt b/src/gromacs/nbnxm/opencl/CMakeLists.txt index 69d86e96ba..dab11de278 100644 --- a/src/gromacs/nbnxm/opencl/CMakeLists.txt +++ b/src/gromacs/nbnxm/opencl/CMakeLists.txt @@ -79,7 +79,7 @@ foreach(ELEC_DEF IN LISTS ELEC_DEFS) -Xclang -finclude-default-header -D_${VENDOR}_SOURCE_ -DGMX_OCL_FASTGEN ${ELEC_DEF} ${VDW_DEF} -Dc_nbnxnGpuClusterSize=${CLUSTER_SIZE} - -Dc_nbnxnMinDistanceSquared=3.82e-07F + -DNBNXM_MIN_DISTANCE_SQUARED_VALUE_FLOAT=3.82e-07 -Dc_nbnxnGpuNumClusterPerSupercluster=8 -Dc_nbnxnGpuJgroupSize=4 -DIATYPE_SHMEM diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp index 40eb905184..18c583937b 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp @@ -187,10 +187,13 @@ void nbnxn_gpu_compile_kernels(NbnxmGpu* nb) /* Here we pass macros and static const/constexpr int variables defined * in include files outside the opencl as macros, to avoid * including those files in the plain-C JIT compilation that happens - * at runtime. */ + * at runtime. + * Note that we need to re-add the the suffix to the floating point literals + * passed the to the kernel to avoid type ambiguity. + */ extraDefines += gmx::formatString( " -Dc_nbnxnGpuClusterSize=%d" - " -Dc_nbnxnMinDistanceSquared=%g" + " -DNBNXM_MIN_DISTANCE_SQUARED_VALUE_FLOAT=%g" " -Dc_nbnxnGpuNumClusterPerSupercluster=%d" " -Dc_nbnxnGpuJgroupSize=%d" "%s", diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh index b238843a91..60242b0ba8 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh @@ -415,19 +415,18 @@ __kernel void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_opencl) const float c12 = ljcp_i.y * ljcp_j.y; # else /* LJ 2^(1/6)*sigma and 12*epsilon */ - const float sigma = ljcp_i.x + ljcp_j.x; - const float epsilon = ljcp_i.y * ljcp_j.y; + const float sigma = ljcp_i.x + ljcp_j.x; + const float epsilon = ljcp_i.y * ljcp_j.y; # if defined CALC_ENERGIES || defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH - float c6, c12; - convert_sigma_epsilon_to_c6_c12(sigma, epsilon, &c6, &c12); + const float2 c6c12 = convert_sigma_epsilon_to_c6_c12(sigma, epsilon); + const float c6 = c6c12.x; + const float c12 = c6c12.y; # endif # endif /* LJ_COMB_GEOM */ #endif /* LJ_COMB */ // Ensure distance do not become so small that r^-12 overflows. - // Cast to float to ensure the correct built-in max() function - // is called. - r2 = max(r2, (float)c_nbnxnMinDistanceSquared); + r2 = max(r2, c_nbnxnMinDistanceSquared); const float inv_r = rsqrt(r2); const float inv_r2 = inv_r * inv_r; diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh index 16dd0c4962..dfafdcbf28 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh @@ -166,7 +166,7 @@ nbnxn_kernel_prune_rolling_opencl /* loop over the j clusters = seen by any of the atoms in the current super-cluster */ for (int j4 = cij4_start + tidxz; j4 < cij4_end; j4 += NTHREAD_Z) { - unsigned int imaskFull, imaskCheck, imaskNew; + unsigned int imaskFull = 0, imaskCheck = 0, imaskNew = 0; if (haveFreshList) { diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh index 7d70a5908a..e8702e7c74 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh @@ -234,6 +234,9 @@ typedef struct /*! i-cluster interaction mask for a super-cluster with all c_nbnxnGpuNumClusterPerSupercluster bits set */ __constant unsigned supercl_interaction_mask = ((1U << c_nbnxnGpuNumClusterPerSupercluster) - 1U); +/*! Minimum single precision threshold for r^2 to avoid r^-12 overflow. */ +__constant float c_nbnxnMinDistanceSquared = NBNXM_MIN_DISTANCE_SQUARED_VALUE_FLOAT; + gmx_opencl_inline void preloadCj4Generic(__local int* sm_cjPreload, const __global int* gm_cj, int tidxi, @@ -327,14 +330,14 @@ loadCj(CjType cjs, const __global int gmx_unused* gm_cj, int jm, int gmx_unused } /*! Convert LJ sigma,epsilon parameters to C6,C12. */ -gmx_opencl_inline void convert_sigma_epsilon_to_c6_c12(const float sigma, const float epsilon, float* c6, float* c12) -{ - float sigma2, sigma6; - - sigma2 = sigma * sigma; - sigma6 = sigma2 * sigma2 * sigma2; - *c6 = epsilon * sigma6; - *c12 = *c6 * sigma6; +gmx_opencl_inline float2 convert_sigma_epsilon_to_c6_c12(const float sigma, const float epsilon) +{ + const float sigma2 = sigma * sigma; + const float sigma6 = sigma2 * sigma2 * sigma2; + const float c6 = epsilon * sigma6; + const float2 c6c12 = (float2)(c6, /* c6 */ + c6 * sigma6); /* c12 */ + return c6c12; } @@ -346,17 +349,15 @@ gmx_opencl_inline void calculate_force_switch_F(const cl_nbparam_params_t* nbpar float r2, float* F_invr) { - float r, r_switch; - /* force switch constants */ - float disp_shift_V2 = nbparam->dispersion_shift.c2; - float disp_shift_V3 = nbparam->dispersion_shift.c3; - float repu_shift_V2 = nbparam->repulsion_shift.c2; - float repu_shift_V3 = nbparam->repulsion_shift.c3; + const float disp_shift_V2 = nbparam->dispersion_shift.c2; + const float disp_shift_V3 = nbparam->dispersion_shift.c3; + const float repu_shift_V2 = nbparam->repulsion_shift.c2; + const float repu_shift_V3 = nbparam->repulsion_shift.c3; - r = r2 * inv_r; - r_switch = r - nbparam->rvdw_switch; - r_switch = r_switch >= 0.0F ? r_switch : 0.0F; + const float r = r2 * inv_r; + float r_switch = r - nbparam->rvdw_switch; + r_switch = r_switch >= 0.0F ? r_switch : 0.0F; *F_invr += -c6 * (disp_shift_V2 + disp_shift_V3 * r_switch) * r_switch * r_switch * inv_r + c12 * (repu_shift_V2 + repu_shift_V3 * r_switch) * r_switch * r_switch * inv_r; @@ -371,22 +372,20 @@ gmx_opencl_inline void calculate_force_switch_F_E(const cl_nbparam_params_t* nbp float* F_invr, float* E_lj) { - float r, r_switch; - /* force switch constants */ - float disp_shift_V2 = nbparam->dispersion_shift.c2; - float disp_shift_V3 = nbparam->dispersion_shift.c3; - float repu_shift_V2 = nbparam->repulsion_shift.c2; - float repu_shift_V3 = nbparam->repulsion_shift.c3; + const float disp_shift_V2 = nbparam->dispersion_shift.c2; + const float disp_shift_V3 = nbparam->dispersion_shift.c3; + const float repu_shift_V2 = nbparam->repulsion_shift.c2; + const float repu_shift_V3 = nbparam->repulsion_shift.c3; - float disp_shift_F2 = nbparam->dispersion_shift.c2 / 3; - float disp_shift_F3 = nbparam->dispersion_shift.c3 / 4; - float repu_shift_F2 = nbparam->repulsion_shift.c2 / 3; - float repu_shift_F3 = nbparam->repulsion_shift.c3 / 4; + const float disp_shift_F2 = nbparam->dispersion_shift.c2 / 3; + const float disp_shift_F3 = nbparam->dispersion_shift.c3 / 4; + const float repu_shift_F2 = nbparam->repulsion_shift.c2 / 3; + const float repu_shift_F3 = nbparam->repulsion_shift.c3 / 4; - r = r2 * inv_r; - r_switch = r - nbparam->rvdw_switch; - r_switch = r_switch >= 0.0F ? r_switch : 0.0F; + const float r = r2 * inv_r; + float r_switch = r - nbparam->rvdw_switch; + r_switch = r_switch >= 0.0F ? r_switch : 0.0F; *F_invr += -c6 * (disp_shift_V2 + disp_shift_V3 * r_switch) * r_switch * r_switch * inv_r + c12 * (repu_shift_V2 + repu_shift_V3 * r_switch) * r_switch * r_switch * inv_r; @@ -401,25 +400,24 @@ gmx_opencl_inline void calculate_potential_switch_F(const cl_nbparam_params_t* n float* F_invr, const float* E_lj) { - float r, r_switch; - float sw, dsw; - /* potential switch constants */ - float switch_V3 = nbparam->vdw_switch.c3; - float switch_V4 = nbparam->vdw_switch.c4; - float switch_V5 = nbparam->vdw_switch.c5; - float switch_F2 = nbparam->vdw_switch.c3; - float switch_F3 = nbparam->vdw_switch.c4; - float switch_F4 = nbparam->vdw_switch.c5; + const float switch_V3 = nbparam->vdw_switch.c3; + const float switch_V4 = nbparam->vdw_switch.c4; + const float switch_V5 = nbparam->vdw_switch.c5; + const float switch_F2 = nbparam->vdw_switch.c3; + const float switch_F3 = nbparam->vdw_switch.c4; + const float switch_F4 = nbparam->vdw_switch.c5; - r = r2 * inv_r; - r_switch = r - nbparam->rvdw_switch; + const float r = r2 * inv_r; + const float r_switch = r - nbparam->rvdw_switch; /* Unlike in the F+E kernel, conditional is faster here */ if (r_switch > 0.0F) { - sw = 1.0F + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch * r_switch * r_switch; - dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch; + const float sw = 1.0F + + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch + * r_switch * r_switch; + const float dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch; *F_invr = (*F_invr) * sw - inv_r * (*E_lj) * dsw; } @@ -432,24 +430,22 @@ gmx_opencl_inline void calculate_potential_switch_F_E(const cl_nbparam_params_t* float* F_invr, float* E_lj) { - float r, r_switch; - float sw, dsw; - /* potential switch constants */ - float switch_V3 = nbparam->vdw_switch.c3; - float switch_V4 = nbparam->vdw_switch.c4; - float switch_V5 = nbparam->vdw_switch.c5; - float switch_F2 = nbparam->vdw_switch.c3; - float switch_F3 = nbparam->vdw_switch.c4; - float switch_F4 = nbparam->vdw_switch.c5; + const float switch_V3 = nbparam->vdw_switch.c3; + const float switch_V4 = nbparam->vdw_switch.c4; + const float switch_V5 = nbparam->vdw_switch.c5; + const float switch_F2 = nbparam->vdw_switch.c3; + const float switch_F3 = nbparam->vdw_switch.c4; + const float switch_F4 = nbparam->vdw_switch.c5; - r = r2 * inv_r; - r_switch = r - nbparam->rvdw_switch; - r_switch = r_switch >= 0.0F ? r_switch : 0.0F; + const float r = r2 * inv_r; + float r_switch = r - nbparam->rvdw_switch; + r_switch = r_switch >= 0.0F ? r_switch : 0.0F; /* Unlike in the F-only kernel, masking is faster here */ - sw = 1.0F + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch * r_switch * r_switch; - dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch; + const float sw = + 1.0F + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch * r_switch * r_switch; + const float dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch; *F_invr = (*F_invr) * sw - inv_r * (*E_lj) * dsw; *E_lj *= sw; @@ -467,15 +463,13 @@ gmx_opencl_inline void calculate_lj_ewald_comb_geom_F(__constant const float* nb float lje_coeff6_6, float* F_invr) { - float c6grid, inv_r6_nm, cr2, expmcr2, poly; - - c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej]; + const float c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej]; /* Recalculate inv_r6 without exclusion mask */ - inv_r6_nm = inv_r2 * inv_r2 * inv_r2; - cr2 = lje_coeff2 * r2; - expmcr2 = exp(-cr2); - poly = 1.0F + cr2 + HALF_F * cr2 * cr2; + const float inv_r6_nm = inv_r2 * inv_r2 * inv_r2; + const float cr2 = lje_coeff2 * r2; + const float expmcr2 = exp(-cr2); + const float poly = 1.0F + cr2 + HALF_F * cr2 * cr2; /* Subtract the grid force from the total LJ force */ *F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2; @@ -496,21 +490,19 @@ gmx_opencl_inline void calculate_lj_ewald_comb_geom_F_E(__constant const float* float* F_invr, float* E_lj) { - float c6grid, inv_r6_nm, cr2, expmcr2, poly, sh_mask; - - c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej]; + const float c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej]; /* Recalculate inv_r6 without exclusion mask */ - inv_r6_nm = inv_r2 * inv_r2 * inv_r2; - cr2 = lje_coeff2 * r2; - expmcr2 = exp(-cr2); - poly = 1.0F + cr2 + HALF_F * cr2 * cr2; + const float inv_r6_nm = inv_r2 * inv_r2 * inv_r2; + const float cr2 = lje_coeff2 * r2; + const float expmcr2 = exp(-cr2); + const float poly = 1.0F + cr2 + HALF_F * cr2 * cr2; /* Subtract the grid force from the total LJ force */ *F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2; /* Shift should be applied only to real LJ pairs */ - sh_mask = nbparam->sh_lj_ewald * int_bit; + const float sh_mask = nbparam->sh_lj_ewald * int_bit; *E_lj += ONE_SIXTH_F * c6grid * (inv_r6_nm * (1.0F - expmcr2 * poly) + sh_mask); } @@ -532,32 +524,28 @@ gmx_opencl_inline void calculate_lj_ewald_comb_LB_F_E(__constant const float* float* F_invr, float* E_lj) { - float c6grid, inv_r6_nm, cr2, expmcr2, poly; - float sigma, sigma2, epsilon; - /* sigma and epsilon are scaled to give 6*C6 */ - sigma = nbfp_comb_climg2d[2 * typei] + nbfp_comb_climg2d[2 * typej]; + const float sigma = nbfp_comb_climg2d[2 * typei] + nbfp_comb_climg2d[2 * typej]; - epsilon = nbfp_comb_climg2d[2 * typei + 1] * nbfp_comb_climg2d[2 * typej + 1]; + const float epsilon = nbfp_comb_climg2d[2 * typei + 1] * nbfp_comb_climg2d[2 * typej + 1]; - sigma2 = sigma * sigma; - c6grid = epsilon * sigma2 * sigma2 * sigma2; + const float sigma2 = sigma * sigma; + const float c6grid = epsilon * sigma2 * sigma2 * sigma2; /* Recalculate inv_r6 without exclusion mask */ - inv_r6_nm = inv_r2 * inv_r2 * inv_r2; - cr2 = lje_coeff2 * r2; - expmcr2 = exp(-cr2); - poly = 1.0F + cr2 + HALF_F * cr2 * cr2; + const float inv_r6_nm = inv_r2 * inv_r2 * inv_r2; + const float cr2 = lje_coeff2 * r2; + const float expmcr2 = exp(-cr2); + const float poly = 1.0F + cr2 + HALF_F * cr2 * cr2; /* Subtract the grid force from the total LJ force */ *F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2; if (with_E_lj) { - float sh_mask; /* Shift should be applied only to real LJ pairs */ - sh_mask = nbparam->sh_lj_ewald * int_bit; + const float sh_mask = nbparam->sh_lj_ewald * int_bit; *E_lj += ONE_SIXTH_F * c6grid * (inv_r6_nm * (1.0F - expmcr2 * poly) + sh_mask); } } @@ -594,24 +582,21 @@ gmx_opencl_inline float pmecorrF(float z2) const float FD1 = 0.50736591960530292870F; const float FD0 = 1.0F; - float z4; - float polyFN0, polyFN1, polyFD0, polyFD1; - - z4 = z2 * z2; + const float z4 = z2 * z2; - polyFD0 = FD4 * z4 + FD2; - polyFD1 = FD3 * z4 + FD1; - polyFD0 = polyFD0 * z4 + FD0; - polyFD0 = polyFD1 * z2 + polyFD0; + float polyFD0 = FD4 * z4 + FD2; + float polyFD1 = FD3 * z4 + FD1; + polyFD0 = polyFD0 * z4 + FD0; + polyFD0 = polyFD1 * z2 + polyFD0; polyFD0 = 1.0F / polyFD0; - polyFN0 = FN6 * z4 + FN4; - polyFN1 = FN5 * z4 + FN3; - polyFN0 = polyFN0 * z4 + FN2; - polyFN1 = polyFN1 * z4 + FN1; - polyFN0 = polyFN0 * z4 + FN0; - polyFN0 = polyFN1 * z2 + polyFN0; + float polyFN0 = FN6 * z4 + FN4; + float polyFN1 = FN5 * z4 + FN3; + polyFN0 = polyFN0 * z4 + FN2; + polyFN1 = polyFN1 * z4 + FN1; + polyFN0 = polyFN0 * z4 + FN0; + polyFN0 = polyFN1 * z2 + polyFN0; return polyFN0 * polyFD0; } @@ -868,12 +853,10 @@ gmx_opencl_inline void reduce_energy_pow2(volatile __local float* buf, volatile __global float* e_el, int tidx) { - int j; - int i = WARP_SIZE / 2; /* Can't just use i as loop variable because than nvcc refuses to unroll. */ - for (j = WARP_SIZE_LOG2 - 1; j > 0; j--) + for (int j = WARP_SIZE_LOG2 - 1; j > 0; j--) { if (tidx < i) {