From: Szilárd Páll <pall.szilard@gmail.com>
Date: Tue, 27 Oct 2020 14:13:45 +0000 (+0000)
Subject: Fix clang-tidy v10 warnigns in OpenCL kernels
X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=f73007c915985d63a9778b533edee4bc3ddab3bd;p=alexxy%2Fgromacs.git

Fix clang-tidy v10 warnigns in OpenCL kernels

Also made convert_sigma_epsilon_to_c6_c12() return a float2 with c6 and
c12 instead of returning results in pointers.
---

diff --git a/src/gromacs/ewald/pme_gather.clh b/src/gromacs/ewald/pme_gather.clh
index 8c7106acf9..98cac0f282 100644
--- a/src/gromacs/ewald/pme_gather.clh
+++ b/src/gromacs/ewald/pme_gather.clh
@@ -49,8 +49,8 @@
  *  \author Aleksei Iupinov <a.yupinov@gmail.com>
  */
 
-#include "pme_gpu_types.h"
 #include "pme_gpu_calculate_splines.clh"
+#include "pme_gpu_types.h"
 
 #ifndef COMPILE_GATHER_HELPERS_ONCE
 #    define COMPILE_GATHER_HELPERS_ONCE
@@ -442,9 +442,9 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_gather_kernel)(const struct PmeOpenCLKe
     if (numGrids == 2)
     {
         barrier(CLK_LOCAL_MEM_FENCE);
-        fx          = 0.0f;
-        fy          = 0.0f;
-        fz          = 0.0f;
+        fx          = 0.0F;
+        fy          = 0.0F;
+        fz          = 0.0F;
         chargeCheck = pme_gpu_check_atom_charge(gm_coefficientsB[atomIndexGlobal]);
         if (chargeCheck)
         {
@@ -473,8 +473,8 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_gather_kernel)(const struct PmeOpenCLKe
 #pragma unroll
             for (int i = 0; i < numIter; i++)
             {
-                const int outputIndexLocal  = i * iterThreads + threadLocalId;
-                const int outputIndexGlobal = get_group_id(XX) * blockForcesSize + outputIndexLocal;
+                const int outputIndexLocal = i * iterThreads + threadLocalId;
+                const int outputIndexGlobal = (int)get_group_id(XX) * blockForcesSize + outputIndexLocal;
                 const float outputForceComponent = sm_forces[outputIndexLocal];
                 gm_forces[outputIndexGlobal] += outputForceComponent;
             }
diff --git a/src/gromacs/ewald/pme_solve.clh b/src/gromacs/ewald/pme_solve.clh
index 0c21512b6f..ae96f584d5 100644
--- a/src/gromacs/ewald/pme_solve.clh
+++ b/src/gromacs/ewald/pme_solve.clh
@@ -70,9 +70,9 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_solve_kernel)(const struct PmeOpenCLKer
                                                        __global float2* __restrict__ gm_grid)
 {
     /* This kernel supports 2 different grid dimension orderings: YZX and XYZ */
-    int majorDim;
-    int middleDim;
-    int minorDim;
+    int majorDim  = 0;
+    int middleDim = 0;
+    int minorDim  = 0;
     if (gridOrdering == YZX)
     {
         majorDim  = YY;
@@ -163,9 +163,9 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_solve_kernel)(const struct PmeOpenCLKer
         /* We should skip the k-space point (0,0,0) */
         const bool notZeroPoint = (kMinor > 0) | (kMajor > 0) | (kMiddle > 0);
 
-        float mX;
-        float mY;
-        float mZ;
+        float mX = 0.0F;
+        float mY = 0.0F;
+        float mZ = 0.0F;
         if (gridOrdering == YZX)
         {
             mX = mMinor;
diff --git a/src/gromacs/ewald/pme_spread.clh b/src/gromacs/ewald/pme_spread.clh
index 3342120d51..9e887a9e6e 100644
--- a/src/gromacs/ewald/pme_spread.clh
+++ b/src/gromacs/ewald/pme_spread.clh
@@ -53,8 +53,8 @@
 
 #include "gromacs/gpu_utils/vectype_ops.clh"
 
-#include "pme_gpu_types.h"
 #include "pme_gpu_calculate_splines.clh"
+#include "pme_gpu_types.h"
 
 /*
  * This define affects the spline calculation behaviour in the kernel.
@@ -180,11 +180,10 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern
         /* Indices interpolation */
         if (orderIndex == 0)
         {
-            int          tableIndex;
-            int          tInt;
-            float        n;
-            float        t;
-            const float3 x = vload3(atomIndexLocal, sm_coordinates);
+            int          tableIndex = 0;
+            float        n          = 0.0F;
+            float        t          = 0.0F;
+            const float3 x          = vload3(atomIndexLocal, sm_coordinates);
 
             /* Accessing fields in fshOffset/nXYZ/recipbox/... with dimIndex offset
              * puts them into local memory(!) instead of accessing the constant memory directly.
@@ -225,7 +224,7 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern
 
             /* Fractional coordinates along box vectors, adding a positive shift to ensure t is positive for triclinic boxes */
             t                                 = (t + shift) * n;
-            tInt                              = (int)t;
+            const int tInt                    = (int)t;
             sm_fractCoords[sharedMemoryIndex] = t - (float)tInt;
             tableIndex += tInt;
             assert(tInt >= 0);
@@ -241,7 +240,6 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern
         const int chargeCheck = pme_gpu_check_atom_charge(sm_coefficients[atomIndexLocal]);
         if (chargeCheck)
         {
-            float div;
             int o = orderIndex; // This is an index that is set once for PME_GPU_PARALLEL_SPLINE == 1
 
             const float dr = sm_fractCoords[sharedMemoryIndex];
@@ -255,7 +253,7 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern
 #    pragma unroll order
             for (int k = 3; k < order; k++)
             {
-                div                     = 1.0F / ((float)k - 1.0F);
+                const float div         = 1.0F / ((float)k - 1.0F);
                 *SPLINE_DATA_PTR(k - 1) = div * dr * SPLINE_DATA(k - 2);
 #    pragma unroll
                 for (int l = 1; l < (k - 1); l++)
@@ -287,7 +285,7 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern
                 gm_dtheta[thetaGlobalIndex] = dtheta;
             }
 
-            div                         = 1.0F / (order - 1.0F);
+            const float div             = 1.0F / (order - 1.0F);
             *SPLINE_DATA_PTR(order - 1) = div * dr * SPLINE_DATA(order - 2);
 #    pragma unroll
             for (int k = 1; k < (order - 1); k++)
diff --git a/src/gromacs/nbnxm/opencl/CMakeLists.txt b/src/gromacs/nbnxm/opencl/CMakeLists.txt
index 69d86e96ba..dab11de278 100644
--- a/src/gromacs/nbnxm/opencl/CMakeLists.txt
+++ b/src/gromacs/nbnxm/opencl/CMakeLists.txt
@@ -79,7 +79,7 @@ foreach(ELEC_DEF IN LISTS ELEC_DEFS)
                 -Xclang -finclude-default-header  -D_${VENDOR}_SOURCE_
                 -DGMX_OCL_FASTGEN ${ELEC_DEF} ${VDW_DEF}
                 -Dc_nbnxnGpuClusterSize=${CLUSTER_SIZE}
-                -Dc_nbnxnMinDistanceSquared=3.82e-07F
+                -DNBNXM_MIN_DISTANCE_SQUARED_VALUE_FLOAT=3.82e-07
                 -Dc_nbnxnGpuNumClusterPerSupercluster=8
                 -Dc_nbnxnGpuJgroupSize=4
                 -DIATYPE_SHMEM
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp
index 40eb905184..18c583937b 100644
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp
@@ -187,10 +187,13 @@ void nbnxn_gpu_compile_kernels(NbnxmGpu* nb)
         /* Here we pass macros and static const/constexpr int variables defined
          * in include files outside the opencl as macros, to avoid
          * including those files in the plain-C JIT compilation that happens
-         * at runtime. */
+         * at runtime.
+         * Note that we need to re-add the the suffix to the floating point literals
+         * passed the to the kernel to avoid type ambiguity.
+         */
         extraDefines += gmx::formatString(
                 " -Dc_nbnxnGpuClusterSize=%d"
-                " -Dc_nbnxnMinDistanceSquared=%g"
+                " -DNBNXM_MIN_DISTANCE_SQUARED_VALUE_FLOAT=%g"
                 " -Dc_nbnxnGpuNumClusterPerSupercluster=%d"
                 " -Dc_nbnxnGpuJgroupSize=%d"
                 "%s",
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh
index b238843a91..60242b0ba8 100644
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh
@@ -415,19 +415,18 @@ __kernel void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_opencl)
                                 const float c12    = ljcp_i.y * ljcp_j.y;
 #    else
                                 /* LJ 2^(1/6)*sigma and 12*epsilon */
-                                const float sigma   = ljcp_i.x + ljcp_j.x;
-                                const float epsilon = ljcp_i.y * ljcp_j.y;
+                                const float  sigma   = ljcp_i.x + ljcp_j.x;
+                                const float  epsilon = ljcp_i.y * ljcp_j.y;
 #        if defined CALC_ENERGIES || defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
-                                float       c6, c12;
-                                convert_sigma_epsilon_to_c6_c12(sigma, epsilon, &c6, &c12);
+                                const float2 c6c12 = convert_sigma_epsilon_to_c6_c12(sigma, epsilon);
+                                const float  c6    = c6c12.x;
+                                const float  c12   = c6c12.y;
 #        endif
 #    endif /* LJ_COMB_GEOM */
 #endif     /* LJ_COMB */
 
                                 // Ensure distance do not become so small that r^-12 overflows.
-                                // Cast to float to ensure the correct built-in max() function
-                                // is called.
-                                r2 = max(r2, (float)c_nbnxnMinDistanceSquared);
+                                r2 = max(r2, c_nbnxnMinDistanceSquared);
 
                                 const float inv_r  = rsqrt(r2);
                                 const float inv_r2 = inv_r * inv_r;
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh
index 16dd0c4962..dfafdcbf28 100644
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh
@@ -166,7 +166,7 @@ nbnxn_kernel_prune_rolling_opencl
     /* loop over the j clusters = seen by any of the atoms in the current super-cluster */
     for (int j4 = cij4_start + tidxz; j4 < cij4_end; j4 += NTHREAD_Z)
     {
-        unsigned int imaskFull, imaskCheck, imaskNew;
+        unsigned int imaskFull = 0, imaskCheck = 0, imaskNew = 0;
 
         if (haveFreshList)
         {
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh
index 7d70a5908a..e8702e7c74 100644
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh
@@ -234,6 +234,9 @@ typedef struct
 /*! i-cluster interaction mask for a super-cluster with all c_nbnxnGpuNumClusterPerSupercluster bits set */
 __constant unsigned supercl_interaction_mask = ((1U << c_nbnxnGpuNumClusterPerSupercluster) - 1U);
 
+/*! Minimum single precision threshold for r^2 to avoid r^-12 overflow. */
+__constant float c_nbnxnMinDistanceSquared = NBNXM_MIN_DISTANCE_SQUARED_VALUE_FLOAT;
+
 gmx_opencl_inline void preloadCj4Generic(__local int*        sm_cjPreload,
                                          const __global int* gm_cj,
                                          int                 tidxi,
@@ -327,14 +330,14 @@ loadCj(CjType cjs, const __global int gmx_unused* gm_cj, int jm, int gmx_unused
 }
 
 /*! Convert LJ sigma,epsilon parameters to C6,C12. */
-gmx_opencl_inline void convert_sigma_epsilon_to_c6_c12(const float sigma, const float epsilon, float* c6, float* c12)
-{
-    float sigma2, sigma6;
-
-    sigma2 = sigma * sigma;
-    sigma6 = sigma2 * sigma2 * sigma2;
-    *c6    = epsilon * sigma6;
-    *c12   = *c6 * sigma6;
+gmx_opencl_inline float2 convert_sigma_epsilon_to_c6_c12(const float sigma, const float epsilon)
+{
+    const float  sigma2 = sigma * sigma;
+    const float  sigma6 = sigma2 * sigma2 * sigma2;
+    const float  c6     = epsilon * sigma6;
+    const float2 c6c12  = (float2)(c6,           /* c6 */
+                                  c6 * sigma6); /* c12 */
+    return c6c12;
 }
 
 
@@ -346,17 +349,15 @@ gmx_opencl_inline void calculate_force_switch_F(const cl_nbparam_params_t* nbpar
                                                 float                      r2,
                                                 float*                     F_invr)
 {
-    float r, r_switch;
-
     /* force switch constants */
-    float disp_shift_V2 = nbparam->dispersion_shift.c2;
-    float disp_shift_V3 = nbparam->dispersion_shift.c3;
-    float repu_shift_V2 = nbparam->repulsion_shift.c2;
-    float repu_shift_V3 = nbparam->repulsion_shift.c3;
+    const float disp_shift_V2 = nbparam->dispersion_shift.c2;
+    const float disp_shift_V3 = nbparam->dispersion_shift.c3;
+    const float repu_shift_V2 = nbparam->repulsion_shift.c2;
+    const float repu_shift_V3 = nbparam->repulsion_shift.c3;
 
-    r        = r2 * inv_r;
-    r_switch = r - nbparam->rvdw_switch;
-    r_switch = r_switch >= 0.0F ? r_switch : 0.0F;
+    const float r        = r2 * inv_r;
+    float       r_switch = r - nbparam->rvdw_switch;
+    r_switch             = r_switch >= 0.0F ? r_switch : 0.0F;
 
     *F_invr += -c6 * (disp_shift_V2 + disp_shift_V3 * r_switch) * r_switch * r_switch * inv_r
                + c12 * (repu_shift_V2 + repu_shift_V3 * r_switch) * r_switch * r_switch * inv_r;
@@ -371,22 +372,20 @@ gmx_opencl_inline void calculate_force_switch_F_E(const cl_nbparam_params_t* nbp
                                                   float*                     F_invr,
                                                   float*                     E_lj)
 {
-    float r, r_switch;
-
     /* force switch constants */
-    float disp_shift_V2 = nbparam->dispersion_shift.c2;
-    float disp_shift_V3 = nbparam->dispersion_shift.c3;
-    float repu_shift_V2 = nbparam->repulsion_shift.c2;
-    float repu_shift_V3 = nbparam->repulsion_shift.c3;
+    const float disp_shift_V2 = nbparam->dispersion_shift.c2;
+    const float disp_shift_V3 = nbparam->dispersion_shift.c3;
+    const float repu_shift_V2 = nbparam->repulsion_shift.c2;
+    const float repu_shift_V3 = nbparam->repulsion_shift.c3;
 
-    float disp_shift_F2 = nbparam->dispersion_shift.c2 / 3;
-    float disp_shift_F3 = nbparam->dispersion_shift.c3 / 4;
-    float repu_shift_F2 = nbparam->repulsion_shift.c2 / 3;
-    float repu_shift_F3 = nbparam->repulsion_shift.c3 / 4;
+    const float disp_shift_F2 = nbparam->dispersion_shift.c2 / 3;
+    const float disp_shift_F3 = nbparam->dispersion_shift.c3 / 4;
+    const float repu_shift_F2 = nbparam->repulsion_shift.c2 / 3;
+    const float repu_shift_F3 = nbparam->repulsion_shift.c3 / 4;
 
-    r        = r2 * inv_r;
-    r_switch = r - nbparam->rvdw_switch;
-    r_switch = r_switch >= 0.0F ? r_switch : 0.0F;
+    const float r        = r2 * inv_r;
+    float       r_switch = r - nbparam->rvdw_switch;
+    r_switch             = r_switch >= 0.0F ? r_switch : 0.0F;
 
     *F_invr += -c6 * (disp_shift_V2 + disp_shift_V3 * r_switch) * r_switch * r_switch * inv_r
                + c12 * (repu_shift_V2 + repu_shift_V3 * r_switch) * r_switch * r_switch * inv_r;
@@ -401,25 +400,24 @@ gmx_opencl_inline void calculate_potential_switch_F(const cl_nbparam_params_t* n
                                                     float*                     F_invr,
                                                     const float*               E_lj)
 {
-    float r, r_switch;
-    float sw, dsw;
-
     /* potential switch constants */
-    float switch_V3 = nbparam->vdw_switch.c3;
-    float switch_V4 = nbparam->vdw_switch.c4;
-    float switch_V5 = nbparam->vdw_switch.c5;
-    float switch_F2 = nbparam->vdw_switch.c3;
-    float switch_F3 = nbparam->vdw_switch.c4;
-    float switch_F4 = nbparam->vdw_switch.c5;
+    const float switch_V3 = nbparam->vdw_switch.c3;
+    const float switch_V4 = nbparam->vdw_switch.c4;
+    const float switch_V5 = nbparam->vdw_switch.c5;
+    const float switch_F2 = nbparam->vdw_switch.c3;
+    const float switch_F3 = nbparam->vdw_switch.c4;
+    const float switch_F4 = nbparam->vdw_switch.c5;
 
-    r        = r2 * inv_r;
-    r_switch = r - nbparam->rvdw_switch;
+    const float r        = r2 * inv_r;
+    const float r_switch = r - nbparam->rvdw_switch;
 
     /* Unlike in the F+E kernel, conditional is faster here */
     if (r_switch > 0.0F)
     {
-        sw  = 1.0F + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch * r_switch * r_switch;
-        dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch;
+        const float sw = 1.0F
+                         + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch
+                                   * r_switch * r_switch;
+        const float dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch;
 
         *F_invr = (*F_invr) * sw - inv_r * (*E_lj) * dsw;
     }
@@ -432,24 +430,22 @@ gmx_opencl_inline void calculate_potential_switch_F_E(const cl_nbparam_params_t*
                                                       float*                     F_invr,
                                                       float*                     E_lj)
 {
-    float r, r_switch;
-    float sw, dsw;
-
     /* potential switch constants */
-    float switch_V3 = nbparam->vdw_switch.c3;
-    float switch_V4 = nbparam->vdw_switch.c4;
-    float switch_V5 = nbparam->vdw_switch.c5;
-    float switch_F2 = nbparam->vdw_switch.c3;
-    float switch_F3 = nbparam->vdw_switch.c4;
-    float switch_F4 = nbparam->vdw_switch.c5;
+    const float switch_V3 = nbparam->vdw_switch.c3;
+    const float switch_V4 = nbparam->vdw_switch.c4;
+    const float switch_V5 = nbparam->vdw_switch.c5;
+    const float switch_F2 = nbparam->vdw_switch.c3;
+    const float switch_F3 = nbparam->vdw_switch.c4;
+    const float switch_F4 = nbparam->vdw_switch.c5;
 
-    r        = r2 * inv_r;
-    r_switch = r - nbparam->rvdw_switch;
-    r_switch = r_switch >= 0.0F ? r_switch : 0.0F;
+    const float r        = r2 * inv_r;
+    float       r_switch = r - nbparam->rvdw_switch;
+    r_switch             = r_switch >= 0.0F ? r_switch : 0.0F;
 
     /* Unlike in the F-only kernel, masking is faster here */
-    sw  = 1.0F + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch * r_switch * r_switch;
-    dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch;
+    const float sw =
+            1.0F + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch * r_switch * r_switch;
+    const float dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch;
 
     *F_invr = (*F_invr) * sw - inv_r * (*E_lj) * dsw;
     *E_lj *= sw;
@@ -467,15 +463,13 @@ gmx_opencl_inline void calculate_lj_ewald_comb_geom_F(__constant const float* nb
                                                       float                   lje_coeff6_6,
                                                       float*                  F_invr)
 {
-    float c6grid, inv_r6_nm, cr2, expmcr2, poly;
-
-    c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej];
+    const float c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej];
 
     /* Recalculate inv_r6 without exclusion mask */
-    inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
-    cr2       = lje_coeff2 * r2;
-    expmcr2   = exp(-cr2);
-    poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
+    const float inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
+    const float cr2       = lje_coeff2 * r2;
+    const float expmcr2   = exp(-cr2);
+    const float poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
 
     /* Subtract the grid force from the total LJ force */
     *F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2;
@@ -496,21 +490,19 @@ gmx_opencl_inline void calculate_lj_ewald_comb_geom_F_E(__constant const float*
                                                         float*                     F_invr,
                                                         float*                     E_lj)
 {
-    float c6grid, inv_r6_nm, cr2, expmcr2, poly, sh_mask;
-
-    c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej];
+    const float c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej];
 
     /* Recalculate inv_r6 without exclusion mask */
-    inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
-    cr2       = lje_coeff2 * r2;
-    expmcr2   = exp(-cr2);
-    poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
+    const float inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
+    const float cr2       = lje_coeff2 * r2;
+    const float expmcr2   = exp(-cr2);
+    const float poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
 
     /* Subtract the grid force from the total LJ force */
     *F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2;
 
     /* Shift should be applied only to real LJ pairs */
-    sh_mask = nbparam->sh_lj_ewald * int_bit;
+    const float sh_mask = nbparam->sh_lj_ewald * int_bit;
     *E_lj += ONE_SIXTH_F * c6grid * (inv_r6_nm * (1.0F - expmcr2 * poly) + sh_mask);
 }
 
@@ -532,32 +524,28 @@ gmx_opencl_inline void calculate_lj_ewald_comb_LB_F_E(__constant const float*
                                                       float*                     F_invr,
                                                       float*                     E_lj)
 {
-    float c6grid, inv_r6_nm, cr2, expmcr2, poly;
-    float sigma, sigma2, epsilon;
-
     /* sigma and epsilon are scaled to give 6*C6 */
-    sigma = nbfp_comb_climg2d[2 * typei] + nbfp_comb_climg2d[2 * typej];
+    const float sigma = nbfp_comb_climg2d[2 * typei] + nbfp_comb_climg2d[2 * typej];
 
-    epsilon = nbfp_comb_climg2d[2 * typei + 1] * nbfp_comb_climg2d[2 * typej + 1];
+    const float epsilon = nbfp_comb_climg2d[2 * typei + 1] * nbfp_comb_climg2d[2 * typej + 1];
 
-    sigma2 = sigma * sigma;
-    c6grid = epsilon * sigma2 * sigma2 * sigma2;
+    const float sigma2 = sigma * sigma;
+    const float c6grid = epsilon * sigma2 * sigma2 * sigma2;
 
     /* Recalculate inv_r6 without exclusion mask */
-    inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
-    cr2       = lje_coeff2 * r2;
-    expmcr2   = exp(-cr2);
-    poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
+    const float inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
+    const float cr2       = lje_coeff2 * r2;
+    const float expmcr2   = exp(-cr2);
+    const float poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
 
     /* Subtract the grid force from the total LJ force */
     *F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2;
 
     if (with_E_lj)
     {
-        float sh_mask;
 
         /* Shift should be applied only to real LJ pairs */
-        sh_mask = nbparam->sh_lj_ewald * int_bit;
+        const float sh_mask = nbparam->sh_lj_ewald * int_bit;
         *E_lj += ONE_SIXTH_F * c6grid * (inv_r6_nm * (1.0F - expmcr2 * poly) + sh_mask);
     }
 }
@@ -594,24 +582,21 @@ gmx_opencl_inline float pmecorrF(float z2)
     const float FD1 = 0.50736591960530292870F;
     const float FD0 = 1.0F;
 
-    float z4;
-    float polyFN0, polyFN1, polyFD0, polyFD1;
-
-    z4 = z2 * z2;
+    const float z4 = z2 * z2;
 
-    polyFD0 = FD4 * z4 + FD2;
-    polyFD1 = FD3 * z4 + FD1;
-    polyFD0 = polyFD0 * z4 + FD0;
-    polyFD0 = polyFD1 * z2 + polyFD0;
+    float polyFD0 = FD4 * z4 + FD2;
+    float polyFD1 = FD3 * z4 + FD1;
+    polyFD0       = polyFD0 * z4 + FD0;
+    polyFD0       = polyFD1 * z2 + polyFD0;
 
     polyFD0 = 1.0F / polyFD0;
 
-    polyFN0 = FN6 * z4 + FN4;
-    polyFN1 = FN5 * z4 + FN3;
-    polyFN0 = polyFN0 * z4 + FN2;
-    polyFN1 = polyFN1 * z4 + FN1;
-    polyFN0 = polyFN0 * z4 + FN0;
-    polyFN0 = polyFN1 * z2 + polyFN0;
+    float polyFN0 = FN6 * z4 + FN4;
+    float polyFN1 = FN5 * z4 + FN3;
+    polyFN0       = polyFN0 * z4 + FN2;
+    polyFN1       = polyFN1 * z4 + FN1;
+    polyFN0       = polyFN0 * z4 + FN0;
+    polyFN0       = polyFN1 * z2 + polyFN0;
 
     return polyFN0 * polyFD0;
 }
@@ -868,12 +853,10 @@ gmx_opencl_inline void reduce_energy_pow2(volatile __local float*  buf,
                                           volatile __global float* e_el,
                                           int                      tidx)
 {
-    int j;
-
     int i = WARP_SIZE / 2;
 
     /* Can't just use i as loop variable because than nvcc refuses to unroll. */
-    for (j = WARP_SIZE_LOG2 - 1; j > 0; j--)
+    for (int j = WARP_SIZE_LOG2 - 1; j > 0; j--)
     {
         if (tidx < i)
         {