Fix clang-tidy v10 warnigns in OpenCL kernels

author Szilárd Páll <pall.szilard@gmail.com>

Tue, 27 Oct 2020 14:13:45 +0000 (14:13 +0000)

committer Mark Abraham <mark.j.abraham@gmail.com>

Tue, 27 Oct 2020 14:13:45 +0000 (14:13 +0000)
author Szilárd Páll <pall.szilard@gmail.com>
Tue, 27 Oct 2020 14:13:45 +0000 (14:13 +0000)
committer Mark Abraham <mark.j.abraham@gmail.com>
Tue, 27 Oct 2020 14:13:45 +0000 (14:13 +0000)
diff --git a/src/gromacs/ewald/pme_gather.clh b/src/gromacs/ewald/pme_gather.clh

index 8c7106acf9138fce11e7e6d51d45a2263026424e..98cac0f2824c4ba445485ba3f167e2f93544dea4 100644 (file)
--- a/src/gromacs/ewald/pme_gather.clh
+++ b/src/gromacs/ewald/pme_gather.clh
@@ -49,8 +49,8 @@
   *  \author Aleksei Iupinov <a.yupinov@gmail.com>
   */
  
   *  \author Aleksei Iupinov <a.yupinov@gmail.com>
   */
  
-#include "pme_gpu_types.h"
  #include "pme_gpu_calculate_splines.clh"
  #include "pme_gpu_calculate_splines.clh"
+#include "pme_gpu_types.h"
  
  #ifndef COMPILE_GATHER_HELPERS_ONCE
  #    define COMPILE_GATHER_HELPERS_ONCE
  
  #ifndef COMPILE_GATHER_HELPERS_ONCE
  #    define COMPILE_GATHER_HELPERS_ONCE
@@ -442,9 +442,9 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_gather_kernel)(const struct PmeOpenCLKe
      if (numGrids == 2)
      {
          barrier(CLK_LOCAL_MEM_FENCE);
      if (numGrids == 2)
      {
          barrier(CLK_LOCAL_MEM_FENCE);
-        fx          = 0.0f;
-        fy          = 0.0f;
-        fz          = 0.0f;
+        fx          = 0.0F;
+        fy          = 0.0F;
+        fz          = 0.0F;
          chargeCheck = pme_gpu_check_atom_charge(gm_coefficientsB[atomIndexGlobal]);
          if (chargeCheck)
          {
          chargeCheck = pme_gpu_check_atom_charge(gm_coefficientsB[atomIndexGlobal]);
          if (chargeCheck)
          {
@@ -473,8 +473,8 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_gather_kernel)(const struct PmeOpenCLKe
  #pragma unroll
              for (int i = 0; i < numIter; i++)
              {
  #pragma unroll
              for (int i = 0; i < numIter; i++)
              {
-                const int outputIndexLocal  = i * iterThreads + threadLocalId;
-                const int outputIndexGlobal = get_group_id(XX) * blockForcesSize + outputIndexLocal;
+                const int outputIndexLocal = i * iterThreads + threadLocalId;
+                const int outputIndexGlobal = (int)get_group_id(XX) * blockForcesSize + outputIndexLocal;
                  const float outputForceComponent = sm_forces[outputIndexLocal];
                  gm_forces[outputIndexGlobal] += outputForceComponent;
              }
                  const float outputForceComponent = sm_forces[outputIndexLocal];
                  gm_forces[outputIndexGlobal] += outputForceComponent;
              }
diff --git a/src/gromacs/ewald/pme_solve.clh b/src/gromacs/ewald/pme_solve.clh

index 0c21512b6fbb28a3640af598c31f8e8c545a4ced..ae96f584d531891b3d565e438e2781c93f8e3c44 100644 (file)
--- a/src/gromacs/ewald/pme_solve.clh
+++ b/src/gromacs/ewald/pme_solve.clh
@@ -70,9 +70,9 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_solve_kernel)(const struct PmeOpenCLKer
                                                         __global float2* __restrict__ gm_grid)
  {
      /* This kernel supports 2 different grid dimension orderings: YZX and XYZ */
                                                         __global float2* __restrict__ gm_grid)
  {
      /* This kernel supports 2 different grid dimension orderings: YZX and XYZ */
-    int majorDim;
-    int middleDim;
-    int minorDim;
+    int majorDim  = 0;
+    int middleDim = 0;
+    int minorDim  = 0;
      if (gridOrdering == YZX)
      {
          majorDim  = YY;
      if (gridOrdering == YZX)
      {
          majorDim  = YY;
@@ -163,9 +163,9 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_solve_kernel)(const struct PmeOpenCLKer
          /* We should skip the k-space point (0,0,0) */
          const bool notZeroPoint = (kMinor > 0) | (kMajor > 0) | (kMiddle > 0);
  
          /* We should skip the k-space point (0,0,0) */
          const bool notZeroPoint = (kMinor > 0) | (kMajor > 0) | (kMiddle > 0);
  
-        float mX;
-        float mY;
-        float mZ;
+        float mX = 0.0F;
+        float mY = 0.0F;
+        float mZ = 0.0F;
          if (gridOrdering == YZX)
          {
              mX = mMinor;
          if (gridOrdering == YZX)
          {
              mX = mMinor;
diff --git a/src/gromacs/ewald/pme_spread.clh b/src/gromacs/ewald/pme_spread.clh

index 3342120d51d46a67528ffebe718ed22e80f763bf..9e887a9e6efb7fe14eab0007065baf83b7f46ac4 100644 (file)
--- a/src/gromacs/ewald/pme_spread.clh
+++ b/src/gromacs/ewald/pme_spread.clh
@@ -53,8 +53,8 @@
  
  #include "gromacs/gpu_utils/vectype_ops.clh"
  
  
  #include "gromacs/gpu_utils/vectype_ops.clh"
  
-#include "pme_gpu_types.h"
  #include "pme_gpu_calculate_splines.clh"
  #include "pme_gpu_calculate_splines.clh"
+#include "pme_gpu_types.h"
  
  /*
   * This define affects the spline calculation behaviour in the kernel.
  
  /*
   * This define affects the spline calculation behaviour in the kernel.
@@ -180,11 +180,10 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern
          /* Indices interpolation */
          if (orderIndex == 0)
          {
          /* Indices interpolation */
          if (orderIndex == 0)
          {
-            int          tableIndex;
-            int          tInt;
-            float        n;
-            float        t;
-            const float3 x = vload3(atomIndexLocal, sm_coordinates);
+            int          tableIndex = 0;
+            float        n          = 0.0F;
+            float        t          = 0.0F;
+            const float3 x          = vload3(atomIndexLocal, sm_coordinates);
  
              /* Accessing fields in fshOffset/nXYZ/recipbox/... with dimIndex offset
               * puts them into local memory(!) instead of accessing the constant memory directly.
  
              /* Accessing fields in fshOffset/nXYZ/recipbox/... with dimIndex offset
               * puts them into local memory(!) instead of accessing the constant memory directly.
@@ -225,7 +224,7 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern
  
              /* Fractional coordinates along box vectors, adding a positive shift to ensure t is positive for triclinic boxes */
              t                                 = (t + shift) * n;
  
              /* Fractional coordinates along box vectors, adding a positive shift to ensure t is positive for triclinic boxes */
              t                                 = (t + shift) * n;
-            tInt                              = (int)t;
+            const int tInt                    = (int)t;
              sm_fractCoords[sharedMemoryIndex] = t - (float)tInt;
              tableIndex += tInt;
              assert(tInt >= 0);
              sm_fractCoords[sharedMemoryIndex] = t - (float)tInt;
              tableIndex += tInt;
              assert(tInt >= 0);
@@ -241,7 +240,6 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern
          const int chargeCheck = pme_gpu_check_atom_charge(sm_coefficients[atomIndexLocal]);
          if (chargeCheck)
          {
          const int chargeCheck = pme_gpu_check_atom_charge(sm_coefficients[atomIndexLocal]);
          if (chargeCheck)
          {
-            float div;
              int o = orderIndex; // This is an index that is set once for PME_GPU_PARALLEL_SPLINE == 1
  
              const float dr = sm_fractCoords[sharedMemoryIndex];
              int o = orderIndex; // This is an index that is set once for PME_GPU_PARALLEL_SPLINE == 1
  
              const float dr = sm_fractCoords[sharedMemoryIndex];
@@ -255,7 +253,7 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern
  #    pragma unroll order
              for (int k = 3; k < order; k++)
              {
  #    pragma unroll order
              for (int k = 3; k < order; k++)
              {
-                div                     = 1.0F / ((float)k - 1.0F);
+                const float div         = 1.0F / ((float)k - 1.0F);
                  *SPLINE_DATA_PTR(k - 1) = div * dr * SPLINE_DATA(k - 2);
  #    pragma unroll
                  for (int l = 1; l < (k - 1); l++)
                  *SPLINE_DATA_PTR(k - 1) = div * dr * SPLINE_DATA(k - 2);
  #    pragma unroll
                  for (int l = 1; l < (k - 1); l++)
@@ -287,7 +285,7 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern
                  gm_dtheta[thetaGlobalIndex] = dtheta;
              }
  
                  gm_dtheta[thetaGlobalIndex] = dtheta;
              }
  
-            div                         = 1.0F / (order - 1.0F);
+            const float div             = 1.0F / (order - 1.0F);
              *SPLINE_DATA_PTR(order - 1) = div * dr * SPLINE_DATA(order - 2);
  #    pragma unroll
              for (int k = 1; k < (order - 1); k++)
              *SPLINE_DATA_PTR(order - 1) = div * dr * SPLINE_DATA(order - 2);
  #    pragma unroll
              for (int k = 1; k < (order - 1); k++)
diff --git a/src/gromacs/nbnxm/opencl/CMakeLists.txt b/src/gromacs/nbnxm/opencl/CMakeLists.txt

index 69d86e96ba2bcbaa229bef10f3b5acd7e369af94..dab11de27837bc842a8dafe168f84117ef0b6ea6 100644 (file)
--- a/src/gromacs/nbnxm/opencl/CMakeLists.txt
+++ b/src/gromacs/nbnxm/opencl/CMakeLists.txt
@@ -79,7 +79,7 @@ foreach(ELEC_DEF IN LISTS ELEC_DEFS)
                  -Xclang -finclude-default-header  -D_${VENDOR}_SOURCE_
                  -DGMX_OCL_FASTGEN ${ELEC_DEF} ${VDW_DEF}
                  -Dc_nbnxnGpuClusterSize=${CLUSTER_SIZE}
                  -Xclang -finclude-default-header  -D_${VENDOR}_SOURCE_
                  -DGMX_OCL_FASTGEN ${ELEC_DEF} ${VDW_DEF}
                  -Dc_nbnxnGpuClusterSize=${CLUSTER_SIZE}
-                -Dc_nbnxnMinDistanceSquared=3.82e-07F
+                -DNBNXM_MIN_DISTANCE_SQUARED_VALUE_FLOAT=3.82e-07
                  -Dc_nbnxnGpuNumClusterPerSupercluster=8
                  -Dc_nbnxnGpuJgroupSize=4
                  -DIATYPE_SHMEM
                  -Dc_nbnxnGpuNumClusterPerSupercluster=8
                  -Dc_nbnxnGpuJgroupSize=4
                  -DIATYPE_SHMEM
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp

index 40eb905184bd1c5ae2405639f5a288f76b818b78..18c583937b6e50caf78c994d0ea3c59461b0b4b3 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp
@@ -187,10 +187,13 @@ void nbnxn_gpu_compile_kernels(NbnxmGpu* nb)
          /* Here we pass macros and static const/constexpr int variables defined
           * in include files outside the opencl as macros, to avoid
           * including those files in the plain-C JIT compilation that happens
          /* Here we pass macros and static const/constexpr int variables defined
           * in include files outside the opencl as macros, to avoid
           * including those files in the plain-C JIT compilation that happens
-         * at runtime. */
+         * at runtime.
+         * Note that we need to re-add the the suffix to the floating point literals
+         * passed the to the kernel to avoid type ambiguity.
+         */
          extraDefines += gmx::formatString(
                  " -Dc_nbnxnGpuClusterSize=%d"
          extraDefines += gmx::formatString(
                  " -Dc_nbnxnGpuClusterSize=%d"
-                " -Dc_nbnxnMinDistanceSquared=%g"
+                " -DNBNXM_MIN_DISTANCE_SQUARED_VALUE_FLOAT=%g"
                  " -Dc_nbnxnGpuNumClusterPerSupercluster=%d"
                  " -Dc_nbnxnGpuJgroupSize=%d"
                  "%s",
                  " -Dc_nbnxnGpuNumClusterPerSupercluster=%d"
                  " -Dc_nbnxnGpuJgroupSize=%d"
                  "%s",
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh

index b238843a91efe632b73285eca77573db6689c97e..60242b0ba87fb3fb6e494d67d37eb6f62ba9f119 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh
@@ -415,19 +415,18 @@ __kernel void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_opencl)
                                  const float c12    = ljcp_i.y * ljcp_j.y;
  #    else
                                  /* LJ 2^(1/6)*sigma and 12*epsilon */
                                  const float c12    = ljcp_i.y * ljcp_j.y;
  #    else
                                  /* LJ 2^(1/6)*sigma and 12*epsilon */
-                                const float sigma   = ljcp_i.x + ljcp_j.x;
-                                const float epsilon = ljcp_i.y * ljcp_j.y;
+                                const float  sigma   = ljcp_i.x + ljcp_j.x;
+                                const float  epsilon = ljcp_i.y * ljcp_j.y;
  #        if defined CALC_ENERGIES || defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
  #        if defined CALC_ENERGIES || defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
-                                float       c6, c12;
-                                convert_sigma_epsilon_to_c6_c12(sigma, epsilon, &c6, &c12);
+                                const float2 c6c12 = convert_sigma_epsilon_to_c6_c12(sigma, epsilon);
+                                const float  c6    = c6c12.x;
+                                const float  c12   = c6c12.y;
  #        endif
  #    endif /* LJ_COMB_GEOM */
  #endif     /* LJ_COMB */
  
                                  // Ensure distance do not become so small that r^-12 overflows.
  #        endif
  #    endif /* LJ_COMB_GEOM */
  #endif     /* LJ_COMB */
  
                                  // Ensure distance do not become so small that r^-12 overflows.
-                                // Cast to float to ensure the correct built-in max() function
-                                // is called.
-                                r2 = max(r2, (float)c_nbnxnMinDistanceSquared);
+                                r2 = max(r2, c_nbnxnMinDistanceSquared);
  
                                  const float inv_r  = rsqrt(r2);
                                  const float inv_r2 = inv_r * inv_r;
  
                                  const float inv_r  = rsqrt(r2);
                                  const float inv_r2 = inv_r * inv_r;
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh

index 16dd0c4962dbf0b853403b9ec955b6029b1d4e16..dfafdcbf28d2ca2adf115661b94a47870a3f1078 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh
@@ -166,7 +166,7 @@ nbnxn_kernel_prune_rolling_opencl
      /* loop over the j clusters = seen by any of the atoms in the current super-cluster */
      for (int j4 = cij4_start + tidxz; j4 < cij4_end; j4 += NTHREAD_Z)
      {
      /* loop over the j clusters = seen by any of the atoms in the current super-cluster */
      for (int j4 = cij4_start + tidxz; j4 < cij4_end; j4 += NTHREAD_Z)
      {
-        unsigned int imaskFull, imaskCheck, imaskNew;
+        unsigned int imaskFull = 0, imaskCheck = 0, imaskNew = 0;
  
          if (haveFreshList)
          {
  
          if (haveFreshList)
          {
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh

index 7d70a5908abf11b4595df527e7560e7e1fdb38a9..e8702e7c74ba2c3ea55469232c739a989efe3ee6 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh
@@ -234,6 +234,9 @@ typedef struct
  /*! i-cluster interaction mask for a super-cluster with all c_nbnxnGpuNumClusterPerSupercluster bits set */
  __constant unsigned supercl_interaction_mask = ((1U << c_nbnxnGpuNumClusterPerSupercluster) - 1U);
  
  /*! i-cluster interaction mask for a super-cluster with all c_nbnxnGpuNumClusterPerSupercluster bits set */
  __constant unsigned supercl_interaction_mask = ((1U << c_nbnxnGpuNumClusterPerSupercluster) - 1U);
  
+/*! Minimum single precision threshold for r^2 to avoid r^-12 overflow. */
+__constant float c_nbnxnMinDistanceSquared = NBNXM_MIN_DISTANCE_SQUARED_VALUE_FLOAT;
+
  gmx_opencl_inline void preloadCj4Generic(__local int*        sm_cjPreload,
                                           const __global int* gm_cj,
                                           int                 tidxi,
  gmx_opencl_inline void preloadCj4Generic(__local int*        sm_cjPreload,
                                           const __global int* gm_cj,
                                           int                 tidxi,
@@ -327,14 +330,14 @@ loadCj(CjType cjs, const __global int gmx_unused* gm_cj, int jm, int gmx_unused
  }
  
  /*! Convert LJ sigma,epsilon parameters to C6,C12. */
  }
  
  /*! Convert LJ sigma,epsilon parameters to C6,C12. */
-gmx_opencl_inline void convert_sigma_epsilon_to_c6_c12(const float sigma, const float epsilon, float* c6, float* c12)
-{
-    float sigma2, sigma6;
-
-    sigma2 = sigma * sigma;
-    sigma6 = sigma2 * sigma2 * sigma2;
-    *c6    = epsilon * sigma6;
-    *c12   = *c6 * sigma6;
+gmx_opencl_inline float2 convert_sigma_epsilon_to_c6_c12(const float sigma, const float epsilon)
+{
+    const float  sigma2 = sigma * sigma;
+    const float  sigma6 = sigma2 * sigma2 * sigma2;
+    const float  c6     = epsilon * sigma6;
+    const float2 c6c12  = (float2)(c6,           /* c6 */
+                                  c6 * sigma6); /* c12 */
+    return c6c12;
  }
  
  
  }
  
  
@@ -346,17 +349,15 @@ gmx_opencl_inline void calculate_force_switch_F(const cl_nbparam_params_t* nbpar
                                                  float                      r2,
                                                  float*                     F_invr)
  {
                                                  float                      r2,
                                                  float*                     F_invr)
  {
-    float r, r_switch;
-
      /* force switch constants */
      /* force switch constants */
-    float disp_shift_V2 = nbparam->dispersion_shift.c2;
-    float disp_shift_V3 = nbparam->dispersion_shift.c3;
-    float repu_shift_V2 = nbparam->repulsion_shift.c2;
-    float repu_shift_V3 = nbparam->repulsion_shift.c3;
+    const float disp_shift_V2 = nbparam->dispersion_shift.c2;
+    const float disp_shift_V3 = nbparam->dispersion_shift.c3;
+    const float repu_shift_V2 = nbparam->repulsion_shift.c2;
+    const float repu_shift_V3 = nbparam->repulsion_shift.c3;
  
  
-    r        = r2 * inv_r;
-    r_switch = r - nbparam->rvdw_switch;
-    r_switch = r_switch >= 0.0F ? r_switch : 0.0F;
+    const float r        = r2 * inv_r;
+    float       r_switch = r - nbparam->rvdw_switch;
+    r_switch             = r_switch >= 0.0F ? r_switch : 0.0F;
  
      *F_invr += -c6 * (disp_shift_V2 + disp_shift_V3 * r_switch) * r_switch * r_switch * inv_r
                 + c12 * (repu_shift_V2 + repu_shift_V3 * r_switch) * r_switch * r_switch * inv_r;
  
      *F_invr += -c6 * (disp_shift_V2 + disp_shift_V3 * r_switch) * r_switch * r_switch * inv_r
                 + c12 * (repu_shift_V2 + repu_shift_V3 * r_switch) * r_switch * r_switch * inv_r;
@@ -371,22 +372,20 @@ gmx_opencl_inline void calculate_force_switch_F_E(const cl_nbparam_params_t* nbp
                                                    float*                     F_invr,
                                                    float*                     E_lj)
  {
                                                    float*                     F_invr,
                                                    float*                     E_lj)
  {
-    float r, r_switch;
-
      /* force switch constants */
      /* force switch constants */
-    float disp_shift_V2 = nbparam->dispersion_shift.c2;
-    float disp_shift_V3 = nbparam->dispersion_shift.c3;
-    float repu_shift_V2 = nbparam->repulsion_shift.c2;
-    float repu_shift_V3 = nbparam->repulsion_shift.c3;
+    const float disp_shift_V2 = nbparam->dispersion_shift.c2;
+    const float disp_shift_V3 = nbparam->dispersion_shift.c3;
+    const float repu_shift_V2 = nbparam->repulsion_shift.c2;
+    const float repu_shift_V3 = nbparam->repulsion_shift.c3;
  
  
-    float disp_shift_F2 = nbparam->dispersion_shift.c2 / 3;
-    float disp_shift_F3 = nbparam->dispersion_shift.c3 / 4;
-    float repu_shift_F2 = nbparam->repulsion_shift.c2 / 3;
-    float repu_shift_F3 = nbparam->repulsion_shift.c3 / 4;
+    const float disp_shift_F2 = nbparam->dispersion_shift.c2 / 3;
+    const float disp_shift_F3 = nbparam->dispersion_shift.c3 / 4;
+    const float repu_shift_F2 = nbparam->repulsion_shift.c2 / 3;
+    const float repu_shift_F3 = nbparam->repulsion_shift.c3 / 4;
  
  
-    r        = r2 * inv_r;
-    r_switch = r - nbparam->rvdw_switch;
-    r_switch = r_switch >= 0.0F ? r_switch : 0.0F;
+    const float r        = r2 * inv_r;
+    float       r_switch = r - nbparam->rvdw_switch;
+    r_switch             = r_switch >= 0.0F ? r_switch : 0.0F;
  
      *F_invr += -c6 * (disp_shift_V2 + disp_shift_V3 * r_switch) * r_switch * r_switch * inv_r
                 + c12 * (repu_shift_V2 + repu_shift_V3 * r_switch) * r_switch * r_switch * inv_r;
  
      *F_invr += -c6 * (disp_shift_V2 + disp_shift_V3 * r_switch) * r_switch * r_switch * inv_r
                 + c12 * (repu_shift_V2 + repu_shift_V3 * r_switch) * r_switch * r_switch * inv_r;
@@ -401,25 +400,24 @@ gmx_opencl_inline void calculate_potential_switch_F(const cl_nbparam_params_t* n
                                                      float*                     F_invr,
                                                      const float*               E_lj)
  {
                                                      float*                     F_invr,
                                                      const float*               E_lj)
  {
-    float r, r_switch;
-    float sw, dsw;
-
      /* potential switch constants */
      /* potential switch constants */
-    float switch_V3 = nbparam->vdw_switch.c3;
-    float switch_V4 = nbparam->vdw_switch.c4;
-    float switch_V5 = nbparam->vdw_switch.c5;
-    float switch_F2 = nbparam->vdw_switch.c3;
-    float switch_F3 = nbparam->vdw_switch.c4;
-    float switch_F4 = nbparam->vdw_switch.c5;
+    const float switch_V3 = nbparam->vdw_switch.c3;
+    const float switch_V4 = nbparam->vdw_switch.c4;
+    const float switch_V5 = nbparam->vdw_switch.c5;
+    const float switch_F2 = nbparam->vdw_switch.c3;
+    const float switch_F3 = nbparam->vdw_switch.c4;
+    const float switch_F4 = nbparam->vdw_switch.c5;
  
  
-    r        = r2 * inv_r;
-    r_switch = r - nbparam->rvdw_switch;
+    const float r        = r2 * inv_r;
+    const float r_switch = r - nbparam->rvdw_switch;
  
      /* Unlike in the F+E kernel, conditional is faster here */
      if (r_switch > 0.0F)
      {
  
      /* Unlike in the F+E kernel, conditional is faster here */
      if (r_switch > 0.0F)
      {
-        sw  = 1.0F + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch * r_switch * r_switch;
-        dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch;
+        const float sw = 1.0F
+                         + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch
+                                   * r_switch * r_switch;
+        const float dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch;
  
          *F_invr = (*F_invr) * sw - inv_r * (*E_lj) * dsw;
      }
  
          *F_invr = (*F_invr) * sw - inv_r * (*E_lj) * dsw;
      }
@@ -432,24 +430,22 @@ gmx_opencl_inline void calculate_potential_switch_F_E(const cl_nbparam_params_t*
                                                        float*                     F_invr,
                                                        float*                     E_lj)
  {
                                                        float*                     F_invr,
                                                        float*                     E_lj)
  {
-    float r, r_switch;
-    float sw, dsw;
-
      /* potential switch constants */
      /* potential switch constants */
-    float switch_V3 = nbparam->vdw_switch.c3;
-    float switch_V4 = nbparam->vdw_switch.c4;
-    float switch_V5 = nbparam->vdw_switch.c5;
-    float switch_F2 = nbparam->vdw_switch.c3;
-    float switch_F3 = nbparam->vdw_switch.c4;
-    float switch_F4 = nbparam->vdw_switch.c5;
+    const float switch_V3 = nbparam->vdw_switch.c3;
+    const float switch_V4 = nbparam->vdw_switch.c4;
+    const float switch_V5 = nbparam->vdw_switch.c5;
+    const float switch_F2 = nbparam->vdw_switch.c3;
+    const float switch_F3 = nbparam->vdw_switch.c4;
+    const float switch_F4 = nbparam->vdw_switch.c5;
  
  
-    r        = r2 * inv_r;
-    r_switch = r - nbparam->rvdw_switch;
-    r_switch = r_switch >= 0.0F ? r_switch : 0.0F;
+    const float r        = r2 * inv_r;
+    float       r_switch = r - nbparam->rvdw_switch;
+    r_switch             = r_switch >= 0.0F ? r_switch : 0.0F;
  
      /* Unlike in the F-only kernel, masking is faster here */
  
      /* Unlike in the F-only kernel, masking is faster here */
-    sw  = 1.0F + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch * r_switch * r_switch;
-    dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch;
+    const float sw =
+            1.0F + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch * r_switch * r_switch;
+    const float dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch;
  
      *F_invr = (*F_invr) * sw - inv_r * (*E_lj) * dsw;
      *E_lj *= sw;
  
      *F_invr = (*F_invr) * sw - inv_r * (*E_lj) * dsw;
      *E_lj *= sw;
@@ -467,15 +463,13 @@ gmx_opencl_inline void calculate_lj_ewald_comb_geom_F(__constant const float* nb
                                                        float                   lje_coeff6_6,
                                                        float*                  F_invr)
  {
                                                        float                   lje_coeff6_6,
                                                        float*                  F_invr)
  {
-    float c6grid, inv_r6_nm, cr2, expmcr2, poly;
-
-    c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej];
+    const float c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej];
  
      /* Recalculate inv_r6 without exclusion mask */
  
      /* Recalculate inv_r6 without exclusion mask */
-    inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
-    cr2       = lje_coeff2 * r2;
-    expmcr2   = exp(-cr2);
-    poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
+    const float inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
+    const float cr2       = lje_coeff2 * r2;
+    const float expmcr2   = exp(-cr2);
+    const float poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
  
      /* Subtract the grid force from the total LJ force */
      *F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2;
  
      /* Subtract the grid force from the total LJ force */
      *F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2;
@@ -496,21 +490,19 @@ gmx_opencl_inline void calculate_lj_ewald_comb_geom_F_E(__constant const float*
                                                          float*                     F_invr,
                                                          float*                     E_lj)
  {
                                                          float*                     F_invr,
                                                          float*                     E_lj)
  {
-    float c6grid, inv_r6_nm, cr2, expmcr2, poly, sh_mask;
-
-    c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej];
+    const float c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej];
  
      /* Recalculate inv_r6 without exclusion mask */
  
      /* Recalculate inv_r6 without exclusion mask */
-    inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
-    cr2       = lje_coeff2 * r2;
-    expmcr2   = exp(-cr2);
-    poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
+    const float inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
+    const float cr2       = lje_coeff2 * r2;
+    const float expmcr2   = exp(-cr2);
+    const float poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
  
      /* Subtract the grid force from the total LJ force */
      *F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2;
  
      /* Shift should be applied only to real LJ pairs */
  
      /* Subtract the grid force from the total LJ force */
      *F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2;
  
      /* Shift should be applied only to real LJ pairs */
-    sh_mask = nbparam->sh_lj_ewald * int_bit;
+    const float sh_mask = nbparam->sh_lj_ewald * int_bit;
      *E_lj += ONE_SIXTH_F * c6grid * (inv_r6_nm * (1.0F - expmcr2 * poly) + sh_mask);
  }
  
      *E_lj += ONE_SIXTH_F * c6grid * (inv_r6_nm * (1.0F - expmcr2 * poly) + sh_mask);
  }
  
@@ -532,32 +524,28 @@ gmx_opencl_inline void calculate_lj_ewald_comb_LB_F_E(__constant const float*
                                                        float*                     F_invr,
                                                        float*                     E_lj)
  {
                                                        float*                     F_invr,
                                                        float*                     E_lj)
  {
-    float c6grid, inv_r6_nm, cr2, expmcr2, poly;
-    float sigma, sigma2, epsilon;
-
      /* sigma and epsilon are scaled to give 6*C6 */
      /* sigma and epsilon are scaled to give 6*C6 */
-    sigma = nbfp_comb_climg2d[2 * typei] + nbfp_comb_climg2d[2 * typej];
+    const float sigma = nbfp_comb_climg2d[2 * typei] + nbfp_comb_climg2d[2 * typej];
  
  
-    epsilon = nbfp_comb_climg2d[2 * typei + 1] * nbfp_comb_climg2d[2 * typej + 1];
+    const float epsilon = nbfp_comb_climg2d[2 * typei + 1] * nbfp_comb_climg2d[2 * typej + 1];
  
  
-    sigma2 = sigma * sigma;
-    c6grid = epsilon * sigma2 * sigma2 * sigma2;
+    const float sigma2 = sigma * sigma;
+    const float c6grid = epsilon * sigma2 * sigma2 * sigma2;
  
      /* Recalculate inv_r6 without exclusion mask */
  
      /* Recalculate inv_r6 without exclusion mask */
-    inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
-    cr2       = lje_coeff2 * r2;
-    expmcr2   = exp(-cr2);
-    poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
+    const float inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
+    const float cr2       = lje_coeff2 * r2;
+    const float expmcr2   = exp(-cr2);
+    const float poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
  
      /* Subtract the grid force from the total LJ force */
      *F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2;
  
      if (with_E_lj)
      {
  
      /* Subtract the grid force from the total LJ force */
      *F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2;
  
      if (with_E_lj)
      {
-        float sh_mask;
  
          /* Shift should be applied only to real LJ pairs */
  
          /* Shift should be applied only to real LJ pairs */
-        sh_mask = nbparam->sh_lj_ewald * int_bit;
+        const float sh_mask = nbparam->sh_lj_ewald * int_bit;
          *E_lj += ONE_SIXTH_F * c6grid * (inv_r6_nm * (1.0F - expmcr2 * poly) + sh_mask);
      }
  }
          *E_lj += ONE_SIXTH_F * c6grid * (inv_r6_nm * (1.0F - expmcr2 * poly) + sh_mask);
      }
  }
@@ -594,24 +582,21 @@ gmx_opencl_inline float pmecorrF(float z2)
      const float FD1 = 0.50736591960530292870F;
      const float FD0 = 1.0F;
  
      const float FD1 = 0.50736591960530292870F;
      const float FD0 = 1.0F;
  
-    float z4;
-    float polyFN0, polyFN1, polyFD0, polyFD1;
-
-    z4 = z2 * z2;
+    const float z4 = z2 * z2;
  
  
-    polyFD0 = FD4 * z4 + FD2;
-    polyFD1 = FD3 * z4 + FD1;
-    polyFD0 = polyFD0 * z4 + FD0;
-    polyFD0 = polyFD1 * z2 + polyFD0;
+    float polyFD0 = FD4 * z4 + FD2;
+    float polyFD1 = FD3 * z4 + FD1;
+    polyFD0       = polyFD0 * z4 + FD0;
+    polyFD0       = polyFD1 * z2 + polyFD0;
  
      polyFD0 = 1.0F / polyFD0;
  
  
      polyFD0 = 1.0F / polyFD0;
  
-    polyFN0 = FN6 * z4 + FN4;
-    polyFN1 = FN5 * z4 + FN3;
-    polyFN0 = polyFN0 * z4 + FN2;
-    polyFN1 = polyFN1 * z4 + FN1;
-    polyFN0 = polyFN0 * z4 + FN0;
-    polyFN0 = polyFN1 * z2 + polyFN0;
+    float polyFN0 = FN6 * z4 + FN4;
+    float polyFN1 = FN5 * z4 + FN3;
+    polyFN0       = polyFN0 * z4 + FN2;
+    polyFN1       = polyFN1 * z4 + FN1;
+    polyFN0       = polyFN0 * z4 + FN0;
+    polyFN0       = polyFN1 * z2 + polyFN0;
  
      return polyFN0 * polyFD0;
  }
  
      return polyFN0 * polyFD0;
  }
@@ -868,12 +853,10 @@ gmx_opencl_inline void reduce_energy_pow2(volatile __local float*  buf,
                                            volatile __global float* e_el,
                                            int                      tidx)
  {
                                            volatile __global float* e_el,
                                            int                      tidx)
  {
-    int j;
-
      int i = WARP_SIZE / 2;
  
      /* Can't just use i as loop variable because than nvcc refuses to unroll. */
      int i = WARP_SIZE / 2;
  
      /* Can't just use i as loop variable because than nvcc refuses to unroll. */
-    for (j = WARP_SIZE_LOG2 - 1; j > 0; j--)
+    for (int j = WARP_SIZE_LOG2 - 1; j > 0; j--)
      {
          if (tidx < i)
          {
      {
          if (tidx < i)
          {
author	Szilárd Páll <pall.szilard@gmail.com>
	Tue, 27 Oct 2020 14:13:45 +0000 (14:13 +0000)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Tue, 27 Oct 2020 14:13:45 +0000 (14:13 +0000)
src/gromacs/ewald/pme_gather.clh		patch \| blob \| history
src/gromacs/ewald/pme_solve.clh		patch \| blob \| history
src/gromacs/ewald/pme_spread.clh		patch \| blob \| history
src/gromacs/nbnxm/opencl/CMakeLists.txt		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh		patch \| blob \| history