From a0544b0e1089c66829a5c8387db26bddaff77f31 Mon Sep 17 00:00:00 2001
From: Paul Bauer <paul.bauer.q@gmail.com>
Date: Tue, 27 Oct 2020 16:29:24 +0100
Subject: [PATCH] Fix remaining clang tidy issues with OpenCL

Some code blocks in the OpenCL build still triggered clang-tidy warnings.

These issues came up during a manual run in preparation for the CI job.
---
 src/gromacs/ewald/pme.h                          | 8 ++++----
 src/gromacs/ewald/pme_gpu.cpp                    | 2 +-
 src/gromacs/ewald/pme_gpu_3dfft.h                | 2 +-
 src/gromacs/ewald/pme_gpu_internal.h             | 2 +-
 src/gromacs/gpu_utils/devicebuffer_ocl.h         | 2 +-
 src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp           | 2 +-
 src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp | 4 ++++
 7 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/gromacs/ewald/pme.h b/src/gromacs/ewald/pme.h
index 0408b87f77..29411cad0d 100644
--- a/src/gromacs/ewald/pme.h
+++ b/src/gromacs/ewald/pme.h
@@ -344,7 +344,7 @@ GPU_FUNC_QUALIFIER void pme_gpu_prepare_computation(gmx_pme_t*     GPU_FUNC_ARGU
 GPU_FUNC_QUALIFIER void pme_gpu_launch_spread(gmx_pme_t*            GPU_FUNC_ARGUMENT(pme),
                                               GpuEventSynchronizer* GPU_FUNC_ARGUMENT(xReadyOnDevice),
                                               gmx_wallcycle*        GPU_FUNC_ARGUMENT(wcycle),
-                                              const real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
+                                              real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
 
 /*! \brief
  * Launches middle stages of PME (FFT R2C, solving, FFT C2R) either on GPU or on CPU, depending on the run mode.
@@ -367,7 +367,7 @@ pme_gpu_launch_complex_transforms(gmx_pme_t*               GPU_FUNC_ARGUMENT(pme
  */
 GPU_FUNC_QUALIFIER void pme_gpu_launch_gather(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
                                               gmx_wallcycle*   GPU_FUNC_ARGUMENT(wcycle),
-                                              const real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
+                                              real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
 
 /*! \brief
  * Attempts to complete PME GPU tasks.
@@ -394,7 +394,7 @@ GPU_FUNC_QUALIFIER bool pme_gpu_try_finish_task(gmx_pme_t*               GPU_FUN
                                                 gmx_wallcycle*           GPU_FUNC_ARGUMENT(wcycle),
                                                 gmx::ForceWithVirial* GPU_FUNC_ARGUMENT(forceWithVirial),
                                                 gmx_enerdata_t*       GPU_FUNC_ARGUMENT(enerd),
-                                                const real            GPU_FUNC_ARGUMENT(lambdaQ),
+                                                real                  GPU_FUNC_ARGUMENT(lambdaQ),
                                                 GpuTaskCompletion GPU_FUNC_ARGUMENT(completionKind))
         GPU_FUNC_TERM_WITH_RETURN(false);
 
@@ -414,7 +414,7 @@ GPU_FUNC_QUALIFIER void pme_gpu_wait_and_reduce(gmx_pme_t*               GPU_FUN
                                                 gmx_wallcycle*           GPU_FUNC_ARGUMENT(wcycle),
                                                 gmx::ForceWithVirial* GPU_FUNC_ARGUMENT(forceWithVirial),
                                                 gmx_enerdata_t*       GPU_FUNC_ARGUMENT(enerd),
-                                                const real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
+                                                real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
 
 /*! \brief
  * The PME GPU reinitialization function that is called both at the end of any PME computation and on any load balancing.
diff --git a/src/gromacs/ewald/pme_gpu.cpp b/src/gromacs/ewald/pme_gpu.cpp
index 19215fa90f..fcae02ab69 100644
--- a/src/gromacs/ewald/pme_gpu.cpp
+++ b/src/gromacs/ewald/pme_gpu.cpp
@@ -344,7 +344,7 @@ bool pme_gpu_try_finish_task(gmx_pme_t*               pme,
     // time needed for that checking, but do not yet record that the
     // gather has occured.
     bool           needToSynchronize      = true;
-    constexpr bool c_streamQuerySupported = bool(GMX_GPU_CUDA);
+    constexpr bool c_streamQuerySupported = GMX_GPU_CUDA;
 
     // TODO: implement c_streamQuerySupported with an additional GpuEventSynchronizer per stream (#2521)
     if ((completionKind == GpuTaskCompletion::Check) && c_streamQuerySupported)
diff --git a/src/gromacs/ewald/pme_gpu_3dfft.h b/src/gromacs/ewald/pme_gpu_3dfft.h
index c334d013e3..a39f751bab 100644
--- a/src/gromacs/ewald/pme_gpu_3dfft.h
+++ b/src/gromacs/ewald/pme_gpu_3dfft.h
@@ -75,7 +75,7 @@ public:
      * \param[in] pmeGpu                  The PME GPU structure.
      * \param[in] gridIndex               The index of the grid on which to perform the calculations.
      */
-    GpuParallel3dFft(const PmeGpu* pmeGpu, const int gridIndex);
+    GpuParallel3dFft(const PmeGpu* pmeGpu, int gridIndex);
     /*! \brief Destroys the FFT plans. */
     ~GpuParallel3dFft();
     /*! \brief Performs the FFT transform in given direction
diff --git a/src/gromacs/ewald/pme_gpu_internal.h b/src/gromacs/ewald/pme_gpu_internal.h
index 1220b13984..632557d13e 100644
--- a/src/gromacs/ewald/pme_gpu_internal.h
+++ b/src/gromacs/ewald/pme_gpu_internal.h
@@ -354,7 +354,7 @@ GPU_FUNC_QUALIFIER void pme_gpu_spread(const PmeGpu*         GPU_FUNC_ARGUMENT(p
                                        float**               GPU_FUNC_ARGUMENT(h_grids),
                                        bool                  GPU_FUNC_ARGUMENT(computeSplines),
                                        bool                  GPU_FUNC_ARGUMENT(spreadCharges),
-                                       const real GPU_FUNC_ARGUMENT(lambda)) GPU_FUNC_TERM;
+                                       real GPU_FUNC_ARGUMENT(lambda)) GPU_FUNC_TERM;
 
 /*! \libinternal \brief
  * 3D FFT R2C/C2R routine.
diff --git a/src/gromacs/gpu_utils/devicebuffer_ocl.h b/src/gromacs/gpu_utils/devicebuffer_ocl.h
index b8e047a87d..81501a2b08 100644
--- a/src/gromacs/gpu_utils/devicebuffer_ocl.h
+++ b/src/gromacs/gpu_utils/devicebuffer_ocl.h
@@ -321,7 +321,7 @@ void initParamLookupTable(DeviceBuffer<ValueType>* deviceBuffer,
  * \param[in,out] deviceBuffer  Device buffer to store data in.
  */
 template<typename ValueType>
-void destroyParamLookupTable(DeviceBuffer<ValueType>* deviceBuffer, DeviceTexture& /* deviceTexture*/)
+void destroyParamLookupTable(DeviceBuffer<ValueType>* deviceBuffer, const DeviceTexture& /* deviceTexture*/)
 {
     freeDeviceBuffer(deviceBuffer);
 }
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
index 359683337a..48e8ed03ad 100644
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
@@ -899,7 +899,7 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
     /* DtoH f */
     GMX_ASSERT(sizeof(*nbatom->out[0].f.data()) == sizeof(float),
                "The host force buffer should be in single precision to match device data size.");
-    copyFromDeviceBuffer(&nbatom->out[0].f.data()[adat_begin * DIM], &adat->f, adat_begin * DIM,
+    copyFromDeviceBuffer(&nbatom->out[0].f[adat_begin * DIM], &adat->f, adat_begin * DIM,
                          adat_len * DIM, deviceStream, GpuApiCallBehavior::Async,
                          bDoTime ? t->xf[aloc].nb_d2h.fetchNextEvent() : nullptr);
 
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
index 19b861db0c..29989c8095 100644
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
@@ -622,15 +622,19 @@ void gpu_free(NbnxmGpu* nb)
     }
 
     /* Free kernels */
+    // NOLINTNEXTLINE(bugprone-sizeof-expression)
     int kernel_count = sizeof(nb->kernel_ener_noprune_ptr) / sizeof(nb->kernel_ener_noprune_ptr[0][0]);
     free_kernels(nb->kernel_ener_noprune_ptr[0], kernel_count);
 
+    // NOLINTNEXTLINE(bugprone-sizeof-expression)
     kernel_count = sizeof(nb->kernel_ener_prune_ptr) / sizeof(nb->kernel_ener_prune_ptr[0][0]);
     free_kernels(nb->kernel_ener_prune_ptr[0], kernel_count);
 
+    // NOLINTNEXTLINE(bugprone-sizeof-expression)
     kernel_count = sizeof(nb->kernel_noener_noprune_ptr) / sizeof(nb->kernel_noener_noprune_ptr[0][0]);
     free_kernels(nb->kernel_noener_noprune_ptr[0], kernel_count);
 
+    // NOLINTNEXTLINE(bugprone-sizeof-expression)
     kernel_count = sizeof(nb->kernel_noener_prune_ptr) / sizeof(nb->kernel_noener_prune_ptr[0][0]);
     free_kernels(nb->kernel_noener_prune_ptr[0], kernel_count);
 
-- 
2.22.0