From 82e7186e10844e81cde43730dd01ba12399dd2ca Mon Sep 17 00:00:00 2001
From: Mark Abraham <mark.j.abraham@gmail.com>
Date: Thu, 17 Jun 2021 20:19:13 +0000
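Subject: [PATCH] Fix warnings when clang is the CUDA host compiler

Replace the C-style casts in the CUDA kernel-launch helpers
(cudautils.cuh) and the device-buffer helpers (devicebuffer.cuh) with
named C++ casts, and drop the NOLINT(google-readability-casting)
suppressions that accompanied them. cudaMalloc is now passed the
DeviceBuffer pointer directly, without a cast. Also include
<type_traits> in cudautils.cuh.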
---
 src/gromacs/gpu_utils/cudautils.cuh    |  7 +++---
 src/gromacs/gpu_utils/devicebuffer.cuh | 33 +++++++++-----------------
 2 files changed, 14 insertions(+), 26 deletions(-)
diff --git a/src/gromacs/gpu_utils/cudautils.cuh b/src/gromacs/gpu_utils/cudautils.cuh
index daf05a019d..0c4507d809 100644
--- a/src/gromacs/gpu_utils/cudautils.cuh
+++ b/src/gromacs/gpu_utils/cudautils.cuh
@@ -40,6 +40,7 @@
 
 #include <array>
 #include <string>
+#include <type_traits>
 
 #include "gromacs/gpu_utils/device_stream.h"
 #include "gromacs/gpu_utils/gputraits.cuh"
@@ -239,8 +240,7 @@ void prepareGpuKernelArgument(KernelPtr                          kernel,
                               const CurrentArg*                  argPtr,
                               const RemainingArgs*... otherArgsPtrs)
 {
-    // NOLINTNEXTLINE(google-readability-casting)
-    (*kernelArgsPtr)[argIndex] = (void*)argPtr;
+    (*kernelArgsPtr)[argIndex] = const_cast<void*>(static_cast<const void*>(argPtr));
     prepareGpuKernelArgument(kernel, kernelArgsPtr, argIndex + 1, otherArgsPtrs...);
 }
 
@@ -284,8 +284,7 @@ void launchGpuKernel(void (*kernel)(Args...),
 {
     dim3 blockSize(config.blockSize[0], config.blockSize[1], config.blockSize[2]);
     dim3 gridSize(config.gridSize[0], config.gridSize[1], config.gridSize[2]);
-    // NOLINTNEXTLINE(google-readability-casting)
-    cudaLaunchKernel((void*)kernel,
+    cudaLaunchKernel(reinterpret_cast<void*>(kernel),
                      gridSize,
                      blockSize,
                      const_cast<void**>(kernelArgs.data()),
diff --git a/src/gromacs/gpu_utils/devicebuffer.cuh b/src/gromacs/gpu_utils/devicebuffer.cuh
index 97e9d525b2..9dd29c2923 100644
--- a/src/gromacs/gpu_utils/devicebuffer.cuh
+++ b/src/gromacs/gpu_utils/devicebuffer.cuh
@@ -68,8 +68,7 @@ template<typename ValueType>
 void allocateDeviceBuffer(DeviceBuffer<ValueType>* buffer, size_t numValues, const DeviceContext& /* deviceContext */)
 {
     GMX_ASSERT(buffer, "needs a buffer pointer");
-    // NOLINTNEXTLINE(google-readability-casting)
-    cudaError_t stat = cudaMalloc((void**)(buffer), numValues * sizeof(ValueType));
+    cudaError_t stat = cudaMalloc(buffer, numValues * sizeof(ValueType));
     GMX_RELEASE_ASSERT(
             stat == cudaSuccess,
             ("Allocation of the device buffer failed. " + gmx::getDeviceErrorString(stat)).c_str());
@@ -131,8 +130,7 @@ void copyToDeviceBuffer(DeviceBuffer<ValueType>* buffer,
     {
         case GpuApiCallBehavior::Async:
             GMX_ASSERT(isHostMemoryPinned(hostBuffer), "Source host buffer was not pinned for CUDA");
-            // NOLINTNEXTLINE(google-readability-casting)
-            stat = cudaMemcpyAsync(*((ValueType**)buffer) + startingOffset,
+            stat = cudaMemcpyAsync(*reinterpret_cast<ValueType**>(buffer) + startingOffset,
                                    hostBuffer,
                                    bytes,
                                    cudaMemcpyHostToDevice,
@@ -143,12 +141,10 @@ void copyToDeviceBuffer(DeviceBuffer<ValueType>* buffer,
             break;
 
         case GpuApiCallBehavior::Sync:
-            stat = cudaMemcpy(
-                    // NOLINTNEXTLINE(google-readability-casting)
-                    *((ValueType**)buffer) + startingOffset,
-                    hostBuffer,
-                    bytes,
-                    cudaMemcpyHostToDevice);
+            stat = cudaMemcpy(*reinterpret_cast<ValueType**>(buffer) + startingOffset,
+                              hostBuffer,
+                              bytes,
+                              cudaMemcpyHostToDevice);
             GMX_RELEASE_ASSERT(
                     stat == cudaSuccess,
                     ("Synchronous H2D copy failed. " + gmx::getDeviceErrorString(stat)).c_str());
@@ -195,8 +191,7 @@ void copyFromDeviceBuffer(ValueType*               hostBuffer,
             GMX_ASSERT(isHostMemoryPinned(hostBuffer),
                        "Destination host buffer was not pinned for CUDA");
             stat = cudaMemcpyAsync(hostBuffer,
-                                   // NOLINTNEXTLINE(google-readability-casting)
-                                   *((ValueType**)buffer) + startingOffset,
+                                   *reinterpret_cast<ValueType**>(buffer) + startingOffset,
                                    bytes,
                                    cudaMemcpyDeviceToHost,
                                    deviceStream.stream());
@@ -207,8 +202,7 @@ void copyFromDeviceBuffer(ValueType*               hostBuffer,
 
         case GpuApiCallBehavior::Sync:
             stat = cudaMemcpy(hostBuffer,
-                              // NOLINTNEXTLINE(google-readability-casting)
-                              *((ValueType**)buffer) + startingOffset,
+                              *reinterpret_cast<ValueType**>(buffer) + startingOffset,
                               bytes,
                               cudaMemcpyDeviceToHost);
             GMX_RELEASE_ASSERT(
@@ -297,11 +291,7 @@ void clearDeviceBufferAsync(DeviceBuffer<ValueType>* buffer,
     const char   pattern = 0;
 
     cudaError_t stat = cudaMemsetAsync(
-            // NOLINTNEXTLINE(google-readability-casting)
-            *((ValueType**)buffer) + startingOffset,
-            pattern,
-            bytes,
-            deviceStream.stream());
+            *reinterpret_cast<ValueType**>(buffer) + startingOffset, pattern, bytes, deviceStream.stream());
     GMX_RELEASE_ASSERT(stat == cudaSuccess,
                        ("Couldn't clear the device buffer. " + gmx::getDeviceErrorString(stat)).c_str());
 }
@@ -358,9 +348,8 @@ void initParamLookupTable(DeviceBuffer<ValueType>* deviceBuffer,
 
     const size_t sizeInBytes = numValues * sizeof(ValueType);
 
-    cudaError_t stat =
-            // NOLINTNEXTLINE(google-readability-casting)
-            cudaMemcpy(*((ValueType**)deviceBuffer), hostBuffer, sizeInBytes, cudaMemcpyHostToDevice);
+    cudaError_t stat = cudaMemcpy(
+            *reinterpret_cast<ValueType**>(deviceBuffer), hostBuffer, sizeInBytes, cudaMemcpyHostToDevice);
 
     GMX_RELEASE_ASSERT(stat == cudaSuccess,
                        ("Synchronous H2D copy failed. " + gmx::getDeviceErrorString(stat)).c_str());
-- 
2.22.0