Fix clang-tidy warnings for OCL

author Roland Schulz <roland.schulz@intel.com>

Wed, 12 Sep 2018 09:34:10 +0000 (02:34 -0700)

committer Mark Abraham <mark.j.abraham@gmail.com>

Wed, 12 Sep 2018 12:24:14 +0000 (14:24 +0200)
author Roland Schulz <roland.schulz@intel.com>
Wed, 12 Sep 2018 09:34:10 +0000 (02:34 -0700)
committer Mark Abraham <mark.j.abraham@gmail.com>
Wed, 12 Sep 2018 12:24:14 +0000 (14:24 +0200)
diff --git a/src/gromacs/CMakeLists.txt b/src/gromacs/CMakeLists.txt

index c2d2bb23b038c272c3e191e2e566187a6f52857c..7f9093399ba8fd3afac773cd2bb3315f7cf80ea2 100644 (file)
--- a/src/gromacs/CMakeLists.txt
+++ b/src/gromacs/CMakeLists.txt
@@ -223,7 +223,7 @@ if (GMX_USE_OPENCL)
          target_sources(libgromacs PRIVATE
              $<TARGET_OBJECTS:clFFT>
          )
-        target_include_directories(libgromacs PRIVATE ${_clFFT_dir}/include)
+        target_include_directories(libgromacs SYSTEM PRIVATE ${_clFFT_dir}/include)
          # Use the magic variable for how to link any library needed for
          # dlopen, etc.  which is -ldl where needed, and empty otherwise
          # (e.g. Windows, BSD, Mac).
diff --git a/src/gromacs/ewald/pme-gpu-3dfft-ocl.cpp b/src/gromacs/ewald/pme-gpu-3dfft-ocl.cpp

index dcc29cc69e021fb76f1bf63caa4150e366961639..729a32a07d807d84d04745cc877c33056bbc41d0 100644 (file)
--- a/src/gromacs/ewald/pme-gpu-3dfft-ocl.cpp
+++ b/src/gromacs/ewald/pme-gpu-3dfft-ocl.cpp
@@ -69,7 +69,7 @@ GpuParallel3dFft::GpuParallel3dFft(const PmeGpu *pmeGpu)
      std::array<size_t, DIM> realGridSize, realGridSizePadded, complexGridSizePadded;
  
      GMX_RELEASE_ASSERT(!pme_gpu_uses_dd(pmeGpu), "FFT decomposition not implemented");
-    PmeGpuKernelParamsBase *kernelParamsPtr = (PmeGpuKernelParamsBase *)pmeGpu->kernelParams.get();
+    PmeGpuKernelParamsBase *kernelParamsPtr = pmeGpu->kernelParams.get();
      for (int i = 0; i < DIM; i++)
      {
          realGridSize[i]          = kernelParamsPtr->grid.realGridSize[i];
@@ -152,18 +152,14 @@ void GpuParallel3dFft::perform3dFft(gmx_fft_direction  dir,
              inputGrids  = &realGrid_;
              outputGrids = &complexGrid_;
              break;
-
-            break;
          case GMX_FFT_COMPLEX_TO_REAL:
              plan        = planC2R_;
              direction   = CLFFT_BACKWARD;
              inputGrids  = &complexGrid_;
              outputGrids = &realGrid_;
              break;
-
          default:
              GMX_THROW(gmx::NotImplementedError("The chosen 3D-FFT case is not implemented on GPUs"));
-            break;
      }
      handleClfftError(clfftEnqueueTransform(plan, direction,
                                             commandStreams_.size(), commandStreams_.data(),
diff --git a/src/gromacs/ewald/pme-gpu-internal.cpp b/src/gromacs/ewald/pme-gpu-internal.cpp

index d919b84a669a26b881197360730d2b120dd9c171..a030b45a30c7323573f6e0054a582b5c446a9e8c 100644 (file)
--- a/src/gromacs/ewald/pme-gpu-internal.cpp
+++ b/src/gromacs/ewald/pme-gpu-internal.cpp
@@ -54,6 +54,7 @@
  #include <list>
  #include <string>
  
+#include "gromacs/compat/make_unique.h"
  #include "gromacs/ewald/ewald-utils.h"
  #include "gromacs/gpu_utils/gpu_utils.h"
  #include "gromacs/math/invertmatrix.h"
@@ -117,11 +118,11 @@ void pme_gpu_synchronize(const PmeGpu *pmeGpu)
      gpuStreamSynchronize(pmeGpu->archSpecific->pmeStream);
  }
  
-void pme_gpu_alloc_energy_virial(const PmeGpu *pmeGpu)
+void pme_gpu_alloc_energy_virial(PmeGpu *pmeGpu)
  {
      const size_t energyAndVirialSize = c_virialAndEnergyCount * sizeof(float);
      allocateDeviceBuffer(&pmeGpu->kernelParams->constants.d_virialAndEnergy, c_virialAndEnergyCount, pmeGpu->archSpecific->context);
-    pmalloc((void **)&pmeGpu->staging.h_virialAndEnergy, energyAndVirialSize);
+    pmalloc(reinterpret_cast<void **>(&pmeGpu->staging.h_virialAndEnergy), energyAndVirialSize);
  }
  
  void pme_gpu_free_energy_virial(PmeGpu *pmeGpu)
@@ -137,14 +138,14 @@ void pme_gpu_clear_energy_virial(const PmeGpu *pmeGpu)
                             c_virialAndEnergyCount, pmeGpu->archSpecific->pmeStream);
  }
  
-void pme_gpu_realloc_and_copy_bspline_values(const PmeGpu *pmeGpu)
+void pme_gpu_realloc_and_copy_bspline_values(PmeGpu *pmeGpu)
  {
      const int splineValuesOffset[DIM] = {
          0,
          pmeGpu->kernelParams->grid.realGridSize[XX],
          pmeGpu->kernelParams->grid.realGridSize[XX] + pmeGpu->kernelParams->grid.realGridSize[YY]
      };
-    memcpy((void *)&pmeGpu->kernelParams->grid.splineValuesOffset, &splineValuesOffset, sizeof(splineValuesOffset));
+    memcpy(&pmeGpu->kernelParams->grid.splineValuesOffset, &splineValuesOffset, sizeof(splineValuesOffset));
  
      const int newSplineValuesSize = pmeGpu->kernelParams->grid.realGridSize[XX] +
          pmeGpu->kernelParams->grid.realGridSize[YY] +
@@ -156,7 +157,7 @@ void pme_gpu_realloc_and_copy_bspline_values(const PmeGpu *pmeGpu)
      {
          /* Reallocate the host buffer */
          pfree(pmeGpu->staging.h_splineModuli);
-        pmalloc((void **)&pmeGpu->staging.h_splineModuli, newSplineValuesSize * sizeof(float));
+        pmalloc(reinterpret_cast<void **>(&pmeGpu->staging.h_splineModuli), newSplineValuesSize * sizeof(float));
      }
      for (int i = 0; i < DIM; i++)
      {
@@ -271,7 +272,7 @@ void pme_gpu_free_coefficients(const PmeGpu *pmeGpu)
      freeDeviceBuffer(&pmeGpu->kernelParams->atoms.d_coefficients);
  }
  
-void pme_gpu_realloc_spline_data(const PmeGpu *pmeGpu)
+void pme_gpu_realloc_spline_data(PmeGpu *pmeGpu)
  {
      const int    order             = pmeGpu->common->pme_order;
      const int    alignment         = pme_gpu_get_atoms_per_warp(pmeGpu);
@@ -290,9 +291,9 @@ void pme_gpu_realloc_spline_data(const PmeGpu *pmeGpu)
      if (shouldRealloc)
      {
          pfree(pmeGpu->staging.h_theta);
-        pmalloc((void **)&pmeGpu->staging.h_theta, newSplineDataSize * sizeof(float));
+        pmalloc(reinterpret_cast<void **>(&pmeGpu->staging.h_theta), newSplineDataSize * sizeof(float));
          pfree(pmeGpu->staging.h_dtheta);
-        pmalloc((void **)&pmeGpu->staging.h_dtheta, newSplineDataSize * sizeof(float));
+        pmalloc(reinterpret_cast<void **>(&pmeGpu->staging.h_dtheta), newSplineDataSize * sizeof(float));
      }
  }
  
@@ -305,14 +306,14 @@ void pme_gpu_free_spline_data(const PmeGpu *pmeGpu)
      pfree(pmeGpu->staging.h_dtheta);
  }
  
-void pme_gpu_realloc_grid_indices(const PmeGpu *pmeGpu)
+void pme_gpu_realloc_grid_indices(PmeGpu *pmeGpu)
  {
      const size_t newIndicesSize = DIM * pmeGpu->nAtomsAlloc;
      GMX_ASSERT(newIndicesSize > 0, "Bad number of atoms in PME GPU");
      reallocateDeviceBuffer(&pmeGpu->kernelParams->atoms.d_gridlineIndices, newIndicesSize,
                             &pmeGpu->archSpecific->gridlineIndicesSize, &pmeGpu->archSpecific->gridlineIndicesSizeAlloc, pmeGpu->archSpecific->context);
      pfree(pmeGpu->staging.h_gridlineIndices);
-    pmalloc((void **)&pmeGpu->staging.h_gridlineIndices, newIndicesSize * sizeof(int));
+    pmalloc(reinterpret_cast<void **>(&pmeGpu->staging.h_gridlineIndices), newIndicesSize * sizeof(int));
  }
  
  void pme_gpu_free_grid_indices(const PmeGpu *pmeGpu)
@@ -580,7 +581,7 @@ void pme_gpu_reinit_3dfft(const PmeGpu *pmeGpu)
          pmeGpu->archSpecific->fftSetup.resize(0);
          for (int i = 0; i < pmeGpu->common->ngrids; i++)
          {
-            pmeGpu->archSpecific->fftSetup.push_back(std::unique_ptr<GpuParallel3dFft>(new GpuParallel3dFft(pmeGpu)));
+            pmeGpu->archSpecific->fftSetup.push_back(gmx::compat::make_unique<GpuParallel3dFft>(pmeGpu));
          }
      }
  }
@@ -680,7 +681,7 @@ static void pme_gpu_reinit_grids(PmeGpu *pmeGpu)
      for (int i = 0; i < DIM; i++)
      {
          kernelParamsPtr->grid.realGridSize[i]       = pmeGpu->common->nk[i];
-        kernelParamsPtr->grid.realGridSizeFP[i]     = (float)kernelParamsPtr->grid.realGridSize[i];
+        kernelParamsPtr->grid.realGridSizeFP[i]     = static_cast<float>(kernelParamsPtr->grid.realGridSize[i]);
          kernelParamsPtr->grid.realGridSizePadded[i] = kernelParamsPtr->grid.realGridSize[i];
  
          // The complex grid currently uses no padding;
@@ -761,7 +762,7 @@ static void pme_gpu_init(gmx_pme_t          *pme,
      pme->gpu          = new PmeGpu();
      PmeGpu *pmeGpu = pme->gpu;
      changePinningPolicy(&pmeGpu->staging.h_forces, pme_get_pinning_policy());
-    pmeGpu->common = std::shared_ptr<PmeShared>(new PmeShared());
+    pmeGpu->common = std::make_shared<PmeShared>();
  
      /* These settings are set here for the whole run; dynamic ones are set in pme_gpu_reinit() */
      /* A convenience variable. */
diff --git a/src/gromacs/ewald/pme-gpu-internal.h b/src/gromacs/ewald/pme-gpu-internal.h

index cb6371f2d3a2a56b9f9b7079ab1715b968dd3790..e48e18a7f543d044c2e5f981b7501b2277025a3c 100644 (file)
--- a/src/gromacs/ewald/pme-gpu-internal.h
+++ b/src/gromacs/ewald/pme-gpu-internal.h
@@ -105,9 +105,9 @@ GPU_FUNC_QUALIFIER void pme_gpu_synchronize(const PmeGpu *GPU_FUNC_ARGUMENT(pmeG
  /*! \libinternal \brief
   * Allocates the fixed size energy and virial buffer both on GPU and CPU.
   *
- * \param[in] pmeGpu            The PME GPU structure.
+ * \param[in,out] pmeGpu            The PME GPU structure.
   */
-void pme_gpu_alloc_energy_virial(const PmeGpu *pmeGpu);
+void pme_gpu_alloc_energy_virial(PmeGpu *pmeGpu);
  
  /*! \libinternal \brief
   * Frees the energy and virial memory both on GPU and CPU.
@@ -127,9 +127,9 @@ void pme_gpu_clear_energy_virial(const PmeGpu *pmeGpu);
  /*! \libinternal \brief
   * Reallocates and copies the pre-computed B-spline values to the GPU.
   *
- * \param[in] pmeGpu             The PME GPU structure.
+ * \param[in,out] pmeGpu             The PME GPU structure.
   */
-void pme_gpu_realloc_and_copy_bspline_values(const PmeGpu *pmeGpu);
+void pme_gpu_realloc_and_copy_bspline_values(PmeGpu *pmeGpu);
  
  /*! \libinternal \brief
   * Frees the pre-computed B-spline values on the GPU (and the transfer CPU buffers).
@@ -226,9 +226,9 @@ void pme_gpu_free_coefficients(const PmeGpu *pmeGpu);
  /*! \libinternal \brief
   * Reallocates the buffers on the GPU and the host for the atoms spline data.
   *
- * \param[in] pmeGpu            The PME GPU structure.
+ * \param[in,out] pmeGpu            The PME GPU structure.
   */
-void pme_gpu_realloc_spline_data(const PmeGpu *pmeGpu);
+void pme_gpu_realloc_spline_data(PmeGpu *pmeGpu);
  
  /*! \libinternal \brief
   * Frees the buffers on the GPU for the atoms spline data.
@@ -240,9 +240,9 @@ void pme_gpu_free_spline_data(const PmeGpu *pmeGpu);
  /*! \libinternal \brief
   * Reallocates the buffers on the GPU and the host for the particle gridline indices.
   *
- * \param[in] pmeGpu            The PME GPU structure.
+ * \param[in,out] pmeGpu            The PME GPU structure.
   */
-void pme_gpu_realloc_grid_indices(const PmeGpu *pmeGpu);
+void pme_gpu_realloc_grid_indices(PmeGpu *pmeGpu);
  
  /*! \libinternal \brief
   * Frees the buffer on the GPU for the particle gridline indices.
@@ -654,7 +654,7 @@ GPU_FUNC_QUALIFIER void pme_gpu_destroy(PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu)) GPU_F
   * Should be called before the pme_gpu_set_io_ranges.
   */
  GPU_FUNC_QUALIFIER void pme_gpu_reinit_atoms(PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu),
-                                             const int         GPU_FUNC_ARGUMENT(nAtoms),
+                                             int         GPU_FUNC_ARGUMENT(nAtoms),
                                               const real       *GPU_FUNC_ARGUMENT(charges)) GPU_FUNC_TERM
  
  /*! \brief \libinternal
diff --git a/src/gromacs/ewald/pme-gpu-program-impl-ocl.cpp b/src/gromacs/ewald/pme-gpu-program-impl-ocl.cpp

index 1c138d2894caaf1980448f7c138b43a48e254453..13f7decf5408a1ec3174f880bf92605a8305a455 100644 (file)
--- a/src/gromacs/ewald/pme-gpu-program-impl-ocl.cpp
+++ b/src/gromacs/ewald/pme-gpu-program-impl-ocl.cpp
@@ -58,7 +58,7 @@ PmeGpuProgramImpl::PmeGpuProgramImpl(const gmx_device_info_t *deviceInfo)
      cl_device_id          deviceId   = deviceInfo->ocl_gpu_id.ocl_device_id;
      cl_context_properties contextProperties[3];
      contextProperties[0] = CL_CONTEXT_PLATFORM;
-    contextProperties[1] = (cl_context_properties) platformId;
+    contextProperties[1] = reinterpret_cast<cl_context_properties>(platformId);
      contextProperties[2] = 0; /* Terminates the list of properties */
  
      cl_int  clError;
diff --git a/src/gromacs/ewald/pme-gpu.cpp b/src/gromacs/ewald/pme-gpu.cpp

index 29c357d3ca821e6e255d55279ad795e4ff4f52d1..6f8caf32498dd57033eeca47e1c0ec8d6c12f1e6 100644 (file)
--- a/src/gromacs/ewald/pme-gpu.cpp
+++ b/src/gromacs/ewald/pme-gpu.cpp
@@ -190,8 +190,8 @@ void pme_gpu_launch_complex_transforms(gmx_pme_t      *pme,
                                         gmx_wallcycle  *wcycle)
  {
      PmeGpu            *pmeGpu                 = pme->gpu;
-    const bool         computeEnergyAndVirial = pmeGpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR;
-    const bool         performBackFFT         = pmeGpu->settings.currentFlags & (GMX_PME_CALC_F | GMX_PME_CALC_POT);
+    const bool         computeEnergyAndVirial = (pmeGpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR) != 0;
+    const bool         performBackFFT         = (pmeGpu->settings.currentFlags & (GMX_PME_CALC_F | GMX_PME_CALC_POT)) != 0;
      const unsigned int gridIndex              = 0;
      t_complex         *cfftgrid               = pme->cfftgrid[gridIndex];
  
@@ -274,7 +274,7 @@ static void pme_gpu_get_staged_results(const gmx_pme_t                *pme,
                                         matrix                          virial,
                                         real                           *energy)
  {
-    const bool haveComputedEnergyAndVirial = pme->gpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR;
+    const bool haveComputedEnergyAndVirial = (pme->gpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR) != 0;
      *forces = pme_gpu_get_forces(pme->gpu);
  
      if (haveComputedEnergyAndVirial)
diff --git a/src/gromacs/gpu_utils/devicebuffer_ocl.h b/src/gromacs/gpu_utils/devicebuffer_ocl.h

index 64f969fa9d8a84a9092b0965da7f91ccaf474416..52c949fc762bff3087ccc99c2e9388a7b465e389 100644 (file)
--- a/src/gromacs/gpu_utils/devicebuffer_ocl.h
+++ b/src/gromacs/gpu_utils/devicebuffer_ocl.h
@@ -61,7 +61,7 @@ class TypedClMemory
          cl_mem data_;
      public:
          //! \brief An assignment operator - the purpose is to make allocation/zeroing work
-        void operator=(cl_mem data){data_ = data; }
+        TypedClMemory &operator=(cl_mem data){data_ = data; return *this; }
          //! \brief Returns underlying cl_mem transparently
          operator cl_mem() {return data_; }
  };
diff --git a/src/gromacs/gpu_utils/gpu_utils.h b/src/gromacs/gpu_utils/gpu_utils.h

index 6ee8b09a9dd667d5bcec0dfd61cf9b40fa675425..65d8502e39e6d8ee6a9842dffdcd2e7c524f3dd0 100644 (file)
--- a/src/gromacs/gpu_utils/gpu_utils.h
+++ b/src/gromacs/gpu_utils/gpu_utils.h
@@ -213,7 +213,7 @@ void get_gpu_device_info_string(char *GPU_FUNC_ARGUMENT(s),
   * \returns                 size in bytes of gpu_dev_info
   */
  GPU_FUNC_QUALIFIER
-size_t sizeof_gpu_dev_info(void) GPU_FUNC_TERM_WITH_RETURN(0)
+size_t sizeof_gpu_dev_info() GPU_FUNC_TERM_WITH_RETURN(0)
  
  /*! \brief Returns a pointer *ptr to page-locked memory of size nbytes.
   *
diff --git a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp

index c541eb06e72d5c907858cbb7c59566f100760248..ea8451fb2ed2c97d2f375af1403ba8fe9ff3d16d 100644 (file)
--- a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp
+++ b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp
@@ -191,7 +191,7 @@ void findGpus(gmx_gpu_info_t *gpu_info)
          req_dev_type = CL_DEVICE_TYPE_CPU;
      }
  
-    while (1)
+    while (true)
      {
          cl_int status = clGetPlatformIDs(0, nullptr, &ocl_platform_count);
          if (CL_SUCCESS != status)
@@ -413,7 +413,7 @@ gmx_device_info_t *getDeviceInfo(const gmx_gpu_info_t &gpu_info,
  }
  
  //! This function is documented in the header file
-size_t sizeof_gpu_dev_info(void)
+size_t sizeof_gpu_dev_info()
  {
      return sizeof(gmx_device_info_t);
  }
diff --git a/src/gromacs/gpu_utils/gpuregiontimer_ocl.h b/src/gromacs/gpu_utils/gpuregiontimer_ocl.h

index 00f28c633fd032235896eac981d872bfd7f28511..47209b9bc493d310c9ca1075b4de22c90cceee0a 100644 (file)
--- a/src/gromacs/gpu_utils/gpuregiontimer_ocl.h
+++ b/src/gromacs/gpu_utils/gpuregiontimer_ocl.h
@@ -83,9 +83,9 @@ class GpuRegionTimerImpl
          GpuRegionTimerImpl(GpuRegionTimerImpl &&)            = delete;
  
          /*! \brief Should be called before the region start. */
-        inline void openTimingRegion(CommandStream){}
+        inline void openTimingRegion(CommandStream /*unused*/){}
          /*! \brief Should be called after the region end. */
-        inline void closeTimingRegion(CommandStream){}
+        inline void closeTimingRegion(CommandStream /*unused*/){}
          /*! \brief Returns the last measured region timespan (in milliseconds) and calls reset(). */
          inline double getLastRangeTime()
          {
diff --git a/src/gromacs/gpu_utils/ocl_caching.cpp b/src/gromacs/gpu_utils/ocl_caching.cpp

index 566885e5b1447f296593111f7f2eb2df418f9a98..ef7fa819eafcf5ed8df23119bce8c76970f64394 100644 (file)
--- a/src/gromacs/gpu_utils/ocl_caching.cpp
+++ b/src/gromacs/gpu_utils/ocl_caching.cpp
@@ -179,5 +179,5 @@ writeBinaryToCache(cl_program program, const std::string &filename)
      fwrite(binary, 1, fileSize, f.get());
  }
  
-} // namespace
-} // namespace
+} // namespace ocl
+} // namespace gmx
diff --git a/src/gromacs/gpu_utils/ocl_caching.h b/src/gromacs/gpu_utils/ocl_caching.h

index 47ee8f62254f29ea88c535a2581eed8db3af003f..cb6c9811e0f1cf538ba6a9d254742377209f2a52 100644 (file)
--- a/src/gromacs/gpu_utils/ocl_caching.h
+++ b/src/gromacs/gpu_utils/ocl_caching.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2018, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -99,7 +99,7 @@ makeProgramFromCache(const std::string &filename,
  void
  writeBinaryToCache(cl_program program, const std::string &filename);
  
-} // namespace
-} // namespace
+} // namespace ocl
+} // namespace gmx
  
  #endif
diff --git a/src/gromacs/gpu_utils/ocl_compiler.cpp b/src/gromacs/gpu_utils/ocl_compiler.cpp

index c52a6ec74bf30646a973020a0cf59c60c01d8ce3..fa8c4137e610d95c00f7809d171d449e17e578b3 100644 (file)
--- a/src/gromacs/gpu_utils/ocl_compiler.cpp
+++ b/src/gromacs/gpu_utils/ocl_compiler.cpp
@@ -75,7 +75,7 @@ namespace ocl
   *
   *  Currently caching is disabled by default unless the env var override
   *  is used until we resolve concurrency issues. */
-static bool useBuildCache = getenv("GMX_OCL_GENCACHE"); // (NULL == getenv("GMX_OCL_NOGENCACHE"));
+static bool useBuildCache = getenv("GMX_OCL_GENCACHE") != nullptr;
  
  /*! \brief Handles writing the OpenCL JIT compilation log to \c fplog.
   *
@@ -257,7 +257,7 @@ size_t getWarpSize(cl_context context, cl_device_id deviceId)
  {
      cl_int      cl_error;
      const char *warpSizeKernel = "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
-    cl_program  program        = clCreateProgramWithSource(context, 1, (const char**)&warpSizeKernel, nullptr, &cl_error);
+    cl_program  program        = clCreateProgramWithSource(context, 1, &warpSizeKernel, nullptr, &cl_error);
      if (cl_error != CL_SUCCESS)
      {
          GMX_THROW(InternalError("Could not create OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
@@ -374,7 +374,7 @@ removeExtraSpaces(std::string *str)
  {
      GMX_RELEASE_ASSERT(str != nullptr, "A pointer to an actual string must be provided");
      std::string::iterator newEnd =
-        std::unique( str->begin(), str->end(), [ = ](char a, char b){ return isspace(a) && (a == b); } );
+        std::unique( str->begin(), str->end(), [ = ](char a, char b){ return isspace(a) != 0 && (a == b); } );
      str->erase(newEnd, str->end());
  }
  
@@ -551,5 +551,5 @@ compileProgram(FILE              *fplog,
      return program;
  }
  
-} // namespace
-} // namespace
+} // namespace ocl
+} // namespace gmx
diff --git a/src/gromacs/gpu_utils/ocl_compiler.h b/src/gromacs/gpu_utils/ocl_compiler.h

index 54f0332fed6da121367ba820485466cd7953bca9..94dbe02ccba30e490bb83b583586073eff770421 100644 (file)
--- a/src/gromacs/gpu_utils/ocl_compiler.h
+++ b/src/gromacs/gpu_utils/ocl_compiler.h
@@ -98,7 +98,7 @@ compileProgram(FILE              *fplog,
                 cl_device_id       deviceId,
                 ocl_vendor_id_t    deviceVendorId);
  
-} // namespace
-} // namespace
+}  // namespace ocl
+}  // namespace gmx
  
  #endif
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu

index 960649956abe6a938cd5566fdd9e7bd3344e72c8..e9d816e1fefa0d9726915afe1149b9d3207b50bc 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu
@@ -610,7 +610,7 @@ void nbnxn_gpu_launch_cpyback(gmx_nbnxn_cuda_t       *nb,
          return;
      }
  
-    getGpuAtomRange(adat, aloc, adat_begin, adat_len);
+    getGpuAtomRange(adat, aloc, &adat_begin, &adat_len);
  
      /* beginning of timed D2H section */
      if (bDoTime)
diff --git a/src/gromacs/mdlib/nbnxn_gpu_common.h b/src/gromacs/mdlib/nbnxn_gpu_common.h

index 90f4bc620451d82d5e3da364e0061fd7df01c0eb..7f361182c95225340789d5b1517e5dd64da4d163 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_gpu_common.h
+++ b/src/gromacs/mdlib/nbnxn_gpu_common.h
@@ -57,6 +57,8 @@
  
  #include "gromacs/gpu_utils/gpu_utils.h"
  #include "gromacs/math/vec.h"
+#include "gromacs/mdlib/force_flags.h"
+#include "gromacs/mdlib/nb_verlet.h"
  #include "gromacs/mdlib/nbnxn_gpu_types.h"
  #include "gromacs/pbcutil/ishift.h"
  #include "gromacs/timing/gpu_timing.h"
@@ -117,8 +119,8 @@ static inline int gpuAtomToInteractionLocality(int atomLocality)
  template <typename AtomDataT>
  static inline void getGpuAtomRange(const AtomDataT *atomData,
                                     int              atomLocality,
-                                   int             &atomRangeBegin,
-                                   int             &atomRangeLen)
+                                   int             *atomRangeBegin,
+                                   int             *atomRangeLen)
  {
      assert(atomData);
      validateGpuAtomLocality(atomLocality);
@@ -126,13 +128,13 @@ static inline void getGpuAtomRange(const AtomDataT *atomData,
      /* calculate the atom data index range based on locality */
      if (LOCAL_A(atomLocality))
      {
-        atomRangeBegin  = 0;
-        atomRangeLen    = atomData->natoms_local;
+        *atomRangeBegin  = 0;
+        *atomRangeLen    = atomData->natoms_local;
      }
      else
      {
-        atomRangeBegin  = atomData->natoms_local;
-        atomRangeLen    = atomData->natoms - atomData->natoms_local;
+        *atomRangeBegin  = atomData->natoms_local;
+        *atomRangeLen    = atomData->natoms - atomData->natoms_local;
      }
  }
  
@@ -298,6 +300,8 @@ static inline void nbnxn_gpu_accumulate_timings(gmx_wallclock_gpu_nbnxn_t *timin
      }
  }
  
+//TODO: move into shared source file with gmx_compile_cpp_as_cuda
+//NOLINTNEXTLINE(misc-definitions-in-headers)
  bool nbnxn_gpu_try_finish_task(gmx_nbnxn_gpu_t  *nb,
                                 int               flags,
                                 int               aloc,
@@ -328,10 +332,11 @@ bool nbnxn_gpu_try_finish_task(gmx_nbnxn_gpu_t  *nb,
              gpuStreamSynchronize(nb->stream[iLocality]);
          }
  
-        bool calcEner   = flags & GMX_FORCE_ENERGY;
-        bool calcFshift = flags & GMX_FORCE_VIRIAL;
+        bool calcEner   = (flags & GMX_FORCE_ENERGY) != 0;
+        bool calcFshift = (flags & GMX_FORCE_VIRIAL) != 0;
  
-        nbnxn_gpu_accumulate_timings(nb->timings, nb->timers, nb->plist[iLocality], aloc, calcEner, nb->bDoTime);
+        nbnxn_gpu_accumulate_timings(nb->timings, nb->timers, nb->plist[iLocality], aloc, calcEner,
+                                     nb->bDoTime != 0);
  
          nbnxn_gpu_reduce_staged_outputs(nb->nbst, iLocality, calcEner, calcFshift, e_lj, e_el, fshift);
      }
@@ -360,6 +365,7 @@ bool nbnxn_gpu_try_finish_task(gmx_nbnxn_gpu_t  *nb,
   * \param[out] e_el Pointer to the electrostatics energy output to accumulate into
   * \param[out] fshift Pointer to the shift force buffer to accumulate into
   */
+//NOLINTNEXTLINE(misc-definitions-in-headers) TODO: move into source file
  void nbnxn_gpu_wait_finish_task(gmx_nbnxn_gpu_t *nb,
                                  int              flags,
                                  int              aloc,
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp

index c2a055b79c0dd7c71c876e2d514fae37dbf4da37..3ef88eaea15612c99d205ac6a2b320bbe7b2cb5c 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp
@@ -119,7 +119,7 @@ static inline void validate_global_work_size(const KernelLaunchConfig &config, i
         https://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/clEnqueueNDRangeKernel.html
       */
      device_size_t_size_bits = dinfo->adress_bits;
-    host_size_t_size_bits   = (cl_uint)(sizeof(size_t) * 8);
+    host_size_t_size_bits   = static_cast<cl_uint>(sizeof(size_t) * 8);
  
      /* If sizeof(host size_t) <= sizeof(device size_t)
              => global_work_size components will always be valid
@@ -131,7 +131,7 @@ static inline void validate_global_work_size(const KernelLaunchConfig &config, i
      {
          size_t device_limit;
  
-        device_limit = (((size_t)1) << device_size_t_size_bits) - 1;
+        device_limit = (1ull << device_size_t_size_bits) - 1;
  
          for (int i = 0; i < work_dim; i++)
          {
@@ -393,9 +393,9 @@ void nbnxn_gpu_launch_kernel(gmx_nbnxn_ocl_t               *nb,
      cl_timers_t         *t       = nb->timers;
      cl_command_queue     stream  = nb->stream[iloc];
  
-    bool                 bCalcEner   = flags & GMX_FORCE_ENERGY;
+    bool                 bCalcEner   = (flags & GMX_FORCE_ENERGY) != 0;
      int                  bCalcFshift = flags & GMX_FORCE_VIRIAL;
-    bool                 bDoTime     = nb->bDoTime;
+    bool                 bDoTime     = (nb->bDoTime) != 0;
  
      cl_nbparam_params_t  nbparams_params;
  
@@ -508,10 +508,10 @@ void nbnxn_gpu_launch_kernel(gmx_nbnxn_ocl_t               *nb,
  
      if (debug)
      {
-        fprintf(debug, "Non-bonded GPU launch configuration:\n\tLocal work size: %dx%dx%d\n\t"
-                "Global work size : %dx%d\n\t#Super-clusters/clusters: %d/%d (%d)\n",
-                (int)(config.blockSize[0]), (int)(config.blockSize[1]), (int)(config.blockSize[2]),
-                (int)(config.blockSize[0] * config.gridSize[0]), (int)(config.blockSize[1] * config.gridSize[1]), plist->nsci*c_numClPerSupercl,
+        fprintf(debug, "Non-bonded GPU launch configuration:\n\tLocal work size: %zux%zux%zu\n\t"
+                "Global work size : %zux%zu\n\t#Super-clusters/clusters: %d/%d (%d)\n",
+                config.blockSize[0], config.blockSize[1], config.blockSize[2],
+                config.blockSize[0] * config.gridSize[0], config.blockSize[1] * config.gridSize[1], plist->nsci*c_numClPerSupercl,
                  c_numClPerSupercl, plist->na_c);
      }
  
@@ -581,7 +581,7 @@ void nbnxn_gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t       *nb,
      cl_plist_t          *plist   = nb->plist[iloc];
      cl_timers_t         *t       = nb->timers;
      cl_command_queue     stream  = nb->stream[iloc];
-    bool                 bDoTime = nb->bDoTime;
+    bool                 bDoTime = nb->bDoTime == CL_TRUE;
  
      if (plist->haveFreshList)
      {
@@ -658,11 +658,11 @@ void nbnxn_gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t       *nb,
  
      if (debug)
      {
-        fprintf(debug, "Pruning GPU kernel launch configuration:\n\tLocal work size: %dx%dx%d\n\t"
-                "\tGlobal work size: %dx%d\n\t#Super-clusters/clusters: %d/%d (%d)\n"
+        fprintf(debug, "Pruning GPU kernel launch configuration:\n\tLocal work size: %zux%zux%zu\n\t"
+                "\tGlobal work size: %zux%zu\n\t#Super-clusters/clusters: %d/%d (%d)\n"
                  "\tShMem: %zu\n",
-                (int)(config.blockSize[0]), (int)(config.blockSize[1]), (int)(config.blockSize[2]),
-                (int)(config.blockSize[0] * config.gridSize[0]), (int)(config.blockSize[1] * config.gridSize[1]), plist->nsci*c_numClPerSupercl,
+                config.blockSize[0], config.blockSize[1], config.blockSize[2],
+                config.blockSize[0] * config.gridSize[0], config.blockSize[1] * config.gridSize[1], plist->nsci*c_numClPerSupercl,
                  c_numClPerSupercl, plist->na_c, config.sharedMemorySize);
      }
  
@@ -711,10 +711,10 @@ void nbnxn_gpu_launch_cpyback(gmx_nbnxn_ocl_t               *nb,
  
      cl_atomdata_t   *adat    = nb->atdat;
      cl_timers_t     *t       = nb->timers;
-    bool             bDoTime = nb->bDoTime;
+    bool             bDoTime = nb->bDoTime == CL_TRUE;
      cl_command_queue stream  = nb->stream[iloc];
  
-    bool             bCalcEner   = flags & GMX_FORCE_ENERGY;
+    bool             bCalcEner   = (flags & GMX_FORCE_ENERGY) != 0;
      int              bCalcFshift = flags & GMX_FORCE_VIRIAL;
  
  
@@ -729,11 +729,11 @@ void nbnxn_gpu_launch_cpyback(gmx_nbnxn_ocl_t               *nb,
             test case, overall simulation performance was higher with
             the API calls, but this has not been tested on AMD OpenCL,
             so could be worth considering in future. */
-        nb->bNonLocalStreamActive = false;
+        nb->bNonLocalStreamActive = CL_FALSE;
          return;
      }
  
-    getGpuAtomRange(adat, aloc, adat_begin, adat_len);
+    getGpuAtomRange(adat, aloc, &adat_begin, &adat_len);
  
      /* beginning of timed D2H section */
      if (bDoTime)
@@ -764,7 +764,7 @@ void nbnxn_gpu_launch_cpyback(gmx_nbnxn_ocl_t               *nb,
      {
          cl_error = clEnqueueMarkerWithWaitList(stream, 0, nullptr, &(nb->nonlocal_done));
          assert(CL_SUCCESS == cl_error);
-        nb->bNonLocalStreamActive = true;
+        nb->bNonLocalStreamActive = CL_TRUE;
      }
  
      /* only transfer energies in the local stream */
@@ -819,7 +819,7 @@ int nbnxn_gpu_pick_ewald_kernel_type(bool bTwinCut)
       *
       */
      //if ((dev_info->prop.major >= 3 || bForceAnalyticalEwald) && !bForceTabulatedEwald)
-    if ((1                         || bForceAnalyticalEwald) && !bForceTabulatedEwald)
+    if (!bForceTabulatedEwald)
      {
          bUseAnalyticalEwald = true;
  
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp

index f1bddab95d9816e4ca9fdf772707bd22eb29e3a4..0b931f6028e3573edb308f8418ce364d4f20d37e 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp
@@ -149,7 +149,7 @@ static void init_atomdata_first(cl_atomdata_t *ad, int ntypes, gmx_device_runtim
      // TODO: handle errors, check clCreateBuffer flags
      ad->shift_vec = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, SHIFTS * ad->shift_vec_elem_size, nullptr, &cl_error);
      assert(cl_error == CL_SUCCESS);
-    ad->bShiftVecUploaded = false;
+    ad->bShiftVecUploaded = CL_FALSE;
  
      /* An element of the fshift device buffer has the same size as one element
         of the host side fshift buffer. */
@@ -233,7 +233,6 @@ map_interaction_types_to_gpu_kernel_flavors(const interaction_const_t *ic,
                          break;
                      default:
                          gmx_incons("The requested LJ combination rule is not implemented in the OpenCL GPU accelerated kernels!");
-                        break;
                  }
                  break;
              case eintmodFORCESWITCH:
@@ -244,7 +243,6 @@ map_interaction_types_to_gpu_kernel_flavors(const interaction_const_t *ic,
                  break;
              default:
                  gmx_incons("The requested VdW interaction modifier is not implemented in the GPU accelerated kernels!");
-                break;
          }
      }
      else if (ic->vdwtype == evdwPME)
@@ -500,7 +498,7 @@ nbnxn_gpu_create_context(gmx_device_runtime_data_t *runtimeData,
      device_id        = devInfo->ocl_gpu_id.ocl_device_id;
  
      context_properties[0] = CL_CONTEXT_PLATFORM;
-    context_properties[1] = (cl_context_properties) platform_id;
+    context_properties[1] = reinterpret_cast<cl_context_properties>(platform_id);
      context_properties[2] = 0; /* Terminates the list of properties */
  
      context = clCreateContext(context_properties, 1, &device_id, nullptr, nullptr, &cl_error);
@@ -510,7 +508,6 @@ nbnxn_gpu_create_context(gmx_device_runtime_data_t *runtimeData,
                    rank,
                    devInfo->device_name,
                    cl_error, ocl_get_error_string(cl_error).c_str());
-        return;
      }
  
      runtimeData->context = context;
@@ -640,7 +637,7 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t          **p_nb,
          snew(nb->plist[eintNonlocal], 1);
      }
  
-    nb->bUseTwoStreams = bLocalAndNonlocal;
+    nb->bUseTwoStreams = static_cast<cl_bool>(bLocalAndNonlocal);
  
      nb->timers = new cl_timers_t();
      snew(nb->timings, 1);
@@ -650,14 +647,14 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t          **p_nb,
      snew(nb->dev_rundata, 1);
  
      /* init nbst */
-    pmalloc((void**)&nb->nbst.e_lj, sizeof(*nb->nbst.e_lj));
-    pmalloc((void**)&nb->nbst.e_el, sizeof(*nb->nbst.e_el));
-    pmalloc((void**)&nb->nbst.fshift, SHIFTS * sizeof(*nb->nbst.fshift));
+    pmalloc(reinterpret_cast<void**>(&nb->nbst.e_lj), sizeof(*nb->nbst.e_lj));
+    pmalloc(reinterpret_cast<void**>(&nb->nbst.e_el), sizeof(*nb->nbst.e_el));
+    pmalloc(reinterpret_cast<void**>(&nb->nbst.fshift), SHIFTS * sizeof(*nb->nbst.fshift));
  
      init_plist(nb->plist[eintLocal]);
  
      /* OpenCL timing disabled if GMX_DISABLE_GPU_TIMING is defined. */
-    nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
+    nb->bDoTime = static_cast<cl_bool>(getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
  
      /* Create queues only after bDoTime has been initialized */
      if (nb->bDoTime)
@@ -679,7 +676,6 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t          **p_nb,
                    rank,
                    nb->dev_info->device_name,
                    cl_error);
-        return;
      }
  
      if (nb->bUseTwoStreams)
@@ -693,13 +689,12 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t          **p_nb,
                        rank,
                        nb->dev_info->device_name,
                        cl_error);
-            return;
          }
      }
  
      if (nb->bDoTime)
      {
-        init_timers(nb->timers, nb->bUseTwoStreams);
+        init_timers(nb->timers, nb->bUseTwoStreams == CL_TRUE);
          init_timings(nb->timings);
      }
  
@@ -796,7 +791,7 @@ void nbnxn_gpu_init_pairlist(gmx_nbnxn_ocl_t        *nb,
      // Timing accumulation should happen only if there was work to do
      // because getLastRangeTime() gets skipped with empty lists later
      // which leads to the counter not being reset.
-    bool             bDoTime    = (nb->bDoTime && h_plist->nsci > 0);
+    bool             bDoTime    = ((nb->bDoTime == CL_TRUE) && h_plist->nsci > 0);
      cl_command_queue stream     = nb->stream[iloc];
      cl_plist_t      *d_plist    = nb->plist[iloc];
  
@@ -865,7 +860,7 @@ void nbnxn_gpu_upload_shiftvec(gmx_nbnxn_ocl_t        *nb,
      {
          ocl_copy_H2D_async(adat->shift_vec, nbatom->shift_vec, 0,
                             SHIFTS * adat->shift_vec_elem_size, ls, nullptr);
-        adat->bShiftVecUploaded = true;
+        adat->bShiftVecUploaded = CL_TRUE;
      }
  }
  
@@ -876,7 +871,7 @@ void nbnxn_gpu_init_atomdata(gmx_nbnxn_ocl_t               *nb,
      cl_int           cl_error;
      int              nalloc, natoms;
      bool             realloced;
-    bool             bDoTime = nb->bDoTime;
+    bool             bDoTime = nb->bDoTime == CL_TRUE;
      cl_timers_t     *timers  = nb->timers;
      cl_atomdata_t   *d_atdat = nb->atdat;
      cl_command_queue ls      = nb->stream[eintLocal];
@@ -1035,16 +1030,16 @@ void nbnxn_gpu_free(gmx_nbnxn_ocl_t *nb)
  
      /* Free kernels */
      int kernel_count = sizeof(nb->kernel_ener_noprune_ptr) / sizeof(nb->kernel_ener_noprune_ptr[0][0]);
-    free_kernels((cl_kernel*)nb->kernel_ener_noprune_ptr, kernel_count);
+    free_kernels(nb->kernel_ener_noprune_ptr[0], kernel_count);
  
      kernel_count = sizeof(nb->kernel_ener_prune_ptr) / sizeof(nb->kernel_ener_prune_ptr[0][0]);
-    free_kernels((cl_kernel*)nb->kernel_ener_prune_ptr, kernel_count);
+    free_kernels(nb->kernel_ener_prune_ptr[0], kernel_count);
  
      kernel_count = sizeof(nb->kernel_noener_noprune_ptr) / sizeof(nb->kernel_noener_noprune_ptr[0][0]);
-    free_kernels((cl_kernel*)nb->kernel_noener_noprune_ptr, kernel_count);
+    free_kernels(nb->kernel_noener_noprune_ptr[0], kernel_count);
  
      kernel_count = sizeof(nb->kernel_noener_prune_ptr) / sizeof(nb->kernel_noener_prune_ptr[0][0]);
-    free_kernels((cl_kernel*)nb->kernel_noener_prune_ptr, kernel_count);
+    free_kernels(nb->kernel_noener_prune_ptr[0], kernel_count);
  
      free_kernel(&(nb->kernel_memset_f));
      free_kernel(&(nb->kernel_memset_f2));
author	Roland Schulz <roland.schulz@intel.com>
	Wed, 12 Sep 2018 09:34:10 +0000 (02:34 -0700)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Wed, 12 Sep 2018 12:24:14 +0000 (14:24 +0200)
src/gromacs/CMakeLists.txt		patch \| blob \| history
src/gromacs/ewald/pme-gpu-3dfft-ocl.cpp		patch \| blob \| history
src/gromacs/ewald/pme-gpu-internal.cpp		patch \| blob \| history
src/gromacs/ewald/pme-gpu-internal.h		patch \| blob \| history
src/gromacs/ewald/pme-gpu-program-impl-ocl.cpp		patch \| blob \| history
src/gromacs/ewald/pme-gpu.cpp		patch \| blob \| history
src/gromacs/gpu_utils/devicebuffer_ocl.h		patch \| blob \| history
src/gromacs/gpu_utils/gpu_utils.h		patch \| blob \| history
src/gromacs/gpu_utils/gpu_utils_ocl.cpp		patch \| blob \| history
src/gromacs/gpu_utils/gpuregiontimer_ocl.h		patch \| blob \| history
src/gromacs/gpu_utils/ocl_caching.cpp		patch \| blob \| history
src/gromacs/gpu_utils/ocl_caching.h		patch \| blob \| history
src/gromacs/gpu_utils/ocl_compiler.cpp		patch \| blob \| history
src/gromacs/gpu_utils/ocl_compiler.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu		patch \| blob \| history
src/gromacs/mdlib/nbnxn_gpu_common.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp		patch \| blob \| history
src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp		patch \| blob \| history