#include "pme-gpu-internal.h" // for GridOrdering enum
#include "pme-gpu-types-host.h"
+//! PME interpolation order
+constexpr int c_pmeOrder = 4;
+//! These hardcoded spread/gather parameters refer to the not-implemented PME GPU 2D decomposition in X/Y
+constexpr bool c_wrapX = true;
+constexpr bool c_wrapY = true;
+
//! PME CUDA kernels forward declarations. Kernels are documented in their respective files.
template <
const int order,
const bool computeSplines,
const bool spreadCharges,
const bool wrapX,
const bool wrapY
>
void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams kernelParams);
+// Add extern declarations to inform the compiler that the definitions
+// are provided in another translation unit.
+extern template
+void pme_spline_and_spread_kernel<c_pmeOrder, true, true, c_wrapX, c_wrapY>(const PmeGpuCudaKernelParams);
+extern template
+void pme_spline_and_spread_kernel<c_pmeOrder, true, false, c_wrapX, c_wrapY>(const PmeGpuCudaKernelParams);
+extern template
+void pme_spline_and_spread_kernel<c_pmeOrder, false, true, c_wrapX, c_wrapY>(const PmeGpuCudaKernelParams);
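+// For each extern declaration above, a matching explicit instantiation
+// definition must exist in the translation unit that defines the kernel
+// template. A minimal sketch of what that instantiation looks like (the
+// defining file may also add launch bounds and the __global__ qualifier):
+//
+//   template void pme_spline_and_spread_kernel<c_pmeOrder, true, true, c_wrapX, c_wrapY>(const PmeGpuCudaKernelParams);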
+
template<
GridOrdering gridOrdering,
bool computeEnergyAndVirial
>
void pme_solve_kernel(const PmeGpuCudaKernelParams kernelParams);
+// Add extern declarations to inform the compiler that the definitions
+// are provided in another translation unit.
+extern template
+void pme_solve_kernel<GridOrdering::XYZ, false>(const PmeGpuCudaKernelParams);
+extern template
+void pme_solve_kernel<GridOrdering::XYZ, true>(const PmeGpuCudaKernelParams);
+extern template
+void pme_solve_kernel<GridOrdering::YZX, false>(const PmeGpuCudaKernelParams);
+extern template
+void pme_solve_kernel<GridOrdering::YZX, true>(const PmeGpuCudaKernelParams);
+
template <
const int order,
const bool overwriteForces,
const bool wrapX,
const bool wrapY
>
void pme_gather_kernel(const PmeGpuCudaKernelParams kernelParams);
+// Add extern declarations to inform the compiler that the definitions
+// are provided in another translation unit.
+extern template
+void pme_gather_kernel<c_pmeOrder, true, c_wrapX, c_wrapY>(const PmeGpuCudaKernelParams);
+extern template
+void pme_gather_kernel<c_pmeOrder, false, c_wrapX, c_wrapY>(const PmeGpuCudaKernelParams);
PmeGpuProgramImpl::PmeGpuProgramImpl(const gmx_device_info_t *)
{
solveMaxWorkGroupSize = c_solveMaxThreadsPerBlock;
gatherWorkGroupSize = c_gatherMaxThreadsPerBlock;
- // PME interpolation order
- constexpr int pmeOrder = 4;
- GMX_UNUSED_VALUE(pmeOrder);
- // These hardcoded spread/gather parameters refer to not-implemented PME GPU 2D decomposition in X/Y
- constexpr bool wrapX = true;
- constexpr bool wrapY = true;
- GMX_UNUSED_VALUE(wrapX);
- GMX_UNUSED_VALUE(wrapY);
- splineAndSpreadKernel = pme_spline_and_spread_kernel<pmeOrder, true, true, wrapX, wrapY>;
- splineKernel = pme_spline_and_spread_kernel<pmeOrder, true, false, wrapX, wrapY>;
- spreadKernel = pme_spline_and_spread_kernel<pmeOrder, false, true, wrapX, wrapY>;
- gatherKernel = pme_gather_kernel<pmeOrder, true, wrapX, wrapY>;
- gatherReduceWithInputKernel = pme_gather_kernel<pmeOrder, false, wrapX, wrapY>;
+ splineAndSpreadKernel = pme_spline_and_spread_kernel<c_pmeOrder, true, true, c_wrapX, c_wrapY>;
+ splineKernel = pme_spline_and_spread_kernel<c_pmeOrder, true, false, c_wrapX, c_wrapY>;
+ spreadKernel = pme_spline_and_spread_kernel<c_pmeOrder, false, true, c_wrapX, c_wrapY>;
+ gatherKernel = pme_gather_kernel<c_pmeOrder, true, c_wrapX, c_wrapY>;
+ gatherReduceWithInputKernel = pme_gather_kernel<c_pmeOrder, false, c_wrapX, c_wrapY>;
solveXYZKernel = pme_solve_kernel<GridOrdering::XYZ, false>;
solveXYZEnergyKernel = pme_solve_kernel<GridOrdering::XYZ, true>;
solveYZXKernel = pme_solve_kernel<GridOrdering::YZX, false>;
// TODO: template on transferKind to avoid runtime conditionals
int cu_copy_D2H(void *h_dest, void *d_src, size_t bytes,
- GpuApiCallBehavior transferKind, cudaStream_t s = 0)
+ GpuApiCallBehavior transferKind, cudaStream_t s = nullptr)
{
cudaError_t stat;
- if (h_dest == NULL || d_src == NULL || bytes == 0)
+ if (h_dest == nullptr || d_src == nullptr || bytes == 0)
{
return -1;
}
/*!
* The copy is launched in stream s or, if not specified, in stream 0.
*/
-int cu_copy_D2H_async(void * h_dest, void * d_src, size_t bytes, cudaStream_t s = 0)
+int cu_copy_D2H_async(void * h_dest, void * d_src, size_t bytes, cudaStream_t s = nullptr)
{
return cu_copy_D2H(h_dest, d_src, bytes, GpuApiCallBehavior::Async, s);
}
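// A minimal sketch of the TODO above cu_copy_D2H() (templating on
// transferKind to avoid runtime conditionals); the helper name
// cu_copy_D2H_t is hypothetical, and the compiler can fold the branch on
// the compile-time template parameter:
//
//   template <GpuApiCallBehavior transferKind>
//   int cu_copy_D2H_t(void *h_dest, void *d_src, size_t bytes, cudaStream_t s = nullptr)
//   {
//       if (h_dest == nullptr || d_src == nullptr || bytes == 0)
//       {
//           return -1;
//       }
//       cudaError_t stat = (transferKind == GpuApiCallBehavior::Async) ?
//           cudaMemcpyAsync(h_dest, d_src, bytes, cudaMemcpyDeviceToHost, s) :
//           cudaMemcpy(h_dest, d_src, bytes, cudaMemcpyDeviceToHost);
//       CU_RET_ERR(stat, "DtoH cudaMemcpy failed");
//       return 0;
//   }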
// TODO: template on transferKind to avoid runtime conditionals
int cu_copy_H2D(void *d_dest, void *h_src, size_t bytes,
- GpuApiCallBehavior transferKind, cudaStream_t s = 0)
+ GpuApiCallBehavior transferKind, cudaStream_t s = nullptr)
{
cudaError_t stat;
- if (d_dest == NULL || h_src == NULL || bytes == 0)
+ if (d_dest == nullptr || h_src == nullptr || bytes == 0)
{
return -1;
}
/*!
* The copy is launched in stream s or, if not specified, in stream 0.
*/
-int cu_copy_H2D_async(void * d_dest, void * h_src, size_t bytes, cudaStream_t s = 0)
+int cu_copy_H2D_async(void * d_dest, void * h_src, size_t bytes, cudaStream_t s = nullptr)
{
return cu_copy_H2D(d_dest, h_src, bytes, GpuApiCallBehavior::Async, s);
}
memset(&td, 0, sizeof(td));
td.readMode = cudaReadModeElementType;
- stat = cudaCreateTextureObject(&texObj, &rd, &td, NULL);
+ stat = cudaCreateTextureObject(&texObj, &rd, &td, nullptr);
CU_RET_ERR(stat, "cudaCreateTextureObject failed");
}
*
* The copy is launched in stream s or, if not specified, in stream 0.
*/
-int cu_copy_D2H(void *h_dest, void *d_src, size_t bytes, GpuApiCallBehavior transferKind, cudaStream_t s /*= 0*/);
+int cu_copy_D2H(void *h_dest, void *d_src, size_t bytes, GpuApiCallBehavior transferKind, cudaStream_t /*s = nullptr*/);
/*! Launches synchronous device to host memory copy in stream 0. */
int cu_copy_D2H_sync(void * /*h_dest*/, void * /*d_src*/, size_t /*bytes*/);
/*! Launches asynchronous device to host memory copy in stream s. */
-int cu_copy_D2H_async(void * /*h_dest*/, void * /*d_src*/, size_t /*bytes*/, cudaStream_t /*s = 0*/);
+int cu_copy_D2H_async(void * /*h_dest*/, void * /*d_src*/, size_t /*bytes*/, cudaStream_t /*s = nullptr*/);
/*! Launches synchronous or asynchronous host to device memory copy.
*
* The copy is launched in stream s or, if not specified, in stream 0.
*/
-int cu_copy_H2D(void *d_dest, void *h_src, size_t bytes, GpuApiCallBehavior transferKind, cudaStream_t /*s = 0*/);
+int cu_copy_H2D(void *d_dest, void *h_src, size_t bytes, GpuApiCallBehavior transferKind, cudaStream_t /*s = nullptr*/);
/*! Launches synchronous host to device memory copy. */
int cu_copy_H2D_sync(void * /*d_dest*/, void * /*h_src*/, size_t /*bytes*/);
/*! Launches asynchronous host to device memory copy in stream s. */
-int cu_copy_H2D_async(void * /*d_dest*/, void * /*h_src*/, size_t /*bytes*/, cudaStream_t /*s = 0*/);
+int cu_copy_H2D_async(void * /*d_dest*/, void * /*h_src*/, size_t /*bytes*/, cudaStream_t /*s = nullptr*/);
// TODO: the two functions below are pretty much a constructor/destructor of a simple
// GPU table object. There is also an almost self-contained fetchFromParamLookupTable()
int numElem,
const gmx_device_info_t *devInfo);
+// Add extern declarations so each translation unit knows that
+// a definition will be provided elsewhere.
+extern template void initParamLookupTable<int>(int * &, cudaTextureObject_t &, const int *, int, const gmx_device_info_t *);
+extern template void initParamLookupTable<float>(float * &, cudaTextureObject_t &, const float *, int, const gmx_device_info_t *);
+
/*! \brief Destroy parameter lookup table.
*
* Unbinds texture object, deallocates device memory.
cudaTextureObject_t texObj,
const gmx_device_info_t *devInfo);
+// Add extern declarations so each translation unit knows that
+// a definition will be provided elsewhere.
+extern template void destroyParamLookupTable<int>(int *, cudaTextureObject_t, const gmx_device_info_t *);
+extern template void destroyParamLookupTable<float>(float *, cudaTextureObject_t, const gmx_device_info_t *);
+
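+// A minimal sketch of the TODO above: wrapping initParamLookupTable()/
+// destroyParamLookupTable() in a small RAII table object (illustrative
+// only; the class name and members are hypothetical):
+//
+//   template <typename T>
+//   class GpuParamLookupTable
+//   {
+//       public:
+//           GpuParamLookupTable(const T *hostData, int numElem, const gmx_device_info_t *devInfo)
+//               : devInfo_(devInfo)
+//           {
+//               initParamLookupTable(deviceData_, texObj_, hostData, numElem, devInfo);
+//           }
+//           ~GpuParamLookupTable()
+//           {
+//               destroyParamLookupTable(deviceData_, texObj_, devInfo_);
+//           }
+//       private:
+//           T                       *deviceData_ = nullptr;
+//           cudaTextureObject_t      texObj_     = 0;
+//           const gmx_device_info_t *devInfo_;
+//   };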
/*! \brief Add a triplet stored in a float3 to an rvec variable.
*
* \param[out] a Rvec to increment
*/
static int cuda_max_device_count = 32;
-static bool cudaProfilerRun = ((getenv("NVPROF_ID") != NULL));
+static bool cudaProfilerRun = ((getenv("NVPROF_ID") != nullptr));
/** Dummy kernel used for sanity checking. */
static __global__ void k_dummy_test(void)
void findGpus(gmx_gpu_info_t *gpu_info)
{
- int i, ndev, checkres;
- cudaError_t stat;
- cudaDeviceProp prop;
- gmx_device_info_t *devs;
-
assert(gpu_info);
gpu_info->n_dev_compatible = 0;
- ndev = 0;
- devs = NULL;
-
- stat = cudaGetDeviceCount(&ndev);
+ int ndev;
+ cudaError_t stat = cudaGetDeviceCount(&ndev);
if (stat != cudaSuccess)
{
GMX_THROW(gmx::InternalError("Invalid call of findGpus() when CUDA API returned an error, perhaps "
// We expect to start device support/sanity checks with a clean runtime error state
gmx::ensureNoPendingCudaError("");
+ gmx_device_info_t *devs;
snew(devs, ndev);
- for (i = 0; i < ndev; i++)
+ for (int i = 0; i < ndev; i++)
{
- checkres = is_gmx_supported_gpu_id(i, &prop);
+ cudaDeviceProp prop;
+ int checkres = is_gmx_supported_gpu_id(i, &prop);
devs[i].id = i;
devs[i].prop = prop;
}
else
{
- *nb_alloc = NULL;
- *nb_free = NULL;
+ *nb_alloc = nullptr;
+ *nb_free = nullptr;
}
}
cu_nbparam_t *nbp,
const gmx_device_info_t *dev_info)
{
- if (nbp->coulomb_tab != NULL)
+ if (nbp->coulomb_tab != nullptr)
{
nbnxn_cuda_free_nbparam_table(nbp, dev_info);
}
stat = cudaMalloc((void**)&ad->e_el, sizeof(*ad->e_el));
CU_RET_ERR(stat, "cudaMalloc failed on ad->e_el");
- /* initialize to NULL poiters to data that is not allocated here and will
+ /* initialize to nullptr pointers to data that is not allocated here and will
need reallocation in nbnxn_cuda_init_atomdata */
- ad->xq = NULL;
- ad->f = NULL;
+ ad->xq = nullptr;
+ ad->f = nullptr;
/* size -1 indicates that the respective array hasn't been initialized yet */
ad->natoms = -1;
/* Benchmarking/development environment variables to force the use of
analytical or tabulated Ewald kernel. */
- bForceAnalyticalEwald = (getenv("GMX_CUDA_NB_ANA_EWALD") != NULL);
- bForceTabulatedEwald = (getenv("GMX_CUDA_NB_TAB_EWALD") != NULL);
+ bForceAnalyticalEwald = (getenv("GMX_CUDA_NB_ANA_EWALD") != nullptr);
+ bForceTabulatedEwald = (getenv("GMX_CUDA_NB_TAB_EWALD") != nullptr);
if (bForceAnalyticalEwald && bForceTabulatedEwald)
{
/* Use twin cut-off kernels if requested by bTwinCut or the env. var.
forces it (use it for debugging/benchmarking only). */
- if (!bTwinCut && (getenv("GMX_CUDA_NB_EWALD_TWINCUT") == NULL))
+ if (!bTwinCut && (getenv("GMX_CUDA_NB_EWALD_TWINCUT") == nullptr))
{
kernel_type = bUseAnalyticalEwald ? eelCuEWALD_ANA : eelCuEWALD_TAB;
}
break;
default:
gmx_incons("The requested LJ combination rule is not implemented in the CUDA GPU accelerated kernels!");
- break;
}
break;
case eintmodFORCESWITCH:
break;
default:
gmx_incons("The requested VdW interaction modifier is not implemented in the CUDA GPU accelerated kernels!");
- break;
}
}
else if (ic->vdwtype == evdwPME)
}
/* generate table for PME */
- nbp->coulomb_tab = NULL;
+ nbp->coulomb_tab = nullptr;
if (nbp->eeltype == eelCuEWALD_TAB || nbp->eeltype == eelCuEWALD_TAB_TWIN)
{
init_ewald_coulomb_force_table(ic, nbp, dev_info);
/*! Initializes the pair list data structure. */
static void init_plist(cu_plist_t *pl)
{
- /* initialize to NULL pointers to data that is not allocated here and will
+ /* initialize to nullptr pointers to data that is not allocated here and will
need reallocation in nbnxn_gpu_init_pairlist */
- pl->sci = NULL;
- pl->cj4 = NULL;
- pl->imask = NULL;
- pl->excl = NULL;
+ pl->sci = nullptr;
+ pl->cj4 = nullptr;
+ pl->imask = nullptr;
+ pl->excl = nullptr;
/* size -1 indicates that the respective array hasn't been initialized yet */
pl->na_c = -1;
cudaError_t stat;
gmx_nbnxn_cuda_t *nb;
- if (p_nb == NULL)
+ if (p_nb == nullptr)
{
return;
}
* case will be a single value.
*/
int highest_priority;
- stat = cudaDeviceGetStreamPriorityRange(NULL, &highest_priority);
+ stat = cudaDeviceGetStreamPriorityRange(nullptr, &highest_priority);
CU_RET_ERR(stat, "cudaDeviceGetStreamPriorityRange failed");
stat = cudaStreamCreateWithPriority(&nb->stream[eintNonlocal],
* This is the main reason why they are disabled by default.
*/
// TODO: Consider turning on by default when we can detect nr of streams.
- nb->bDoTime = (getenv("GMX_ENABLE_GPU_TIMING") != NULL);
+ nb->bDoTime = (getenv("GMX_ENABLE_GPU_TIMING") != nullptr);
if (nb->bDoTime)
{
cu_atomdata_t *atdat;
cu_nbparam_t *nbparam;
- if (nb == NULL)
+ if (nb == nullptr)
{
return;
}
/* Free nbst */
pfree(nb->nbst.e_lj);
- nb->nbst.e_lj = NULL;
+ nb->nbst.e_lj = nullptr;
pfree(nb->nbst.e_el);
- nb->nbst.e_el = NULL;
+ nb->nbst.e_el = nullptr;
pfree(nb->nbst.fshift);
- nb->nbst.fshift = NULL;
+ nb->nbst.fshift = nullptr;
sfree(atdat);
sfree(nbparam);
int nbnxn_gpu_min_ci_balanced(gmx_nbnxn_cuda_t *nb)
{
- return nb != NULL ?
+ return nb != nullptr ?
gpu_min_ci_balanced_factor*nb->dev_info->prop.multiProcessorCount : 0;
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
int part)
#ifdef FUNCTION_DECLARATION_ONLY
; /* Only do function declaration, omit the function body. */
+
+// Add extern declarations so each translation unit knows that
+// a definition will be provided elsewhere.
+extern template
+__global__ void
+nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const cu_nbparam_t,
+ const cu_plist_t, int, int);
+extern template
+__global__ void
+nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const cu_nbparam_t,
+ const cu_plist_t, int, int);
#else
{