/*********************************/
/*! \brief Returns the number of blocks to be used for the nonbonded GPU kernel. */
-static inline int calc_nb_kernel_nblock(int nwork_units, const gmx_device_info_t* dinfo)
+static inline int calc_nb_kernel_nblock(int nwork_units, const DeviceInformation* deviceInfo)
{
int max_grid_x_size;
- assert(dinfo);
+ assert(deviceInfo);
/* CUDA does not accept a grid dimension of 0 (which can happen e.g. with an
   empty domain); that case should be handled before this point. */
assert(nwork_units > 0);
- max_grid_x_size = dinfo->prop.maxGridSize[0];
+ max_grid_x_size = deviceInfo->prop.maxGridSize[0];
/* do we exceed the grid x dimension limit? */
if (nwork_units > max_grid_x_size)
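{
/* Illustrative completion, since the hunk is truncated here (assumed
 * behavior): the pair-list setup is expected to keep the work-unit count
 * within the limit, so exceeding it is treated as a fatal error rather
 * than being split over further grid dimensions. */
gmx_fatal(FARGS, "Nonbonded work-unit count exceeds the grid x-dimension limit");
}
return nwork_units;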
int evdwtype,
bool bDoEne,
bool bDoPrune,
- const gmx_device_info_t gmx_unused* devInfo)
+ const DeviceInformation gmx_unused* deviceInfo)
{
nbnxn_cu_kfunc_ptr_t res;
/* assert assumptions made by the kernels */
GMX_ASSERT(c_nbnxnGpuClusterSize * c_nbnxnGpuClusterSize / c_nbnxnGpuClusterpairSplit
- == devInfo->prop.warpSize,
+ == deviceInfo->prop.warpSize,
"The CUDA kernels require the "
"cluster_size_i*cluster_size_j/nbnxn_gpu_clusterpair_split to match the warp size "
"of the architecture targeted.");
/*! \brief Calculates the amount of shared memory required by the nonbonded kernel in use. */
static inline int calc_shmem_required_nonbonded(const int num_threads_z,
- const gmx_device_info_t gmx_unused* dinfo,
+ const DeviceInformation gmx_unused* deviceInfo,
const cu_nbparam_t* nbp)
{
int shmem;
- assert(dinfo);
+ assert(deviceInfo);
/* size of shmem (force-buffers/xq/atom type preloading) */
/* NOTE: with the default kernel on sm3.0 we need shmem only for pre-loading */
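/* Illustrative sketch of the accounting that follows (assumed shape; the
 * hunk is truncated here, and c_numClPerSupercl is an assumed constant
 * name): the i-cluster x+q data is always preloaded, followed by either
 * atom types or LJ combination parameters depending on nbp->vdwtype. */
shmem = c_numClPerSupercl * c_clSize * sizeof(float4); /* i-cluster x+q preload */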
* - The 1D block-grid contains as many blocks as super-clusters.
*/
int num_threads_z = 1;
- if (nb->dev_info->prop.major == 3 && nb->dev_info->prop.minor == 7)
+ if (nb->deviceInfo->prop.major == 3 && nb->deviceInfo->prop.minor == 7)
{
num_threads_z = 2;
}
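/* Background note (assumed rationale): sm_3.7 (GK210, e.g. Tesla K80)
 * doubles the register file per SM relative to sm_3.5, which is what
 * makes running a second block's worth of threads in z profitable on
 * that architecture only. */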
- int nblock = calc_nb_kernel_nblock(plist->nsci, nb->dev_info);
+ int nblock = calc_nb_kernel_nblock(plist->nsci, nb->deviceInfo);
KernelLaunchConfig config;
config.blockSize[0] = c_clSize;
config.blockSize[1] = c_clSize;
config.blockSize[2] = num_threads_z;
config.gridSize[0] = nblock;
- config.sharedMemorySize = calc_shmem_required_nonbonded(num_threads_z, nb->dev_info, nbp);
+ config.sharedMemorySize = calc_shmem_required_nonbonded(num_threads_z, nb->deviceInfo, nbp);
config.stream = stream;
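/* Resulting launch geometry, for reference (assuming the default
 * c_clSize == 8): block = (8, 8, num_threads_z), grid = (nblock, 1, 1),
 * i.e. one thread block per super-cluster in the pair list. */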
if (debug)
auto* timingEvent = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr;
const auto kernel = select_nbnxn_kernel(
nbp->eeltype, nbp->vdwtype, stepWork.computeEnergy,
- (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune), nb->dev_info);
+ (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune), nb->deviceInfo);
const auto kernelArgs =
prepareGpuKernelArguments(kernel, config, adat, nbp, plist, &stepWork.computeVirial);
launchGpuKernel(kernel, config, timingEvent, "k_calc_nb", kernelArgs);
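/* For illustration (table names are assumed, not shown in this hunk):
 * select_nbnxn_kernel() returns its kernel pointer from 2D
 * function-pointer tables indexed by the electrostatics and VdW types,
 * with separate tables for the energy/prune variants, roughly:
 *   res = bDoEne ? (bDoPrune ? nb_kfunc_ener_prune_ptr[eeltype][evdwtype]
 *                            : nb_kfunc_ener_noprune_ptr[eeltype][evdwtype])
 *                : (bDoPrune ? nb_kfunc_noener_prune_ptr[eeltype][evdwtype]
 *                            : nb_kfunc_noener_noprune_ptr[eeltype][evdwtype]);
 */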
* - The 1D block-grid contains as many blocks as super-clusters.
*/
int num_threads_z = c_cudaPruneKernelJ4Concurrency;
- int nblock = calc_nb_kernel_nblock(numSciInPart, nb->dev_info);
+ int nblock = calc_nb_kernel_nblock(numSciInPart, nb->deviceInfo);
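/* Unlike the force kernel above, the prune kernel always runs with
 * c_cudaPruneKernelJ4Concurrency threads in z (a compile-time constant,
 * assumed to default to 4), processing that many j4 cluster groups
 * concurrently per block. */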
KernelLaunchConfig config;
config.blockSize[0] = c_clSize;
config.blockSize[1] = c_clSize;