Make cl_nbparam into a struct
author     Artem Zhmurov <zhmurov@gmail.com>
           Thu, 25 Jun 2020 15:31:36 +0000 (15:31 +0000)
committer  Paul Bauer <paul.bauer.q@gmail.com>
           Thu, 25 Jun 2020 15:31:36 +0000 (15:31 +0000)
This is needed to unify the OpenCL path with the CUDA path.
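
For context, a minimal, self-contained sketch (illustration only, not code from this change) of the pattern the unification relies on: each backend's gputraits header supplies its own DeviceTexture alias, so a single parameter struct definition can be shared between the CUDA and OpenCL builds. The struct and field names below are hypothetical; only DeviceTexture and cudaTextureObject_t appear in the actual change.

// Sketch of the backend-specific alias, as provided by gputraits.cuh / gputraits_ocl.h.
#if defined(__CUDACC__)
using DeviceTexture = cudaTextureObject_t; // real texture handle when compiled with nvcc
#else
using DeviceTexture = void*;               // inert stub for non-CUDA builds
#endif

// One struct definition now serves both back ends; only the alias (and the
// DeviceBuffer<> implementation, not shown here) differs per backend.
struct ParamsSketch
{
    float         tableScale; // plain scalar parameter, identical on all back ends
    DeviceTexture tableTex;   // type resolves per backend at compile time
};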

14 files changed:
src/gromacs/gpu_utils/gputraits.cuh
src/gromacs/gpu_utils/gputraits.h
src/gromacs/gpu_utils/gputraits_ocl.h
src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel.cuh
src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel_pruneonly.cu
src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel_pruneonly.cuh
src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel_utils.cuh
src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h
src/gromacs/nbnxm/gpu_types_common.h
src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h

index 76606611b8e329604037052571ad948398009d26..98fd8d04efc9c5b3d3f8a657c3bd7f7eb55711bf 100644 (file)
@@ -48,6 +48,9 @@
 
 #include "gromacs/hardware/gpu_hw_info.h"
 
+//! Device texture for fast read-only data fetching
+using DeviceTexture = cudaTextureObject_t;
+
 /*! \brief CUDA device information.
  *
  * The CUDA device information is queried and set at detection and contains
index 5fec00303adc59588557726d188efe5d6d9e2f84..9ae87f1436aabe7a6c04298e2a0839dc5ada17d9 100644 (file)
@@ -57,6 +57,8 @@
 
 #else
 
+using DeviceTexture = void*;
+
 //! Stub for device information.
 struct DeviceInformation
 {
index ff4572e1affbfef1414493fa3928b08511855ba1..a3eb510c95cf2135c9b82537c5bda106bdbe0a7a 100644 (file)
@@ -48,6 +48,8 @@
 #include "gromacs/gpu_utils/gmxopencl.h"
 #include "gromacs/hardware/gpu_hw_info.h"
 
+using DeviceTexture = void*;
+
 //! OpenCL device vendors
 enum class DeviceVendor : int
 {
index 241392f187dfbcd636b954950975d95b4ecc8993..f7a12cf99d3abd07a10b93b8721e116777760827 100644 (file)
@@ -121,7 +121,7 @@ namespace Nbnxm
 constexpr static int c_bufOpsThreadsPerBlock = 128;
 
 /*! Nonbonded kernel function pointer type */
-typedef void (*nbnxn_cu_kfunc_ptr_t)(const cu_atomdata_t, const cu_nbparam_t, const cu_plist_t, bool);
+typedef void (*nbnxn_cu_kfunc_ptr_t)(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, bool);
 
 /*********************************/
 
@@ -330,7 +330,7 @@ static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(int                     e
 /*! \brief Calculates the amount of shared memory required by the nonbonded kernel in use. */
 static inline int calc_shmem_required_nonbonded(const int               num_threads_z,
                                                 const DeviceInformation gmx_unused* deviceInfo,
-                                                const cu_nbparam_t*                 nbp)
+                                                const NBParamGpu*                   nbp)
 {
     int shmem;
 
@@ -483,7 +483,7 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
 void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const InteractionLocality iloc)
 {
     cu_atomdata_t*      adat         = nb->atdat;
-    cu_nbparam_t*       nbp          = nb->nbparam;
+    NBParamGpu*         nbp          = nb->nbparam;
     cu_plist_t*         plist        = nb->plist[iloc];
     cu_timers_t*        t            = nb->timers;
     const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
@@ -596,7 +596,7 @@ static inline int calc_shmem_required_prune(const int num_threads_z)
 void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, const int numParts)
 {
     cu_atomdata_t*      adat         = nb->atdat;
-    cu_nbparam_t*       nbp          = nb->nbparam;
+    NBParamGpu*         nbp          = nb->nbparam;
     cu_plist_t*         plist        = nb->plist[iloc];
     cu_timers_t*        t            = nb->timers;
     const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
index 962f5a049e3ef706ee24e4add3268bc422bd0046..69beab6b6fec14062b832452e901ab9056f68bc5 100644 (file)
@@ -93,7 +93,7 @@ static unsigned int gpu_min_ci_balanced_factor = 44;
 static void nbnxn_cuda_clear_e_fshift(NbnxmGpu* nb);
 
 /* Fw. decl, */
-static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t* nbparam);
+static void nbnxn_cuda_free_nbparam_table(NBParamGpu* nbparam);
 
 /*! \brief Initialized the Ewald Coulomb correction GPU table.
 
@@ -102,7 +102,7 @@ static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t* nbparam);
     it just re-uploads the table.
  */
 static void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
-                                           cu_nbparam_t*                nbp,
+                                           NBParamGpu*                  nbp,
                                            const DeviceContext&         deviceContext)
 {
     if (nbp->coulomb_tab != nullptr)
@@ -192,7 +192,7 @@ static int pick_ewald_kernel_type(const interaction_const_t& ic)
 }
 
 /*! Copies all parameters related to the cut-off from ic to nbp */
-static void set_cutoff_parameters(cu_nbparam_t* nbp, const interaction_const_t* ic, const PairlistParams& listParams)
+static void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t* ic, const PairlistParams& listParams)
 {
     nbp->ewald_beta        = ic->ewaldcoeff_q;
     nbp->sh_ewald          = ic->sh_ewald;
@@ -215,7 +215,7 @@ static void set_cutoff_parameters(cu_nbparam_t* nbp, const interaction_const_t*
 }
 
 /*! Initializes the nonbonded parameter data structure. */
-static void init_nbparam(cu_nbparam_t*                   nbp,
+static void init_nbparam(NBParamGpu*                     nbp,
                          const interaction_const_t*      ic,
                          const PairlistParams&           listParams,
                          const nbnxn_atomdata_t::Params& nbatParams,
@@ -331,8 +331,8 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interacti
     {
         return;
     }
-    NbnxmGpu*     nb  = nbv->gpu_nbv;
-    cu_nbparam_t* nbp = nbv->gpu_nbv->nbparam;
+    NbnxmGpu*   nb  = nbv->gpu_nbv;
+    NBParamGpu* nbp = nbv->gpu_nbv->nbparam;
 
     set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
 
@@ -679,7 +679,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
     }
 }
 
-static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t* nbparam)
+static void nbnxn_cuda_free_nbparam_table(NBParamGpu* nbparam)
 {
     if (nbparam->eeltype == eelTypeEWALD_TAB || nbparam->eeltype == eelTypeEWALD_TAB_TWIN)
     {
@@ -691,7 +691,7 @@ void gpu_free(NbnxmGpu* nb)
 {
     cudaError_t    stat;
     cu_atomdata_t* atdat;
-    cu_nbparam_t*  nbparam;
+    NBParamGpu*    nbparam;
 
     if (nb == nullptr)
     {
index 89f75da5f9c2731d88b4c7b8aae9060c2c451e9a..7faea980b7c5cc8df1fa8ce698ab2cc2bc575b9b 100644 (file)
@@ -158,7 +158,7 @@ __launch_bounds__(THREADS_PER_BLOCK)
         __global__ void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_cuda)
 #    endif /* CALC_ENERGIES */
 #endif     /* PRUNE_NBL */
-                (const cu_atomdata_t atdat, const cu_nbparam_t nbparam, const cu_plist_t plist, bool bCalcFshift)
+                (const cu_atomdata_t atdat, const NBParamGpu nbparam, const cu_plist_t plist, bool bCalcFshift)
 #ifdef FUNCTION_DECLARATION_ONLY
                         ; /* Only do function declaration, omit the function body. */
 #else
index 945b1912fced824dc8345836bd2c6d0d455a9d42..81755cb9039a1cc9201bd6744ee59aa0cd2f2952 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2016,2017,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -39,7 +39,7 @@
 #ifndef FUNCTION_DECLARATION_ONLY
 /* Instantiate external template functions */
 template __global__ void
-nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const cu_nbparam_t, const cu_plist_t, int, int);
+nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, int, int);
 template __global__ void
-nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const cu_nbparam_t, const cu_plist_t, int, int);
+nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, int, int);
 #endif
index e9c5b5114397901af4a22ce1534c54e95ab0e883..e5bf2b967cb397c79d0a7bfa9f048727b7b3a1cd 100644 (file)
 template<bool haveFreshList>
 __launch_bounds__(THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP) __global__
         void nbnxn_kernel_prune_cuda(const cu_atomdata_t atdat,
-                                     const cu_nbparam_t  nbparam,
+                                     const NBParamGpu    nbparam,
                                      const cu_plist_t    plist,
                                      int                 numParts,
                                      int                 part)
@@ -114,9 +114,9 @@ __launch_bounds__(THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP) __global__
 // Add extern declarations so each translation unit understands that
 // there will be a definition provided.
 extern template __global__ void
-nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const cu_nbparam_t, const cu_plist_t, int, int);
+nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, int, int);
 extern template __global__ void
-nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const cu_nbparam_t, const cu_plist_t, int, int);
+nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, int, int);
 #else
 {
 
index cbca9452b99b597fca4d03bdf434f6053cd2bb8d..4850298f8f154082b8b19c2a395aab938fe8d59c 100644 (file)
@@ -90,7 +90,7 @@ static __forceinline__ __device__ void
 
 /*! Apply force switch,  force + energy version. */
 static __forceinline__ __device__ void
-                       calculate_force_switch_F(const cu_nbparam_t nbparam, float c6, float c12, float inv_r, float r2, float* F_invr)
+                       calculate_force_switch_F(const NBParamGpu nbparam, float c6, float c12, float inv_r, float r2, float* F_invr)
 {
     float r, r_switch;
 
@@ -109,13 +109,13 @@ static __forceinline__ __device__ void
 }
 
 /*! Apply force switch, force-only version. */
-static __forceinline__ __device__ void calculate_force_switch_F_E(const cu_nbparam_t nbparam,
-                                                                  float              c6,
-                                                                  float              c12,
-                                                                  float              inv_r,
-                                                                  float              r2,
-                                                                  float*             F_invr,
-                                                                  float*             E_lj)
+static __forceinline__ __device__ void calculate_force_switch_F_E(const NBParamGpu nbparam,
+                                                                  float            c6,
+                                                                  float            c12,
+                                                                  float            inv_r,
+                                                                  float            r2,
+                                                                  float*           F_invr,
+                                                                  float*           E_lj)
 {
     float r, r_switch;
 
@@ -142,7 +142,7 @@ static __forceinline__ __device__ void calculate_force_switch_F_E(const cu_nbpar
 
 /*! Apply potential switch, force-only version. */
 static __forceinline__ __device__ void
-                       calculate_potential_switch_F(const cu_nbparam_t nbparam, float inv_r, float r2, float* F_invr, float* E_lj)
+                       calculate_potential_switch_F(const NBParamGpu nbparam, float inv_r, float r2, float* F_invr, float* E_lj)
 {
     float r, r_switch;
     float sw, dsw;
@@ -170,7 +170,7 @@ static __forceinline__ __device__ void
 
 /*! Apply potential switch, force + energy version. */
 static __forceinline__ __device__ void
-                       calculate_potential_switch_F_E(const cu_nbparam_t nbparam, float inv_r, float r2, float* F_invr, float* E_lj)
+                       calculate_potential_switch_F_E(const NBParamGpu nbparam, float inv_r, float r2, float* F_invr, float* E_lj)
 {
     float r, r_switch;
     float sw, dsw;
@@ -201,7 +201,7 @@ static __forceinline__ __device__ void
  *  Depending on what is supported, it fetches parameters either
  *  using direct load, texture objects, or texrefs.
  */
-static __forceinline__ __device__ float calculate_lj_ewald_c6grid(const cu_nbparam_t nbparam, int typei, int typej)
+static __forceinline__ __device__ float calculate_lj_ewald_c6grid(const NBParamGpu nbparam, int typei, int typej)
 {
 #    if DISABLE_CUDA_TEXTURES
     return LDG(&nbparam.nbfp_comb[2 * typei]) * LDG(&nbparam.nbfp_comb[2 * typej]);
@@ -215,14 +215,14 @@ static __forceinline__ __device__ float calculate_lj_ewald_c6grid(const cu_nbpar
 /*! Calculate LJ-PME grid force contribution with
  *  geometric combination rule.
  */
-static __forceinline__ __device__ void calculate_lj_ewald_comb_geom_F(const cu_nbparam_t nbparam,
-                                                                      int                typei,
-                                                                      int                typej,
-                                                                      float              r2,
-                                                                      float              inv_r2,
-                                                                      float              lje_coeff2,
-                                                                      float  lje_coeff6_6,
-                                                                      float* F_invr)
+static __forceinline__ __device__ void calculate_lj_ewald_comb_geom_F(const NBParamGpu nbparam,
+                                                                      int              typei,
+                                                                      int              typej,
+                                                                      float            r2,
+                                                                      float            inv_r2,
+                                                                      float            lje_coeff2,
+                                                                      float            lje_coeff6_6,
+                                                                      float*           F_invr)
 {
     float c6grid, inv_r6_nm, cr2, expmcr2, poly;
 
@@ -242,12 +242,12 @@ static __forceinline__ __device__ void calculate_lj_ewald_comb_geom_F(const cu_n
 /*! Calculate LJ-PME grid force + energy contribution with
  *  geometric combination rule.
  */
-static __forceinline__ __device__ void calculate_lj_ewald_comb_geom_F_E(const cu_nbparam_t nbparam,
-                                                                        int                typei,
-                                                                        int                typej,
-                                                                        float              r2,
-                                                                        float              inv_r2,
-                                                                        float  lje_coeff2,
+static __forceinline__ __device__ void calculate_lj_ewald_comb_geom_F_E(const NBParamGpu nbparam,
+                                                                        int              typei,
+                                                                        int              typej,
+                                                                        float            r2,
+                                                                        float            inv_r2,
+                                                                        float            lje_coeff2,
                                                                         float  lje_coeff6_6,
                                                                         float  int_bit,
                                                                         float* F_invr,
@@ -276,7 +276,7 @@ static __forceinline__ __device__ void calculate_lj_ewald_comb_geom_F_E(const cu
  *  Depending on what is supported, it fetches parameters either
  *  using direct load, texture objects, or texrefs.
  */
-static __forceinline__ __device__ float2 fetch_nbfp_comb_c6_c12(const cu_nbparam_t nbparam, int type)
+static __forceinline__ __device__ float2 fetch_nbfp_comb_c6_c12(const NBParamGpu nbparam, int type)
 {
     float2 c6c12;
 #    if DISABLE_CUDA_TEXTURES
@@ -299,16 +299,16 @@ static __forceinline__ __device__ float2 fetch_nbfp_comb_c6_c12(const cu_nbparam
  *  We use a single F+E kernel with conditional because the performance impact
  *  of this is pretty small and LB on the CPU is anyway very slow.
  */
-static __forceinline__ __device__ void calculate_lj_ewald_comb_LB_F_E(const cu_nbparam_t nbparam,
-                                                                      int                typei,
-                                                                      int                typej,
-                                                                      float              r2,
-                                                                      float              inv_r2,
-                                                                      float              lje_coeff2,
-                                                                      float  lje_coeff6_6,
-                                                                      float  int_bit,
-                                                                      float* F_invr,
-                                                                      float* E_lj)
+static __forceinline__ __device__ void calculate_lj_ewald_comb_LB_F_E(const NBParamGpu nbparam,
+                                                                      int              typei,
+                                                                      int              typej,
+                                                                      float            r2,
+                                                                      float            inv_r2,
+                                                                      float            lje_coeff2,
+                                                                      float            lje_coeff6_6,
+                                                                      float            int_bit,
+                                                                      float*           F_invr,
+                                                                      float*           E_lj)
 {
     float c6grid, inv_r6_nm, cr2, expmcr2, poly;
     float sigma, sigma2, epsilon;
@@ -348,7 +348,7 @@ static __forceinline__ __device__ void calculate_lj_ewald_comb_LB_F_E(const cu_n
  *  Depending on what is supported, it fetches parameters either
  *  using direct load, texture objects, or texrefs.
  */
-static __forceinline__ __device__ float2 fetch_coulomb_force_r(const cu_nbparam_t nbparam, int index)
+static __forceinline__ __device__ float2 fetch_coulomb_force_r(const NBParamGpu nbparam, int index)
 {
     float2 d;
 
@@ -379,7 +379,7 @@ __forceinline__ __host__ __device__ T lerp(T d0, T d1, T t)
 
 /*! Interpolate Ewald coulomb force correction using the F*r table.
  */
-static __forceinline__ __device__ float interpolate_coulomb_force_r(const cu_nbparam_t nbparam, float r)
+static __forceinline__ __device__ float interpolate_coulomb_force_r(const NBParamGpu nbparam, float r)
 {
     float normalized = nbparam.coulomb_tab_scale * r;
     int   index      = (int)normalized;
@@ -395,7 +395,7 @@ static __forceinline__ __device__ float interpolate_coulomb_force_r(const cu_nbp
  *  Depending on what is supported, it fetches parameters either
  *  using direct load, texture objects, or texrefs.
  */
-static __forceinline__ __device__ void fetch_nbfp_c6_c12(float& c6, float& c12, const cu_nbparam_t nbparam, int baseIndex)
+static __forceinline__ __device__ void fetch_nbfp_c6_c12(float& c6, float& c12, const NBParamGpu nbparam, int baseIndex)
 {
 #    if DISABLE_CUDA_TEXTURES
     /* Force an 8-byte fetch to save a memory instruction. */
index 67f220d15d807899a67d4ad06fb34c4294a39874..acadca29c5fcf4e17410d471db55e8d0ab134bc8 100644 (file)
@@ -80,7 +80,6 @@ static constexpr int c_clSize = c_nbnxnGpuClusterSize;
  * are passed to the kernels, except cu_timers_t. */
 /*! \cond */
 typedef struct cu_atomdata cu_atomdata_t;
-typedef struct cu_nbparam  cu_nbparam_t;
 /*! \endcond */
 
 
@@ -138,72 +137,6 @@ struct cu_atomdata
     bool bShiftVecUploaded;
 };
 
-/** \internal
- * \brief Parameters required for the CUDA nonbonded calculations.
- */
-struct cu_nbparam
-{
-
-    //! type of electrostatics, takes values from #eelType
-    int eeltype;
-    //! type of VdW impl., takes values from #evdwType
-    int vdwtype;
-
-    //! charge multiplication factor
-    float epsfac;
-    //! Reaction-field/plain cutoff electrostatics const.
-    float c_rf;
-    //! Reaction-field electrostatics constant
-    float two_k_rf;
-    //! Ewald/PME parameter
-    float ewald_beta;
-    //! Ewald/PME correction term substracted from the direct-space potential
-    float sh_ewald;
-    //! LJ-Ewald/PME correction term added to the correction potential
-    float sh_lj_ewald;
-    //! LJ-Ewald/PME coefficient
-    float ewaldcoeff_lj;
-
-    //! Coulomb cut-off squared
-    float rcoulomb_sq;
-
-    //! VdW cut-off squared
-    float rvdw_sq;
-    //! VdW switched cut-off
-    float rvdw_switch;
-    //! Full, outer pair-list cut-off squared
-    float rlistOuter_sq;
-    //! Inner, dynamic pruned pair-list cut-off squared
-    float rlistInner_sq;
-    //! True if we use dynamic pair-list pruning
-    bool useDynamicPruning;
-
-    //! VdW shift dispersion constants
-    shift_consts_t dispersion_shift;
-    //! VdW shift repulsion constants
-    shift_consts_t repulsion_shift;
-    //! VdW switch constants
-    switch_consts_t vdw_switch;
-
-    /* LJ non-bonded parameters - accessed through texture memory */
-    //! nonbonded parameter table with C6/C12 pairs per atom type-pair, 2*ntype^2 elements
-    float* nbfp;
-    //! texture object bound to nbfp
-    cudaTextureObject_t nbfp_texobj;
-    //! nonbonded parameter table per atom type, 2*ntype elements
-    float* nbfp_comb;
-    //! texture object bound to nbfp_texobj
-    cudaTextureObject_t nbfp_comb_texobj;
-
-    /* Ewald Coulomb force table data - accessed through texture memory */
-    //! table scale/spacing
-    float coulomb_tab_scale;
-    //! pointer to the table in the device memory
-    float* coulomb_tab;
-    //! texture object bound to coulomb_tab
-    cudaTextureObject_t coulomb_tab_texobj;
-};
-
 /** \internal
  * \brief Pair list data.
  */
@@ -255,7 +188,7 @@ struct NbnxmGpu
     /*! \brief number of elements allocated allocated in device buffer */
     int ncxy_ind_alloc = 0;
     /*! \brief parameters required for the non-bonded calc. */
-    cu_nbparam_t* nbparam = nullptr;
+    NBParamGpu* nbparam = nullptr;
     /*! \brief pair-list data structures (local and non-local) */
     gmx::EnumerationArray<Nbnxm::InteractionLocality, cu_plist_t*> plist = { { nullptr } };
     /*! \brief staging area where fshift/energies get downloaded */
index 28dbe70459e4728c67b17331da1fb29e8e0b245f..17b66e49d8243df0609f06577997b280526035e8 100644 (file)
 #    include "gromacs/gpu_utils/gpuregiontimer.cuh"
 #endif
 
+/** \internal
+ * \brief Parameters required for the GPU nonbonded calculations.
+ */
+struct NBParamGpu
+{
+
+    //! type of electrostatics, takes values from #eelType
+    int eeltype;
+    //! type of VdW impl., takes values from #evdwType
+    int vdwtype;
+
+    //! charge multiplication factor
+    float epsfac;
+    //! Reaction-field/plain cutoff electrostatics const.
+    float c_rf;
+    //! Reaction-field electrostatics constant
+    float two_k_rf;
+    //! Ewald/PME parameter
+    float ewald_beta;
+    //! Ewald/PME correction term subtracted from the direct-space potential
+    float sh_ewald;
+    //! LJ-Ewald/PME correction term added to the correction potential
+    float sh_lj_ewald;
+    //! LJ-Ewald/PME coefficient
+    float ewaldcoeff_lj;
+
+    //! Coulomb cut-off squared
+    float rcoulomb_sq;
+
+    //! VdW cut-off squared
+    float rvdw_sq;
+    //! VdW switched cut-off
+    float rvdw_switch;
+    //! Full, outer pair-list cut-off squared
+    float rlistOuter_sq;
+    //! Inner, dynamic pruned pair-list cut-off squared
+    float rlistInner_sq;
+    //! True if we use dynamic pair-list pruning
+    bool useDynamicPruning;
+
+    //! VdW shift dispersion constants
+    shift_consts_t dispersion_shift;
+    //! VdW shift repulsion constants
+    shift_consts_t repulsion_shift;
+    //! VdW switch constants
+    switch_consts_t vdw_switch;
+
+    /* LJ non-bonded parameters - accessed through texture memory */
+    //! nonbonded parameter table with C6/C12 pairs per atom type-pair, 2*ntype^2 elements
+    DeviceBuffer<float> nbfp;
+    //! texture object bound to nbfp
+    DeviceTexture nbfp_texobj;
+    //! nonbonded parameter table per atom type, 2*ntype elements
+    DeviceBuffer<float> nbfp_comb;
+    //! texture object bound to nbfp_comb
+    DeviceTexture nbfp_comb_texobj;
+
+    /* Ewald Coulomb force table data - accessed through texture memory */
+    //! table scale/spacing
+    float coulomb_tab_scale;
+    //! pointer to the table in the device memory
+    DeviceBuffer<float> coulomb_tab;
+    //! texture object bound to coulomb_tab
+    DeviceTexture coulomb_tab_texobj;
+};
+
 namespace Nbnxm
 {
 
index 226571dfd16a3698a9994b27f5e51ec98f3c275e..cd929a4dbd628c1740662f1db145e6b0c09c4dca 100644 (file)
@@ -431,7 +431,7 @@ static inline int calc_shmem_required_nonbonded(int vdwType, bool bPrefetchLjPar
  *
  *  This function is called before the launch of both nbnxn and prune kernels.
  */
-static void fillin_ocl_structures(cl_nbparam_t* nbp, cl_nbparam_params_t* nbparams_params)
+static void fillin_ocl_structures(NBParamGpu* nbp, cl_nbparam_params_t* nbparams_params)
 {
     nbparams_params->coulomb_tab_scale = nbp->coulomb_tab_scale;
     nbparams_params->c_rf              = nbp->c_rf;
@@ -585,7 +585,7 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
 void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nbnxm::InteractionLocality iloc)
 {
     cl_atomdata_t*      adat         = nb->atdat;
-    cl_nbparam_t*       nbp          = nb->nbparam;
+    NBParamGpu*         nbp          = nb->nbparam;
     cl_plist_t*         plist        = nb->plist[iloc];
     cl_timers_t*        t            = nb->timers;
     const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
@@ -669,8 +669,8 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nb
     {
         const auto kernelArgs = prepareGpuKernelArguments(
                 kernel, config, &nbparams_params, &adat->xq, &adat->f, &adat->e_lj, &adat->e_el,
-                &adat->fshift, &adat->lj_comb, &adat->shift_vec, &nbp->nbfp_climg2d, &nbp->nbfp_comb_climg2d,
-                &nbp->coulomb_tab_climg2d, &plist->sci, &plist->cj4, &plist->excl, &computeFshift);
+                &adat->fshift, &adat->lj_comb, &adat->shift_vec, &nbp->nbfp, &nbp->nbfp_comb,
+                &nbp->coulomb_tab, &plist->sci, &plist->cj4, &plist->excl, &computeFshift);
 
         launchGpuKernel(kernel, config, deviceStream, timingEvent, kernelName, kernelArgs);
     }
@@ -678,9 +678,8 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nb
     {
         const auto kernelArgs = prepareGpuKernelArguments(
                 kernel, config, &adat->ntypes, &nbparams_params, &adat->xq, &adat->f, &adat->e_lj,
-                &adat->e_el, &adat->fshift, &adat->atom_types, &adat->shift_vec, &nbp->nbfp_climg2d,
-                &nbp->nbfp_comb_climg2d, &nbp->coulomb_tab_climg2d, &plist->sci, &plist->cj4,
-                &plist->excl, &computeFshift);
+                &adat->e_el, &adat->fshift, &adat->atom_types, &adat->shift_vec, &nbp->nbfp, &nbp->nbfp_comb,
+                &nbp->coulomb_tab, &plist->sci, &plist->cj4, &plist->excl, &computeFshift);
         launchGpuKernel(kernel, config, deviceStream, timingEvent, kernelName, kernelArgs);
     }
 
@@ -723,7 +722,7 @@ static inline int calc_shmem_required_prune(const int num_threads_z)
 void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, const int numParts)
 {
     cl_atomdata_t*      adat         = nb->atdat;
-    cl_nbparam_t*       nbp          = nb->nbparam;
+    NBParamGpu*         nbp          = nb->nbparam;
     cl_plist_t*         plist        = nb->plist[iloc];
     cl_timers_t*        t            = nb->timers;
     const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
index ce17f8152f935f031acfce823239bd28c4d3ab71..766789b930df1da68dfd5ee918be3f85f6f47d8c 100644 (file)
@@ -105,20 +105,20 @@ static unsigned int gpu_min_ci_balanced_factor = 50;
  * table.
  */
 static void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
-                                           cl_nbparam_t*                nbp,
+                                           NBParamGpu*                  nbp,
                                            const DeviceContext&         deviceContext)
 {
-    if (nbp->coulomb_tab_climg2d != nullptr)
+    if (nbp->coulomb_tab != nullptr)
     {
-        freeDeviceBuffer(&(nbp->coulomb_tab_climg2d));
+        freeDeviceBuffer(&(nbp->coulomb_tab));
     }
 
     DeviceBuffer<real> coulomb_tab;
 
     initParamLookupTable(&coulomb_tab, nullptr, tables.tableF.data(), tables.tableF.size(), deviceContext);
 
-    nbp->coulomb_tab_climg2d = coulomb_tab;
-    nbp->coulomb_tab_scale   = tables.scale;
+    nbp->coulomb_tab       = coulomb_tab;
+    nbp->coulomb_tab_scale = tables.scale;
 }
 
 
@@ -148,7 +148,7 @@ static void init_atomdata_first(cl_atomdata_t* ad, int ntypes, const DeviceConte
 
 /*! \brief Copies all parameters related to the cut-off from ic to nbp
  */
-static void set_cutoff_parameters(cl_nbparam_t* nbp, const interaction_const_t* ic, const PairlistParams& listParams)
+static void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t* ic, const PairlistParams& listParams)
 {
     nbp->ewald_beta        = ic->ewaldcoeff_q;
     nbp->sh_ewald          = ic->sh_ewald;
@@ -244,7 +244,7 @@ static void map_interaction_types_to_gpu_kernel_flavors(const interaction_const_
 
 /*! \brief Initializes the nonbonded parameter data structure.
  */
-static void init_nbparam(cl_nbparam_t*                   nbp,
+static void init_nbparam(NBParamGpu*                     nbp,
                          const interaction_const_t*      ic,
                          const PairlistParams&           listParams,
                          const nbnxn_atomdata_t::Params& nbatParams,
@@ -266,7 +266,7 @@ static void init_nbparam(cl_nbparam_t*                   nbp,
         }
     }
     /* generate table for PME */
-    nbp->coulomb_tab_climg2d = nullptr;
+    nbp->coulomb_tab = nullptr;
     if (nbp->eeltype == eelTypeEWALD_TAB || nbp->eeltype == eelTypeEWALD_TAB_TWIN)
     {
         GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
@@ -274,7 +274,7 @@ static void init_nbparam(cl_nbparam_t*                   nbp,
     }
     else
     {
-        allocateDeviceBuffer(&nbp->coulomb_tab_climg2d, 1, deviceContext);
+        allocateDeviceBuffer(&nbp->coulomb_tab, 1, deviceContext);
     }
 
     const int nnbfp      = 2 * nbatParams.numTypes * nbatParams.numTypes;
@@ -284,13 +284,13 @@ static void init_nbparam(cl_nbparam_t*                   nbp,
         /* set up LJ parameter lookup table */
         DeviceBuffer<real> nbfp;
         initParamLookupTable(&nbfp, nullptr, nbatParams.nbfp.data(), nnbfp, deviceContext);
-        nbp->nbfp_climg2d = nbfp;
+        nbp->nbfp = nbfp;
 
         if (ic->vdwtype == evdwPME)
         {
             DeviceBuffer<float> nbfp_comb;
             initParamLookupTable(&nbfp_comb, nullptr, nbatParams.nbfp_comb.data(), nnbfp_comb, deviceContext);
-            nbp->nbfp_comb_climg2d = nbfp_comb;
+            nbp->nbfp_comb = nbfp_comb;
         }
     }
 }
@@ -302,8 +302,8 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interacti
     {
         return;
     }
-    NbnxmGpu*     nb  = nbv->gpu_nbv;
-    cl_nbparam_t* nbp = nb->nbparam;
+    NbnxmGpu*   nb  = nbv->gpu_nbv;
+    NBParamGpu* nbp = nb->nbparam;
 
     set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
 
@@ -444,7 +444,7 @@ static void nbnxn_gpu_init_kernels(NbnxmGpu* nb)
  *  clears e/fshift output buffers.
  */
 static void nbnxn_ocl_init_const(cl_atomdata_t*                  atomData,
-                                 cl_nbparam_t*                   nbParams,
+                                 NBParamGpu*                     nbParams,
                                  const interaction_const_t*      ic,
                                  const PairlistParams&           listParams,
                                  const nbnxn_atomdata_t::Params& nbatParams,
@@ -825,9 +825,9 @@ void gpu_free(NbnxmGpu* nb)
     sfree(nb->atdat);
 
     /* Free nbparam */
-    freeDeviceBuffer(&(nb->nbparam->nbfp_climg2d));
-    freeDeviceBuffer(&(nb->nbparam->nbfp_comb_climg2d));
-    freeDeviceBuffer(&(nb->nbparam->coulomb_tab_climg2d));
+    freeDeviceBuffer(&(nb->nbparam->nbfp));
+    freeDeviceBuffer(&(nb->nbparam->nbfp_comb));
+    freeDeviceBuffer(&(nb->nbparam->coulomb_tab));
     sfree(nb->nbparam);
 
     /* Free plist */
index a1db11c93089bf1d2e8b1d87948ac984709eca36..a2f6913a90fd7db56a9a3bac769bf72b3b17325e 100644 (file)
@@ -146,66 +146,6 @@ typedef struct cl_atomdata
     bool bShiftVecUploaded;
 } cl_atomdata_t;
 
-/*! \internal
- * \brief Parameters required for the OpenCL nonbonded calculations.
- */
-typedef struct cl_nbparam
-{
-
-    //! type of electrostatics, takes values from #eelType
-    int eeltype;
-    //! type of VdW impl., takes values from #evdwType
-    int vdwtype;
-
-    //! charge multiplication factor
-    float epsfac;
-    //! Reaction-field/plain cutoff electrostatics const.
-    float c_rf;
-    //! Reaction-field electrostatics constant
-    float two_k_rf;
-    //! Ewald/PME parameter
-    float ewald_beta;
-    //! Ewald/PME correction term substracted from the direct-space potential
-    float sh_ewald;
-    //! LJ-Ewald/PME correction term added to the correction potential
-    float sh_lj_ewald;
-    //! LJ-Ewald/PME coefficient
-    float ewaldcoeff_lj;
-
-    //! Coulomb cut-off squared
-    float rcoulomb_sq;
-
-    //! VdW cut-off squared
-    float rvdw_sq;
-    //! VdW switched cut-off
-    float rvdw_switch;
-    //! Full, outer pair-list cut-off squared
-    float rlistOuter_sq;
-    //! Inner, dynamic pruned pair-list cut-off squared
-    float rlistInner_sq;
-    //! True if we use dynamic pair-list pruning
-    bool useDynamicPruning;
-
-    //! VdW shift dispersion constants
-    shift_consts_t dispersion_shift;
-    //! VdW shift repulsion constants
-    shift_consts_t repulsion_shift;
-    //! VdW switch constants
-    switch_consts_t vdw_switch;
-
-    /* LJ non-bonded parameters - accessed through texture memory */
-    //! nonbonded parameter table with C6/C12 pairs per atom type-pair, 2*ntype^2 elements
-    cl_mem nbfp_climg2d;
-    //! nonbonded parameter table per atom type, 2*ntype elements
-    cl_mem nbfp_comb_climg2d;
-
-    /* Ewald Coulomb force table data - accessed through texture memory */
-    //! table scale/spacing
-    float coulomb_tab_scale;
-    //! pointer to the table in the device memory
-    DeviceBuffer<float> coulomb_tab_climg2d;
-} cl_nbparam_t;
-
 /*! \internal
  * \brief Data structure shared between the OpenCL device code and OpenCL host code
  *
@@ -312,7 +252,7 @@ struct NbnxmGpu
     //! atom data
     cl_atomdata_t* atdat = nullptr;
     //! parameters required for the non-bonded calc.
-    cl_nbparam_t* nbparam = nullptr;
+    NBParamGpu* nbparam = nullptr;
     //! pair-list data structures (local and non-local)
     gmx::EnumerationArray<Nbnxm::InteractionLocality, cl_plist_t*> plist = { nullptr };
     //! staging area where fshift/energies get downloaded