Make cl_nbparam into a struct
author     Artem Zhmurov <zhmurov@gmail.com>
           Thu, 25 Jun 2020 15:31:36 +0000 (15:31 +0000)
committer  Paul Bauer <paul.bauer.q@gmail.com>
           Thu, 25 Jun 2020 15:31:36 +0000 (15:31 +0000)
This is needed to unify the OpenCL path with the CUDA path.
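
For context, a minimal, self-contained sketch (illustration only, not code from this change) of the pattern the unification relies on: each backend's gputraits header supplies its own DeviceTexture alias, so a single parameter struct definition can be shared between the CUDA and OpenCL builds. The struct and field names below are hypothetical; only DeviceTexture and cudaTextureObject_t appear in the actual change.

// Sketch of the backend-specific alias, as provided by gputraits.cuh / gputraits_ocl.h.
#if defined(__CUDACC__)
using DeviceTexture = cudaTextureObject_t; // real texture handle when compiled with nvcc
#else
using DeviceTexture = void*;               // inert stub for non-CUDA builds
#endif

// One struct definition now serves both back ends; only the alias (and the
// DeviceBuffer<> implementation, not shown here) differs per backend.
struct ParamsSketch
{
    float         tableScale; // plain scalar parameter, identical on all back ends
    DeviceTexture tableTex;   // type resolves per backend at compile time
};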

14 files changed:
src/gromacs/gpu_utils/gputraits.cuh
src/gromacs/gpu_utils/gputraits.h
src/gromacs/gpu_utils/gputraits_ocl.h
src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel.cuh
src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel_pruneonly.cu
src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel_pruneonly.cuh
src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel_utils.cuh
src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h
src/gromacs/nbnxm/gpu_types_common.h
src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h

index 76606611b8e329604037052571ad948398009d26..98fd8d04efc9c5b3d3f8a657c3bd7f7eb55711bf 100644 (file)
@@ -48,6 +48,9 @@
 
 #include "gromacs/hardware/gpu_hw_info.h"
 
+//! Device texture for fast read-only data fetching
+using DeviceTexture = cudaTextureObject_t;
+
 /*! \brief CUDA device information.
  *
  * The CUDA device information is queried and set at detection and contains
index 5fec00303adc59588557726d188efe5d6d9e2f84..9ae87f1436aabe7a6c04298e2a0839dc5ada17d9 100644 (file)
@@ -57,6 +57,8 @@
 
 #else
 
+using DeviceTexture = void*;
+
 //! Stub for device information.
 struct DeviceInformation
 {
index ff4572e1affbfef1414493fa3928b08511855ba1..a3eb510c95cf2135c9b82537c5bda106bdbe0a7a 100644 (file)
@@ -48,6 +48,8 @@
 #include "gromacs/gpu_utils/gmxopencl.h"
 #include "gromacs/hardware/gpu_hw_info.h"
 
+using DeviceTexture = void*;
+
 //! OpenCL device vendors
 enum class DeviceVendor : int
 {
index 241392f187dfbcd636b954950975d95b4ecc8993..f7a12cf99d3abd07a10b93b8721e116777760827 100644 (file)
@@ -121,7 +121,7 @@ namespace Nbnxm
 constexpr static int c_bufOpsThreadsPerBlock = 128;
 
 /*! Nonbonded kernel function pointer type */
-typedef void (*nbnxn_cu_kfunc_ptr_t)(const cu_atomdata_t, const cu_nbparam_t, const cu_plist_t, bool);
+typedef void (*nbnxn_cu_kfunc_ptr_t)(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, bool);
 
 /*********************************/
 
@@ -330,7 +330,7 @@ static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(int                     e
 /*! \brief Calculates the amount of shared memory required by the nonbonded kernel in use. */
 static inline int calc_shmem_required_nonbonded(const int               num_threads_z,
                                                 const DeviceInformation gmx_unused* deviceInfo,
-                                                const cu_nbparam_t*                 nbp)
+                                                const NBParamGpu*                   nbp)
 {
     int shmem;
 
@@ -483,7 +483,7 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
 void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const InteractionLocality iloc)
 {
     cu_atomdata_t*      adat         = nb->atdat;
-    cu_nbparam_t*       nbp          = nb->nbparam;
+    NBParamGpu*         nbp          = nb->nbparam;
     cu_plist_t*         plist        = nb->plist[iloc];
     cu_timers_t*        t            = nb->timers;
     const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
@@ -596,7 +596,7 @@ static inline int calc_shmem_required_prune(const int num_threads_z)
 void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, const int numParts)
 {
     cu_atomdata_t*      adat         = nb->atdat;
-    cu_nbparam_t*       nbp          = nb->nbparam;
+    NBParamGpu*         nbp          = nb->nbparam;
     cu_plist_t*         plist        = nb->plist[iloc];
     cu_timers_t*        t            = nb->timers;
     const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
index 962f5a049e3ef706ee24e4add3268bc422bd0046..69beab6b6fec14062b832452e901ab9056f68bc5 100644 (file)
@@ -93,7 +93,7 @@ static unsigned int gpu_min_ci_balanced_factor = 44;
 static void nbnxn_cuda_clear_e_fshift(NbnxmGpu* nb);
 
 /* Fw. decl, */
-static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t* nbparam);
+static void nbnxn_cuda_free_nbparam_table(NBParamGpu* nbparam);
 
 /*! \brief Initialized the Ewald Coulomb correction GPU table.
 
@@ -102,7 +102,7 @@ static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t* nbparam);
     it just re-uploads the table.
  */
 static void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
-                                           cu_nbparam_t*                nbp,
+                                           NBParamGpu*                  nbp,
                                            const DeviceContext&         deviceContext)
 {
     if (nbp->coulomb_tab != nullptr)
@@ -192,7 +192,7 @@ static int pick_ewald_kernel_type(const interaction_const_t& ic)
 }
 
 /*! Copies all parameters related to the cut-off from ic to nbp */
-static void set_cutoff_parameters(cu_nbparam_t* nbp, const interaction_const_t* ic, const PairlistParams& listParams)
+static void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t* ic, const PairlistParams& listParams)
 {
     nbp->ewald_beta        = ic->ewaldcoeff_q;
     nbp->sh_ewald          = ic->sh_ewald;
@@ -215,7 +215,7 @@ static void set_cutoff_parameters(cu_nbparam_t* nbp, const interaction_const_t*
 }
 
 /*! Initializes the nonbonded parameter data structure. */
-static void init_nbparam(cu_nbparam_t*                   nbp,
+static void init_nbparam(NBParamGpu*                     nbp,
                          const interaction_const_t*      ic,
                          const PairlistParams&           listParams,
                          const nbnxn_atomdata_t::Params& nbatParams,
@@ -331,8 +331,8 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interacti
     {
         return;
     }
-    NbnxmGpu*     nb  = nbv->gpu_nbv;
-    cu_nbparam_t* nbp = nbv->gpu_nbv->nbparam;
+    NbnxmGpu*   nb  = nbv->gpu_nbv;
+    NBParamGpu* nbp = nbv->gpu_nbv->nbparam;
 
     set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
 
@@ -679,7 +679,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
     }
 }
 
-static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t* nbparam)
+static void nbnxn_cuda_free_nbparam_table(NBParamGpu* nbparam)
 {
     if (nbparam->eeltype == eelTypeEWALD_TAB || nbparam->eeltype == eelTypeEWALD_TAB_TWIN)
     {
@@ -691,7 +691,7 @@ void gpu_free(NbnxmGpu* nb)
 {
     cudaError_t    stat;
     cu_atomdata_t* atdat;
-    cu_nbparam_t*  nbparam;
+    NBParamGpu*    nbparam;
 
     if (nb == nullptr)
     {
index 89f75da5f9c2731d88b4c7b8aae9060c2c451e9a..7faea980b7c5cc8df1fa8ce698ab2cc2bc575b9b 100644 (file)
@@ -158,7 +158,7 @@ __launch_bounds__(THREADS_PER_BLOCK)
         __global__ void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_cuda)
 #    endif /* CALC_ENERGIES */
 #endif     /* PRUNE_NBL */
-                (const cu_atomdata_t atdat, const cu_nbparam_t nbparam, const cu_plist_t plist, bool bCalcFshift)
+                (const cu_atomdata_t atdat, const NBParamGpu nbparam, const cu_plist_t plist, bool bCalcFshift)
 #ifdef FUNCTION_DECLARATION_ONLY
                         ; /* Only do function declaration, omit the function body. */
 #else
index 945b1912fced824dc8345836bd2c6d0d455a9d42..81755cb9039a1cc9201bd6744ee59aa0cd2f2952 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2016,2017,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -39,7 +39,7 @@
 #ifndef FUNCTION_DECLARATION_ONLY
 /* Instantiate external template functions */
 template __global__ void
-nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const cu_nbparam_t, const cu_plist_t, int, int);
+nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, int, int);
 template __global__ void
-nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const cu_nbparam_t, const cu_plist_t, int, int);
+nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, int, int);
 #endif
index e9c5b5114397901af4a22ce1534c54e95ab0e883..e5bf2b967cb397c79d0a7bfa9f048727b7b3a1cd 100644 (file)
 template<bool haveFreshList>
 __launch_bounds__(THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP) __global__
         void nbnxn_kernel_prune_cuda(const cu_atomdata_t atdat,
-                                     const cu_nbparam_t  nbparam,
+                                     const NBParamGpu    nbparam,
                                      const cu_plist_t    plist,
                                      int                 numParts,
                                      int                 part)
@@ -114,9 +114,9 @@ __launch_bounds__(THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP) __global__
 // Add extern declarations so each translation unit understands that
 // there will be a definition provided.
 extern template __global__ void
-nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const cu_nbparam_t, const cu_plist_t, int, int);
+nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, int, int);
 extern template __global__ void
-nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const cu_nbparam_t, const cu_plist_t, int, int);
+nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, int, int);
 #else
 {
 
index cbca9452b99b597fca4d03bdf434f6053cd2bb8d..4850298f8f154082b8b19c2a395aab938fe8d59c 100644 (file)
@@ -90,7 +90,7 @@ static __forceinline__ __device__ void
 
 /*! Apply force switch,  force + energy version. */
 static __forceinline__ __device__ void
-                       calculate_force_switch_F(const cu_nbparam_t nbparam, float c6, float c12, float inv_r, float r2, float* F_invr)
+                       calculate_force_switch_F(const NBParamGpu nbparam, float c6, float c12, float inv_r, float r2, float* F_invr)
 {
     float r, r_switch;
 
@@ -109,13 +109,13 @@ static __forceinline__ __device__ void
 }
 
 /*! Apply force switch, force-only version. */
-static __forceinline__ __device__ void calculate_force_switch_F_E(const cu_nbparam_t nbparam,
-                                                                  float              c6,
-                                                                  float              c12,
-                                                                  float              inv_r,
-                                                                  float              r2,
-                                                                  float*             F_invr,
-                                                                  float*             E_lj)
+static __forceinline__ __device__ void calculate_force_switch_F_E(const NBParamGpu nbparam,
+                                                                  float            c6,
+                                                                  float            c12,
+                                                                  float            inv_r,
+                                                                  float            r2,
+                                                                  float*           F_invr,
+                                                                  float*           E_lj)
 {
     float r, r_switch;
 
@@ -142,7 +142,7 @@ static __forceinline__ __device__ void calculate_force_switch_F_E(const cu_nbpar
 
 /*! Apply potential switch, force-only version. */
 static __forceinline__ __device__ void
-                       calculate_potential_switch_F(const cu_nbparam_t nbparam, float inv_r, float r2, float* F_invr, float* E_lj)
+                       calculate_potential_switch_F(const NBParamGpu nbparam, float inv_r, float r2, float* F_invr, float* E_lj)
 {
     float r, r_switch;
     float sw, dsw;
@@ -170,7 +170,7 @@ static __forceinline__ __device__ void
 
 /*! Apply potential switch, force + energy version. */
 static __forceinline__ __device__ void
-                       calculate_potential_switch_F_E(const cu_nbparam_t nbparam, float inv_r, float r2, float* F_invr, float* E_lj)
+                       calculate_potential_switch_F_E(const NBParamGpu nbparam, float inv_r, float r2, float* F_invr, float* E_lj)
 {
     float r, r_switch;
     float sw, dsw;
@@ -201,7 +201,7 @@ static __forceinline__ __device__ void
  *  Depending on what is supported, it fetches parameters either
  *  using direct load, texture objects, or texrefs.
  */
-static __forceinline__ __device__ float calculate_lj_ewald_c6grid(const cu_nbparam_t nbparam, int typei, int typej)
+static __forceinline__ __device__ float calculate_lj_ewald_c6grid(const NBParamGpu nbparam, int typei, int typej)
 {
 #    if DISABLE_CUDA_TEXTURES
     return LDG(&nbparam.nbfp_comb[2 * typei]) * LDG(&nbparam.nbfp_comb[2 * typej]);
@@ -215,14 +215,14 @@ static __forceinline__ __device__ float calculate_lj_ewald_c6grid(const cu_nbpar
 /*! Calculate LJ-PME grid force contribution with
  *  geometric combination rule.
  */
-static __forceinline__ __device__ void calculate_lj_ewald_comb_geom_F(const cu_nbparam_t nbparam,
-                                                                      int                typei,
-                                                                      int                typej,
-                                                                      float              r2,
-                                                                      float              inv_r2,
-                                                                      float              lje_coeff2,
-                                                                      float  lje_coeff6_6,
-                                                                      float* F_invr)
+static __forceinline__ __device__ void calculate_lj_ewald_comb_geom_F(const NBParamGpu nbparam,
+                                                                      int              typei,
+                                                                      int              typej,
+                                                                      float            r2,
+                                                                      float            inv_r2,
+                                                                      float            lje_coeff2,
+                                                                      float            lje_coeff6_6,
+                                                                      float*           F_invr)
 {
     float c6grid, inv_r6_nm, cr2, expmcr2, poly;
 
@@ -242,12 +242,12 @@ static __forceinline__ __device__ void calculate_lj_ewald_comb_geom_F(const cu_n
 /*! Calculate LJ-PME grid force + energy contribution with
  *  geometric combination rule.
  */
-static __forceinline__ __device__ void calculate_lj_ewald_comb_geom_F_E(const cu_nbparam_t nbparam,
-                                                                        int                typei,
-                                                                        int                typej,
-                                                                        float              r2,
-                                                                        float              inv_r2,
-                                                                        float  lje_coeff2,
+static __forceinline__ __device__ void calculate_lj_ewald_comb_geom_F_E(const NBParamGpu nbparam,
+                                                                        int              typei,
+                                                                        int              typej,
+                                                                        float            r2,
+                                                                        float            inv_r2,
+                                                                        float            lje_coeff2,
                                                                         float  lje_coeff6_6,
                                                                         float  int_bit,
                                                                         float* F_invr,
@@ -276,7 +276,7 @@ static __forceinline__ __device__ void calculate_lj_ewald_comb_geom_F_E(const cu
  *  Depending on what is supported, it fetches parameters either
  *  using direct load, texture objects, or texrefs.
  */
-static __forceinline__ __device__ float2 fetch_nbfp_comb_c6_c12(const cu_nbparam_t nbparam, int type)
+static __forceinline__ __device__ float2 fetch_nbfp_comb_c6_c12(const NBParamGpu nbparam, int type)
 {
     float2 c6c12;
 #    if DISABLE_CUDA_TEXTURES
@@ -299,16 +299,16 @@ static __forceinline__ __device__ float2 fetch_nbfp_comb_c6_c12(const cu_nbparam
  *  We use a single F+E kernel with conditional because the performance impact
  *  of this is pretty small and LB on the CPU is anyway very slow.
  */
-static __forceinline__ __device__ void calculate_lj_ewald_comb_LB_F_E(const cu_nbparam_t nbparam,
-                                                                      int                typei,
-                                                                      int                typej,
-                                                                      float              r2,
-                                                                      float              inv_r2,
-                                                                      float              lje_coeff2,
-                                                                      float  lje_coeff6_6,
-                                                                      float  int_bit,
-                                                                      float* F_invr,
-                                                                      float* E_lj)
+static __forceinline__ __device__ void calculate_lj_ewald_comb_LB_F_E(const NBParamGpu nbparam,
+                                                                      int              typei,
+                                                                      int              typej,
+                                                                      float            r2,
+                                                                      float            inv_r2,
+                                                                      float            lje_coeff2,
+                                                                      float            lje_coeff6_6,
+                                                                      float            int_bit,
+                                                                      float*           F_invr,
+                                                                      float*           E_lj)
 {
     float c6grid, inv_r6_nm, cr2, expmcr2, poly;
     float sigma, sigma2, epsilon;
@@ -348,7 +348,7 @@ static __forceinline__ __device__ void calculate_lj_ewald_comb_LB_F_E(const cu_n
  *  Depending on what is supported, it fetches parameters either
  *  using direct load, texture objects, or texrefs.
  */
-static __forceinline__ __device__ float2 fetch_coulomb_force_r(const cu_nbparam_t nbparam, int index)
+static __forceinline__ __device__ float2 fetch_coulomb_force_r(const NBParamGpu nbparam, int index)
 {
     float2 d;
 
@@ -379,7 +379,7 @@ __forceinline__ __host__ __device__ T lerp(T d0, T d1, T t)
 
 /*! Interpolate Ewald coulomb force correction using the F*r table.
  */
-static __forceinline__ __device__ float interpolate_coulomb_force_r(const cu_nbparam_t nbparam, float r)
+static __forceinline__ __device__ float interpolate_coulomb_force_r(const NBParamGpu nbparam, float r)
 {
     float normalized = nbparam.coulomb_tab_scale * r;
     int   index      = (int)normalized;
@@ -395,7 +395,7 @@ static __forceinline__ __device__ float interpolate_coulomb_force_r(const cu_nbp
  *  Depending on what is supported, it fetches parameters either
  *  using direct load, texture objects, or texrefs.
  */
-static __forceinline__ __device__ void fetch_nbfp_c6_c12(float& c6, float& c12, const cu_nbparam_t nbparam, int baseIndex)
+static __forceinline__ __device__ void fetch_nbfp_c6_c12(float& c6, float& c12, const NBParamGpu nbparam, int baseIndex)
 {
 #    if DISABLE_CUDA_TEXTURES
     /* Force an 8-byte fetch to save a memory instruction. */
index 67f220d15d807899a67d4ad06fb34c4294a39874..acadca29c5fcf4e17410d471db55e8d0ab134bc8 100644 (file)
@@ -80,7 +80,6 @@ static constexpr int c_clSize = c_nbnxnGpuClusterSize;
  * are passed to the kernels, except cu_timers_t. */
 /*! \cond */
 typedef struct cu_atomdata cu_atomdata_t;
-typedef struct cu_nbparam  cu_nbparam_t;
 /*! \endcond */
 
 
@@ -138,72 +137,6 @@ struct cu_atomdata
     bool bShiftVecUploaded;
 };
 
-/** \internal
- * \brief Parameters required for the CUDA nonbonded calculations.
- */
-struct cu_nbparam
-{
-
-    //! type of electrostatics, takes values from #eelType
-    int eeltype;
-    //! type of VdW impl., takes values from #evdwType
-    int vdwtype;
-
-    //! charge multiplication factor
-    float epsfac;
-    //! Reaction-field/plain cutoff electrostatics const.
-    float c_rf;
-    //! Reaction-field electrostatics constant
-    float two_k_rf;
-    //! Ewald/PME parameter
-    float ewald_beta;
-    //! Ewald/PME correction term substracted from the direct-space potential
-    float sh_ewald;
-    //! LJ-Ewald/PME correction term added to the correction potential
-    float sh_lj_ewald;
-    //! LJ-Ewald/PME coefficient
-    float ewaldcoeff_lj;
-
-    //! Coulomb cut-off squared
-    float rcoulomb_sq;
-
-    //! VdW cut-off squared
-    float rvdw_sq;
-    //! VdW switched cut-off
-    float rvdw_switch;
-    //! Full, outer pair-list cut-off squared
-    float rlistOuter_sq;
-    //! Inner, dynamic pruned pair-list cut-off squared
-    float rlistInner_sq;
-    //! True if we use dynamic pair-list pruning
-    bool useDynamicPruning;
-
-    //! VdW shift dispersion constants
-    shift_consts_t dispersion_shift;
-    //! VdW shift repulsion constants
-    shift_consts_t repulsion_shift;
-    //! VdW switch constants
-    switch_consts_t vdw_switch;
-
-    /* LJ non-bonded parameters - accessed through texture memory */
-    //! nonbonded parameter table with C6/C12 pairs per atom type-pair, 2*ntype^2 elements
-    float* nbfp;
-    //! texture object bound to nbfp
-    cudaTextureObject_t nbfp_texobj;
-    //! nonbonded parameter table per atom type, 2*ntype elements
-    float* nbfp_comb;
-    //! texture object bound to nbfp_texobj
-    cudaTextureObject_t nbfp_comb_texobj;
-
-    /* Ewald Coulomb force table data - accessed through texture memory */
-    //! table scale/spacing
-    float coulomb_tab_scale;
-    //! pointer to the table in the device memory
-    float* coulomb_tab;
-    //! texture object bound to coulomb_tab
-    cudaTextureObject_t coulomb_tab_texobj;
-};
-
 /** \internal
  * \brief Pair list data.
  */
@@ -255,7 +188,7 @@ struct NbnxmGpu
     /*! \brief number of elements allocated allocated in device buffer */
     int ncxy_ind_alloc = 0;
     /*! \brief parameters required for the non-bonded calc. */
-    cu_nbparam_t* nbparam = nullptr;
+    NBParamGpu* nbparam = nullptr;
     /*! \brief pair-list data structures (local and non-local) */
     gmx::EnumerationArray<Nbnxm::InteractionLocality, cu_plist_t*> plist = { { nullptr } };
     /*! \brief staging area where fshift/energies get downloaded */
index 28dbe70459e4728c67b17331da1fb29e8e0b245f..17b66e49d8243df0609f06577997b280526035e8 100644 (file)
 #    include "gromacs/gpu_utils/gpuregiontimer.cuh"
 #endif
 
+/** \internal
+ * \brief Parameters required for the GPU nonbonded calculations.
+ */
+struct NBParamGpu
+{
+
+    //! type of electrostatics, takes values from #eelType
+    int eeltype;
+    //! type of VdW impl., takes values from #evdwType
+    int vdwtype;
+
+    //! charge multiplication factor
+    float epsfac;
+    //! Reaction-field/plain cutoff electrostatics const.
+    float c_rf;
+    //! Reaction-field electrostatics constant
+    float two_k_rf;
+    //! Ewald/PME parameter
+    float ewald_beta;
+    //! Ewald/PME correction term subtracted from the direct-space potential
+    float sh_ewald;
+    //! LJ-Ewald/PME correction term added to the correction potential
+    float sh_lj_ewald;
+    //! LJ-Ewald/PME coefficient
+    float ewaldcoeff_lj;
+
+    //! Coulomb cut-off squared
+    float rcoulomb_sq;
+
+    //! VdW cut-off squared
+    float rvdw_sq;
+    //! VdW switched cut-off
+    float rvdw_switch;
+    //! Full, outer pair-list cut-off squared
+    float rlistOuter_sq;
+    //! Inner, dynamic pruned pair-list cut-off squared
+    float rlistInner_sq;
+    //! True if we use dynamic pair-list pruning
+    bool useDynamicPruning;
+
+    //! VdW shift dispersion constants
+    shift_consts_t dispersion_shift;
+    //! VdW shift repulsion constants
+    shift_consts_t repulsion_shift;
+    //! VdW switch constants
+    switch_consts_t vdw_switch;
+
+    /* LJ non-bonded parameters - accessed through texture memory */
+    //! nonbonded parameter table with C6/C12 pairs per atom type-pair, 2*ntype^2 elements
+    DeviceBuffer<float> nbfp;
+    //! texture object bound to nbfp
+    DeviceTexture nbfp_texobj;
+    //! nonbonded parameter table per atom type, 2*ntype elements
+    DeviceBuffer<float> nbfp_comb;
+    //! texture object bound to nbfp_comb
+    DeviceTexture nbfp_comb_texobj;
+
+    /* Ewald Coulomb force table data - accessed through texture memory */
+    //! table scale/spacing
+    float coulomb_tab_scale;
+    //! pointer to the table in the device memory
+    DeviceBuffer<float> coulomb_tab;
+    //! texture object bound to coulomb_tab
+    DeviceTexture coulomb_tab_texobj;
+};
+
 namespace Nbnxm
 {
 
index 226571dfd16a3698a9994b27f5e51ec98f3c275e..cd929a4dbd628c1740662f1db145e6b0c09c4dca 100644 (file)
@@ -431,7 +431,7 @@ static inline int calc_shmem_required_nonbonded(int vdwType, bool bPrefetchLjPar
  *
  *  This function is called before the launch of both nbnxn and prune kernels.
  */
-static void fillin_ocl_structures(cl_nbparam_t* nbp, cl_nbparam_params_t* nbparams_params)
+static void fillin_ocl_structures(NBParamGpu* nbp, cl_nbparam_params_t* nbparams_params)
 {
     nbparams_params->coulomb_tab_scale = nbp->coulomb_tab_scale;
     nbparams_params->c_rf              = nbp->c_rf;
@@ -585,7 +585,7 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
 void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nbnxm::InteractionLocality iloc)
 {
     cl_atomdata_t*      adat         = nb->atdat;
-    cl_nbparam_t*       nbp          = nb->nbparam;
+    NBParamGpu*         nbp          = nb->nbparam;
     cl_plist_t*         plist        = nb->plist[iloc];
     cl_timers_t*        t            = nb->timers;
     const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
@@ -669,8 +669,8 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nb
     {
         const auto kernelArgs = prepareGpuKernelArguments(
                 kernel, config, &nbparams_params, &adat->xq, &adat->f, &adat->e_lj, &adat->e_el,
-                &adat->fshift, &adat->lj_comb, &adat->shift_vec, &nbp->nbfp_climg2d, &nbp->nbfp_comb_climg2d,
-                &nbp->coulomb_tab_climg2d, &plist->sci, &plist->cj4, &plist->excl, &computeFshift);
+                &adat->fshift, &adat->lj_comb, &adat->shift_vec, &nbp->nbfp, &nbp->nbfp_comb,
+                &nbp->coulomb_tab, &plist->sci, &plist->cj4, &plist->excl, &computeFshift);
 
         launchGpuKernel(kernel, config, deviceStream, timingEvent, kernelName, kernelArgs);
     }
@@ -678,9 +678,8 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nb
     {
         const auto kernelArgs = prepareGpuKernelArguments(
                 kernel, config, &adat->ntypes, &nbparams_params, &adat->xq, &adat->f, &adat->e_lj,
-                &adat->e_el, &adat->fshift, &adat->atom_types, &adat->shift_vec, &nbp->nbfp_climg2d,
-                &nbp->nbfp_comb_climg2d, &nbp->coulomb_tab_climg2d, &plist->sci, &plist->cj4,
-                &plist->excl, &computeFshift);
+                &adat->e_el, &adat->fshift, &adat->atom_types, &adat->shift_vec, &nbp->nbfp, &nbp->nbfp_comb,
+                &nbp->coulomb_tab, &plist->sci, &plist->cj4, &plist->excl, &computeFshift);
         launchGpuKernel(kernel, config, deviceStream, timingEvent, kernelName, kernelArgs);
     }
 
@@ -723,7 +722,7 @@ static inline int calc_shmem_required_prune(const int num_threads_z)
 void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, const int numParts)
 {
     cl_atomdata_t*      adat         = nb->atdat;
-    cl_nbparam_t*       nbp          = nb->nbparam;
+    NBParamGpu*         nbp          = nb->nbparam;
     cl_plist_t*         plist        = nb->plist[iloc];
     cl_timers_t*        t            = nb->timers;
     const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
index ce17f8152f935f031acfce823239bd28c4d3ab71..766789b930df1da68dfd5ee918be3f85f6f47d8c 100644 (file)
@@ -105,20 +105,20 @@ static unsigned int gpu_min_ci_balanced_factor = 50;
  * table.
  */
 static void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
-                                           cl_nbparam_t*                nbp,
+                                           NBParamGpu*                  nbp,
                                            const DeviceContext&         deviceContext)
 {
-    if (nbp->coulomb_tab_climg2d != nullptr)
+    if (nbp->coulomb_tab != nullptr)
     {
-        freeDeviceBuffer(&(nbp->coulomb_tab_climg2d));
+        freeDeviceBuffer(&(nbp->coulomb_tab));
     }
 
     DeviceBuffer<real> coulomb_tab;
 
     initParamLookupTable(&coulomb_tab, nullptr, tables.tableF.data(), tables.tableF.size(), deviceContext);
 
-    nbp->coulomb_tab_climg2d = coulomb_tab;
-    nbp->coulomb_tab_scale   = tables.scale;
+    nbp->coulomb_tab       = coulomb_tab;
+    nbp->coulomb_tab_scale = tables.scale;
 }
 
 
@@ -148,7 +148,7 @@ static void init_atomdata_first(cl_atomdata_t* ad, int ntypes, const DeviceConte
 
 /*! \brief Copies all parameters related to the cut-off from ic to nbp
  */
-static void set_cutoff_parameters(cl_nbparam_t* nbp, const interaction_const_t* ic, const PairlistParams& listParams)
+static void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t* ic, const PairlistParams& listParams)
 {
     nbp->ewald_beta        = ic->ewaldcoeff_q;
     nbp->sh_ewald          = ic->sh_ewald;
@@ -244,7 +244,7 @@ static void map_interaction_types_to_gpu_kernel_flavors(const interaction_const_
 
 /*! \brief Initializes the nonbonded parameter data structure.
  */
-static void init_nbparam(cl_nbparam_t*                   nbp,
+static void init_nbparam(NBParamGpu*                     nbp,
                          const interaction_const_t*      ic,
                          const PairlistParams&           listParams,
                          const nbnxn_atomdata_t::Params& nbatParams,
@@ -266,7 +266,7 @@ static void init_nbparam(cl_nbparam_t*                   nbp,
         }
     }
     /* generate table for PME */
-    nbp->coulomb_tab_climg2d = nullptr;
+    nbp->coulomb_tab = nullptr;
     if (nbp->eeltype == eelTypeEWALD_TAB || nbp->eeltype == eelTypeEWALD_TAB_TWIN)
     {
         GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
@@ -274,7 +274,7 @@ static void init_nbparam(cl_nbparam_t*                   nbp,
     }
     else
     {
-        allocateDeviceBuffer(&nbp->coulomb_tab_climg2d, 1, deviceContext);
+        allocateDeviceBuffer(&nbp->coulomb_tab, 1, deviceContext);
     }
 
     const int nnbfp      = 2 * nbatParams.numTypes * nbatParams.numTypes;
@@ -284,13 +284,13 @@ static void init_nbparam(cl_nbparam_t*                   nbp,
         /* set up LJ parameter lookup table */
         DeviceBuffer<real> nbfp;
         initParamLookupTable(&nbfp, nullptr, nbatParams.nbfp.data(), nnbfp, deviceContext);
-        nbp->nbfp_climg2d = nbfp;
+        nbp->nbfp = nbfp;
 
         if (ic->vdwtype == evdwPME)
         {
             DeviceBuffer<float> nbfp_comb;
             initParamLookupTable(&nbfp_comb, nullptr, nbatParams.nbfp_comb.data(), nnbfp_comb, deviceContext);
-            nbp->nbfp_comb_climg2d = nbfp_comb;
+            nbp->nbfp_comb = nbfp_comb;
         }
     }
 }
@@ -302,8 +302,8 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interacti
     {
         return;
     }
-    NbnxmGpu*     nb  = nbv->gpu_nbv;
-    cl_nbparam_t* nbp = nb->nbparam;
+    NbnxmGpu*   nb  = nbv->gpu_nbv;
+    NBParamGpu* nbp = nb->nbparam;
 
     set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
 
@@ -444,7 +444,7 @@ static void nbnxn_gpu_init_kernels(NbnxmGpu* nb)
  *  clears e/fshift output buffers.
  */
 static void nbnxn_ocl_init_const(cl_atomdata_t*                  atomData,
-                                 cl_nbparam_t*                   nbParams,
+                                 NBParamGpu*                     nbParams,
                                  const interaction_const_t*      ic,
                                  const PairlistParams&           listParams,
                                  const nbnxn_atomdata_t::Params& nbatParams,
@@ -825,9 +825,9 @@ void gpu_free(NbnxmGpu* nb)
     sfree(nb->atdat);
 
     /* Free nbparam */
-    freeDeviceBuffer(&(nb->nbparam->nbfp_climg2d));
-    freeDeviceBuffer(&(nb->nbparam->nbfp_comb_climg2d));
-    freeDeviceBuffer(&(nb->nbparam->coulomb_tab_climg2d));
+    freeDeviceBuffer(&(nb->nbparam->nbfp));
+    freeDeviceBuffer(&(nb->nbparam->nbfp_comb));
+    freeDeviceBuffer(&(nb->nbparam->coulomb_tab));
     sfree(nb->nbparam);
 
     /* Free plist */
index a1db11c93089bf1d2e8b1d87948ac984709eca36..a2f6913a90fd7db56a9a3bac769bf72b3b17325e 100644 (file)
@@ -146,66 +146,6 @@ typedef struct cl_atomdata
     bool bShiftVecUploaded;
 } cl_atomdata_t;
 
-/*! \internal
- * \brief Parameters required for the OpenCL nonbonded calculations.
- */
-typedef struct cl_nbparam
-{
-
-    //! type of electrostatics, takes values from #eelType
-    int eeltype;
-    //! type of VdW impl., takes values from #evdwType
-    int vdwtype;
-
-    //! charge multiplication factor
-    float epsfac;
-    //! Reaction-field/plain cutoff electrostatics const.
-    float c_rf;
-    //! Reaction-field electrostatics constant
-    float two_k_rf;
-    //! Ewald/PME parameter
-    float ewald_beta;
-    //! Ewald/PME correction term substracted from the direct-space potential
-    float sh_ewald;
-    //! LJ-Ewald/PME correction term added to the correction potential
-    float sh_lj_ewald;
-    //! LJ-Ewald/PME coefficient
-    float ewaldcoeff_lj;
-
-    //! Coulomb cut-off squared
-    float rcoulomb_sq;
-
-    //! VdW cut-off squared
-    float rvdw_sq;
-    //! VdW switched cut-off
-    float rvdw_switch;
-    //! Full, outer pair-list cut-off squared
-    float rlistOuter_sq;
-    //! Inner, dynamic pruned pair-list cut-off squared
-    float rlistInner_sq;
-    //! True if we use dynamic pair-list pruning
-    bool useDynamicPruning;
-
-    //! VdW shift dispersion constants
-    shift_consts_t dispersion_shift;
-    //! VdW shift repulsion constants
-    shift_consts_t repulsion_shift;
-    //! VdW switch constants
-    switch_consts_t vdw_switch;
-
-    /* LJ non-bonded parameters - accessed through texture memory */
-    //! nonbonded parameter table with C6/C12 pairs per atom type-pair, 2*ntype^2 elements
-    cl_mem nbfp_climg2d;
-    //! nonbonded parameter table per atom type, 2*ntype elements
-    cl_mem nbfp_comb_climg2d;
-
-    /* Ewald Coulomb force table data - accessed through texture memory */
-    //! table scale/spacing
-    float coulomb_tab_scale;
-    //! pointer to the table in the device memory
-    DeviceBuffer<float> coulomb_tab_climg2d;
-} cl_nbparam_t;
-
 /*! \internal
  * \brief Data structure shared between the OpenCL device code and OpenCL host code
  *
@@ -312,7 +252,7 @@ struct NbnxmGpu
     //! atom data
     cl_atomdata_t* atdat = nullptr;
     //! parameters required for the non-bonded calc.
-    cl_nbparam_t* nbparam = nullptr;
+    NBParamGpu* nbparam = nullptr;
     //! pair-list data structures (local and non-local)
     gmx::EnumerationArray<Nbnxm::InteractionLocality, cl_plist_t*> plist = { nullptr };
     //! staging area where fshift/energies get downloaded