#include "gromacs/hardware/gpu_hw_info.h"
+//! Device texture for fast read-only data fetching
+using DeviceTexture = cudaTextureObject_t;
+
/*! \brief CUDA device information.
*
* The CUDA device information is queried and set at detection and contains
#else
+using DeviceTexture = void*;
+
//! Stub for device information.
struct DeviceInformation
{
#include "gromacs/gpu_utils/gmxopencl.h"
#include "gromacs/hardware/gpu_hw_info.h"
+using DeviceTexture = void*;
+
//! OpenCL device vendors
enum class DeviceVendor : int
{
constexpr static int c_bufOpsThreadsPerBlock = 128;
/*! Nonbonded kernel function pointer type */
-typedef void (*nbnxn_cu_kfunc_ptr_t)(const cu_atomdata_t, const cu_nbparam_t, const cu_plist_t, bool);
+typedef void (*nbnxn_cu_kfunc_ptr_t)(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, bool);
/*********************************/
/*! \brief Calculates the amount of shared memory required by the nonbonded kernel in use. */
static inline int calc_shmem_required_nonbonded(const int num_threads_z,
const DeviceInformation gmx_unused* deviceInfo,
- const cu_nbparam_t* nbp)
+ const NBParamGpu* nbp)
{
int shmem;
void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const InteractionLocality iloc)
{
cu_atomdata_t* adat = nb->atdat;
- cu_nbparam_t* nbp = nb->nbparam;
+ NBParamGpu* nbp = nb->nbparam;
cu_plist_t* plist = nb->plist[iloc];
cu_timers_t* t = nb->timers;
const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, const int numParts)
{
cu_atomdata_t* adat = nb->atdat;
- cu_nbparam_t* nbp = nb->nbparam;
+ NBParamGpu* nbp = nb->nbparam;
cu_plist_t* plist = nb->plist[iloc];
cu_timers_t* t = nb->timers;
const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
static void nbnxn_cuda_clear_e_fshift(NbnxmGpu* nb);
/* Fw. decl, */
-static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t* nbparam);
+static void nbnxn_cuda_free_nbparam_table(NBParamGpu* nbparam);
/*! \brief Initializes the Ewald Coulomb correction GPU table.
   If called with an already allocated table, it just re-uploads the table.
*/
static void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
- cu_nbparam_t* nbp,
+ NBParamGpu* nbp,
const DeviceContext& deviceContext)
{
if (nbp->coulomb_tab != nullptr)
}
/*! Copies all parameters related to the cut-off from ic to nbp */
-static void set_cutoff_parameters(cu_nbparam_t* nbp, const interaction_const_t* ic, const PairlistParams& listParams)
+static void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t* ic, const PairlistParams& listParams)
{
nbp->ewald_beta = ic->ewaldcoeff_q;
nbp->sh_ewald = ic->sh_ewald;
}
/*! Initializes the nonbonded parameter data structure. */
-static void init_nbparam(cu_nbparam_t* nbp,
+static void init_nbparam(NBParamGpu* nbp,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t::Params& nbatParams,
{
return;
}
- NbnxmGpu* nb = nbv->gpu_nbv;
- cu_nbparam_t* nbp = nbv->gpu_nbv->nbparam;
+ NbnxmGpu* nb = nbv->gpu_nbv;
+ NBParamGpu* nbp = nbv->gpu_nbv->nbparam;
set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
}
}
-static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t* nbparam)
+static void nbnxn_cuda_free_nbparam_table(NBParamGpu* nbparam)
{
if (nbparam->eeltype == eelTypeEWALD_TAB || nbparam->eeltype == eelTypeEWALD_TAB_TWIN)
{
{
cudaError_t stat;
cu_atomdata_t* atdat;
- cu_nbparam_t* nbparam;
+ NBParamGpu* nbparam;
if (nb == nullptr)
{
__global__ void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_cuda)
# endif /* CALC_ENERGIES */
#endif /* PRUNE_NBL */
- (const cu_atomdata_t atdat, const cu_nbparam_t nbparam, const cu_plist_t plist, bool bCalcFshift)
+ (const cu_atomdata_t atdat, const NBParamGpu nbparam, const cu_plist_t plist, bool bCalcFshift)
#ifdef FUNCTION_DECLARATION_ONLY
; /* Only do function declaration, omit the function body. */
#else
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef FUNCTION_DECLARATION_ONLY
/* Instantiate external template functions */
template __global__ void
-nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const cu_nbparam_t, const cu_plist_t, int, int);
+nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, int, int);
template __global__ void
-nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const cu_nbparam_t, const cu_plist_t, int, int);
+nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, int, int);
#endif
template<bool haveFreshList>
__launch_bounds__(THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP) __global__
void nbnxn_kernel_prune_cuda(const cu_atomdata_t atdat,
- const cu_nbparam_t nbparam,
+ const NBParamGpu nbparam,
const cu_plist_t plist,
int numParts,
int part)
// Add extern declarations so each translation unit understands that
// there will be a definition provided.
extern template __global__ void
-nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const cu_nbparam_t, const cu_plist_t, int, int);
+nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, int, int);
extern template __global__ void
-nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const cu_nbparam_t, const cu_plist_t, int, int);
+nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const NBParamGpu, const cu_plist_t, int, int);
#else
{
/*! Apply force switch, force-only version. */
static __forceinline__ __device__ void
- calculate_force_switch_F(const cu_nbparam_t nbparam, float c6, float c12, float inv_r, float r2, float* F_invr)
+ calculate_force_switch_F(const NBParamGpu nbparam, float c6, float c12, float inv_r, float r2, float* F_invr)
{
float r, r_switch;
}
/*! Apply force switch, force + energy version. */
-static __forceinline__ __device__ void calculate_force_switch_F_E(const cu_nbparam_t nbparam,
- float c6,
- float c12,
- float inv_r,
- float r2,
- float* F_invr,
- float* E_lj)
+static __forceinline__ __device__ void calculate_force_switch_F_E(const NBParamGpu nbparam,
+ float c6,
+ float c12,
+ float inv_r,
+ float r2,
+ float* F_invr,
+ float* E_lj)
{
float r, r_switch;
/*! Apply potential switch, force-only version. */
static __forceinline__ __device__ void
- calculate_potential_switch_F(const cu_nbparam_t nbparam, float inv_r, float r2, float* F_invr, float* E_lj)
+ calculate_potential_switch_F(const NBParamGpu nbparam, float inv_r, float r2, float* F_invr, float* E_lj)
{
float r, r_switch;
float sw, dsw;
/*! Apply potential switch, force + energy version. */
static __forceinline__ __device__ void
- calculate_potential_switch_F_E(const cu_nbparam_t nbparam, float inv_r, float r2, float* F_invr, float* E_lj)
+ calculate_potential_switch_F_E(const NBParamGpu nbparam, float inv_r, float r2, float* F_invr, float* E_lj)
{
float r, r_switch;
float sw, dsw;
* Depending on what is supported, it fetches parameters either
* using direct load, texture objects, or texrefs.
*/
-static __forceinline__ __device__ float calculate_lj_ewald_c6grid(const cu_nbparam_t nbparam, int typei, int typej)
+static __forceinline__ __device__ float calculate_lj_ewald_c6grid(const NBParamGpu nbparam, int typei, int typej)
{
# if DISABLE_CUDA_TEXTURES
return LDG(&nbparam.nbfp_comb[2 * typei]) * LDG(&nbparam.nbfp_comb[2 * typej]);
/*! Calculate LJ-PME grid force contribution with
* geometric combination rule.
*/
-static __forceinline__ __device__ void calculate_lj_ewald_comb_geom_F(const cu_nbparam_t nbparam,
- int typei,
- int typej,
- float r2,
- float inv_r2,
- float lje_coeff2,
- float lje_coeff6_6,
- float* F_invr)
+static __forceinline__ __device__ void calculate_lj_ewald_comb_geom_F(const NBParamGpu nbparam,
+ int typei,
+ int typej,
+ float r2,
+ float inv_r2,
+ float lje_coeff2,
+ float lje_coeff6_6,
+ float* F_invr)
{
float c6grid, inv_r6_nm, cr2, expmcr2, poly;
/*! Calculate LJ-PME grid force + energy contribution with
* geometric combination rule.
*/
-static __forceinline__ __device__ void calculate_lj_ewald_comb_geom_F_E(const cu_nbparam_t nbparam,
- int typei,
- int typej,
- float r2,
- float inv_r2,
- float lje_coeff2,
+static __forceinline__ __device__ void calculate_lj_ewald_comb_geom_F_E(const NBParamGpu nbparam,
+ int typei,
+ int typej,
+ float r2,
+ float inv_r2,
+ float lje_coeff2,
float lje_coeff6_6,
float int_bit,
float* F_invr,
* Depending on what is supported, it fetches parameters either
* using direct load, texture objects, or texrefs.
*/
-static __forceinline__ __device__ float2 fetch_nbfp_comb_c6_c12(const cu_nbparam_t nbparam, int type)
+static __forceinline__ __device__ float2 fetch_nbfp_comb_c6_c12(const NBParamGpu nbparam, int type)
{
float2 c6c12;
# if DISABLE_CUDA_TEXTURES
* We use a single F+E kernel with conditional because the performance impact
* of this is pretty small and LB on the CPU is anyway very slow.
*/
-static __forceinline__ __device__ void calculate_lj_ewald_comb_LB_F_E(const cu_nbparam_t nbparam,
- int typei,
- int typej,
- float r2,
- float inv_r2,
- float lje_coeff2,
- float lje_coeff6_6,
- float int_bit,
- float* F_invr,
- float* E_lj)
+static __forceinline__ __device__ void calculate_lj_ewald_comb_LB_F_E(const NBParamGpu nbparam,
+ int typei,
+ int typej,
+ float r2,
+ float inv_r2,
+ float lje_coeff2,
+ float lje_coeff6_6,
+ float int_bit,
+ float* F_invr,
+ float* E_lj)
{
float c6grid, inv_r6_nm, cr2, expmcr2, poly;
float sigma, sigma2, epsilon;
* Depending on what is supported, it fetches parameters either
* using direct load, texture objects, or texrefs.
*/
-static __forceinline__ __device__ float2 fetch_coulomb_force_r(const cu_nbparam_t nbparam, int index)
+static __forceinline__ __device__ float2 fetch_coulomb_force_r(const NBParamGpu nbparam, int index)
{
float2 d;
/*! Interpolate Ewald coulomb force correction using the F*r table.
*/
-static __forceinline__ __device__ float interpolate_coulomb_force_r(const cu_nbparam_t nbparam, float r)
+static __forceinline__ __device__ float interpolate_coulomb_force_r(const NBParamGpu nbparam, float r)
{
float normalized = nbparam.coulomb_tab_scale * r;
int index = (int)normalized;
* Depending on what is supported, it fetches parameters either
* using direct load, texture objects, or texrefs.
*/
-static __forceinline__ __device__ void fetch_nbfp_c6_c12(float& c6, float& c12, const cu_nbparam_t nbparam, int baseIndex)
+static __forceinline__ __device__ void fetch_nbfp_c6_c12(float& c6, float& c12, const NBParamGpu nbparam, int baseIndex)
{
# if DISABLE_CUDA_TEXTURES
/* Force an 8-byte fetch to save a memory instruction. */
* are passed to the kernels, except cu_timers_t. */
/*! \cond */
typedef struct cu_atomdata cu_atomdata_t;
-typedef struct cu_nbparam cu_nbparam_t;
/*! \endcond */
bool bShiftVecUploaded;
};
-/** \internal
- * \brief Parameters required for the CUDA nonbonded calculations.
- */
-struct cu_nbparam
-{
-
- //! type of electrostatics, takes values from #eelType
- int eeltype;
- //! type of VdW impl., takes values from #evdwType
- int vdwtype;
-
- //! charge multiplication factor
- float epsfac;
- //! Reaction-field/plain cutoff electrostatics const.
- float c_rf;
- //! Reaction-field electrostatics constant
- float two_k_rf;
- //! Ewald/PME parameter
- float ewald_beta;
- //! Ewald/PME correction term substracted from the direct-space potential
- float sh_ewald;
- //! LJ-Ewald/PME correction term added to the correction potential
- float sh_lj_ewald;
- //! LJ-Ewald/PME coefficient
- float ewaldcoeff_lj;
-
- //! Coulomb cut-off squared
- float rcoulomb_sq;
-
- //! VdW cut-off squared
- float rvdw_sq;
- //! VdW switched cut-off
- float rvdw_switch;
- //! Full, outer pair-list cut-off squared
- float rlistOuter_sq;
- //! Inner, dynamic pruned pair-list cut-off squared
- float rlistInner_sq;
- //! True if we use dynamic pair-list pruning
- bool useDynamicPruning;
-
- //! VdW shift dispersion constants
- shift_consts_t dispersion_shift;
- //! VdW shift repulsion constants
- shift_consts_t repulsion_shift;
- //! VdW switch constants
- switch_consts_t vdw_switch;
-
- /* LJ non-bonded parameters - accessed through texture memory */
- //! nonbonded parameter table with C6/C12 pairs per atom type-pair, 2*ntype^2 elements
- float* nbfp;
- //! texture object bound to nbfp
- cudaTextureObject_t nbfp_texobj;
- //! nonbonded parameter table per atom type, 2*ntype elements
- float* nbfp_comb;
- //! texture object bound to nbfp_texobj
- cudaTextureObject_t nbfp_comb_texobj;
-
- /* Ewald Coulomb force table data - accessed through texture memory */
- //! table scale/spacing
- float coulomb_tab_scale;
- //! pointer to the table in the device memory
- float* coulomb_tab;
- //! texture object bound to coulomb_tab
- cudaTextureObject_t coulomb_tab_texobj;
-};
-
/** \internal
* \brief Pair list data.
*/
/*! \brief number of elements allocated allocated in device buffer */
int ncxy_ind_alloc = 0;
/*! \brief parameters required for the non-bonded calc. */
- cu_nbparam_t* nbparam = nullptr;
+ NBParamGpu* nbparam = nullptr;
/*! \brief pair-list data structures (local and non-local) */
gmx::EnumerationArray<Nbnxm::InteractionLocality, cu_plist_t*> plist = { { nullptr } };
/*! \brief staging area where fshift/energies get downloaded */
# include "gromacs/gpu_utils/gpuregiontimer.cuh"
#endif
+/** \internal
+ * \brief Parameters required for the GPU nonbonded calculations.
+ */
+struct NBParamGpu
+{
+
+ //! type of electrostatics, takes values from #eelType
+ int eeltype;
+ //! type of VdW impl., takes values from #evdwType
+ int vdwtype;
+
+ //! charge multiplication factor
+ float epsfac;
+ //! Reaction-field/plain cutoff electrostatics const.
+ float c_rf;
+ //! Reaction-field electrostatics constant
+ float two_k_rf;
+ //! Ewald/PME parameter
+ float ewald_beta;
+ //! Ewald/PME correction term subtracted from the direct-space potential
+ float sh_ewald;
+ //! LJ-Ewald/PME correction term added to the correction potential
+ float sh_lj_ewald;
+ //! LJ-Ewald/PME coefficient
+ float ewaldcoeff_lj;
+
+ //! Coulomb cut-off squared
+ float rcoulomb_sq;
+
+ //! VdW cut-off squared
+ float rvdw_sq;
+ //! VdW switched cut-off
+ float rvdw_switch;
+ //! Full, outer pair-list cut-off squared
+ float rlistOuter_sq;
+ //! Inner, dynamic pruned pair-list cut-off squared
+ float rlistInner_sq;
+ //! True if we use dynamic pair-list pruning
+ bool useDynamicPruning;
+
+ //! VdW shift dispersion constants
+ shift_consts_t dispersion_shift;
+ //! VdW shift repulsion constants
+ shift_consts_t repulsion_shift;
+ //! VdW switch constants
+ switch_consts_t vdw_switch;
+
+ /* LJ non-bonded parameters - accessed through texture memory */
+ //! nonbonded parameter table with C6/C12 pairs per atom type-pair, 2*ntype^2 elements
+ DeviceBuffer<float> nbfp;
+ //! texture object bound to nbfp
+ DeviceTexture nbfp_texobj;
+ //! nonbonded parameter table per atom type, 2*ntype elements
+ DeviceBuffer<float> nbfp_comb;
+ //! texture object bound to nbfp_comb
+ DeviceTexture nbfp_comb_texobj;
+
+ /* Ewald Coulomb force table data - accessed through texture memory */
+ //! table scale/spacing
+ float coulomb_tab_scale;
+ //! pointer to the table in the device memory
+ DeviceBuffer<float> coulomb_tab;
+ //! texture object bound to coulomb_tab
+ DeviceTexture coulomb_tab_texobj;
+};
+
namespace Nbnxm
{
*
* This function is called before the launch of both nbnxn and prune kernels.
*/
-static void fillin_ocl_structures(cl_nbparam_t* nbp, cl_nbparam_params_t* nbparams_params)
+static void fillin_ocl_structures(NBParamGpu* nbp, cl_nbparam_params_t* nbparams_params)
{
nbparams_params->coulomb_tab_scale = nbp->coulomb_tab_scale;
nbparams_params->c_rf = nbp->c_rf;
void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nbnxm::InteractionLocality iloc)
{
cl_atomdata_t* adat = nb->atdat;
- cl_nbparam_t* nbp = nb->nbparam;
+ NBParamGpu* nbp = nb->nbparam;
cl_plist_t* plist = nb->plist[iloc];
cl_timers_t* t = nb->timers;
const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
{
const auto kernelArgs = prepareGpuKernelArguments(
kernel, config, &nbparams_params, &adat->xq, &adat->f, &adat->e_lj, &adat->e_el,
- &adat->fshift, &adat->lj_comb, &adat->shift_vec, &nbp->nbfp_climg2d, &nbp->nbfp_comb_climg2d,
- &nbp->coulomb_tab_climg2d, &plist->sci, &plist->cj4, &plist->excl, &computeFshift);
+ &adat->fshift, &adat->lj_comb, &adat->shift_vec, &nbp->nbfp, &nbp->nbfp_comb,
+ &nbp->coulomb_tab, &plist->sci, &plist->cj4, &plist->excl, &computeFshift);
launchGpuKernel(kernel, config, deviceStream, timingEvent, kernelName, kernelArgs);
}
{
const auto kernelArgs = prepareGpuKernelArguments(
kernel, config, &adat->ntypes, &nbparams_params, &adat->xq, &adat->f, &adat->e_lj,
- &adat->e_el, &adat->fshift, &adat->atom_types, &adat->shift_vec, &nbp->nbfp_climg2d,
- &nbp->nbfp_comb_climg2d, &nbp->coulomb_tab_climg2d, &plist->sci, &plist->cj4,
- &plist->excl, &computeFshift);
+ &adat->e_el, &adat->fshift, &adat->atom_types, &adat->shift_vec, &nbp->nbfp, &nbp->nbfp_comb,
+ &nbp->coulomb_tab, &plist->sci, &plist->cj4, &plist->excl, &computeFshift);
launchGpuKernel(kernel, config, deviceStream, timingEvent, kernelName, kernelArgs);
}
void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, const int numParts)
{
cl_atomdata_t* adat = nb->atdat;
- cl_nbparam_t* nbp = nb->nbparam;
+ NBParamGpu* nbp = nb->nbparam;
cl_plist_t* plist = nb->plist[iloc];
cl_timers_t* t = nb->timers;
const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
* table.
*/
static void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
- cl_nbparam_t* nbp,
+ NBParamGpu* nbp,
const DeviceContext& deviceContext)
{
- if (nbp->coulomb_tab_climg2d != nullptr)
+ if (nbp->coulomb_tab != nullptr)
{
- freeDeviceBuffer(&(nbp->coulomb_tab_climg2d));
+ freeDeviceBuffer(&(nbp->coulomb_tab));
}
DeviceBuffer<real> coulomb_tab;
initParamLookupTable(&coulomb_tab, nullptr, tables.tableF.data(), tables.tableF.size(), deviceContext);
- nbp->coulomb_tab_climg2d = coulomb_tab;
- nbp->coulomb_tab_scale = tables.scale;
+ nbp->coulomb_tab = coulomb_tab;
+ nbp->coulomb_tab_scale = tables.scale;
}
/*! \brief Copies all parameters related to the cut-off from ic to nbp
*/
-static void set_cutoff_parameters(cl_nbparam_t* nbp, const interaction_const_t* ic, const PairlistParams& listParams)
+static void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t* ic, const PairlistParams& listParams)
{
nbp->ewald_beta = ic->ewaldcoeff_q;
nbp->sh_ewald = ic->sh_ewald;
/*! \brief Initializes the nonbonded parameter data structure.
*/
-static void init_nbparam(cl_nbparam_t* nbp,
+static void init_nbparam(NBParamGpu* nbp,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t::Params& nbatParams,
}
}
/* generate table for PME */
- nbp->coulomb_tab_climg2d = nullptr;
+ nbp->coulomb_tab = nullptr;
if (nbp->eeltype == eelTypeEWALD_TAB || nbp->eeltype == eelTypeEWALD_TAB_TWIN)
{
GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
}
else
{
- allocateDeviceBuffer(&nbp->coulomb_tab_climg2d, 1, deviceContext);
+ allocateDeviceBuffer(&nbp->coulomb_tab, 1, deviceContext);
}
const int nnbfp = 2 * nbatParams.numTypes * nbatParams.numTypes;
/* set up LJ parameter lookup table */
DeviceBuffer<real> nbfp;
initParamLookupTable(&nbfp, nullptr, nbatParams.nbfp.data(), nnbfp, deviceContext);
- nbp->nbfp_climg2d = nbfp;
+ nbp->nbfp = nbfp;
if (ic->vdwtype == evdwPME)
{
DeviceBuffer<float> nbfp_comb;
initParamLookupTable(&nbfp_comb, nullptr, nbatParams.nbfp_comb.data(), nnbfp_comb, deviceContext);
- nbp->nbfp_comb_climg2d = nbfp_comb;
+ nbp->nbfp_comb = nbfp_comb;
}
}
}
{
return;
}
- NbnxmGpu* nb = nbv->gpu_nbv;
- cl_nbparam_t* nbp = nb->nbparam;
+ NbnxmGpu* nb = nbv->gpu_nbv;
+ NBParamGpu* nbp = nb->nbparam;
set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
* clears e/fshift output buffers.
*/
static void nbnxn_ocl_init_const(cl_atomdata_t* atomData,
- cl_nbparam_t* nbParams,
+ NBParamGpu* nbParams,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t::Params& nbatParams,
sfree(nb->atdat);
/* Free nbparam */
- freeDeviceBuffer(&(nb->nbparam->nbfp_climg2d));
- freeDeviceBuffer(&(nb->nbparam->nbfp_comb_climg2d));
- freeDeviceBuffer(&(nb->nbparam->coulomb_tab_climg2d));
+ freeDeviceBuffer(&(nb->nbparam->nbfp));
+ freeDeviceBuffer(&(nb->nbparam->nbfp_comb));
+ freeDeviceBuffer(&(nb->nbparam->coulomb_tab));
sfree(nb->nbparam);
/* Free plist */
bool bShiftVecUploaded;
} cl_atomdata_t;
-/*! \internal
- * \brief Parameters required for the OpenCL nonbonded calculations.
- */
-typedef struct cl_nbparam
-{
-
- //! type of electrostatics, takes values from #eelType
- int eeltype;
- //! type of VdW impl., takes values from #evdwType
- int vdwtype;
-
- //! charge multiplication factor
- float epsfac;
- //! Reaction-field/plain cutoff electrostatics const.
- float c_rf;
- //! Reaction-field electrostatics constant
- float two_k_rf;
- //! Ewald/PME parameter
- float ewald_beta;
- //! Ewald/PME correction term substracted from the direct-space potential
- float sh_ewald;
- //! LJ-Ewald/PME correction term added to the correction potential
- float sh_lj_ewald;
- //! LJ-Ewald/PME coefficient
- float ewaldcoeff_lj;
-
- //! Coulomb cut-off squared
- float rcoulomb_sq;
-
- //! VdW cut-off squared
- float rvdw_sq;
- //! VdW switched cut-off
- float rvdw_switch;
- //! Full, outer pair-list cut-off squared
- float rlistOuter_sq;
- //! Inner, dynamic pruned pair-list cut-off squared
- float rlistInner_sq;
- //! True if we use dynamic pair-list pruning
- bool useDynamicPruning;
-
- //! VdW shift dispersion constants
- shift_consts_t dispersion_shift;
- //! VdW shift repulsion constants
- shift_consts_t repulsion_shift;
- //! VdW switch constants
- switch_consts_t vdw_switch;
-
- /* LJ non-bonded parameters - accessed through texture memory */
- //! nonbonded parameter table with C6/C12 pairs per atom type-pair, 2*ntype^2 elements
- cl_mem nbfp_climg2d;
- //! nonbonded parameter table per atom type, 2*ntype elements
- cl_mem nbfp_comb_climg2d;
-
- /* Ewald Coulomb force table data - accessed through texture memory */
- //! table scale/spacing
- float coulomb_tab_scale;
- //! pointer to the table in the device memory
- DeviceBuffer<float> coulomb_tab_climg2d;
-} cl_nbparam_t;
-
/*! \internal
* \brief Data structure shared between the OpenCL device code and OpenCL host code
*
//! atom data
cl_atomdata_t* atdat = nullptr;
//! parameters required for the non-bonded calc.
- cl_nbparam_t* nbparam = nullptr;
+ NBParamGpu* nbparam = nullptr;
//! pair-list data structures (local and non-local)
gmx::EnumerationArray<Nbnxm::InteractionLocality, cl_plist_t*> plist = { nullptr };
//! staging area where fshift/energies get downloaded