* This file is part of the GROMACS molecular simulation package.
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2012, The GROMACS development team,
- * check out http://www.gromacs.org for more information.
- * Copyright (c) 2012,2013, by the GROMACS development team, led by
- * David van der Spoel, Berk Hess, Erik Lindahl, and including many
- * others, as listed in the AUTHORS file in the top-level source
- * directory and at http://www.gromacs.org.
+ * Copyright (c) 2001-2012, The GROMACS development team.
+ * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
*
* GROMACS is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* the research papers on the package. Check out http://www.gromacs.org.
*/
+/*! \internal \file
+ * \brief
+ * Data types used internally in the nbnxn_cuda module.
+ *
+ * \author Szilárd Páll <pall.szilard@gmail.com>
+ * \ingroup module_mdlib
+ */
+
#ifndef NBNXN_CUDA_TYPES_H
#define NBNXN_CUDA_TYPES_H
-#include "types/nbnxn_pairlist.h"
-#include "types/nbnxn_cuda_types_ext.h"
-#include "../../gmxlib/cuda_tools/cudautils.cuh"
+#include "gromacs/gmxlib/cuda_tools/cudautils.cuh"
+#include "gromacs/legacyheaders/types/interaction_const.h"
+#include "gromacs/legacyheaders/types/nbnxn_cuda_types_ext.h"
+#include "gromacs/mdlib/nbnxn_pairlist.h"
+
+/* CUDA versions 5.0 and above support texture objects. */
+#if CUDA_VERSION >= 5000
+#define TEXOBJ_SUPPORTED
+#else /* CUDA_VERSION */
+/** This typedef allows us to define only one version of struct cu_nbparam */
+typedef int cudaTextureObject_t;
+#endif /* CUDA_VERSION */
#ifdef __cplusplus
extern "C" {
#endif
-/** Types of electrostatics implementations available in the CUDA non-bonded
+/*! \brief Electrostatic CUDA kernel flavors.
+ *
+ * Types of electrostatics implementations available in the CUDA non-bonded
* force kernels. These represent both the electrostatics types implemented
* by the kernels (cut-off, RF, and Ewald - a subset of what's defined in
* enums.h) as well as encode implementation details analytical/tabulated
* Note that the cut-off and RF kernels have only analytical flavor and unlike
* in the CPU kernels, the tabulated kernels are ATM Ewald-only.
*
- * The order of pointers to different electrostatic kernels defined in
- * nbnxn_cuda.cu by the nb_default_kfunc_ptr and nb_legacy_kfunc_ptr arrays
- * should match the order of enumerated types below. */
-enum {
+ * The row-order of pointers to different electrostatic kernels defined in
+ * nbnxn_cuda.cu by the nb_*_kfunc_ptr function pointer table
+ * should match the order of enumerated types below.
+ */
+enum eelCu {
eelCuCUT, eelCuRF, eelCuEWALD_TAB, eelCuEWALD_TAB_TWIN, eelCuEWALD_ANA, eelCuEWALD_ANA_TWIN, eelCuNR
};
-/** Kernel flavors with different set of optimizations: default for CUDA <=v4.1
- * compilers and legacy for earlier, 3.2 and 4.0 CUDA compilers. */
-enum {
- eNbnxnCuKDefault, eNbnxnCuKLegacy, eNbnxnCuKNR
+/*! \brief VdW CUDA kernel flavors.
+ *
+ * The enumerated values correspond to the LJ implementations in the CUDA non-bonded
+ * kernels.
+ *
+ * The column-order of pointers to different VdW kernels defined in
+ * nbnxn_cuda.cu by the nb_*_kfunc_ptr function pointer table
+ * should match the order of enumerated types below.
+ */
+enum evdwCu {
+ evdwCuCUT, evdwCuFSWITCH, evdwCuPSWITCH, evdwCuEWALDGEOM, evdwCuEWALDLB, evdwCuNR
};
-#define NBNXN_KVER_OLD(k) (k == eNbnxnCuKOld)
-#define NBNXN_KVER_LEGACY(k) (k == eNbnxnCuKLegacy)
-#define NBNXN_KVER_DEFAULT(k) (k == eNbnxnCuKDefault)
-
-/* Non-bonded kernel versions. */
-
-/* All structs prefixed with "cu_" hold data used in GPU calculations and
- * are passed to the kernels, except cu_timers_t. */
+/* All structs prefixed with "cu_" hold data used in GPU calculations and
+ * are passed to the kernels, except cu_timers_t. */
+/*! \cond */
typedef struct cu_plist cu_plist_t;
typedef struct cu_atomdata cu_atomdata_t;
typedef struct cu_nbparam cu_nbparam_t;
typedef struct cu_timers cu_timers_t;
typedef struct nb_staging nb_staging_t;
+/*! \endcond */
-/** Staging area for temporary data. The energies get downloaded here first,
- * before getting added to the CPU-side aggregate values.
+/** \internal
+ * \brief Staging area for temporary data downloaded from the GPU.
+ *
+ * The energies/shift forces get downloaded here first, before getting added
+ * to the CPU-side aggregate values.
*/
struct nb_staging
{
float3 *fshift; /**< shift forces */
};
-/** Nonbonded atom data -- both inputs and outputs. */
+/** \internal
+ * \brief Nonbonded atom data - both inputs and outputs.
+ */
struct cu_atomdata
{
int natoms; /**< number of atoms */
float4 *xq; /**< atom coordinates + charges, size natoms */
float3 *f; /**< force output array, size natoms */
- /* TODO: try float2 for the energies */
- float *e_lj, /**< LJ energy output, size 1 */
- *e_el; /**< Electrostatics energy input, size 1 */
+
+ float *e_lj; /**< LJ energy output, size 1 */
+ float *e_el; /**< Electrostatics energy input, size 1 */
float3 *fshift; /**< shift forces */
bool bShiftVecUploaded; /**< true if the shift vector has been uploaded */
};
-/** Parameters required for the CUDA nonbonded calculations. */
+/** \internal
+ * \brief Parameters required for the CUDA nonbonded calculations.
+ */
struct cu_nbparam
{
- int eeltype; /**< type of electrostatics */
-
- float epsfac; /**< charge multiplication factor */
- float c_rf, /**< Reaction-field/plain cutoff electrostatics const. */
- two_k_rf; /**< Reaction-field electrostatics constant */
- float ewald_beta; /**< Ewald/PME parameter */
- float sh_ewald; /**< Ewald/PME correction term */
- float rvdw_sq; /**< VdW cut-off */
- float rcoulomb_sq; /**< Coulomb cut-off */
- float rlist_sq; /**< pair-list cut-off */
- float sh_invrc6; /**< LJ potential correction term */
-
- float *nbfp; /**< nonbonded parameter table with C6/C12 pairs */
-
- /* Ewald Coulomb force table data */
- int coulomb_tab_size; /**< table size (s.t. it fits in texture cache) */
- float coulomb_tab_scale; /**< table scale/spacing */
- float *coulomb_tab; /**< pointer to the table in the device memory */
+
+ int eeltype; /**< type of electrostatics, takes values from #eelCu */
+ int vdwtype; /**< type of VdW impl., takes values from #evdwCu */
+
+ float epsfac; /**< charge multiplication factor */
+ float c_rf; /**< Reaction-field/plain cutoff electrostatics const. */
+ float two_k_rf; /**< Reaction-field electrostatics constant */
+ float ewald_beta; /**< Ewald/PME parameter */
+ float sh_ewald; /**< Ewald/PME correction term subtracted from the direct-space potential */
+ float sh_lj_ewald; /**< LJ-Ewald/PME correction term added to the correction potential */
+ float ewaldcoeff_lj; /**< LJ-Ewald/PME coefficient */
+
+ float rcoulomb_sq; /**< Coulomb cut-off squared */
+
+ float rvdw_sq; /**< VdW cut-off squared */
+ float rvdw_switch; /**< VdW switched cut-off */
+ float rlist_sq; /**< pair-list cut-off squared */
+
+ shift_consts_t dispersion_shift; /**< VdW shift dispersion constants */
+ shift_consts_t repulsion_shift; /**< VdW shift repulsion constants */
+ switch_consts_t vdw_switch; /**< VdW switch constants */
+
+ /* LJ non-bonded parameters - accessed through texture memory */
+ float *nbfp; /**< nonbonded parameter table with C6/C12 pairs per atom type-pair, 2*ntype^2 elements */
+ cudaTextureObject_t nbfp_texobj; /**< texture object bound to nbfp */
+ float *nbfp_comb; /**< nonbonded parameter table per atom type, 2*ntype elements */
+ cudaTextureObject_t nbfp_comb_texobj; /**< texture object bound to nbfp_comb */
+
+ /* Ewald Coulomb force table data - accessed through texture memory */
+ int coulomb_tab_size; /**< table size (s.t. it fits in texture cache) */
+ float coulomb_tab_scale; /**< table scale/spacing */
+ float *coulomb_tab; /**< pointer to the table in the device memory */
+ cudaTextureObject_t coulomb_tab_texobj; /**< texture object bound to coulomb_tab */
};
-/** Pair list data */
+/** \internal
+ * \brief Pair list data.
+ */
struct cu_plist
{
int na_c; /**< number of atoms per cluster */
done during the current step */
};
-/** CUDA events used for timing GPU kernels and H2D/D2H transfers.
+/** \internal
+ * \brief CUDA events used for timing GPU kernels and H2D/D2H transfers.
+ *
* The two-sized arrays hold the local and non-local values and should always
* be indexed with eintLocal/eintNonlocal.
*/
cudaEvent_t stop_nb_k[2]; /**< stop event non-bonded kernels (l/nl, every step) */
};
-/** Main data structure for CUDA nonbonded force calculations. */
+/** \internal
+ * \brief Main data structure for CUDA nonbonded force calculations.
+ */
struct nbnxn_cuda
{
cuda_dev_info_t *dev_info; /**< CUDA device information */
- int kernel_ver; /**< The version of the kernel to be executed on the
- device in use, possible values: eNbnxnCuK* */
bool bUseTwoStreams; /**< true if doing both local/non-local NB work on GPU */
bool bUseStreamSync; /**< true if the standard cudaStreamSynchronize is used
and not memory polling-based waiting */
cudaStream_t stream[2]; /**< local and non-local GPU streams */
/** events used for synchronization */
- cudaEvent_t nonlocal_done; /**< event triggered when the non-local non-bonded kernel
- is done (and the local transfer can proceed) */
- cudaEvent_t misc_ops_done; /**< event triggered when the operations that precede the
- main force calculations are done (e.g. buffer 0-ing) */
+ cudaEvent_t nonlocal_done; /**< event triggered when the non-local non-bonded kernel
+ is done (and the local transfer can proceed) */
+ cudaEvent_t misc_ops_done; /**< event triggered when the operations that precede the
+ main force calculations are done (e.g. buffer 0-ing) */
/* NOTE: With current CUDA versions (<=5.0) timing doesn't work with multiple
* concurrent streams, so we won't time if both l/nl work is done on GPUs.