-/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+/*
+ * This file is part of the GROMACS molecular simulation package.
*
- *
- * This source code is part of
- *
- * G R O M A C S
- *
- * GROningen MAchine for Chemical Simulations
- *
- * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2012, The GROMACS development team,
- * check out http://www.gromacs.org for more information.
+ * Copyright (c) 2001-2012, The GROMACS development team.
+ * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
- * If you want to redistribute modifications, please consider that
- * scientific software is very special. Version control is crucial -
- * bugs must be traceable. We will be happy to consider code for
- * inclusion in the official distribution, but derived work must not
- * be called official GROMACS. Details are found in the README & COPYING
- * files - if they are missing, get the official version at www.gromacs.org.
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * To help us fund GROMACS development, we humbly ask that you cite
- * the papers on the package - you can find them in the top README file.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
*
- * For more info, check our website at http://www.gromacs.org
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \internal \file
+ * \brief
+ * Data types used internally in the nbnxn_cuda module.
*
- * And Hey:
- * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ * \author Szilárd Páll <pall.szilard@gmail.com>
+ * \ingroup module_mdlib
*/
#ifndef NBNXN_CUDA_TYPES_H
#define NBNXN_CUDA_TYPES_H
-#include "types/nbnxn_pairlist.h"
-#include "types/nbnxn_cuda_types_ext.h"
-#include "../../gmxlib/cuda_tools/cudautils.cuh"
+#include "gromacs/gmxlib/cuda_tools/cudautils.cuh"
+#include "gromacs/legacyheaders/types/interaction_const.h"
+#include "gromacs/legacyheaders/types/nbnxn_cuda_types_ext.h"
+#include "gromacs/mdlib/nbnxn_pairlist.h"
+
+/* CUDA versions 5.0 and later support texture objects. */
+#if CUDA_VERSION >= 5000
+#define TEXOBJ_SUPPORTED
+#else /* CUDA_VERSION */
+/** Fallback for CUDA versions without texture objects: this typedef allows us to define only one version of struct cu_nbparam */
+typedef int cudaTextureObject_t;
+#endif /* CUDA_VERSION */
#ifdef __cplusplus
extern "C" {
#endif
-/** Types of electrostatics available in the CUDA nonbonded force kernels. */
-enum {
- eelCuEWALD, eelCuEWALD_TWIN, eelCuRF, eelCuCUT, eelCuNR
+/*! \brief Electrostatic CUDA kernel flavors.
+ *
+ * Types of electrostatics implementations available in the CUDA non-bonded
+ * force kernels. These represent both the electrostatics types implemented
+ * by the kernels (cut-off, RF, and Ewald - a subset of what's defined in
+ * enums.h) and implementation details: analytical or tabulated, and
+ * single or twin cut-off (for Ewald kernels).
+ * Note that the cut-off and RF kernels have only an analytical flavor and,
+ * unlike in the CPU kernels, the tabulated kernels are currently Ewald-only.
+ *
+ * The row-order of pointers to different electrostatic kernels defined in
+ * nbnxn_cuda.cu by the nb_*_kfunc_ptr function pointer table
+ * should match the order of enumerated types below.
+ */
+enum eelCu {
+ eelCuCUT, eelCuRF, eelCuEWALD_TAB, eelCuEWALD_TAB_TWIN, eelCuEWALD_ANA, eelCuEWALD_ANA_TWIN, eelCuNR
};
-enum {
- eNbnxnCuKDefault, eNbnxnCuKLegacy, eNbnxnCuKOld, eNbnxnCuKNR
+/*! \brief VdW CUDA kernel flavors.
+ *
+ * The enumerated values correspond to the LJ implementations in the CUDA non-bonded
+ * kernels.
+ *
+ * The column-order of pointers to different VdW kernel flavors defined in
+ * nbnxn_cuda.cu by the nb_*_kfunc_ptr function pointer table
+ * should match the order of enumerated types below.
+ */
+enum evdwCu {
+ evdwCuCUT, evdwCuFSWITCH, evdwCuPSWITCH, evdwCuEWALDGEOM, evdwCuEWALDLB, evdwCuNR
};
-#define NBNXN_KVER_OLD(k) (k == eNbnxnCuKOld)
-#define NBNXN_KVER_LEGACY(k) (k == eNbnxnCuKLegacy)
-#define NBNXN_KVER_DEFAULT(k) (k == eNbnxnCuKDefault)
-
-/* Non-bonded kernel versions. */
-
-/* All structs prefixed with "cu_" hold data used in GPU calculations and
- * are passed to the kernels, except cu_timers_t. */
+/* All structs prefixed with "cu_" hold data used in GPU calculations and
+ * are passed to the kernels, except cu_timers_t. */
+/*! \cond */
typedef struct cu_plist cu_plist_t;
typedef struct cu_atomdata cu_atomdata_t;
typedef struct cu_nbparam cu_nbparam_t;
typedef struct cu_timers cu_timers_t;
typedef struct nb_staging nb_staging_t;
+/*! \endcond */
-/** Staging area for temporary data. The energies get downloaded here first,
- * before getting added to the CPU-side aggregate values.
+/** \internal
+ * \brief Staging area for temporary data downloaded from the GPU.
+ *
+ * The energies/shift forces get downloaded here first, before getting added
+ * to the CPU-side aggregate values.
*/
struct nb_staging
{
float3 *fshift; /**< shift forces */
};
-/** Nonbonded atom data -- both inputs and outputs. */
+/** \internal
+ * \brief Nonbonded atom data - both inputs and outputs.
+ */
struct cu_atomdata
{
int natoms; /**< number of atoms */
float4 *xq; /**< atom coordinates + charges, size natoms */
float3 *f; /**< force output array, size natoms */
- /* TODO: try float2 for the energies */
- float *e_lj, /**< LJ energy output, size 1 */
- *e_el; /**< Electrostatics energy input, size 1 */
+
+ float *e_lj; /**< LJ energy output, size 1 */
+ float *e_el; /**< Electrostatics energy output, size 1 */
float3 *fshift; /**< shift forces */
bool bShiftVecUploaded; /**< true if the shift vector has been uploaded */
};
-/** Parameters required for the CUDA nonbonded calculations. */
+/** \internal
+ * \brief Parameters required for the CUDA nonbonded calculations.
+ */
struct cu_nbparam
{
- int eeltype; /**< type of electrostatics */
-
- float epsfac; /**< charge multiplication factor */
- float c_rf, /**< Reaction-field/plain cutoff electrostatics const. */
- two_k_rf; /**< Reaction-field electrostatics constant */
- float ewald_beta; /**< Ewald/PME parameter */
- float sh_ewald; /**< Ewald/PME correction term */
- float rvdw_sq; /**< VdW cut-off */
- float rcoulomb_sq; /**< Coulomb cut-off */
- float rlist_sq; /**< pair-list cut-off */
- float sh_invrc6; /**< LJ potential correction term */
-
- float *nbfp; /**< nonbonded parameter table with C6/C12 pairs */
-
- /* Ewald Coulomb force table data */
- int coulomb_tab_size; /**< table size (s.t. it fits in texture cache) */
- float coulomb_tab_scale; /**< table scale/spacing */
- float *coulomb_tab; /**< pointer to the table in the device memory */
+
+ int eeltype; /**< type of electrostatics, takes values from #eelCu */
+ int vdwtype; /**< type of VdW impl., takes values from #evdwCu */
+
+ float epsfac; /**< charge multiplication factor */
+ float c_rf; /**< Reaction-field/plain cutoff electrostatics const. */
+ float two_k_rf; /**< Reaction-field electrostatics constant */
+ float ewald_beta; /**< Ewald/PME parameter */
+ float sh_ewald; /**< Ewald/PME correction term subtracted from the direct-space potential */
+ float sh_lj_ewald; /**< LJ-Ewald/PME correction term added to the correction potential */
+ float ewaldcoeff_lj; /**< LJ-Ewald/PME coefficient */
+
+ float rcoulomb_sq; /**< Coulomb cut-off squared */
+
+ float rvdw_sq; /**< VdW cut-off squared */
+ float rvdw_switch; /**< VdW switched cut-off */
+ float rlist_sq; /**< pair-list cut-off squared */
+
+ shift_consts_t dispersion_shift; /**< VdW shift dispersion constants */
+ shift_consts_t repulsion_shift; /**< VdW shift repulsion constants */
+ switch_consts_t vdw_switch; /**< VdW switch constants */
+
+ /* LJ non-bonded parameters - accessed through texture memory */
+ float *nbfp; /**< nonbonded parameter table with C6/C12 pairs per atom type-pair, 2*ntype^2 elements */
+ cudaTextureObject_t nbfp_texobj; /**< texture object bound to nbfp */
+ float *nbfp_comb; /**< nonbonded parameter table per atom type, 2*ntype elements */
+ cudaTextureObject_t nbfp_comb_texobj; /**< texture object bound to nbfp_comb */
+
+ /* Ewald Coulomb force table data - accessed through texture memory */
+ int coulomb_tab_size; /**< table size (s.t. it fits in texture cache) */
+ float coulomb_tab_scale; /**< table scale/spacing */
+ float *coulomb_tab; /**< pointer to the table in the device memory */
+ cudaTextureObject_t coulomb_tab_texobj; /**< texture object bound to coulomb_tab */
};
-/** Pair list data */
+/** \internal
+ * \brief Pair list data.
+ */
struct cu_plist
{
int na_c; /**< number of atoms per cluster */
done during the current step */
};
-/** CUDA events used for timing GPU kernels and H2D/D2H transfers.
+/** \internal
+ * \brief CUDA events used for timing GPU kernels and H2D/D2H transfers.
+ *
* The two-sized arrays hold the local and non-local values and should always
* be indexed with eintLocal/eintNonlocal.
*/
cudaEvent_t stop_nb_k[2]; /**< stop event non-bonded kernels (l/nl, every step) */
};
-/** Main data structure for CUDA nonbonded force calculations. */
+/** \internal
+ * \brief Main data structure for CUDA nonbonded force calculations.
+ */
struct nbnxn_cuda
{
cuda_dev_info_t *dev_info; /**< CUDA device information */
- int kernel_ver; /**< The version of the kernel to be executed on the
- device in use, possible values: eNbnxnCuK* */
bool bUseTwoStreams; /**< true if doing both local/non-local NB work on GPU */
bool bUseStreamSync; /**< true if the standard cudaStreamSynchronize is used
and not memory polling-based waiting */
cudaStream_t stream[2]; /**< local and non-local GPU streams */
/** events used for synchronization */
- cudaEvent_t nonlocal_done; /**< event triggered when the non-local non-bonded kernel
- is done (and the local transfer can proceed) */
- cudaEvent_t misc_ops_done; /**< event triggered when the operations that precede the
- main force calculations are done (e.g. buffer 0-ing) */
+ cudaEvent_t nonlocal_done; /**< event triggered when the non-local non-bonded kernel
+ is done (and the local transfer can proceed) */
+ cudaEvent_t misc_ops_done; /**< event triggered when the operations that precede the
+ main force calculations are done (e.g. buffer 0-ing) */
/* NOTE: With current CUDA versions (<=5.0) timing doesn't work with multiple
* concurrent streams, so we won't time if both l/nl work is done on GPUs.