/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
#include <stdlib.h>
#include <assert.h>
#define USE_TEXOBJ
#endif
-/*! Texture reference for nonbonded parameters; bound to cu_nbparam_t.nbfp*/
+/*! Texture reference for LJ C6/C12 parameters; bound to cu_nbparam_t.nbfp */
texture<float, 1, cudaReadModeElementType> nbfp_texref;
+/*! Texture reference for LJ-PME parameters; bound to cu_nbparam_t.nbfp_comb */
+texture<float, 1, cudaReadModeElementType> nbfp_comb_texref;
+
/*! Texture reference for Ewald coulomb force table; bound to cu_nbparam_t.coulomb_tab */
texture<float, 1, cudaReadModeElementType> coulomb_tab_texref;
/* Constant arrays listing all kernel function pointers and enabling selection
of a kernel in an elegant manner. */
-static const int nEnergyKernelTypes = 2; /* 0 - no energy, 1 - energy */
-static const int nPruneKernelTypes = 2; /* 0 - no prune, 1 - prune */
-
-/*! Pointers to the default kernels organized in a 3 dim array by:
- * electrostatics type, energy calculation on/off, and pruning on/off.
+/*! Pointers to the non-bonded kernels organized in 2-dim arrays by:
+ * electrostatics and VDW type.
*
- * Note that the order of electrostatics (1st dimension) has to match the
- * order of corresponding enumerated types defined in nbnxn_cuda_types.h.
+ * Note that the row- and column-order of function pointers has to match the
+ * order of corresponding enumerated electrostatics and vdw types, resp.,
+ * defined in nbnxn_cuda_types.h.
*/
-static const nbnxn_cu_kfunc_ptr_t
- nb_default_kfunc_ptr[eelCuNR][nEnergyKernelTypes][nPruneKernelTypes] =
+
+/*! Force-only kernel function pointers. */
+static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_noprune_ptr[eelCuNR][evdwCuNR] =
+{
+ { nbnxn_kernel_ElecCut_VdwLJ_F_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_F_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_F_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombLB_F_cuda },
+ { nbnxn_kernel_ElecRF_VdwLJ_F_cuda, nbnxn_kernel_ElecRF_VdwLJFsw_F_cuda, nbnxn_kernel_ElecRF_VdwLJPsw_F_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombLB_F_cuda },
+ { nbnxn_kernel_ElecEwQSTab_VdwLJ_F_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJFsw_F_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJPsw_F_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombLB_F_cuda },
+ { nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJ_F_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJFsw_F_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJPsw_F_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombLB_F_cuda },
+ { nbnxn_kernel_ElecEw_VdwLJ_F_cuda, nbnxn_kernel_ElecEw_VdwLJFsw_F_cuda, nbnxn_kernel_ElecEw_VdwLJPsw_F_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombLB_F_cuda },
+ { nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJFsw_F_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJPsw_F_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombLB_F_cuda }
+};
+
+/*! Force + energy kernel function pointers. */
+static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_noprune_ptr[eelCuNR][evdwCuNR] =
+{
+ { nbnxn_kernel_ElecCut_VdwLJ_VF_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombLB_VF_cuda },
+ { nbnxn_kernel_ElecRF_VdwLJ_VF_cuda, nbnxn_kernel_ElecRF_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecRF_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombLB_VF_cuda },
+ { nbnxn_kernel_ElecEwQSTab_VdwLJ_VF_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombLB_VF_cuda },
+ { nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJ_VF_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombLB_VF_cuda },
+ { nbnxn_kernel_ElecEw_VdwLJ_VF_cuda, nbnxn_kernel_ElecEw_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecEw_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombLB_VF_cuda },
+ { nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombLB_VF_cuda }
+};
+
+/*! Force + pruning kernel function pointers. */
+static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_prune_ptr[eelCuNR][evdwCuNR] =
{
- { { k_nbnxn_cutoff, k_nbnxn_cutoff_prune },
- { k_nbnxn_cutoff_ener, k_nbnxn_cutoff_ener_prune } },
- { { k_nbnxn_rf, k_nbnxn_rf_prune },
- { k_nbnxn_rf_ener, k_nbnxn_rf_ener_prune } },
- { { k_nbnxn_ewald_tab, k_nbnxn_ewald_tab_prune },
- { k_nbnxn_ewald_tab_ener, k_nbnxn_ewald_tab_ener_prune } },
- { { k_nbnxn_ewald_tab_twin, k_nbnxn_ewald_tab_twin_prune },
- { k_nbnxn_ewald_tab_twin_ener, k_nbnxn_ewald_twin_ener_prune } },
- { { k_nbnxn_ewald, k_nbnxn_ewald_prune },
- { k_nbnxn_ewald_ener, k_nbnxn_ewald_ener_prune } },
- { { k_nbnxn_ewald_twin, k_nbnxn_ewald_twin_prune },
- { k_nbnxn_ewald_twin_ener, k_nbnxn_ewald_twin_ener_prune } },
+ { nbnxn_kernel_ElecCut_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombLB_F_prune_cuda },
+ { nbnxn_kernel_ElecRF_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecRF_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecRF_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombLB_F_prune_cuda },
+ { nbnxn_kernel_ElecEwQSTab_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombLB_F_prune_cuda },
+ { nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombLB_F_prune_cuda },
+ { nbnxn_kernel_ElecEw_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecEw_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecEw_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombLB_F_prune_cuda },
+ { nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombLB_F_prune_cuda }
+};
+
+/*! Force + energy + pruning kernel function pointers. */
+static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_prune_ptr[eelCuNR][evdwCuNR] =
+{
+ { nbnxn_kernel_ElecCut_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombLB_VF_prune_cuda },
+ { nbnxn_kernel_ElecRF_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecRF_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecRF_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombLB_VF_prune_cuda },
+ { nbnxn_kernel_ElecEwQSTab_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombLB_VF_prune_cuda },
+ { nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombLB_VF_prune_cuda },
+ { nbnxn_kernel_ElecEw_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecEw_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecEw_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombLB_VF_prune_cuda },
+ { nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombLB_VF_prune_cuda }
};
/*! Return a pointer to the kernel version to be executed at the current step. */
static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(int eeltype,
+ int evdwtype,
bool bDoEne,
bool bDoPrune)
{
+ nbnxn_cu_kfunc_ptr_t res;
+
assert(eeltype < eelCuNR);
+ assert(evdwtype < evdwCuNR);
+
+ if (bDoEne)
+ {
+ if (bDoPrune)
+ {
+ res = nb_kfunc_ener_prune_ptr[eeltype][evdwtype];
+ }
+ else
+ {
+ res = nb_kfunc_ener_noprune_ptr[eeltype][evdwtype];
+ }
+ }
+ else
+ {
+ if (bDoPrune)
+ {
+ res = nb_kfunc_noener_prune_ptr[eeltype][evdwtype];
+ }
+ else
+ {
+ res = nb_kfunc_noener_noprune_ptr[eeltype][evdwtype];
+ }
+ }
- return nb_default_kfunc_ptr[eeltype][bDoEne][bDoPrune];
+ return res;
}
/*! Calculates the amount of shared memory required by the CUDA kernel in use. */
}
/* get the pointer to the kernel flavor we need to use */
- nb_kernel = select_nbnxn_kernel(nbp->eeltype, bCalcEner,
+ nb_kernel = select_nbnxn_kernel(nbp->eeltype,
+ nbp->vdwtype,
+ bCalcEner,
plist->bDoPrune || always_prune);
/* kernel launch config */
return nbfp_texref;
}
+/*! Return the reference to the nbfp_comb texture. */
+const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_nbfp_comb_texref()
+{
+ return nbfp_comb_texref;
+}
+
/*! Return the reference to the coulomb_tab. */
const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_coulomb_tab_texref()
{
for (int i = 0; i < eelCuNR; i++)
{
- for (int j = 0; j < nEnergyKernelTypes; j++)
+ for (int j = 0; j < evdwCuNR; j++)
{
- for (int k = 0; k < nPruneKernelTypes; k++)
+ if (devinfo->prop.major >= 3)
+ {
+ /* Default kernel on sm 3.x 48/16 kB Shared/L1 */
+ stat = cudaFuncSetCacheConfig(nb_kfunc_ener_prune_ptr[i][j], cudaFuncCachePreferShared);
+ stat = cudaFuncSetCacheConfig(nb_kfunc_ener_noprune_ptr[i][j], cudaFuncCachePreferShared);
+ stat = cudaFuncSetCacheConfig(nb_kfunc_noener_prune_ptr[i][j], cudaFuncCachePreferShared);
+ stat = cudaFuncSetCacheConfig(nb_kfunc_noener_noprune_ptr[i][j], cudaFuncCachePreferShared);
+ }
+ else
{
- if (devinfo->prop.major >= 3)
- {
- /* Default kernel on sm 3.x 48/16 kB Shared/L1 */
- stat = cudaFuncSetCacheConfig(nb_default_kfunc_ptr[i][j][k], cudaFuncCachePreferShared);
- }
- else
- {
- /* On Fermi prefer L1 gives 2% higher performance */
- /* Default kernel on sm_2.x 16/48 kB Shared/L1 */
- stat = cudaFuncSetCacheConfig(nb_default_kfunc_ptr[i][j][k], cudaFuncCachePreferL1);
- }
- CU_RET_ERR(stat, "cudaFuncSetCacheConfig failed");
+ /* On Fermi prefer L1 gives 2% higher performance */
+ /* Default kernel on sm_2.x 16/48 kB Shared/L1 */
+ stat = cudaFuncSetCacheConfig(nb_kfunc_ener_prune_ptr[i][j], cudaFuncCachePreferL1);
+ stat = cudaFuncSetCacheConfig(nb_kfunc_ener_noprune_ptr[i][j], cudaFuncCachePreferL1);
+ stat = cudaFuncSetCacheConfig(nb_kfunc_noener_prune_ptr[i][j], cudaFuncCachePreferL1);
+ stat = cudaFuncSetCacheConfig(nb_kfunc_noener_noprune_ptr[i][j], cudaFuncCachePreferL1);
}
+ CU_RET_ERR(stat, "cudaFuncSetCacheConfig failed");
}
}
}