{ k_nbnxn_ewald_twin_ener, k_nbnxn_ewald_twin_ener_prune } },
};
-/*! Pointers to the legacy kernels organized in a 3 dim array by:
- * electrostatics type, energy calculation on/off, and pruning on/off.
- *
- * Note that the order of electrostatics (1st dimension) has to match the
- * order of corresponding enumerated types defined in nbnxn_cuda_types.h.
- */
-static const nbnxn_cu_kfunc_ptr_t
-nb_legacy_kfunc_ptr[eelCuNR][nEnergyKernelTypes][nPruneKernelTypes] =
-{
- { { k_nbnxn_cutoff_legacy, k_nbnxn_cutoff_prune_legacy },
- { k_nbnxn_cutoff_ener_legacy, k_nbnxn_cutoff_ener_prune_legacy } },
- { { k_nbnxn_rf_legacy, k_nbnxn_rf_prune_legacy },
- { k_nbnxn_rf_ener_legacy, k_nbnxn_rf_ener_prune_legacy } },
- { { k_nbnxn_ewald_tab_legacy, k_nbnxn_ewald_tab_prune_legacy },
- { k_nbnxn_ewald_tab_ener_legacy, k_nbnxn_ewald_tab_ener_prune_legacy } },
- { { k_nbnxn_ewald_tab_twin_legacy, k_nbnxn_ewald_tab_twin_prune_legacy },
- { k_nbnxn_ewald_tab_twin_ener_legacy, k_nbnxn_ewald_tab_twin_ener_prune_legacy } },
-};
-
/*! Return a pointer to the kernel version to be executed at the current step. */
-static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(int kver, int eeltype,
- bool bDoEne, bool bDoPrune)
+static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(int eeltype,
+ bool bDoEne,
+ bool bDoPrune)
{
- assert(kver < eNbnxnCuKNR);
assert(eeltype < eelCuNR);
- if (NBNXN_KVER_LEGACY(kver))
- {
- /* no analytical Ewald with legacy kernels */
- assert(eeltype <= eelCuEWALD_TAB_TWIN);
-
- return nb_legacy_kfunc_ptr[eeltype][bDoEne][bDoPrune];
- }
- else
- {
- return nb_default_kfunc_ptr[eeltype][bDoEne][bDoPrune];
- }
+ return nb_default_kfunc_ptr[eeltype][bDoEne][bDoPrune]; /* the bool flags bDoEne and bDoPrune
+ * are used directly as the 2nd and 3rd table indices; eeltype is only checked
+ * against the upper bound eelCuNR by the assert in this function. */
}
-/*! Calculates the amount of shared memory required for kernel version in use. */
-static inline int calc_shmem_required(int kver)
+/*! Calculates the amount of shared memory required by the CUDA kernel in use.
+ *
+ * Sums the sizes of the i-atom x+q buffer and the cj buffers (one per warp,
+ * two warps), plus, depending on preprocessor conditions, the i-atom type
+ * buffer (IATYPE_SHMEM) and the force reduction buffers.
+ *
+ * \returns the shared memory size in bytes (all terms are sizeof-based).
+ */
+static inline int calc_shmem_required()
{
int shmem;
/* size of shmem (force-buffers/xq/atom type preloading) */
- if (NBNXN_KVER_LEGACY(kver))
- {
- /* i-atom x+q in shared memory */
- shmem = NCL_PER_SUPERCL * CL_SIZE * sizeof(float4);
- /* force reduction buffers in shared memory */
- shmem += CL_SIZE * CL_SIZE * 3 * sizeof(float);
- }
- else
- {
- /* NOTE: with the default kernel on sm3.0 we need shmem only for pre-loading */
- /* i-atom x+q in shared memory */
- shmem = NCL_PER_SUPERCL * CL_SIZE * sizeof(float4);
- /* cj in shared memory, for both warps separately */
- shmem += 2 * NBNXN_GPU_JGROUP_SIZE * sizeof(int);
+ /* NOTE: with the default kernel on sm3.0 we need shmem only for pre-loading */
+ /* i-atom x+q in shared memory */
+ shmem = NCL_PER_SUPERCL * CL_SIZE * sizeof(float4);
+ /* cj in shared memory, for both warps separately */
+ shmem += 2 * NBNXN_GPU_JGROUP_SIZE * sizeof(int);
#ifdef IATYPE_SHMEM
- /* i-atom types in shared memory */
- shmem += NCL_PER_SUPERCL * CL_SIZE * sizeof(int);
+ /* i-atom types in shared memory */
+ shmem += NCL_PER_SUPERCL * CL_SIZE * sizeof(int);
#endif
#if __CUDA_ARCH__ < 300
- /* force reduction buffers in shared memory */
- shmem += CL_SIZE * CL_SIZE * 3 * sizeof(float);
+ /* force reduction buffers in shared memory
+ * NOTE(review): __CUDA_ARCH__ is only defined while compiling device code.
+ * This function carries no __device__ qualifier and is called next to the
+ * kernel launch configuration, so it looks like host code; if so, the
+ * preprocessor condition above is always true and this term is added on
+ * every architecture, contrary to the sm3.0 note earlier in this function.
+ * TODO confirm whether a runtime check of the device compute capability
+ * was intended instead. */
+ shmem += CL_SIZE * CL_SIZE * 3 * sizeof(float);
#endif
- }
return shmem;
}
}
/* get the pointer to the kernel flavor we need to use */
- nb_kernel = select_nbnxn_kernel(cu_nb->kernel_ver, nbp->eeltype, bCalcEner,
+ nb_kernel = select_nbnxn_kernel(nbp->eeltype, bCalcEner,
plist->bDoPrune || always_prune);
/* kernel launch config */
nblock = calc_nb_kernel_nblock(plist->nsci, cu_nb->dev_info);
dim_block = dim3(CL_SIZE, CL_SIZE, 1);
dim_grid = dim3(nblock, 1, 1);
- shmem = calc_shmem_required(cu_nb->kernel_ver);
+ shmem = calc_shmem_required();
if (debug)
{
{
for (int k = 0; k < nPruneKernelTypes; k++)
{
- /* Legacy kernel 16/48 kB Shared/L1
- * No analytical Ewald!
- */
- if (i != eelCuEWALD_ANA && i != eelCuEWALD_ANA_TWIN)
- {
- stat = cudaFuncSetCacheConfig(nb_legacy_kfunc_ptr[i][j][k], cudaFuncCachePreferL1);
- CU_RET_ERR(stat, "cudaFuncSetCacheConfig failed");
- }
-
if (devinfo->prop.major >= 3)
{
/* Default kernel on sm 3.x 48/16 kB Shared/L1 */