used in initializing domain decomposition communicators. Rank reordering
is enabled by default, but can be switched off with this environment variable.
+``GMX_NO_LJ_COMB_RULE``
+ force the use of LJ parameter lookup instead of using combination rules
+ in the non-bonded kernels.
+
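A minimal sketch of how such an override is honored host-side (the actual guard appears in the pair-list setup hunk below; setting the variable to any value disables the combination-rule fast path)::

    #include <stdbool.h>
    #include <stdlib.h>

    /* true when the LJ combination-rule kernels may be used */
    static bool use_lj_comb_rule_kernels(void)
    {
        return (getenv("GMX_NO_LJ_COMB_RULE") == NULL);
    }
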
``GMX_NO_CUDA_STREAMSYNC``
the opposite of ``GMX_CUDA_STREAMSYNC``. Disables the use of the
standard cudaStreamSynchronize-based GPU waiting to improve performance when using CUDA driver API
bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
- if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
+ if (fr->vdwtype == evdwCUT &&
+ (fr->vdw_modifier == eintmodNONE ||
+ fr->vdw_modifier == eintmodPOTSHIFT) &&
+ getenv("GMX_NO_LJ_COMB_RULE") == NULL)
{
/* Plain LJ cut-off: we can optimize with combination rules */
enbnxninitcombrule = enbnxninitcombruleDETECT;
ncz*grid->na_sc,
nbat->lj_comb + ash*2);
}
+ else if (nbat->XFormat == nbatXYZQ)
+ {
+ copy_lj_to_nbat_lj_comb<1>(nbat->nbfp_comb,
+ nbat->type + ash,
+ ncz*grid->na_sc,
+ nbat->lj_comb + ash*2);
+ }
}
}
}
/*! Force-only kernel function pointers. */
static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_noprune_ptr[eelCuNR][evdwCuNR] =
{
- { nbnxn_kernel_ElecCut_VdwLJ_F_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_F_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_F_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombLB_F_cuda },
- { nbnxn_kernel_ElecRF_VdwLJ_F_cuda, nbnxn_kernel_ElecRF_VdwLJFsw_F_cuda, nbnxn_kernel_ElecRF_VdwLJPsw_F_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombLB_F_cuda },
- { nbnxn_kernel_ElecEwQSTab_VdwLJ_F_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJFsw_F_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJPsw_F_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombLB_F_cuda },
- { nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJ_F_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJFsw_F_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJPsw_F_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombLB_F_cuda },
- { nbnxn_kernel_ElecEw_VdwLJ_F_cuda, nbnxn_kernel_ElecEw_VdwLJFsw_F_cuda, nbnxn_kernel_ElecEw_VdwLJPsw_F_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombLB_F_cuda },
- { nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJFsw_F_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJPsw_F_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombLB_F_cuda }
+ { nbnxn_kernel_ElecCut_VdwLJ_F_cuda, nbnxn_kernel_ElecCut_VdwLJCombGeom_F_cuda, nbnxn_kernel_ElecCut_VdwLJCombLB_F_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_F_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_F_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombLB_F_cuda },
+ { nbnxn_kernel_ElecRF_VdwLJ_F_cuda, nbnxn_kernel_ElecRF_VdwLJCombGeom_F_cuda, nbnxn_kernel_ElecRF_VdwLJCombLB_F_cuda, nbnxn_kernel_ElecRF_VdwLJFsw_F_cuda, nbnxn_kernel_ElecRF_VdwLJPsw_F_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombLB_F_cuda },
+ { nbnxn_kernel_ElecEwQSTab_VdwLJ_F_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJCombGeom_F_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJCombLB_F_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJFsw_F_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJPsw_F_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombLB_F_cuda },
+ { nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJ_F_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJCombGeom_F_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJCombLB_F_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJFsw_F_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJPsw_F_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombLB_F_cuda },
+ { nbnxn_kernel_ElecEw_VdwLJ_F_cuda, nbnxn_kernel_ElecEw_VdwLJCombGeom_F_cuda, nbnxn_kernel_ElecEw_VdwLJCombLB_F_cuda, nbnxn_kernel_ElecEw_VdwLJFsw_F_cuda, nbnxn_kernel_ElecEw_VdwLJPsw_F_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombLB_F_cuda },
+ { nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJFsw_F_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJPsw_F_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombLB_F_cuda }
};
/*! Force + energy kernel function pointers. */
static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_noprune_ptr[eelCuNR][evdwCuNR] =
{
- { nbnxn_kernel_ElecCut_VdwLJ_VF_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombLB_VF_cuda },
- { nbnxn_kernel_ElecRF_VdwLJ_VF_cuda, nbnxn_kernel_ElecRF_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecRF_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombLB_VF_cuda },
- { nbnxn_kernel_ElecEwQSTab_VdwLJ_VF_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombLB_VF_cuda },
- { nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJ_VF_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombLB_VF_cuda },
- { nbnxn_kernel_ElecEw_VdwLJ_VF_cuda, nbnxn_kernel_ElecEw_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecEw_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombLB_VF_cuda },
- { nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombLB_VF_cuda }
+ { nbnxn_kernel_ElecCut_VdwLJ_VF_cuda, nbnxn_kernel_ElecCut_VdwLJCombGeom_VF_cuda, nbnxn_kernel_ElecCut_VdwLJCombLB_VF_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombLB_VF_cuda },
+ { nbnxn_kernel_ElecRF_VdwLJ_VF_cuda, nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_cuda, nbnxn_kernel_ElecRF_VdwLJCombLB_VF_cuda, nbnxn_kernel_ElecRF_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecRF_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombLB_VF_cuda },
+ { nbnxn_kernel_ElecEwQSTab_VdwLJ_VF_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJCombGeom_VF_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJCombLB_VF_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombLB_VF_cuda },
+ { nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJ_VF_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJCombGeom_VF_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJCombLB_VF_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombLB_VF_cuda },
+ { nbnxn_kernel_ElecEw_VdwLJ_VF_cuda, nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_cuda, nbnxn_kernel_ElecEw_VdwLJCombLB_VF_cuda, nbnxn_kernel_ElecEw_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecEw_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombLB_VF_cuda },
+ { nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombLB_VF_cuda }
};
/*! Force + pruning kernel function pointers. */
static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_prune_ptr[eelCuNR][evdwCuNR] =
{
- { nbnxn_kernel_ElecCut_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombLB_F_prune_cuda },
- { nbnxn_kernel_ElecRF_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecRF_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecRF_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombLB_F_prune_cuda },
- { nbnxn_kernel_ElecEwQSTab_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombLB_F_prune_cuda },
- { nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombLB_F_prune_cuda },
- { nbnxn_kernel_ElecEw_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecEw_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecEw_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombLB_F_prune_cuda },
- { nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombLB_F_prune_cuda }
+ { nbnxn_kernel_ElecCut_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJCombGeom_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJCombLB_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombLB_F_prune_cuda },
+ { nbnxn_kernel_ElecRF_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecRF_VdwLJCombGeom_F_prune_cuda, nbnxn_kernel_ElecRF_VdwLJCombLB_F_prune_cuda, nbnxn_kernel_ElecRF_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecRF_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombLB_F_prune_cuda },
+ { nbnxn_kernel_ElecEwQSTab_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJCombGeom_F_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJCombLB_F_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombLB_F_prune_cuda },
+ { nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJCombGeom_F_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJCombLB_F_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombLB_F_prune_cuda },
+ { nbnxn_kernel_ElecEw_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecEw_VdwLJCombGeom_F_prune_cuda, nbnxn_kernel_ElecEw_VdwLJCombLB_F_prune_cuda, nbnxn_kernel_ElecEw_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecEw_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombLB_F_prune_cuda },
+ { nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombLB_F_prune_cuda }
};
/*! Force + energy + pruning kernel function pointers. */
static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_prune_ptr[eelCuNR][evdwCuNR] =
{
- { nbnxn_kernel_ElecCut_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombLB_VF_prune_cuda },
- { nbnxn_kernel_ElecRF_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecRF_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecRF_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombLB_VF_prune_cuda },
- { nbnxn_kernel_ElecEwQSTab_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombLB_VF_prune_cuda },
- { nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombLB_VF_prune_cuda },
- { nbnxn_kernel_ElecEw_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecEw_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecEw_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombLB_VF_prune_cuda },
- { nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombLB_VF_prune_cuda }
+ { nbnxn_kernel_ElecCut_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJCombGeom_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJCombLB_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombLB_VF_prune_cuda },
+ { nbnxn_kernel_ElecRF_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_prune_cuda, nbnxn_kernel_ElecRF_VdwLJCombLB_VF_prune_cuda, nbnxn_kernel_ElecRF_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecRF_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecRF_VdwLJEwCombLB_VF_prune_cuda },
+ { nbnxn_kernel_ElecEwQSTab_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJCombGeom_VF_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJCombLB_VF_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecEwQSTab_VdwLJEwCombLB_VF_prune_cuda },
+ { nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJCombGeom_VF_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJCombLB_VF_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecEwQSTabTwinCut_VdwLJEwCombLB_VF_prune_cuda },
+ { nbnxn_kernel_ElecEw_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_prune_cuda, nbnxn_kernel_ElecEw_VdwLJCombLB_VF_prune_cuda, nbnxn_kernel_ElecEw_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecEw_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecEw_VdwLJEwCombLB_VF_prune_cuda },
+ { nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_prune_cuda, nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombLB_VF_prune_cuda }
};
/*! Return a pointer to the kernel version to be executed at the current step. */
}
/*! Calculates the amount of shared memory required by the CUDA kernel in use. */
-static inline int calc_shmem_required(const int num_threads_z, gmx_device_info_t gmx_unused *dinfo)
+static inline int calc_shmem_required(const int num_threads_z, gmx_device_info_t gmx_unused *dinfo, const cu_nbparam_t *nbp)
{
int shmem;
shmem += num_threads_z * 2 * c_nbnxnGpuJgroupSize * sizeof(int);
if (dinfo->prop.major >= 3)
{
- /* i-atom types in shared memory */
- shmem += c_numClPerSupercl * c_clSize * sizeof(int);
+ if (nbp->vdwtype == evdwCuCUTCOMBGEOM ||
+ nbp->vdwtype == evdwCuCUTCOMBLB)
+ {
+ /* i-atom LJ combination parameters in shared memory */
+ shmem += c_numClPerSupercl * c_clSize * sizeof(float2);
+ }
+ else
+ {
+ /* i-atom types in shared memory */
+ shmem += c_numClPerSupercl * c_clSize * sizeof(int);
+ }
}
if (dinfo->prop.major < 3)
{
nblock = calc_nb_kernel_nblock(plist->nsci, nb->dev_info);
dim_block = dim3(c_clSize, c_clSize, num_threads_z);
dim_grid = dim3(nblock, 1, 1);
- shmem = calc_shmem_required(num_threads_z, nb->dev_info);
+ shmem = calc_shmem_required(num_threads_z, nb->dev_info, nbp);
if (debug)
{
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
set_cutoff_parameters(nbp, ic);
+ /* The kernel code supports LJ combination rules (geometric and LB) for
+ * all kernel types, but we only generate the kernels that are useful.
+ * Currently we only use LJ combination rule (geometric and LB) kernels
+ * for plain cut-off LJ. On Maxwell the force-only kernels speed up by 15%
+ * with PME and by 20% with RF; the other kernels speed up by about half
+ * as much. For LJ force-switch the geometric rule would give a 7% speed-up,
+ * but this combination is rarely used. LJ force-switch with the LB rule is
+ * more common, but gives only a 1% speed-up.
+ */
if (ic->vdwtype == evdwCUT)
{
switch (ic->vdw_modifier)
{
case eintmodNONE:
case eintmodPOTSHIFT:
- nbp->vdwtype = evdwCuCUT;
+ switch (nbat->comb_rule)
+ {
+ case ljcrNONE:
+ nbp->vdwtype = evdwCuCUT;
+ break;
+ case ljcrGEOM:
+ nbp->vdwtype = evdwCuCUTCOMBGEOM;
+ break;
+ case ljcrLB:
+ nbp->vdwtype = evdwCuCUTCOMBLB;
+ break;
+ default:
+ gmx_incons("The requested LJ combination rule is not implemented in the CUDA GPU accelerated kernels!");
+ break;
+ }
break;
case eintmodFORCESWITCH:
nbp->vdwtype = evdwCuFSWITCH;
cu_atomdata_t *d_atdat = nb->atdat;
cudaStream_t ls = nb->stream[eintLocal];
+ bool bUseLjCombination = (nb->nbparam->vdwtype == evdwCuCUTCOMBGEOM ||
+ nb->nbparam->vdwtype == evdwCuCUTCOMBLB);
+
natoms = nbat->natoms;
realloced = false;
cu_free_buffered(d_atdat->f, &d_atdat->natoms, &d_atdat->nalloc);
cu_free_buffered(d_atdat->xq);
cu_free_buffered(d_atdat->atom_types);
+ cu_free_buffered(d_atdat->lj_comb);
}
stat = cudaMalloc((void **)&d_atdat->f, nalloc*sizeof(*d_atdat->f));
CU_RET_ERR(stat, "cudaMalloc failed on d_atdat->f");
stat = cudaMalloc((void **)&d_atdat->xq, nalloc*sizeof(*d_atdat->xq));
CU_RET_ERR(stat, "cudaMalloc failed on d_atdat->xq");
-
- stat = cudaMalloc((void **)&d_atdat->atom_types, nalloc*sizeof(*d_atdat->atom_types));
- CU_RET_ERR(stat, "cudaMalloc failed on d_atdat->atom_types");
+ if (bUseLjCombination)
+ {
+ stat = cudaMalloc((void **)&d_atdat->lj_comb, nalloc*sizeof(*d_atdat->lj_comb));
+ CU_RET_ERR(stat, "cudaMalloc failed on d_atdat->lj_comb");
+ }
+ else
+ {
+ stat = cudaMalloc((void **)&d_atdat->atom_types, nalloc*sizeof(*d_atdat->atom_types));
+ CU_RET_ERR(stat, "cudaMalloc failed on d_atdat->atom_types");
+ }
d_atdat->nalloc = nalloc;
realloced = true;
nbnxn_cuda_clear_f(nb, nalloc);
}
- cu_copy_H2D_async(d_atdat->atom_types, nbat->type,
- natoms*sizeof(*d_atdat->atom_types), ls);
+ if (bUseLjCombination)
+ {
+ cu_copy_H2D_async(d_atdat->lj_comb, nbat->lj_comb,
+ natoms*sizeof(*d_atdat->lj_comb), ls);
+ }
+ else
+ {
+ cu_copy_H2D_async(d_atdat->atom_types, nbat->type,
+ natoms*sizeof(*d_atdat->atom_types), ls);
+ }
if (bDoTime)
{
cu_free_buffered(atdat->f, &atdat->natoms, &atdat->nalloc);
cu_free_buffered(atdat->xq);
cu_free_buffered(atdat->atom_types, &atdat->ntypes);
+ cu_free_buffered(atdat->lj_comb);
cu_free_buffered(plist->sci, &plist->nsci, &plist->sci_nalloc);
cu_free_buffered(plist->cj4, &plist->ncj4, &plist->cj4_nalloc);
#define LJ_EWALD
#endif
+#if defined LJ_COMB_GEOM || defined LJ_COMB_LB
+#define LJ_COMB
+#endif
/*
Kernel launch parameters:
#endif
nbnxn_cj4_t *pl_cj4 = plist.cj4;
const nbnxn_excl_t *excl = plist.excl;
+#ifndef LJ_COMB
const int *atom_types = atdat.atom_types;
int ntypes = atdat.ntypes;
+#else
+ const float2 *lj_comb = atdat.lj_comb;
+ float2 ljcp_i, ljcp_j;
+#endif
const float4 *xq = atdat.xq;
float3 *f = atdat.f;
const float3 *shift_vec = atdat.shift_vec;
int sci, ci, cj,
ai, aj,
- cij4_start, cij4_end,
- typei, typej,
- i, jm, j4, wexcl_idx;
+ cij4_start, cij4_end;
+#ifndef LJ_COMB
+ int typei, typej;
+#endif
+ int i, jm, j4, wexcl_idx;
float qi, qj_f,
- r2, inv_r, inv_r2, inv_r6,
- c6, c12,
- int_bit,
+ r2, inv_r, inv_r2;
+#if !defined LJ_COMB_LB || defined CALC_ENERGIES
+ float inv_r6, c6, c12;
+#endif
+#ifdef LJ_COMB_LB
+ float sigma, epsilon;
+#endif
+ float int_bit,
F_invr;
#ifdef CALC_ENERGIES
float E_lj, E_el;
/* shmem buffer for i x+q pre-loading */
extern __shared__ float4 xqib[];
+
/* shmem buffer for cj, for each warp separately */
- int *cjs = ((int *)(xqib + c_numClPerSupercl * c_clSize)) + tidxz * 2 * c_nbnxnGpuJgroupSize;
+ int *cjs = ((int *)(xqib + c_numClPerSupercl * c_clSize)) + tidxz * 2 * c_nbnxnGpuJgroupSize;
+#ifndef LJ_COMB
/* shmem buffer for i atom-type pre-loading */
- int *atib = ((int *)(xqib + c_numClPerSupercl * c_clSize)) + NTHREAD_Z * 2 * c_nbnxnGpuJgroupSize;
+ int *atib = ((int *)(xqib + c_numClPerSupercl * c_clSize)) + NTHREAD_Z * 2 * c_nbnxnGpuJgroupSize;
+#else
+ /* shmem buffer for i-atom LJ combination rule parameters */
+ float2 *ljcpib = ((float2 *)(xqib + c_numClPerSupercl * c_clSize)) + NTHREAD_Z * 2 * c_nbnxnGpuJgroupSize;
+#endif
nb_sci = pl_sci[bidx]; /* my i super-cluster's index = current bidx */
sci = nb_sci.sci; /* super-cluster */
xqbuf.w *= nbparam.epsfac;
xqib[tidxj * c_clSize + tidxi] = xqbuf;
+#ifndef LJ_COMB
/* Pre-load the i-atom types into shared memory */
atib[tidxj * c_clSize + tidxi] = atom_types[ai];
+#else
+ /* Pre-load the LJ combination parameters into shared memory */
+ ljcpib[tidxj * c_clSize + tidxi] = lj_comb[ai];
+#endif
}
__syncthreads();
xqbuf = xq[aj];
xj = make_float3(xqbuf.x, xqbuf.y, xqbuf.z);
qj_f = xqbuf.w;
+#ifndef LJ_COMB
typej = atom_types[aj];
+#else
+ ljcp_j = lj_comb[aj];
+#endif
fcj_buf = make_float3(0.0f);
{
/* load the rest of the i-atom parameters */
qi = xqbuf.w;
- typei = atib[i * c_clSize + tidxi];
+#ifndef LJ_COMB
/* LJ 6*C6 and 12*C12 */
+ typei = atib[i * c_clSize + tidxi];
c6 = tex1Dfetch<float>(nbparam.nbfp_texobj, 2 * (ntypes * typei + typej));
c12 = tex1Dfetch<float>(nbparam.nbfp_texobj, 2 * (ntypes * typei + typej) + 1);
+#else
+ ljcp_i = ljcpib[i * c_clSize + tidxi];
+#ifdef LJ_COMB_GEOM
+ c6 = ljcp_i.x * ljcp_j.x;
+ c12 = ljcp_i.y * ljcp_j.y;
+#else
+ /* LJ 2^(1/6)*sigma and 12*epsilon */
+ sigma = ljcp_i.x + ljcp_j.x;
+ epsilon = ljcp_i.y * ljcp_j.y;
+#if defined CALC_ENERGIES || defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
+ convert_sigma_epsilon_to_c6_c12(sigma, epsilon, &c6, &c12);
+#endif
+#endif /* LJ_COMB_GEOM */
+#endif /* LJ_COMB */
/* avoid NaN for excluded pairs at r=0 */
r2 += (1.0f - int_bit) * NBNXN_AVOID_SING_R2_INC;
inv_r = rsqrt(r2);
inv_r2 = inv_r * inv_r;
+#if !defined LJ_COMB_LB || defined CALC_ENERGIES
inv_r6 = inv_r2 * inv_r2 * inv_r2;
#ifdef EXCLUSION_FORCES
/* We could mask inv_r2, but with Ewald
E_lj_p = int_bit * (c12 * (inv_r6 * inv_r6 + nbparam.repulsion_shift.cpot)*c_oneTwelveth -
c6 * (inv_r6 + nbparam.dispersion_shift.cpot)*c_oneSixth);
#endif
+#else /* !LJ_COMB_LB || CALC_ENERGIES */
+ float sig_r = sigma*inv_r;
+ float sig_r2 = sig_r*sig_r;
+ float sig_r6 = sig_r2*sig_r2*sig_r2;
+#ifdef EXCLUSION_FORCES
+ sig_r6 *= int_bit;
+#endif /* EXCLUSION_FORCES */
+
+ F_invr = epsilon * sig_r6 * (sig_r6 - 1.0f) * inv_r2;
+#endif /* !LJ_COMB_LB || CALC_ENERGIES */
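(For reference: with the scaled inputs noted above, s = 2^(1/6)*sigma and e = 12*epsilon, the expression F_invr = e*(s/r)^6*((s/r)^6 - 1)/r^2 expands to (48*eps*(sigma/r)^12 - 24*eps*(sigma/r)^6)/r^2, i.e. the standard Lennard-Jones force divided by r, so the force-only LB path gets by without computing c6, c12, or inv_r6.)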
#ifdef LJ_FORCE_SWITCH
#ifdef CALC_ENERGIES
#undef EL_EWALD_ANY
#undef EXCLUSION_FORCES
#undef LJ_EWALD
+
+#undef LJ_COMB
#define LJ_EWALD
#endif
+#if defined LJ_COMB_GEOM || defined LJ_COMB_LB
+#define LJ_COMB
+#endif
/*
Kernel launch parameters:
#endif
nbnxn_cj4_t *pl_cj4 = plist.cj4;
const nbnxn_excl_t *excl = plist.excl;
+#ifndef LJ_COMB
const int *atom_types = atdat.atom_types;
int ntypes = atdat.ntypes;
+#else
+ const float2 *lj_comb = atdat.lj_comb;
+ float2 ljcp_i, ljcp_j;
+#endif
const float4 *xq = atdat.xq;
float3 *f = atdat.f;
const float3 *shift_vec = atdat.shift_vec;
int sci, ci, cj,
ai, aj,
- cij4_start, cij4_end,
- typei, typej,
- i, jm, j4, wexcl_idx;
+ cij4_start, cij4_end;
+#ifndef LJ_COMB
+ int typei, typej;
+#endif
+ int i, jm, j4, wexcl_idx;
float qi, qj_f,
- r2, inv_r, inv_r2, inv_r6,
- c6, c12,
- int_bit,
+ r2, inv_r, inv_r2;
+#if !defined LJ_COMB_LB || defined CALC_ENERGIES
+ float inv_r6, c6, c12;
+#endif
+#ifdef LJ_COMB_LB
+ float sigma, epsilon;
+#endif
+ float int_bit,
F_invr;
#ifdef CALC_ENERGIES
float E_lj, E_el;
xqbuf = xq[aj];
xj = make_float3(xqbuf.x, xqbuf.y, xqbuf.z);
qj_f = xqbuf.w;
+#ifndef LJ_COMB
typej = atom_types[aj];
+#else
+ ljcp_j = lj_comb[aj];
+#endif
fcj_buf = make_float3(0.0f);
{
/* load the rest of the i-atom parameters */
qi = xqbuf.w;
- typei = atom_types[ai];
+#ifndef LJ_COMB
/* LJ 6*C6 and 12*C12 */
+ typei = atom_types[ai];
c6 = tex1Dfetch(nbfp_texref, 2 * (ntypes * typei + typej));
c12 = tex1Dfetch(nbfp_texref, 2 * (ntypes * typei + typej) + 1);
+#else
+ ljcp_i = lj_comb[ai];
+#ifdef LJ_COMB_GEOM
+ c6 = ljcp_i.x * ljcp_j.x;
+ c12 = ljcp_i.y * ljcp_j.y;
+#else
+ /* LJ 2^(1/6)*sigma and 12*epsilon */
+ sigma = ljcp_i.x + ljcp_j.x;
+ epsilon = ljcp_i.y * ljcp_j.y;
+#if defined CALC_ENERGIES || defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
+ convert_sigma_epsilon_to_c6_c12(sigma, epsilon, &c6, &c12);
+#endif
+#endif /* LJ_COMB_GEOM */
+#endif /* LJ_COMB */
/* avoid NaN for excluded pairs at r=0 */
r2 += (1.0f - int_bit) * NBNXN_AVOID_SING_R2_INC;
inv_r = rsqrt(r2);
inv_r2 = inv_r * inv_r;
+#if !defined LJ_COMB_LB || defined CALC_ENERGIES
inv_r6 = inv_r2 * inv_r2 * inv_r2;
#ifdef EXCLUSION_FORCES
/* We could mask inv_r2, but with Ewald
E_lj_p = int_bit * (c12 * (inv_r6 * inv_r6 + nbparam.repulsion_shift.cpot)*c_oneTwelveth -
c6 * (inv_r6 + nbparam.dispersion_shift.cpot)*c_oneSixth);
#endif
+#else /* !LJ_COMB_LB || CALC_ENERGIES */
+ float sig_r = sigma*inv_r;
+ float sig_r2 = sig_r*sig_r;
+ float sig_r6 = sig_r2*sig_r2*sig_r2;
+#ifdef EXCLUSION_FORCES
+ sig_r6 *= int_bit;
+#endif /* EXCLUSION_FORCES */
+
+ F_invr = epsilon * sig_r6 * (sig_r6 - 1.0f) * inv_r2;
+#endif /* !LJ_COMB_LB || CALC_ENERGIES */
#ifdef LJ_FORCE_SWITCH
#ifdef CALC_ENERGIES
#undef EL_EWALD_ANY
#undef EXCLUSION_FORCES
#undef LJ_EWALD
+
+#undef LJ_COMB
extern texture<float, 1, cudaReadModeElementType> coulomb_tab_texref;
#endif /* GMX_CUDA_NB_SINGLE_COMPILATION_UNIT */
+/*! Convert LJ sigma, epsilon parameters to C6, C12. */
+static __forceinline__ __device__
+void convert_sigma_epsilon_to_c6_c12(const float sigma,
+ const float epsilon,
+ float *c6,
+ float *c12)
+{
+ float sigma2, sigma6;
+
+ sigma2 = sigma * sigma;
+ sigma6 = sigma2 * sigma2 * sigma2;
+ *c6 = epsilon * sigma6;
+ *c12 = *c6 * sigma6;
+}
+
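A quick host-side sanity check of the scalings involved (an illustrative standalone snippet, not part of the patch; the numeric values are arbitrary): the kernels work with 6*C6 and 12*C12, and the LB path feeds this helper s = 2^(1/6)*sigma and e = 12*epsilon, for which e*s^6 = 24*eps*sigma^6 = 6*C6 and (e*s^6)*s^6 = 48*eps*sigma^12 = 12*C12, with C6 = 4*eps*sigma^6 and C12 = 4*eps*sigma^12:

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        const float sigma = 0.3f, epsilon = 0.5f;           /* arbitrary test values */
        const float s     = powf(2.0f, 1.0f/6.0f) * sigma;  /* scaled sigma   */
        const float e     = 12.0f * epsilon;                /* scaled epsilon */

        /* same arithmetic as convert_sigma_epsilon_to_c6_c12() above */
        const float s6  = s * s * s * s * s * s;
        const float c6  = e * s6;   /* should equal 6*C6   = 24*eps*sigma^6  */
        const float c12 = c6 * s6;  /* should equal 12*C12 = 48*eps*sigma^12 */

        printf("c6  = %g (expect %g)\n", c6,  24.0f * epsilon * powf(sigma,  6.0f));
        printf("c12 = %g (expect %g)\n", c12, 48.0f * epsilon * powf(sigma, 12.0f));
        return 0;
    }
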
/*! Apply force switch, force + energy version. */
static __forceinline__ __device__
void calculate_force_switch_F(const cu_nbparam_t nbparam,
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecCut_VdwLJ ## __VA_ARGS__
#include FLAVOR_LEVEL_GENERATOR
#undef NB_KERNEL_FUNC_NAME
+/* cut-off + V shift LJ w geometric combination rules */
+#define LJ_COMB_GEOM
+#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecCut_VdwLJCombGeom ## __VA_ARGS__
+#include FLAVOR_LEVEL_GENERATOR
+#undef LJ_COMB_GEOM
+#undef NB_KERNEL_FUNC_NAME
+/* cut-off + V shift LJ w LB combination rules */
+#define LJ_COMB_LB
+#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecCut_VdwLJCombLB ## __VA_ARGS__
+#include FLAVOR_LEVEL_GENERATOR
+#undef LJ_COMB_LB
+#undef NB_KERNEL_FUNC_NAME
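The flavor generation above relies on standard preprocessor token pasting; a minimal standalone sketch of the naming scheme (MAKE_NAME stands in for NB_KERNEL_FUNC_NAME, and a printf stub replaces the generated kernel body):

    #include <stdio.h>

    #define MAKE_NAME(x, ...) x ## _ElecCut_VdwLJCombGeom ## __VA_ARGS__

    /* expands to: nbnxn_kernel_ElecCut_VdwLJCombGeom_F_cuda */
    static void MAKE_NAME(nbnxn_kernel, _F_cuda)(void)
    {
        printf("force-only kernel stub\n");
    }

    int main(void)
    {
        nbnxn_kernel_ElecCut_VdwLJCombGeom_F_cuda();
        return 0;
    }
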
/* LJ-Ewald w geometric combination rules */
#define LJ_EWALD_COMB_GEOM
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecCut_VdwLJEwCombGeom ## __VA_ARGS__
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecRF_VdwLJ ## __VA_ARGS__
#include FLAVOR_LEVEL_GENERATOR
#undef NB_KERNEL_FUNC_NAME
+/* cut-off + V shift LJ w geometric combination rules */
+#define LJ_COMB_GEOM
+#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecRF_VdwLJCombGeom ## __VA_ARGS__
+#include FLAVOR_LEVEL_GENERATOR
+#undef LJ_COMB_GEOM
+#undef NB_KERNEL_FUNC_NAME
+/* cut-off + V shift LJ w LB combination rules */
+#define LJ_COMB_LB
+#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecRF_VdwLJCombLB ## __VA_ARGS__
+#include FLAVOR_LEVEL_GENERATOR
+#undef LJ_COMB_LB
+#undef NB_KERNEL_FUNC_NAME
/* LJ-Ewald w geometric combination rules */
#define LJ_EWALD_COMB_GEOM
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecRF_VdwLJEwCombGeom ## __VA_ARGS__
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEw_VdwLJ ## __VA_ARGS__
#include FLAVOR_LEVEL_GENERATOR
#undef NB_KERNEL_FUNC_NAME
+/* cut-off + V shift LJ w geometric combination rules */
+#define LJ_COMB_GEOM
+#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEw_VdwLJCombGeom ## __VA_ARGS__
+#include FLAVOR_LEVEL_GENERATOR
+#undef LJ_COMB_GEOM
+#undef NB_KERNEL_FUNC_NAME
+/* cut-off + V shift LJ w LB combination rules */
+#define LJ_COMB_LB
+#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEw_VdwLJCombLB ## __VA_ARGS__
+#include FLAVOR_LEVEL_GENERATOR
+#undef LJ_COMB_LB
+#undef NB_KERNEL_FUNC_NAME
/* LJ-Ewald w geometric combination rules */
#define LJ_EWALD_COMB_GEOM
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEw_VdwLJEwCombGeom ## __VA_ARGS__
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwTwinCut_VdwLJ ## __VA_ARGS__
#include FLAVOR_LEVEL_GENERATOR
#undef NB_KERNEL_FUNC_NAME
+/* cut-off + V shift LJ w geometric combination rules */
+#define LJ_COMB_GEOM
+#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwTwinCut_VdwLJCombGeom ## __VA_ARGS__
+#include FLAVOR_LEVEL_GENERATOR
+#undef LJ_COMB_GEOM
+#undef NB_KERNEL_FUNC_NAME
+/* cut-off + V shift LJ w LB combination rules */
+#define LJ_COMB_LB
+#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwTwinCut_VdwLJCombLB ## __VA_ARGS__
+#include FLAVOR_LEVEL_GENERATOR
+#undef LJ_COMB_LB
+#undef NB_KERNEL_FUNC_NAME
/* LJ-Ewald w geometric combination rules */
#define LJ_EWALD_COMB_GEOM
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwTwinCut_VdwLJEwCombGeom ## __VA_ARGS__
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTab_VdwLJ ## __VA_ARGS__
#include FLAVOR_LEVEL_GENERATOR
#undef NB_KERNEL_FUNC_NAME
+/* cut-off + V shift LJ w geometric combination rules */
+#define LJ_COMB_GEOM
+#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTab_VdwLJCombGeom ## __VA_ARGS__
+#include FLAVOR_LEVEL_GENERATOR
+#undef LJ_COMB_GEOM
+#undef NB_KERNEL_FUNC_NAME
+/* cut-off + V shift LJ w LB combination rules */
+#define LJ_COMB_LB
+#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTab_VdwLJCombLB ## __VA_ARGS__
+#include FLAVOR_LEVEL_GENERATOR
+#undef LJ_COMB_LB
+#undef NB_KERNEL_FUNC_NAME
/* LJ-Ewald w geometric combination rules */
#define LJ_EWALD_COMB_GEOM
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTab_VdwLJEwCombGeom ## __VA_ARGS__
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTabTwinCut_VdwLJ ## __VA_ARGS__
#include FLAVOR_LEVEL_GENERATOR
#undef NB_KERNEL_FUNC_NAME
+/* cut-off + V shift LJ w geometric combination rules */
+#define LJ_COMB_GEOM
+#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTabTwinCut_VdwLJCombGeom ## __VA_ARGS__
+#include FLAVOR_LEVEL_GENERATOR
+#undef LJ_COMB_GEOM
+#undef NB_KERNEL_FUNC_NAME
+/* cut-off + V shift LJ w LB combination rules */
+#define LJ_COMB_LB
+#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTabTwinCut_VdwLJCombLB ## __VA_ARGS__
+#include FLAVOR_LEVEL_GENERATOR
+#undef LJ_COMB_LB
+#undef NB_KERNEL_FUNC_NAME
/* LJ-Ewald w geometric combination rules */
#define LJ_EWALD_COMB_GEOM
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTabTwinCut_VdwLJEwCombGeom ## __VA_ARGS__
* should match the order of enumerated types below.
*/
enum evdwCu {
- evdwCuCUT, evdwCuFSWITCH, evdwCuPSWITCH, evdwCuEWALDGEOM, evdwCuEWALDLB, evdwCuNR
+ evdwCuCUT, evdwCuCUTCOMBGEOM, evdwCuCUTCOMBLB, evdwCuFSWITCH, evdwCuPSWITCH, evdwCuEWALDGEOM, evdwCuEWALDLB, evdwCuNR
};
/* All structs prefixed with "cu_" hold data used in GPU calculations and
int ntypes; /**< number of atom types */
int *atom_types; /**< atom type indices, size natoms */
+ float2 *lj_comb; /**< sqrt(c6), sqrt(c12); size natoms */
float3 *shift_vec; /**< shifts */
bool bShiftVecUploaded; /**< true if the shift vector has been uploaded */
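For the geometric-rule kernels, each lj_comb entry stores sqrt(c6) and sqrt(c12), so a pair's coefficients fall out of one multiply per component. A minimal host-side packing sketch (pack_lj_comb_geom is a hypothetical helper, not GROMACS API):

    #include <math.h>

    typedef struct { float x, y; } float2h;  /* host stand-in for CUDA float2 */

    /* Pack per-atom LJ coefficients; the kernel then recovers
     * c6_ij = p_i.x * p_j.x and c12_ij = p_i.y * p_j.y. */
    static float2h pack_lj_comb_geom(float c6, float c12)
    {
        float2h p;
        p.x = sqrtf(c6);
        p.y = sqrtf(c12);
        return p;
    }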