#include <stdlib.h>
#include <string.h>
+#include <algorithm>
+
#include "gromacs/domdec/domdec.h"
#include "gromacs/ewald/ewald.h"
#include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
}
static void init_ewald_f_table(interaction_const_t *ic,
- gmx_bool bUsesSimpleTables,
real rtab)
{
real maxr;
- if (bUsesSimpleTables)
- {
- /* Get the Ewald table spacing based on Coulomb and/or LJ
- * Ewald coefficients and rtol.
- */
- ic->tabq_scale = ewald_spline3_table_scale(ic);
+ /* Get the Ewald table spacing based on Coulomb and/or LJ
+ * Ewald coefficients and rtol.
+ */
+ ic->tabq_scale = ewald_spline3_table_scale(ic);
- maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
- ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
+ if (ic->cutoff_scheme == ecutsVERLET)
+ {
+ maxr = ic->rcoulomb;
}
else
{
- ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
- /* Subtract 2 iso 1 to avoid access out of range due to rounding */
- ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
+ maxr = std::max(ic->rcoulomb, rtab);
}
+ ic->tabq_size = static_cast<int>(maxr*ic->tabq_scale) + 2;
sfree_aligned(ic->tabq_coul_FDV0);
sfree_aligned(ic->tabq_coul_F);
void init_interaction_const_tables(FILE *fp,
interaction_const_t *ic,
- gmx_bool bUsesSimpleTables,
real rtab)
{
if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
{
- init_ewald_f_table(ic, bUsesSimpleTables, rtab);
+ init_ewald_f_table(ic, rtab);
if (fp != NULL)
{
snew(ic, 1);
+ ic->cutoff_scheme = fr->cutoff_scheme;
+
/* Just allocate something so we can free it */
snew_aligned(ic->tabq_coul_FDV0, 16, 32);
snew_aligned(ic->tabq_coul_F, 16, 32);
init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
}
+ init_interaction_const_tables(fp, fr->ic, rtab);
+
initialize_gpu_constants(cr, fr->ic, fr->nbv);
- init_interaction_const_tables(fp, fr->ic,
- uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1),
- rtab);
if (ir->eDispCorr != edispcNO)
{
#include "gromacs/gmxlib/cuda_tools/pmalloc_cuda.h"
#include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
#include "gromacs/legacyheaders/gmx_detect_hardware.h"
-#include "gromacs/legacyheaders/tables.h"
#include "gromacs/legacyheaders/typedefs.h"
#include "gromacs/legacyheaders/types/enums.h"
#include "gromacs/legacyheaders/types/force_flags.h"
/* Fw. decl. */
static void nbnxn_cuda_clear_e_fshift(gmx_nbnxn_cuda_t *nb);
+/* Fw. decl, */
+static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t *nbparam,
+ const gmx_device_info_t *dev_info);
+
/*! Tabulates the Ewald Coulomb force and initializes the size/scale
and the table GPU array. If called with an already allocated table,
it just re-uploads the table.
*/
-static void init_ewald_coulomb_force_table(cu_nbparam_t *nbp,
- const gmx_device_info_t *dev_info)
+static void init_ewald_coulomb_force_table(const interaction_const_t *ic,
+ cu_nbparam_t *nbp,
+ const gmx_device_info_t *dev_info)
{
- float *ftmp, *coul_tab;
- int tabsize;
- double tabscale;
+ float *coul_tab;
cudaError_t stat;
- tabsize = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
- /* Subtract 2 iso 1 to avoid access out of range due to rounding */
- tabscale = (tabsize - 2) / sqrt(nbp->rcoulomb_sq);
-
- pmalloc((void**)&ftmp, tabsize*sizeof(*ftmp));
-
- table_spline3_fill_ewald_lr(ftmp, NULL, NULL, tabsize,
- 1/tabscale, nbp->ewald_beta, v_q_ewald_lr);
-
- /* If the table pointer == NULL the table is generated the first time =>
- the array pointer will be saved to nbparam and the texture is bound.
- */
- coul_tab = nbp->coulomb_tab;
- if (coul_tab == NULL)
+ if (nbp->coulomb_tab != NULL)
{
- stat = cudaMalloc((void **)&coul_tab, tabsize*sizeof(*coul_tab));
- CU_RET_ERR(stat, "cudaMalloc failed on coul_tab");
+ nbnxn_cuda_free_nbparam_table(nbp, dev_info);
+ }
+
+ stat = cudaMalloc((void **)&coul_tab, ic->tabq_size*sizeof(*coul_tab));
+ CU_RET_ERR(stat, "cudaMalloc failed on coul_tab");
- nbp->coulomb_tab = coul_tab;
+ nbp->coulomb_tab = coul_tab;
#ifdef HAVE_CUDA_TEXOBJ_SUPPORT
- /* Only device CC >= 3.0 (Kepler and later) support texture objects */
- if (dev_info->prop.major >= 3)
- {
- cudaResourceDesc rd;
- memset(&rd, 0, sizeof(rd));
- rd.resType = cudaResourceTypeLinear;
- rd.res.linear.devPtr = nbp->coulomb_tab;
- rd.res.linear.desc.f = cudaChannelFormatKindFloat;
- rd.res.linear.desc.x = 32;
- rd.res.linear.sizeInBytes = tabsize*sizeof(*coul_tab);
+ /* Only device CC >= 3.0 (Kepler and later) support texture objects */
+ if (dev_info->prop.major >= 3)
+ {
+ cudaResourceDesc rd;
+ memset(&rd, 0, sizeof(rd));
+ rd.resType = cudaResourceTypeLinear;
+ rd.res.linear.devPtr = nbp->coulomb_tab;
+ rd.res.linear.desc.f = cudaChannelFormatKindFloat;
+ rd.res.linear.desc.x = 32;
+ rd.res.linear.sizeInBytes = ic->tabq_size*sizeof(*coul_tab);
- cudaTextureDesc td;
- memset(&td, 0, sizeof(td));
- td.readMode = cudaReadModeElementType;
- stat = cudaCreateTextureObject(&nbp->coulomb_tab_texobj, &rd, &td, NULL);
- CU_RET_ERR(stat, "cudaCreateTextureObject on coulomb_tab_texobj failed");
- }
- else
+ cudaTextureDesc td;
+ memset(&td, 0, sizeof(td));
+ td.readMode = cudaReadModeElementType;
+ stat = cudaCreateTextureObject(&nbp->coulomb_tab_texobj, &rd, &td, NULL);
+ CU_RET_ERR(stat, "cudaCreateTextureObject on coulomb_tab_texobj failed");
+ }
+ else
#endif /* HAVE_CUDA_TEXOBJ_SUPPORT */
- {
- GMX_UNUSED_VALUE(dev_info);
- cudaChannelFormatDesc cd = cudaCreateChannelDesc<float>();
- stat = cudaBindTexture(NULL, &nbnxn_cuda_get_coulomb_tab_texref(),
- coul_tab, &cd, tabsize*sizeof(*coul_tab));
- CU_RET_ERR(stat, "cudaBindTexture on coulomb_tab_texref failed");
- }
+ {
+ GMX_UNUSED_VALUE(dev_info);
+ cudaChannelFormatDesc cd = cudaCreateChannelDesc<float>();
+ stat = cudaBindTexture(NULL, &nbnxn_cuda_get_coulomb_tab_texref(),
+ coul_tab, &cd,
+ ic->tabq_size*sizeof(*coul_tab));
+ CU_RET_ERR(stat, "cudaBindTexture on coulomb_tab_texref failed");
}
- cu_copy_H2D(coul_tab, ftmp, tabsize*sizeof(*coul_tab));
-
- nbp->coulomb_tab_size = tabsize;
- nbp->coulomb_tab_scale = tabscale;
+ cu_copy_H2D(coul_tab, ic->tabq_coul_F, ic->tabq_size*sizeof(*coul_tab));
- pfree(ftmp);
+ nbp->coulomb_tab_size = ic->tabq_size;
+ nbp->coulomb_tab_scale = ic->tabq_scale;
}
nbp->coulomb_tab = NULL;
if (nbp->eeltype == eelCuEWALD_TAB || nbp->eeltype == eelCuEWALD_TAB_TWIN)
{
- init_ewald_coulomb_force_table(nbp, dev_info);
+ init_ewald_coulomb_force_table(ic, nbp, dev_info);
}
nnbfp = 2*ntypes*ntypes;
nbp->eeltype = pick_ewald_kernel_type(ic->rcoulomb != ic->rvdw,
nb->dev_info);
- init_ewald_coulomb_force_table(nb->nbparam, nb->dev_info);
+ init_ewald_coulomb_force_table(ic, nb->nbparam, nb->dev_info);
}
/*! Initializes the pair list data structure. */
}
}
+static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t *nbparam,
+ const gmx_device_info_t *dev_info)
+{
+ cudaError_t stat;
+
+ if (nbparam->eeltype == eelCuEWALD_TAB || nbparam->eeltype == eelCuEWALD_TAB_TWIN)
+ {
+#ifdef HAVE_CUDA_TEXOBJ_SUPPORT
+ /* Only device CC >= 3.0 (Kepler and later) support texture objects */
+ if (dev_info->prop.major >= 3)
+ {
+ stat = cudaDestroyTextureObject(nbparam->coulomb_tab_texobj);
+ CU_RET_ERR(stat, "cudaDestroyTextureObject on coulomb_tab_texobj failed");
+ }
+ else
+#endif
+ {
+ GMX_UNUSED_VALUE(dev_info);
+ stat = cudaUnbindTexture(nbnxn_cuda_get_coulomb_tab_texref());
+ CU_RET_ERR(stat, "cudaUnbindTexture on coulomb_tab_texref failed");
+ }
+ cu_free_buffered(nbparam->coulomb_tab, &nbparam->coulomb_tab_size);
+ }
+}
+
void nbnxn_gpu_free(gmx_nbnxn_cuda_t *nb)
{
cudaError_t stat;
plist_nl = nb->plist[eintNonlocal];
timers = nb->timers;
- if (nbparam->eeltype == eelCuEWALD_TAB || nbparam->eeltype == eelCuEWALD_TAB_TWIN)
- {
-
-#ifdef HAVE_CUDA_TEXOBJ_SUPPORT
- /* Only device CC >= 3.0 (Kepler and later) support texture objects */
- if (nb->dev_info->prop.major >= 3)
- {
- stat = cudaDestroyTextureObject(nbparam->coulomb_tab_texobj);
- CU_RET_ERR(stat, "cudaDestroyTextureObject on coulomb_tab_texobj failed");
- }
- else
-#endif
- {
- stat = cudaUnbindTexture(nbnxn_cuda_get_coulomb_tab_texref());
- CU_RET_ERR(stat, "cudaUnbindTexture on coulomb_tab_texref failed");
- }
- cu_free_buffered(nbparam->coulomb_tab, &nbparam->coulomb_tab_size);
- }
+ nbnxn_cuda_free_nbparam_table(nbparam, nb->dev_info);
stat = cudaEventDestroy(nb->nonlocal_done);
CU_RET_ERR(stat, "cudaEventDestroy failed on timers->nonlocal_done");