* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "nbnxn_cuda_data_mgmt.h"
+
+#include "config.h"
-#include <stdlib.h>
-#include <stdio.h>
#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
#include <cuda.h>
-#include "gmx_fatal.h"
-#include "smalloc.h"
-#include "tables.h"
-#include "typedefs.h"
-#include "types/enums.h"
-#include "types/nb_verlet.h"
-#include "types/interaction_const.h"
-#include "types/force_flags.h"
-#include "../nbnxn_consts.h"
-#include "gmx_detect_hardware.h"
+#include "gromacs/gmxlib/cuda_tools/cudautils.cuh"
+#include "gromacs/legacyheaders/gmx_detect_hardware.h"
+#include "gromacs/legacyheaders/gpu_utils.h"
+#include "gromacs/legacyheaders/pmalloc_cuda.h"
+#include "gromacs/legacyheaders/tables.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/legacyheaders/types/enums.h"
+#include "gromacs/legacyheaders/types/force_flags.h"
+#include "gromacs/legacyheaders/types/interaction_const.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_consts.h"
+#include "gromacs/pbcutil/ishift.h"
+#include "gromacs/utility/common.h"
+#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/smalloc.h"
#include "nbnxn_cuda_types.h"
-#include "../../gmxlib/cuda_tools/cudautils.cuh"
-#include "nbnxn_cuda_data_mgmt.h"
-#include "pmalloc_cuda.h"
-#include "gpu_utils.h"
-
-#include "gromacs/utility/common.h"
static bool bUseCudaEventBlockingSync = false; /* makes the CPU thread block */
/* Functions from nbnxn_cuda.cu */
extern void nbnxn_cuda_set_cacheconfig(cuda_dev_info_t *devinfo);
extern const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_nbfp_texref();
+extern const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_nbfp_comb_texref();
extern const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_coulomb_tab_texref();
/* We should actually be using md_print_warn in md_logging.c,
pmalloc((void**)&ftmp, tabsize*sizeof(*ftmp));
table_spline3_fill_ewald_lr(ftmp, NULL, NULL, tabsize,
- 1/tabscale, nbp->ewald_beta);
+ 1/tabscale, nbp->ewald_beta, v_q_ewald_lr);
/* If the table pointer == NULL the table is generated the first time =>
the array pointer will be saved to nbparam and the texture is bound.
return kernel_type;
}
+/*! Copies all parameters related to the cut-off from ic to nbp */
+/* Keeps the device-side nonbonded parameter struct in sync with the
+ * host-side interaction constants; also reused when PME load balancing
+ * changes the cut-offs at run time (see nbnxn_cuda_pme_loadbal_update_param).
+ */
+static void set_cutoff_parameters(cu_nbparam_t *nbp,
+ const interaction_const_t *ic)
+{
+ /* Electrostatics: Ewald and reaction-field constants */
+ nbp->ewald_beta = ic->ewaldcoeff_q;
+ nbp->sh_ewald = ic->sh_ewald;
+ nbp->epsfac = ic->epsfac;
+ nbp->two_k_rf = 2.0 * ic->k_rf;
+ nbp->c_rf = ic->c_rf;
+ /* Squared cut-offs, precomputed so kernels compare against r^2 directly */
+ nbp->rvdw_sq = ic->rvdw * ic->rvdw;
+ nbp->rcoulomb_sq = ic->rcoulomb * ic->rcoulomb;
+ nbp->rlist_sq = ic->rlist * ic->rlist;
+
+ /* LJ-PME (Ewald dispersion) constants */
+ nbp->sh_lj_ewald = ic->sh_lj_ewald;
+ nbp->ewaldcoeff_lj = ic->ewaldcoeff_lj;
+
+ /* VdW switch/shift modifier parameters */
+ nbp->rvdw_switch = ic->rvdw_switch;
+ nbp->dispersion_shift = ic->dispersion_shift;
+ nbp->repulsion_shift = ic->repulsion_shift;
+ nbp->vdw_switch = ic->vdw_switch;
+}
/*! Initializes the nonbonded parameter data structure. */
static void init_nbparam(cu_nbparam_t *nbp,
const cuda_dev_info_t *dev_info)
{
cudaError_t stat;
- int ntypes, nnbfp;
+ int ntypes, nnbfp, nnbfp_comb;
ntypes = nbat->ntype;
- nbp->ewald_beta = ic->ewaldcoeff_q;
- nbp->sh_ewald = ic->sh_ewald;
- nbp->epsfac = ic->epsfac;
- nbp->two_k_rf = 2.0 * ic->k_rf;
- nbp->c_rf = ic->c_rf;
- nbp->rvdw_sq = ic->rvdw * ic->rvdw;
- nbp->rcoulomb_sq = ic->rcoulomb * ic->rcoulomb;
- nbp->rlist_sq = ic->rlist * ic->rlist;
+ set_cutoff_parameters(nbp, ic);
- nbp->rvdw_switch = ic->rvdw_switch;
- nbp->dispersion_shift = ic->dispersion_shift;
- nbp->repulsion_shift = ic->repulsion_shift;
- nbp->vdw_switch = ic->vdw_switch;
-
- switch (ic->vdw_modifier)
+ if (ic->vdwtype == evdwCUT)
+ {
+ switch (ic->vdw_modifier)
+ {
+ case eintmodNONE:
+ case eintmodPOTSHIFT:
+ nbp->vdwtype = evdwCuCUT;
+ break;
+ case eintmodFORCESWITCH:
+ nbp->vdwtype = evdwCuFSWITCH;
+ break;
+ case eintmodPOTSWITCH:
+ nbp->vdwtype = evdwCuPSWITCH;
+ break;
+ default:
+ gmx_incons("The requested VdW interaction modifier is not implemented in the CUDA GPU accelerated kernels!");
+ break;
+ }
+ }
+ else if (ic->vdwtype == evdwPME)
+ {
+ if (ic->ljpme_comb_rule == ljcrGEOM)
+ {
+ assert(nbat->comb_rule == ljcrGEOM);
+ nbp->vdwtype = evdwCuEWALDGEOM;
+ }
+ else
+ {
+ assert(nbat->comb_rule == ljcrLB);
+ nbp->vdwtype = evdwCuEWALDLB;
+ }
+ }
+ else
{
- case eintmodNONE:
- case eintmodPOTSHIFT:
- nbp->vdwtype = evdwCuCUT;
- break;
- case eintmodFORCESWITCH:
- nbp->vdwtype = evdwCuFSWITCH;
- break;
- case eintmodPOTSWITCH:
- nbp->vdwtype = evdwCuPSWITCH;
- break;
- default:
- gmx_incons("The requested VdW interaction modifier is not implemented in the CUDA GPU accelerated kernels!");
- break;
+ gmx_incons("The requested VdW type is not implemented in the CUDA GPU accelerated kernels!");
}
if (ic->eeltype == eelCUT)
init_ewald_coulomb_force_table(nbp, dev_info);
}
- nnbfp = 2*ntypes*ntypes;
+ nnbfp = 2*ntypes*ntypes;
+ nnbfp_comb = 2*ntypes;
+
stat = cudaMalloc((void **)&nbp->nbfp, nnbfp*sizeof(*nbp->nbfp));
CU_RET_ERR(stat, "cudaMalloc failed on nbp->nbfp");
cu_copy_H2D(nbp->nbfp, nbat->nbfp, nnbfp*sizeof(*nbp->nbfp));
+
+ if (ic->vdwtype == evdwPME)
+ {
+ stat = cudaMalloc((void **)&nbp->nbfp_comb, nnbfp_comb*sizeof(*nbp->nbfp_comb));
+ CU_RET_ERR(stat, "cudaMalloc failed on nbp->nbfp_comb");
+ cu_copy_H2D(nbp->nbfp_comb, nbat->nbfp_comb, nnbfp_comb*sizeof(*nbp->nbfp_comb));
+ }
+
#ifdef TEXOBJ_SUPPORTED
/* Only device CC >= 3.0 (Kepler and later) support texture objects */
if (dev_info->prop.major >= 3)
{
cudaResourceDesc rd;
+ cudaTextureDesc td;
+
memset(&rd, 0, sizeof(rd));
rd.resType = cudaResourceTypeLinear;
rd.res.linear.devPtr = nbp->nbfp;
rd.res.linear.desc.x = 32;
rd.res.linear.sizeInBytes = nnbfp*sizeof(*nbp->nbfp);
- cudaTextureDesc td;
memset(&td, 0, sizeof(td));
td.readMode = cudaReadModeElementType;
stat = cudaCreateTextureObject(&nbp->nbfp_texobj, &rd, &td, NULL);
CU_RET_ERR(stat, "cudaCreateTextureObject on nbfp_texobj failed");
+
+ if (ic->vdwtype == evdwPME)
+ {
+ memset(&rd, 0, sizeof(rd));
+ rd.resType = cudaResourceTypeLinear;
+ rd.res.linear.devPtr = nbp->nbfp_comb;
+ rd.res.linear.desc.f = cudaChannelFormatKindFloat;
+ rd.res.linear.desc.x = 32;
+ rd.res.linear.sizeInBytes = nnbfp_comb*sizeof(*nbp->nbfp_comb);
+
+ memset(&td, 0, sizeof(td));
+ td.readMode = cudaReadModeElementType;
+ stat = cudaCreateTextureObject(&nbp->nbfp_comb_texobj, &rd, &td, NULL);
+ CU_RET_ERR(stat, "cudaCreateTextureObject on nbfp_comb_texobj failed");
+ }
}
else
#endif
stat = cudaBindTexture(NULL, &nbnxn_cuda_get_nbfp_texref(),
nbp->nbfp, &cd, nnbfp*sizeof(*nbp->nbfp));
CU_RET_ERR(stat, "cudaBindTexture on nbfp_texref failed");
+
+ if (ic->vdwtype == evdwPME)
+ {
+ stat = cudaBindTexture(NULL, &nbnxn_cuda_get_nbfp_comb_texref(),
+ nbp->nbfp_comb, &cd, nnbfp_comb*sizeof(*nbp->nbfp_comb));
+ CU_RET_ERR(stat, "cudaBindTexture on nbfp_comb_texref failed");
+ }
}
}
/*! Re-generate the GPU Ewald force table, resets rlist, and update the
* electrostatic type switching to twin cut-off (or back) if needed. */
-void nbnxn_cuda_pme_loadbal_update_param(nbnxn_cuda_ptr_t cu_nb,
- const interaction_const_t *ic)
+void nbnxn_cuda_pme_loadbal_update_param(const nonbonded_verlet_t *nbv,
+ const interaction_const_t *ic)
{
- cu_nbparam_t *nbp = cu_nb->nbparam;
+ if (!nbv || nbv->grp[0].kernel_type != nbnxnk8x8x8_CUDA)
+ {
+ return;
+ }
+ nbnxn_cuda_ptr_t cu_nb = nbv->cu_nbv;
+ cu_nbparam_t *nbp = cu_nb->nbparam;
- nbp->rlist_sq = ic->rlist * ic->rlist;
- nbp->rcoulomb_sq = ic->rcoulomb * ic->rcoulomb;
- nbp->ewald_beta = ic->ewaldcoeff_q;
+ set_cutoff_parameters(nbp, ic);
nbp->eeltype = pick_ewald_kernel_type(ic->rcoulomb != ic->rvdw,
cu_nb->dev_info);
* priorities, because we are querying the priority range which in this
* case will be a single value.
*/
-#if CUDA_VERSION >= 5500
+#if CUDA_VERSION >= 5050
{
int highest_priority;
stat = cudaDeviceGetStreamPriorityRange(NULL, &highest_priority);
}
cu_free_buffered(nbparam->nbfp);
+ if (nbparam->vdwtype == evdwCuEWALDGEOM || nbparam->vdwtype == evdwCuEWALDLB)
+ {
+#ifdef TEXOBJ_SUPPORTED
+ /* Only device CC >= 3.0 (Kepler and later) support texture objects */
+ if (cu_nb->dev_info->prop.major >= 3)
+ {
+ stat = cudaDestroyTextureObject(nbparam->nbfp_comb_texobj);
+ CU_RET_ERR(stat, "cudaDestroyTextureObject on nbfp_comb_texobj failed");
+ }
+ else
+#endif
+ {
+ stat = cudaUnbindTexture(nbnxn_cuda_get_nbfp_comb_texref());
+ CU_RET_ERR(stat, "cudaUnbindTexture on nbfp_comb_texref failed");
+ }
+ cu_free_buffered(nbparam->nbfp_comb);
+ }
+
stat = cudaFree(atdat->shift_vec);
CU_RET_ERR(stat, "cudaFree failed on atdat->shift_vec");
stat = cudaFree(atdat->fshift);
return (cu_nb != NULL && cu_nb->bDoTime) ? cu_nb->timings : NULL;
}
+/*! Resets the nonbonded GPU timings; no-op when no CUDA nonbonded module
+ * is in use (nbv->cu_nbv == NULL) or when timing is disabled. */
-void nbnxn_cuda_reset_timings(nbnxn_cuda_ptr_t cu_nb)
+void nbnxn_cuda_reset_timings(nonbonded_verlet_t* nbv)
{
- if (cu_nb->bDoTime)
+ if (nbv->cu_nbv && nbv->cu_nbv->bDoTime)
{
- init_timings(cu_nb->timings);
+ init_timings(nbv->cu_nbv->timings);
}
}