const int flags,
const InteractionLocality iloc)
{
- /* CUDA kernel launch-related stuff */
- int nblock;
- dim3 dim_block, dim_grid;
- nbnxn_cu_kfunc_ptr_t nb_kernel = nullptr; /* fn pointer to the nonbonded kernel */
-
cu_atomdata_t *adat = nb->atdat;
cu_nbparam_t *nbp = nb->nbparam;
cu_plist_t *plist = nb->plist[iloc];
t->interaction[iloc].nb_k.openTimingRegion(stream);
}
- /* get the pointer to the kernel flavor we need to use */
- nb_kernel = select_nbnxn_kernel(nbp->eeltype,
- nbp->vdwtype,
- bCalcEner,
- (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune),
- nb->dev_info);
-
/* Kernel launch config:
* - The thread block dimensions match the size of i-clusters, j-clusters,
* and j-cluster concurrency, in x, y, and z, respectively.
{
num_threads_z = 2;
}
- nblock = calc_nb_kernel_nblock(plist->nsci, nb->dev_info);
+ int nblock = calc_nb_kernel_nblock(plist->nsci, nb->dev_info);
+
KernelLaunchConfig config;
config.blockSize[0] = c_clSize;
config.sharedMemorySize);
}
- auto *timingEvent = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr;
- const auto kernelArgs = prepareGpuKernelArguments(nb_kernel, config, adat, nbp, plist, &bCalcFshift);
- launchGpuKernel(nb_kernel, config, timingEvent, "k_calc_nb", kernelArgs);
+ auto *timingEvent = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr;
+ const auto kernel = select_nbnxn_kernel(nbp->eeltype,
+ nbp->vdwtype,
+ bCalcEner,
+ (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune),
+ nb->dev_info);
+ const auto kernelArgs = prepareGpuKernelArguments(kernel, config, adat, nbp, plist, &bCalcFshift);
+ launchGpuKernel(kernel, config, timingEvent, "k_calc_nb", kernelArgs);
if (bDoTime)
{
auto *timingEvent = bDoTime ? timer->fetchNextEvent() : nullptr;
constexpr char kernelName[] = "k_pruneonly";
- const auto &kernel = plist->haveFreshList ? nbnxn_kernel_prune_cuda<true> : nbnxn_kernel_prune_cuda<false>;
+ const auto kernel = plist->haveFreshList ? nbnxn_kernel_prune_cuda<true> : nbnxn_kernel_prune_cuda<false>;
const auto kernelArgs = prepareGpuKernelArguments(kernel, config, adat, nbp, plist, &numParts, &part);
launchGpuKernel(kernel, config, timingEvent, kernelName, kernelArgs);
const int flags,
const Nbnxm::InteractionLocality iloc)
{
- /* OpenCL kernel launch-related stuff */
- cl_kernel nb_kernel = nullptr; /* fn pointer to the nonbonded kernel */
-
cl_atomdata_t *adat = nb->atdat;
cl_nbparam_t *nbp = nb->nbparam;
cl_plist_t *plist = nb->plist[iloc];
t->interaction[iloc].nb_k.openTimingRegion(stream);
}
- /* get the pointer to the kernel flavor we need to use */
- nb_kernel = select_nbnxn_kernel(nb,
- nbp->eeltype,
- nbp->vdwtype,
- bCalcEner,
- (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune));
-
/* kernel launch config */
KernelLaunchConfig config;
auto *timingEvent = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr;
constexpr char kernelName[] = "k_calc_nb";
+ const auto kernel = select_nbnxn_kernel(nb,
+ nbp->eeltype,
+ nbp->vdwtype,
+ bCalcEner,
+ (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune));
+
+
if (useLjCombRule(nb->nbparam->vdwtype))
{
- const auto kernelArgs = prepareGpuKernelArguments(nb_kernel, config,
+ const auto kernelArgs = prepareGpuKernelArguments(kernel, config,
&nbparams_params, &adat->xq, &adat->f, &adat->e_lj, &adat->e_el, &adat->fshift,
&adat->lj_comb,
&adat->shift_vec, &nbp->nbfp_climg2d, &nbp->nbfp_comb_climg2d, &nbp->coulomb_tab_climg2d,
&plist->sci, &plist->cj4, &plist->excl, &bCalcFshift);
- launchGpuKernel(nb_kernel, config, timingEvent, kernelName, kernelArgs);
+ launchGpuKernel(kernel, config, timingEvent, kernelName, kernelArgs);
}
else
{
- const auto kernelArgs = prepareGpuKernelArguments(nb_kernel, config,
+ const auto kernelArgs = prepareGpuKernelArguments(kernel, config,
&adat->ntypes,
&nbparams_params, &adat->xq, &adat->f, &adat->e_lj, &adat->e_el, &adat->fshift,
&adat->atom_types,
&adat->shift_vec, &nbp->nbfp_climg2d, &nbp->nbfp_comb_climg2d, &nbp->coulomb_tab_climg2d,
&plist->sci, &plist->cj4, &plist->excl, &bCalcFshift);
- launchGpuKernel(nb_kernel, config, timingEvent, kernelName, kernelArgs);
+ launchGpuKernel(kernel, config, timingEvent, kernelName, kernelArgs);
}
if (bDoTime)
* - The 1D block-grid contains as many blocks as super-clusters.
*/
int num_threads_z = getOclPruneKernelJ4Concurrency(nb->dev_info->vendor_e);
- cl_kernel pruneKernel = selectPruneKernel(nb->kernel_pruneonly, plist->haveFreshList);
/* kernel launch config */
KernelLaunchConfig config;
auto *timingEvent = bDoTime ? timer->fetchNextEvent() : nullptr;
constexpr char kernelName[] = "k_pruneonly";
+ const auto pruneKernel = selectPruneKernel(nb->kernel_pruneonly, plist->haveFreshList);
const auto kernelArgs = prepareGpuKernelArguments(pruneKernel, config,
&nbparams_params, &adat->xq, &adat->shift_vec,
&plist->sci, &plist->cj4, &plist->imask, &numParts, &part);