#include <cmath>
+// TODO We would like to move this down, but the way gmx_nbnxn_gpu_t
+// is currently declared means this has to be before gpu_types.h
+#include "nbnxm_ocl_types.h"
+
+// TODO Remove this comment when the above order issue is resolved
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/gpu_utils/oclutils.h"
#include "gromacs/hardware/gpu_hw_info.h"
#include "gromacs/utility/smalloc.h"
#include "nbnxm_ocl_internal.h"
-#include "nbnxm_ocl_types.h"
+
+namespace Nbnxm
+{
/*! \brief This parameter should be determined heuristically from the
* kernel execution times
else if ((EEL_PME(ic->eeltype) || ic->eeltype == eelEWALD))
{
/* Initially rcoulomb == rvdw, so it's surely not twin cut-off. */
- *gpu_eeltype = nbnxn_gpu_pick_ewald_kernel_type(false);
+ *gpu_eeltype = gpu_pick_ewald_kernel_type(false);
}
else
{
}
//! This function is documented in the header file
-void nbnxn_gpu_pme_loadbal_update_param(const nonbonded_verlet_t *nbv,
- const interaction_const_t *ic,
- const NbnxnListParameters *listParams)
+void gpu_pme_loadbal_update_param(const nonbonded_verlet_t *nbv,
+ const interaction_const_t *ic,
+ const NbnxnListParameters *listParams)
{
- if (!nbv || nbv->grp[0].kernel_type != nbnxnk8x8x8_GPU)
+ if (!nbv || nbv->grp[InteractionLocality::Local].kernel_type != nbnxnk8x8x8_GPU)
{
return;
}
set_cutoff_parameters(nbp, ic, listParams);
- nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(ic->rcoulomb != ic->rvdw);
+ nbp->eeltype = gpu_pick_ewald_kernel_type(ic->rcoulomb != ic->rvdw);
init_ewald_coulomb_force_table(ic, nb->nbparam, nb->dev_rundata);
}
pl->haveFreshList = false;
}
-/*! \brief Initializes the timer data structure.
- */
-static void init_timers(cl_timers_t *t,
- bool bUseTwoStreams)
-{
- for (int i = 0; i <= (bUseTwoStreams ? 1 : 0); i++)
- {
- t->didPairlistH2D[i] = false;
- t->didPrune[i] = false;
- t->didRollingPrune[i] = false;
- }
-}
-
/*! \brief Initializes the timings data structure.
*/
static void init_timings(gmx_wallclock_gpu_nbnxn_t *t)
cl_int cl_error;
cl_atomdata_t * adat = nb->atdat;
- cl_command_queue ls = nb->stream[eintLocal];
+ cl_command_queue ls = nb->stream[InteractionLocality::Local];
size_t local_work_size[3] = {1, 1, 1};
size_t global_work_size[3] = {1, 1, 1};
//! This function is documented in the header file
-void nbnxn_gpu_init(gmx_nbnxn_ocl_t **p_nb,
- const gmx_device_info_t *deviceInfo,
- const interaction_const_t *ic,
- const NbnxnListParameters *listParams,
- const nbnxn_atomdata_t *nbat,
- int rank,
- gmx_bool bLocalAndNonlocal)
+void gpu_init(gmx_nbnxn_ocl_t **p_nb,
+ const gmx_device_info_t *deviceInfo,
+ const interaction_const_t *ic,
+ const NbnxnListParameters *listParams,
+ const nbnxn_atomdata_t *nbat,
+ const int rank,
+ const gmx_bool bLocalAndNonlocal)
{
gmx_nbnxn_ocl_t *nb;
cl_int cl_error;
snew(nb, 1);
snew(nb->atdat, 1);
snew(nb->nbparam, 1);
- snew(nb->plist[eintLocal], 1);
+ snew(nb->plist[InteractionLocality::Local], 1);
if (bLocalAndNonlocal)
{
- snew(nb->plist[eintNonlocal], 1);
+ snew(nb->plist[InteractionLocality::NonLocal], 1);
}
nb->bUseTwoStreams = static_cast<cl_bool>(bLocalAndNonlocal);
pmalloc(reinterpret_cast<void**>(&nb->nbst.e_el), sizeof(*nb->nbst.e_el));
pmalloc(reinterpret_cast<void**>(&nb->nbst.fshift), SHIFTS * sizeof(*nb->nbst.fshift));
- init_plist(nb->plist[eintLocal]);
+ init_plist(nb->plist[InteractionLocality::Local]);
/* OpenCL timing disabled if GMX_DISABLE_GPU_TIMING is defined. */
nb->bDoTime = static_cast<cl_bool>(getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
nbnxn_gpu_create_context(nb->dev_rundata, nb->dev_info, rank);
/* local/non-local GPU streams */
- nb->stream[eintLocal] = clCreateCommandQueue(nb->dev_rundata->context, nb->dev_info->ocl_gpu_id.ocl_device_id, queue_properties, &cl_error);
+ nb->stream[InteractionLocality::Local] =
+ clCreateCommandQueue(nb->dev_rundata->context, nb->dev_info->ocl_gpu_id.ocl_device_id, queue_properties, &cl_error);
if (CL_SUCCESS != cl_error)
{
gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s: OpenCL error %d",
if (nb->bUseTwoStreams)
{
- init_plist(nb->plist[eintNonlocal]);
+ init_plist(nb->plist[InteractionLocality::NonLocal]);
- nb->stream[eintNonlocal] = clCreateCommandQueue(nb->dev_rundata->context, nb->dev_info->ocl_gpu_id.ocl_device_id, queue_properties, &cl_error);
+ nb->stream[InteractionLocality::NonLocal] =
+ clCreateCommandQueue(nb->dev_rundata->context, nb->dev_info->ocl_gpu_id.ocl_device_id, queue_properties, &cl_error);
if (CL_SUCCESS != cl_error)
{
gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s: OpenCL error %d",
if (nb->bDoTime)
{
- init_timers(nb->timers, nb->bUseTwoStreams == CL_TRUE);
init_timings(nb->timings);
}
cl_int gmx_used_in_debug cl_error;
cl_atomdata_t *atomData = nb->atdat;
- cl_command_queue ls = nb->stream[eintLocal];
+ cl_command_queue ls = nb->stream[InteractionLocality::Local];
cl_float value = 0.0f;
cl_error = clEnqueueFillBuffer(ls, atomData->f, &value, sizeof(cl_float),
//! This function is documented in the header file
void
-nbnxn_gpu_clear_outputs(gmx_nbnxn_ocl_t *nb,
- int flags)
+gpu_clear_outputs(gmx_nbnxn_ocl_t *nb,
+ const int flags)
{
nbnxn_ocl_clear_f(nb, nb->atdat->natoms);
/* clear shift force array and energies if the outputs were
/* kick off buffer clearing kernel to ensure concurrency with constraints/update */
cl_int gmx_unused cl_error;
- cl_error = clFlush(nb->stream[eintLocal]);
+ cl_error = clFlush(nb->stream[InteractionLocality::Local]);
assert(CL_SUCCESS == cl_error);
}
//! This function is documented in the header file
-void nbnxn_gpu_init_pairlist(gmx_nbnxn_ocl_t *nb,
- const NbnxnPairlistGpu *h_plist,
- int iloc)
+void gpu_init_pairlist(gmx_nbnxn_ocl_t *nb,
+ const NbnxnPairlistGpu *h_plist,
+ const InteractionLocality iloc)
{
char sbuf[STRLEN];
// Timing accumulation should happen only if there was work to do
}
}
+ gpu_timers_t::Interaction &iTimers = nb->timers->interaction[iloc];
+
if (bDoTime)
{
- nb->timers->pl_h2d[iloc].openTimingRegion(stream);
- nb->timers->didPairlistH2D[iloc] = true;
+ iTimers.pl_h2d.openTimingRegion(stream);
+ iTimers.didPairlistH2D = true;
}
// TODO most of this function is the same in CUDA and OpenCL; move it into the header
&d_plist->nsci, &d_plist->sci_nalloc, context);
copyToDeviceBuffer(&d_plist->sci, h_plist->sci.data(), 0, h_plist->sci.size(),
stream, GpuApiCallBehavior::Async,
- bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
+ bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
reallocateDeviceBuffer(&d_plist->cj4, h_plist->cj4.size(),
&d_plist->ncj4, &d_plist->cj4_nalloc, context);
copyToDeviceBuffer(&d_plist->cj4, h_plist->cj4.data(), 0, h_plist->cj4.size(),
stream, GpuApiCallBehavior::Async,
- bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
+ bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
reallocateDeviceBuffer(&d_plist->imask, h_plist->cj4.size()*c_nbnxnGpuClusterpairSplit,
&d_plist->nimask, &d_plist->imask_nalloc, context);
&d_plist->nexcl, &d_plist->excl_nalloc, context);
copyToDeviceBuffer(&d_plist->excl, h_plist->excl.data(), 0, h_plist->excl.size(),
stream, GpuApiCallBehavior::Async,
- bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
+ bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
if (bDoTime)
{
- nb->timers->pl_h2d[iloc].closeTimingRegion(stream);
+ iTimers.pl_h2d.closeTimingRegion(stream);
}
/* need to prune the pair list during the next step */
}
//! This function is documented in the header file
-void nbnxn_gpu_upload_shiftvec(gmx_nbnxn_ocl_t *nb,
- const nbnxn_atomdata_t *nbatom)
+void gpu_upload_shiftvec(gmx_nbnxn_ocl_t *nb,
+ const nbnxn_atomdata_t *nbatom)
{
cl_atomdata_t *adat = nb->atdat;
- cl_command_queue ls = nb->stream[eintLocal];
+ cl_command_queue ls = nb->stream[InteractionLocality::Local];
/* only if we have a dynamic box */
if (nbatom->bDynamicBox || !adat->bShiftVecUploaded)
}
//! This function is documented in the header file
-void nbnxn_gpu_init_atomdata(gmx_nbnxn_ocl_t *nb,
- const nbnxn_atomdata_t *nbat)
+void gpu_init_atomdata(gmx_nbnxn_ocl_t *nb,
+ const nbnxn_atomdata_t *nbat)
{
cl_int cl_error;
int nalloc, natoms;
bool bDoTime = nb->bDoTime == CL_TRUE;
cl_timers_t *timers = nb->timers;
cl_atomdata_t *d_atdat = nb->atdat;
- cl_command_queue ls = nb->stream[eintLocal];
+ cl_command_queue ls = nb->stream[InteractionLocality::Local];
natoms = nbat->numAtoms();
realloced = false;
}
//! This function is documented in the header file
-void nbnxn_gpu_free(gmx_nbnxn_ocl_t *nb)
+void gpu_free(gmx_nbnxn_ocl_t *nb)
{
if (nb == nullptr)
{
sfree(nb->nbparam);
/* Free plist */
- auto *plist = nb->plist[eintLocal];
+ auto *plist = nb->plist[InteractionLocality::Local];
freeDeviceBuffer(&plist->sci);
freeDeviceBuffer(&plist->cj4);
freeDeviceBuffer(&plist->imask);
sfree(plist);
if (nb->bUseTwoStreams)
{
- auto *plist_nl = nb->plist[eintNonlocal];
+ auto *plist_nl = nb->plist[InteractionLocality::NonLocal];
freeDeviceBuffer(&plist_nl->sci);
freeDeviceBuffer(&plist_nl->cj4);
freeDeviceBuffer(&plist_nl->imask);
nb->nbst.fshift = nullptr;
/* Free command queues */
- clReleaseCommandQueue(nb->stream[eintLocal]);
- nb->stream[eintLocal] = nullptr;
+ clReleaseCommandQueue(nb->stream[InteractionLocality::Local]);
+ nb->stream[InteractionLocality::Local] = nullptr;
if (nb->bUseTwoStreams)
{
- clReleaseCommandQueue(nb->stream[eintNonlocal]);
- nb->stream[eintNonlocal] = nullptr;
+ clReleaseCommandQueue(nb->stream[InteractionLocality::NonLocal]);
+ nb->stream[InteractionLocality::NonLocal] = nullptr;
}
/* Free other events */
if (nb->nonlocal_done)
}
//! This function is documented in the header file
// Returns the timings structure held by nb, or nullptr when there is
// nothing to report (nb is null or timing is switched off via nb->bDoTime).
// The -/+ pair below only renames the function: the nbnxn_ prefix is dropped.
-gmx_wallclock_gpu_nbnxn_t *nbnxn_gpu_get_timings(gmx_nbnxn_ocl_t *nb)
+gmx_wallclock_gpu_nbnxn_t *gpu_get_timings(gmx_nbnxn_ocl_t *nb)
{
/* The null check comes first so that nb->bDoTime is only read for a valid nb. */
return (nb != nullptr && nb->bDoTime) ? nb->timings : nullptr;
}
//! This function is documented in the header file
-void nbnxn_gpu_reset_timings(nonbonded_verlet_t* nbv)
+void gpu_reset_timings(nonbonded_verlet_t* nbv)
{
if (nbv->gpu_nbv && nbv->gpu_nbv->bDoTime)
{
}
//! This function is documented in the header file
// Returns gpu_min_ci_balanced_factor multiplied by the compute_units value of
// the device attached to nb, or 0 when nb is null.
// NOTE(review): gpu_min_ci_balanced_factor is defined outside this view;
// presumably it is the heuristic tuning parameter declared near the top of
// the file - confirm.
// The -/+ pair below only renames the function: the nbnxn_ prefix is dropped.
-int nbnxn_gpu_min_ci_balanced(gmx_nbnxn_ocl_t *nb)
+int gpu_min_ci_balanced(gmx_nbnxn_ocl_t *nb)
{
/* A null nb yields 0 instead of dereferencing nb->dev_info. */
return nb != nullptr ?
gpu_min_ci_balanced_factor * nb->dev_info->compute_units : 0;
}
//! This function is documented in the header file
// Returns true when nb->nbparam->eeltype is one of the two analytical Ewald
// values, eelOclEWALD_ANA or eelOclEWALD_ANA_TWIN; false for any other value.
// NOTE(review): unlike gpu_get_timings and gpu_min_ci_balanced, this function
// dereferences nb (and nb->nbparam) without a null check - confirm that all
// callers pass a valid, initialized nb.
// The -/+ pair below only renames the function: the nbnxn_ prefix is dropped.
-gmx_bool nbnxn_gpu_is_kernel_ewald_analytical(const gmx_nbnxn_ocl_t *nb)
+gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxn_ocl_t *nb)
{
return ((nb->nbparam->eeltype == eelOclEWALD_ANA) ||
(nb->nbparam->eeltype == eelOclEWALD_ANA_TWIN));
}
+
+} // namespace Nbnxm