src/gromacs/nbnxm/kernels_simd_4xm/kernel_common.h: warning: should include "nbnxm_simd.h"
# This seems to be a false positive
-src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h: error: gmx_nbnxm_gpu_t: is in internal file(s), but appears in public documentation
+src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h: error: NbnxmGpu: is in internal file(s), but appears in public documentation
# Temporary while we change the SIMD implementation
src/gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace_common.h: warning: should include "simd.h"
void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet& gridSet,
const gmx::AtomLocality locality,
bool fillLocal,
- gmx_nbnxm_gpu_t* gpu_nbv,
+ NbnxmGpu* gpu_nbv,
DeviceBuffer<float> d_x,
GpuEventSynchronizer* xReadyOnDevice)
{
const Nbnxm::GridSet& gridSet,
void* pmeForcesDevice,
gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
- gmx_nbnxm_gpu_t* gpu_nbv,
+ NbnxmGpu* gpu_nbv,
bool useGpuFPmeReduction,
bool accumulateForce)
{
class MDLogger;
}
-struct gmx_nbnxm_gpu_t;
+struct NbnxmGpu;
struct nbnxn_atomdata_t;
struct nonbonded_verlet_t;
struct t_mdatoms;
void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet& gridSet,
gmx::AtomLocality locality,
bool fillLocal,
- gmx_nbnxm_gpu_t* gpu_nbv,
+ NbnxmGpu* gpu_nbv,
DeviceBuffer<float> d_x,
GpuEventSynchronizer* xReadyOnDevice);
const Nbnxm::GridSet& gridSet,
void* pmeForcesDevice,
gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
- gmx_nbnxm_gpu_t* gpu_nbv,
+ NbnxmGpu* gpu_nbv,
bool useGpuFPmeReduction,
bool accumulateForce);
* the local, this function records the event when called with the local stream
* and inserts into the non-local stream a wait on that event.
*/
-void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxm_gpu_t* nb, const InteractionLocality interactionLocality)
+void nbnxnInsertNonlocalGpuDependency(const NbnxmGpu* nb, const InteractionLocality interactionLocality)
{
cudaStream_t stream = nb->stream[interactionLocality];
}
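A minimal sketch of the record/wait pattern the comment above describes, using only the CUDA runtime API. The member names mirror the NbnxmGpu fields shown later in this change; this illustrates the technique, not the verbatim implementation.

static void insertNonlocalDependencySketch(NbnxmGpu* nb, const InteractionLocality iloc)
{
    cudaStream_t stream = nb->stream[iloc];
    if (iloc == InteractionLocality::Local)
    {
        // Local stream: mark the point where the local x/q H2D and misc ops are done.
        cudaEventRecord(nb->misc_ops_and_local_H2D_done, stream);
    }
    else
    {
        // Non-local stream: enqueue a wait so its tasks start only after that point.
        cudaStreamWaitEvent(stream, nb->misc_ops_and_local_H2D_done, 0);
    }
}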
/*! \brief Launch asynchronously the xq buffer host to device copy. */
-void gpu_copy_xq_to_gpu(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom, const AtomLocality atomLocality)
+void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const AtomLocality atomLocality)
{
GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
the local x+q H2D (and all preceding) tasks are complete and synchronize
with this event in the non-local stream before launching the non-bonded kernel.
*/
-void gpu_launch_kernel(gmx_nbnxm_gpu_t* nb, const gmx::StepWorkload& stepWork, const InteractionLocality iloc)
+void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const InteractionLocality iloc)
{
cu_atomdata_t* adat = nb->atdat;
cu_nbparam_t* nbp = nb->nbparam;
return shmem;
}
-void gpu_launch_kernel_pruneonly(gmx_nbnxm_gpu_t* nb, const InteractionLocality iloc, const int numParts)
+void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, const int numParts)
{
cu_atomdata_t* adat = nb->atdat;
cu_nbparam_t* nbp = nb->nbparam;
}
}
-void gpu_launch_cpyback(gmx_nbnxm_gpu_t* nb,
+void gpu_launch_cpyback(NbnxmGpu* nb,
nbnxn_atomdata_t* nbatom,
const gmx::StepWorkload& stepWork,
const AtomLocality atomLocality)
/* X buffer operations on GPU: performs conversion from rvec to nb format. */
void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid& grid,
bool setFillerCoords,
- gmx_nbnxm_gpu_t* nb,
+ NbnxmGpu* nb,
DeviceBuffer<float> d_x,
GpuEventSynchronizer* xReadyOnDevice,
const Nbnxm::AtomLocality locality,
*/
void nbnxn_gpu_add_nbat_f_to_f(const AtomLocality atomLocality,
DeviceBuffer<float> totalForcesDevice,
- gmx_nbnxm_gpu_t* nb,
+ NbnxmGpu* nb,
void* pmeForcesDevice,
gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
int atomStart,
#include <stdio.h>
#include <stdlib.h>
-// TODO We would like to move this down, but the way gmx_nbnxm_gpu_t
+// TODO We would like to move this down, but the way NbnxmGpu
// is currently declared means this has to be before gpu_types.h
#include "nbnxm_cuda_types.h"
static unsigned int gpu_min_ci_balanced_factor = 44;
/* Fw. decl. */
-static void nbnxn_cuda_clear_e_fshift(gmx_nbnxm_gpu_t* nb);
+static void nbnxn_cuda_clear_e_fshift(NbnxmGpu* nb);
/* Fw. decl. */
static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t* nbparam);
}
/*! Initializes simulation constant data. */
-static void cuda_init_const(gmx_nbnxm_gpu_t* nb,
+static void cuda_init_const(NbnxmGpu* nb,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t::Params& nbatParams)
nbnxn_cuda_clear_e_fshift(nb);
}
-gmx_nbnxm_gpu_t* gpu_init(const gmx_device_info_t* deviceInfo,
- const interaction_const_t* ic,
- const PairlistParams& listParams,
- const nbnxn_atomdata_t* nbat,
- int /*rank*/,
- gmx_bool bLocalAndNonlocal)
+NbnxmGpu* gpu_init(const gmx_device_info_t* deviceInfo,
+ const interaction_const_t* ic,
+ const PairlistParams& listParams,
+ const nbnxn_atomdata_t* nbat,
+ int /*rank*/,
+ gmx_bool bLocalAndNonlocal)
{
cudaError_t stat;
- gmx_nbnxm_gpu_t* nb;
- snew(nb, 1);
+ auto nb = new NbnxmGpu;
snew(nb->atdat, 1);
snew(nb->nbparam, 1);
snew(nb->plist[InteractionLocality::Local], 1);
return nb;
}
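The switch from snew() to a new-expression above relies on NbnxmGpu gaining default member initializers (see the struct further down in this change): snew() zero-fills its allocation, while a plain new-expression default-initializes and would otherwise leave the members indeterminate. A small sketch of the contrast, using a hypothetical Example struct:

// Hypothetical struct: the in-class initializers supply the zeroing
// that snew() previously performed implicitly.
struct Example
{
    int* data = nullptr;
    int  size = 0;
};

Example* e = new Example; // e->data == nullptr, e->size == 0
delete e;                 // pairs with new, replacing sfree()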
-void gpu_init_pairlist(gmx_nbnxm_gpu_t* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
+void gpu_init_pairlist(NbnxmGpu* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
{
char sbuf[STRLEN];
bool bDoTime = (nb->bDoTime && !h_plist->sci.empty());
d_plist->haveFreshList = true;
}
-void gpu_upload_shiftvec(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom)
+void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
{
cu_atomdata_t* adat = nb->atdat;
cudaStream_t ls = nb->stream[InteractionLocality::Local];
}
/*! Clears the first natoms_clear elements of the GPU nonbonded force output array. */
-static void nbnxn_cuda_clear_f(gmx_nbnxm_gpu_t* nb, int natoms_clear)
+static void nbnxn_cuda_clear_f(NbnxmGpu* nb, int natoms_clear)
{
cudaError_t stat;
cu_atomdata_t* adat = nb->atdat;
}
/*! Clears nonbonded shift force output array and energy outputs on the GPU. */
-static void nbnxn_cuda_clear_e_fshift(gmx_nbnxm_gpu_t* nb)
+static void nbnxn_cuda_clear_e_fshift(NbnxmGpu* nb)
{
cudaError_t stat;
cu_atomdata_t* adat = nb->atdat;
CU_RET_ERR(stat, "cudaMemsetAsync on e_el failed");
}
-void gpu_clear_outputs(gmx_nbnxm_gpu_t* nb, bool computeVirial)
+void gpu_clear_outputs(NbnxmGpu* nb, bool computeVirial)
{
nbnxn_cuda_clear_f(nb, nb->atdat->natoms);
/* clear shift force array and energies if the outputs were
}
}
-void gpu_init_atomdata(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbat)
+void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
{
cudaError_t stat;
int nalloc, natoms;
}
}
-void gpu_free(gmx_nbnxm_gpu_t* nb)
+void gpu_free(NbnxmGpu* nb)
{
cudaError_t stat;
cu_atomdata_t* atdat;
sfree(atdat);
sfree(nbparam);
sfree(nb->timings);
- sfree(nb);
+ delete nb;
if (debug)
{
}
//! This function is documented in the header file
-gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxm_gpu_t* nb)
+gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(NbnxmGpu* nb)
{
return (nb != nullptr && nb->bDoTime) ? nb->timings : nullptr;
}
}
}
-int gpu_min_ci_balanced(gmx_nbnxm_gpu_t* nb)
+int gpu_min_ci_balanced(NbnxmGpu* nb)
{
return nb != nullptr ? gpu_min_ci_balanced_factor * nb->dev_info->prop.multiProcessorCount : 0;
}
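With gpu_min_ci_balanced_factor at its default of 44, a device with, say, 80 multiprocessors yields 44 * 80 = 3520 as the minimum i-list count for balancing.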
-gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxm_gpu_t* nb)
+gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb)
{
return ((nb->nbparam->eeltype == eelCuEWALD_ANA) || (nb->nbparam->eeltype == eelCuEWALD_ANA_TWIN));
}
-void* gpu_get_command_stream(gmx_nbnxm_gpu_t* nb, const InteractionLocality iloc)
+void* gpu_get_command_stream(NbnxmGpu* nb, const InteractionLocality iloc)
{
assert(nb);
return static_cast<void*>(&nb->stream[iloc]);
}
-void* gpu_get_xq(gmx_nbnxm_gpu_t* nb)
+void* gpu_get_xq(NbnxmGpu* nb)
{
assert(nb);
return static_cast<void*>(nb->atdat->xq);
}
-void* gpu_get_f(gmx_nbnxm_gpu_t* nb)
+void* gpu_get_f(NbnxmGpu* nb)
{
assert(nb);
return static_cast<void*>(nb->atdat->f);
}
-rvec* gpu_get_fshift(gmx_nbnxm_gpu_t* nb)
+rvec* gpu_get_fshift(NbnxmGpu* nb)
{
assert(nb);
/* Initialization for X buffer operations on GPU. */
/* TODO Remove explicit pinning of host arrays from here and manage it in a more natural way */
-void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, gmx_nbnxm_gpu_t* gpu_nbv)
+void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, NbnxmGpu* gpu_nbv)
{
cudaStream_t stream = gpu_nbv->stream[InteractionLocality::Local];
bool bDoTime = gpu_nbv->bDoTime;
/* Initialization for F buffer operations on GPU. */
void nbnxn_gpu_init_add_nbat_f_to_f(const int* cell,
- gmx_nbnxm_gpu_t* gpu_nbv,
+ NbnxmGpu* gpu_nbv,
int natoms_total,
GpuEventSynchronizer* const localReductionDone)
{
/*! \cond */
typedef struct cu_atomdata cu_atomdata_t;
typedef struct cu_nbparam cu_nbparam_t;
-typedef struct nb_staging nb_staging_t;
/*! \endcond */
* The energies/shift forces get downloaded here first, before getting added
* to the CPU-side aggregate values.
*/
-struct nb_staging
+struct nb_staging_t
{
//! LJ energy
- float* e_lj;
+ float* e_lj = nullptr;
//! electrostatic energy
- float* e_el;
+ float* e_el = nullptr;
//! shift forces
- float3* fshift;
+ float3* fshift = nullptr;
};
/*! \internal
* \brief Main data structure for CUDA nonbonded force calculations.
*/
-struct gmx_nbnxm_gpu_t
+struct NbnxmGpu
{
/*! \brief CUDA device information */
- const gmx_device_info_t* dev_info;
+ const gmx_device_info_t* dev_info = nullptr;
/*! \brief true if doing both local/non-local NB work on GPU */
- bool bUseTwoStreams;
+ bool bUseTwoStreams = false;
/*! \brief atom data */
- cu_atomdata_t* atdat;
+ cu_atomdata_t* atdat = nullptr;
/*! \brief f buf ops cell index mapping */
- int* cell;
+ int* cell = nullptr;
/*! \brief number of indices in cell buffer */
- int ncell;
+ int ncell = 0;
/*! \brief number of indices allocated in cell buffer */
- int ncell_alloc;
+ int ncell_alloc = 0;
/*! \brief array of atom indices */
- int* atomIndices;
+ int* atomIndices = nullptr;
/*! \brief size of atom indices */
- int atomIndicesSize;
+ int atomIndicesSize = 0;
/*! \brief size of atom indices allocated in device buffer */
- int atomIndicesSize_alloc;
+ int atomIndicesSize_alloc = 0;
/*! \brief x buf ops num of atoms */
- int* cxy_na;
+ int* cxy_na = nullptr;
/*! \brief number of elements in cxy_na */
- int ncxy_na;
+ int ncxy_na = 0;
/*! \brief number of elements allocated in device buffer */
- int ncxy_na_alloc;
+ int ncxy_na_alloc = 0;
/*! \brief x buf ops cell index mapping */
- int* cxy_ind;
+ int* cxy_ind = nullptr;
/*! \brief number of elements in cxy_ind */
- int ncxy_ind;
+ int ncxy_ind = 0;
/*! \brief number of elements allocated in device buffer */
- int ncxy_ind_alloc;
+ int ncxy_ind_alloc = 0;
/*! \brief parameters required for the non-bonded calc. */
- cu_nbparam_t* nbparam;
+ cu_nbparam_t* nbparam = nullptr;
/*! \brief pair-list data structures (local and non-local) */
- gmx::EnumerationArray<Nbnxm::InteractionLocality, cu_plist_t*> plist;
+ gmx::EnumerationArray<Nbnxm::InteractionLocality, cu_plist_t*> plist = { { nullptr } };
/*! \brief staging area where fshift/energies get downloaded */
nb_staging_t nbst;
/*! \brief local and non-local GPU streams */
- gmx::EnumerationArray<Nbnxm::InteractionLocality, cudaStream_t> stream;
+ gmx::EnumerationArray<Nbnxm::InteractionLocality, cudaStream_t> stream = { { nullptr } };
/*! \brief Events used for synchronization */
/*! \{ */
/*! \brief Event triggered when the non-local non-bonded
* kernel is done (and the local transfer can proceed) */
- cudaEvent_t nonlocal_done;
+ cudaEvent_t nonlocal_done = nullptr;
/*! \brief Event triggered when the tasks issued in the local
* stream that need to precede the non-local force or buffer
* operation calculations are done (e.g. f buffer 0-ing, local
* x/q H2D, buffer op initialization in local stream that is
* required also by nonlocal stream ) */
- cudaEvent_t misc_ops_and_local_H2D_done;
+ cudaEvent_t misc_ops_and_local_H2D_done = nullptr;
/*! \} */
/*! \brief True if there is work for the current domain in the
* respective locality. As long as bonded work is not split up into
* local/nonlocal, if there is bonded GPU work, both flags
* will be true. */
- gmx::EnumerationArray<Nbnxm::InteractionLocality, bool> haveWork;
+ gmx::EnumerationArray<Nbnxm::InteractionLocality, bool> haveWork = { { false } };
/*! \brief Pointer to event synchronizer triggered when the local
* GPU buffer ops / reduction is complete
* \note That the synchronizer is managed outside of this module
* in StatePropagatorDataGpu.
*/
- GpuEventSynchronizer* localFReductionDone;
+ GpuEventSynchronizer* localFReductionDone = nullptr;
/*! \brief Event triggered when non-local coordinate buffer
* has been copied from device to host. */
- GpuEventSynchronizer* xNonLocalCopyD2HDone;
+ GpuEventSynchronizer* xNonLocalCopyD2HDone = nullptr;
/* NOTE: With current CUDA versions (<=5.0) timing doesn't work with multiple
* concurrent streams, so we won't time if both l/nl work is done on GPUs.
* Timer init/uninit is still done even with timing off so only the condition
* setting bDoTime needs to be changed if this CUDA "feature" gets fixed. */
/*! \brief True if event-based timing is enabled. */
- bool bDoTime;
+ bool bDoTime = false;
/*! \brief CUDA event-based timers. */
- cu_timers_t* timers;
+ cu_timers_t* timers = nullptr;
/*! \brief Timing data. TODO: deprecate this and query timers for accumulated data instead */
- gmx_wallclock_gpu_nbnxn_t* timings;
+ gmx_wallclock_gpu_nbnxn_t* timings = nullptr;
};
#endif /* NBNXN_CUDA_TYPES_H */
//NOLINTNEXTLINE(misc-definitions-in-headers)
-void setupGpuShortRangeWork(gmx_nbnxm_gpu_t* nb,
- const gmx::GpuBonded* gpuBonded,
- const gmx::InteractionLocality iLocality)
+void setupGpuShortRangeWork(NbnxmGpu* nb, const gmx::GpuBonded* gpuBonded, const gmx::InteractionLocality iLocality)
{
GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
* \param[inout] nb Pointer to the nonbonded GPU data structure
* \param[in] iLocality Interaction locality identifier
*/
-static bool haveGpuShortRangeWork(const gmx_nbnxm_gpu_t& nb, const gmx::InteractionLocality iLocality)
+static bool haveGpuShortRangeWork(const NbnxmGpu& nb, const gmx::InteractionLocality iLocality)
{
return nb.haveWork[iLocality];
}
//NOLINTNEXTLINE(misc-definitions-in-headers)
-bool haveGpuShortRangeWork(const gmx_nbnxm_gpu_t* nb, const gmx::AtomLocality aLocality)
+bool haveGpuShortRangeWork(const NbnxmGpu* nb, const gmx::AtomLocality aLocality)
{
GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
* \todo Move into shared source file with gmx_compile_cpp_as_cuda
*/
//NOLINTNEXTLINE(misc-definitions-in-headers)
-bool gpu_try_finish_task(gmx_nbnxm_gpu_t* nb,
+bool gpu_try_finish_task(NbnxmGpu* nb,
const gmx::StepWorkload& stepWork,
const AtomLocality aloc,
real* e_lj,
* \return The number of cycles the gpu wait took
*/
//NOLINTNEXTLINE(misc-definitions-in-headers) TODO: move into source file
-float gpu_wait_finish_task(gmx_nbnxm_gpu_t* nb,
+float gpu_wait_finish_task(NbnxmGpu* nb,
const gmx::StepWorkload& stepWork,
AtomLocality aloc,
real* e_lj,
* local part of the force array also depends on the non-local kernel.
* The skip of the local kernel is taken care of separately.
*/
-static inline bool canSkipNonbondedWork(const gmx_nbnxm_gpu_t& nb, InteractionLocality iloc)
+static inline bool canSkipNonbondedWork(const NbnxmGpu& nb, InteractionLocality iloc)
{
assert(nb.plist[iloc]);
return (iloc == InteractionLocality::NonLocal && nb.plist[iloc]->nsci == 0);
#include "gromacs/mdtypes/interaction_const.h"
#include "gromacs/mdtypes/locality.h"
-struct gmx_nbnxm_gpu_t;
+struct NbnxmGpu;
struct gmx_gpu_info_t;
struct gmx_device_info_t;
struct gmx_wallclock_gpu_nbnxn_t;
/** Initializes the data structures related to GPU nonbonded calculations. */
GPU_FUNC_QUALIFIER
-gmx_nbnxm_gpu_t* gpu_init(const gmx_device_info_t gmx_unused* deviceInfo,
- const interaction_const_t gmx_unused* ic,
- const PairlistParams gmx_unused& listParams,
- const nbnxn_atomdata_t gmx_unused* nbat,
- int gmx_unused rank,
- /* true if both local and non-local are done on GPU */
- gmx_bool gmx_unused bLocalAndNonlocal) GPU_FUNC_TERM_WITH_RETURN(nullptr);
+NbnxmGpu* gpu_init(const gmx_device_info_t gmx_unused* deviceInfo,
+ const interaction_const_t gmx_unused* ic,
+ const PairlistParams gmx_unused& listParams,
+ const nbnxn_atomdata_t gmx_unused* nbat,
+ int gmx_unused rank,
+ /* true if both local and non-local are done on GPU */
+ gmx_bool gmx_unused bLocalAndNonlocal) GPU_FUNC_TERM_WITH_RETURN(nullptr);
/** Initializes pair-list data for GPU, called at every pair search step. */
GPU_FUNC_QUALIFIER
-void gpu_init_pairlist(gmx_nbnxm_gpu_t gmx_unused* nb,
+void gpu_init_pairlist(NbnxmGpu gmx_unused* nb,
const struct NbnxnPairlistGpu gmx_unused* h_nblist,
gmx::InteractionLocality gmx_unused iloc) GPU_FUNC_TERM;
/** Initializes atom-data on the GPU, called at every pair search step. */
GPU_FUNC_QUALIFIER
-void gpu_init_atomdata(gmx_nbnxm_gpu_t gmx_unused* nb, const nbnxn_atomdata_t gmx_unused* nbat) GPU_FUNC_TERM;
+void gpu_init_atomdata(NbnxmGpu gmx_unused* nb, const nbnxn_atomdata_t gmx_unused* nbat) GPU_FUNC_TERM;
/*! \brief Re-generate the GPU Ewald force table, resets rlist, and update the
* electrostatic type switching to twin cut-off (or back) if needed.
/** Uploads shift vector to the GPU if the box is dynamic (otherwise just returns). */
GPU_FUNC_QUALIFIER
-void gpu_upload_shiftvec(gmx_nbnxm_gpu_t gmx_unused* nb, const nbnxn_atomdata_t gmx_unused* nbatom) GPU_FUNC_TERM;
+void gpu_upload_shiftvec(NbnxmGpu gmx_unused* nb, const nbnxn_atomdata_t gmx_unused* nbatom) GPU_FUNC_TERM;
/** Clears GPU outputs: nonbonded force, shift force and energy. */
GPU_FUNC_QUALIFIER
-void gpu_clear_outputs(gmx_nbnxm_gpu_t gmx_unused* nb, bool gmx_unused computeVirial) GPU_FUNC_TERM;
+void gpu_clear_outputs(NbnxmGpu gmx_unused* nb, bool gmx_unused computeVirial) GPU_FUNC_TERM;
/** Frees all GPU resources used for the nonbonded calculations. */
GPU_FUNC_QUALIFIER
-void gpu_free(gmx_nbnxm_gpu_t gmx_unused* nb) GPU_FUNC_TERM;
+void gpu_free(NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM;
/** Returns the GPU timings structure or NULL if GPU is not used or timing is off. */
GPU_FUNC_QUALIFIER
-struct gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxm_gpu_t gmx_unused* nb)
+struct gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(NbnxmGpu gmx_unused* nb)
GPU_FUNC_TERM_WITH_RETURN(nullptr);
/** Resets nonbonded GPU timings. */
/** Calculates the minimum size of proximity lists to improve SM load balance
* with GPU non-bonded kernels. */
GPU_FUNC_QUALIFIER
-int gpu_min_ci_balanced(gmx_nbnxm_gpu_t gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(-1);
+int gpu_min_ci_balanced(NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(-1);
/** Returns if analytical Ewald GPU kernels are used. */
GPU_FUNC_QUALIFIER
-gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxm_gpu_t gmx_unused* nb)
- GPU_FUNC_TERM_WITH_RETURN(FALSE);
+gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(FALSE);
/** Returns an opaque pointer to the GPU command stream
* Note: CUDA only.
*/
CUDA_FUNC_QUALIFIER
-void* gpu_get_command_stream(gmx_nbnxm_gpu_t gmx_unused* nb, gmx::InteractionLocality gmx_unused iloc)
+void* gpu_get_command_stream(NbnxmGpu gmx_unused* nb, gmx::InteractionLocality gmx_unused iloc)
CUDA_FUNC_TERM_WITH_RETURN(nullptr);
/** Returns an opaque pointer to the GPU coordinate+charge array
* Note: CUDA only.
*/
CUDA_FUNC_QUALIFIER
-void* gpu_get_xq(gmx_nbnxm_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
+void* gpu_get_xq(NbnxmGpu gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
/** Returns an opaque pointer to the GPU force array
* Note: CUDA only.
*/
CUDA_FUNC_QUALIFIER
-void* gpu_get_f(gmx_nbnxm_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
+void* gpu_get_f(NbnxmGpu gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
/** Returns an opaque pointer to the GPU shift force array
* Note: CUDA only.
*/
CUDA_FUNC_QUALIFIER
-rvec* gpu_get_fshift(gmx_nbnxm_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
+rvec* gpu_get_fshift(NbnxmGpu gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
} // namespace Nbnxm
#include "gromacs/utility/basedefinitions.h"
-struct gmx_nbnxm_gpu_t;
+struct NbnxmGpu;
/*! \brief Handles any JIT compilation of nbnxn kernels for the selected device */
-OPENCL_FUNC_QUALIFIER void nbnxn_gpu_compile_kernels(gmx_nbnxm_gpu_t gmx_unused* nb) OPENCL_FUNC_TERM;
+OPENCL_FUNC_QUALIFIER void nbnxn_gpu_compile_kernels(NbnxmGpu gmx_unused* nb) OPENCL_FUNC_TERM;
#endif
struct gmx_enerdata_t;
struct gmx_hw_info_t;
struct gmx_mtop_t;
-struct gmx_nbnxm_gpu_t;
+struct NbnxmGpu;
struct gmx_wallcycle;
struct interaction_const_t;
struct nbnxn_atomdata_t;
std::unique_ptr<PairSearch> pairSearch,
std::unique_ptr<nbnxn_atomdata_t> nbat,
const Nbnxm::KernelSetup& kernelSetup,
- gmx_nbnxm_gpu_t* gpu_nbv,
+ NbnxmGpu* gpu_nbv,
gmx_wallcycle* wcycle);
~nonbonded_verlet_t();
public:
//! GPU Nbnxm data, only used with a physical GPU (TODO: use unique_ptr)
- gmx_nbnxm_gpu_t* gpu_nbv;
+ NbnxmGpu* gpu_nbv;
};
namespace Nbnxm
* \param [in] aloc Atom locality flag.
*/
GPU_FUNC_QUALIFIER
-void gpu_copy_xq_to_gpu(gmx_nbnxm_gpu_t gmx_unused* nb,
+void gpu_copy_xq_to_gpu(NbnxmGpu gmx_unused* nb,
const struct nbnxn_atomdata_t gmx_unused* nbdata,
gmx::AtomLocality gmx_unused aloc) GPU_FUNC_TERM;
*
*/
GPU_FUNC_QUALIFIER
-void gpu_launch_kernel(gmx_nbnxm_gpu_t gmx_unused* nb,
+void gpu_launch_kernel(NbnxmGpu gmx_unused* nb,
const gmx::StepWorkload gmx_unused& stepWork,
gmx::InteractionLocality gmx_unused iloc) GPU_FUNC_TERM;
* \param [in] numParts Number of parts the pair list is split into in the rolling kernel.
*/
GPU_FUNC_QUALIFIER
-void gpu_launch_kernel_pruneonly(gmx_nbnxm_gpu_t gmx_unused* nb,
+void gpu_launch_kernel_pruneonly(NbnxmGpu gmx_unused* nb,
gmx::InteractionLocality gmx_unused iloc,
int gmx_unused numParts) GPU_FUNC_TERM;
* (and energies/shift forces if required).
*/
GPU_FUNC_QUALIFIER
-void gpu_launch_cpyback(gmx_nbnxm_gpu_t gmx_unused* nb,
+void gpu_launch_cpyback(NbnxmGpu gmx_unused* nb,
nbnxn_atomdata_t gmx_unused* nbatom,
const gmx::StepWorkload gmx_unused& stepWork,
gmx::AtomLocality gmx_unused aloc) GPU_FUNC_TERM;
* \returns True if the nonbonded tasks associated with \p aloc locality have completed
*/
GPU_FUNC_QUALIFIER
-bool gpu_try_finish_task(gmx_nbnxm_gpu_t gmx_unused* nb,
+bool gpu_try_finish_task(NbnxmGpu gmx_unused* nb,
const gmx::StepWorkload gmx_unused& stepWork,
gmx::AtomLocality gmx_unused aloc,
real gmx_unused* e_lj,
* \param[out] shiftForces Shift forces buffer to accumulate into
* \param[out] wcycle Pointer to wallcycle data structure */
GPU_FUNC_QUALIFIER
-float gpu_wait_finish_task(gmx_nbnxm_gpu_t gmx_unused* nb,
+float gpu_wait_finish_task(NbnxmGpu gmx_unused* nb,
const gmx::StepWorkload gmx_unused& stepWork,
gmx::AtomLocality gmx_unused aloc,
real gmx_unused* e_lj,
* Called on the NS step and performs (re-)allocations and memory copies. */
CUDA_FUNC_QUALIFIER
void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet gmx_unused& gridSet,
- gmx_nbnxm_gpu_t gmx_unused* gpu_nbv) CUDA_FUNC_TERM;
+ NbnxmGpu gmx_unused* gpu_nbv) CUDA_FUNC_TERM;
/*! \brief X buffer operations on GPU: performs conversion from rvec to nb format.
*
CUDA_FUNC_QUALIFIER
void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid gmx_unused& grid,
bool gmx_unused setFillerCoords,
- gmx_nbnxm_gpu_t gmx_unused* gpu_nbv,
+ NbnxmGpu gmx_unused* gpu_nbv,
DeviceBuffer<float> gmx_unused d_x,
GpuEventSynchronizer gmx_unused* xReadyOnDevice,
gmx::AtomLocality gmx_unused locality,
* \param[in] interactionLocality Local or NonLocal sync point
*/
CUDA_FUNC_QUALIFIER
-void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxm_gpu_t gmx_unused* nb,
+void nbnxnInsertNonlocalGpuDependency(const NbnxmGpu gmx_unused* nb,
gmx::InteractionLocality gmx_unused interactionLocality) CUDA_FUNC_TERM;
/*! \brief Set up internal flags that indicate what type of short-range work there is.
* \param[in] iLocality Interaction locality identifier
*/
GPU_FUNC_QUALIFIER
-void setupGpuShortRangeWork(gmx_nbnxm_gpu_t gmx_unused* nb,
+void setupGpuShortRangeWork(NbnxmGpu gmx_unused* nb,
const gmx::GpuBonded gmx_unused* gpuBonded,
gmx::InteractionLocality gmx_unused iLocality) GPU_FUNC_TERM;
* \param[in] aLocality Atom locality identifier
*/
GPU_FUNC_QUALIFIER
-bool haveGpuShortRangeWork(const gmx_nbnxm_gpu_t gmx_unused* nb, gmx::AtomLocality gmx_unused aLocality)
+bool haveGpuShortRangeWork(const NbnxmGpu gmx_unused* nb, gmx::AtomLocality gmx_unused aLocality)
GPU_FUNC_TERM_WITH_RETURN(false);
/*! \brief Initialization for F buffer operations on GPU */
CUDA_FUNC_QUALIFIER
void nbnxn_gpu_init_add_nbat_f_to_f(const int gmx_unused* cell,
- gmx_nbnxm_gpu_t gmx_unused* gpu_nbv,
+ NbnxmGpu gmx_unused* gpu_nbv,
int gmx_unused natoms_total,
GpuEventSynchronizer gmx_unused* localReductionDone) CUDA_FUNC_TERM;
CUDA_FUNC_QUALIFIER
void nbnxn_gpu_add_nbat_f_to_f(gmx::AtomLocality gmx_unused atomLocality,
DeviceBuffer<float> gmx_unused totalForcesDevice,
- gmx_nbnxm_gpu_t gmx_unused* gpu_nbv,
+ NbnxmGpu gmx_unused* gpu_nbv,
void gmx_unused* pmeForcesDevice,
gmx::ArrayRef<GpuEventSynchronizer* const> gmx_unused dependencyList,
int gmx_unused atomStart,
* \param[in] nb The nonbonded data GPU structure
*/
CUDA_FUNC_QUALIFIER
-void nbnxn_wait_x_on_device(gmx_nbnxm_gpu_t gmx_unused* nb) CUDA_FUNC_TERM;
+void nbnxn_wait_x_on_device(NbnxmGpu gmx_unused* nb) CUDA_FUNC_TERM;
} // namespace Nbnxm
#endif
{
/*! \brief Gets and returns the minimum i-list count for balancing, based on the GPU used or an environment variable when set */
-static int getMinimumIlistCountForGpuBalancing(gmx_nbnxm_gpu_t* nbnxmGpu)
+static int getMinimumIlistCountForGpuBalancing(NbnxmGpu* nbnxmGpu)
{
int minimumIlistCount;
fr->nbfp, mimimumNumEnergyGroupNonbonded,
(useGpu || emulateGpu) ? 1 : gmx_omp_nthreads_get(emntNonbonded));
- gmx_nbnxm_gpu_t* gpu_nbv = nullptr;
- int minimumIlistCountForGpuBalancing = 0;
+ NbnxmGpu* gpu_nbv = nullptr;
+ int minimumIlistCountForGpuBalancing = 0;
if (useGpu)
{
/* init the NxN GPU data; the last argument tells whether we'll have
std::unique_ptr<PairSearch> pairSearch,
std::unique_ptr<nbnxn_atomdata_t> nbat_in,
const Nbnxm::KernelSetup& kernelSetup,
- gmx_nbnxm_gpu_t* gpu_nbv_ptr,
+ NbnxmGpu* gpu_nbv_ptr,
gmx_wallcycle* wcycle) :
pairlistSets_(std::move(pairlistSets)),
pairSearch_(std::move(pairSearch)),
* OpenCL kernel objects are cached in nb. If the requested kernel is not
* found in the cache, it will be created and the cache will be updated.
*/
-static inline cl_kernel select_nbnxn_kernel(gmx_nbnxm_gpu_t* nb, int eeltype, int evdwtype, bool bDoEne, bool bDoPrune)
+static inline cl_kernel select_nbnxn_kernel(NbnxmGpu* nb, int eeltype, int evdwtype, bool bDoEne, bool bDoPrune)
{
const char* kernel_name_to_run;
cl_kernel* kernel_ptr;
}
/*! \brief Launch asynchronously the xq buffer host to device copy. */
-void gpu_copy_xq_to_gpu(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom, const AtomLocality atomLocality)
+void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const AtomLocality atomLocality)
{
GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
misc_ops_done event to record the point in time when the above operations
are finished and synchronize with this event in the non-local stream.
*/
-void gpu_launch_kernel(gmx_nbnxm_gpu_t* nb, const gmx::StepWorkload& stepWork, const Nbnxm::InteractionLocality iloc)
+void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nbnxm::InteractionLocality iloc)
{
cl_atomdata_t* adat = nb->atdat;
cl_nbparam_t* nbp = nb->nbparam;
* Launch the pairlist prune-only kernel for the given locality.
* \p numParts tells in how many parts, i.e. calls, the list will be pruned.
*/
-void gpu_launch_kernel_pruneonly(gmx_nbnxm_gpu_t* nb, const InteractionLocality iloc, const int numParts)
+void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, const int numParts)
{
cl_atomdata_t* adat = nb->atdat;
cl_nbparam_t* nbp = nb->nbparam;
* Launch asynchronously the download of nonbonded forces from the GPU
* (and energies/shift forces if required).
*/
-void gpu_launch_cpyback(gmx_nbnxm_gpu_t* nb,
+void gpu_launch_cpyback(NbnxmGpu* nb,
struct nbnxn_atomdata_t* nbatom,
const gmx::StepWorkload& stepWork,
const AtomLocality aloc)
/* An element of the fshift device buffer has the same size as one element
of the host side fshift buffer. */
- ad->fshift_elem_size = sizeof(*cl_nb_staging_t::fshift);
+ ad->fshift_elem_size = sizeof(*nb_staging_t::fshift);
ad->fshift = clCreateBuffer(runData->context, CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
SHIFTS * ad->fshift_elem_size, nullptr, &cl_error);
{
return;
}
- gmx_nbnxm_gpu_t* nb = nbv->gpu_nbv;
- cl_nbparam_t* nbp = nb->nbparam;
+ NbnxmGpu* nb = nbv->gpu_nbv;
+ cl_nbparam_t* nbp = nb->nbparam;
set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
}
/*! \brief Creates an OpenCL kernel object with the given name from the compiled program in the input data structure. */
-static cl_kernel nbnxn_gpu_create_kernel(gmx_nbnxm_gpu_t* nb, const char* kernel_name)
+static cl_kernel nbnxn_gpu_create_kernel(NbnxmGpu* nb, const char* kernel_name)
{
cl_kernel kernel;
cl_int cl_error;
/*! \brief Clears nonbonded shift force output array and energy outputs on the GPU.
*/
-static void nbnxn_ocl_clear_e_fshift(gmx_nbnxm_gpu_t* nb)
+static void nbnxn_ocl_clear_e_fshift(NbnxmGpu* nb)
{
cl_int cl_error;
}
/*! \brief Initializes the OpenCL kernel pointers of the NbnxmGpu input data structure. */
-static void nbnxn_gpu_init_kernels(gmx_nbnxm_gpu_t* nb)
+static void nbnxn_gpu_init_kernels(NbnxmGpu* nb)
{
/* Init to 0 main kernel arrays */
/* They will be later on initialized in select_nbnxn_kernel */
* Initializes members of the atomdata and nbparam structs and
* clears e/fshift output buffers.
*/
-static void nbnxn_ocl_init_const(gmx_nbnxm_gpu_t* nb,
+static void nbnxn_ocl_init_const(NbnxmGpu* nb,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t::Params& nbatParams)
//! This function is documented in the header file
-gmx_nbnxm_gpu_t* gpu_init(const gmx_device_info_t* deviceInfo,
- const interaction_const_t* ic,
- const PairlistParams& listParams,
- const nbnxn_atomdata_t* nbat,
- const int rank,
- const gmx_bool bLocalAndNonlocal)
+NbnxmGpu* gpu_init(const gmx_device_info_t* deviceInfo,
+ const interaction_const_t* ic,
+ const PairlistParams& listParams,
+ const nbnxn_atomdata_t* nbat,
+ const int rank,
+ const gmx_bool bLocalAndNonlocal)
{
- gmx_nbnxm_gpu_t* nb;
cl_int cl_error;
cl_command_queue_properties queue_properties;
assert(ic);
- snew(nb, 1);
+ auto nb = new NbnxmGpu;
snew(nb->atdat, 1);
snew(nb->nbparam, 1);
snew(nb->plist[InteractionLocality::Local], 1);
/*! \brief Clears the first natoms_clear elements of the GPU nonbonded force output array.
*/
-static void nbnxn_ocl_clear_f(gmx_nbnxm_gpu_t* nb, int natoms_clear)
+static void nbnxn_ocl_clear_f(NbnxmGpu* nb, int natoms_clear)
{
if (natoms_clear == 0)
{
}
//! This function is documented in the header file
-void gpu_clear_outputs(gmx_nbnxm_gpu_t* nb, bool computeVirial)
+void gpu_clear_outputs(NbnxmGpu* nb, bool computeVirial)
{
nbnxn_ocl_clear_f(nb, nb->atdat->natoms);
/* clear shift force array and energies if the outputs were
}
//! This function is documented in the header file
-void gpu_init_pairlist(gmx_nbnxm_gpu_t* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
+void gpu_init_pairlist(NbnxmGpu* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
{
char sbuf[STRLEN];
// Timing accumulation should happen only if there was work to do
}
//! This function is documented in the header file
-void gpu_upload_shiftvec(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom)
+void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
{
cl_atomdata_t* adat = nb->atdat;
cl_command_queue ls = nb->stream[InteractionLocality::Local];
}
//! This function is documented in the header file
-void gpu_init_atomdata(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbat)
+void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
{
cl_int cl_error;
int nalloc, natoms;
}
//! This function is documented in the header file
-void gpu_free(gmx_nbnxm_gpu_t* nb)
+void gpu_free(NbnxmGpu* nb)
{
if (nb == nullptr)
{
/* Free timers and timings */
delete nb->timers;
sfree(nb->timings);
- sfree(nb);
+ delete nb;
if (debug)
{
}
//! This function is documented in the header file
-gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxm_gpu_t* nb)
+gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(NbnxmGpu* nb)
{
return (nb != nullptr && nb->bDoTime) ? nb->timings : nullptr;
}
}
//! This function is documented in the header file
-int gpu_min_ci_balanced(gmx_nbnxm_gpu_t* nb)
+int gpu_min_ci_balanced(NbnxmGpu* nb)
{
return nb != nullptr ? gpu_min_ci_balanced_factor * nb->dev_info->compute_units : 0;
}
//! This function is documented in the header file
-gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxm_gpu_t* nb)
+gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb)
{
return ((nb->nbparam->eeltype == eelOclEWALD_ANA) || (nb->nbparam->eeltype == eelOclEWALD_ANA_TWIN));
}
*
* Does not throw
*/
-void nbnxn_gpu_compile_kernels(gmx_nbnxm_gpu_t* nb)
+void nbnxn_gpu_compile_kernels(NbnxmGpu* nb)
{
gmx_bool bFastGen = TRUE;
cl_program program = nullptr;
#include "nbnxm_ocl_consts.h"
+struct gmx_wallclock_gpu_nbnxn_t;
+
/* kernel does #include "gromacs/math/utilities.h" */
/* Move the actual useful stuff here: */
* The energies/shift forces get downloaded here first, before getting added
* to the CPU-side aggregate values.
*/
-typedef struct cl_nb_staging
+struct nb_staging_t
{
//! LJ energy
- float* e_lj;
+ float* e_lj = nullptr;
//! electrostatic energy
- float* e_el;
+ float* e_el = nullptr;
//! float3 buffer with shift forces
- float (*fshift)[3];
-} cl_nb_staging_t;
+ float (*fshift)[3] = nullptr;
+};
/*! \internal
* \brief Nonbonded atom data - both inputs and outputs.
size_t shift_vec_elem_size;
//! true if the shift vector has been uploaded
- cl_bool bShiftVecUploaded;
+ bool bShiftVecUploaded;
} cl_atomdata_t;
/*! \internal
* \brief Main data structure for OpenCL nonbonded force calculations.
*/
-struct gmx_nbnxm_gpu_t
+struct NbnxmGpu
{
//! OpenCL device information
- const gmx_device_info_t* dev_info;
+ const gmx_device_info_t* dev_info = nullptr;
//! OpenCL runtime data (context, kernels)
- struct gmx_device_runtime_data_t* dev_rundata;
+ struct gmx_device_runtime_data_t* dev_rundata = nullptr;
/**< Pointers to non-bonded kernel functions
* organized similarly to the nb_kfunc_xxx arrays in nbnxn_ocl.cpp */
///@{
- cl_kernel kernel_noener_noprune_ptr[eelOclNR][evdwOclNR];
- cl_kernel kernel_ener_noprune_ptr[eelOclNR][evdwOclNR];
- cl_kernel kernel_noener_prune_ptr[eelOclNR][evdwOclNR];
- cl_kernel kernel_ener_prune_ptr[eelOclNR][evdwOclNR];
+ cl_kernel kernel_noener_noprune_ptr[eelOclNR][evdwOclNR] = { { nullptr } };
+ cl_kernel kernel_ener_noprune_ptr[eelOclNR][evdwOclNR] = { { nullptr } };
+ cl_kernel kernel_noener_prune_ptr[eelOclNR][evdwOclNR] = { { nullptr } };
+ cl_kernel kernel_ener_prune_ptr[eelOclNR][evdwOclNR] = { { nullptr } };
///@}
//! prune kernels, ePruneKind defined the kernel kinds
- cl_kernel kernel_pruneonly[ePruneNR];
+ cl_kernel kernel_pruneonly[ePruneNR] = { nullptr };
//! true if prefetching fg i-atom LJ parameters should be used in the kernels
- bool bPrefetchLjParam;
+ bool bPrefetchLjParam = false;
/**< auxiliary kernels implementing memset-like functions */
///@{
- cl_kernel kernel_memset_f;
- cl_kernel kernel_memset_f2;
- cl_kernel kernel_memset_f3;
- cl_kernel kernel_zero_e_fshift;
+ cl_kernel kernel_memset_f = nullptr;
+ cl_kernel kernel_memset_f2 = nullptr;
+ cl_kernel kernel_memset_f3 = nullptr;
+ cl_kernel kernel_zero_e_fshift = nullptr;
///@}
//! true if doing both local/non-local NB work on GPU
- cl_bool bUseTwoStreams;
+ bool bUseTwoStreams = false;
//! true indicates that the nonlocal_done event was enqueued
- cl_bool bNonLocalStreamActive;
+ bool bNonLocalStreamActive = false;
//! atom data
- cl_atomdata_t* atdat;
+ cl_atomdata_t* atdat = nullptr;
//! parameters required for the non-bonded calc.
- cl_nbparam_t* nbparam;
+ cl_nbparam_t* nbparam = nullptr;
//! pair-list data structures (local and non-local)
- gmx::EnumerationArray<Nbnxm::InteractionLocality, cl_plist_t*> plist;
+ gmx::EnumerationArray<Nbnxm::InteractionLocality, cl_plist_t*> plist = { nullptr };
//! staging area where fshift/energies get downloaded
- cl_nb_staging_t nbst;
+ nb_staging_t nbst;
//! local and non-local GPU queues
gmx::EnumerationArray<Nbnxm::InteractionLocality, cl_command_queue> stream;
/*! \{ */
/*! \brief Event triggered when the non-local non-bonded
* kernel is done (and the local transfer can proceed) */
- cl_event nonlocal_done;
+ cl_event nonlocal_done = nullptr;
/*! \brief Event triggered when the tasks issued in the local
* stream that need to precede the non-local force or buffer
* operation calculations are done (e.g. f buffer 0-ing, local
* x/q H2D, buffer op initialization in local stream that is
* required also by nonlocal stream ) */
- cl_event misc_ops_and_local_H2D_done;
+ cl_event misc_ops_and_local_H2D_done = nullptr;
/*! \} */
//! True if there has been local/nonlocal GPU work, either bonded or nonbonded, scheduled
//! True if event-based timing is enabled.
- cl_bool bDoTime;
+ bool bDoTime = false;
//! OpenCL event-based timers.
- cl_timers_t* timers;
+ cl_timers_t* timers = nullptr;
//! Timing data. TODO: deprecate this and query timers for accumulated data instead
- struct gmx_wallclock_gpu_nbnxn_t* timings;
+ gmx_wallclock_gpu_nbnxn_t* timings = nullptr;
};
#endif /* NBNXN_OPENCL_TYPES_H */