target_sources(libgromacs PRIVATE
$<TARGET_OBJECTS:clFFT>
)
- target_include_directories(libgromacs PRIVATE ${_clFFT_dir}/include)
+ target_include_directories(libgromacs SYSTEM PRIVATE ${_clFFT_dir}/include)
# Use the magic variable for linking any library needed for dlopen,
# etc.; it is -ldl where needed, and empty otherwise (e.g. on
# Windows, BSD, Mac).
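# A minimal sketch, assuming the variable meant is CMAKE_DL_LIBS and that
# libgromacs is the target being linked here:
#   target_link_libraries(libgromacs PRIVATE ${CMAKE_DL_LIBS})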
std::array<size_t, DIM> realGridSize, realGridSizePadded, complexGridSizePadded;
GMX_RELEASE_ASSERT(!pme_gpu_uses_dd(pmeGpu), "FFT decomposition not implemented");
- PmeGpuKernelParamsBase *kernelParamsPtr = (PmeGpuKernelParamsBase *)pmeGpu->kernelParams.get();
+ PmeGpuKernelParamsBase *kernelParamsPtr = pmeGpu->kernelParams.get();
for (int i = 0; i < DIM; i++)
{
realGridSize[i] = kernelParamsPtr->grid.realGridSize[i];
inputGrids = &realGrid_;
outputGrids = &complexGrid_;
break;
-
- break;
case GMX_FFT_COMPLEX_TO_REAL:
plan = planC2R_;
direction = CLFFT_BACKWARD;
inputGrids = &complexGrid_;
outputGrids = &realGrid_;
break;
-
default:
GMX_THROW(gmx::NotImplementedError("The chosen 3D-FFT case is not implemented on GPUs"));
- break;
}
handleClfftError(clfftEnqueueTransform(plan, direction,
commandStreams_.size(), commandStreams_.data(),
#include <list>
#include <string>
+#include "gromacs/compat/make_unique.h"
#include "gromacs/ewald/ewald-utils.h"
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/math/invertmatrix.h"
gpuStreamSynchronize(pmeGpu->archSpecific->pmeStream);
}
-void pme_gpu_alloc_energy_virial(const PmeGpu *pmeGpu)
+void pme_gpu_alloc_energy_virial(PmeGpu *pmeGpu)
{
const size_t energyAndVirialSize = c_virialAndEnergyCount * sizeof(float);
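// (c_virialAndEnergyCount covers the six independent virial tensor components
// plus the energy in the current layout)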
allocateDeviceBuffer(&pmeGpu->kernelParams->constants.d_virialAndEnergy, c_virialAndEnergyCount, pmeGpu->archSpecific->context);
- pmalloc((void **)&pmeGpu->staging.h_virialAndEnergy, energyAndVirialSize);
+ pmalloc(reinterpret_cast<void **>(&pmeGpu->staging.h_virialAndEnergy), energyAndVirialSize);
}
void pme_gpu_free_energy_virial(PmeGpu *pmeGpu)
c_virialAndEnergyCount, pmeGpu->archSpecific->pmeStream);
}
-void pme_gpu_realloc_and_copy_bspline_values(const PmeGpu *pmeGpu)
+void pme_gpu_realloc_and_copy_bspline_values(PmeGpu *pmeGpu)
{
const int splineValuesOffset[DIM] = {
0,
pmeGpu->kernelParams->grid.realGridSize[XX],
pmeGpu->kernelParams->grid.realGridSize[XX] + pmeGpu->kernelParams->grid.realGridSize[YY]
};
- memcpy((void *)&pmeGpu->kernelParams->grid.splineValuesOffset, &splineValuesOffset, sizeof(splineValuesOffset));
+ memcpy(&pmeGpu->kernelParams->grid.splineValuesOffset, &splineValuesOffset, sizeof(splineValuesOffset));
const int newSplineValuesSize = pmeGpu->kernelParams->grid.realGridSize[XX] +
pmeGpu->kernelParams->grid.realGridSize[YY] +
{
/* Reallocate the host buffer */
pfree(pmeGpu->staging.h_splineModuli);
- pmalloc((void **)&pmeGpu->staging.h_splineModuli, newSplineValuesSize * sizeof(float));
+ pmalloc(reinterpret_cast<void **>(&pmeGpu->staging.h_splineModuli), newSplineValuesSize * sizeof(float));
}
for (int i = 0; i < DIM; i++)
{
freeDeviceBuffer(&pmeGpu->kernelParams->atoms.d_coefficients);
}
-void pme_gpu_realloc_spline_data(const PmeGpu *pmeGpu)
+void pme_gpu_realloc_spline_data(PmeGpu *pmeGpu)
{
const int order = pmeGpu->common->pme_order;
const int alignment = pme_gpu_get_atoms_per_warp(pmeGpu);
if (shouldRealloc)
{
pfree(pmeGpu->staging.h_theta);
- pmalloc((void **)&pmeGpu->staging.h_theta, newSplineDataSize * sizeof(float));
+ pmalloc(reinterpret_cast<void **>(&pmeGpu->staging.h_theta), newSplineDataSize * sizeof(float));
pfree(pmeGpu->staging.h_dtheta);
- pmalloc((void **)&pmeGpu->staging.h_dtheta, newSplineDataSize * sizeof(float));
+ pmalloc(reinterpret_cast<void **>(&pmeGpu->staging.h_dtheta), newSplineDataSize * sizeof(float));
}
}
pfree(pmeGpu->staging.h_dtheta);
}
-void pme_gpu_realloc_grid_indices(const PmeGpu *pmeGpu)
+void pme_gpu_realloc_grid_indices(PmeGpu *pmeGpu)
{
const size_t newIndicesSize = DIM * pmeGpu->nAtomsAlloc;
GMX_ASSERT(newIndicesSize > 0, "Bad number of atoms in PME GPU");
reallocateDeviceBuffer(&pmeGpu->kernelParams->atoms.d_gridlineIndices, newIndicesSize,
&pmeGpu->archSpecific->gridlineIndicesSize, &pmeGpu->archSpecific->gridlineIndicesSizeAlloc, pmeGpu->archSpecific->context);
pfree(pmeGpu->staging.h_gridlineIndices);
- pmalloc((void **)&pmeGpu->staging.h_gridlineIndices, newIndicesSize * sizeof(int));
+ pmalloc(reinterpret_cast<void **>(&pmeGpu->staging.h_gridlineIndices), newIndicesSize * sizeof(int));
}
void pme_gpu_free_grid_indices(const PmeGpu *pmeGpu)
pmeGpu->archSpecific->fftSetup.resize(0);
for (int i = 0; i < pmeGpu->common->ngrids; i++)
{
- pmeGpu->archSpecific->fftSetup.push_back(std::unique_ptr<GpuParallel3dFft>(new GpuParallel3dFft(pmeGpu)));
+ pmeGpu->archSpecific->fftSetup.push_back(gmx::compat::make_unique<GpuParallel3dFft>(pmeGpu));
}
}
}
for (int i = 0; i < DIM; i++)
{
kernelParamsPtr->grid.realGridSize[i] = pmeGpu->common->nk[i];
- kernelParamsPtr->grid.realGridSizeFP[i] = (float)kernelParamsPtr->grid.realGridSize[i];
+ kernelParamsPtr->grid.realGridSizeFP[i] = static_cast<float>(kernelParamsPtr->grid.realGridSize[i]);
kernelParamsPtr->grid.realGridSizePadded[i] = kernelParamsPtr->grid.realGridSize[i];
// The complex grid currently uses no padding;
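// (a real-to-complex FFT stores only nz/2 + 1 complex values along its minor
// dimension, hence the complex grid sizes are tracked separately from the real ones)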
pme->gpu = new PmeGpu();
PmeGpu *pmeGpu = pme->gpu;
changePinningPolicy(&pmeGpu->staging.h_forces, pme_get_pinning_policy());
- pmeGpu->common = std::shared_ptr<PmeShared>(new PmeShared());
+ pmeGpu->common = std::make_shared<PmeShared>();
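/* PmeShared is held in a shared_ptr, presumably so that PME structures
recreated during run-time tuning can keep sharing the same constant data. */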
/* These settings are set here for the whole run; dynamic ones are set in pme_gpu_reinit() */
/* A convenience variable. */
/*! \libinternal \brief
* Allocates the fixed size energy and virial buffer both on GPU and CPU.
*
- * \param[in] pmeGpu The PME GPU structure.
+ * \param[in,out] pmeGpu The PME GPU structure.
*/
-void pme_gpu_alloc_energy_virial(const PmeGpu *pmeGpu);
+void pme_gpu_alloc_energy_virial(PmeGpu *pmeGpu);
/*! \libinternal \brief
* Frees the energy and virial memory both on GPU and CPU.
/*! \libinternal \brief
* Reallocates and copies the pre-computed B-spline values to the GPU.
*
- * \param[in] pmeGpu The PME GPU structure.
+ * \param[in,out] pmeGpu The PME GPU structure.
*/
-void pme_gpu_realloc_and_copy_bspline_values(const PmeGpu *pmeGpu);
+void pme_gpu_realloc_and_copy_bspline_values(PmeGpu *pmeGpu);
/*! \libinternal \brief
* Frees the pre-computed B-spline values on the GPU (and the transfer CPU buffers).
/*! \libinternal \brief
* Reallocates the buffers on the GPU and the host for the atoms spline data.
*
- * \param[in] pmeGpu The PME GPU structure.
+ * \param[in,out] pmeGpu The PME GPU structure.
*/
-void pme_gpu_realloc_spline_data(const PmeGpu *pmeGpu);
+void pme_gpu_realloc_spline_data(PmeGpu *pmeGpu);
/*! \libinternal \brief
* Frees the buffers on the GPU for the atoms spline data.
/*! \libinternal \brief
* Reallocates the buffers on the GPU and the host for the particle gridline indices.
*
- * \param[in] pmeGpu The PME GPU structure.
+ * \param[in,out] pmeGpu The PME GPU structure.
*/
-void pme_gpu_realloc_grid_indices(const PmeGpu *pmeGpu);
+void pme_gpu_realloc_grid_indices(PmeGpu *pmeGpu);
/*! \libinternal \brief
* Frees the buffer on the GPU for the particle gridline indices.
* Should be called before pme_gpu_set_io_ranges.
*/
GPU_FUNC_QUALIFIER void pme_gpu_reinit_atoms(PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu),
- const int GPU_FUNC_ARGUMENT(nAtoms),
+ int GPU_FUNC_ARGUMENT(nAtoms),
const real *GPU_FUNC_ARGUMENT(charges)) GPU_FUNC_TERM
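// (The GPU_FUNC_* macros give GPU builds a real declaration and CPU-only builds
// a static stub with an empty body, GPU_FUNC_ARGUMENT omitting the argument name
// there to avoid unused-parameter warnings. Dropping the top-level const from the
// by-value nAtoms changes nothing for callers: such const is not part of the
// function's signature.)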
/*! \libinternal \brief
cl_device_id deviceId = deviceInfo->ocl_gpu_id.ocl_device_id;
cl_context_properties contextProperties[3];
contextProperties[0] = CL_CONTEXT_PLATFORM;
- contextProperties[1] = (cl_context_properties) platformId;
+ contextProperties[1] = reinterpret_cast<cl_context_properties>(platformId);
contextProperties[2] = 0; /* Terminates the list of properties */
cl_int clError;
gmx_wallcycle *wcycle)
{
PmeGpu *pmeGpu = pme->gpu;
- const bool computeEnergyAndVirial = pmeGpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR;
- const bool performBackFFT = pmeGpu->settings.currentFlags & (GMX_PME_CALC_F | GMX_PME_CALC_POT);
+ const bool computeEnergyAndVirial = (pmeGpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR) != 0;
+ const bool performBackFFT = (pmeGpu->settings.currentFlags & (GMX_PME_CALC_F | GMX_PME_CALC_POT)) != 0;
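// (the explicit != 0 comparisons avoid implicit int-to-bool conversion warnings;
// the same pattern is applied to flag tests throughout this change)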
const unsigned int gridIndex = 0;
t_complex *cfftgrid = pme->cfftgrid[gridIndex];
matrix virial,
real *energy)
{
- const bool haveComputedEnergyAndVirial = pme->gpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR;
+ const bool haveComputedEnergyAndVirial = (pme->gpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR) != 0;
*forces = pme_gpu_get_forces(pme->gpu);
if (haveComputedEnergyAndVirial)
cl_mem data_;
public:
//! \brief An assignment operator - the purpose is to make allocation/zeroing work
- void operator=(cl_mem data){data_ = data; }
+ TypedClMemory &operator=(cl_mem data){data_ = data; return *this; }
//! \brief Returns underlying cl_mem transparently
operator cl_mem() {return data_; }
};
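// A minimal usage sketch (illustrative names, not part of this change): the
// implicit conversion lets the wrapper be passed straight to raw OpenCL calls,
// while returning *this from operator= permits chained assignment.
//   TypedClMemory d_buf;
//   d_buf = clCreateBuffer(context, CL_MEM_READ_WRITE, nbytes, nullptr, &err);
//   clEnqueueWriteBuffer(queue, d_buf, CL_TRUE, 0, nbytes, hostPtr, 0, nullptr, nullptr);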
* \returns size in bytes of gpu_dev_info
*/
GPU_FUNC_QUALIFIER
-size_t sizeof_gpu_dev_info(void) GPU_FUNC_TERM_WITH_RETURN(0)
+size_t sizeof_gpu_dev_info() GPU_FUNC_TERM_WITH_RETURN(0)
/*! \brief Allocates page-locked memory of \c nbytes bytes and returns a pointer to it in \c *ptr.
*
req_dev_type = CL_DEVICE_TYPE_CPU;
}
- while (1)
+ while (true)
{
cl_int status = clGetPlatformIDs(0, nullptr, &ocl_platform_count);
if (CL_SUCCESS != status)
}
//! This function is documented in the header file
-size_t sizeof_gpu_dev_info(void)
+size_t sizeof_gpu_dev_info()
{
return sizeof(gmx_device_info_t);
}
GpuRegionTimerImpl(GpuRegionTimerImpl &&) = delete;
/*! \brief Should be called before the region start. */
- inline void openTimingRegion(CommandStream){}
+ inline void openTimingRegion(CommandStream /*unused*/){}
/*! \brief Should be called after the region end. */
- inline void closeTimingRegion(CommandStream){}
+ inline void closeTimingRegion(CommandStream /*unused*/){}
/*! \brief Returns the last measured region timespan (in milliseconds) and calls reset(). */
inline double getLastRangeTime()
{
fwrite(binary, 1, fileSize, f.get());
}
-} // namespace
-} // namespace
+} // namespace ocl
+} // namespace gmx
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2018, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
void
writeBinaryToCache(cl_program program, const std::string &filename);
-} // namespace
-} // namespace
+} // namespace ocl
+} // namespace gmx
#endif
*
* Currently caching is disabled by default until we resolve concurrency
* issues; set the GMX_OCL_GENCACHE environment variable to enable it. */
-static bool useBuildCache = getenv("GMX_OCL_GENCACHE"); // (NULL == getenv("GMX_OCL_NOGENCACHE"));
+static bool useBuildCache = getenv("GMX_OCL_GENCACHE") != nullptr;
/*! \brief Handles writing the OpenCL JIT compilation log to \c fplog.
*
{
cl_int cl_error;
const char *warpSizeKernel = "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
- cl_program program = clCreateProgramWithSource(context, 1, (const char**)&warpSizeKernel, nullptr, &cl_error);
+ cl_program program = clCreateProgramWithSource(context, 1, &warpSizeKernel, nullptr, &cl_error);
if (cl_error != CL_SUCCESS)
{
GMX_THROW(InternalError("Could not create OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
{
GMX_RELEASE_ASSERT(str != nullptr, "A pointer to an actual string must be provided");
std::string::iterator newEnd =
- std::unique( str->begin(), str->end(), [ = ](char a, char b){ return isspace(a) && (a == b); } );
+ std::unique( str->begin(), str->end(), [ = ](char a, char b){ return isspace(a) != 0 && (a == b); } );
str->erase(newEnd, str->end());
}
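// For example, "a  b \t c" becomes "a b \t c": std::unique() drops a character
// only when it equals its predecessor and both are whitespace, so runs of the
// same whitespace character collapse while mixed whitespace is preserved.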
return program;
}
-} // namespace
-} // namespace
+} // namespace ocl
+} // namespace gmx
cl_device_id deviceId,
ocl_vendor_id_t deviceVendorId);
-} // namespace
-} // namespace
+} // namespace ocl
+} // namespace gmx
#endif
return;
}
- getGpuAtomRange(adat, aloc, adat_begin, adat_len);
+ getGpuAtomRange(adat, aloc, &adat_begin, &adat_len);
/* beginning of timed D2H section */
if (bDoTime)
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/force_flags.h"
+#include "gromacs/mdlib/nb_verlet.h"
#include "gromacs/mdlib/nbnxn_gpu_types.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/timing/gpu_timing.h"
template <typename AtomDataT>
static inline void getGpuAtomRange(const AtomDataT *atomData,
int atomLocality,
- int &atomRangeBegin,
- int &atomRangeLen)
+ int *atomRangeBegin,
+ int *atomRangeLen)
{
assert(atomData);
validateGpuAtomLocality(atomLocality);
/* calculate the atom data index range based on locality */
if (LOCAL_A(atomLocality))
{
- atomRangeBegin = 0;
- atomRangeLen = atomData->natoms_local;
+ *atomRangeBegin = 0;
+ *atomRangeLen = atomData->natoms_local;
}
else
{
- atomRangeBegin = atomData->natoms_local;
- atomRangeLen = atomData->natoms - atomData->natoms_local;
+ *atomRangeBegin = atomData->natoms_local;
+ *atomRangeLen = atomData->natoms - atomData->natoms_local;
}
}
}
}
+//TODO: move into shared source file with gmx_compile_cpp_as_cuda
+//NOLINTNEXTLINE(misc-definitions-in-headers)
bool nbnxn_gpu_try_finish_task(gmx_nbnxn_gpu_t *nb,
int flags,
int aloc,
gpuStreamSynchronize(nb->stream[iLocality]);
}
- bool calcEner = flags & GMX_FORCE_ENERGY;
- bool calcFshift = flags & GMX_FORCE_VIRIAL;
+ bool calcEner = (flags & GMX_FORCE_ENERGY) != 0;
+ bool calcFshift = (flags & GMX_FORCE_VIRIAL) != 0;
- nbnxn_gpu_accumulate_timings(nb->timings, nb->timers, nb->plist[iLocality], aloc, calcEner, nb->bDoTime);
+ nbnxn_gpu_accumulate_timings(nb->timings, nb->timers, nb->plist[iLocality], aloc, calcEner,
+ nb->bDoTime != 0);
nbnxn_gpu_reduce_staged_outputs(nb->nbst, iLocality, calcEner, calcFshift, e_lj, e_el, fshift);
}
* \param[out] e_el Pointer to the electrostatics energy output to accumulate into
* \param[out] fshift Pointer to the shift force buffer to accumulate into
*/
+//NOLINTNEXTLINE(misc-definitions-in-headers) TODO: move into source file
void nbnxn_gpu_wait_finish_task(gmx_nbnxn_gpu_t *nb,
int flags,
int aloc,
https://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/clEnqueueNDRangeKernel.html
*/
device_size_t_size_bits = dinfo->adress_bits;
- host_size_t_size_bits = (cl_uint)(sizeof(size_t) * 8);
+ host_size_t_size_bits = static_cast<cl_uint>(sizeof(size_t) * 8);
/* If sizeof(host size_t) <= sizeof(device size_t)
=> global_work_size components will always be valid
{
size_t device_limit;
- device_limit = (((size_t)1) << device_size_t_size_bits) - 1;
+ device_limit = (1ull << device_size_t_size_bits) - 1;
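/* e.g. a device with 32-bit size_t gives device_limit = 2^32 - 1 = 4294967295;
any host-side global_work_size component above that cannot be represented on
the device. */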
for (int i = 0; i < work_dim; i++)
{
cl_timers_t *t = nb->timers;
cl_command_queue stream = nb->stream[iloc];
- bool bCalcEner = flags & GMX_FORCE_ENERGY;
+ bool bCalcEner = (flags & GMX_FORCE_ENERGY) != 0;
int bCalcFshift = flags & GMX_FORCE_VIRIAL;
- bool bDoTime = nb->bDoTime;
+ bool bDoTime = nb->bDoTime == CL_TRUE;
cl_nbparam_params_t nbparams_params;
if (debug)
{
- fprintf(debug, "Non-bonded GPU launch configuration:\n\tLocal work size: %dx%dx%d\n\t"
- "Global work size : %dx%d\n\t#Super-clusters/clusters: %d/%d (%d)\n",
- (int)(config.blockSize[0]), (int)(config.blockSize[1]), (int)(config.blockSize[2]),
- (int)(config.blockSize[0] * config.gridSize[0]), (int)(config.blockSize[1] * config.gridSize[1]), plist->nsci*c_numClPerSupercl,
+ fprintf(debug, "Non-bonded GPU launch configuration:\n\tLocal work size: %zux%zux%zu\n\t"
+ "Global work size : %zux%zu\n\t#Super-clusters/clusters: %d/%d (%d)\n",
+ config.blockSize[0], config.blockSize[1], config.blockSize[2],
+ config.blockSize[0] * config.gridSize[0], config.blockSize[1] * config.gridSize[1], plist->nsci*c_numClPerSupercl,
c_numClPerSupercl, plist->na_c);
}
cl_plist_t *plist = nb->plist[iloc];
cl_timers_t *t = nb->timers;
cl_command_queue stream = nb->stream[iloc];
- bool bDoTime = nb->bDoTime;
+ bool bDoTime = nb->bDoTime == CL_TRUE;
if (plist->haveFreshList)
{
if (debug)
{
- fprintf(debug, "Pruning GPU kernel launch configuration:\n\tLocal work size: %dx%dx%d\n\t"
- "\tGlobal work size: %dx%d\n\t#Super-clusters/clusters: %d/%d (%d)\n"
+ fprintf(debug, "Pruning GPU kernel launch configuration:\n\tLocal work size: %zux%zux%zu\n\t"
+ "\tGlobal work size: %zux%zu\n\t#Super-clusters/clusters: %d/%d (%d)\n"
"\tShMem: %zu\n",
- (int)(config.blockSize[0]), (int)(config.blockSize[1]), (int)(config.blockSize[2]),
- (int)(config.blockSize[0] * config.gridSize[0]), (int)(config.blockSize[1] * config.gridSize[1]), plist->nsci*c_numClPerSupercl,
+ config.blockSize[0], config.blockSize[1], config.blockSize[2],
+ config.blockSize[0] * config.gridSize[0], config.blockSize[1] * config.gridSize[1], plist->nsci*c_numClPerSupercl,
c_numClPerSupercl, plist->na_c, config.sharedMemorySize);
}
cl_atomdata_t *adat = nb->atdat;
cl_timers_t *t = nb->timers;
- bool bDoTime = nb->bDoTime;
+ bool bDoTime = nb->bDoTime == CL_TRUE;
cl_command_queue stream = nb->stream[iloc];
- bool bCalcEner = flags & GMX_FORCE_ENERGY;
+ bool bCalcEner = (flags & GMX_FORCE_ENERGY) != 0;
int bCalcFshift = flags & GMX_FORCE_VIRIAL;
test case, overall simulation performance was higher with
the API calls, but this has not been tested on AMD OpenCL,
so could be worth considering in future. */
- nb->bNonLocalStreamActive = false;
+ nb->bNonLocalStreamActive = CL_FALSE;
return;
}
- getGpuAtomRange(adat, aloc, adat_begin, adat_len);
+ getGpuAtomRange(adat, aloc, &adat_begin, &adat_len);
/* beginning of timed D2H section */
if (bDoTime)
{
cl_error = clEnqueueMarkerWithWaitList(stream, 0, nullptr, &(nb->nonlocal_done));
assert(CL_SUCCESS == cl_error);
- nb->bNonLocalStreamActive = true;
+ nb->bNonLocalStreamActive = CL_TRUE;
}
/* only transfer energies in the local stream */
*
*/
//if ((dev_info->prop.major >= 3 || bForceAnalyticalEwald) && !bForceTabulatedEwald)
- if ((1 || bForceAnalyticalEwald) && !bForceTabulatedEwald)
+ if (!bForceTabulatedEwald)
{
bUseAnalyticalEwald = true;
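// The removed hard-coded condition shows that on OpenCL the analytical Ewald
// flavor is always taken unless tabulated kernels are explicitly forced, so
// only that override needs checking (unlike the CUDA condition kept above).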
// TODO: handle errors, check clCreateBuffer flags
ad->shift_vec = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, SHIFTS * ad->shift_vec_elem_size, nullptr, &cl_error);
assert(cl_error == CL_SUCCESS);
- ad->bShiftVecUploaded = false;
+ ad->bShiftVecUploaded = CL_FALSE;
/* An element of the fshift device buffer has the same size as one element
of the host side fshift buffer. */
break;
default:
gmx_incons("The requested LJ combination rule is not implemented in the OpenCL GPU accelerated kernels!");
- break;
}
break;
case eintmodFORCESWITCH:
break;
default:
gmx_incons("The requested VdW interaction modifier is not implemented in the GPU accelerated kernels!");
- break;
}
}
else if (ic->vdwtype == evdwPME)
device_id = devInfo->ocl_gpu_id.ocl_device_id;
context_properties[0] = CL_CONTEXT_PLATFORM;
- context_properties[1] = (cl_context_properties) platform_id;
+ context_properties[1] = reinterpret_cast<cl_context_properties>(platform_id);
context_properties[2] = 0; /* Terminates the list of properties */
context = clCreateContext(context_properties, 1, &device_id, nullptr, nullptr, &cl_error);
rank,
devInfo->device_name,
cl_error, ocl_get_error_string(cl_error).c_str());
- return;
}
runtimeData->context = context;
snew(nb->plist[eintNonlocal], 1);
}
- nb->bUseTwoStreams = bLocalAndNonlocal;
+ nb->bUseTwoStreams = static_cast<cl_bool>(bLocalAndNonlocal);
nb->timers = new cl_timers_t();
snew(nb->timings, 1);
snew(nb->dev_rundata, 1);
/* init nbst */
- pmalloc((void**)&nb->nbst.e_lj, sizeof(*nb->nbst.e_lj));
- pmalloc((void**)&nb->nbst.e_el, sizeof(*nb->nbst.e_el));
- pmalloc((void**)&nb->nbst.fshift, SHIFTS * sizeof(*nb->nbst.fshift));
+ pmalloc(reinterpret_cast<void**>(&nb->nbst.e_lj), sizeof(*nb->nbst.e_lj));
+ pmalloc(reinterpret_cast<void**>(&nb->nbst.e_el), sizeof(*nb->nbst.e_el));
+ pmalloc(reinterpret_cast<void**>(&nb->nbst.fshift), SHIFTS * sizeof(*nb->nbst.fshift));
init_plist(nb->plist[eintLocal]);
/* OpenCL timing disabled if GMX_DISABLE_GPU_TIMING is defined. */
- nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
+ nb->bDoTime = static_cast<cl_bool>(getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
/* Create queues only after bDoTime has been initialized */
if (nb->bDoTime)
rank,
nb->dev_info->device_name,
cl_error);
- return;
}
if (nb->bUseTwoStreams)
rank,
nb->dev_info->device_name,
cl_error);
- return;
}
}
if (nb->bDoTime)
{
- init_timers(nb->timers, nb->bUseTwoStreams);
+ init_timers(nb->timers, nb->bUseTwoStreams == CL_TRUE);
init_timings(nb->timings);
}
// Timing accumulation should happen only if there was work to do
// because getLastRangeTime() gets skipped with empty lists later
// which leads to the counter not being reset.
- bool bDoTime = (nb->bDoTime && h_plist->nsci > 0);
+ bool bDoTime = ((nb->bDoTime == CL_TRUE) && h_plist->nsci > 0);
cl_command_queue stream = nb->stream[iloc];
cl_plist_t *d_plist = nb->plist[iloc];
{
ocl_copy_H2D_async(adat->shift_vec, nbatom->shift_vec, 0,
SHIFTS * adat->shift_vec_elem_size, ls, nullptr);
- adat->bShiftVecUploaded = true;
+ adat->bShiftVecUploaded = CL_TRUE;
}
}
cl_int cl_error;
int nalloc, natoms;
bool realloced;
- bool bDoTime = nb->bDoTime;
+ bool bDoTime = nb->bDoTime == CL_TRUE;
cl_timers_t *timers = nb->timers;
cl_atomdata_t *d_atdat = nb->atdat;
cl_command_queue ls = nb->stream[eintLocal];
/* Free kernels */
int kernel_count = sizeof(nb->kernel_ener_noprune_ptr) / sizeof(nb->kernel_ener_noprune_ptr[0][0]);
- free_kernels((cl_kernel*)nb->kernel_ener_noprune_ptr, kernel_count);
+ free_kernels(nb->kernel_ener_noprune_ptr[0], kernel_count);
kernel_count = sizeof(nb->kernel_ener_prune_ptr) / sizeof(nb->kernel_ener_prune_ptr[0][0]);
- free_kernels((cl_kernel*)nb->kernel_ener_prune_ptr, kernel_count);
+ free_kernels(nb->kernel_ener_prune_ptr[0], kernel_count);
kernel_count = sizeof(nb->kernel_noener_noprune_ptr) / sizeof(nb->kernel_noener_noprune_ptr[0][0]);
- free_kernels((cl_kernel*)nb->kernel_noener_noprune_ptr, kernel_count);
+ free_kernels(nb->kernel_noener_noprune_ptr[0], kernel_count);
kernel_count = sizeof(nb->kernel_noener_prune_ptr) / sizeof(nb->kernel_noener_prune_ptr[0][0]);
- free_kernels((cl_kernel*)nb->kernel_noener_prune_ptr, kernel_count);
+ free_kernels(nb->kernel_noener_prune_ptr[0], kernel_count);
free_kernel(&(nb->kernel_memset_f));
free_kernel(&(nb->kernel_memset_f2));