From 103069d89d71e9948019171a35dbe7e745dfb0e8 Mon Sep 17 00:00:00 2001 From: Roland Schulz Date: Wed, 4 Jul 2018 17:33:16 -0700 Subject: [PATCH] Fix compiler warnings in OCL Change-Id: I4d5d0fac37a09bd1e74db946db54c17526414a24 --- src/gromacs/ewald/pme-gpu-3dfft-ocl.cpp | 2 +- src/gromacs/ewald/pme-gpu-internal.cpp | 2 +- src/gromacs/gpu_utils/gpu_utils_ocl.cpp | 28 ++--- src/gromacs/gpu_utils/ocl_caching.cpp | 8 +- src/gromacs/gpu_utils/ocl_compiler.cpp | 16 +-- src/gromacs/gpu_utils/oclutils.cpp | 19 ++-- src/gromacs/mdlib/nbnxn_gpu_common.h | 7 +- src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp | 18 ++-- .../mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp | 100 +++++++++--------- .../mdlib/nbnxn_ocl/nbnxn_ocl_jit_support.cpp | 2 +- 10 files changed, 100 insertions(+), 102 deletions(-) diff --git a/src/gromacs/ewald/pme-gpu-3dfft-ocl.cpp b/src/gromacs/ewald/pme-gpu-3dfft-ocl.cpp index c625a7a3ac..dcc29cc69e 100644 --- a/src/gromacs/ewald/pme-gpu-3dfft-ocl.cpp +++ b/src/gromacs/ewald/pme-gpu-3dfft-ocl.cpp @@ -137,7 +137,7 @@ GpuParallel3dFft::~GpuParallel3dFft() void GpuParallel3dFft::perform3dFft(gmx_fft_direction dir, CommandEvent *timingEvent) { - constexpr cl_mem tempBuffer = nullptr; + cl_mem tempBuffer = nullptr; constexpr std::array waitEvents {{}}; clfftPlanHandle plan; diff --git a/src/gromacs/ewald/pme-gpu-internal.cpp b/src/gromacs/ewald/pme-gpu-internal.cpp index ec83ef20f9..746974a6b1 100644 --- a/src/gromacs/ewald/pme-gpu-internal.cpp +++ b/src/gromacs/ewald/pme-gpu-internal.cpp @@ -99,7 +99,7 @@ static PmeGpuKernelParamsBase *pme_gpu_get_kernel_params_base_ptr(const PmeGpu * return kernelParamsPtr; } -int pme_gpu_get_atom_data_alignment(const PmeGpu *) +int pme_gpu_get_atom_data_alignment(const PmeGpu * /*unused*/) { //TODO: this can be simplified, as PME_ATOM_DATA_ALIGNMENT is now constant return PME_ATOM_DATA_ALIGNMENT; diff --git a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp index e19bec37e7..0e9769e4fe 100644 --- a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp +++ b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp @@ -102,7 +102,7 @@ runningOnCompatibleOSForAmd() */ static int is_gmx_supported_gpu_id(struct gmx_device_info_t *ocl_gpu_device) { - if ((getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK")) != NULL) + if ((getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK")) != nullptr) { return egpuCompatible; } @@ -184,16 +184,16 @@ void findGpus(gmx_gpu_info_t *gpu_info) cl_platform_id *ocl_platform_ids; cl_device_type req_dev_type = CL_DEVICE_TYPE_GPU; - ocl_platform_ids = NULL; + ocl_platform_ids = nullptr; - if (getenv("GMX_OCL_FORCE_CPU") != NULL) + if (getenv("GMX_OCL_FORCE_CPU") != nullptr) { req_dev_type = CL_DEVICE_TYPE_CPU; } while (1) { - cl_int status = clGetPlatformIDs(0, NULL, &ocl_platform_count); + cl_int status = clGetPlatformIDs(0, nullptr, &ocl_platform_count); if (CL_SUCCESS != status) { GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %u was returned from clGetPlatformIDs: ", @@ -208,7 +208,7 @@ void findGpus(gmx_gpu_info_t *gpu_info) snew(ocl_platform_ids, ocl_platform_count); - status = clGetPlatformIDs(ocl_platform_count, ocl_platform_ids, NULL); + status = clGetPlatformIDs(ocl_platform_count, ocl_platform_ids, nullptr); if (CL_SUCCESS != status) { GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %u was returned from clGetPlatformIDs: ", @@ -220,7 +220,7 @@ void findGpus(gmx_gpu_info_t *gpu_info) cl_uint ocl_device_count; /* If requesting req_dev_type devices fails, just go to the next platform */ - if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, 0, NULL, &ocl_device_count)) + if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, 0, nullptr, &ocl_device_count)) { continue; } @@ -266,19 +266,19 @@ void findGpus(gmx_gpu_info_t *gpu_info) gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_device_id = ocl_device_ids[j]; gpu_info->gpu_dev[device_index].device_name[0] = 0; - clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME, sizeof(gpu_info->gpu_dev[device_index].device_name), gpu_info->gpu_dev[device_index].device_name, NULL); + clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME, sizeof(gpu_info->gpu_dev[device_index].device_name), gpu_info->gpu_dev[device_index].device_name, nullptr); gpu_info->gpu_dev[device_index].device_version[0] = 0; - clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION, sizeof(gpu_info->gpu_dev[device_index].device_version), gpu_info->gpu_dev[device_index].device_version, NULL); + clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION, sizeof(gpu_info->gpu_dev[device_index].device_version), gpu_info->gpu_dev[device_index].device_version, nullptr); gpu_info->gpu_dev[device_index].device_vendor[0] = 0; - clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR, sizeof(gpu_info->gpu_dev[device_index].device_vendor), gpu_info->gpu_dev[device_index].device_vendor, NULL); + clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR, sizeof(gpu_info->gpu_dev[device_index].device_vendor), gpu_info->gpu_dev[device_index].device_vendor, nullptr); gpu_info->gpu_dev[device_index].compute_units = 0; - clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(gpu_info->gpu_dev[device_index].compute_units), &(gpu_info->gpu_dev[device_index].compute_units), NULL); + clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(gpu_info->gpu_dev[device_index].compute_units), &(gpu_info->gpu_dev[device_index].compute_units), nullptr); gpu_info->gpu_dev[device_index].adress_bits = 0; - clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS, sizeof(gpu_info->gpu_dev[device_index].adress_bits), &(gpu_info->gpu_dev[device_index].adress_bits), NULL); + clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS, sizeof(gpu_info->gpu_dev[device_index].adress_bits), &(gpu_info->gpu_dev[device_index].adress_bits), nullptr); gpu_info->gpu_dev[device_index].vendor_e = get_vendor_id(gpu_info->gpu_dev[device_index].device_vendor); @@ -349,7 +349,7 @@ void findGpus(gmx_gpu_info_t *gpu_info) //! This function is documented in the header file void free_gpu_info(const gmx_gpu_info_t gmx_unused *gpu_info) { - if (gpu_info == NULL) + if (gpu_info == nullptr) { return; } @@ -466,7 +466,7 @@ void gpu_set_host_malloc_and_free(bool bUseGpuKernels, } else { - *nb_alloc = NULL; - *nb_free = NULL; + *nb_alloc = nullptr; + *nb_free = nullptr; } } diff --git a/src/gromacs/gpu_utils/ocl_caching.cpp b/src/gromacs/gpu_utils/ocl_caching.cpp index d6016671ef..566885e5b1 100644 --- a/src/gromacs/gpu_utils/ocl_caching.cpp +++ b/src/gromacs/gpu_utils/ocl_caching.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2016,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -139,7 +139,7 @@ makeProgramFromCache(const std::string &filename, &deviceId, &fileSize, const_cast(&binary), - NULL, + nullptr, &cl_error); if (cl_error != CL_SUCCESS) { @@ -153,7 +153,7 @@ void writeBinaryToCache(cl_program program, const std::string &filename) { size_t fileSize; - cl_int cl_error = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(fileSize), &fileSize, NULL); + cl_int cl_error = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(fileSize), &fileSize, nullptr); if (cl_error != CL_SUCCESS) { GMX_THROW(InternalError("Could not get OpenCL program binary size, error was " + ocl_get_error_string(cl_error))); @@ -164,7 +164,7 @@ writeBinaryToCache(cl_program program, const std::string &filename) snew(binary, fileSize); const unique_cptr binaryGuard(binary); - cl_error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(binary), &binary, NULL); + cl_error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(binary), &binary, nullptr); if (cl_error != CL_SUCCESS) { GMX_THROW(InternalError("Could not get OpenCL program binary, error was " + ocl_get_error_string(cl_error))); diff --git a/src/gromacs/gpu_utils/ocl_compiler.cpp b/src/gromacs/gpu_utils/ocl_compiler.cpp index 3384e20772..bfc1f1c2c5 100644 --- a/src/gromacs/gpu_utils/ocl_compiler.cpp +++ b/src/gromacs/gpu_utils/ocl_compiler.cpp @@ -113,7 +113,7 @@ writeOclBuildLog(FILE *fplog, deviceId, CL_PROGRAM_BUILD_LOG, 0, - NULL, + nullptr, &buildLogSize); if (cl_error != CL_SUCCESS) { @@ -135,7 +135,7 @@ writeOclBuildLog(FILE *fplog, CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, - NULL); + nullptr); if (cl_error != CL_SUCCESS) { GMX_THROW(InternalError("Could not get OpenCL program build log, error was " + ocl_get_error_string(cl_error))); @@ -176,7 +176,7 @@ selectCompilerOptions(ocl_vendor_id_t deviceVendorId) } /* Fastmath imprves performance on all supported arch */ - if (getenv("GMX_OCL_DISABLE_FASTMATH") == NULL) + if (getenv("GMX_OCL_DISABLE_FASTMATH") == nullptr) { compilerOptions += " -cl-fast-relaxed-math"; } @@ -264,13 +264,13 @@ getWarpSize(cl_context context, cl_device_id deviceId) { cl_int cl_error; const char *warpSizeKernel = "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}"; - cl_program program = clCreateProgramWithSource(context, 1, (const char**)&warpSizeKernel, NULL, &cl_error); + cl_program program = clCreateProgramWithSource(context, 1, (const char**)&warpSizeKernel, nullptr, &cl_error); if (cl_error != CL_SUCCESS) { GMX_THROW(InternalError("Could not create OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error))); } - cl_error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); + cl_error = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr); if (cl_error != CL_SUCCESS) { GMX_THROW(InternalError("Could not build OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error))); @@ -284,7 +284,7 @@ getWarpSize(cl_context context, cl_device_id deviceId) size_t warpSize = 0; cl_error = clGetKernelWorkGroupInfo(kernel, deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, - sizeof(warpSize), &warpSize, NULL); + sizeof(warpSize), &warpSize, nullptr); if (cl_error != CL_SUCCESS) { GMX_THROW(InternalError("Could not measure OpenCL warp size, error was " + ocl_get_error_string(cl_error))); @@ -497,7 +497,7 @@ compileProgram(FILE *fplog, /* Build the OpenCL program, keeping the status to potentially write to the simulation log file. */ - cl_int buildStatus = clBuildProgram(program, 0, NULL, preprocessorOptions.c_str(), NULL, NULL); + cl_int buildStatus = clBuildProgram(program, 0, nullptr, preprocessorOptions.c_str(), nullptr, nullptr); /* Write log first, and then throw exception that the user know what is the issue even if the build fails. */ @@ -536,7 +536,7 @@ compileProgram(FILE *fplog, => write PTX to file */ char buffer[STRLEN]; - cl_error = clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(buffer), buffer, NULL); + cl_error = clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(buffer), buffer, nullptr); if (cl_error != CL_SUCCESS) { GMX_THROW(InternalError("Could not get OpenCL device info, error was " + ocl_get_error_string(cl_error))); diff --git a/src/gromacs/gpu_utils/oclutils.cpp b/src/gromacs/gpu_utils/oclutils.cpp index cfbe2720a8..26e750a2e4 100644 --- a/src/gromacs/gpu_utils/oclutils.cpp +++ b/src/gromacs/gpu_utils/oclutils.cpp @@ -60,7 +60,7 @@ int ocl_copy_H2D(cl_mem d_dest, void* h_src, { cl_int gmx_unused cl_error; - if (d_dest == NULL || h_src == NULL || bytes == 0) + if (d_dest == nullptr || h_src == nullptr || bytes == 0) { return -1; } @@ -68,13 +68,13 @@ int ocl_copy_H2D(cl_mem d_dest, void* h_src, switch (transferKind) { case GpuApiCallBehavior::Async: - cl_error = clEnqueueWriteBuffer(command_queue, d_dest, CL_FALSE, offset, bytes, h_src, 0, NULL, copy_event); + cl_error = clEnqueueWriteBuffer(command_queue, d_dest, CL_FALSE, offset, bytes, h_src, 0, nullptr, copy_event); assert(cl_error == CL_SUCCESS); // TODO: handle errors break; case GpuApiCallBehavior::Sync: - cl_error = clEnqueueWriteBuffer(command_queue, d_dest, CL_TRUE, offset, bytes, h_src, 0, NULL, copy_event); + cl_error = clEnqueueWriteBuffer(command_queue, d_dest, CL_TRUE, offset, bytes, h_src, 0, nullptr, copy_event); assert(cl_error == CL_SUCCESS); // TODO: handle errors break; @@ -88,7 +88,7 @@ int ocl_copy_H2D(cl_mem d_dest, void* h_src, /*! \brief Launches asynchronous host to device memory copy. * - * If copy_event is not NULL, on return it will contain an event object + * If copy_event is not nullptr, on return it will contain an event object * identifying this particular host to device operation. The event can further * be used to queue a wait for this operation or to query profiling information. */ @@ -106,7 +106,7 @@ int ocl_copy_H2D_sync(cl_mem d_dest, void * h_src, size_t offset, size_t bytes, cl_command_queue command_queue) { - return ocl_copy_H2D(d_dest, h_src, offset, bytes, GpuApiCallBehavior::Sync, command_queue, NULL); + return ocl_copy_H2D(d_dest, h_src, offset, bytes, GpuApiCallBehavior::Sync, command_queue, nullptr); } int ocl_copy_D2H(void * h_dest, cl_mem d_src, @@ -117,7 +117,7 @@ int ocl_copy_D2H(void * h_dest, cl_mem d_src, { cl_int gmx_unused cl_error; - if (h_dest == NULL || d_src == NULL || bytes == 0) + if (h_dest == nullptr || d_src == nullptr || bytes == 0) { return -1; } @@ -125,13 +125,13 @@ int ocl_copy_D2H(void * h_dest, cl_mem d_src, switch (transferKind) { case GpuApiCallBehavior::Async: - cl_error = clEnqueueReadBuffer(command_queue, d_src, CL_FALSE, offset, bytes, h_dest, 0, NULL, copy_event); + cl_error = clEnqueueReadBuffer(command_queue, d_src, CL_FALSE, offset, bytes, h_dest, 0, nullptr, copy_event); assert(cl_error == CL_SUCCESS); // TODO: handle errors break; case GpuApiCallBehavior::Sync: - cl_error = clEnqueueReadBuffer(command_queue, d_src, CL_TRUE, offset, bytes, h_dest, 0, NULL, copy_event); + cl_error = clEnqueueReadBuffer(command_queue, d_src, CL_TRUE, offset, bytes, h_dest, 0, nullptr, copy_event); assert(cl_error == CL_SUCCESS); // TODO: handle errors break; @@ -145,7 +145,7 @@ int ocl_copy_D2H(void * h_dest, cl_mem d_src, /*! \brief Launches asynchronous device to host memory copy. * - * If copy_event is not NULL, on return it will contain an event object + * If copy_event is not nullptr, on return it will contain an event object * identifying this particular host to device operation. The event can further * be used to queue a wait for this operation or to query profiling information. */ @@ -192,7 +192,6 @@ void pfree(void *h_ptr) { sfree_aligned(h_ptr); } - return; } /*! \brief Convert error code to diagnostic string */ diff --git a/src/gromacs/mdlib/nbnxn_gpu_common.h b/src/gromacs/mdlib/nbnxn_gpu_common.h index b11be12411..90f4bc6204 100644 --- a/src/gromacs/mdlib/nbnxn_gpu_common.h +++ b/src/gromacs/mdlib/nbnxn_gpu_common.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2017, by the GROMACS development team, led by + * Copyright (c) 2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -60,6 +60,7 @@ #include "gromacs/mdlib/nbnxn_gpu_types.h" #include "gromacs/pbcutil/ishift.h" #include "gromacs/timing/gpu_timing.h" +#include "gromacs/utility/fatalerror.h" #include "gromacs/utility/stringutil.h" #include "nbnxn_gpu_common_utils.h" @@ -102,9 +103,7 @@ static inline int gpuAtomToInteractionLocality(int atomLocality) } else { - // can't be reached - assert(false); - return -1; + gmx_incons("Wrong locality"); } } diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp index af56d62424..c2a055b79c 100644 --- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp +++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp @@ -265,7 +265,7 @@ static inline cl_kernel select_nbnxn_kernel(gmx_nbnxn_ocl_t *nb, } } - if (NULL == kernel_ptr[0]) + if (nullptr == kernel_ptr[0]) { *kernel_ptr = clCreateKernel(nb->dev_rundata->program, kernel_name_to_run, &cl_error); assert(cl_error == CL_SUCCESS); @@ -351,13 +351,13 @@ static void sync_ocl_event(cl_command_queue stream, cl_event *ocl_event) cl_int gmx_unused cl_error; /* Enqueue wait */ - cl_error = clEnqueueBarrierWithWaitList(stream, 1, ocl_event, NULL); + cl_error = clEnqueueBarrierWithWaitList(stream, 1, ocl_event, nullptr); GMX_RELEASE_ASSERT(CL_SUCCESS == cl_error, ocl_get_error_string(cl_error).c_str()); /* Release event and reset it to 0. It is ok to release it as enqueuewaitforevents performs implicit retain for events. */ cl_error = clReleaseEvent(*ocl_event); assert(CL_SUCCESS == cl_error); - *ocl_event = 0; + *ocl_event = nullptr; } /*! \brief Launch GPU kernel @@ -385,7 +385,7 @@ void nbnxn_gpu_launch_kernel(gmx_nbnxn_ocl_t *nb, { int adat_begin, adat_len; /* local/nonlocal offset and length used for xq and f */ /* OpenCL kernel launch-related stuff */ - cl_kernel nb_kernel = NULL; /* fn pointer to the nonbonded kernel */ + cl_kernel nb_kernel = nullptr; /* fn pointer to the nonbonded kernel */ cl_atomdata_t *adat = nb->atdat; cl_nbparam_t *nbp = nb->nbparam; @@ -449,7 +449,7 @@ void nbnxn_gpu_launch_kernel(gmx_nbnxn_ocl_t *nb, { if (iloc == eintLocal) { - cl_int gmx_used_in_debug cl_error = clEnqueueMarkerWithWaitList(stream, 0, NULL, &(nb->misc_ops_and_local_H2D_done)); + cl_int gmx_used_in_debug cl_error = clEnqueueMarkerWithWaitList(stream, 0, nullptr, &(nb->misc_ops_and_local_H2D_done)); assert(CL_SUCCESS == cl_error); /* Based on the v1.2 section 5.13 of the OpenCL spec, a flush is needed @@ -762,7 +762,7 @@ void nbnxn_gpu_launch_cpyback(gmx_nbnxn_ocl_t *nb, data back first. */ if (iloc == eintNonlocal) { - cl_error = clEnqueueMarkerWithWaitList(stream, 0, NULL, &(nb->nonlocal_done)); + cl_error = clEnqueueMarkerWithWaitList(stream, 0, nullptr, &(nb->nonlocal_done)); assert(CL_SUCCESS == cl_error); nb->bNonLocalStreamActive = true; } @@ -803,8 +803,8 @@ int nbnxn_gpu_pick_ewald_kernel_type(bool bTwinCut) /* Benchmarking/development environment variables to force the use of analytical or tabulated Ewald kernel. */ - bForceAnalyticalEwald = (getenv("GMX_OCL_NB_ANA_EWALD") != NULL); - bForceTabulatedEwald = (getenv("GMX_OCL_NB_TAB_EWALD") != NULL); + bForceAnalyticalEwald = (getenv("GMX_OCL_NB_ANA_EWALD") != nullptr); + bForceTabulatedEwald = (getenv("GMX_OCL_NB_TAB_EWALD") != nullptr); if (bForceAnalyticalEwald && bForceTabulatedEwald) { @@ -840,7 +840,7 @@ int nbnxn_gpu_pick_ewald_kernel_type(bool bTwinCut) /* Use twin cut-off kernels if requested by bTwinCut or the env. var. forces it (use it for debugging/benchmarking only). */ - if (!bTwinCut && (getenv("GMX_OCL_NB_EWALD_TWINCUT") == NULL)) + if (!bTwinCut && (getenv("GMX_OCL_NB_EWALD_TWINCUT") == nullptr)) { kernel_type = bUseAnalyticalEwald ? eelOclEWALD_ANA : eelOclEWALD_TAB; } diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp index 348456c124..cbd7eb8e1f 100644 --- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp +++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp @@ -108,7 +108,7 @@ static void init_ewald_coulomb_force_table(const interaction_const_t *ic, cl_int cl_error; - if (nbp->coulomb_tab_climg2d != NULL) + if (nbp->coulomb_tab_climg2d != nullptr) { freeDeviceBuffer(&(nbp->coulomb_tab_climg2d)); } @@ -145,33 +145,33 @@ static void init_atomdata_first(cl_atomdata_t *ad, int ntypes, gmx_device_runtim /* An element of the shift_vec device buffer has the same size as one element of the host side shift_vec buffer. */ - ad->shift_vec_elem_size = sizeof(*(((nbnxn_atomdata_t*)0)->shift_vec)); + ad->shift_vec_elem_size = sizeof(*nbnxn_atomdata_t::shift_vec); // TODO: handle errors, check clCreateBuffer flags - ad->shift_vec = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, SHIFTS * ad->shift_vec_elem_size, NULL, &cl_error); + ad->shift_vec = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, SHIFTS * ad->shift_vec_elem_size, nullptr, &cl_error); assert(cl_error == CL_SUCCESS); ad->bShiftVecUploaded = false; /* An element of the fshift device buffer has the same size as one element of the host side fshift buffer. */ - ad->fshift_elem_size = sizeof(*(((cl_nb_staging_t*)0)->fshift)); + ad->fshift_elem_size = sizeof(*cl_nb_staging_t::fshift); - ad->fshift = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, SHIFTS * ad->fshift_elem_size, NULL, &cl_error); + ad->fshift = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, SHIFTS * ad->fshift_elem_size, nullptr, &cl_error); assert(cl_error == CL_SUCCESS); // TODO: handle errors, check clCreateBuffer flags - ad->e_lj = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, sizeof(float), NULL, &cl_error); + ad->e_lj = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, sizeof(float), nullptr, &cl_error); assert(cl_error == CL_SUCCESS); // TODO: handle errors, check clCreateBuffer flags - ad->e_el = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, sizeof(float), NULL, &cl_error); + ad->e_el = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, sizeof(float), nullptr, &cl_error); assert(cl_error == CL_SUCCESS); // TODO: handle errors, check clCreateBuffer flags - /* initialize to NULL pointers to data that is not allocated here and will + /* initialize to nullptr pointers to data that is not allocated here and will need reallocation in nbnxn_gpu_init_atomdata */ - ad->xq = NULL; - ad->f = NULL; + ad->xq = nullptr; + ad->f = nullptr; /* size -1 indicates that the respective array hasn't been initialized yet */ ad->natoms = -1; @@ -317,7 +317,7 @@ static void init_nbparam(cl_nbparam_t *nbp, } } /* generate table for PME */ - nbp->coulomb_tab_climg2d = NULL; + nbp->coulomb_tab_climg2d = nullptr; if (nbp->eeltype == eelOclEWALD_TAB || nbp->eeltype == eelOclEWALD_TAB_TWIN) { init_ewald_coulomb_force_table(ic, nbp, runData); @@ -325,7 +325,7 @@ static void init_nbparam(cl_nbparam_t *nbp, else // TODO: improvement needed. // The image2d is created here even if eeltype is not eelCuEWALD_TAB or eelCuEWALD_TAB_TWIN because the OpenCL kernels - // don't accept NULL values for image2D parameters. + // don't accept nullptr values for image2D parameters. { /* Switched from using textures to using buffers */ // TODO: decide which alternative is most efficient - textures or buffers. @@ -336,10 +336,10 @@ static void init_nbparam(cl_nbparam_t *nbp, array_format.image_channel_order = CL_R; nbp->coulomb_tab_climg2d = clCreateImage2D(runData->context, CL_MEM_READ_WRITE, - &array_format, 1, 1, 0, NULL, &cl_error); + &array_format, 1, 1, 0, nullptr, &cl_error); */ - nbp->coulomb_tab_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY, sizeof(cl_float), NULL, &cl_error); + nbp->coulomb_tab_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY, sizeof(cl_float), nullptr, &cl_error); // TODO: handle errors } @@ -379,12 +379,12 @@ static void init_nbparam(cl_nbparam_t *nbp, { // TODO: improvement needed. // The image2d is created here even if vdwtype is not evdwPME because the OpenCL kernels - // don't accept NULL values for image2D parameters. + // don't accept nullptr values for image2D parameters. /* Switched from using textures to using buffers */ // TODO: decide which alternative is most efficient - textures or buffers. /* nbp->nbfp_comb_climg2d = clCreateImage2D(runData->context, CL_MEM_READ_WRITE, - &array_format, 1, 1, 0, NULL, &cl_error);*/ - nbp->nbfp_comb_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY, sizeof(cl_float), NULL, &cl_error); + &array_format, 1, 1, 0, nullptr, &cl_error);*/ + nbp->nbfp_comb_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY, sizeof(cl_float), nullptr, &cl_error); assert(cl_error == CL_SUCCESS); @@ -416,12 +416,12 @@ void nbnxn_gpu_pme_loadbal_update_param(const nonbonded_verlet_t *nbv, */ static void init_plist(cl_plist_t *pl) { - /* initialize to NULL pointers to data that is not allocated here and will + /* initialize to nullptr pointers to data that is not allocated here and will need reallocation in nbnxn_gpu_init_pairlist */ - pl->sci = NULL; - pl->cj4 = NULL; - pl->imask = NULL; - pl->excl = NULL; + pl->sci = nullptr; + pl->cj4 = nullptr; + pl->imask = nullptr; + pl->excl = nullptr; /* size -1 indicates that the respective array hasn't been initialized yet */ pl->na_c = -1; @@ -494,8 +494,8 @@ nbnxn_gpu_create_context(gmx_device_runtime_data_t *runtimeData, cl_context context; cl_int cl_error; - assert(runtimeData != NULL); - assert(devInfo != NULL); + assert(runtimeData != nullptr); + assert(devInfo != nullptr); platform_id = devInfo->ocl_gpu_id.ocl_platform_id; device_id = devInfo->ocl_gpu_id.ocl_device_id; @@ -504,7 +504,7 @@ nbnxn_gpu_create_context(gmx_device_runtime_data_t *runtimeData, context_properties[1] = (cl_context_properties) platform_id; context_properties[2] = 0; /* Terminates the list of properties */ - context = clCreateContext(context_properties, 1, &device_id, NULL, NULL, &cl_error); + context = clCreateContext(context_properties, 1, &device_id, nullptr, nullptr, &cl_error); if (CL_SUCCESS != cl_error) { gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s:\n OpenCL error %d: %s", @@ -566,7 +566,7 @@ nbnxn_ocl_clear_e_fshift(gmx_nbnxn_ocl_t *nb) cl_error |= clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_uint), &shifts); GMX_ASSERT(cl_error == CL_SUCCESS, ocl_get_error_string(cl_error).c_str()); - cl_error = clEnqueueNDRangeKernel(ls, zero_e_fshift, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL); + cl_error = clEnqueueNDRangeKernel(ls, zero_e_fshift, 3, nullptr, global_work_size, local_work_size, 0, nullptr, nullptr); GMX_ASSERT(cl_error == CL_SUCCESS, ocl_get_error_string(cl_error).c_str()); } @@ -627,7 +627,7 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t **p_nb, assert(ic); - if (p_nb == NULL) + if (p_nb == nullptr) { return; } @@ -658,7 +658,7 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t **p_nb, init_plist(nb->plist[eintLocal]); /* OpenCL timing disabled if GMX_DISABLE_GPU_TIMING is defined. */ - nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == NULL); + nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr); /* Create queues only after bDoTime has been initialized */ if (nb->bDoTime) @@ -710,8 +710,8 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t **p_nb, * TODO: decide about NVIDIA */ nb->bPrefetchLjParam = - (getenv("GMX_OCL_DISABLE_I_PREFETCH") == NULL) && - ((nb->dev_info->vendor_e == OCL_VENDOR_AMD) || (getenv("GMX_OCL_ENABLE_I_PREFETCH") != NULL)); + (getenv("GMX_OCL_DISABLE_I_PREFETCH") == nullptr) && + ((nb->dev_info->vendor_e == OCL_VENDOR_AMD) || (getenv("GMX_OCL_ENABLE_I_PREFETCH") != nullptr)); /* NOTE: in CUDA we pick L1 cache configuration for the nbnxn kernels here, * but sadly this is not supported in OpenCL (yet?). Consider adding it if @@ -765,7 +765,7 @@ static void nbnxn_ocl_clear_f(gmx_nbnxn_ocl_t *nb, int natoms_clear) cl_error |= clSetKernelArg(memset_f, arg_no++, sizeof(cl_uint), &natoms_flat); assert(cl_error == CL_SUCCESS); - cl_error = clEnqueueNDRangeKernel(ls, memset_f, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL); + cl_error = clEnqueueNDRangeKernel(ls, memset_f, 3, nullptr, global_work_size, local_work_size, 0, nullptr, nullptr); assert(cl_error == CL_SUCCESS); } @@ -867,7 +867,7 @@ void nbnxn_gpu_upload_shiftvec(gmx_nbnxn_ocl_t *nb, if (nbatom->bDynamicBox || !adat->bShiftVecUploaded) { ocl_copy_H2D_async(adat->shift_vec, nbatom->shift_vec, 0, - SHIFTS * adat->shift_vec_elem_size, ls, NULL); + SHIFTS * adat->shift_vec_elem_size, ls, nullptr); adat->bShiftVecUploaded = true; } } @@ -911,25 +911,25 @@ void nbnxn_gpu_init_atomdata(gmx_nbnxn_ocl_t *nb, d_atdat->f_elem_size = sizeof(rvec); // TODO: handle errors, check clCreateBuffer flags - d_atdat->f = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * d_atdat->f_elem_size, NULL, &cl_error); + d_atdat->f = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * d_atdat->f_elem_size, nullptr, &cl_error); assert(CL_SUCCESS == cl_error); // TODO: change the flag to read-only - d_atdat->xq = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(cl_float4), NULL, &cl_error); + d_atdat->xq = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(cl_float4), nullptr, &cl_error); assert(CL_SUCCESS == cl_error); // TODO: handle errors, check clCreateBuffer flags if (useLjCombRule(nb->nbparam->vdwtype)) { // TODO: change the flag to read-only - d_atdat->lj_comb = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(cl_float2), NULL, &cl_error); + d_atdat->lj_comb = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(cl_float2), nullptr, &cl_error); assert(CL_SUCCESS == cl_error); // TODO: handle errors, check clCreateBuffer flags } else { // TODO: change the flag to read-only - d_atdat->atom_types = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(int), NULL, &cl_error); + d_atdat->atom_types = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(int), nullptr, &cl_error); assert(CL_SUCCESS == cl_error); // TODO: handle errors, check clCreateBuffer flags } @@ -974,14 +974,14 @@ static void free_kernel(cl_kernel *kernel_ptr) { cl_int gmx_unused cl_error; - assert(NULL != kernel_ptr); + assert(nullptr != kernel_ptr); if (*kernel_ptr) { cl_error = clReleaseKernel(*kernel_ptr); assert(cl_error == CL_SUCCESS); - *kernel_ptr = NULL; + *kernel_ptr = nullptr; } } @@ -1005,7 +1005,7 @@ static void free_kernels(cl_kernel *kernels, int count) */ static void free_gpu_device_runtime_data(gmx_device_runtime_data_t *runData) { - if (runData == NULL) + if (runData == nullptr) { return; } @@ -1015,14 +1015,14 @@ static void free_gpu_device_runtime_data(gmx_device_runtime_data_t *runData) if (runData->context) { cl_error = clReleaseContext(runData->context); - runData->context = NULL; + runData->context = nullptr; assert(CL_SUCCESS == cl_error); } if (runData->program) { cl_error = clReleaseProgram(runData->program); - runData->program = NULL; + runData->program = nullptr; assert(CL_SUCCESS == cl_error); } @@ -1031,7 +1031,7 @@ static void free_gpu_device_runtime_data(gmx_device_runtime_data_t *runData) //! This function is documented in the header file void nbnxn_gpu_free(gmx_nbnxn_ocl_t *nb) { - if (nb == NULL) + if (nb == nullptr) { return; } @@ -1090,32 +1090,32 @@ void nbnxn_gpu_free(gmx_nbnxn_ocl_t *nb) /* Free nbst */ pfree(nb->nbst.e_lj); - nb->nbst.e_lj = NULL; + nb->nbst.e_lj = nullptr; pfree(nb->nbst.e_el); - nb->nbst.e_el = NULL; + nb->nbst.e_el = nullptr; pfree(nb->nbst.fshift); - nb->nbst.fshift = NULL; + nb->nbst.fshift = nullptr; /* Free command queues */ clReleaseCommandQueue(nb->stream[eintLocal]); - nb->stream[eintLocal] = NULL; + nb->stream[eintLocal] = nullptr; if (nb->bUseTwoStreams) { clReleaseCommandQueue(nb->stream[eintNonlocal]); - nb->stream[eintNonlocal] = NULL; + nb->stream[eintNonlocal] = nullptr; } /* Free other events */ if (nb->nonlocal_done) { clReleaseEvent(nb->nonlocal_done); - nb->nonlocal_done = NULL; + nb->nonlocal_done = nullptr; } if (nb->misc_ops_and_local_H2D_done) { clReleaseEvent(nb->misc_ops_and_local_H2D_done); - nb->misc_ops_and_local_H2D_done = NULL; + nb->misc_ops_and_local_H2D_done = nullptr; } free_gpu_device_runtime_data(nb->dev_rundata); @@ -1150,7 +1150,7 @@ void nbnxn_gpu_reset_timings(nonbonded_verlet_t* nbv) //! This function is documented in the header file int nbnxn_gpu_min_ci_balanced(gmx_nbnxn_ocl_t *nb) { - return nb != NULL ? + return nb != nullptr ? gpu_min_ci_balanced_factor * nb->dev_info->compute_units : 0; } diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_jit_support.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_jit_support.cpp index 874869a2f3..9c5eaebea3 100644 --- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_jit_support.cpp +++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_jit_support.cpp @@ -179,7 +179,7 @@ nbnxn_gpu_compile_kernels(gmx_nbnxn_ocl_t *nb) gmx_bool bFastGen = TRUE; cl_program program = nullptr; - if (getenv("GMX_OCL_NOFASTGEN") != NULL) + if (getenv("GMX_OCL_NOFASTGEN") != nullptr) { bFastGen = FALSE; } -- 2.22.0