From 103069d89d71e9948019171a35dbe7e745dfb0e8 Mon Sep 17 00:00:00 2001
From: Roland Schulz <roland.schulz@intel.com>
Date: Wed, 4 Jul 2018 17:33:16 -0700
Subject: [PATCH] Fix compiler warnings in OCL

Change-Id: I4d5d0fac37a09bd1e74db946db54c17526414a24
---
 src/gromacs/ewald/pme-gpu-3dfft-ocl.cpp       |   2 +-
 src/gromacs/ewald/pme-gpu-internal.cpp        |   2 +-
 src/gromacs/gpu_utils/gpu_utils_ocl.cpp       |  28 ++---
 src/gromacs/gpu_utils/ocl_caching.cpp         |   8 +-
 src/gromacs/gpu_utils/ocl_compiler.cpp        |  16 +--
 src/gromacs/gpu_utils/oclutils.cpp            |  19 ++--
 src/gromacs/mdlib/nbnxn_gpu_common.h          |   7 +-
 src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp     |  18 ++--
 .../mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp   | 100 +++++++++---------
 .../mdlib/nbnxn_ocl/nbnxn_ocl_jit_support.cpp |   2 +-
 10 files changed, 100 insertions(+), 102 deletions(-)

diff --git a/src/gromacs/ewald/pme-gpu-3dfft-ocl.cpp b/src/gromacs/ewald/pme-gpu-3dfft-ocl.cpp
index c625a7a3ac..dcc29cc69e 100644
--- a/src/gromacs/ewald/pme-gpu-3dfft-ocl.cpp
+++ b/src/gromacs/ewald/pme-gpu-3dfft-ocl.cpp
@@ -137,7 +137,7 @@ GpuParallel3dFft::~GpuParallel3dFft()
 void GpuParallel3dFft::perform3dFft(gmx_fft_direction  dir,
                                     CommandEvent      *timingEvent)
 {
-    constexpr cl_mem                  tempBuffer = nullptr;
+    cl_mem                            tempBuffer = nullptr;
     constexpr std::array<cl_event, 0> waitEvents {{}};
 
     clfftPlanHandle                   plan;
diff --git a/src/gromacs/ewald/pme-gpu-internal.cpp b/src/gromacs/ewald/pme-gpu-internal.cpp
index ec83ef20f9..746974a6b1 100644
--- a/src/gromacs/ewald/pme-gpu-internal.cpp
+++ b/src/gromacs/ewald/pme-gpu-internal.cpp
@@ -99,7 +99,7 @@ static PmeGpuKernelParamsBase *pme_gpu_get_kernel_params_base_ptr(const PmeGpu *
     return kernelParamsPtr;
 }
 
-int pme_gpu_get_atom_data_alignment(const PmeGpu *)
+int pme_gpu_get_atom_data_alignment(const PmeGpu * /*unused*/)
 {
     //TODO: this can be simplified, as PME_ATOM_DATA_ALIGNMENT is now constant
     return PME_ATOM_DATA_ALIGNMENT;
diff --git a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp
index e19bec37e7..0e9769e4fe 100644
--- a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp
+++ b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp
@@ -102,7 +102,7 @@ runningOnCompatibleOSForAmd()
  */
 static int is_gmx_supported_gpu_id(struct gmx_device_info_t *ocl_gpu_device)
 {
-    if ((getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK")) != NULL)
+    if ((getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK")) != nullptr)
     {
         return egpuCompatible;
     }
@@ -184,16 +184,16 @@ void findGpus(gmx_gpu_info_t *gpu_info)
     cl_platform_id *ocl_platform_ids;
     cl_device_type  req_dev_type = CL_DEVICE_TYPE_GPU;
 
-    ocl_platform_ids = NULL;
+    ocl_platform_ids = nullptr;
 
-    if (getenv("GMX_OCL_FORCE_CPU") != NULL)
+    if (getenv("GMX_OCL_FORCE_CPU") != nullptr)
     {
         req_dev_type = CL_DEVICE_TYPE_CPU;
     }
 
     while (1)
     {
-        cl_int status = clGetPlatformIDs(0, NULL, &ocl_platform_count);
+        cl_int status = clGetPlatformIDs(0, nullptr, &ocl_platform_count);
         if (CL_SUCCESS != status)
         {
             GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %u was returned from clGetPlatformIDs: ",
@@ -208,7 +208,7 @@ void findGpus(gmx_gpu_info_t *gpu_info)
 
         snew(ocl_platform_ids, ocl_platform_count);
 
-        status = clGetPlatformIDs(ocl_platform_count, ocl_platform_ids, NULL);
+        status = clGetPlatformIDs(ocl_platform_count, ocl_platform_ids, nullptr);
         if (CL_SUCCESS != status)
         {
             GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %u was returned from clGetPlatformIDs: ",
@@ -220,7 +220,7 @@ void findGpus(gmx_gpu_info_t *gpu_info)
             cl_uint ocl_device_count;
 
             /* If requesting req_dev_type devices fails, just go to the next platform */
-            if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, 0, NULL, &ocl_device_count))
+            if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, 0, nullptr, &ocl_device_count))
             {
                 continue;
             }
@@ -266,19 +266,19 @@ void findGpus(gmx_gpu_info_t *gpu_info)
                     gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_device_id   = ocl_device_ids[j];
 
                     gpu_info->gpu_dev[device_index].device_name[0] = 0;
-                    clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME, sizeof(gpu_info->gpu_dev[device_index].device_name), gpu_info->gpu_dev[device_index].device_name, NULL);
+                    clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME, sizeof(gpu_info->gpu_dev[device_index].device_name), gpu_info->gpu_dev[device_index].device_name, nullptr);
 
                     gpu_info->gpu_dev[device_index].device_version[0] = 0;
-                    clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION, sizeof(gpu_info->gpu_dev[device_index].device_version), gpu_info->gpu_dev[device_index].device_version, NULL);
+                    clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION, sizeof(gpu_info->gpu_dev[device_index].device_version), gpu_info->gpu_dev[device_index].device_version, nullptr);
 
                     gpu_info->gpu_dev[device_index].device_vendor[0] = 0;
-                    clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR, sizeof(gpu_info->gpu_dev[device_index].device_vendor), gpu_info->gpu_dev[device_index].device_vendor, NULL);
+                    clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR, sizeof(gpu_info->gpu_dev[device_index].device_vendor), gpu_info->gpu_dev[device_index].device_vendor, nullptr);
 
                     gpu_info->gpu_dev[device_index].compute_units = 0;
-                    clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(gpu_info->gpu_dev[device_index].compute_units), &(gpu_info->gpu_dev[device_index].compute_units), NULL);
+                    clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(gpu_info->gpu_dev[device_index].compute_units), &(gpu_info->gpu_dev[device_index].compute_units), nullptr);
 
                     gpu_info->gpu_dev[device_index].adress_bits = 0;
-                    clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS, sizeof(gpu_info->gpu_dev[device_index].adress_bits), &(gpu_info->gpu_dev[device_index].adress_bits), NULL);
+                    clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS, sizeof(gpu_info->gpu_dev[device_index].adress_bits), &(gpu_info->gpu_dev[device_index].adress_bits), nullptr);
 
                     gpu_info->gpu_dev[device_index].vendor_e = get_vendor_id(gpu_info->gpu_dev[device_index].device_vendor);
 
@@ -349,7 +349,7 @@ void findGpus(gmx_gpu_info_t *gpu_info)
 //! This function is documented in the header file
 void free_gpu_info(const gmx_gpu_info_t gmx_unused *gpu_info)
 {
-    if (gpu_info == NULL)
+    if (gpu_info == nullptr)
     {
         return;
     }
@@ -466,7 +466,7 @@ void gpu_set_host_malloc_and_free(bool               bUseGpuKernels,
     }
     else
     {
-        *nb_alloc = NULL;
-        *nb_free  = NULL;
+        *nb_alloc = nullptr;
+        *nb_free  = nullptr;
     }
 }
diff --git a/src/gromacs/gpu_utils/ocl_caching.cpp b/src/gromacs/gpu_utils/ocl_caching.cpp
index d6016671ef..566885e5b1 100644
--- a/src/gromacs/gpu_utils/ocl_caching.cpp
+++ b/src/gromacs/gpu_utils/ocl_caching.cpp
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -139,7 +139,7 @@ makeProgramFromCache(const std::string &filename,
                                                    &deviceId,
                                                    &fileSize,
                                                    const_cast<const unsigned char **>(&binary),
-                                                   NULL,
+                                                   nullptr,
                                                    &cl_error);
     if (cl_error != CL_SUCCESS)
     {
@@ -153,7 +153,7 @@ void
 writeBinaryToCache(cl_program program, const std::string &filename)
 {
     size_t fileSize;
-    cl_int cl_error = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(fileSize), &fileSize, NULL);
+    cl_int cl_error = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(fileSize), &fileSize, nullptr);
     if (cl_error != CL_SUCCESS)
     {
         GMX_THROW(InternalError("Could not get OpenCL program binary size, error was " + ocl_get_error_string(cl_error)));
@@ -164,7 +164,7 @@ writeBinaryToCache(cl_program program, const std::string &filename)
     snew(binary, fileSize);
     const unique_cptr<unsigned char> binaryGuard(binary);
 
-    cl_error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(binary), &binary, NULL);
+    cl_error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(binary), &binary, nullptr);
     if (cl_error != CL_SUCCESS)
     {
         GMX_THROW(InternalError("Could not get OpenCL program binary, error was " + ocl_get_error_string(cl_error)));
diff --git a/src/gromacs/gpu_utils/ocl_compiler.cpp b/src/gromacs/gpu_utils/ocl_compiler.cpp
index 3384e20772..bfc1f1c2c5 100644
--- a/src/gromacs/gpu_utils/ocl_compiler.cpp
+++ b/src/gromacs/gpu_utils/ocl_compiler.cpp
@@ -113,7 +113,7 @@ writeOclBuildLog(FILE              *fplog,
                                             deviceId,
                                             CL_PROGRAM_BUILD_LOG,
                                             0,
-                                            NULL,
+                                            nullptr,
                                             &buildLogSize);
     if (cl_error != CL_SUCCESS)
     {
@@ -135,7 +135,7 @@ writeOclBuildLog(FILE              *fplog,
                                          CL_PROGRAM_BUILD_LOG,
                                          buildLogSize,
                                          buildLog,
-                                         NULL);
+                                         nullptr);
         if (cl_error != CL_SUCCESS)
         {
             GMX_THROW(InternalError("Could not get OpenCL program build log, error was " + ocl_get_error_string(cl_error)));
@@ -176,7 +176,7 @@ selectCompilerOptions(ocl_vendor_id_t deviceVendorId)
     }
 
     /* Fastmath imprves performance on all supported arch */
-    if (getenv("GMX_OCL_DISABLE_FASTMATH") == NULL)
+    if (getenv("GMX_OCL_DISABLE_FASTMATH") == nullptr)
     {
         compilerOptions += " -cl-fast-relaxed-math";
     }
@@ -264,13 +264,13 @@ getWarpSize(cl_context context, cl_device_id deviceId)
 {
     cl_int      cl_error;
     const char *warpSizeKernel = "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
-    cl_program  program        = clCreateProgramWithSource(context, 1, (const char**)&warpSizeKernel, NULL, &cl_error);
+    cl_program  program        = clCreateProgramWithSource(context, 1, (const char**)&warpSizeKernel, nullptr, &cl_error);
     if (cl_error != CL_SUCCESS)
     {
         GMX_THROW(InternalError("Could not create OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
     }
 
-    cl_error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+    cl_error = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
     if (cl_error != CL_SUCCESS)
     {
         GMX_THROW(InternalError("Could not build OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
@@ -284,7 +284,7 @@ getWarpSize(cl_context context, cl_device_id deviceId)
 
     size_t warpSize = 0;
     cl_error = clGetKernelWorkGroupInfo(kernel, deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
-                                        sizeof(warpSize), &warpSize, NULL);
+                                        sizeof(warpSize), &warpSize, nullptr);
     if (cl_error != CL_SUCCESS)
     {
         GMX_THROW(InternalError("Could not measure OpenCL warp size, error was " + ocl_get_error_string(cl_error)));
@@ -497,7 +497,7 @@ compileProgram(FILE              *fplog,
 
     /* Build the OpenCL program, keeping the status to potentially
        write to the simulation log file. */
-    cl_int buildStatus = clBuildProgram(program, 0, NULL, preprocessorOptions.c_str(), NULL, NULL);
+    cl_int buildStatus = clBuildProgram(program, 0, nullptr, preprocessorOptions.c_str(), nullptr, nullptr);
 
     /* Write log first, and then throw exception that the user know what is
        the issue even if the build fails. */
@@ -536,7 +536,7 @@ compileProgram(FILE              *fplog,
            => write PTX to file */
         char buffer[STRLEN];
 
-        cl_error = clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(buffer), buffer, NULL);
+        cl_error = clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(buffer), buffer, nullptr);
         if (cl_error != CL_SUCCESS)
         {
             GMX_THROW(InternalError("Could not get OpenCL device info, error was " + ocl_get_error_string(cl_error)));
diff --git a/src/gromacs/gpu_utils/oclutils.cpp b/src/gromacs/gpu_utils/oclutils.cpp
index cfbe2720a8..26e750a2e4 100644
--- a/src/gromacs/gpu_utils/oclutils.cpp
+++ b/src/gromacs/gpu_utils/oclutils.cpp
@@ -60,7 +60,7 @@ int ocl_copy_H2D(cl_mem d_dest, void* h_src,
 {
     cl_int gmx_unused cl_error;
 
-    if (d_dest == NULL || h_src == NULL || bytes == 0)
+    if (d_dest == nullptr || h_src == nullptr || bytes == 0)
     {
         return -1;
     }
@@ -68,13 +68,13 @@ int ocl_copy_H2D(cl_mem d_dest, void* h_src,
     switch (transferKind)
     {
         case GpuApiCallBehavior::Async:
-            cl_error = clEnqueueWriteBuffer(command_queue, d_dest, CL_FALSE, offset, bytes, h_src, 0, NULL, copy_event);
+            cl_error = clEnqueueWriteBuffer(command_queue, d_dest, CL_FALSE, offset, bytes, h_src, 0, nullptr, copy_event);
             assert(cl_error == CL_SUCCESS);
             // TODO: handle errors
             break;
 
         case GpuApiCallBehavior::Sync:
-            cl_error = clEnqueueWriteBuffer(command_queue, d_dest, CL_TRUE, offset, bytes, h_src, 0, NULL, copy_event);
+            cl_error = clEnqueueWriteBuffer(command_queue, d_dest, CL_TRUE, offset, bytes, h_src, 0, nullptr, copy_event);
             assert(cl_error == CL_SUCCESS);
             // TODO: handle errors
             break;
@@ -88,7 +88,7 @@ int ocl_copy_H2D(cl_mem d_dest, void* h_src,
 
 /*! \brief Launches asynchronous host to device memory copy.
  *
- *  If copy_event is not NULL, on return it will contain an event object
+ *  If copy_event is not nullptr, on return it will contain an event object
  *  identifying this particular host to device operation. The event can further
  *  be used to queue a wait for this operation or to query profiling information.
  */
@@ -106,7 +106,7 @@ int ocl_copy_H2D_sync(cl_mem d_dest, void * h_src,
                       size_t offset, size_t bytes,
                       cl_command_queue command_queue)
 {
-    return ocl_copy_H2D(d_dest, h_src, offset, bytes, GpuApiCallBehavior::Sync, command_queue, NULL);
+    return ocl_copy_H2D(d_dest, h_src, offset, bytes, GpuApiCallBehavior::Sync, command_queue, nullptr);
 }
 
 int ocl_copy_D2H(void * h_dest, cl_mem d_src,
@@ -117,7 +117,7 @@ int ocl_copy_D2H(void * h_dest, cl_mem d_src,
 {
     cl_int gmx_unused cl_error;
 
-    if (h_dest == NULL || d_src == NULL || bytes == 0)
+    if (h_dest == nullptr || d_src == nullptr || bytes == 0)
     {
         return -1;
     }
@@ -125,13 +125,13 @@ int ocl_copy_D2H(void * h_dest, cl_mem d_src,
     switch (transferKind)
     {
         case GpuApiCallBehavior::Async:
-            cl_error = clEnqueueReadBuffer(command_queue, d_src, CL_FALSE, offset, bytes, h_dest, 0, NULL, copy_event);
+            cl_error = clEnqueueReadBuffer(command_queue, d_src, CL_FALSE, offset, bytes, h_dest, 0, nullptr, copy_event);
             assert(cl_error == CL_SUCCESS);
             // TODO: handle errors
             break;
 
         case GpuApiCallBehavior::Sync:
-            cl_error = clEnqueueReadBuffer(command_queue, d_src, CL_TRUE, offset, bytes, h_dest, 0, NULL, copy_event);
+            cl_error = clEnqueueReadBuffer(command_queue, d_src, CL_TRUE, offset, bytes, h_dest, 0, nullptr, copy_event);
             assert(cl_error == CL_SUCCESS);
             // TODO: handle errors
             break;
@@ -145,7 +145,7 @@ int ocl_copy_D2H(void * h_dest, cl_mem d_src,
 
 /*! \brief Launches asynchronous device to host memory copy.
  *
- *  If copy_event is not NULL, on return it will contain an event object
+ *  If copy_event is not nullptr, on return it will contain an event object
  *  identifying this particular host to device operation. The event can further
  *  be used to queue a wait for this operation or to query profiling information.
  */
@@ -192,7 +192,6 @@ void pfree(void *h_ptr)
     {
         sfree_aligned(h_ptr);
     }
-    return;
 }
 
 /*! \brief Convert error code to diagnostic string */
diff --git a/src/gromacs/mdlib/nbnxn_gpu_common.h b/src/gromacs/mdlib/nbnxn_gpu_common.h
index b11be12411..90f4bc6204 100644
--- a/src/gromacs/mdlib/nbnxn_gpu_common.h
+++ b/src/gromacs/mdlib/nbnxn_gpu_common.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2017, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -60,6 +60,7 @@
 #include "gromacs/mdlib/nbnxn_gpu_types.h"
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/timing/gpu_timing.h"
+#include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/stringutil.h"
 
 #include "nbnxn_gpu_common_utils.h"
@@ -102,9 +103,7 @@ static inline int gpuAtomToInteractionLocality(int atomLocality)
     }
     else
     {
-        // can't be reached
-        assert(false);
-        return -1;
+        gmx_incons("Wrong locality");
     }
 }
 
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp
index af56d62424..c2a055b79c 100644
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp
@@ -265,7 +265,7 @@ static inline cl_kernel select_nbnxn_kernel(gmx_nbnxn_ocl_t   *nb,
         }
     }
 
-    if (NULL == kernel_ptr[0])
+    if (nullptr == kernel_ptr[0])
     {
         *kernel_ptr = clCreateKernel(nb->dev_rundata->program, kernel_name_to_run, &cl_error);
         assert(cl_error == CL_SUCCESS);
@@ -351,13 +351,13 @@ static void sync_ocl_event(cl_command_queue stream, cl_event *ocl_event)
     cl_int gmx_unused cl_error;
 
     /* Enqueue wait */
-    cl_error = clEnqueueBarrierWithWaitList(stream, 1, ocl_event, NULL);
+    cl_error = clEnqueueBarrierWithWaitList(stream, 1, ocl_event, nullptr);
     GMX_RELEASE_ASSERT(CL_SUCCESS == cl_error, ocl_get_error_string(cl_error).c_str());
 
     /* Release event and reset it to 0. It is ok to release it as enqueuewaitforevents performs implicit retain for events. */
     cl_error = clReleaseEvent(*ocl_event);
     assert(CL_SUCCESS == cl_error);
-    *ocl_event = 0;
+    *ocl_event = nullptr;
 }
 
 /*! \brief Launch GPU kernel
@@ -385,7 +385,7 @@ void nbnxn_gpu_launch_kernel(gmx_nbnxn_ocl_t               *nb,
 {
     int                  adat_begin, adat_len; /* local/nonlocal offset and length used for xq and f */
     /* OpenCL kernel launch-related stuff */
-    cl_kernel            nb_kernel = NULL;     /* fn pointer to the nonbonded kernel */
+    cl_kernel            nb_kernel = nullptr;  /* fn pointer to the nonbonded kernel */
 
     cl_atomdata_t       *adat    = nb->atdat;
     cl_nbparam_t        *nbp     = nb->nbparam;
@@ -449,7 +449,7 @@ void nbnxn_gpu_launch_kernel(gmx_nbnxn_ocl_t               *nb,
     {
         if (iloc == eintLocal)
         {
-            cl_int gmx_used_in_debug cl_error = clEnqueueMarkerWithWaitList(stream, 0, NULL, &(nb->misc_ops_and_local_H2D_done));
+            cl_int gmx_used_in_debug cl_error = clEnqueueMarkerWithWaitList(stream, 0, nullptr, &(nb->misc_ops_and_local_H2D_done));
             assert(CL_SUCCESS == cl_error);
 
             /* Based on the v1.2 section 5.13 of the OpenCL spec, a flush is needed
@@ -762,7 +762,7 @@ void nbnxn_gpu_launch_cpyback(gmx_nbnxn_ocl_t               *nb,
        data back first. */
     if (iloc == eintNonlocal)
     {
-        cl_error = clEnqueueMarkerWithWaitList(stream, 0, NULL, &(nb->nonlocal_done));
+        cl_error = clEnqueueMarkerWithWaitList(stream, 0, nullptr, &(nb->nonlocal_done));
         assert(CL_SUCCESS == cl_error);
         nb->bNonLocalStreamActive = true;
     }
@@ -803,8 +803,8 @@ int nbnxn_gpu_pick_ewald_kernel_type(bool bTwinCut)
 
     /* Benchmarking/development environment variables to force the use of
        analytical or tabulated Ewald kernel. */
-    bForceAnalyticalEwald = (getenv("GMX_OCL_NB_ANA_EWALD") != NULL);
-    bForceTabulatedEwald  = (getenv("GMX_OCL_NB_TAB_EWALD") != NULL);
+    bForceAnalyticalEwald = (getenv("GMX_OCL_NB_ANA_EWALD") != nullptr);
+    bForceTabulatedEwald  = (getenv("GMX_OCL_NB_TAB_EWALD") != nullptr);
 
     if (bForceAnalyticalEwald && bForceTabulatedEwald)
     {
@@ -840,7 +840,7 @@ int nbnxn_gpu_pick_ewald_kernel_type(bool bTwinCut)
 
     /* Use twin cut-off kernels if requested by bTwinCut or the env. var.
        forces it (use it for debugging/benchmarking only). */
-    if (!bTwinCut && (getenv("GMX_OCL_NB_EWALD_TWINCUT") == NULL))
+    if (!bTwinCut && (getenv("GMX_OCL_NB_EWALD_TWINCUT") == nullptr))
     {
         kernel_type = bUseAnalyticalEwald ? eelOclEWALD_ANA : eelOclEWALD_TAB;
     }
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp
index 348456c124..cbd7eb8e1f 100644
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp
@@ -108,7 +108,7 @@ static void init_ewald_coulomb_force_table(const interaction_const_t       *ic,
 
     cl_int       cl_error;
 
-    if (nbp->coulomb_tab_climg2d != NULL)
+    if (nbp->coulomb_tab_climg2d != nullptr)
     {
         freeDeviceBuffer(&(nbp->coulomb_tab_climg2d));
     }
@@ -145,33 +145,33 @@ static void init_atomdata_first(cl_atomdata_t *ad, int ntypes, gmx_device_runtim
 
     /* An element of the shift_vec device buffer has the same size as one element
        of the host side shift_vec buffer. */
-    ad->shift_vec_elem_size = sizeof(*(((nbnxn_atomdata_t*)0)->shift_vec));
+    ad->shift_vec_elem_size = sizeof(*nbnxn_atomdata_t::shift_vec);
 
     // TODO: handle errors, check clCreateBuffer flags
-    ad->shift_vec = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, SHIFTS * ad->shift_vec_elem_size, NULL, &cl_error);
+    ad->shift_vec = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, SHIFTS * ad->shift_vec_elem_size, nullptr, &cl_error);
     assert(cl_error == CL_SUCCESS);
     ad->bShiftVecUploaded = false;
 
     /* An element of the fshift device buffer has the same size as one element
        of the host side fshift buffer. */
-    ad->fshift_elem_size = sizeof(*(((cl_nb_staging_t*)0)->fshift));
+    ad->fshift_elem_size = sizeof(*cl_nb_staging_t::fshift);
 
-    ad->fshift = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, SHIFTS * ad->fshift_elem_size, NULL, &cl_error);
+    ad->fshift = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, SHIFTS * ad->fshift_elem_size, nullptr, &cl_error);
     assert(cl_error == CL_SUCCESS);
     // TODO: handle errors, check clCreateBuffer flags
 
-    ad->e_lj = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, sizeof(float), NULL, &cl_error);
+    ad->e_lj = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, sizeof(float), nullptr, &cl_error);
     assert(cl_error == CL_SUCCESS);
     // TODO: handle errors, check clCreateBuffer flags
 
-    ad->e_el = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, sizeof(float), NULL, &cl_error);
+    ad->e_el = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, sizeof(float), nullptr, &cl_error);
     assert(cl_error == CL_SUCCESS);
     // TODO: handle errors, check clCreateBuffer flags
 
-    /* initialize to NULL pointers to data that is not allocated here and will
+    /* initialize to nullptr pointers to data that is not allocated here and will
        need reallocation in nbnxn_gpu_init_atomdata */
-    ad->xq = NULL;
-    ad->f  = NULL;
+    ad->xq = nullptr;
+    ad->f  = nullptr;
 
     /* size -1 indicates that the respective array hasn't been initialized yet */
     ad->natoms = -1;
@@ -317,7 +317,7 @@ static void init_nbparam(cl_nbparam_t                    *nbp,
         }
     }
     /* generate table for PME */
-    nbp->coulomb_tab_climg2d = NULL;
+    nbp->coulomb_tab_climg2d = nullptr;
     if (nbp->eeltype == eelOclEWALD_TAB || nbp->eeltype == eelOclEWALD_TAB_TWIN)
     {
         init_ewald_coulomb_force_table(ic, nbp, runData);
@@ -325,7 +325,7 @@ static void init_nbparam(cl_nbparam_t                    *nbp,
     else
     // TODO: improvement needed.
     // The image2d is created here even if eeltype is not eelCuEWALD_TAB or eelCuEWALD_TAB_TWIN because the OpenCL kernels
-    // don't accept NULL values for image2D parameters.
+    // don't accept nullptr values for image2D parameters.
     {
         /* Switched from using textures to using buffers */
         // TODO: decide which alternative is most efficient - textures or buffers.
@@ -336,10 +336,10 @@ static void init_nbparam(cl_nbparam_t                    *nbp,
            array_format.image_channel_order     = CL_R;
 
            nbp->coulomb_tab_climg2d = clCreateImage2D(runData->context, CL_MEM_READ_WRITE,
-            &array_format, 1, 1, 0, NULL, &cl_error);
+            &array_format, 1, 1, 0, nullptr, &cl_error);
          */
 
-        nbp->coulomb_tab_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY, sizeof(cl_float), NULL, &cl_error);
+        nbp->coulomb_tab_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY, sizeof(cl_float), nullptr, &cl_error);
         // TODO: handle errors
     }
 
@@ -379,12 +379,12 @@ static void init_nbparam(cl_nbparam_t                    *nbp,
         {
             // TODO: improvement needed.
             // The image2d is created here even if vdwtype is not evdwPME because the OpenCL kernels
-            // don't accept NULL values for image2D parameters.
+            // don't accept nullptr values for image2D parameters.
             /* Switched from using textures to using buffers */
             // TODO: decide which alternative is most efficient - textures or buffers.
             /* nbp->nbfp_comb_climg2d = clCreateImage2D(runData->context, CL_MEM_READ_WRITE,
-                &array_format, 1, 1, 0, NULL, &cl_error);*/
-            nbp->nbfp_comb_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY, sizeof(cl_float), NULL, &cl_error);
+                &array_format, 1, 1, 0, nullptr, &cl_error);*/
+            nbp->nbfp_comb_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY, sizeof(cl_float), nullptr, &cl_error);
 
 
             assert(cl_error == CL_SUCCESS);
@@ -416,12 +416,12 @@ void nbnxn_gpu_pme_loadbal_update_param(const nonbonded_verlet_t    *nbv,
  */
 static void init_plist(cl_plist_t *pl)
 {
-    /* initialize to NULL pointers to data that is not allocated here and will
+    /* initialize to nullptr pointers to data that is not allocated here and will
        need reallocation in nbnxn_gpu_init_pairlist */
-    pl->sci     = NULL;
-    pl->cj4     = NULL;
-    pl->imask   = NULL;
-    pl->excl    = NULL;
+    pl->sci     = nullptr;
+    pl->cj4     = nullptr;
+    pl->imask   = nullptr;
+    pl->excl    = nullptr;
 
     /* size -1 indicates that the respective array hasn't been initialized yet */
     pl->na_c           = -1;
@@ -494,8 +494,8 @@ nbnxn_gpu_create_context(gmx_device_runtime_data_t *runtimeData,
     cl_context                context;
     cl_int                    cl_error;
 
-    assert(runtimeData != NULL);
-    assert(devInfo != NULL);
+    assert(runtimeData != nullptr);
+    assert(devInfo != nullptr);
 
     platform_id      = devInfo->ocl_gpu_id.ocl_platform_id;
     device_id        = devInfo->ocl_gpu_id.ocl_device_id;
@@ -504,7 +504,7 @@ nbnxn_gpu_create_context(gmx_device_runtime_data_t *runtimeData,
     context_properties[1] = (cl_context_properties) platform_id;
     context_properties[2] = 0; /* Terminates the list of properties */
 
-    context = clCreateContext(context_properties, 1, &device_id, NULL, NULL, &cl_error);
+    context = clCreateContext(context_properties, 1, &device_id, nullptr, nullptr, &cl_error);
     if (CL_SUCCESS != cl_error)
     {
         gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s:\n OpenCL error %d: %s",
@@ -566,7 +566,7 @@ nbnxn_ocl_clear_e_fshift(gmx_nbnxn_ocl_t *nb)
     cl_error |= clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_uint), &shifts);
     GMX_ASSERT(cl_error == CL_SUCCESS, ocl_get_error_string(cl_error).c_str());
 
-    cl_error = clEnqueueNDRangeKernel(ls, zero_e_fshift, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+    cl_error = clEnqueueNDRangeKernel(ls, zero_e_fshift, 3, nullptr, global_work_size, local_work_size, 0, nullptr, nullptr);
     GMX_ASSERT(cl_error == CL_SUCCESS, ocl_get_error_string(cl_error).c_str());
 }
 
@@ -627,7 +627,7 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t          **p_nb,
 
     assert(ic);
 
-    if (p_nb == NULL)
+    if (p_nb == nullptr)
     {
         return;
     }
@@ -658,7 +658,7 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t          **p_nb,
     init_plist(nb->plist[eintLocal]);
 
     /* OpenCL timing disabled if GMX_DISABLE_GPU_TIMING is defined. */
-    nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == NULL);
+    nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
 
     /* Create queues only after bDoTime has been initialized */
     if (nb->bDoTime)
@@ -710,8 +710,8 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t          **p_nb,
      * TODO: decide about NVIDIA
      */
     nb->bPrefetchLjParam =
-        (getenv("GMX_OCL_DISABLE_I_PREFETCH") == NULL) &&
-        ((nb->dev_info->vendor_e == OCL_VENDOR_AMD) || (getenv("GMX_OCL_ENABLE_I_PREFETCH") != NULL));
+        (getenv("GMX_OCL_DISABLE_I_PREFETCH") == nullptr) &&
+        ((nb->dev_info->vendor_e == OCL_VENDOR_AMD) || (getenv("GMX_OCL_ENABLE_I_PREFETCH") != nullptr));
 
     /* NOTE: in CUDA we pick L1 cache configuration for the nbnxn kernels here,
      * but sadly this is not supported in OpenCL (yet?). Consider adding it if
@@ -765,7 +765,7 @@ static void nbnxn_ocl_clear_f(gmx_nbnxn_ocl_t *nb, int natoms_clear)
     cl_error |= clSetKernelArg(memset_f, arg_no++, sizeof(cl_uint), &natoms_flat);
     assert(cl_error == CL_SUCCESS);
 
-    cl_error = clEnqueueNDRangeKernel(ls, memset_f, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+    cl_error = clEnqueueNDRangeKernel(ls, memset_f, 3, nullptr, global_work_size, local_work_size, 0, nullptr, nullptr);
     assert(cl_error == CL_SUCCESS);
 }
 
@@ -867,7 +867,7 @@ void nbnxn_gpu_upload_shiftvec(gmx_nbnxn_ocl_t        *nb,
     if (nbatom->bDynamicBox || !adat->bShiftVecUploaded)
     {
         ocl_copy_H2D_async(adat->shift_vec, nbatom->shift_vec, 0,
-                           SHIFTS * adat->shift_vec_elem_size, ls, NULL);
+                           SHIFTS * adat->shift_vec_elem_size, ls, nullptr);
         adat->bShiftVecUploaded = true;
     }
 }
@@ -911,25 +911,25 @@ void nbnxn_gpu_init_atomdata(gmx_nbnxn_ocl_t               *nb,
         d_atdat->f_elem_size = sizeof(rvec);
 
         // TODO: handle errors, check clCreateBuffer flags
-        d_atdat->f = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * d_atdat->f_elem_size, NULL, &cl_error);
+        d_atdat->f = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * d_atdat->f_elem_size, nullptr, &cl_error);
         assert(CL_SUCCESS == cl_error);
 
         // TODO: change the flag to read-only
-        d_atdat->xq = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(cl_float4), NULL, &cl_error);
+        d_atdat->xq = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(cl_float4), nullptr, &cl_error);
         assert(CL_SUCCESS == cl_error);
         // TODO: handle errors, check clCreateBuffer flags
 
         if (useLjCombRule(nb->nbparam->vdwtype))
         {
             // TODO: change the flag to read-only
-            d_atdat->lj_comb = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(cl_float2), NULL, &cl_error);
+            d_atdat->lj_comb = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(cl_float2), nullptr, &cl_error);
             assert(CL_SUCCESS == cl_error);
             // TODO: handle errors, check clCreateBuffer flags
         }
         else
         {
             // TODO: change the flag to read-only
-            d_atdat->atom_types = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(int), NULL, &cl_error);
+            d_atdat->atom_types = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(int), nullptr, &cl_error);
             assert(CL_SUCCESS == cl_error);
             // TODO: handle errors, check clCreateBuffer flags
         }
@@ -974,14 +974,14 @@ static void free_kernel(cl_kernel *kernel_ptr)
 {
     cl_int gmx_unused cl_error;
 
-    assert(NULL != kernel_ptr);
+    assert(nullptr != kernel_ptr);
 
     if (*kernel_ptr)
     {
         cl_error = clReleaseKernel(*kernel_ptr);
         assert(cl_error == CL_SUCCESS);
 
-        *kernel_ptr = NULL;
+        *kernel_ptr = nullptr;
     }
 }
 
@@ -1005,7 +1005,7 @@ static void free_kernels(cl_kernel *kernels, int count)
  */
 static void free_gpu_device_runtime_data(gmx_device_runtime_data_t *runData)
 {
-    if (runData == NULL)
+    if (runData == nullptr)
     {
         return;
     }
@@ -1015,14 +1015,14 @@ static void free_gpu_device_runtime_data(gmx_device_runtime_data_t *runData)
     if (runData->context)
     {
         cl_error         = clReleaseContext(runData->context);
-        runData->context = NULL;
+        runData->context = nullptr;
         assert(CL_SUCCESS == cl_error);
     }
 
     if (runData->program)
     {
         cl_error         = clReleaseProgram(runData->program);
-        runData->program = NULL;
+        runData->program = nullptr;
         assert(CL_SUCCESS == cl_error);
     }
 
@@ -1031,7 +1031,7 @@ static void free_gpu_device_runtime_data(gmx_device_runtime_data_t *runData)
 //! This function is documented in the header file
 void nbnxn_gpu_free(gmx_nbnxn_ocl_t *nb)
 {
-    if (nb == NULL)
+    if (nb == nullptr)
     {
         return;
     }
@@ -1090,32 +1090,32 @@ void nbnxn_gpu_free(gmx_nbnxn_ocl_t *nb)
 
     /* Free nbst */
     pfree(nb->nbst.e_lj);
-    nb->nbst.e_lj = NULL;
+    nb->nbst.e_lj = nullptr;
 
     pfree(nb->nbst.e_el);
-    nb->nbst.e_el = NULL;
+    nb->nbst.e_el = nullptr;
 
     pfree(nb->nbst.fshift);
-    nb->nbst.fshift = NULL;
+    nb->nbst.fshift = nullptr;
 
     /* Free command queues */
     clReleaseCommandQueue(nb->stream[eintLocal]);
-    nb->stream[eintLocal] = NULL;
+    nb->stream[eintLocal] = nullptr;
     if (nb->bUseTwoStreams)
     {
         clReleaseCommandQueue(nb->stream[eintNonlocal]);
-        nb->stream[eintNonlocal] = NULL;
+        nb->stream[eintNonlocal] = nullptr;
     }
     /* Free other events */
     if (nb->nonlocal_done)
     {
         clReleaseEvent(nb->nonlocal_done);
-        nb->nonlocal_done = NULL;
+        nb->nonlocal_done = nullptr;
     }
     if (nb->misc_ops_and_local_H2D_done)
     {
         clReleaseEvent(nb->misc_ops_and_local_H2D_done);
-        nb->misc_ops_and_local_H2D_done = NULL;
+        nb->misc_ops_and_local_H2D_done = nullptr;
     }
 
     free_gpu_device_runtime_data(nb->dev_rundata);
@@ -1150,7 +1150,7 @@ void nbnxn_gpu_reset_timings(nonbonded_verlet_t* nbv)
 //! This function is documented in the header file
 int nbnxn_gpu_min_ci_balanced(gmx_nbnxn_ocl_t *nb)
 {
-    return nb != NULL ?
+    return nb != nullptr ?
            gpu_min_ci_balanced_factor * nb->dev_info->compute_units : 0;
 }
 
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_jit_support.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_jit_support.cpp
index 874869a2f3..9c5eaebea3 100644
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_jit_support.cpp
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_jit_support.cpp
@@ -179,7 +179,7 @@ nbnxn_gpu_compile_kernels(gmx_nbnxn_ocl_t *nb)
     gmx_bool                  bFastGen = TRUE;
     cl_program                program  = nullptr;
 
-    if (getenv("GMX_OCL_NOFASTGEN") != NULL)
+    if (getenv("GMX_OCL_NOFASTGEN") != nullptr)
     {
         bFastGen = FALSE;
     }
-- 
2.22.0