From a12ff7816d5e142306b0e4f658e7e0c34390f9e1 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Szil=C3=A1rd=20P=C3=A1ll?= <pall.szilard@gmail.com>
Date: Fri, 26 Mar 2021 08:14:16 +0000
Subject: [PATCH] Allow setting nbnxm cluster size for SYCL

Rename GMX_OPENCL_NB_CLUSTER_SIZE to GMX_GPU_NB_CLUSTER_SIZE and use its
value in SYCL builds too. The former is still taken into account at
cmake-time, but users are advised to use the latter.

Refs #3847 #3933 #3935
---
 CMakeLists.txt                                | 29 +++++++++++++++++++
 cmake/gmxManageOpenCL.cmake                   |  5 +---
 docs/install-guide/index.rst                  |  2 +-
 .../2022/major/deprecated-functionality.rst   |  4 +++
 src/config.h.cmakein                          |  4 +--
 src/gromacs/hardware/device_information.h     |  4 +--
 .../hardware/device_management_ocl.cpp        |  6 ++--
 src/gromacs/nbnxm/pairlistparams.h            |  8 ++---
 8 files changed, 45 insertions(+), 17 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 33dd5ef0c7..4c0ca0ef12 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -557,6 +557,35 @@ if(GMX_GPU)
         message(WARNING "To use GPU acceleration efficiently, mdrun requires OpenMP multi-threading, which is currently not enabled.")
     endif()
 
+    # GMX_OPENCL_NB_CLUSTER_SIZE is the deprecated name of GMX_GPU_NB_CLUSTER_SIZE: warn when
+    # it is set, and refuse to continue if both names are set to different values.
+    if (GMX_OPENCL_NB_CLUSTER_SIZE)
+        message(WARNING "GMX_OPENCL_NB_CLUSTER_SIZE is deprecated, use GMX_GPU_NB_CLUSTER_SIZE instead")
+        if (GMX_GPU_NB_CLUSTER_SIZE AND NOT "${GMX_OPENCL_NB_CLUSTER_SIZE}" EQUAL "${GMX_GPU_NB_CLUSTER_SIZE}")
+            message(FATAL_ERROR "Mismatching values passed to GMX_OPENCL_NB_CLUSTER_SIZE and GMX_GPU_NB_CLUSTER_SIZE; the former is deprecated, use only the latter!")
+        endif()
+    endif()
+    # Only OpenCL and SYCL support changing the default cluster size
+    if ("${_gmx_gpu_uppercase}" STREQUAL "CUDA")
+        if (GMX_GPU_NB_CLUSTER_SIZE AND NOT "${GMX_GPU_NB_CLUSTER_SIZE}" EQUAL 8)
+            message(FATAL_ERROR "Setting GMX_GPU_NB_CLUSTER_SIZE is not supported in CUDA (the default GMX_GPU_NB_CLUSTER_SIZE=8 is used)")
+        endif()
+    else()
+        # Choose the value offered as the cache default: the legacy GMX_OPENCL_NB_CLUSTER_SIZE
+        # if set, otherwise 8 with OpenCL and 4 with SYCL for now.
+        if (GMX_OPENCL_NB_CLUSTER_SIZE)
+            set(_gmx_gpu_nb_cluster_size_value ${GMX_OPENCL_NB_CLUSTER_SIZE})
+        elseif ("${_gmx_gpu_uppercase}" STREQUAL "OPENCL")
+            set(_gmx_gpu_nb_cluster_size_value 8)
+        else()
+            set(_gmx_gpu_nb_cluster_size_value 4)
+        endif()
+        # No FORCE: this only supplies a default, so a GMX_GPU_NB_CLUSTER_SIZE value that is
+        # already in the cache (e.g. passed with -D) is left untouched.
+        set(GMX_GPU_NB_CLUSTER_SIZE ${_gmx_gpu_nb_cluster_size_value} CACHE STRING "Cluster size used by the nonbonded kernel. Set to 4 for Intel GPUs.")
+        mark_as_advanced(GMX_GPU_NB_CLUSTER_SIZE)
+    endif()
+
 endif()
 
 # For build with CUDA and Lib-MPI, check if underlying MPI implementation is CUDA-aware
diff --git a/cmake/gmxManageOpenCL.cmake b/cmake/gmxManageOpenCL.cmake
index 3f849e9e28..f95395fef2 100644
--- a/cmake/gmxManageOpenCL.cmake
+++ b/cmake/gmxManageOpenCL.cmake
@@ -2,7 +2,7 @@
 # This file is part of the GROMACS molecular simulation package.
 #
 # Copyright (c) 2012,2013,2014,2015,2018 by the GROMACS development team.
-# Copyright (c) 2019,2020, by the GROMACS development team, led by
+# Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -79,7 +79,4 @@ if (NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
     message(FATAL_ERROR "The OpenCL implementation is only supported on 64-bit platforms.")
 endif()
 
-set(GMX_OPENCL_NB_CLUSTER_SIZE 8 CACHE STRING "Cluster size used by nonbonded OpenCL kernel. Set to 4 for Intel GPUs.")
-mark_as_advanced(GMX_OPENCL_NB_CLUSTER_SIZE)
-
 set(GMX_INSTALL_OCLDIR       ${GMX_INSTALL_GMXDATADIR}/opencl)
diff --git a/docs/install-guide/index.rst b/docs/install-guide/index.rst
index 7a5ff8d15f..c9666bf246 100644
--- a/docs/install-guide/index.rst
+++ b/docs/install-guide/index.rst
@@ -712,7 +712,7 @@ To trigger an OpenCL_ build the following CMake flags must be set
     cmake .. -DGMX_GPU=OpenCL
 
 To build with support for Intel integrated GPUs, it is required
-to add ``-DGMX_OPENCL_NB_CLUSTER_SIZE=4`` to the cmake command line,
+to add ``-DGMX_GPU_NB_CLUSTER_SIZE=4`` to the cmake command line,
 so that the GPU kernels match the characteristics of the hardware.
 The `Neo driver <https://github.com/intel/compute-runtime/releases>`_
 is recommended.
diff --git a/docs/release-notes/2022/major/deprecated-functionality.rst b/docs/release-notes/2022/major/deprecated-functionality.rst
index 1a7bff774f..8dfd7f85f7 100644
--- a/docs/release-notes/2022/major/deprecated-functionality.rst
+++ b/docs/release-notes/2022/major/deprecated-functionality.rst
@@ -12,3 +12,7 @@ Changes anticipated to |Gromacs| 2022 functionality
 Functionality deprecated in |Gromacs| 2022
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+GMX_OPENCL_NB_CLUSTER_SIZE CMake variable deprecated in favor of GMX_GPU_NB_CLUSTER_SIZE
+""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+Both OpenCL and SYCL builds support a configurable cluster size, so ``GMX_GPU_NB_CLUSTER_SIZE``
+should be used going forward; the old name is still accepted but triggers a CMake warning.
diff --git a/src/config.h.cmakein b/src/config.h.cmakein
index 869ab690ec..1554d4aa85 100644
--- a/src/config.h.cmakein
+++ b/src/config.h.cmakein
@@ -239,8 +239,8 @@
 /* Use CUDA-aware MPI.  */
 #cmakedefine01 HAVE_CUDA_AWARE_MPI
 
-/* Cluster size used by nonbonded OpenCL kernel. Should be 8 for NVIDIA/AMD and 4 for Intel */
-#define GMX_OPENCL_NB_CLUSTER_SIZE @GMX_OPENCL_NB_CLUSTER_SIZE@
+/* Cluster size used by nonbonded kernel. Should be 8 for NVIDIA/AMD and 4 for Intel */
+#define GMX_GPU_NB_CLUSTER_SIZE @GMX_GPU_NB_CLUSTER_SIZE@
 
 /* Define constants for build types (starting at 1 to make sure undefined values don't match) */
 #define CMAKE_BUILD_TYPE_DEBUG 1
diff --git a/src/gromacs/hardware/device_information.h b/src/gromacs/hardware/device_information.h
index 3b02e9ae6f..d5f4dd7683 100644
--- a/src/gromacs/hardware/device_information.h
+++ b/src/gromacs/hardware/device_information.h
@@ -2,7 +2,7 @@
  * This file is part of the GROMACS molecular simulation package.
  *
  * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team.
- * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -107,7 +107,7 @@ static const gmx::EnumerationArray<DeviceStatus, const char*> c_deviceStateStrin
     "incompatible",
     // clang-format off
     // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
-    "incompatible (please recompile with correct GMX" "_OPENCL_NB_CLUSTER_SIZE of 4)",
+    "incompatible (please recompile with correct GMX" "_GPU_NB_CLUSTER_SIZE of 4)",
     // clang-format on
     "incompatible (please use CUDA build for NVIDIA Volta GPUs or newer)",
     "non-functional",
diff --git a/src/gromacs/hardware/device_management_ocl.cpp b/src/gromacs/hardware/device_management_ocl.cpp
index 9f84049072..0f3fbbc56b 100644
--- a/src/gromacs/hardware/device_management_ocl.cpp
+++ b/src/gromacs/hardware/device_management_ocl.cpp
@@ -2,7 +2,7 @@
  * This file is part of the GROMACS molecular simulation package.
  *
  * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team.
- * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -170,8 +170,8 @@ static DeviceStatus isDeviceFunctional(const DeviceInformation& deviceInfo)
         case DeviceVendor::Amd:
             return runningOnCompatibleOSForAmd() ? DeviceStatus::Compatible : DeviceStatus::Incompatible;
         case DeviceVendor::Intel:
-            return GMX_OPENCL_NB_CLUSTER_SIZE == 4 ? DeviceStatus::Compatible
-                                                   : DeviceStatus::IncompatibleClusterSize;
+            return GMX_GPU_NB_CLUSTER_SIZE == 4 ? DeviceStatus::Compatible
+                                                : DeviceStatus::IncompatibleClusterSize;
         default: return DeviceStatus::Incompatible;
     }
 }
diff --git a/src/gromacs/nbnxm/pairlistparams.h b/src/gromacs/nbnxm/pairlistparams.h
index 72bed6748b..a204a3eeda 100644
--- a/src/gromacs/nbnxm/pairlistparams.h
+++ b/src/gromacs/nbnxm/pairlistparams.h
@@ -59,11 +59,9 @@ enum class KernelType;
 //! The i-cluster size for CPU kernels, always 4 atoms
 static constexpr int c_nbnxnCpuIClusterSize = 4;
 
-//! The i- and j-cluster size for GPU lists, 8 atoms for CUDA, set at compile time for OpenCL
-#if GMX_GPU_OPENCL
-static constexpr int c_nbnxnGpuClusterSize = GMX_OPENCL_NB_CLUSTER_SIZE;
-#elif GMX_GPU_SYCL
-static constexpr int c_nbnxnGpuClusterSize = 4;
+//! The i- and j-cluster size for GPU lists, 8 atoms for CUDA, set at compile time for OpenCL and SYCL
+#if GMX_GPU_OPENCL || GMX_GPU_SYCL
+static constexpr int c_nbnxnGpuClusterSize = GMX_GPU_NB_CLUSTER_SIZE;
 #else
 static constexpr int c_nbnxnGpuClusterSize = 8;
 #endif
-- 
2.22.0