Update device detection code for hipSYCL
authorAndrey Alekseenko <al42and@gmail.com>
Fri, 18 Jun 2021 09:16:12 +0000 (09:16 +0000)
committerPaul Bauer <paul.bauer.q@gmail.com>
Fri, 18 Jun 2021 09:16:12 +0000 (09:16 +0000)
src/gromacs/hardware/device_management_sycl.cpp
src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp

index 3b687f3660e7f382c654393aefb258737c7bc937..c021062f670338f21dfca1a06eddadc0619c0d97 100644 (file)
@@ -106,21 +106,18 @@ static DeviceStatus isDeviceCompatible(const cl::sycl::device& syclDevice)
         return DeviceStatus::Incompatible;
     }
 
-#if GMX_SYCL_HIPSYCL
-    /* At the time of writing:
-     * 1. SYCL NB kernels currently don't support sub_group size of 32 or 64, which are the only
-     * ones available on NVIDIA and AMD hardware, respectively. That's not a fundamental limitation,
-     * but requires porting more OpenCL code, see #3934.
-     * 2. hipSYCL does not support cl::sycl::info::device::sub_group_sizes,
-     * see https://github.com/illuhad/hipSYCL/pull/449
-     */
-    const std::vector<size_t> supportedSubGroupSizes{ warpSize };
-#else
     const std::vector<size_t> supportedSubGroupSizes =
             syclDevice.get_info<cl::sycl::info::device::sub_group_sizes>();
+
+    // Ensure any changes stay in sync with subGroupSize in src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp
+    constexpr size_t requiredSubGroupSizeForNbnxm =
+#if defined(HIPSYCL_PLATFORM_ROCM)
+            GMX_GPU_NB_CLUSTER_SIZE * GMX_GPU_NB_CLUSTER_SIZE;
+#else
+            GMX_GPU_NB_CLUSTER_SIZE * GMX_GPU_NB_CLUSTER_SIZE / 2;
 #endif
-    const size_t requiredSubGroupSizeForNBNXM = 8;
-    if (std::find(supportedSubGroupSizes.begin(), supportedSubGroupSizes.end(), requiredSubGroupSizeForNBNXM)
+
+    if (std::find(supportedSubGroupSizes.begin(), supportedSubGroupSizes.end(), requiredSubGroupSizeForNbnxm)
         == supportedSubGroupSizes.end())
     {
         return DeviceStatus::IncompatibleClusterSize;
index c5de5de121b4696917f3ee7304f5ec1febded4f4..560575fdb5cdf95e0dd5859b29c6aed4384dac30 100644 (file)
@@ -615,6 +615,7 @@ auto nbnxmKernel(cl::sycl::handler&                                   cgh,
     // Currently this is ideally suited for 32-wide subgroup size but slightly less so for others,
     // e.g. subGroupSize > prunedClusterPairSize on AMD GCN / CDNA.
     // Hence, the two are decoupled.
+    // When changing this code, please update requiredSubGroupSizeForNbnxm in src/gromacs/hardware/device_management_sycl.cpp.
     constexpr int prunedClusterPairSize = c_clSize * c_splitClSize;
 #if defined(HIPSYCL_PLATFORM_ROCM) // SYCL-TODO AMD RDNA/RDNA2 has 32-wide exec; how can we check for that?
     gmx_unused constexpr int subGroupSize = c_clSize * c_clSize;