From ac2fbebd3eed97ca17db731c4b28e03657420f78 Mon Sep 17 00:00:00 2001 From: Andrey Alekseenko Date: Fri, 18 Jun 2021 09:16:12 +0000 Subject: [PATCH] Update device detection code for hipSYCL --- .../hardware/device_management_sycl.cpp | 21 ++++++++----------- src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp | 1 + 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/gromacs/hardware/device_management_sycl.cpp b/src/gromacs/hardware/device_management_sycl.cpp index 3b687f3660..c021062f67 100644 --- a/src/gromacs/hardware/device_management_sycl.cpp +++ b/src/gromacs/hardware/device_management_sycl.cpp @@ -106,21 +106,18 @@ static DeviceStatus isDeviceCompatible(const cl::sycl::device& syclDevice) return DeviceStatus::Incompatible; } -#if GMX_SYCL_HIPSYCL - /* At the time of writing: - * 1. SYCL NB kernels currently don't support sub_group size of 32 or 64, which are the only - * ones available on NVIDIA and AMD hardware, respectively. That's not a fundamental limitation, - * but requires porting more OpenCL code, see #3934. - * 2. hipSYCL does not support cl::sycl::info::device::sub_group_sizes, - * see https://github.com/illuhad/hipSYCL/pull/449 - */ - const std::vector supportedSubGroupSizes{ warpSize }; -#else const std::vector supportedSubGroupSizes = syclDevice.get_info(); + + // Ensure any changes stay in sync with subGroupSize in src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp + constexpr size_t requiredSubGroupSizeForNbnxm = +#if defined(HIPSYCL_PLATFORM_ROCM) + GMX_GPU_NB_CLUSTER_SIZE * GMX_GPU_NB_CLUSTER_SIZE; +#else + GMX_GPU_NB_CLUSTER_SIZE * GMX_GPU_NB_CLUSTER_SIZE / 2; #endif - const size_t requiredSubGroupSizeForNBNXM = 8; - if (std::find(supportedSubGroupSizes.begin(), supportedSubGroupSizes.end(), requiredSubGroupSizeForNBNXM) + + if (std::find(supportedSubGroupSizes.begin(), supportedSubGroupSizes.end(), requiredSubGroupSizeForNbnxm) == supportedSubGroupSizes.end()) { return DeviceStatus::IncompatibleClusterSize; diff --git a/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp b/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp index c5de5de121..560575fdb5 100644 --- a/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp +++ b/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp @@ -615,6 +615,7 @@ auto nbnxmKernel(cl::sycl::handler& cgh, // Currently this is ideally suited for 32-wide subgroup size but slightly less so for others, // e.g. subGroupSize > prunedClusterPairSize on AMD GCN / CDNA. // Hence, the two are decoupled. + // When changing this code, please update requiredSubGroupSizeForNbnxm in src/gromacs/hardware/device_management_sycl.cpp. constexpr int prunedClusterPairSize = c_clSize * c_splitClSize; #if defined(HIPSYCL_PLATFORM_ROCM) // SYCL-TODO AMD RDNA/RDNA2 has 32-wide exec; how can we check for that? gmx_unused constexpr int subGroupSize = c_clSize * c_clSize; -- 2.22.0