return DeviceStatus::Incompatible;
}
-#if GMX_SYCL_HIPSYCL
- /* At the time of writing:
- * 1. SYCL NB kernels currently don't support sub_group size of 32 or 64, which are the only
- * ones available on NVIDIA and AMD hardware, respectively. That's not a fundamental limitation,
- * but requires porting more OpenCL code, see #3934.
- * 2. hipSYCL does not support cl::sycl::info::device::sub_group_sizes,
- * see https://github.com/illuhad/hipSYCL/pull/449
- */
- const std::vector<size_t> supportedSubGroupSizes{ warpSize };
-#else
const std::vector<size_t> supportedSubGroupSizes =
syclDevice.get_info<cl::sycl::info::device::sub_group_sizes>();
+
+ // Ensure any changes stay in sync with subGroupSize in src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp
+ constexpr size_t requiredSubGroupSizeForNbnxm =
+#if defined(HIPSYCL_PLATFORM_ROCM)
+ GMX_GPU_NB_CLUSTER_SIZE * GMX_GPU_NB_CLUSTER_SIZE;
+#else
+ GMX_GPU_NB_CLUSTER_SIZE * GMX_GPU_NB_CLUSTER_SIZE / 2;
#endif
- const size_t requiredSubGroupSizeForNBNXM = 8;
- if (std::find(supportedSubGroupSizes.begin(), supportedSubGroupSizes.end(), requiredSubGroupSizeForNBNXM)
+
+ if (std::find(supportedSubGroupSizes.begin(), supportedSubGroupSizes.end(), requiredSubGroupSizeForNbnxm)
== supportedSubGroupSizes.end())
{
return DeviceStatus::IncompatibleClusterSize;
// Currently this is ideally suited for 32-wide subgroup size but slightly less so for others,
// e.g. subGroupSize > prunedClusterPairSize on AMD GCN / CDNA.
// Hence, the two are decoupled.
+ // When changing this code, please update requiredSubGroupSizeForNbnxm in src/gromacs/hardware/device_management_sycl.cpp.
constexpr int prunedClusterPairSize = c_clSize * c_splitClSize;
#if defined(HIPSYCL_PLATFORM_ROCM) // SYCL-TODO AMD RDNA/RDNA2 has 32-wide exec; how can we check for that?
gmx_unused constexpr int subGroupSize = c_clSize * c_clSize;