static constexpr auto mode_atomic = GMX_SYCL_DPCPP ? cl::sycl::access::mode::read_write :
/* GMX_SYCL_HIPSYCL */ cl::sycl::access::mode::atomic;
+// \brief Full warp active thread mask used in CUDA warp-level primitives.
+static constexpr unsigned int c_cudaFullWarpMask = 0xffffffff;
+
/*! \brief Convenience wrapper to do atomic addition to a global buffer.
*
* The implementation differences between DPCPP and hipSYCL are explained in \ref mode_atomic.
#endif
}
+/* \brief Issue an intra sub-group barrier.
+ *
+ * Equivalent with CUDA syncwarp(c_cudaFullWarpMask).
+ *
+ */
+static inline void subGroupBarrier(const cl::sycl::nd_item<1> itemIdx)
+{
+#if GMX_SYCL_HIPSYCL
+ cl::sycl::group_barrier(itemIdx.get_sub_group(), cl::sycl::memory_scope::sub_group);
+#else
+ itemIdx.get_sub_group().barrier();
+#endif
+}
+
namespace sycl_2020
{
#if GMX_SYCL_HIPSYCL
// No sycl::sub_group::shift_left / shuffle_down in hipSYCL yet
# ifdef SYCL_DEVICE_ONLY
# if defined(HIPSYCL_PLATFORM_CUDA) && defined(__HIPSYCL_ENABLE_CUDA_TARGET__)
- static const unsigned int sc_cudaFullWarpMask = 0xffffffff;
- return __shfl_down_sync(sc_cudaFullWarpMask, var, delta);
+ return __shfl_down_sync(c_cudaFullWarpMask, var, delta);
# elif defined(HIPSYCL_PLATFORM_ROCM) && defined(__HIPSYCL_ENABLE_HIP_TARGET__)
// Do we need more ifdefs? https://github.com/ROCm-Developer-Tools/HIP/issues/1491
return __shfl_down(var, delta);
// No sycl::sub_group::shift_right / shuffle_up in hipSYCL yet
# ifdef SYCL_DEVICE_ONLY
# if defined(HIPSYCL_PLATFORM_CUDA) && defined(__HIPSYCL_ENABLE_CUDA_TARGET__)
- static const unsigned int sc_cudaFullWarpMask = 0xffffffff;
- return __shfl_up_sync(sc_cudaFullWarpMask, var, delta);
+ return __shfl_up_sync(c_cudaFullWarpMask, var, delta);
# elif defined(HIPSYCL_PLATFORM_ROCM) && defined(__HIPSYCL_ENABLE_HIP_TARGET__)
// Do we need more ifdefs? https://github.com/ROCm-Developer-Tools/HIP/issues/1491
return __shfl_up(var, delta);