* \author Andrey Alekseenko <al42and@gmail.com>
*/
-/*! \brief Access mode to use for atomic accessors.
- *
- * Intel DPCPP compiler has \c sycl::atomic_ref, but has no \c sycl::atomic_fetch_add for floats.
- * However, \c atomic_ref can not be constructed from \c sycl::atomic, so we can not use
- * atomic accessors. Thus, we use \c mode::read_write accessors and \c atomic_ref.
- *
- * hipSYCL does not have \c sycl::atomic_ref, but has \c sycl::atomic_fetch_add for floats, which
- * requires using atomic accessors. Thus, we use \c mode::atomic accessors.
- *
- * The \ref atomicFetchAdd function could be used for doing operations on such accessors.
- */
-static constexpr auto mode_atomic = GMX_SYCL_DPCPP ? cl::sycl::access::mode::read_write :
- /* GMX_SYCL_HIPSYCL */ cl::sycl::access::mode::atomic;
-
//! \brief Full warp active thread mask used in CUDA warp-level primitives.
static constexpr unsigned int c_cudaFullWarpMask = 0xffffffff;
/*! \brief Convenience wrapper to do atomic addition to a global buffer.
- *
- * The implementation differences between DPCPP and hipSYCL are explained in \ref mode_atomic.
*/
-template<class IndexType>
-static inline void atomicFetchAdd(DeviceAccessor<float, mode_atomic> acc, const IndexType idx, const float val)
+template<typename T, sycl_2020::memory_scope MemoryScope = sycl_2020::memory_scope::device>
+static inline void atomicFetchAdd(T& val, const T delta)
{
-#if GMX_SYCL_DPCPP
- sycl_2020::atomic_ref<float, sycl_2020::memory_order::relaxed, sycl_2020::memory_scope::device, cl::sycl::access::address_space::global_space>
- fout_atomic(acc[idx]);
- fout_atomic.fetch_add(val);
-#elif GMX_SYCL_HIPSYCL
-# ifdef SYCL_DEVICE_ONLY
- /* While there is support for float atomics on device, the host implementation uses
- * Clang's __atomic_fetch_add intrinsic, that, at least in Clang 11, does not support
- * floats. Luckily, we don't want to run on host. */
- // The pragmas below can be removed once we switch to sycl::atomic
-# pragma clang diagnostic push
-# pragma clang diagnostic ignored "-Wdeprecated-declarations"
- acc[idx].fetch_add(val);
-# pragma clang diagnostic push
-# else
- GMX_ASSERT(false, "hipSYCL host codepath not supported");
- GMX_UNUSED_VALUE(val);
- GMX_UNUSED_VALUE(acc);
- GMX_UNUSED_VALUE(idx);
-# endif
-#endif
+ sycl_2020::atomic_ref<T, sycl_2020::memory_order::relaxed, MemoryScope, cl::sycl::access::address_space::global_space> ref(
+ val);
+ ref.fetch_add(delta);
+}
+
+/*! \brief Convenience wrapper to do atomic loads from a global buffer.
+ */
+template<typename T, sycl_2020::memory_scope MemoryScope = sycl_2020::memory_scope::device>
+static inline T atomicLoad(T& val)
+{
+ sycl_2020::atomic_ref<T, sycl_2020::memory_order::relaxed, MemoryScope, cl::sycl::access::address_space::global_space> ref(
+ val);
+ return ref.load();
}
+
/*! \brief Issue an intra sub-group barrier.
*
* Equivalent with CUDA's \c syncwarp(c_cudaFullWarpMask).
*
*/
-static inline void subGroupBarrier(const cl::sycl::nd_item<1> itemIdx)
+template<int Dim>
+static inline void subGroupBarrier(const cl::sycl::nd_item<Dim> itemIdx)
{
#if GMX_SYCL_HIPSYCL
cl::sycl::group_barrier(itemIdx.get_sub_group(), cl::sycl::memory_scope::sub_group);
return sg.shuffle_up(var, delta);
}
#endif
+
+#if GMX_SYCL_HIPSYCL
+/*! \brief Polyfill for sycl::isfinite missing from hipSYCL
+ *
+ * Does not follow GROMACS style because it should follow the name for
+ * which it is a polyfill. */
+template<typename Real>
+__device__ __host__ static inline bool isfinite(Real value)
+{
+ // This is not yet implemented in hipSYCL pending
+ // https://github.com/illuhad/hipSYCL/issues/636
+# ifdef SYCL_DEVICE_ONLY
+# if defined(HIPSYCL_PLATFORM_CUDA) && defined(__HIPSYCL_ENABLE_CUDA_TARGET__)
+ return ::isfinite(value);
+# elif defined(HIPSYCL_PLATFORM_ROCM) && defined(__HIPSYCL_ENABLE_HIP_TARGET__)
+ return ::isfinite(value);
+# else
+# error "Unsupported hipSYCL target"
+# endif
+# else
+ // Should never be called
+ assert(false);
+ GMX_UNUSED_VALUE(value);
+ return false;
+# endif
+}
+#elif GMX_SYCL_DPCPP
+template<typename Real>
+static inline bool isfinite(Real value)
+{
+ return cl::sycl::isfinite(value);
+}
+
+#endif
+
+#if GMX_SYCL_HIPSYCL
+
+/*! \brief Polyfill for sycl::vec::load buggy in hipSYCL
+ *
+ * Loads from the address \c ptr offset in elements of type T by
+ * NumElements * offset, into the components of \c v.
+ *
+ * Can probably be removed when
+ * https://github.com/illuhad/hipSYCL/issues/647 is resolved. */
+template<cl::sycl::access::address_space AddressSpace, typename T, int NumElements>
+static inline void loadToVec(size_t offset,
+ cl::sycl::multi_ptr<const T, AddressSpace> ptr,
+ cl::sycl::vec<T, NumElements>* v)
+{
+ for (int i = 0; i < NumElements; ++i)
+ {
+ (*v)[i] = ptr.get()[offset * NumElements + i];
+ }
+}
+
+/*! \brief Polyfill for sycl::vec::store buggy in hipSYCL
+ *
+ * Loads from the address \c ptr offset in elements of type T by
+ * NumElements * offset, into the components of \c v.
+ *
+ * Can probably be removed when
+ * https://github.com/illuhad/hipSYCL/issues/647 is resolved. */
+template<cl::sycl::access::address_space AddressSpace, typename T, int NumElements>
+static inline void storeFromVec(const cl::sycl::vec<T, NumElements>& v,
+ size_t offset,
+ cl::sycl::multi_ptr<T, AddressSpace> ptr)
+{
+ for (int i = 0; i < NumElements; ++i)
+ {
+ ptr.get()[offset * NumElements + i] = v[i];
+ }
+}
+
+#elif GMX_SYCL_DPCPP
+
+/*! \brief Polyfill for sycl::vec::load buggy in hipSYCL
+ *
+ * Loads from the address \c ptr offset in elements of type T by
+ * NumElements * offset, into the components of \c v.
+ *
+ * Can probably be removed when
+ * https://github.com/illuhad/hipSYCL/issues/647 is resolved. */
+template<cl::sycl::access::address_space AddressSpace, typename T, int NumElements>
+static inline void loadToVec(size_t offset,
+ cl::sycl::multi_ptr<const T, AddressSpace> ptr,
+ cl::sycl::vec<T, NumElements>* v)
+{
+ v->load(offset, ptr);
+}
+
+/*! \brief Polyfill for sycl::vec::store buggy in hipSYCL
+ *
+ * Loads from the address \c ptr offset in elements of type T by
+ * NumElements * offset, into the components of \c v.
+ *
+ * Can probably be removed when
+ * https://github.com/illuhad/hipSYCL/issues/647 is resolved. */
+template<cl::sycl::access::address_space AddressSpace, typename T, int NumElements>
+static inline void storeFromVec(const cl::sycl::vec<T, NumElements>& v,
+ size_t offset,
+ cl::sycl::multi_ptr<T, AddressSpace> ptr)
+{
+ v.store(offset, ptr);
+}
+
+#endif
+
} // namespace sycl_2020
#endif /* GMX_GPU_UTILS_SYCL_KERNEL_UTILS_H */