biod.pnpi.spb.ru
/
alexxy
/
gromacs.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
33babf7
)
Fix nbnxm hipSYCL kernels 64-wide exec on AMD
author
Szilárd Páll
<pall.szilard@gmail.com>
Wed, 21 Apr 2021 10:57:02 +0000
(10:57 +0000)
committer
Artem Zhmurov
<zhmurov@gmail.com>
Wed, 21 Apr 2021 10:57:02 +0000
(10:57 +0000)
src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp
patch
|
blob
|
history
diff --git
a/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp
b/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp
index e2d4e151faa02332b2475f8e1b16ed3eb42a32a0..a2bfe59abcba36f92c0c0238d124f4f40553ee20 100644
(file)
--- a/
src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp
+++ b/
src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp
@@
-535,9
+535,9
@@
auto nbnxmKernel(cl::sycl::handler& cgh,
// Hence, the two are decoupled.
constexpr int prunedClusterPairSize = c_clSize * c_splitClSize;
#if defined(HIPSYCL_PLATFORM_ROCM) // SYCL-TODO AMD RDNA/RDNA2 has 32-wide exec; how can we check for that?
// Hence, the two are decoupled.
constexpr int prunedClusterPairSize = c_clSize * c_splitClSize;
#if defined(HIPSYCL_PLATFORM_ROCM) // SYCL-TODO AMD RDNA/RDNA2 has 32-wide exec; how can we check for that?
- constexpr int subGroupSize = c_clSize * c_clSize;
+
gmx_unused
constexpr int subGroupSize = c_clSize * c_clSize;
#else
#else
- constexpr int subGroupSize = prunedClusterPairSize;
+
gmx_unused
constexpr int subGroupSize = prunedClusterPairSize;
#endif
return [=](cl::sycl::nd_item<1> itemIdx) [[intel::reqd_sub_group_size(subGroupSize)]]
#endif
return [=](cl::sycl::nd_item<1> itemIdx) [[intel::reqd_sub_group_size(subGroupSize)]]
@@
-664,7
+664,8
@@
auto nbnxmKernel(cl::sycl::handler& cgh,
continue;
}
const int wexclIdx = a_plistCJ4[j4].imei[imeiIdx].excl_ind;
continue;
}
const int wexclIdx = a_plistCJ4[j4].imei[imeiIdx].excl_ind;
- const unsigned wexcl = a_plistExcl[wexclIdx].pair[tidx & (subGroupSize - 1)]; // sg.get_local_linear_id()
+ static_assert(gmx::isPowerOfTwo(prunedClusterPairSize));
+ const unsigned wexcl = a_plistExcl[wexclIdx].pair[tidx & (prunedClusterPairSize - 1)];
for (int jm = 0; jm < c_nbnxnGpuJgroupSize; jm++)
{
const bool maskSet =
for (int jm = 0; jm < c_nbnxnGpuJgroupSize; jm++)
{
const bool maskSet =