SYCL: Add missing barrier to NBNXM energy reduction
authorAndrey Alekseenko <al42and@gmail.com>
Wed, 6 Oct 2021 13:28:45 +0000 (13:28 +0000)
committerMark Abraham <mark.j.abraham@gmail.com>
Wed, 6 Oct 2021 13:28:45 +0000 (13:28 +0000)
src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp

index 39811fd11d26f9f4f2086f1c382a3c13998535b4..ea5321645d826439d21e1e0689b432c5f04caedc 100644 (file)
@@ -347,6 +347,8 @@ static inline void reduceForceJShuffle(Float3
  * While SYCL has \c sycl::reduce_over_group, it currently (oneAPI 2021.3.0) uses a very large
  * shared memory buffer, which leads to a reduced occupancy.
  *
+ * \note The caller must make sure there are no races when reusing the \p sm_buf.
+ *
  * \tparam subGroupSize Size of a sub-group.
  * \tparam groupSize Size of a work-group.
  * \param itemIdx Current thread's \c sycl::nd_item.
@@ -1046,6 +1048,7 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
         {
             const float energyVdwGroup =
                     groupReduce<subGroupSize, c_clSizeSq>(itemIdx, tidx, sm_reductionBuffer, energyVdw);
+            itemIdx.barrier(fence_space::local_space); // Prevent the race on sm_reductionBuffer.
             const float energyElecGroup = groupReduce<subGroupSize, c_clSizeSq>(
                     itemIdx, tidx, sm_reductionBuffer, energyElec);