SYCL: Use acc.bind(cgh) instead of cgh.require(acc)
[alexxy/gromacs.git] / src / gromacs / nbnxm / sycl / nbnxm_sycl_kernel.cpp
index 39811fd11d26f9f4f2086f1c382a3c13998535b4..a7b2c6cde6b080ac4946855703037d4209aab454 100644 (file)
@@ -347,6 +347,8 @@ static inline void reduceForceJShuffle(Float3
  * While SYCL has \c sycl::reduce_over_group, it currently (oneAPI 2021.3.0) uses a very large
  * shared memory buffer, which leads to a reduced occupancy.
  *
+ * \note The caller must make sure there are no races when reusing the \p sm_buf.
+ *
  * \tparam subGroupSize Size of a sub-group.
  * \tparam groupSize Size of a work-group.
  * \param itemIdx Current thread's \c sycl::nd_item.
@@ -581,34 +583,34 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
 {
     static constexpr EnergyFunctionProperties<elecType, vdwType> props;
 
-    cgh.require(a_xq);
-    cgh.require(a_f);
-    cgh.require(a_shiftVec);
-    cgh.require(a_fShift);
+    a_xq.bind(cgh);
+    a_f.bind(cgh);
+    a_shiftVec.bind(cgh);
+    a_fShift.bind(cgh);
     if constexpr (doCalcEnergies)
     {
-        cgh.require(a_energyElec);
-        cgh.require(a_energyVdw);
+        a_energyElec.bind(cgh);
+        a_energyVdw.bind(cgh);
     }
-    cgh.require(a_plistCJ4);
-    cgh.require(a_plistSci);
-    cgh.require(a_plistExcl);
+    a_plistCJ4.bind(cgh);
+    a_plistSci.bind(cgh);
+    a_plistExcl.bind(cgh);
     if constexpr (!props.vdwComb)
     {
-        cgh.require(a_atomTypes);
-        cgh.require(a_nbfp);
+        a_atomTypes.bind(cgh);
+        a_nbfp.bind(cgh);
     }
     else
     {
-        cgh.require(a_ljComb);
+        a_ljComb.bind(cgh);
     }
     if constexpr (props.vdwEwald)
     {
-        cgh.require(a_nbfpComb);
+        a_nbfpComb.bind(cgh);
     }
     if constexpr (props.elecEwaldTab)
     {
-        cgh.require(a_coulombTab);
+        a_coulombTab.bind(cgh);
     }
 
     // shmem buffer for i x+q pre-loading
@@ -1046,6 +1048,7 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
         {
             const float energyVdwGroup =
                     groupReduce<subGroupSize, c_clSizeSq>(itemIdx, tidx, sm_reductionBuffer, energyVdw);
+            itemIdx.barrier(fence_space::local_space); // Prevent the race on sm_reductionBuffer.
             const float energyElecGroup = groupReduce<subGroupSize, c_clSizeSq>(
                     itemIdx, tidx, sm_reductionBuffer, energyElec);