SYCL: Use acc.bind(cgh) instead of cgh.require(acc)

[alexxy/gromacs.git] / src / gromacs / nbnxm / sycl / nbnxm_sycl_kernel.cpp
diff --git a/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp b/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp

index 39811fd11d26f9f4f2086f1c382a3c13998535b4..a7b2c6cde6b080ac4946855703037d4209aab454 100644 (file)
--- a/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp
+++ b/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp
@@ -347,6 +347,8 @@ static inline void reduceForceJShuffle(Float3
   * While SYCL has \c sycl::reduce_over_group, it currently (oneAPI 2021.3.0) uses a very large
   * shared memory buffer, which leads to a reduced occupancy.
   *
+ * \note The caller must make sure there are no races when reusing the \p sm_buf.
+ *
   * \tparam subGroupSize Size of a sub-group.
   * \tparam groupSize Size of a work-group.
   * \param itemIdx Current thread's \c sycl::nd_item.
@@ -581,34 +583,34 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
  {
      static constexpr EnergyFunctionProperties<elecType, vdwType> props;
  
-    cgh.require(a_xq);
-    cgh.require(a_f);
-    cgh.require(a_shiftVec);
-    cgh.require(a_fShift);
+    a_xq.bind(cgh);
+    a_f.bind(cgh);
+    a_shiftVec.bind(cgh);
+    a_fShift.bind(cgh);
      if constexpr (doCalcEnergies)
      {
-        cgh.require(a_energyElec);
-        cgh.require(a_energyVdw);
+        a_energyElec.bind(cgh);
+        a_energyVdw.bind(cgh);
      }
-    cgh.require(a_plistCJ4);
-    cgh.require(a_plistSci);
-    cgh.require(a_plistExcl);
+    a_plistCJ4.bind(cgh);
+    a_plistSci.bind(cgh);
+    a_plistExcl.bind(cgh);
      if constexpr (!props.vdwComb)
      {
-        cgh.require(a_atomTypes);
-        cgh.require(a_nbfp);
+        a_atomTypes.bind(cgh);
+        a_nbfp.bind(cgh);
      }
      else
      {
-        cgh.require(a_ljComb);
+        a_ljComb.bind(cgh);
      }
      if constexpr (props.vdwEwald)
      {
-        cgh.require(a_nbfpComb);
+        a_nbfpComb.bind(cgh);
      }
      if constexpr (props.elecEwaldTab)
      {
-        cgh.require(a_coulombTab);
+        a_coulombTab.bind(cgh);
      }
  
      // shmem buffer for i x+q pre-loading
@@ -1046,6 +1048,7 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
          {
              const float energyVdwGroup =
                      groupReduce<subGroupSize, c_clSizeSq>(itemIdx, tidx, sm_reductionBuffer, energyVdw);
+            itemIdx.barrier(fence_space::local_space); // Prevent the race on sm_reductionBuffer.
              const float energyElecGroup = groupReduce<subGroupSize, c_clSizeSq>(
                      itemIdx, tidx, sm_reductionBuffer, energyElec);