Rename our SYCL aliases floatN to FloatN

[alexxy/gromacs.git] / src / gromacs / nbnxm / sycl / nbnxm_sycl_kernel.cpp
diff --git a/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp b/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp

index a68c9d8b2f026346f881ff941e3da9cfea4bdc5b..946eb2dd4d955b9680908ddaed0f2a8d018841a7 100644 (file)
--- a/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp
+++ b/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp
@@ -301,7 +301,7 @@ static inline float interpolateCoulombForceR(const DeviceAccessor<float, mode::r
      return lerp(left, right, fraction); // TODO: cl::sycl::mix
  }
  
-static inline void reduceForceJShuffle(float3                                  f,
+static inline void reduceForceJShuffle(Float3                                  f,
                                         const cl::sycl::nd_item<1>              itemIdx,
                                         const int                               tidxi,
                                         const int                               aidx,
@@ -342,7 +342,7 @@ static inline void reduceForceJShuffle(float3                                  f
   * This implementation works only with power of two array sizes.
   */
  static inline void reduceForceIAndFShift(cl::sycl::accessor<float, 1, mode::read_write, target::local> sm_buf,
-                                         const float3 fCiBuf[c_nbnxnGpuNumClusterPerSupercluster],
+                                         const Float3 fCiBuf[c_nbnxnGpuNumClusterPerSupercluster],
                                           const bool   calcFShift,
                                           const cl::sycl::nd_item<1>              itemIdx,
                                           const int                               tidxi,
@@ -418,16 +418,16 @@ static inline void reduceForceIAndFShift(cl::sycl::accessor<float, 1, mode::read
   */
  template<bool doPruneNBL, bool doCalcEnergies, enum ElecType elecType, enum VdwType vdwType>
  auto nbnxmKernel(cl::sycl::handler&                                        cgh,
-                 DeviceAccessor<float4, mode::read>                        a_xq,
+                 DeviceAccessor<Float4, mode::read>                        a_xq,
                   DeviceAccessor<float, mode::read_write>                   a_f,
-                 DeviceAccessor<float3, mode::read>                        a_shiftVec,
+                 DeviceAccessor<Float3, mode::read>                        a_shiftVec,
                   DeviceAccessor<float, mode::read_write>                   a_fShift,
                   OptionalAccessor<float, mode::read_write, doCalcEnergies> a_energyElec,
                   OptionalAccessor<float, mode::read_write, doCalcEnergies> a_energyVdw,
                   DeviceAccessor<nbnxn_cj4_t, doPruneNBL ? mode::read_write : mode::read> a_plistCJ4,
                   DeviceAccessor<nbnxn_sci_t, mode::read>                                 a_plistSci,
                   DeviceAccessor<nbnxn_excl_t, mode::read>                    a_plistExcl,
-                 OptionalAccessor<float2, mode::read, ljComb<vdwType>>       a_ljComb,
+                 OptionalAccessor<Float2, mode::read, ljComb<vdwType>>       a_ljComb,
                   OptionalAccessor<int, mode::read, !ljComb<vdwType>>         a_atomTypes,
                   OptionalAccessor<float, mode::read, !ljComb<vdwType>>       a_nbfp,
                   OptionalAccessor<float, mode::read, ljEwald<vdwType>>       a_nbfpComb,
@@ -483,7 +483,7 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
      }
  
      // shmem buffer for i x+q pre-loading
-    cl::sycl::accessor<float4, 2, mode::read_write, target::local> sm_xq(
+    cl::sycl::accessor<Float4, 2, mode::read_write, target::local> sm_xq(
              cl::sycl::range<2>(c_nbnxnGpuNumClusterPerSupercluster, c_clSize), cgh);
  
      // shmem buffer for force reduction
@@ -506,7 +506,7 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
      auto sm_ljCombI = [&]() {
          if constexpr (props.vdwComb)
          {
-            return cl::sycl::accessor<float2, 2, mode::read_write, target::local>(
+            return cl::sycl::accessor<Float2, 2, mode::read_write, target::local>(
                      cl::sycl::range<2>(c_nbnxnGpuNumClusterPerSupercluster, c_clSize), cgh);
          }
          else
@@ -539,10 +539,10 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
          // Better use sg.get_group_range, but too much of the logic relies on it anyway
          const unsigned widx = tidx / subGroupSize;
  
-        float3 fCiBuf[c_nbnxnGpuNumClusterPerSupercluster]; // i force buffer
+        Float3 fCiBuf[c_nbnxnGpuNumClusterPerSupercluster]; // i force buffer
          for (int i = 0; i < c_nbnxnGpuNumClusterPerSupercluster; i++)
          {
-            fCiBuf[i] = float3(0.0F, 0.0F, 0.0F);
+            fCiBuf[i] = Float3(0.0F, 0.0F, 0.0F);
          }
  
          const nbnxn_sci_t nbSci     = a_plistSci[bidx];
@@ -561,9 +561,9 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
              const int             ai       = ci * c_clSize + tidxi;
              const cl::sycl::id<2> cacheIdx = cl::sycl::id<2>(tidxj + i, tidxi);
  
-            const float3 shift = a_shiftVec[nbSci.shift];
-            float4       xqi   = a_xq[ai];
-            xqi += float4(shift[0], shift[1], shift[2], 0.0F);
+            const Float3 shift = a_shiftVec[nbSci.shift];
+            Float4       xqi   = a_xq[ai];
+            xqi += Float4(shift[0], shift[1], shift[2], 0.0F);
              xqi[3] *= epsFac;
              sm_xq[cacheIdx] = xqi;
  
@@ -659,12 +659,12 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
                  const int aj     = cj * c_clSize + tidxj;
  
                  // load j atom data
-                const float4 xqj = a_xq[aj];
+                const Float4 xqj = a_xq[aj];
  
-                const float3 xj(xqj[0], xqj[1], xqj[2]);
+                const Float3 xj(xqj[0], xqj[1], xqj[2]);
                  const float  qj = xqj[3];
                  int          atomTypeJ; // Only needed if (!props.vdwComb)
-                float2       ljCombJ;   // Only needed if (props.vdwComb)
+                Float2       ljCombJ;   // Only needed if (props.vdwComb)
                  if constexpr (props.vdwComb)
                  {
                      ljCombJ = a_ljComb[aj];
@@ -674,7 +674,7 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
                      atomTypeJ = a_atomTypes[aj];
                  }
  
-                float3 fCjBuf(0.0F, 0.0F, 0.0F);
+                Float3 fCjBuf(0.0F, 0.0F, 0.0F);
  
                  for (int i = 0; i < c_nbnxnGpuNumClusterPerSupercluster; i++)
                  {
@@ -683,11 +683,11 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
                          // i cluster index
                          const int ci = sci * c_nbnxnGpuNumClusterPerSupercluster + i;
                          // all threads load an atom from i cluster ci into shmem!
-                        const float4 xqi = sm_xq[i][tidxi];
-                        const float3 xi(xqi[0], xqi[1], xqi[2]);
+                        const Float4 xqi = sm_xq[i][tidxi];
+                        const Float3 xi(xqi[0], xqi[1], xqi[2]);
  
                          // distance between i and j atoms
-                        const float3 rv = xi - xj;
+                        const Float3 rv = xi - xj;
                          float        r2 = norm2(rv);
  
                          if constexpr (doPruneNBL)
@@ -724,7 +724,7 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
                              }
                              else
                              {
-                                const float2 ljCombI = sm_ljCombI[i][tidxi];
+                                const Float2 ljCombI = sm_ljCombI[i][tidxi];
                                  if constexpr (props.vdwCombGeom)
                                  {
                                      c6  = ljCombI[0] * ljCombJ[0];
@@ -867,7 +867,7 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
                                  }
                              }
  
-                            const float3 forceIJ = rv * fInvR;
+                            const Float3 forceIJ = rv * fInvR;
  
                              /* accumulate j forces in registers */
                              fCjBuf -= forceIJ;
@@ -969,9 +969,9 @@ void launchNbnxmKernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const In
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
  
      // Casting to float simplifies using atomic ops in the kernel
-    cl::sycl::buffer<float3, 1> f(*adat->f.buffer_);
+    cl::sycl::buffer<Float3, 1> f(*adat->f.buffer_);
      auto                        fAsFloat = f.reinterpret<float, 1>(f.get_count() * DIM);
-    cl::sycl::buffer<float3, 1> fShift(*adat->fShift.buffer_);
+    cl::sycl::buffer<Float3, 1> fShift(*adat->fShift.buffer_);
      auto fShiftAsFloat = fShift.reinterpret<float, 1>(fShift.get_count() * DIM);
  
      cl::sycl::event e = chooseAndLaunchNbnxmKernel(doPruneNBL,