Rename our SYCL aliases floatN to FloatN
[alexxy/gromacs.git] / src / gromacs / nbnxm / sycl / nbnxm_sycl_kernel.cpp
index a68c9d8b2f026346f881ff941e3da9cfea4bdc5b..946eb2dd4d955b9680908ddaed0f2a8d018841a7 100644 (file)
@@ -301,7 +301,7 @@ static inline float interpolateCoulombForceR(const DeviceAccessor<float, mode::r
     return lerp(left, right, fraction); // TODO: cl::sycl::mix
 }
 
-static inline void reduceForceJShuffle(float3                                  f,
+static inline void reduceForceJShuffle(Float3                                  f,
                                        const cl::sycl::nd_item<1>              itemIdx,
                                        const int                               tidxi,
                                        const int                               aidx,
@@ -342,7 +342,7 @@ static inline void reduceForceJShuffle(float3                                  f
  * This implementation works only with power of two array sizes.
  */
 static inline void reduceForceIAndFShift(cl::sycl::accessor<float, 1, mode::read_write, target::local> sm_buf,
-                                         const float3 fCiBuf[c_nbnxnGpuNumClusterPerSupercluster],
+                                         const Float3 fCiBuf[c_nbnxnGpuNumClusterPerSupercluster],
                                          const bool   calcFShift,
                                          const cl::sycl::nd_item<1>              itemIdx,
                                          const int                               tidxi,
@@ -418,16 +418,16 @@ static inline void reduceForceIAndFShift(cl::sycl::accessor<float, 1, mode::read
  */
 template<bool doPruneNBL, bool doCalcEnergies, enum ElecType elecType, enum VdwType vdwType>
 auto nbnxmKernel(cl::sycl::handler&                                        cgh,
-                 DeviceAccessor<float4, mode::read>                        a_xq,
+                 DeviceAccessor<Float4, mode::read>                        a_xq,
                  DeviceAccessor<float, mode::read_write>                   a_f,
-                 DeviceAccessor<float3, mode::read>                        a_shiftVec,
+                 DeviceAccessor<Float3, mode::read>                        a_shiftVec,
                  DeviceAccessor<float, mode::read_write>                   a_fShift,
                  OptionalAccessor<float, mode::read_write, doCalcEnergies> a_energyElec,
                  OptionalAccessor<float, mode::read_write, doCalcEnergies> a_energyVdw,
                  DeviceAccessor<nbnxn_cj4_t, doPruneNBL ? mode::read_write : mode::read> a_plistCJ4,
                  DeviceAccessor<nbnxn_sci_t, mode::read>                                 a_plistSci,
                  DeviceAccessor<nbnxn_excl_t, mode::read>                    a_plistExcl,
-                 OptionalAccessor<float2, mode::read, ljComb<vdwType>>       a_ljComb,
+                 OptionalAccessor<Float2, mode::read, ljComb<vdwType>>       a_ljComb,
                  OptionalAccessor<int, mode::read, !ljComb<vdwType>>         a_atomTypes,
                  OptionalAccessor<float, mode::read, !ljComb<vdwType>>       a_nbfp,
                  OptionalAccessor<float, mode::read, ljEwald<vdwType>>       a_nbfpComb,
@@ -483,7 +483,7 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
     }
 
     // shmem buffer for i x+q pre-loading
-    cl::sycl::accessor<float4, 2, mode::read_write, target::local> sm_xq(
+    cl::sycl::accessor<Float4, 2, mode::read_write, target::local> sm_xq(
             cl::sycl::range<2>(c_nbnxnGpuNumClusterPerSupercluster, c_clSize), cgh);
 
     // shmem buffer for force reduction
@@ -506,7 +506,7 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
     auto sm_ljCombI = [&]() {
         if constexpr (props.vdwComb)
         {
-            return cl::sycl::accessor<float2, 2, mode::read_write, target::local>(
+            return cl::sycl::accessor<Float2, 2, mode::read_write, target::local>(
                     cl::sycl::range<2>(c_nbnxnGpuNumClusterPerSupercluster, c_clSize), cgh);
         }
         else
@@ -539,10 +539,10 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
         // Better use sg.get_group_range, but too much of the logic relies on it anyway
         const unsigned widx = tidx / subGroupSize;
 
-        float3 fCiBuf[c_nbnxnGpuNumClusterPerSupercluster]; // i force buffer
+        Float3 fCiBuf[c_nbnxnGpuNumClusterPerSupercluster]; // i force buffer
         for (int i = 0; i < c_nbnxnGpuNumClusterPerSupercluster; i++)
         {
-            fCiBuf[i] = float3(0.0F, 0.0F, 0.0F);
+            fCiBuf[i] = Float3(0.0F, 0.0F, 0.0F);
         }
 
         const nbnxn_sci_t nbSci     = a_plistSci[bidx];
@@ -561,9 +561,9 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
             const int             ai       = ci * c_clSize + tidxi;
             const cl::sycl::id<2> cacheIdx = cl::sycl::id<2>(tidxj + i, tidxi);
 
-            const float3 shift = a_shiftVec[nbSci.shift];
-            float4       xqi   = a_xq[ai];
-            xqi += float4(shift[0], shift[1], shift[2], 0.0F);
+            const Float3 shift = a_shiftVec[nbSci.shift];
+            Float4       xqi   = a_xq[ai];
+            xqi += Float4(shift[0], shift[1], shift[2], 0.0F);
             xqi[3] *= epsFac;
             sm_xq[cacheIdx] = xqi;
 
@@ -659,12 +659,12 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
                 const int aj     = cj * c_clSize + tidxj;
 
                 // load j atom data
-                const float4 xqj = a_xq[aj];
+                const Float4 xqj = a_xq[aj];
 
-                const float3 xj(xqj[0], xqj[1], xqj[2]);
+                const Float3 xj(xqj[0], xqj[1], xqj[2]);
                 const float  qj = xqj[3];
                 int          atomTypeJ; // Only needed if (!props.vdwComb)
-                float2       ljCombJ;   // Only needed if (props.vdwComb)
+                Float2       ljCombJ;   // Only needed if (props.vdwComb)
                 if constexpr (props.vdwComb)
                 {
                     ljCombJ = a_ljComb[aj];
@@ -674,7 +674,7 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
                     atomTypeJ = a_atomTypes[aj];
                 }
 
-                float3 fCjBuf(0.0F, 0.0F, 0.0F);
+                Float3 fCjBuf(0.0F, 0.0F, 0.0F);
 
                 for (int i = 0; i < c_nbnxnGpuNumClusterPerSupercluster; i++)
                 {
@@ -683,11 +683,11 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
                         // i cluster index
                         const int ci = sci * c_nbnxnGpuNumClusterPerSupercluster + i;
                         // all threads load an atom from i cluster ci into shmem!
-                        const float4 xqi = sm_xq[i][tidxi];
-                        const float3 xi(xqi[0], xqi[1], xqi[2]);
+                        const Float4 xqi = sm_xq[i][tidxi];
+                        const Float3 xi(xqi[0], xqi[1], xqi[2]);
 
                         // distance between i and j atoms
-                        const float3 rv = xi - xj;
+                        const Float3 rv = xi - xj;
                         float        r2 = norm2(rv);
 
                         if constexpr (doPruneNBL)
@@ -724,7 +724,7 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
                             }
                             else
                             {
-                                const float2 ljCombI = sm_ljCombI[i][tidxi];
+                                const Float2 ljCombI = sm_ljCombI[i][tidxi];
                                 if constexpr (props.vdwCombGeom)
                                 {
                                     c6  = ljCombI[0] * ljCombJ[0];
@@ -867,7 +867,7 @@ auto nbnxmKernel(cl::sycl::handler&                                        cgh,
                                 }
                             }
 
-                            const float3 forceIJ = rv * fInvR;
+                            const Float3 forceIJ = rv * fInvR;
 
                             /* accumulate j forces in registers */
                             fCjBuf -= forceIJ;
@@ -969,9 +969,9 @@ void launchNbnxmKernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const In
     const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
 
     // Casting to float simplifies using atomic ops in the kernel
-    cl::sycl::buffer<float3, 1> f(*adat->f.buffer_);
+    cl::sycl::buffer<Float3, 1> f(*adat->f.buffer_);
     auto                        fAsFloat = f.reinterpret<float, 1>(f.get_count() * DIM);
-    cl::sycl::buffer<float3, 1> fShift(*adat->fShift.buffer_);
+    cl::sycl::buffer<Float3, 1> fShift(*adat->fShift.buffer_);
     auto fShiftAsFloat = fShift.reinterpret<float, 1>(fShift.get_count() * DIM);
 
     cl::sycl::event e = chooseAndLaunchNbnxmKernel(doPruneNBL,