Apply clang-format to source tree
[alexxy/gromacs.git] / src / gromacs / ewald / pme_solve.clh
index a4b877e0dd82f9ee4b70b53bb01428fe37218304..ac38117d385facc6e7b63d5f3420a11e948fe172 100644 (file)
  *
  * \param[in]     kernelParams         Input PME GPU data in constant memory.
  * \param[in]     gm_splineModuli      B-Spline moduli.
- * \param[out]    gm_virialAndEnergy   Reduced virial and enrgy (only with computeEnergyAndVirial == true)
- * \param[in,out] gm_grid              Fourier grid to transform.
+ * \param[out]    gm_virialAndEnergy   Reduced virial and enrgy (only with computeEnergyAndVirial ==
+ * true) \param[in,out] gm_grid              Fourier grid to transform.
  */
 __attribute__((work_group_size_hint(c_solveMaxWorkGroupSize, 1, 1)))
-__kernel void CUSTOMIZED_KERNEL_NAME(pme_solve_kernel)(const struct PmeOpenCLKernelParams    kernelParams,
-                                                       __global const float * __restrict__   gm_splineModuli,
-                                                       __global float * __restrict__         gm_virialAndEnergy,
-                                                       __global float2 * __restrict__        gm_grid)
+__kernel void CUSTOMIZED_KERNEL_NAME(pme_solve_kernel)(const struct PmeOpenCLKernelParams kernelParams,
+                                                       __global const float* __restrict__ gm_splineModuli,
+                                                       __global float* __restrict__ gm_virialAndEnergy,
+                                                       __global float2* __restrict__ gm_grid)
 {
     /* This kernel supports 2 different grid dimension orderings: YZX and XYZ */
     int majorDim, middleDim, minorDim;
@@ -82,12 +82,15 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_solve_kernel)(const struct PmeOpenCLKer
         minorDim  = ZZ;
     }
 
-    __global const float * __restrict__ gm_splineValueMajor   = gm_splineModuli + kernelParams.grid.splineValuesOffset[majorDim];
-    __global const float * __restrict__ gm_splineValueMiddle  = gm_splineModuli + kernelParams.grid.splineValuesOffset[middleDim];
-    __global const float * __restrict__ gm_splineValueMinor   = gm_splineModuli + kernelParams.grid.splineValuesOffset[minorDim];
+    __global const float* __restrict__ gm_splineValueMajor =
+            gm_splineModuli + kernelParams.grid.splineValuesOffset[majorDim];
+    __global const float* __restrict__ gm_splineValueMiddle =
+            gm_splineModuli + kernelParams.grid.splineValuesOffset[middleDim];
+    __global const float* __restrict__ gm_splineValueMinor =
+            gm_splineModuli + kernelParams.grid.splineValuesOffset[minorDim];
 
     /* Various grid sizes and indices */
-    const int localOffsetMinor = 0, localOffsetMajor = 0, localOffsetMiddle = 0; //unused
+    const int localOffsetMinor = 0, localOffsetMajor = 0, localOffsetMiddle = 0; // unused
     const int localSizeMinor   = kernelParams.grid.complexGridSizePadded[minorDim];
     const int localSizeMiddle  = kernelParams.grid.complexGridSizePadded[middleDim];
     const int localCountMiddle = kernelParams.grid.complexGridSize[middleDim];
@@ -124,34 +127,35 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_solve_kernel)(const struct PmeOpenCLKer
     float virzz  = 0.0f;
 
     assert(indexMajor < kernelParams.grid.complexGridSize[majorDim]);
-    if ((indexMiddle < localCountMiddle) & (indexMinor < localCountMinor) & (gridLineIndex < gridLinesPerBlock))
+    if ((indexMiddle < localCountMiddle) & (indexMinor < localCountMinor)
+        & (gridLineIndex < gridLinesPerBlock))
     {
         /* The offset should be equal to the global thread index for coalesced access */
-        const int                      gridIndex     = (indexMajor * localSizeMiddle + indexMiddle) * localSizeMinor + indexMinor;
-        __global float2 * __restrict__ gm_gridCell   = gm_grid + gridIndex;
+        const int gridIndex = (indexMajor * localSizeMiddle + indexMiddle) * localSizeMinor + indexMinor;
+        __global float2* __restrict__ gm_gridCell = gm_grid + gridIndex;
 
-        const int                      kMajor  = indexMajor + localOffsetMajor;
+        const int kMajor = indexMajor + localOffsetMajor;
         /* Checking either X in XYZ, or Y in YZX cases */
-        const float                    mMajor  = (kMajor < maxkMajor) ? kMajor : (kMajor - nMajor);
+        const float mMajor = (kMajor < maxkMajor) ? kMajor : (kMajor - nMajor);
 
-        const int                      kMiddle = indexMiddle + localOffsetMiddle;
-        float                          mMiddle = kMiddle;
+        const int kMiddle = indexMiddle + localOffsetMiddle;
+        float     mMiddle = kMiddle;
         /* Checking Y in XYZ case */
         if (gridOrdering == XYZ)
         {
             mMiddle = (kMiddle < maxkMiddle) ? kMiddle : (kMiddle - nMiddle);
         }
-        const int             kMinor  = localOffsetMinor + indexMinor;
-        float                 mMinor  = kMinor;
+        const int kMinor = localOffsetMinor + indexMinor;
+        float     mMinor = kMinor;
         /* Checking X in YZX case */
         if (gridOrdering == YZX)
         {
             mMinor = (kMinor < maxkMinor) ? kMinor : (kMinor - nMinor);
         }
         /* We should skip the k-space point (0,0,0) */
-        const bool notZeroPoint  = (kMinor > 0) | (kMajor > 0) | (kMiddle > 0);
+        const bool notZeroPoint = (kMinor > 0) | (kMajor > 0) | (kMiddle > 0);
 
-        float      mX, mY, mZ;
+        float mX, mY, mZ;
         if (gridOrdering == YZX)
         {
             mX = mMinor;
@@ -185,40 +189,45 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_solve_kernel)(const struct PmeOpenCLKer
         if (notZeroPoint)
         {
             const float mhxk = mX * kernelParams.current.recipBox[XX][XX];
-            const float mhyk = mX * kernelParams.current.recipBox[XX][YY] + mY * kernelParams.current.recipBox[YY][YY];
-            const float mhzk = mX * kernelParams.current.recipBox[XX][ZZ] + mY * kernelParams.current.recipBox[YY][ZZ] + mZ * kernelParams.current.recipBox[ZZ][ZZ];
+            const float mhyk = mX * kernelParams.current.recipBox[XX][YY]
+                               + mY * kernelParams.current.recipBox[YY][YY];
+            const float mhzk = mX * kernelParams.current.recipBox[XX][ZZ]
+                               + mY * kernelParams.current.recipBox[YY][ZZ]
+                               + mZ * kernelParams.current.recipBox[ZZ][ZZ];
 
-            const float m2k        = mhxk * mhxk + mhyk * mhyk + mhzk * mhzk;
+            const float m2k = mhxk * mhxk + mhyk * mhyk + mhzk * mhzk;
             assert(m2k != 0.0f);
-            const float denom = m2k * M_PI_F * kernelParams.current.boxVolume * gm_splineValueMajor[kMajor] * gm_splineValueMiddle[kMiddle] * gm_splineValueMinor[kMinor];
+            const float denom = m2k * M_PI_F * kernelParams.current.boxVolume * gm_splineValueMajor[kMajor]
+                                * gm_splineValueMiddle[kMiddle] * gm_splineValueMinor[kMinor];
             assert(isfinite(denom));
             assert(denom != 0.0f);
-            const float   tmp1   = exp(-kernelParams.grid.ewaldFactor * m2k);
-            const float   etermk = kernelParams.constants.elFactor * tmp1 / denom;
+            const float tmp1   = exp(-kernelParams.grid.ewaldFactor * m2k);
+            const float etermk = kernelParams.constants.elFactor * tmp1 / denom;
 
-            float2        gridValue    = *gm_gridCell;
-            const float2  oldGridValue = gridValue;
+            float2       gridValue    = *gm_gridCell;
+            const float2 oldGridValue = gridValue;
 
-            gridValue.x   *= etermk;
-            gridValue.y   *= etermk;
-            *gm_gridCell   = gridValue;
+            gridValue.x *= etermk;
+            gridValue.y *= etermk;
+            *gm_gridCell = gridValue;
 
             if (computeEnergyAndVirial)
             {
-                const float tmp1k = 2.0f * (gridValue.x * oldGridValue.x + gridValue.y * oldGridValue.y);
+                const float tmp1k =
+                        2.0f * (gridValue.x * oldGridValue.x + gridValue.y * oldGridValue.y);
 
                 const float vfactor = (kernelParams.grid.ewaldFactor + 1.0f / m2k) * 2.0f;
                 const float ets2    = corner_fac * tmp1k;
-                energy = ets2;
+                energy              = ets2;
 
-                const float ets2vf  = ets2 * vfactor;
+                const float ets2vf = ets2 * vfactor;
 
-                virxx   = ets2vf * mhxk * mhxk - ets2;
-                virxy   = ets2vf * mhxk * mhyk;
-                virxz   = ets2vf * mhxk * mhzk;
-                viryy   = ets2vf * mhyk * mhyk - ets2;
-                viryz   = ets2vf * mhyk * mhzk;
-                virzz   = ets2vf * mhzk * mhzk - ets2;
+                virxx = ets2vf * mhxk * mhxk - ets2;
+                virxy = ets2vf * mhxk * mhyk;
+                virxz = ets2vf * mhxk * mhzk;
+                viryy = ets2vf * mhyk * mhyk - ets2;
+                viryz = ets2vf * mhyk * mhzk;
+                virzz = ets2vf * mhzk * mhzk - ets2;
             }
         }
     }
@@ -271,16 +280,17 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_solve_kernel)(const struct PmeOpenCLKer
             if (componentIndex < c_virialAndEnergyCount)
             {
                 const int targetIndex = componentIndex * warp_size + lane;
-    #pragma unroll
+#pragma unroll
                 for (int reductionStride = warp_size >> 1; reductionStride >= 1; reductionStride >>= 1)
                 {
                     if (lane < reductionStride)
                     {
-                        sm_virialAndEnergy[targetIndex] += sm_virialAndEnergy[targetIndex + reductionStride];
+                        sm_virialAndEnergy[targetIndex] +=
+                                sm_virialAndEnergy[targetIndex + reductionStride];
                     }
 #ifdef _NVIDIA_SOURCE_
-                    /* FIXME: this execution happens within execution width aka warp, but somehow  NVIDIA OpenCL of all things
-                     * fails without the memory barrier here. #2519
+                    /* FIXME: this execution happens within execution width aka warp, but somehow
+                     * NVIDIA OpenCL of all things fails without the memory barrier here. #2519
                      */
                     barrier(CLK_LOCAL_MEM_FENCE);
 #endif