Remove hardcoded warp_size == 32 assumption from PME GPU code

[alexxy/gromacs.git] / src / gromacs / ewald / pme-gather.cu
diff --git a/src/gromacs/ewald/pme-gather.cu b/src/gromacs/ewald/pme-gather.cu

index ccbd57023752d8ebdaa26f920be73c44a14b1da8..bb3577322d160c3cb04c701d33694a48501d4c27 100644
--- a/src/gromacs/ewald/pme-gather.cu
+++ b/src/gromacs/ewald/pme-gather.cu
@@ -238,6 +238,7 @@ __global__ void pme_gather_kernel(const PmeGpuCudaKernelParams    kernelParams)
      const int    atomsPerBlock  = (c_gatherMaxThreadsPerBlock / PME_SPREADGATHER_THREADS_PER_ATOM);
      const int    atomDataSize   = PME_SPREADGATHER_THREADS_PER_ATOM; /* Number of data components and threads for a single atom */
      const int    blockSize      = atomsPerBlock * atomDataSize;
+    const int    atomsPerWarp   = PME_SPREADGATHER_ATOMS_PER_WARP;
  
      const int    blockIndex = blockIdx.y * gridDim.x + blockIdx.x;
  
@@ -308,13 +309,13 @@ __global__ void pme_gather_kernel(const PmeGpuCudaKernelParams    kernelParams)
          const int    pny       = kernelParams.grid.realGridSizePadded[YY];
          const int    pnz       = kernelParams.grid.realGridSizePadded[ZZ];
  
-        const int    atomWarpIndex = atomIndexLocal % PME_SPREADGATHER_ATOMS_PER_WARP;
-        const int    warpIndex     = atomIndexLocal / PME_SPREADGATHER_ATOMS_PER_WARP;
+        const int    atomWarpIndex = atomIndexLocal % atomsPerWarp;
+        const int    warpIndex     = atomIndexLocal / atomsPerWarp;
  
-        const int    splineIndexBase = getSplineParamIndexBase<order>(warpIndex, atomWarpIndex);
-        const int    splineIndexY    = getSplineParamIndex<order>(splineIndexBase, YY, ithy);
+        const int    splineIndexBase = getSplineParamIndexBase<order, atomsPerWarp>(warpIndex, atomWarpIndex);
+        const int    splineIndexY    = getSplineParamIndex<order, atomsPerWarp>(splineIndexBase, YY, ithy);
          const float2 tdy             = sm_splineParams[splineIndexY];
-        const int    splineIndexZ    = getSplineParamIndex<order>(splineIndexBase, ZZ, ithz);
+        const int    splineIndexZ    = getSplineParamIndex<order, atomsPerWarp>(splineIndexBase, ZZ, ithz);
          const float2 tdz             = sm_splineParams[splineIndexZ];
  
          const int    ixBase         = sm_gridlineIndices[atomIndexLocal * DIM + XX];
@@ -342,7 +343,7 @@ __global__ void pme_gather_kernel(const PmeGpuCudaKernelParams    kernelParams)
              assert(gridIndexGlobal >= 0);
              const float   gridValue    = gm_grid[gridIndexGlobal];
              assert(isfinite(gridValue));
-            const int     splineIndexX = getSplineParamIndex<order>(splineIndexBase, XX, ithx);
+            const int     splineIndexX = getSplineParamIndex<order, atomsPerWarp>(splineIndexBase, XX, ithx);
              const float2  tdx          = sm_splineParams[splineIndexX];
              const float   fxy1         = tdz.x * gridValue;
              const float   fz1          = tdz.y * gridValue;