Make large PME grids work on GPU

author Berk Hess <hess@kth.se>

Wed, 5 Dec 2018 08:54:07 +0000 (09:54 +0100)

committer Berk Hess <hess@kth.se>

Wed, 5 Dec 2018 08:54:07 +0000 (09:54 +0100)
author Berk Hess <hess@kth.se>
Wed, 5 Dec 2018 08:54:07 +0000 (09:54 +0100)
committer Berk Hess <hess@kth.se>
Wed, 5 Dec 2018 08:54:07 +0000 (09:54 +0100)
diff --git a/docs/release-notes/2018/2018.5.rst b/docs/release-notes/2018/2018.5.rst

index d1649a948a76c16e115e0b32edfeb8569000d4ab..f746fcced0775a9d9fb5d05f6be5bcc0bf184d14 100644 (file)
--- a/docs/release-notes/2018/2018.5.rst
+++ b/docs/release-notes/2018/2018.5.rst
@@ -19,6 +19,14 @@ was reduced to near machine precision. The change does not affect
  the results for non-polarizable systems, such as proteins or small
  molecules.
  
+Make large PME grids work on GPU
+"""""""""""""""""""""""""""""""""""""""""""
+
+PME grids with size along Z larger than 511 would make mdrun exit
+with a cryptic CUDA error.
+
+:issue: `2779`
+        
  Fixes for ``gmx`` tools
  ^^^^^^^^^^^^^^^^^^^^^^^
  
diff --git a/src/gromacs/ewald/pme-solve.cu b/src/gromacs/ewald/pme-solve.cu

index eceac2028605708f65ba0222dca0d6e3a42de9a7..a7a70bc3f80f699a59a92cb8c64608208d478d25 100644 (file)
--- a/src/gromacs/ewald/pme-solve.cu
+++ b/src/gromacs/ewald/pme-solve.cu
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -132,7 +132,7 @@ __global__ void pme_solve_kernel(const struct PmeGpuCudaKernelParams kernelParam
      const int gridLineSize      = localCountMinor;
      const int gridLineIndex     = threadLocalId / gridLineSize;
      const int gridLineCellIndex = threadLocalId - gridLineSize * gridLineIndex;
-    const int gridLinesPerBlock = blockDim.x / gridLineSize;
+    const int gridLinesPerBlock = max(blockDim.x / gridLineSize, 1);
      const int activeWarps       = (blockDim.x / warp_size);
      const int indexMinor        = blockIdx.x * blockDim.x + gridLineCellIndex;
      const int indexMiddle       = blockIdx.y * gridLinesPerBlock + gridLineIndex;
@@ -454,7 +454,15 @@ void pme_gpu_solve(const PmeGpu *pmeGpu, t_complex *h_grid,
      const int gridLineSize      = pmeGpu->kernelParams->grid.complexGridSize[minorDim];
      const int gridLinesPerBlock = std::max(maxBlockSize / gridLineSize, 1);
      const int blocksPerGridLine = (gridLineSize + maxBlockSize - 1) / maxBlockSize;
-    const int cellsPerBlock     = gridLineSize * gridLinesPerBlock;
+    int       cellsPerBlock;
+    if (blocksPerGridLine == 1)
+    {
+        cellsPerBlock           = gridLineSize * gridLinesPerBlock;
+    }
+    else
+    {
+        cellsPerBlock           = (gridLineSize + blocksPerGridLine - 1) / blocksPerGridLine;
+    }
      const int blockSize         = (cellsPerBlock + warp_size - 1) / warp_size * warp_size;
      // rounding up to full warps so that shuffle operations produce defined results
      dim3 threads(blockSize);
author	Berk Hess <hess@kth.se>
	Wed, 5 Dec 2018 08:54:07 +0000 (09:54 +0100)
committer	Berk Hess <hess@kth.se>
	Wed, 5 Dec 2018 08:54:07 +0000 (09:54 +0100)
docs/release-notes/2018/2018.5.rst		patch \| blob \| history
src/gromacs/ewald/pme-solve.cu		patch \| blob \| history