Merge branch release-2018 into master

[alexxy/gromacs.git] / src / gromacs / ewald / pme.cuh
diff --git a/src/gromacs/ewald/pme.cuh b/src/gromacs/ewald/pme.cuh

index c84e4fed8a66f8da65b70b9b8e72fae7b48f8b35..e58c3de4957dd5416027746c46875ce31ac9facf 100644 (file)
--- a/src/gromacs/ewald/pme.cuh
+++ b/src/gromacs/ewald/pme.cuh
@@ -137,6 +137,22 @@ int __device__ __forceinline__ pme_gpu_check_atom_charge(const float coefficient
      return c_skipNeutralAtoms ? (coefficient != 0.0f) : 1;
  }
  
+/*! \brief \internal
+ * Returns a compact 1D or 2D grid (columns in x, rows in y) holding at least \p blockCount blocks,
+ * no wider than pmeGpu->maxGridWidthX and with fewer unused blocks than rows; both inputs must be positive.
+ */
+template <typename PmeGpu>
+dim3 __host__ inline pmeGpuCreateGrid(const PmeGpu *pmeGpu, int blockCount)
+{
+    GMX_ASSERT(blockCount > 0 && pmeGpu->maxGridWidthX > 0, "pmeGpuCreateGrid: non-positive input");
+    // How many maximum widths in X do we need (hopefully just one); (a - 1) / b + 1 is a ceil-division
+    // that cannot overflow, unlike (a + b - 1) / b when the width limit is close to INT_MAX
+    const int minRowCount = (blockCount - 1) / pmeGpu->maxGridWidthX + 1;
+    const int colCount    = (blockCount - 1) / minRowCount + 1; // spread the blocks evenly over the rows
+    GMX_ASSERT((colCount * minRowCount - blockCount) >= 0, "pmeGpuCreateGrid: totally wrong");
+    GMX_ASSERT((colCount * minRowCount - blockCount) < minRowCount, "pmeGpuCreateGrid: excessive blocks");
+    return dim3(colCount, minRowCount);
+}
+
  /*! \brief \internal
   * The main PME CUDA-specific host data structure, included in the PME GPU structure by the archSpecific pointer.
   */