Change nbnxm macros to constexpr

author Berk Hess <hess@kth.se>

Wed, 13 Mar 2019 09:47:49 +0000 (10:47 +0100)

committer Berk Hess <hess@kth.se>

Thu, 14 Mar 2019 10:39:43 +0000 (11:39 +0100)
author Berk Hess <hess@kth.se>
Wed, 13 Mar 2019 09:47:49 +0000 (10:47 +0100)
committer Berk Hess <hess@kth.se>
Thu, 14 Mar 2019 10:39:43 +0000 (11:39 +0100)
diff --git a/src/gromacs/nbnxm/grid.cpp b/src/gromacs/nbnxm/grid.cpp

index 9d7ae9cc8474de3e878c9cb8decd5ee33610040d..38818258d50734e0285707059a565a3cc657f915 100644 (file)
--- a/src/gromacs/nbnxm/grid.cpp
+++ b/src/gromacs/nbnxm/grid.cpp
@@ -205,7 +205,7 @@ void Grid::setDimensions(const int           ddZone,
      else
      {
  #if NBNXN_BBXXXX
-        pbb_.resize(maxNumCells*c_gpuNumClusterPerCell/STRIDE_PBB*NNBSBB_XXXX);
+        pbb_.resize(packedBoundingBoxesIndex(maxNumCells*c_gpuNumClusterPerCell));
  #else
          bb_.resize(maxNumCells*c_gpuNumClusterPerCell);
  #endif
@@ -584,12 +584,12 @@ static void calc_bounding_box_xxxx(int na, int stride, const real *x, float *bb)
          i += stride;
      }
      /* Note: possible double to float conversion here */
-    bb[0*STRIDE_PBB] = R2F_D(xl);
-    bb[1*STRIDE_PBB] = R2F_D(yl);
-    bb[2*STRIDE_PBB] = R2F_D(zl);
-    bb[3*STRIDE_PBB] = R2F_U(xh);
-    bb[4*STRIDE_PBB] = R2F_U(yh);
-    bb[5*STRIDE_PBB] = R2F_U(zh);
+    bb[0*c_packedBoundingBoxesDimSize] = R2F_D(xl);
+    bb[1*c_packedBoundingBoxesDimSize] = R2F_D(yl);
+    bb[2*c_packedBoundingBoxesDimSize] = R2F_D(zl);
+    bb[3*c_packedBoundingBoxesDimSize] = R2F_U(xh);
+    bb[4*c_packedBoundingBoxesDimSize] = R2F_U(yh);
+    bb[5*c_packedBoundingBoxesDimSize] = R2F_U(zh);
  }
  
  #endif /* NBNXN_BBXXXX */
@@ -629,12 +629,12 @@ static void calc_bounding_box_xxxx_simd4(int na, const float *x,
  {
      calc_bounding_box_simd4(na, x, bb_work_aligned);
  
-    bb[0*STRIDE_PBB] = bb_work_aligned->lower.x;
-    bb[1*STRIDE_PBB] = bb_work_aligned->lower.y;
-    bb[2*STRIDE_PBB] = bb_work_aligned->lower.z;
-    bb[3*STRIDE_PBB] = bb_work_aligned->upper.x;
-    bb[4*STRIDE_PBB] = bb_work_aligned->upper.y;
-    bb[5*STRIDE_PBB] = bb_work_aligned->upper.z;
+    bb[0*c_packedBoundingBoxesDimSize] = bb_work_aligned->lower.x;
+    bb[1*c_packedBoundingBoxesDimSize] = bb_work_aligned->lower.y;
+    bb[2*c_packedBoundingBoxesDimSize] = bb_work_aligned->lower.z;
+    bb[3*c_packedBoundingBoxesDimSize] = bb_work_aligned->upper.x;
+    bb[4*c_packedBoundingBoxesDimSize] = bb_work_aligned->upper.y;
+    bb[5*c_packedBoundingBoxesDimSize] = bb_work_aligned->upper.z;
  }
  
  #endif /* NBNXN_BBXXXX */
@@ -724,16 +724,17 @@ static void print_bbsizes_supersub(FILE       *fp,
      for (int c = 0; c < grid.numCells(); c++)
      {
  #if NBNXN_BBXXXX
-        for (int s = 0; s < grid.numClustersPerCell()[c]; s += STRIDE_PBB)
+        for (int s = 0; s < grid.numClustersPerCell()[c]; s += c_packedBoundingBoxesDimSize)
          {
-            int cs_w = (c*c_gpuNumClusterPerCell + s)/STRIDE_PBB;
-            for (int i = 0; i < STRIDE_PBB; i++)
+            int  cs_w          = (c*c_gpuNumClusterPerCell + s)/c_packedBoundingBoxesDimSize;
+            auto boundingBoxes = grid.packedBoundingBoxes().subArray(cs_w*c_packedBoundingBoxesSize, c_packedBoundingBoxesSize);
+            for (int i = 0; i < c_packedBoundingBoxesDimSize; i++)
              {
                  for (int d = 0; d < DIM; d++)
                  {
                      ba[d] +=
-                        grid.packedBoundingBoxes()[cs_w*NNBSBB_XXXX + (DIM + d)*STRIDE_PBB + i] -
-                        grid.packedBoundingBoxes()[cs_w*NNBSBB_XXXX +        d *STRIDE_PBB + i];
+                        boundingBoxes[(DIM + d)*c_packedBoundingBoxesDimSize + i] -
+                        boundingBoxes[(0   + d)*c_packedBoundingBoxesDimSize + i];
                  }
              }
          }
@@ -925,10 +926,11 @@ void Grid::fillCell(const GridSetData              &gridSetData,
          /* Store the bounding boxes in a format convenient
           * for SIMD4 calculations: xxxxyyyyzzzz...
           */
-        float *pbb_ptr =
+        const int clusterIndex    = ((atomStart - cellOffset_*geometry_.numAtomsPerCell) >> geometry_.numAtomsICluster2Log);
+        float    *pbb_ptr         =
              pbb_.data() +
-            ((atomStart - cellOffset_*geometry_.numAtomsPerCell) >> (geometry_.numAtomsICluster2Log + STRIDE_PBB_2LOG))*NNBSBB_XXXX +
-            (((atomStart - cellOffset_*geometry_.numAtomsPerCell) >> geometry_.numAtomsICluster2Log) & (STRIDE_PBB - 1));
+            packedBoundingBoxesIndex(clusterIndex) +
+            (clusterIndex & (c_packedBoundingBoxesDimSize - 1));
  
  #if NBNXN_SEARCH_SIMD4_FLOAT_X_BB
          if (nbat->XFormat == nbatXYZQ)
@@ -946,9 +948,9 @@ void Grid::fillCell(const GridSetData              &gridSetData,
          {
              fprintf(debug, "cell %4d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n",
                      atomToCluster(atomStart),
-                    pbb_ptr[0*STRIDE_PBB], pbb_ptr[3*STRIDE_PBB],
-                    pbb_ptr[1*STRIDE_PBB], pbb_ptr[4*STRIDE_PBB],
-                    pbb_ptr[2*STRIDE_PBB], pbb_ptr[5*STRIDE_PBB]);
+                    pbb_ptr[0*c_packedBoundingBoxesDimSize], pbb_ptr[3*c_packedBoundingBoxesDimSize],
+                    pbb_ptr[1*c_packedBoundingBoxesDimSize], pbb_ptr[4*c_packedBoundingBoxesDimSize],
+                    pbb_ptr[2*c_packedBoundingBoxesDimSize], pbb_ptr[5*c_packedBoundingBoxesDimSize]);
          }
      }
  #endif
diff --git a/src/gromacs/nbnxm/grid.h b/src/gromacs/nbnxm/grid.h

index fa59bd8fc6c603f1e4ba3e6c04206cae2f2fa744..c16f6830e1a60aedc9b8c5bff5dc09bf2f1ac2a3 100644 (file)
--- a/src/gromacs/nbnxm/grid.h
+++ b/src/gromacs/nbnxm/grid.h
@@ -152,9 +152,9 @@ struct BoundingBox1D
  /*! \brief The number of bounds along one dimension of a bounding box */
  static constexpr int c_numBoundingBoxBounds1D = 2;
  
-#ifndef DOXYGEN
+} // namespace Nbnxm
  
-// TODO: Convert macros to constexpr int
+#ifndef DOXYGEN
  
  /* Bounding box calculations are (currently) always in single precision, so
   * we only need to check for single precision support here.
@@ -162,12 +162,8 @@ static constexpr int c_numBoundingBoxBounds1D = 2;
   */
  #if GMX_SIMD4_HAVE_FLOAT
  #    define NBNXN_SEARCH_BB_SIMD4      1
-/* Memory alignment in bytes as required by SIMD aligned loads/stores */
-#    define NBNXN_SEARCH_BB_MEM_ALIGN  (GMX_SIMD4_WIDTH*sizeof(float))
  #else
  #    define NBNXN_SEARCH_BB_SIMD4      0
-/* No alignment required, but set it so we can call the same routines */
-#    define NBNXN_SEARCH_BB_MEM_ALIGN  32
  #endif
  
  
@@ -181,16 +177,25 @@ static constexpr int c_numBoundingBoxBounds1D = 2;
  #        define NBNXN_SEARCH_SIMD4_FLOAT_X_BB  0
  #    endif
  
-/* The packed bounding box coordinate stride is always set to 4.
+/* Store bounding box corners as quadruplets: xxxxyyyyzzzz
+ *
+ * The packed bounding box coordinate stride is always set to 4.
   * With AVX we could use 8, but that turns out not to be faster.
   */
-#    define STRIDE_PBB       GMX_SIMD4_WIDTH
-#    define STRIDE_PBB_2LOG  2
-
-/* Store bounding boxes corners as quadruplets: xxxxyyyyzzzz */
  #    define NBNXN_BBXXXX  1
-/* Size of a quadruplet of bounding boxes, each 2 corners, stored packed */
-#    define NNBSBB_XXXX  (STRIDE_PBB*DIM*Nbnxm::c_numBoundingBoxBounds1D)
+
+//! The number of bounding boxes in a pack, also the size of a pack along one dimension
+static constexpr int c_packedBoundingBoxesDimSize = GMX_SIMD4_WIDTH;
+
+//! Total number of corners (floats) in a pack of bounding boxes
+static constexpr int c_packedBoundingBoxesSize    =
+    c_packedBoundingBoxesDimSize*DIM*Nbnxm::c_numBoundingBoxBounds1D;
+
+//! Returns the starting index, in floats, of the bounding box pack that contains the given cluster
+static constexpr inline int packedBoundingBoxesIndex(int clusterIndex)
+{
+    return (clusterIndex/c_packedBoundingBoxesDimSize)*c_packedBoundingBoxesSize;
+}
  
  #else  /* NBNXN_SEARCH_BB_SIMD4 */
  
@@ -201,6 +206,8 @@ static constexpr int c_numBoundingBoxBounds1D = 2;
  
  #endif // !DOXYGEN
  
+namespace Nbnxm
+{
  
  /*! \internal
   * \brief Helper struct to pass data that is shared over all grids
diff --git a/src/gromacs/nbnxm/pairlist.cpp b/src/gromacs/nbnxm/pairlist.cpp

index cf0c0f2c8b25ff8631bc7074da1d79551c226c6a..17cb35706a4d461e6fcf9014acb060ee96f537af 100644 (file)
--- a/src/gromacs/nbnxm/pairlist.cpp
+++ b/src/gromacs/nbnxm/pairlist.cpp
@@ -504,16 +504,18 @@ clusterBoundingBoxDistance2_xxxx_simd4_inner(const float      *bb_i,
                                               const Simd4Float  yj_h,
                                               const Simd4Float  zj_h)
  {
+    constexpr int    stride = c_packedBoundingBoxesDimSize;
+
      const int        shi  = boundingBoxStart*Nbnxm::c_numBoundingBoxBounds1D*DIM;
  
      const Simd4Float zero = setZero();
  
-    const Simd4Float xi_l = load4(bb_i + shi + 0*STRIDE_PBB);
-    const Simd4Float yi_l = load4(bb_i + shi + 1*STRIDE_PBB);
-    const Simd4Float zi_l = load4(bb_i + shi + 2*STRIDE_PBB);
-    const Simd4Float xi_h = load4(bb_i + shi + 3*STRIDE_PBB);
-    const Simd4Float yi_h = load4(bb_i + shi + 4*STRIDE_PBB);
-    const Simd4Float zi_h = load4(bb_i + shi + 5*STRIDE_PBB);
+    const Simd4Float xi_l = load4(bb_i + shi + 0*stride);
+    const Simd4Float yi_l = load4(bb_i + shi + 1*stride);
+    const Simd4Float zi_l = load4(bb_i + shi + 2*stride);
+    const Simd4Float xi_h = load4(bb_i + shi + 3*stride);
+    const Simd4Float yi_h = load4(bb_i + shi + 4*stride);
+    const Simd4Float zi_h = load4(bb_i + shi + 5*stride);
  
      const Simd4Float dx_0 = xi_l - xj_h;
      const Simd4Float dy_0 = yi_l - yj_h;
@@ -548,27 +550,29 @@ clusterBoundingBoxDistance2_xxxx_simd4(const float *bb_j,
                                         const float *bb_i,
                                         float       *d2)
  {
+    constexpr int    stride = c_packedBoundingBoxesDimSize;
+
      // TODO: During SIMDv2 transition only some archs use namespace (remove when done)
      using namespace gmx;
  
-    const Simd4Float xj_l = Simd4Float(bb_j[0*STRIDE_PBB]);
-    const Simd4Float yj_l = Simd4Float(bb_j[1*STRIDE_PBB]);
-    const Simd4Float zj_l = Simd4Float(bb_j[2*STRIDE_PBB]);
-    const Simd4Float xj_h = Simd4Float(bb_j[3*STRIDE_PBB]);
-    const Simd4Float yj_h = Simd4Float(bb_j[4*STRIDE_PBB]);
-    const Simd4Float zj_h = Simd4Float(bb_j[5*STRIDE_PBB]);
+    const Simd4Float xj_l = Simd4Float(bb_j[0*stride]);
+    const Simd4Float yj_l = Simd4Float(bb_j[1*stride]);
+    const Simd4Float zj_l = Simd4Float(bb_j[2*stride]);
+    const Simd4Float xj_h = Simd4Float(bb_j[3*stride]);
+    const Simd4Float yj_h = Simd4Float(bb_j[4*stride]);
+    const Simd4Float zj_h = Simd4Float(bb_j[5*stride]);
  
-    /* Here we "loop" over si (0,STRIDE_PBB) from 0 to nsi with step STRIDE_PBB.
+    /* Here we "loop" over si (0,stride) from 0 to nsi with step stride.
       * But as we know the number of iterations is 1 or 2, we unroll manually.
       */
      clusterBoundingBoxDistance2_xxxx_simd4_inner<0>(bb_i, d2,
                                                      xj_l, yj_l, zj_l,
                                                      xj_h, yj_h, zj_h);
-    if (STRIDE_PBB < nsi)
+    if (stride < nsi)
      {
-        clusterBoundingBoxDistance2_xxxx_simd4_inner<STRIDE_PBB>(bb_i, d2,
-                                                                 xj_l, yj_l, zj_l,
-                                                                 xj_h, yj_h, zj_h);
+        clusterBoundingBoxDistance2_xxxx_simd4_inner<stride>(bb_i, d2,
+                                                             xj_l, yj_l, zj_l,
+                                                             xj_h, yj_h, zj_h);
      }
  }
  
@@ -1239,7 +1243,8 @@ static void make_cluster_list_supersub(const Grid         &iGrid,
  
  #if NBNXN_BBXXXX
          /* Determine all ci1 bb distances in one call with SIMD4 */
-        clusterBoundingBoxDistance2_xxxx_simd4(jGrid.packedBoundingBoxes().data() + (cj >> STRIDE_PBB_2LOG)*NNBSBB_XXXX + (cj & (STRIDE_PBB-1)),
+        const int offset = packedBoundingBoxesIndex(cj) + (cj & (c_packedBoundingBoxesDimSize - 1));
+        clusterBoundingBoxDistance2_xxxx_simd4(jGrid.packedBoundingBoxes().data() + offset,
                                                 ci1, pbb_ci, d2l);
          *numDistanceChecks += c_nbnxnGpuClusterSize*2;
  #endif
@@ -2365,17 +2370,19 @@ static void set_icell_bbxxxx_supersub(gmx::ArrayRef<const float> bb,
                                        real shx, real shy, real shz,
                                        float *bb_ci)
  {
-    int ia = ci*(c_gpuNumClusterPerCell >> STRIDE_PBB_2LOG)*NNBSBB_XXXX;
-    for (int m = 0; m < (c_gpuNumClusterPerCell >> STRIDE_PBB_2LOG)*NNBSBB_XXXX; m += NNBSBB_XXXX)
+    constexpr int cellBBStride = packedBoundingBoxesIndex(c_gpuNumClusterPerCell);
+    constexpr int pbbStride    = c_packedBoundingBoxesDimSize;
+    const int     ia           = ci*cellBBStride;
+    for (int m = 0; m < cellBBStride; m += c_packedBoundingBoxesSize)
      {
-        for (int i = 0; i < STRIDE_PBB; i++)
+        for (int i = 0; i < pbbStride; i++)
          {
-            bb_ci[m+0*STRIDE_PBB+i] = bb[ia+m+0*STRIDE_PBB+i] + shx;
-            bb_ci[m+1*STRIDE_PBB+i] = bb[ia+m+1*STRIDE_PBB+i] + shy;
-            bb_ci[m+2*STRIDE_PBB+i] = bb[ia+m+2*STRIDE_PBB+i] + shz;
-            bb_ci[m+3*STRIDE_PBB+i] = bb[ia+m+3*STRIDE_PBB+i] + shx;
-            bb_ci[m+4*STRIDE_PBB+i] = bb[ia+m+4*STRIDE_PBB+i] + shy;
-            bb_ci[m+5*STRIDE_PBB+i] = bb[ia+m+5*STRIDE_PBB+i] + shz;
+            bb_ci[m + 0*pbbStride + i] = bb[ia + m + 0*pbbStride + i] + shx;
+            bb_ci[m + 1*pbbStride + i] = bb[ia + m + 1*pbbStride + i] + shy;
+            bb_ci[m + 2*pbbStride + i] = bb[ia + m + 2*pbbStride + i] + shz;
+            bb_ci[m + 3*pbbStride + i] = bb[ia + m + 3*pbbStride + i] + shx;
+            bb_ci[m + 4*pbbStride + i] = bb[ia + m + 4*pbbStride + i] + shy;
+            bb_ci[m + 5*pbbStride + i] = bb[ia + m + 5*pbbStride + i] + shz;
          }
      }
  }
diff --git a/src/gromacs/nbnxm/pairlistwork.h b/src/gromacs/nbnxm/pairlistwork.h

index 35de7acdb6f792b823900218b8e90848684c79fc..8716632010d19fa82321800e00cdd62dcd9a6050 100644 (file)
--- a/src/gromacs/nbnxm/pairlistwork.h
+++ b/src/gromacs/nbnxm/pairlistwork.h
@@ -101,7 +101,7 @@ struct NbnxnPairlistGpuWork
          ISuperClusterData() :
              bb(c_gpuNumClusterPerCell),
  #if NBNXN_SEARCH_BB_SIMD4
-            bbPacked(c_gpuNumClusterPerCell/STRIDE_PBB*NNBSBB_XXXX),
+            bbPacked(c_gpuNumClusterPerCell/c_packedBoundingBoxesDimSize*c_packedBoundingBoxesSize),
  #endif
              x(c_gpuNumClusterPerCell*c_nbnxnGpuClusterSize*DIM),
              xSimd(c_gpuNumClusterPerCell*c_nbnxnGpuClusterSize*DIM)
author	Berk Hess <hess@kth.se>
	Wed, 13 Mar 2019 09:47:49 +0000 (10:47 +0100)
committer	Berk Hess <hess@kth.se>
	Thu, 14 Mar 2019 10:39:43 +0000 (11:39 +0100)
src/gromacs/nbnxm/grid.cpp		patch \| blob \| history
src/gromacs/nbnxm/grid.h		patch \| blob \| history
src/gromacs/nbnxm/pairlist.cpp		patch \| blob \| history
src/gromacs/nbnxm/pairlistwork.h		patch \| blob \| history