i += stride;
}
/* Note: possible double to float conversion here */
- bb->lower[BB_X] = R2F_D(xl);
- bb->lower[BB_Y] = R2F_D(yl);
- bb->lower[BB_Z] = R2F_D(zl);
- bb->upper[BB_X] = R2F_U(xh);
- bb->upper[BB_Y] = R2F_U(yh);
- bb->upper[BB_Z] = R2F_U(zh);
+ bb->lower.x = R2F_D(xl);
+ bb->lower.y = R2F_D(yl);
+ bb->lower.z = R2F_D(zl);
+ bb->upper.x = R2F_U(xh);
+ bb->upper.y = R2F_U(yh);
+ bb->upper.z = R2F_U(zh);
}
/*! \brief Computes the bounding box for na packed coordinates, bb order xyz0 */
zh = std::max(zh, x[j+ZZ*c_packX4]);
}
/* Note: possible double to float conversion here */
- bb->lower[BB_X] = R2F_D(xl);
- bb->lower[BB_Y] = R2F_D(yl);
- bb->lower[BB_Z] = R2F_D(zl);
- bb->upper[BB_X] = R2F_U(xh);
- bb->upper[BB_Y] = R2F_U(yh);
- bb->upper[BB_Z] = R2F_U(zh);
+ bb->lower.x = R2F_D(xl);
+ bb->lower.y = R2F_D(yl);
+ bb->lower.z = R2F_D(zl);
+ bb->upper.x = R2F_U(xh);
+ bb->upper.y = R2F_U(yh);
+ bb->upper.z = R2F_U(zh);
}
/*! \brief Computes the bounding box for na coordinates, bb order xyz0 */
zh = std::max(zh, x[j+ZZ*c_packX8]);
}
/* Note: possible double to float conversion here */
- bb->lower[BB_X] = R2F_D(xl);
- bb->lower[BB_Y] = R2F_D(yl);
- bb->lower[BB_Z] = R2F_D(zl);
- bb->upper[BB_X] = R2F_U(xh);
- bb->upper[BB_Y] = R2F_U(yh);
- bb->upper[BB_Z] = R2F_U(zh);
+ bb->lower.x = R2F_D(xl);
+ bb->lower.y = R2F_D(yl);
+ bb->lower.z = R2F_D(zl);
+ bb->upper.x = R2F_U(xh);
+ bb->upper.y = R2F_U(yh);
+ bb->upper.z = R2F_U(zh);
}
/*! \brief Computes the bounding box for na packed coordinates, bb order xyz0 */
* so we don't need to treat special cases in the rest of the code.
*/
#if NBNXN_SEARCH_BB_SIMD4
- store4(&bbj[1].lower[0], load4(&bbj[0].lower[0]));
- store4(&bbj[1].upper[0], load4(&bbj[0].upper[0]));
+ store4(bbj[1].lower.ptr(), load4(bbj[0].lower.ptr()));
+ store4(bbj[1].upper.ptr(), load4(bbj[0].upper.ptr()));
#else
bbj[1] = bbj[0];
#endif
}
#if NBNXN_SEARCH_BB_SIMD4
- store4(&bb->lower[0], min(load4(&bbj[0].lower[0]), load4(&bbj[1].lower[0])));
- store4(&bb->upper[0], max(load4(&bbj[0].upper[0]), load4(&bbj[1].upper[0])));
+ store4(bb->lower.ptr(),
+ min(load4(bbj[0].lower.ptr()), load4(bbj[1].lower.ptr())));
+ store4(bb->upper.ptr(),
+ max(load4(bbj[0].upper.ptr()), load4(bbj[1].upper.ptr())));
#else
{
- int i;
-
- for (i = 0; i < NNBSBB_C; i++)
- {
- bb->lower[i] = std::min(bbj[0].lower[i], bbj[1].lower[i]);
- bb->upper[i] = std::max(bbj[0].upper[i], bbj[1].upper[i]);
- }
+ bb->lower = BoundingBox::Corner::min(bbj[0].lower,
+ bbj[1].lower);
+ bb->upper = BoundingBox::Corner::max(bbj[0].upper,
+ bbj[1].upper);
}
#endif
}
// TODO: During SIMDv2 transition only some archs use namespace (remove when done)
using namespace gmx;
- Simd4Float bb_0_S, bb_1_S;
- Simd4Float x_S;
+ static_assert(sizeof(BoundingBox::Corner) == GMX_SIMD4_WIDTH*sizeof(float),
+ "The Corner struct should hold exactly 4 floats");
- bb_0_S = load4(x);
- bb_1_S = bb_0_S;
+ Simd4Float bb_0_S = load4(x);
+ Simd4Float bb_1_S = bb_0_S;
for (int i = 1; i < na; i++)
{
- x_S = load4(x+i*NNBSBB_C);
- bb_0_S = min(bb_0_S, x_S);
- bb_1_S = max(bb_1_S, x_S);
+ Simd4Float x_S = load4(x + i*GMX_SIMD4_WIDTH);
+ bb_0_S = min(bb_0_S, x_S);
+ bb_1_S = max(bb_1_S, x_S);
}
- store4(&bb->lower[0], bb_0_S);
- store4(&bb->upper[0], bb_1_S);
+ store4(bb->lower.ptr(), bb_0_S);
+ store4(bb->upper.ptr(), bb_1_S);
}
/*! \brief Computes the bounding box for na coordinates in order xyz?, bb order xxxxyyyyzzzz */
{
calc_bounding_box_simd4(na, x, bb_work_aligned);
- bb[0*STRIDE_PBB] = bb_work_aligned->lower[BB_X];
- bb[1*STRIDE_PBB] = bb_work_aligned->lower[BB_Y];
- bb[2*STRIDE_PBB] = bb_work_aligned->lower[BB_Z];
- bb[3*STRIDE_PBB] = bb_work_aligned->upper[BB_X];
- bb[4*STRIDE_PBB] = bb_work_aligned->upper[BB_Y];
- bb[5*STRIDE_PBB] = bb_work_aligned->upper[BB_Z];
+ bb[0*STRIDE_PBB] = bb_work_aligned->lower.x;
+ bb[1*STRIDE_PBB] = bb_work_aligned->lower.y;
+ bb[2*STRIDE_PBB] = bb_work_aligned->lower.z;
+ bb[3*STRIDE_PBB] = bb_work_aligned->upper.x;
+ bb[4*STRIDE_PBB] = bb_work_aligned->upper.y;
+ bb[5*STRIDE_PBB] = bb_work_aligned->upper.z;
}
#endif /* NBNXN_SEARCH_SIMD4_FLOAT_X_BB */
#if NBNXN_SEARCH_BB_SIMD4
Simd4Float min_S, max_S;
- min_S = min(load4(&bb[c2*2+0].lower[0]),
- load4(&bb[c2*2+1].lower[0]));
- max_S = max(load4(&bb[c2*2+0].upper[0]),
- load4(&bb[c2*2+1].upper[0]));
- store4(&bbj[c2].lower[0], min_S);
- store4(&bbj[c2].upper[0], max_S);
+ min_S = min(load4(bb[c2*2+0].lower.ptr()),
+ load4(bb[c2*2+1].lower.ptr()));
+ max_S = max(load4(bb[c2*2+0].upper.ptr()),
+ load4(bb[c2*2+1].upper.ptr()));
+ store4(bbj[c2].lower.ptr(), min_S);
+ store4(bbj[c2].upper.ptr(), max_S);
#else
- for (int j = 0; j < NNBSBB_C; j++)
- {
- bbj[c2].lower[j] = std::min(bb[c2*2 + 0].lower[j],
- bb[c2*2 + 1].lower[j]);
- bbj[c2].upper[j] = std::max(bb[c2*2 + 0].upper[j],
- bb[c2*2 + 1].upper[j]);
- }
+ bbj[c2].lower = BoundingBox::Corner::min(bb[c2*2 + 0].lower,
+ bb[c2*2 + 1].lower);
+ bbj[c2].upper = BoundingBox::Corner::max(bb[c2*2 + 0].upper,
+ bb[c2*2 + 1].upper);
#endif
}
if (((grid.numAtomsInColumn(i) + 3) >> 2) & 1)
{
/* The bb count in this column is odd: duplicate the last bb */
- for (int j = 0; j < NNBSBB_C; j++)
- {
- bbj[c2].lower[j] = bb[c2*2].lower[j];
- bbj[c2].upper[j] = bb[c2*2].upper[j];
- }
+ bbj[c2].lower = bb[c2*2].lower;
+ bbj[c2].upper = bb[c2*2].upper;
}
}
}
static void print_bbsizes_simple(FILE *fp,
const Grid &grid)
{
- dvec ba;
-
- clear_dvec(ba);
+ dvec ba = { 0 };
for (int c = 0; c < grid.numCells(); c++)
{
- for (int d = 0; d < DIM; d++)
- {
- ba[d] += grid.iBoundingBoxes()[c].upper[d] - grid.iBoundingBoxes()[c].lower[d];
- }
+ const BoundingBox &bb = grid.iBoundingBoxes()[c];
+ ba[XX] += bb.upper.x - bb.lower.x;
+ ba[YY] += bb.upper.y - bb.lower.y;
+ ba[ZZ] += bb.upper.z - bb.lower.z;
}
dsvmul(1.0/grid.numCells(), ba, ba);
#else
for (int s = 0; s < grid.numClustersPerCell()[c]; s++)
{
- int cs = c*c_gpuNumClusterPerCell + s;
- for (int d = 0; d < DIM; d++)
- {
- ba[d] += grid.iBoundingBoxes()[cs].upper[d] - grid.iBoundingBoxes()[cs].lower[d];
- }
+ const BoundingBox &bb = grid.iBoundingBoxes()[c*c_gpuNumClusterPerCell + s];
+ ba[XX] += bb.upper.x - bb.lower.x;
+ ba[YY] += bb.upper.y - bb.lower.y;
+ ba[ZZ] += bb.upper.z - bb.lower.z;
}
#endif
ns += grid.numClustersPerCell()[c];
int bbo = atomToCluster(atomStart - cellOffset_*geometry_.numAtomsPerCell);
fprintf(debug, "cell %4d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n",
atomToCluster(atomStart),
- bb_[bbo].lower[BB_X],
- bb_[bbo].lower[BB_Y],
- bb_[bbo].lower[BB_Z],
- bb_[bbo].upper[BB_X],
- bb_[bbo].upper[BB_Y],
- bb_[bbo].upper[BB_Z]);
+ bb_[bbo].lower.x,
+ bb_[bbo].lower.y,
+ bb_[bbo].lower.z,
+ bb_[bbo].upper.x,
+ bb_[bbo].upper.y,
+ bb_[bbo].upper.z);
}
}
}
{
cellFilled = cell;
}
- bbcz_[cell*NNBSBB_D ] = bb_[cellFilled].lower[BB_Z];
- bbcz_[cell*NNBSBB_D + 1] = bb_[cellFilled].upper[BB_Z];
+ bbcz_[cell*NNBSBB_D ] = bb_[cellFilled].lower.z;
+ bbcz_[cell*NNBSBB_D + 1] = bb_[cellFilled].upper.z;
}
/* Set the unused atom indices to -1 */
namespace Nbnxm
{
-#ifndef DOXYGEN
+/*! \internal
+ * \brief Bounding box for a nbnxm atom cluster
+ *
+ * \note Should be aligned in memory to enable 4-wide SIMD operations.
+ */
+struct BoundingBox
+{
+ /*! \internal
+ * \brief Corner for the bounding box, padded with one element to enable 4-wide SIMD operations
+ */
+ struct Corner
+ {
+ //! Returns a corner with the minimum coordinates along each dimension
+ static Corner min(const Corner &c1,
+ const Corner &c2)
+ {
+ Corner cMin;
-// TODO: Convert macros to constexpr int and replace BB_? indexing by struct with x,y,z
+ cMin.x = std::min(c1.x, c2.x);
+ cMin.y = std::min(c1.y, c2.y);
+ cMin.z = std::min(c1.z, c2.z);
+ /* This value of the padding is irrelevant, as long as it
+ * is initialized. We use min to allow auto-vectorization.
+ */
+ cMin.padding = std::min(c1.padding, c2.padding);
-/* Pair search box lower and upper corner in x,y,z.
- * Store this in 4 iso 3 reals, which is useful with 4-wide SIMD.
- * To avoid complicating the code we also use 4 without 4-wide SIMD.
- */
-#define NNBSBB_C 4
-/* Pair search box lower and upper bound in z only. */
-#define NNBSBB_D 2
-/* Pair search box lower and upper corner x,y,z indices, entry 3 is unused */
-#define BB_X 0
-#define BB_Y 1
-#define BB_Z 2
+ return cMin;
+ }
-#endif // !DOXYGEN
+ //! Returns a corner with the maximum coordinates along each dimension
+ static Corner max(const Corner &c1,
+ const Corner &c2)
+ {
+ Corner cMax;
+ cMax.x = std::max(c1.x, c2.x);
+ cMax.y = std::max(c1.y, c2.y);
+ cMax.z = std::max(c1.z, c2.z);
+ cMax.padding = std::max(c1.padding, c2.padding);
-/* Bounding box for a nbnxn atom cluster */
-struct BoundingBox
-{
- float lower[NNBSBB_C];
- float upper[NNBSBB_C];
+ return cMax;
+ }
+
+ //! Returns a pointer for SIMD loading of a Corner object
+ const float *ptr() const
+ {
+ return &x;
+ }
+
+ //! Returns a pointer for SIMD storing of a Corner object
+ float *ptr()
+ {
+ return &x;
+ }
+
+ float x; //!< x coordinate
+ float y; //!< y coordinate
+ float z; //!< z coordinate
+ float padding; //!< padding, unused, but should be set to avoid operations on unitialized data
+ };
+
+ Corner lower; //!< lower, along x and y and z, corner
+ Corner upper; //!< upper, along x and y and z, corner
};
// TODO: Convert macros to constexpr int
+/* Pair search box lower and upper bound in z only. */
+#define NNBSBB_D 2
+
/* Bounding box calculations are (currently) always in single precision, so
* we only need to check for single precision support here.
* This uses less (cache-)memory and SIMD is faster, at least on x86.
/* Store bounding boxes corners as quadruplets: xxxxyyyyzzzz */
# define NBNXN_BBXXXX 1
-/* Size of bounding box corners quadruplet */
-# define NNBSBB_XXXX (NNBSBB_D*DIM*STRIDE_PBB)
+/* Size of a quadruplet of bounding boxes, each 2 corners, stored packed */
+# define NNBSBB_XXXX (2*DIM*STRIDE_PBB)
#else /* NBNXN_SEARCH_BB_SIMD4 */
d2 = 0;
- dl = bx0 - bb->upper[BB_X];
- dh = bb->lower[BB_X] - bx1;
+ dl = bx0 - bb->upper.x;
+ dh = bb->lower.x - bx1;
dm = std::max(dl, dh);
dm0 = std::max(dm, 0.0f);
d2 += dm0*dm0;
- dl = by0 - bb->upper[BB_Y];
- dh = bb->lower[BB_Y] - by1;
+ dl = by0 - bb->upper.y;
+ dh = bb->lower.y - by1;
dm = std::max(dl, dh);
dm0 = std::max(dm, 0.0f);
d2 += dm0*dm0;
- dl = bz0 - bb->upper[BB_Z];
- dh = bb->lower[BB_Z] - bz1;
+ dl = bz0 - bb->upper.z;
+ dh = bb->lower.z - bz1;
dm = std::max(dl, dh);
dm0 = std::max(dm, 0.0f);
d2 += dm0*dm0;
int csj,
gmx::ArrayRef<const nbnxn_bb_t> bb_j_all)
{
- const nbnxn_bb_t *bb_i = bb_i_ci + si;
- const nbnxn_bb_t *bb_j = bb_j_all.data() + csj;
+ const nbnxn_bb_t &bb_i = bb_i_ci[si];
+ const nbnxn_bb_t &bb_j = bb_j_all[csj];
float d2 = 0;
float dl, dh, dm, dm0;
- dl = bb_i->lower[BB_X] - bb_j->upper[BB_X];
- dh = bb_j->lower[BB_X] - bb_i->upper[BB_X];
+ dl = bb_i.lower.x - bb_j.upper.x;
+ dh = bb_j.lower.x - bb_i.upper.x;
dm = std::max(dl, dh);
dm0 = std::max(dm, 0.0f);
d2 += dm0*dm0;
- dl = bb_i->lower[BB_Y] - bb_j->upper[BB_Y];
- dh = bb_j->lower[BB_Y] - bb_i->upper[BB_Y];
+ dl = bb_i.lower.y - bb_j.upper.y;
+ dh = bb_j.lower.y - bb_i.upper.y;
dm = std::max(dl, dh);
dm0 = std::max(dm, 0.0f);
d2 += dm0*dm0;
- dl = bb_i->lower[BB_Z] - bb_j->upper[BB_Z];
- dh = bb_j->lower[BB_Z] - bb_i->upper[BB_Z];
+ dl = bb_i.lower.z - bb_j.upper.z;
+ dh = bb_j.lower.z - bb_i.upper.z;
dm = std::max(dl, dh);
dm0 = std::max(dm, 0.0f);
d2 += dm0*dm0;
Simd4Float dm_S;
Simd4Float dm0_S;
- bb_i_S0 = load4(&bb_i_ci[si].lower[0]);
- bb_i_S1 = load4(&bb_i_ci[si].upper[0]);
- bb_j_S0 = load4(&bb_j_all[csj].lower[0]);
- bb_j_S1 = load4(&bb_j_all[csj].upper[0]);
+ bb_i_S0 = load4(bb_i_ci[si].lower.ptr());
+ bb_i_S1 = load4(bb_i_ci[si].upper.ptr());
+ bb_j_S0 = load4(bb_j_all[csj].lower.ptr());
+ bb_j_S1 = load4(bb_j_all[csj].upper.ptr());
dl_S = bb_i_S0 - bb_j_S1;
dh_S = bb_j_S0 - bb_i_S1;
real shx, real shy, real shz,
nbnxn_bb_t *bb_ci)
{
- bb_ci->lower[BB_X] = bb[ci].lower[BB_X] + shx;
- bb_ci->lower[BB_Y] = bb[ci].lower[BB_Y] + shy;
- bb_ci->lower[BB_Z] = bb[ci].lower[BB_Z] + shz;
- bb_ci->upper[BB_X] = bb[ci].upper[BB_X] + shx;
- bb_ci->upper[BB_Y] = bb[ci].upper[BB_Y] + shy;
- bb_ci->upper[BB_Z] = bb[ci].upper[BB_Z] + shz;
+ bb_ci->lower.x = bb[ci].lower.x + shx;
+ bb_ci->lower.y = bb[ci].lower.y + shy;
+ bb_ci->lower.z = bb[ci].lower.z + shz;
+ bb_ci->upper.x = bb[ci].upper.x + shx;
+ bb_ci->upper.y = bb[ci].upper.y + shy;
+ bb_ci->upper.z = bb[ci].upper.z + shz;
}
/* Sets a simple list i-cell bounding box, including PBC shift */
{
if (bSimple)
{
- bx1 = bb_i[ci].upper[BB_X];
+ bx1 = bb_i[ci].upper.x;
}
else
{
if (bSimple)
{
- by0 = bb_i[ci].lower[BB_Y] + shy;
- by1 = bb_i[ci].upper[BB_Y] + shy;
+ by0 = bb_i[ci].lower.y + shy;
+ by1 = bb_i[ci].upper.y + shy;
}
else
{
if (bSimple)
{
- bx0 = bb_i[ci].lower[BB_X] + shx;
- bx1 = bb_i[ci].upper[BB_X] + shx;
+ bx0 = bb_i[ci].lower.x + shx;
+ bx1 = bb_i[ci].upper.x + shx;
}
else
{