-#define SUBC_BB_DIST2_SIMD4_XXXX_INNER(si, bb_i, d2) \
- { \
- int shi; \
- \
- Simd4Float dx_0, dy_0, dz_0; \
- Simd4Float dx_1, dy_1, dz_1; \
- \
- Simd4Float mx, my, mz; \
- Simd4Float m0x, m0y, m0z; \
- \
- Simd4Float d2x, d2y, d2z; \
- Simd4Float d2s, d2t; \
- \
- shi = (si)*Nbnxm::c_numBoundingBoxBounds1D*DIM; \
- \
- xi_l = load4((bb_i)+shi+0*STRIDE_PBB); \
- yi_l = load4((bb_i)+shi+1*STRIDE_PBB); \
- zi_l = load4((bb_i)+shi+2*STRIDE_PBB); \
- xi_h = load4((bb_i)+shi+3*STRIDE_PBB); \
- yi_h = load4((bb_i)+shi+4*STRIDE_PBB); \
- zi_h = load4((bb_i)+shi+5*STRIDE_PBB); \
- \
- dx_0 = xi_l - xj_h; \
- dy_0 = yi_l - yj_h; \
- dz_0 = zi_l - zj_h; \
- \
- dx_1 = xj_l - xi_h; \
- dy_1 = yj_l - yi_h; \
- dz_1 = zj_l - zi_h; \
- \
- mx = max(dx_0, dx_1); \
- my = max(dy_0, dy_1); \
- mz = max(dz_0, dz_1); \
- \
- m0x = max(mx, zero); \
- m0y = max(my, zero); \
- m0z = max(mz, zero); \
- \
- d2x = m0x * m0x; \
- d2y = m0y * m0y; \
- d2z = m0z * m0z; \
- \
- d2s = d2x + d2y; \
- d2t = d2s + d2z; \
- \
- store4((d2)+(si), d2t); \
- }
+template <int boundingBoxStart> // compile-time index: scales the read offset into bb_i and is the write offset into d2
+static inline void gmx_simdcall // function form of the SUBC_BB_DIST2_SIMD4_XXXX_INNER macro removed above; returns nothing, the result is stored through d2
+clusterBoundingBoxDistance2_xxxx_simd4_inner(const float *bb_i, //!< i-side input; six load4() reads at STRIDE_PBB spacing starting at bb_i + shi
+ float *d2, //!< output; one Simd4Float is stored at d2 + boundingBoxStart
+ const Simd4Float xj_l, //!< j-side x value, has xi_h subtracted from it (presumably the j lower bound - TODO confirm)
+ const Simd4Float yj_l, //!< j-side y value, has yi_h subtracted from it (presumably the j lower bound - TODO confirm)
+ const Simd4Float zj_l, //!< j-side z value, has zi_h subtracted from it (presumably the j lower bound - TODO confirm)
+ const Simd4Float xj_h, //!< j-side x value, subtracted from xi_l (presumably the j upper bound - TODO confirm)
+ const Simd4Float yj_h, //!< j-side y value, subtracted from yi_l (presumably the j upper bound - TODO confirm)
+ const Simd4Float zj_h) //!< j-side z value, subtracted from zi_l (presumably the j upper bound - TODO confirm)
+{
+ const int shi = boundingBoxStart*Nbnxm::c_numBoundingBoxBounds1D*DIM; // element offset into bb_i of the first load
+
+ const Simd4Float zero = setZero(); // used below to clamp negative differences
+
+ const Simd4Float xi_l = load4(bb_i + shi + 0*STRIDE_PBB); // six loads at consecutive STRIDE_PBB offsets: x, y, z "_l" values first...
+ const Simd4Float yi_l = load4(bb_i + shi + 1*STRIDE_PBB);
+ const Simd4Float zi_l = load4(bb_i + shi + 2*STRIDE_PBB);
+ const Simd4Float xi_h = load4(bb_i + shi + 3*STRIDE_PBB); // ...then the x, y, z "_h" values
+ const Simd4Float yi_h = load4(bb_i + shi + 4*STRIDE_PBB);
+ const Simd4Float zi_h = load4(bb_i + shi + 5*STRIDE_PBB);
+
+ const Simd4Float dx_0 = xi_l - xj_h; // i "_l" minus j "_h", per dimension
+ const Simd4Float dy_0 = yi_l - yj_h;
+ const Simd4Float dz_0 = zi_l - zj_h;
+
+ const Simd4Float dx_1 = xj_l - xi_h; // j "_l" minus i "_h", per dimension
+ const Simd4Float dy_1 = yj_l - yi_h;
+ const Simd4Float dz_1 = zj_l - zi_h;
+
+ const Simd4Float mx = max(dx_0, dx_1); // larger of the two signed differences in each dimension
+ const Simd4Float my = max(dy_0, dy_1);
+ const Simd4Float mz = max(dz_0, dz_1);
+
+ const Simd4Float m0x = max(mx, zero); // clamp at zero; if _l/_h are lower/upper bounds (assumed from the names) this is the gap between the i and j intervals
+ const Simd4Float m0y = max(my, zero);
+ const Simd4Float m0z = max(mz, zero);
+
+ const Simd4Float d2x = m0x * m0x; // square each clamped component
+ const Simd4Float d2y = m0y * m0y;
+ const Simd4Float d2z = m0z * m0z;
+
+ const Simd4Float d2s = d2x + d2y; // x and y are summed first...
+ const Simd4Float d2t = d2s + d2z; // ...then z is added, giving the sum of the three squared components
+
+ store4(d2 + boundingBoxStart, d2t); // write the sums to d2 at element offset boundingBoxStart
+}