*/
/* Stride of the packed x coordinate array: the larger of the SIMD real width
 * (GMX_SIMD_REAL_WIDTH) and the i-cluster size (c_nbnxnCpuIClusterSize).
 * The code below reads the x, y and z components of a packed cluster at
 * offsets of 0, 1 and 2 times this stride from the cluster's start index. */
-static constexpr int c_xStride4xN = (GMX_SIMD_REAL_WIDTH > c_nbnxnCpuIClusterSize ? GMX_SIMD_REAL_WIDTH : c_nbnxnCpuIClusterSize);
+static constexpr int c_xStride4xN =
+ (GMX_SIMD_REAL_WIDTH > c_nbnxnCpuIClusterSize ? GMX_SIMD_REAL_WIDTH : c_nbnxnCpuIClusterSize);
/* Copies PBC shifted i-cell packed atom coordinates to working array.
 *
 * Looks up the start index of i-cluster ci in the packed array x, then, for
 * each of the four atoms at offsets 0..3 from that index, adds the shift
 * (shx, shy, shz) to the atom's x, y and z coordinate, constructs a SimdReal
 * from each shifted scalar and stores it in work->iClusterData.xSimd.
 *
 * The output consists of 12 consecutive slots of GMX_SIMD_REAL_WIDTH reals,
 * ordered atom-major: slot index = 3 * atom + dimension.
 * In the input, the x, y and z components of one atom are c_xStride4xN
 * reals apart.
 *
 * The stride argument is marked gmx_unused and is never read here; the
 * compile-time constant c_xStride4xN is used instead.
 *
 * NOTE(review): SimdReal(scalar) presumably broadcasts the value to all
 * SIMD lanes; confirm against the SIMD module.
 */
-static inline void
-icell_set_x_simd_4xn(int ci,
- real shx, real shy, real shz,
- int gmx_unused stride, const real *x,
- NbnxnPairlistCpuWork *work)
+static inline void icell_set_x_simd_4xn(int ci,
+ real shx,
+ real shy,
+ real shz,
+ int gmx_unused stride,
+ const real* x,
+ NbnxnPairlistCpuWork* work)
{
- int ia;
- real *x_ci_simd = work->iClusterData.xSimd.data();
+ int ia;
+ real* x_ci_simd = work->iClusterData.xSimd.data();
 ia = xIndexFromCi<NbnxnLayout::Simd4xN>(ci);
- store(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0*c_xStride4xN ] + shx) );
- store(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1*c_xStride4xN ] + shy) );
- store(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2*c_xStride4xN ] + shz) );
- store(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0*c_xStride4xN + 1] + shx) );
- store(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1*c_xStride4xN + 1] + shy) );
- store(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2*c_xStride4xN + 1] + shz) );
- store(x_ci_simd + 6*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0*c_xStride4xN + 2] + shx) );
- store(x_ci_simd + 7*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1*c_xStride4xN + 2] + shy) );
- store(x_ci_simd + 8*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2*c_xStride4xN + 2] + shz) );
- store(x_ci_simd + 9*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0*c_xStride4xN + 3] + shx) );
- store(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1*c_xStride4xN + 3] + shy) );
- store(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2*c_xStride4xN + 3] + shz) );
+ store(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0 * c_xStride4xN] + shx));
+ store(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1 * c_xStride4xN] + shy));
+ store(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2 * c_xStride4xN] + shz));
+ store(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0 * c_xStride4xN + 1] + shx));
+ store(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1 * c_xStride4xN + 1] + shy));
+ store(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2 * c_xStride4xN + 1] + shz));
+ store(x_ci_simd + 6 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0 * c_xStride4xN + 2] + shx));
+ store(x_ci_simd + 7 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1 * c_xStride4xN + 2] + shy));
+ store(x_ci_simd + 8 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2 * c_xStride4xN + 2] + shz));
+ store(x_ci_simd + 9 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0 * c_xStride4xN + 3] + shx));
+ store(x_ci_simd + 10 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1 * c_xStride4xN + 3] + shy));
+ store(x_ci_simd + 11 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2 * c_xStride4xN + 3] + shz));
}
/* SIMD code for checking and adding cluster-pairs to the list using coordinates in packed format.
* \param[in] rbb2 The squared cut-off for putting cluster-pairs in the list based on bounding box distance only
* \param[in,out] numDistanceChecks The number of distance checks performed
*/
-static inline void
-makeClusterListSimd4xn(const Grid &jGrid,
- NbnxnPairlistCpu * nbl,
- int icluster,
- int firstCell,
- int lastCell,
- bool excludeSubDiagonal,
- const real * gmx_restrict x_j,
- real rlist2,
- float rbb2,
- int * gmx_restrict numDistanceChecks)
+static inline void makeClusterListSimd4xn(const Grid& jGrid,
+ NbnxnPairlistCpu* nbl,
+ int icluster,
+ int firstCell,
+ int lastCell,
+ bool excludeSubDiagonal,
+ const real* gmx_restrict x_j,
+ real rlist2,
+ float rbb2,
+ int* gmx_restrict numDistanceChecks)
{
using namespace gmx;
- const real * gmx_restrict x_ci_simd = nbl->work->iClusterData.xSimd.data();
- const BoundingBox * gmx_restrict bb_ci = nbl->work->iClusterData.bb.data();
+ const real* gmx_restrict x_ci_simd = nbl->work->iClusterData.xSimd.data();
+ const BoundingBox* gmx_restrict bb_ci = nbl->work->iClusterData.bb.data();
- SimdReal jx_S, jy_S, jz_S;
+ SimdReal jx_S, jy_S, jz_S;
- SimdReal dx_S0, dy_S0, dz_S0;
- SimdReal dx_S1, dy_S1, dz_S1;
- SimdReal dx_S2, dy_S2, dz_S2;
- SimdReal dx_S3, dy_S3, dz_S3;
+ SimdReal dx_S0, dy_S0, dz_S0;
+ SimdReal dx_S1, dy_S1, dz_S1;
+ SimdReal dx_S2, dy_S2, dz_S2;
+ SimdReal dx_S3, dy_S3, dz_S3;
- SimdReal rsq_S0;
- SimdReal rsq_S1;
- SimdReal rsq_S2;
- SimdReal rsq_S3;
+ SimdReal rsq_S0;
+ SimdReal rsq_S1;
+ SimdReal rsq_S2;
+ SimdReal rsq_S3;
- SimdBool wco_S0;
- SimdBool wco_S1;
- SimdBool wco_S2;
- SimdBool wco_S3;
- SimdBool wco_any_S01, wco_any_S23, wco_any_S;
+ SimdBool wco_S0;
+ SimdBool wco_S1;
+ SimdBool wco_S2;
+ SimdBool wco_S3;
+ SimdBool wco_any_S01, wco_any_S23, wco_any_S;
- SimdReal rc2_S;
+ SimdReal rc2_S;
- gmx_bool InRange;
- float d2;
- int xind_f, xind_l;
+ gmx_bool InRange;
+ float d2;
+ int xind_f, xind_l;
/* Convert the j-range from i-cluster size indexing to j-cluster indexing */
int jclusterFirst = cjFromCi<NbnxnLayout::Simd4xN, 0>(firstCell);
int jclusterLast = cjFromCi<NbnxnLayout::Simd4xN, 1>(lastCell);
- GMX_ASSERT(jclusterLast >= jclusterFirst, "We should have a non-empty j-cluster range, since the calling code should have ensured a non-empty cell range");
+ GMX_ASSERT(jclusterLast >= jclusterFirst,
+ "We should have a non-empty j-cluster range, since the calling code should have "
+ "ensured a non-empty cell range");
- rc2_S = SimdReal(rlist2);
+ rc2_S = SimdReal(rlist2);
InRange = FALSE;
while (!InRange && jclusterFirst <= jclusterLast)
{
- d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterFirst]);
+ d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterFirst]);
*numDistanceChecks += 2;
/* Check if the distance is within the distance where
}
else if (d2 < rlist2)
{
- xind_f = xIndexFromCj<NbnxnLayout::Simd4xN>(cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cellOffset()) + jclusterFirst);
+ xind_f = xIndexFromCj<NbnxnLayout::Simd4xN>(
+ cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cellOffset()) + jclusterFirst);
- jx_S = load<SimdReal>(x_j + xind_f + 0*c_xStride4xN);
- jy_S = load<SimdReal>(x_j + xind_f + 1*c_xStride4xN);
- jz_S = load<SimdReal>(x_j + xind_f + 2*c_xStride4xN);
+ jx_S = load<SimdReal>(x_j + xind_f + 0 * c_xStride4xN);
+ jy_S = load<SimdReal>(x_j + xind_f + 1 * c_xStride4xN);
+ jz_S = load<SimdReal>(x_j + xind_f + 2 * c_xStride4xN);
/* Calculate distance */
- dx_S0 = load<SimdReal>(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S0 = load<SimdReal>(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S0 = load<SimdReal>(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
- dx_S1 = load<SimdReal>(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S1 = load<SimdReal>(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S1 = load<SimdReal>(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
- dx_S2 = load<SimdReal>(x_ci_simd + 6*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S2 = load<SimdReal>(x_ci_simd + 7*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S2 = load<SimdReal>(x_ci_simd + 8*GMX_SIMD_REAL_WIDTH) - jz_S;
- dx_S3 = load<SimdReal>(x_ci_simd + 9*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S3 = load<SimdReal>(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S3 = load<SimdReal>(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S0 = load<SimdReal>(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S0 = load<SimdReal>(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S0 = load<SimdReal>(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S1 = load<SimdReal>(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S1 = load<SimdReal>(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S1 = load<SimdReal>(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S2 = load<SimdReal>(x_ci_simd + 6 * GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S2 = load<SimdReal>(x_ci_simd + 7 * GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S2 = load<SimdReal>(x_ci_simd + 8 * GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S3 = load<SimdReal>(x_ci_simd + 9 * GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S3 = load<SimdReal>(x_ci_simd + 10 * GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S3 = load<SimdReal>(x_ci_simd + 11 * GMX_SIMD_REAL_WIDTH) - jz_S;
/* rsq = dx*dx+dy*dy+dz*dz */
- rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
- rsq_S1 = norm2(dx_S1, dy_S1, dz_S1);
- rsq_S2 = norm2(dx_S2, dy_S2, dz_S2);
- rsq_S3 = norm2(dx_S3, dy_S3, dz_S3);
+ rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
+ rsq_S1 = norm2(dx_S1, dy_S1, dz_S1);
+ rsq_S2 = norm2(dx_S2, dy_S2, dz_S2);
+ rsq_S3 = norm2(dx_S3, dy_S3, dz_S3);
- wco_S0 = (rsq_S0 < rc2_S);
- wco_S1 = (rsq_S1 < rc2_S);
- wco_S2 = (rsq_S2 < rc2_S);
- wco_S3 = (rsq_S3 < rc2_S);
+ wco_S0 = (rsq_S0 < rc2_S);
+ wco_S1 = (rsq_S1 < rc2_S);
+ wco_S2 = (rsq_S2 < rc2_S);
+ wco_S3 = (rsq_S3 < rc2_S);
- wco_any_S01 = wco_S0 || wco_S1;
- wco_any_S23 = wco_S2 || wco_S3;
- wco_any_S = wco_any_S01 || wco_any_S23;
+ wco_any_S01 = wco_S0 || wco_S1;
+ wco_any_S23 = wco_S2 || wco_S3;
+ wco_any_S = wco_any_S01 || wco_any_S23;
- InRange = anyTrue(wco_any_S);
+ InRange = anyTrue(wco_any_S);
- *numDistanceChecks += 4*GMX_SIMD_REAL_WIDTH;
+ *numDistanceChecks += 4 * GMX_SIMD_REAL_WIDTH;
}
if (!InRange)
{
InRange = FALSE;
while (!InRange && jclusterLast > jclusterFirst)
{
- d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterLast]);
+ d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterLast]);
*numDistanceChecks += 2;
/* Check if the distance is within the distance where
}
else if (d2 < rlist2)
{
- xind_l = xIndexFromCj<NbnxnLayout::Simd4xN>(cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cellOffset()) + jclusterLast);
+ xind_l = xIndexFromCj<NbnxnLayout::Simd4xN>(
+ cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cellOffset()) + jclusterLast);
- jx_S = load<SimdReal>(x_j +xind_l + 0*c_xStride4xN);
- jy_S = load<SimdReal>(x_j +xind_l + 1*c_xStride4xN);
- jz_S = load<SimdReal>(x_j +xind_l + 2*c_xStride4xN);
+ jx_S = load<SimdReal>(x_j + xind_l + 0 * c_xStride4xN);
+ jy_S = load<SimdReal>(x_j + xind_l + 1 * c_xStride4xN);
+ jz_S = load<SimdReal>(x_j + xind_l + 2 * c_xStride4xN);
/* Calculate distance */
- dx_S0 = load<SimdReal>(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S0 = load<SimdReal>(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S0 = load<SimdReal>(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
- dx_S1 = load<SimdReal>(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S1 = load<SimdReal>(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S1 = load<SimdReal>(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
- dx_S2 = load<SimdReal>(x_ci_simd + 6*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S2 = load<SimdReal>(x_ci_simd + 7*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S2 = load<SimdReal>(x_ci_simd + 8*GMX_SIMD_REAL_WIDTH) - jz_S;
- dx_S3 = load<SimdReal>(x_ci_simd + 9*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S3 = load<SimdReal>(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S3 = load<SimdReal>(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S0 = load<SimdReal>(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S0 = load<SimdReal>(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S0 = load<SimdReal>(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S1 = load<SimdReal>(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S1 = load<SimdReal>(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S1 = load<SimdReal>(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S2 = load<SimdReal>(x_ci_simd + 6 * GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S2 = load<SimdReal>(x_ci_simd + 7 * GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S2 = load<SimdReal>(x_ci_simd + 8 * GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S3 = load<SimdReal>(x_ci_simd + 9 * GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S3 = load<SimdReal>(x_ci_simd + 10 * GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S3 = load<SimdReal>(x_ci_simd + 11 * GMX_SIMD_REAL_WIDTH) - jz_S;
/* rsq = dx*dx+dy*dy+dz*dz */
- rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
- rsq_S1 = norm2(dx_S1, dy_S1, dz_S1);
- rsq_S2 = norm2(dx_S2, dy_S2, dz_S2);
- rsq_S3 = norm2(dx_S3, dy_S3, dz_S3);
+ rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
+ rsq_S1 = norm2(dx_S1, dy_S1, dz_S1);
+ rsq_S2 = norm2(dx_S2, dy_S2, dz_S2);
+ rsq_S3 = norm2(dx_S3, dy_S3, dz_S3);
- wco_S0 = (rsq_S0 < rc2_S);
- wco_S1 = (rsq_S1 < rc2_S);
- wco_S2 = (rsq_S2 < rc2_S);
- wco_S3 = (rsq_S3 < rc2_S);
+ wco_S0 = (rsq_S0 < rc2_S);
+ wco_S1 = (rsq_S1 < rc2_S);
+ wco_S2 = (rsq_S2 < rc2_S);
+ wco_S3 = (rsq_S3 < rc2_S);
- wco_any_S01 = wco_S0 || wco_S1;
- wco_any_S23 = wco_S2 || wco_S3;
- wco_any_S = wco_any_S01 || wco_any_S23;
+ wco_any_S01 = wco_S0 || wco_S1;
+ wco_any_S23 = wco_S2 || wco_S3;
+ wco_any_S = wco_any_S01 || wco_any_S23;
- InRange = anyTrue(wco_any_S);
+ InRange = anyTrue(wco_any_S);
- *numDistanceChecks += 4*GMX_SIMD_REAL_WIDTH;
+ *numDistanceChecks += 4 * GMX_SIMD_REAL_WIDTH;
}
if (!InRange)
{