//! Offset, in reals, between consecutive coordinate components (x, y, z) in the packed coordinate arrays read below; set equal to the i-cluster size
static constexpr int c_xStride2xNN = c_nbnxnCpuIClusterSize;
/* Copies PBC shifted i-cell packed atom coordinates to working array.
 *
 * Reads the x, y and z components of i-cluster ci from the packed array x,
 * starting at the index returned by xIndexFromCi<NbnxnLayout::Simd2xNN>(ci),
 * adds the shift (shx, shy, shz) to the matching component and stores the
 * result as six SIMD-width vectors in work->iClusterData.xSimd.
 * The stride argument is not used (it is marked gmx_unused); the component
 * stride is always c_xStride2xNN.
 */
-static inline void
-icell_set_x_simd_2xnn(int ci,
- real shx, real shy, real shz,
- int gmx_unused stride, const real *x,
- NbnxnPairlistCpuWork *work)
+static inline void icell_set_x_simd_2xnn(int ci,
+ real shx,
+ real shy,
+ real shz,
+ int gmx_unused stride,
+ const real* x,
+ NbnxnPairlistCpuWork* work)
{
int ia;
- real *x_ci_simd = work->iClusterData.xSimd.data();
+ real* x_ci_simd = work->iClusterData.xSimd.data();
ia = xIndexFromCi<NbnxnLayout::Simd2xNN>(ci);
/* Vectors 0-2 hold x/y/z loaded at atom offset 0 and vectors 3-5 hold x/y/z
 * loaded at atom offset 2; presumably each loadU1DualHsimd() spreads two
 * consecutive atoms over the two SIMD halves - confirm against the SIMD module.
 */
- store(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH, loadU1DualHsimd(x + ia + 0*c_xStride2xNN + 0) + SimdReal(shx) );
- store(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH, loadU1DualHsimd(x + ia + 1*c_xStride2xNN + 0) + SimdReal(shy) );
- store(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH, loadU1DualHsimd(x + ia + 2*c_xStride2xNN + 0) + SimdReal(shz) );
- store(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH, loadU1DualHsimd(x + ia + 0*c_xStride2xNN + 2) + SimdReal(shx) );
- store(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH, loadU1DualHsimd(x + ia + 1*c_xStride2xNN + 2) + SimdReal(shy) );
- store(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH, loadU1DualHsimd(x + ia + 2*c_xStride2xNN + 2) + SimdReal(shz) );
+ store(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH,
+ loadU1DualHsimd(x + ia + 0 * c_xStride2xNN + 0) + SimdReal(shx));
+ store(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH,
+ loadU1DualHsimd(x + ia + 1 * c_xStride2xNN + 0) + SimdReal(shy));
+ store(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH,
+ loadU1DualHsimd(x + ia + 2 * c_xStride2xNN + 0) + SimdReal(shz));
+ store(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH,
+ loadU1DualHsimd(x + ia + 0 * c_xStride2xNN + 2) + SimdReal(shx));
+ store(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH,
+ loadU1DualHsimd(x + ia + 1 * c_xStride2xNN + 2) + SimdReal(shy));
+ store(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH,
+ loadU1DualHsimd(x + ia + 2 * c_xStride2xNN + 2) + SimdReal(shz));
}
/*! \brief SIMD code for checking and adding cluster-pairs to the list using coordinates in packed format.
 *
 * \param[in]     rbb2               The squared cut-off for putting cluster-pairs in the list based on bounding box distance only
 * \param[in,out] numDistanceChecks  Counter that is incremented by the number of distance checks performed
 */
-static inline void
-makeClusterListSimd2xnn(const Grid &jGrid,
- NbnxnPairlistCpu * nbl,
- int icluster,
- int firstCell,
- int lastCell,
- bool excludeSubDiagonal,
- const real * gmx_restrict x_j,
- real rlist2,
- float rbb2,
- int * gmx_restrict numDistanceChecks)
+static inline void makeClusterListSimd2xnn(const Grid& jGrid,
+ NbnxnPairlistCpu* nbl,
+ int icluster,
+ int firstCell,
+ int lastCell,
+ bool excludeSubDiagonal,
+ const real* gmx_restrict x_j,
+ real rlist2,
+ float rbb2,
+ int* gmx_restrict numDistanceChecks)
{
using namespace gmx;
- const real * gmx_restrict x_ci_simd = nbl->work->iClusterData.xSimd.data();
- const BoundingBox * gmx_restrict bb_ci = nbl->work->iClusterData.bb.data();
+ const real* gmx_restrict x_ci_simd = nbl->work->iClusterData.xSimd.data();
+ const BoundingBox* gmx_restrict bb_ci = nbl->work->iClusterData.bb.data();
- SimdReal jx_S, jy_S, jz_S;
+ SimdReal jx_S, jy_S, jz_S;
- SimdReal dx_S0, dy_S0, dz_S0;
- SimdReal dx_S2, dy_S2, dz_S2;
+ SimdReal dx_S0, dy_S0, dz_S0;
+ SimdReal dx_S2, dy_S2, dz_S2;
- SimdReal rsq_S0;
- SimdReal rsq_S2;
+ SimdReal rsq_S0;
+ SimdReal rsq_S2;
- SimdBool wco_S0;
- SimdBool wco_S2;
- SimdBool wco_any_S;
+ SimdBool wco_S0;
+ SimdBool wco_S2;
+ SimdBool wco_any_S;
- SimdReal rc2_S;
+ SimdReal rc2_S;
- gmx_bool InRange;
- float d2;
- int xind_f, xind_l;
+ gmx_bool InRange;
+ float d2;
+ int xind_f, xind_l;
int jclusterFirst = cjFromCi<NbnxnLayout::Simd2xNN, 0>(firstCell);
int jclusterLast = cjFromCi<NbnxnLayout::Simd2xNN, 1>(lastCell);
- GMX_ASSERT(jclusterLast >= jclusterFirst, "We should have a non-empty j-cluster range, since the calling code should have ensured a non-empty cell range");
+ GMX_ASSERT(jclusterLast >= jclusterFirst,
+ "We should have a non-empty j-cluster range, since the calling code should have "
+ "ensured a non-empty cell range");
- rc2_S = SimdReal(rlist2);
+ rc2_S = SimdReal(rlist2);
InRange = FALSE;
while (!InRange && jclusterFirst <= jclusterLast)
{
- d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterFirst]);
+ d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterFirst]);
*numDistanceChecks += 2;
/* Check if the distance is within the distance where
}
else if (d2 < rlist2)
{
- xind_f = xIndexFromCj<NbnxnLayout::Simd2xNN>(cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cellOffset()) + jclusterFirst);
+ xind_f = xIndexFromCj<NbnxnLayout::Simd2xNN>(
+ cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cellOffset()) + jclusterFirst);
- jx_S = loadDuplicateHsimd(x_j + xind_f + 0*c_xStride2xNN);
- jy_S = loadDuplicateHsimd(x_j + xind_f + 1*c_xStride2xNN);
- jz_S = loadDuplicateHsimd(x_j + xind_f + 2*c_xStride2xNN);
+ jx_S = loadDuplicateHsimd(x_j + xind_f + 0 * c_xStride2xNN);
+ jy_S = loadDuplicateHsimd(x_j + xind_f + 1 * c_xStride2xNN);
+ jz_S = loadDuplicateHsimd(x_j + xind_f + 2 * c_xStride2xNN);
/* Calculate distance */
- dx_S0 = load<SimdReal>(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S0 = load<SimdReal>(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S0 = load<SimdReal>(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
- dx_S2 = load<SimdReal>(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S2 = load<SimdReal>(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S2 = load<SimdReal>(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S0 = load<SimdReal>(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S0 = load<SimdReal>(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S0 = load<SimdReal>(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S2 = load<SimdReal>(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S2 = load<SimdReal>(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S2 = load<SimdReal>(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH) - jz_S;
/* rsq = dx*dx+dy*dy+dz*dz */
- rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
- rsq_S2 = norm2(dx_S2, dy_S2, dz_S2);
+ rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
+ rsq_S2 = norm2(dx_S2, dy_S2, dz_S2);
- wco_S0 = (rsq_S0 < rc2_S);
- wco_S2 = (rsq_S2 < rc2_S);
+ wco_S0 = (rsq_S0 < rc2_S);
+ wco_S2 = (rsq_S2 < rc2_S);
- wco_any_S = wco_S0 || wco_S2;
+ wco_any_S = wco_S0 || wco_S2;
- InRange = anyTrue(wco_any_S);
+ InRange = anyTrue(wco_any_S);
- *numDistanceChecks += 2*GMX_SIMD_REAL_WIDTH;
+ *numDistanceChecks += 2 * GMX_SIMD_REAL_WIDTH;
}
if (!InRange)
{
InRange = FALSE;
while (!InRange && jclusterLast > jclusterFirst)
{
- d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterLast]);
+ d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterLast]);
*numDistanceChecks += 2;
/* Check if the distance is within the distance where
}
else if (d2 < rlist2)
{
- xind_l = xIndexFromCj<NbnxnLayout::Simd2xNN>(cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cellOffset()) + jclusterLast);
+ xind_l = xIndexFromCj<NbnxnLayout::Simd2xNN>(
+ cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cellOffset()) + jclusterLast);
- jx_S = loadDuplicateHsimd(x_j + xind_l + 0*c_xStride2xNN);
- jy_S = loadDuplicateHsimd(x_j + xind_l + 1*c_xStride2xNN);
- jz_S = loadDuplicateHsimd(x_j + xind_l + 2*c_xStride2xNN);
+ jx_S = loadDuplicateHsimd(x_j + xind_l + 0 * c_xStride2xNN);
+ jy_S = loadDuplicateHsimd(x_j + xind_l + 1 * c_xStride2xNN);
+ jz_S = loadDuplicateHsimd(x_j + xind_l + 2 * c_xStride2xNN);
/* Calculate distance */
- dx_S0 = load<SimdReal>(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S0 = load<SimdReal>(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S0 = load<SimdReal>(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
- dx_S2 = load<SimdReal>(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S2 = load<SimdReal>(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S2 = load<SimdReal>(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S0 = load<SimdReal>(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S0 = load<SimdReal>(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S0 = load<SimdReal>(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S2 = load<SimdReal>(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S2 = load<SimdReal>(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S2 = load<SimdReal>(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH) - jz_S;
/* rsq = dx*dx+dy*dy+dz*dz */
- rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
- rsq_S2 = norm2(dx_S2, dy_S2, dz_S2);
+ rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
+ rsq_S2 = norm2(dx_S2, dy_S2, dz_S2);
- wco_S0 = (rsq_S0 < rc2_S);
- wco_S2 = (rsq_S2 < rc2_S);
+ wco_S0 = (rsq_S0 < rc2_S);
+ wco_S2 = (rsq_S2 < rc2_S);
- wco_any_S = wco_S0 || wco_S2;
+ wco_any_S = wco_S0 || wco_S2;
- InRange = anyTrue(wco_any_S);
+ InRange = anyTrue(wco_any_S);
- *numDistanceChecks += 2*GMX_SIMD_REAL_WIDTH;
+ *numDistanceChecks += 2 * GMX_SIMD_REAL_WIDTH;
}
if (!InRange)
{