ia = X_IND_CI_SIMD_2XNN(ci);
- x_ci->ix_SSE0 = gmx_set_2real_shift_pr(x + ia + 0*STRIDE_S + 0, shx);
- x_ci->iy_SSE0 = gmx_set_2real_shift_pr(x + ia + 1*STRIDE_S + 0, shy);
- x_ci->iz_SSE0 = gmx_set_2real_shift_pr(x + ia + 2*STRIDE_S + 0, shz);
- x_ci->ix_SSE2 = gmx_set_2real_shift_pr(x + ia + 0*STRIDE_S + 2, shx);
- x_ci->iy_SSE2 = gmx_set_2real_shift_pr(x + ia + 1*STRIDE_S + 2, shy);
- x_ci->iz_SSE2 = gmx_set_2real_shift_pr(x + ia + 2*STRIDE_S + 2, shz);
+ x_ci->ix_S0 = gmx_set_2real_shift_pr(x + ia + 0*STRIDE_S + 0, shx);
+ x_ci->iy_S0 = gmx_set_2real_shift_pr(x + ia + 1*STRIDE_S + 0, shy);
+ x_ci->iz_S0 = gmx_set_2real_shift_pr(x + ia + 2*STRIDE_S + 0, shz);
+ x_ci->ix_S2 = gmx_set_2real_shift_pr(x + ia + 0*STRIDE_S + 2, shx);
+ x_ci->iy_S2 = gmx_set_2real_shift_pr(x + ia + 1*STRIDE_S + 2, shy);
+ x_ci->iz_S2 = gmx_set_2real_shift_pr(x + ia + 2*STRIDE_S + 2, shz);
}
-#ifndef GMX_SIMD_HAVE_ANYTRUE
-/* Fallback function in case gmx_anytrue_pr is not present */
-static gmx_inline gmx_bool
-gmx_anytrue_2xn_pb(gmx_mm_pb bool_S)
-{
- real bools_array[2*GMX_SIMD_WIDTH_HERE], *bools;
- gmx_bool any;
- int s;
-
- bools = gmx_simd_align_real(bools_array);
-
- gmx_store_pb(bools, bool_S);
-
- any = FALSE;
- for (s = 0; s < GMX_SIMD_WIDTH_HERE; s++)
- {
- if (GMX_SIMD_IS_TRUE(s))
- {
- any = TRUE;
- }
- }
-
- return any;
-}
-#endif
-
/* SIMD code for making a pair list of cell ci vs cell cjf-cjl
* for coordinates in packed format.
* Checks bounding box distances and possibly atom pair distances.
const nbnxn_x_ci_simd_2xnn_t *work;
const nbnxn_bb_t *bb_ci;
- gmx_mm_pr jx_SSE, jy_SSE, jz_SSE;
+ gmx_mm_pr jx_S, jy_S, jz_S;
- gmx_mm_pr dx_SSE0, dy_SSE0, dz_SSE0;
- gmx_mm_pr dx_SSE2, dy_SSE2, dz_SSE2;
+ gmx_mm_pr dx_S0, dy_S0, dz_S0;
+ gmx_mm_pr dx_S2, dy_S2, dz_S2;
- gmx_mm_pr rsq_SSE0;
- gmx_mm_pr rsq_SSE2;
+ gmx_mm_pr rsq_S0;
+ gmx_mm_pr rsq_S2;
- gmx_mm_pb wco_SSE0;
- gmx_mm_pb wco_SSE2;
- gmx_mm_pb wco_any_SSE;
+ gmx_mm_pb wco_S0;
+ gmx_mm_pb wco_S2;
+ gmx_mm_pb wco_any_S;
- gmx_mm_pr rc2_SSE;
+ gmx_mm_pr rc2_S;
gmx_bool InRange;
float d2;
bb_ci = nbl->work->bb_ci;
- rc2_SSE = gmx_set1_pr(rl2);
+ rc2_S = gmx_set1_pr(rl2);
InRange = FALSE;
while (!InRange && cjf <= cjl)
{
-#ifdef NBNXN_SEARCH_BB_SSE
- d2 = subc_bb_dist2_sse(0, bb_ci, cjf, gridj->bbj);
+#ifdef NBNXN_SEARCH_BB_SIMD4
+ d2 = subc_bb_dist2_simd4(0, bb_ci, cjf, gridj->bbj);
#else
d2 = subc_bb_dist2(0, bb_ci, cjf, gridj->bbj);
#endif
{
xind_f = X_IND_CJ_SIMD_2XNN(CI_TO_CJ_SIMD_2XNN(gridj->cell0) + cjf);
- jx_SSE = gmx_load_hpr_hilo_pr(x_j+xind_f+0*STRIDE_S);
- jy_SSE = gmx_load_hpr_hilo_pr(x_j+xind_f+1*STRIDE_S);
- jz_SSE = gmx_load_hpr_hilo_pr(x_j+xind_f+2*STRIDE_S);
+ jx_S = gmx_load_hpr_hilo_pr(x_j+xind_f+0*STRIDE_S);
+ jy_S = gmx_load_hpr_hilo_pr(x_j+xind_f+1*STRIDE_S);
+ jz_S = gmx_load_hpr_hilo_pr(x_j+xind_f+2*STRIDE_S);
/* Calculate distance */
- dx_SSE0 = gmx_sub_pr(work->ix_SSE0, jx_SSE);
- dy_SSE0 = gmx_sub_pr(work->iy_SSE0, jy_SSE);
- dz_SSE0 = gmx_sub_pr(work->iz_SSE0, jz_SSE);
- dx_SSE2 = gmx_sub_pr(work->ix_SSE2, jx_SSE);
- dy_SSE2 = gmx_sub_pr(work->iy_SSE2, jy_SSE);
- dz_SSE2 = gmx_sub_pr(work->iz_SSE2, jz_SSE);
+ dx_S0 = gmx_sub_pr(work->ix_S0, jx_S);
+ dy_S0 = gmx_sub_pr(work->iy_S0, jy_S);
+ dz_S0 = gmx_sub_pr(work->iz_S0, jz_S);
+ dx_S2 = gmx_sub_pr(work->ix_S2, jx_S);
+ dy_S2 = gmx_sub_pr(work->iy_S2, jy_S);
+ dz_S2 = gmx_sub_pr(work->iz_S2, jz_S);
/* rsq = dx*dx+dy*dy+dz*dz */
- rsq_SSE0 = gmx_calc_rsq_pr(dx_SSE0, dy_SSE0, dz_SSE0);
- rsq_SSE2 = gmx_calc_rsq_pr(dx_SSE2, dy_SSE2, dz_SSE2);
+ rsq_S0 = gmx_calc_rsq_pr(dx_S0, dy_S0, dz_S0);
+ rsq_S2 = gmx_calc_rsq_pr(dx_S2, dy_S2, dz_S2);
- wco_SSE0 = gmx_cmplt_pr(rsq_SSE0, rc2_SSE);
- wco_SSE2 = gmx_cmplt_pr(rsq_SSE2, rc2_SSE);
+ wco_S0 = gmx_cmplt_pr(rsq_S0, rc2_S);
+ wco_S2 = gmx_cmplt_pr(rsq_S2, rc2_S);
- wco_any_SSE = gmx_or_pb(wco_SSE0, wco_SSE2);
+ wco_any_S = gmx_or_pb(wco_S0, wco_S2);
-#ifdef GMX_SIMD_HAVE_ANYTRUE
- InRange = gmx_anytrue_pb(wco_any_SSE);
-#else
- InRange = gmx_anytrue_2xn_pb(wco_any_SSE);
-#endif
+ InRange = gmx_anytrue_pb(wco_any_S);
*ndistc += 2*GMX_SIMD_WIDTH_HERE;
}
InRange = FALSE;
while (!InRange && cjl > cjf)
{
-#ifdef NBNXN_SEARCH_BB_SSE
- d2 = subc_bb_dist2_sse(0, bb_ci, cjl, gridj->bbj);
+#ifdef NBNXN_SEARCH_BB_SIMD4
+ d2 = subc_bb_dist2_simd4(0, bb_ci, cjl, gridj->bbj);
#else
d2 = subc_bb_dist2(0, bb_ci, cjl, gridj->bbj);
#endif
{
xind_l = X_IND_CJ_SIMD_2XNN(CI_TO_CJ_SIMD_2XNN(gridj->cell0) + cjl);
- jx_SSE = gmx_load_hpr_hilo_pr(x_j+xind_l+0*STRIDE_S);
- jy_SSE = gmx_load_hpr_hilo_pr(x_j+xind_l+1*STRIDE_S);
- jz_SSE = gmx_load_hpr_hilo_pr(x_j+xind_l+2*STRIDE_S);
+ jx_S = gmx_load_hpr_hilo_pr(x_j+xind_l+0*STRIDE_S);
+ jy_S = gmx_load_hpr_hilo_pr(x_j+xind_l+1*STRIDE_S);
+ jz_S = gmx_load_hpr_hilo_pr(x_j+xind_l+2*STRIDE_S);
/* Calculate distance */
- dx_SSE0 = gmx_sub_pr(work->ix_SSE0, jx_SSE);
- dy_SSE0 = gmx_sub_pr(work->iy_SSE0, jy_SSE);
- dz_SSE0 = gmx_sub_pr(work->iz_SSE0, jz_SSE);
- dx_SSE2 = gmx_sub_pr(work->ix_SSE2, jx_SSE);
- dy_SSE2 = gmx_sub_pr(work->iy_SSE2, jy_SSE);
- dz_SSE2 = gmx_sub_pr(work->iz_SSE2, jz_SSE);
+ dx_S0 = gmx_sub_pr(work->ix_S0, jx_S);
+ dy_S0 = gmx_sub_pr(work->iy_S0, jy_S);
+ dz_S0 = gmx_sub_pr(work->iz_S0, jz_S);
+ dx_S2 = gmx_sub_pr(work->ix_S2, jx_S);
+ dy_S2 = gmx_sub_pr(work->iy_S2, jy_S);
+ dz_S2 = gmx_sub_pr(work->iz_S2, jz_S);
/* rsq = dx*dx+dy*dy+dz*dz */
- rsq_SSE0 = gmx_calc_rsq_pr(dx_SSE0, dy_SSE0, dz_SSE0);
- rsq_SSE2 = gmx_calc_rsq_pr(dx_SSE2, dy_SSE2, dz_SSE2);
+ rsq_S0 = gmx_calc_rsq_pr(dx_S0, dy_S0, dz_S0);
+ rsq_S2 = gmx_calc_rsq_pr(dx_S2, dy_S2, dz_S2);
- wco_SSE0 = gmx_cmplt_pr(rsq_SSE0, rc2_SSE);
- wco_SSE2 = gmx_cmplt_pr(rsq_SSE2, rc2_SSE);
+ wco_S0 = gmx_cmplt_pr(rsq_S0, rc2_S);
+ wco_S2 = gmx_cmplt_pr(rsq_S2, rc2_S);
- wco_any_SSE = gmx_or_pb(wco_SSE0, wco_SSE2);
+ wco_any_S = gmx_or_pb(wco_S0, wco_S2);
-#ifdef GMX_SIMD_HAVE_ANYTRUE
- InRange = gmx_anytrue_pb(wco_any_SSE);
-#else
- InRange = gmx_anytrue_2xn_pb(wco_any_SSE);
-#endif
+ InRange = gmx_anytrue_pb(wco_any_S);
*ndistc += 2*GMX_SIMD_WIDTH_HERE;
}