* This code does not assume any memory alignment for the grid.
*/
{
- real fx_tmp[4], fy_tmp[4], fz_tmp[4];
-
gmx_simd4_real_t fx_S, fy_S, fz_S;
gmx_simd4_real_t tx_S, ty_S, tz_S;
}
}
- gmx_simd4_storeu_r(fx_tmp, fx_S);
- gmx_simd4_storeu_r(fy_tmp, fy_S);
- gmx_simd4_storeu_r(fz_tmp, fz_S);
-
- fx += fx_tmp[0]+fx_tmp[1]+fx_tmp[2]+fx_tmp[3];
- fy += fy_tmp[0]+fy_tmp[1]+fy_tmp[2]+fy_tmp[3];
- fz += fz_tmp[0]+fz_tmp[1]+fz_tmp[2]+fz_tmp[3];
+ fx += gmx_simd4_reduce_r(fx_S);
+ fy += gmx_simd4_reduce_r(fy_S);
+ fz += gmx_simd4_reduce_r(fz_S);
}
#undef PME_GATHER_F_SIMD4_ORDER4
#endif
{
int offset;
- real fx_tmp[4], fy_tmp[4], fz_tmp[4];
-
gmx_simd4_real_t fx_S, fy_S, fz_S;
gmx_simd4_real_t tx_S, ty_S, tz_S0, tz_S1;
}
}
- gmx_simd4_store_r(fx_tmp, fx_S);
- gmx_simd4_store_r(fy_tmp, fy_S);
- gmx_simd4_store_r(fz_tmp, fz_S);
-
- fx += fx_tmp[0]+fx_tmp[1]+fx_tmp[2]+fx_tmp[3];
- fy += fy_tmp[0]+fy_tmp[1]+fy_tmp[2]+fy_tmp[3];
- fz += fz_tmp[0]+fz_tmp[1]+fz_tmp[2]+fz_tmp[3];
+ fx += gmx_simd4_reduce_r(fx_S);
+ fy += gmx_simd4_reduce_r(fy_S);
+ fz += gmx_simd4_reduce_r(fz_S);
}
#undef PME_ORDER
#undef PME_GATHER_F_SIMD4_ALIGNED