*/
for (kx = 0; kx < end; kx += GMX_SIMD_REAL_WIDTH)
{
- tmp_d1 = load(d_aligned+kx);
- tmp_r = load(r_aligned+kx);
+ tmp_d1 = load<SimdReal>(d_aligned+kx);
+ tmp_r = load<SimdReal>(r_aligned+kx);
tmp_r = gmx::exp(tmp_r);
tmp_e = f_simd / tmp_d1;
tmp_e = tmp_e * tmp_r;
/* We only need to calculate from start. But since start is 0 or 1
* and we want to use aligned loads/stores, we always start from 0.
*/
- tmp_d = load(d_aligned+kx);
+ tmp_d = load<SimdReal>(d_aligned+kx);
d_inv = SimdReal(1.0) / tmp_d;
store(d_aligned+kx, d_inv);
- tmp_r = load(r_aligned+kx);
+ tmp_r = load<SimdReal>(r_aligned+kx);
tmp_r = gmx::exp(tmp_r);
store(r_aligned+kx, tmp_r);
- tmp_mk = load(factor_aligned+kx);
+ tmp_mk = load<SimdReal>(factor_aligned+kx);
tmp_fac = sqr_PI * tmp_mk * erfc(tmp_mk);
store(factor_aligned+kx, tmp_fac);
}
rkjy_S = yk_S - yj_S;
rkjz_S = zk_S - zj_S;
- k_S = load(coeff);
- theta0_S = load(coeff+GMX_SIMD_REAL_WIDTH) * deg2rad_S;
+ k_S = load<SimdReal>(coeff);
+ theta0_S = load<SimdReal>(coeff+GMX_SIMD_REAL_WIDTH) * deg2rad_S;
pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
&nrkj_n2_S,
&p_S, &q_S);
- cp_S = load(cp);
- phi0_S = load(phi0) * deg2rad_S;
- mult_S = load(mult);
+ cp_S = load<SimdReal>(cp);
+ phi0_S = load<SimdReal>(phi0) * deg2rad_S;
+ mult_S = load<SimdReal>(mult);
mdphi_S = fms(mult_S, phi_S, phi0_S);
cosfac_S = one_S;
for (j = 1; j < NR_RBDIHS; j++)
{
- parm_S = load(parm + j*GMX_SIMD_REAL_WIDTH);
+ parm_S = load<SimdReal>(parm + j*GMX_SIMD_REAL_WIDTH);
ddphi_S = fma(c_S * parm_S, cosfac_S, ddphi_S);
cosfac_S = cosfac_S * cos_S;
c_S = c_S + one_S;
gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ai, &xi[XX], &xi[YY], &xi[ZZ]);
gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), aj, &xj[XX], &xj[YY], &xj[ZZ]);
- T c6 = load(coeff + 0*pack_size);
- T c12 = load(coeff + 1*pack_size);
- T qq = load(coeff + 2*pack_size);
+ T c6 = load<T>(coeff + 0*pack_size);
+ T c12 = load<T>(coeff + 1*pack_size);
+ T qq = load<T>(coeff + 2*pack_size);
/* We could save these operations by storing 6*C6,12*C12 */
c6 = six*c6;
ip_S = iprod(rx_S, ry_S, rz_S, fx_S, fy_S, fz_S);
- rhs_S = load(blc + bs) * ip_S;
+ rhs_S = load<SimdReal>(blc + bs) * ip_S;
store(rhs + bs, rhs_S);
store(sol + bs, rhs_S);
ip_S = iprod(rx_S, ry_S, rz_S, rxp_S, ryp_S, rzp_S);
- rhs_S = load(blc + bs) * (ip_S - load(bllen + bs));
+ rhs_S = load<SimdReal>(blc + bs) * (ip_S - load<SimdReal>(bllen + bs));
store(rhs + bs, rhs_S);
store(sol + bs, rhs_S);
n2_S = norm2(rx_S, ry_S, rz_S);
- len_S = load(bllen + bs);
+ len_S = load<SimdReal>(bllen + bs);
len2_S = len_S * len_S;
dlen2_S = fms(two_S, len2_S, n2_S);
lc_S = fnma(dlen2_S, invsqrt(dlen2_S), len_S);
- blc_S = load(blc + bs);
+ blc_S = load<SimdReal>(blc + bs);
lc_S = blc_S * lc_S;
#if GMX_SIMD_HAVE_REAL
for (b = b0; b < b1; b += GMX_SIMD_REAL_WIDTH)
{
- SimdReal t1 = load(blc + b);
- SimdReal t2 = load(sol + b);
+ SimdReal t1 = load<SimdReal>(blc + b);
+ SimdReal t2 = load<SimdReal>(sol + b);
store(mlambda + b, t1 * t2);
}
#else
#if GMX_SIMD_HAVE_REAL
for (b = b0; b < b1; b += GMX_SIMD_REAL_WIDTH)
{
- SimdReal t1 = load(blc + b);
- SimdReal t2 = load(sol + b);
+ SimdReal t1 = load<SimdReal>(blc + b);
+ SimdReal t2 = load<SimdReal>(sol + b);
SimdReal mvb = t1 * t2;
store(blc_sol + b, mvb);
- store(mlambda + b, load(mlambda + b) + mvb);
+ store(mlambda + b, load<SimdReal>(mlambda + b) + mvb);
}
#else
for (b = b0; b < b1; b++)
if (bCalcVirial)
{
/* Filter out the non-local settles */
- T filter = load(settled->virfac + i);
+ T filter = load<T>(settled->virfac + i);
T mOf = filter*mO;
T mHf = filter*mH;
{
for (int i = i0; i < i1; i += GMX_SIMD_REAL_WIDTH)
{
- dest_SSE = load(dest+i);
+ dest_SSE = load<SimdReal>(dest+i);
for (int s = 0; s < nsrc; s++)
{
- src_SSE = load(src[s]+i);
+ src_SSE = load<SimdReal>(src[s]+i);
dest_SSE = dest_SSE + src_SSE;
}
store(dest+i, dest_SSE);
{
for (int i = i0; i < i1; i += GMX_SIMD_REAL_WIDTH)
{
- dest_SSE = load(src[0]+i);
+ dest_SSE = load<SimdReal>(src[0]+i);
for (int s = 1; s < nsrc; s++)
{
- src_SSE = load(src[s]+i);
+ src_SSE = load<SimdReal>(src[s]+i);
dest_SSE = dest_SSE + src_SSE;
}
store(dest+i, dest_SSE);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#endif
/* Load j-i for the first i */
- diagonal_jmi_S = load(nbat->simd_2xnn_diagonal_j_minus_i);
+ diagonal_jmi_S = load<SimdReal>(nbat->simd_2xnn_diagonal_j_minus_i);
/* Generate all the diagonal masks as comparison results */
#if UNROLLI == UNROLLJ
diagonal_mask_S0 = (zero_S < diagonal_jmi_S);
* matter, as long as both filter and mask data are treated the same way.
*/
#if GMX_SIMD_HAVE_INT32_LOGICAL
- filter_S0 = load(reinterpret_cast<const int *>(exclusion_filter + 0*UNROLLJ));
- filter_S2 = load(reinterpret_cast<const int *>(exclusion_filter + 2*UNROLLJ));
+ filter_S0 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 0*UNROLLJ));
+ filter_S2 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 2*UNROLLJ));
#else
- filter_S0 = load(reinterpret_cast<const real *>(exclusion_filter + 0*UNROLLJ));
- filter_S2 = load(reinterpret_cast<const real *>(exclusion_filter + 2*UNROLLJ));
+ filter_S0 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 0*UNROLLJ));
+ filter_S2 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 2*UNROLLJ));
#endif
#ifdef CALC_COUL_RF
pvdw_c12[2*UNROLLJ+jp] = nbat->nbfp[0*2+1];
pvdw_c12[3*UNROLLJ+jp] = nbat->nbfp[0*2+1];
}
- SimdReal c6_S0 = load(pvdw_c6 +0*UNROLLJ);
- SimdReal c6_S1 = load(pvdw_c6 +1*UNROLLJ);
- SimdReal c6_S2 = load(pvdw_c6 +2*UNROLLJ);
- SimdReal c6_S3 = load(pvdw_c6 +3*UNROLLJ);
-
- SimdReal c12_S0 = load(pvdw_c12+0*UNROLLJ);
- SimdReal c12_S1 = load(pvdw_c12+1*UNROLLJ);
- SimdReal c12_S2 = load(pvdw_c12+2*UNROLLJ);
- SimdReal c12_S3 = load(pvdw_c12+3*UNROLLJ);
+ SimdReal c6_S0 = load<SimdReal>(pvdw_c6 +0*UNROLLJ);
+ SimdReal c6_S1 = load<SimdReal>(pvdw_c6 +1*UNROLLJ);
+ SimdReal c6_S2 = load<SimdReal>(pvdw_c6 +2*UNROLLJ);
+ SimdReal c6_S3 = load<SimdReal>(pvdw_c6 +3*UNROLLJ);
+
+ SimdReal c12_S0 = load<SimdReal>(pvdw_c12+0*UNROLLJ);
+ SimdReal c12_S1 = load<SimdReal>(pvdw_c12+1*UNROLLJ);
+ SimdReal c12_S2 = load<SimdReal>(pvdw_c12+2*UNROLLJ);
+ SimdReal c12_S3 = load<SimdReal>(pvdw_c12+3*UNROLLJ);
#endif /* FIX_LJ_C */
#ifdef ENERGY_GROUPS
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
{
SimdReal v_S;
- v_S = load(v+offset_jj[jj]+jj*GMX_SIMD_REAL_WIDTH);
+ v_S = load<SimdReal>(v+offset_jj[jj]+jj*GMX_SIMD_REAL_WIDTH);
store(v+offset_jj[jj]+jj*GMX_SIMD_REAL_WIDTH, v_S + e_S);
}
}
// Neither real or integer bitwise logical operations supported.
// Load masks from memory instead.
SimdReal zero = setZero();
- *interact_S0 = ( zero < load( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (0 * UNROLLJ)) & 0xF) ) );
- *interact_S1 = ( zero < load( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (1 * UNROLLJ)) & 0xF) ) );
- *interact_S2 = ( zero < load( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (2 * UNROLLJ)) & 0xF) ) );
- *interact_S3 = ( zero < load( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (3 * UNROLLJ)) & 0xF) ) );
+ *interact_S0 = ( zero < load<SimdReal>( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (0 * UNROLLJ)) & 0xF) ) );
+ *interact_S1 = ( zero < load<SimdReal>( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (1 * UNROLLJ)) & 0xF) ) );
+ *interact_S2 = ( zero < load<SimdReal>( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (2 * UNROLLJ)) & 0xF) ) );
+ *interact_S3 = ( zero < load<SimdReal>( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (3 * UNROLLJ)) & 0xF) ) );
#endif
}
#endif /* CHECK_EXCLS */
/* load j atom coordinates */
- jx_S = load(x+ajx);
- jy_S = load(x+ajy);
- jz_S = load(x+ajz);
+ jx_S = load<SimdReal>(x+ajx);
+ jy_S = load<SimdReal>(x+ajy);
+ jz_S = load<SimdReal>(x+ajz);
/* Calculate distance */
dx_S0 = ix_S0 - jx_S;
#ifdef CALC_COULOMB
/* Load parameters for j atom */
- jq_S = load(q+aj);
+ jq_S = load<SimdReal>(q+aj);
qq_S0 = iq_S0 * jq_S;
qq_S1 = iq_S1 * jq_S;
qq_S2 = iq_S2 * jq_S;
#endif /* not defined any LJ rule */
#ifdef LJ_COMB_GEOM
- c6s_j_S = load(ljc+aj2+0);
- c12s_j_S = load(ljc+aj2+STRIDE);
+ c6s_j_S = load<SimdReal>(ljc+aj2+0);
+ c12s_j_S = load<SimdReal>(ljc+aj2+STRIDE);
SimdReal c6_S0 = c6s_S0 * c6s_j_S;
SimdReal c6_S1 = c6s_S1 * c6s_j_S;
#ifndef HALF_LJ
#endif /* LJ_COMB_GEOM */
#ifdef LJ_COMB_LB
- hsig_j_S = load(ljc+aj2+0);
- seps_j_S = load(ljc+aj2+STRIDE);
+ hsig_j_S = load<SimdReal>(ljc+aj2+0);
+ seps_j_S = load<SimdReal>(ljc+aj2+STRIDE);
sig_S0 = hsig_i_S0 + hsig_j_S;
sig_S1 = hsig_i_S1 + hsig_j_S;
#endif
/* Determine C6 for the grid using the geometric combination rule */
- c6s_j_S = load(ljc+aj2+0);
+ c6s_j_S = load<SimdReal>(ljc+aj2+0);
c6grid_S0 = c6s_S0 * c6s_j_S;
c6grid_S1 = c6s_S1 * c6s_j_S;
#ifndef HALF_LJ
fiz_S3 = fiz_S3 + tz_S3;
/* Decrement j atom force */
- store(f+ajx, load(f+ajx) - (tx_S0 + tx_S1 + tx_S2 + tx_S3));
- store(f+ajy, load(f+ajy) - (ty_S0 + ty_S1 + ty_S2 + ty_S3));
- store(f+ajz, load(f+ajz) - (tz_S0 + tz_S1 + tz_S2 + tz_S3));
+ store(f+ajx, load<SimdReal>(f+ajx) - (tx_S0 + tx_S1 + tx_S2 + tx_S3));
+ store(f+ajy, load<SimdReal>(f+ajy) - (ty_S0 + ty_S1 + ty_S2 + ty_S3));
+ store(f+ajz, load<SimdReal>(f+ajz) - (tz_S0 + tz_S1 + tz_S2 + tz_S3));
}
#undef rinv_ex_S0
#endif
/* Load j-i for the first i */
- diagonal_jmi_S = load(nbat->simd_4xn_diagonal_j_minus_i);
+ diagonal_jmi_S = load<SimdReal>(nbat->simd_4xn_diagonal_j_minus_i);
/* Generate all the diagonal masks as comparison results */
#if UNROLLI == UNROLLJ
diagonal_mask_S0 = (zero_S < diagonal_jmi_S);
#if UNROLLI == 2*UNROLLJ
/* Load j-i for the second half of the j-cluster */
- diagonal_jmi_S = load(nbat->simd_4xn_diagonal_j_minus_i + UNROLLJ);
+ diagonal_jmi_S = load<SimdReal>(nbat->simd_4xn_diagonal_j_minus_i + UNROLLJ);
#endif
diagonal_mask1_S0 = (zero_S < diagonal_jmi_S);
* matter, as long as both filter and mask data are treated the same way.
*/
#if GMX_SIMD_HAVE_INT32_LOGICAL
- filter_S0 = load(reinterpret_cast<const int *>(exclusion_filter + 0*UNROLLJ));
- filter_S1 = load(reinterpret_cast<const int *>(exclusion_filter + 1*UNROLLJ));
- filter_S2 = load(reinterpret_cast<const int *>(exclusion_filter + 2*UNROLLJ));
- filter_S3 = load(reinterpret_cast<const int *>(exclusion_filter + 3*UNROLLJ));
+ filter_S0 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 0*UNROLLJ));
+ filter_S1 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 1*UNROLLJ));
+ filter_S2 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 2*UNROLLJ));
+ filter_S3 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 3*UNROLLJ));
#else
- filter_S0 = load(reinterpret_cast<const real *>(exclusion_filter + 0*UNROLLJ));
- filter_S1 = load(reinterpret_cast<const real *>(exclusion_filter + 1*UNROLLJ));
- filter_S2 = load(reinterpret_cast<const real *>(exclusion_filter + 2*UNROLLJ));
- filter_S3 = load(reinterpret_cast<const real *>(exclusion_filter + 3*UNROLLJ));
+ filter_S0 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 0*UNROLLJ));
+ filter_S1 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 1*UNROLLJ));
+ filter_S2 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 2*UNROLLJ));
+ filter_S3 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 3*UNROLLJ));
#endif
#ifdef CALC_COUL_RF
pvdw_c12[2*UNROLLJ+jp] = nbat->nbfp[0*2+1];
pvdw_c12[3*UNROLLJ+jp] = nbat->nbfp[0*2+1];
}
- SimdReal c6_S0 = simdLoad(pvdw_c6 +0*UNROLLJ);
- SimdReal c6_S1 = simdLoad(pvdw_c6 +1*UNROLLJ);
- SimdReal c6_S2 = simdLoad(pvdw_c6 +2*UNROLLJ);
- SimdReal c6_S3 = simdLoad(pvdw_c6 +3*UNROLLJ);
-
- SimdReal c12_S0 = simdLoad(pvdw_c12+0*UNROLLJ);
- SimdReal c12_S1 = simdLoad(pvdw_c12+1*UNROLLJ);
- SimdReal c12_S2 = simdLoad(pvdw_c12+2*UNROLLJ);
- SimdReal c12_S3 = simdLoad(pvdw_c12+3*UNROLLJ);
+ SimdReal c6_S0 = load<SimdReal>(pvdw_c6 +0*UNROLLJ);
+ SimdReal c6_S1 = load<SimdReal>(pvdw_c6 +1*UNROLLJ);
+ SimdReal c6_S2 = load<SimdReal>(pvdw_c6 +2*UNROLLJ);
+ SimdReal c6_S3 = load<SimdReal>(pvdw_c6 +3*UNROLLJ);
+
+ SimdReal c12_S0 = load<SimdReal>(pvdw_c12+0*UNROLLJ);
+ SimdReal c12_S1 = load<SimdReal>(pvdw_c12+1*UNROLLJ);
+ SimdReal c12_S2 = load<SimdReal>(pvdw_c12+2*UNROLLJ);
+ SimdReal c12_S3 = load<SimdReal>(pvdw_c12+3*UNROLLJ);
#endif /* FIX_LJ_C */
#ifdef ENERGY_GROUPS
int ajz = ajy + STRIDE;
/* load j atom coordinates */
- SimdReal jx_S = load(x + ajx);
- SimdReal jy_S = load(x + ajy);
- SimdReal jz_S = load(x + ajz);
+ SimdReal jx_S = load<SimdReal>(x + ajx);
+ SimdReal jy_S = load<SimdReal>(x + ajy);
+ SimdReal jz_S = load<SimdReal>(x + ajz);
/* Calculate distance */
SimdReal dx_S0 = ix_S0 - jx_S;
jz_S = loadDuplicateHsimd(x_j + xind_f + 2*STRIDE_S);
/* Calculate distance */
- dx_S0 = load(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S0 = load(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S0 = load(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
- dx_S2 = load(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S2 = load(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S2 = load(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S0 = load<SimdReal>(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S0 = load<SimdReal>(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S0 = load<SimdReal>(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S2 = load<SimdReal>(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S2 = load<SimdReal>(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S2 = load<SimdReal>(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
/* rsq = dx*dx+dy*dy+dz*dz */
rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
jz_S = loadDuplicateHsimd(x_j + xind_l + 2*STRIDE_S);
/* Calculate distance */
- dx_S0 = load(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S0 = load(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S0 = load(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
- dx_S2 = load(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S2 = load(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S2 = load(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S0 = load<SimdReal>(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S0 = load<SimdReal>(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S0 = load<SimdReal>(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S2 = load<SimdReal>(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S2 = load<SimdReal>(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S2 = load<SimdReal>(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
/* rsq = dx*dx+dy*dy+dz*dz */
rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
{
xind_f = xIndexFromCj<NbnxnLayout::Simd4xN>(cjFromCi<NbnxnLayout::Simd4xN>(gridj->cell0) + jclusterFirst);
- jx_S = load(x_j + xind_f + 0*STRIDE_S);
- jy_S = load(x_j + xind_f + 1*STRIDE_S);
- jz_S = load(x_j + xind_f + 2*STRIDE_S);
+ jx_S = load<SimdReal>(x_j + xind_f + 0*STRIDE_S);
+ jy_S = load<SimdReal>(x_j + xind_f + 1*STRIDE_S);
+ jz_S = load<SimdReal>(x_j + xind_f + 2*STRIDE_S);
/* Calculate distance */
- dx_S0 = load(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S0 = load(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S0 = load(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
- dx_S1 = load(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S1 = load(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S1 = load(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
- dx_S2 = load(x_ci_simd + 6*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S2 = load(x_ci_simd + 7*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S2 = load(x_ci_simd + 8*GMX_SIMD_REAL_WIDTH) - jz_S;
- dx_S3 = load(x_ci_simd + 9*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S3 = load(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S3 = load(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S0 = load<SimdReal>(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S0 = load<SimdReal>(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S0 = load<SimdReal>(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S1 = load<SimdReal>(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S1 = load<SimdReal>(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S1 = load<SimdReal>(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S2 = load<SimdReal>(x_ci_simd + 6*GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S2 = load<SimdReal>(x_ci_simd + 7*GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S2 = load<SimdReal>(x_ci_simd + 8*GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S3 = load<SimdReal>(x_ci_simd + 9*GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S3 = load<SimdReal>(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S3 = load<SimdReal>(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
/* rsq = dx*dx+dy*dy+dz*dz */
rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
{
xind_l = xIndexFromCj<NbnxnLayout::Simd4xN>(cjFromCi<NbnxnLayout::Simd4xN>(gridj->cell0) + jclusterLast);
- jx_S = load(x_j +xind_l + 0*STRIDE_S);
- jy_S = load(x_j +xind_l + 1*STRIDE_S);
- jz_S = load(x_j +xind_l + 2*STRIDE_S);
+ jx_S = load<SimdReal>(x_j +xind_l + 0*STRIDE_S);
+ jy_S = load<SimdReal>(x_j +xind_l + 1*STRIDE_S);
+ jz_S = load<SimdReal>(x_j +xind_l + 2*STRIDE_S);
/* Calculate distance */
- dx_S0 = load(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S0 = load(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S0 = load(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
- dx_S1 = load(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S1 = load(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S1 = load(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
- dx_S2 = load(x_ci_simd + 6*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S2 = load(x_ci_simd + 7*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S2 = load(x_ci_simd + 8*GMX_SIMD_REAL_WIDTH) - jz_S;
- dx_S3 = load(x_ci_simd + 9*GMX_SIMD_REAL_WIDTH) - jx_S;
- dy_S3 = load(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
- dz_S3 = load(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S0 = load<SimdReal>(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S0 = load<SimdReal>(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S0 = load<SimdReal>(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S1 = load<SimdReal>(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S1 = load<SimdReal>(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S1 = load<SimdReal>(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S2 = load<SimdReal>(x_ci_simd + 6*GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S2 = load<SimdReal>(x_ci_simd + 7*GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S2 = load<SimdReal>(x_ci_simd + 8*GMX_SIMD_REAL_WIDTH) - jz_S;
+ dx_S3 = load<SimdReal>(x_ci_simd + 9*GMX_SIMD_REAL_WIDTH) - jx_S;
+ dy_S3 = load<SimdReal>(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
+ dz_S3 = load<SimdReal>(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
/* rsq = dx*dx+dy*dy+dz*dz */
rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2015,2016,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
{
SimdReal shz, shy, shx;
- shz = round(*dz * load(pbc_simd+0*GMX_SIMD_REAL_WIDTH)); // load inv_bzz
- *dx = *dx - shz * load(pbc_simd+1*GMX_SIMD_REAL_WIDTH); // load bzx
- *dy = *dy - shz * load(pbc_simd+2*GMX_SIMD_REAL_WIDTH); // load bzy
- *dz = *dz - shz * load(pbc_simd+3*GMX_SIMD_REAL_WIDTH); // load bzz
+ shz = round(*dz * load<SimdReal>(pbc_simd+0*GMX_SIMD_REAL_WIDTH)); // load inv_bzz
+ *dx = *dx - shz * load<SimdReal>(pbc_simd+1*GMX_SIMD_REAL_WIDTH); // load bzx
+ *dy = *dy - shz * load<SimdReal>(pbc_simd+2*GMX_SIMD_REAL_WIDTH); // load bzy
+ *dz = *dz - shz * load<SimdReal>(pbc_simd+3*GMX_SIMD_REAL_WIDTH); // load bzz
- shy = round(*dy * load(pbc_simd+4*GMX_SIMD_REAL_WIDTH)); // load inv_byy
- *dx = *dx - shy * load(pbc_simd+5*GMX_SIMD_REAL_WIDTH); // load byx
- *dy = *dy - shy * load(pbc_simd+6*GMX_SIMD_REAL_WIDTH); // load byy
+ shy = round(*dy * load<SimdReal>(pbc_simd+4*GMX_SIMD_REAL_WIDTH)); // load inv_byy
+ *dx = *dx - shy * load<SimdReal>(pbc_simd+5*GMX_SIMD_REAL_WIDTH); // load byx
+ *dy = *dy - shy * load<SimdReal>(pbc_simd+6*GMX_SIMD_REAL_WIDTH); // load byy
- shx = round(*dx * load(pbc_simd+7*GMX_SIMD_REAL_WIDTH)); // load inv_bxx
- *dx = *dx - shx * load(pbc_simd+8*GMX_SIMD_REAL_WIDTH); // load bxx
+ shx = round(*dx * load<SimdReal>(pbc_simd+7*GMX_SIMD_REAL_WIDTH)); // load inv_bxx
+ *dx = *dx - shx * load<SimdReal>(pbc_simd+8*GMX_SIMD_REAL_WIDTH); // load bxx
}
//! \} end of name-group describing SIMD data types
-//Traits of Simd. Works for float and double but NOT for int.
+/*! \name High-level SIMD proxy objects to disambiguate load/set operations
+ * \{
+ */
+
+/*! \libinternal \brief Simd traits */
template<typename T>
struct SimdTraits {};
-//This does not work for int because int32_t maps to two simd types and we can't base
-//it on the SIMD type because it doesn't exist if there is no support
+#if GMX_SIMD_HAVE_FLOAT
template<>
-struct SimdTraits<float>
+struct SimdTraits<SimdFloat>
{
-#if GMX_SIMD_HAVE_FLOAT
+ using type = float;
static constexpr int width = GMX_SIMD_FLOAT_WIDTH;
- using type = SimdFloat;
-#else
- static constexpr int width = 1;
-#endif
+ using tag = SimdFloatTag;
};
-
+#endif
+#if GMX_SIMD_HAVE_DOUBLE
template<>
-struct SimdTraits<double>
+struct SimdTraits<SimdDouble>
{
-#if GMX_SIMD_HAVE_DOUBLE
+ using type = double;
static constexpr int width = GMX_SIMD_DOUBLE_WIDTH;
- using type = SimdDouble;
-#else
- static constexpr int width = 1;
-#endif
+ using tag = SimdDoubleTag;
};
-
-template<typename T>
-struct SimdTraits<const T> : public SimdTraits<T> {};
-
-/*! \name High-level SIMD proxy objects to disambiguate load/set operations
- * \{
- */
-template <typename T> //can be either float/double/int, each const or non-const
-class SimdLoadProxyInternal;
-
-template<typename T>
-static inline const SimdLoadProxyInternal<T> gmx_simdcall
-load(T *m);
-
-template <typename T, size_t N>
-static inline const SimdLoadProxyInternal<const T> gmx_simdcall
-load(const AlignedArray<T, N> &m);
-
-/*! \libinternal \brief Proxy object to enable load() for SIMD and equivalent basic type
- *
- * This object is returned by the load() function that takes a single pointer
- * to a float/double. When the result is assigned to either SimdFloat/Double or float/double/int,
- * the appropriate conversion method will be executed, which in turn calls
- * the correct low-level load function.
- * In practice this simply means you can use load() regardless for both SIMD
- * and non-SIMD data in templated functions.
- *
- * This is an internal class which should never be constructed directly. The constructor is private
- * so that only the load function can construct it.
- */
-template <typename T>
-class SimdLoadProxyInternal
-{
- template<typename U>
- using IsIntType = std::is_same<std::int32_t, typename std::remove_const<U>::type>;
-
- public:
- //! \brief Conversion method that will execute load of scalar basic type
- operator T() const { return *m_; }
- //! \brief Conversion method that will execute load of SimdFloat/Double
- template<typename U = T> //Always U=T. Indirection needed for SFINAE
- //Disabled if type doesn't exist (unsupported or int)
- operator typename SimdTraits<U>::type() const { return simdLoad(m_); }
-
+#endif
#if GMX_SIMD_HAVE_FLOAT
- //! \brief Conversion method that will execute load of SimdFInt32
- template<typename U = T, typename = typename std::enable_if<IsIntType<U>::value>::type>
- operator SimdFInt32() const { return simdLoad(m_, SimdFInt32Tag()); }
+template<>
+struct SimdTraits<SimdFInt32>
+{
+ using type = int;
+ static constexpr int width = GMX_SIMD_FINT32_WIDTH;
+ using tag = SimdFInt32Tag;
+};
#endif
#if GMX_SIMD_HAVE_DOUBLE
- //! \brief Conversion method that will execute load of SimdDInt32
- template<typename U = T, typename = typename std::enable_if<IsIntType<U>::value>::type>
- operator SimdDInt32() const { return simdLoad(m_, SimdDInt32Tag()); }
+template<>
+struct SimdTraits<SimdDInt32>
+{
+ using type = int;
+ static constexpr int width = GMX_SIMD_DINT32_WIDTH;
+ using tag = SimdDInt32Tag;
+};
#endif
- private:
- //! \brief Private constructor can only be called from load()
- SimdLoadProxyInternal(T *m) : m_(m) {}
-
- template<typename U>
- friend const SimdLoadProxyInternal<U> gmx_simdcall
- load(U *m);
- template <typename U, size_t N>
- friend const SimdLoadProxyInternal<const U> gmx_simdcall
- load(const AlignedArray<U, N> &m);
-
- T* const m_; //!< The pointer used to load memory
+template<typename T>
+struct SimdTraits<const T>
+{
+ using type = const typename SimdTraits<T>::type;
+ static constexpr int width = SimdTraits<T>::width;
+ using tag = typename SimdTraits<T>::tag;
};
-/*! \brief Load function that returns proxy object for SimdFloat/Double/Int and basic type
+/*! \brief Aligned load function that returns SIMD or scalar based on template argument
*
- * \param m Pointer to load memory
- * \return Proxy object that will call the actual load for either SimdFloat/Double/Int
- * or basic scalar type when you assign it and the conversion method is called.
+ * \tparam T Type to load (type is always mandatory)
+ * \param m Pointer to aligned memory
+ * \return Loaded value
*/
template<typename T>
-static inline const SimdLoadProxyInternal<T> gmx_simdcall
-load(T *m)
+static inline T
+load(const typename SimdTraits<T>::type *m) //disabled by SFINAE for non-SIMD types
{
- return {
- m
- };
+ return simdLoad(m, typename SimdTraits<T>::tag());
}
-template <typename T, size_t N>
-static inline const SimdLoadProxyInternal<const T> gmx_simdcall
-load(const AlignedArray<T, N> &m)
+template<typename T>
+static inline T
+/* The enable_if serves to prevent two different types of misuse:
+ * 1) load<SimdReal>(SimdReal*); load should only be called on real* or int*.
+ * 2) load(real*); the template parameter is mandatory because otherwise an
+ *    ambiguity is created. The dependent type disables type deduction.
+ */
+load(const typename std::enable_if<std::is_arithmetic<T>::value, T>::type *m)
{
- return {
- m.data()
- };
+ return *m;
}
-template <typename T> //can be either float/double/int, each const or non-const
-class SimdLoadUProxyInternal;
-
-template<typename T>
-static inline const SimdLoadUProxyInternal<T> gmx_simdcall
-loadU(T *m);
+template <typename T, size_t N>
+static inline T gmx_simdcall
+load(const AlignedArray<typename SimdTraits<T>::type, N> &m)
+{
+ return simdLoad(m.data(), typename SimdTraits<T>::tag());
+}
-/*! \libinternal \brief Proxy object to enable loadU() for SIMD and equivalent basic type
- *
- * This object is returned by the loadU() function that takes a single pointer
- * to a float/double. When the result is assigned to either SimdFloat/Double or float/double/int,
- * the appropriate conversion method will be executed, which in turn calls
- * the correct low-level load function.
- * In practice this simply means you can use load() regardless for both SIMD
- * and non-SIMD data in templated functions.
+/*! \brief Unaligned load function that returns SIMD or scalar based on template argument
*
- * This is an internal class which should never be constructed directly. The constructor is private
- * so that only the load function can construct it.
+ * \tparam T Type to load (type is always mandatory)
+ * \param m Pointer to unaligned memory
+ * \return Loaded SimdFloat/Double/Int or basic scalar type
*/
-template <typename T>
-class SimdLoadUProxyInternal
+template<typename T>
+static inline T
+loadU(const typename SimdTraits<T>::type *m)
{
- template<typename U>
- using IsIntType = std::is_same<std::int32_t, typename std::remove_const<U>::type>;
-
- public:
- //! \brief Conversion method that will execute loadU of scalar basic type
- operator T() const { return *m_; }
- //! \brief Conversion method that will execute loadU of SimdFloat/Double
- template<typename U = T> //Always U=T. Indirection needed for SFINAE.
- //Disabled if type doesn't exist (unsupported or int)
- operator typename SimdTraits<U>::type() const { return simdLoadU(m_); }
-
-#if GMX_SIMD_HAVE_FLOAT
- //! \brief Conversion method that will execute loadU of SimdFInt32
- template<typename U = T, typename = typename std::enable_if<IsIntType<U>::value>::type>
- operator SimdFInt32() const { return simdLoadU(m_, SimdFInt32Tag()); }
-#endif
-#if GMX_SIMD_HAVE_DOUBLE
- //! \brief Conversion method that will execute loadU of SimdDInt32
- template<typename U = T, typename = typename std::enable_if<IsIntType<U>::value>::type>
- operator SimdDInt32() const { return simdLoadU(m_, SimdDInt32Tag()); }
-#endif
-
- private:
- //! \brief Private constructor can only be called from loadU()
- SimdLoadUProxyInternal(T *m) : m_(m) {}
-
- template<typename U>
- friend const SimdLoadUProxyInternal<U> gmx_simdcall
- loadU(U *m);
-
- T* const m_; //!< The pointer used to load memory
-};
+ return simdLoadU(m, typename SimdTraits<T>::tag());
+}
-/*! \brief LoadU function that returns proxy object for SimdFloat/Double/Int and basic type
- *
- * \param m Pointer to load memory
- * \return Proxy object that will call the actual unaligned load for either SimdFloat/Double/Int
- * or basic scalar type when you assign it and the conversion method is called.
- */
template<typename T>
-static inline const SimdLoadUProxyInternal<T> gmx_simdcall
-loadU(T *m)
+static inline T
+loadU(const typename std::enable_if<std::is_arithmetic<T>::value, T>::type *m)
+{
+ return *m;
+}
+
+template <typename T, size_t N>
+static inline T gmx_simdcall
+loadU(const AlignedArray<typename SimdTraits<T>::type, N> &m)
{
- return {
- m
- };
+ return simdLoadU(m.data(), typename SimdTraits<T>::tag());
}
class SimdSetZeroProxyInternal;
conv.i = conv.i & isieve;
mem[i] = conv.f;
}
- z = load(mem);
+ z = load<SimdFloat>(mem);
#endif
q = (z-y) * (z+y);
corr = fma(CD4, q, CD3);
scalar.cpp
scalar_util.cpp
scalar_math.cpp)
-
-# Add tests for expressions which are supposed to not compile when the
-# build configuration supports a real implementation of the SIMD
-# module. See tests/simd_ambiguous.cpp for documentation.
-set(AMBIGUOUS_COMPILATION_EXPECTED_TO_FAIL TRUE)
-if(GMX_SIMD_ACTIVE STREQUAL "NONE")
- set(AMBIGUOUS_COMPILATION_EXPECTED_TO_FAIL FALSE)
-endif()
-foreach(TEST_PREC float double)
- foreach(TEST_FUNC exp exp2 log inv cos sin sqrt)
- set(TEST_NAME simd_ambiguous_${TEST_PREC}_${TEST_FUNC})
- add_executable(${TEST_NAME} simd_ambiguous.cpp)
- set_target_properties(${TEST_NAME} PROPERTIES
- EXCLUDE_FROM_ALL TRUE
- EXCLUDE_FROM_DEFAULT_BUILD TRUE)
- target_compile_definitions(${TEST_NAME} PRIVATE TEST_FUNC=${TEST_FUNC} TEST_PREC=${TEST_PREC})
- add_test(NAME ${TEST_NAME}
- COMMAND ${CMAKE_COMMAND} --build . --target ${TEST_NAME} --config $<CONFIGURATION>
- WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
- set_tests_properties(${TEST_NAME} PROPERTIES WILL_FAIL ${AMBIGUOUS_COMPILATION_EXPECTED_TO_FAIL})
- endforeach()
-endforeach()
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* \param m Memory address to load from
*/
template <typename T, typename TSimd> TSimd gmx_simdcall
-loadWrapper(const T * m) { return load(m); }
+loadWrapper(const T * m) { return load<TSimd>(m); }
/*! \brief Wrapper to handle proxy objects returned by some loadU functions.
*
* \param m Memory address to load from
*/
template <typename T, typename TSimd> TSimd gmx_simdcall
-loadUWrapper(const T * m) { return loadU(m); }
+loadUWrapper(const T * m) { return loadU<TSimd>(m); }
#if GMX_SIMD_HAVE_REAL
TEST(SimdScalarTest, load)
{
- real val = load(&c1);
+ real val = load<real>(&c1);
EXPECT_EQ(c1, val);
}
TEST(SimdScalarTest, loadU)
{
- real val = loadU(&c1);
+ real val = loadU<real>(&c1);
EXPECT_EQ(c1, val);
}
TEST(SimdScalarTest, loadI)
{
std::int32_t ref = 42;
- std::int32_t val = load(&ref);
+ std::int32_t val = load<int32_t>(&ref);
EXPECT_EQ(ref, val);
}
TEST(SimdScalarTest, loadUI)
{
std::int32_t ref = 42;
- std::int32_t val = load(&ref);
+ std::int32_t val = loadU<int32_t>(&ref);
EXPECT_EQ(ref, val);
}
{
mem[i] = v[i % v.size()]; // repeat vector contents to fill simd width
}
- return load(mem);
+ return load<SimdReal>(mem);
}
SimdReal
{
mem[i] = v[i % v.size()]; // repeat vector contents to fill simd width
}
- return load(mem);
+ return load<SimdInt32>(mem);
}
SimdInt32
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include "gromacs/simd/simd.h"
-#include "gromacs/simd/simd_math.h"
-
-/* Test that math functions which can be used both with scalar and SIMD
- * are ambiguous when applied to value returned from load.
- *
- * gmx::load returns a proxy/reference object which can be casted to either
- * a scalar (e.g. float) or a SIMD value (e.g. SIMDFloat). The gmx math
- * functions (e.g. sqrt) take both a scalar and a SIMD value as an argument.
- * Thus e.g. load(sqrt(m)) should be ambiguous. This test makes sure that
- * this does not compile. This got previously broken by introducing templates
- * which influenced the overload resolution.
- *
- * The test execution code in CMakeLists.txt tests that the code doesn't
- * compile with a SIMD implementation. To test that this code does correctly
- * compile besides causing the ambiguous overload error, it expects to
- * correctly compile for a a non-simd build. For such a build the
- * code is non-ambiguous because only the scalar version exists.
- *
- * The test execution code passes either float/double as TEST_PREC and the math
- * function to test as TEST_FUNC. Both are passed as compile definitions.
- * The file is compiled once for each combination when executing ctest and
- * the test fails if the file compiles.
- *
- * Possible extensions: Test all other math functions including those taking
- * multiple arguments.
- */
-int main()
-{
- /* We cannot check for SIMD float or double support at cmake,
- * only for general SIMD support. Therefore with SIMD, but without
- * SIMD float or double support we make the compilation fail with
- * a static_assert instead of the ambiguous overload error
- */
- constexpr bool testFloat = std::is_same<TEST_PREC, float>::value;
- constexpr bool testDouble = std::is_same<TEST_PREC, double>::value;
- constexpr bool haveSimdSupport = GMX_SIMD;
- constexpr bool haveSimdFloat = GMX_SIMD_HAVE_FLOAT;
- constexpr bool haveSimdDouble = GMX_SIMD_HAVE_DOUBLE;
- static_assert(!haveSimdSupport || !testFloat || haveSimdFloat, "Assertion failure to make test fail without SIMD float support");
- static_assert(!haveSimdSupport || !testDouble || haveSimdDouble, "Assertion failure to make test fail without SIMD double support");
-
- TEST_PREC d = 0;
- TEST_PREC *m = &d;
- gmx::TEST_FUNC(gmx::load(m));
-}
f[i] = i * (1.0 + 100*GMX_FLOAT_EPS);
}
- vf = load(f);
+ vf = load<SimdFloat>(f);
#if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
SimdDouble vd1;
cvtF2DD(vf, &vd0, &vd1);
d[i] = i * (1.0 + 100*GMX_FLOAT_EPS);
}
- vd0 = load(d);
+ vd0 = load<SimdDouble>(d);
#if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
- SimdDouble vd1 = load(d + GMX_SIMD_DOUBLE_WIDTH); // load upper half of data
+ SimdDouble vd1 = load<SimdDouble>(d + GMX_SIMD_DOUBLE_WIDTH); // load upper half of data
vf = cvtDD2F(vd0, vd1);
#elif (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
vf = cvtD2F(vd0);
mem0_[align * offset_[j] + 3] = val3_[j];
}
- ref0 = load(val0_);
- ref1 = load(val1_);
- ref2 = load(val2_);
- ref3 = load(val3_);
+ ref0 = load<SimdReal>(val0_);
+ ref1 = load<SimdReal>(val1_);
+ ref2 = load<SimdReal>(val2_);
+ ref3 = load<SimdReal>(val3_);
if (align == 4)
{
mem0_[align * offset_[j] + 1] = val1_[j];
}
- ref0 = load(val0_);
- ref1 = load(val1_);
+ ref0 = load<SimdReal>(val0_);
+ ref1 = load<SimdReal>(val1_);
if (align == 2)
{
mem0_[align * offset_[j] + 2] = val2_[j];
}
- ref0 = load(val0_);
- ref1 = load(val1_);
- ref2 = load(val2_);
+ ref0 = load<SimdReal>(val0_);
+ ref1 = load<SimdReal>(val1_);
+ ref2 = load<SimdReal>(val2_);
if (align == 3)
{
refmem[align * offset_[j] + 2] = val2_[j];
}
- v0 = load(val0_);
- v1 = load(val1_);
- v2 = load(val2_);
+ v0 = load<SimdReal>(val0_);
+ v1 = load<SimdReal>(val1_);
+ v2 = load<SimdReal>(val2_);
if (align == 3)
{
refmem[align * offset_[j] + 2] += val2_[j];
}
- v0 = load(val0_);
- v1 = load(val1_);
- v2 = load(val2_);
+ v0 = load<SimdReal>(val0_);
+ v1 = load<SimdReal>(val1_);
+ v2 = load<SimdReal>(val2_);
if (align == 3)
{
refmem[3 * offset_[j] + 2] += val2_[j];
}
- v0 = load(val0_);
- v1 = load(val1_);
- v2 = load(val2_);
+ v0 = load<SimdReal>(val0_);
+ v1 = load<SimdReal>(val1_);
+ v2 = load<SimdReal>(val2_);
transposeScatterIncrU<3>(mem0_, offset_, v0, v1, v2);
refmem[align * offset_[j] + 2] -= val2_[j];
}
- v0 = load(val0_);
- v1 = load(val1_);
- v2 = load(val2_);
+ v0 = load<SimdReal>(val0_);
+ v1 = load<SimdReal>(val1_);
+ v2 = load<SimdReal>(val2_);
if (align == 3)
{
refmem[3 * offset_[j] + 2] -= val2_[j];
}
- v0 = load(val0_);
- v1 = load(val1_);
- v2 = load(val2_);
+ v0 = load<SimdReal>(val0_);
+ v1 = load<SimdReal>(val1_);
+ v2 = load<SimdReal>(val2_);
transposeScatterDecrU<3>(mem0_, offset_, v0, v1, v2);
mem0_[i] = i;
}
- vs = load(mem0_);
+ vs = load<SimdReal>(mem0_);
expandScalarsToTriplets(vs, &v0, &v1, &v2);
mem0_[align * offset_[j] + 3] = val3_[j];
}
- simdoffset = load(offset_);
- ref0 = load(val0_);
- ref1 = load(val1_);
- ref2 = load(val2_);
- ref3 = load(val3_);
+ simdoffset = load<SimdInt32>(offset_);
+ ref0 = load<SimdReal>(val0_);
+ ref1 = load<SimdReal>(val1_);
+ ref2 = load<SimdReal>(val2_);
+ ref3 = load<SimdReal>(val3_);
if (align == 4)
{
mem0_[align * offset_[j] + 1] = val1_[j];
}
- simdoffset = load(offset_);
- ref0 = load(val0_);
- ref1 = load(val1_);
+ simdoffset = load<SimdInt32>(offset_);
+ ref0 = load<SimdReal>(val0_);
+ ref1 = load<SimdReal>(val1_);
if (align == 4)
{
mem0_[align * offset_[j] + 1] = val1_[j];
}
- simdoffset = load(offset_);
- ref0 = load(val0_);
- ref1 = load(val1_);
+ simdoffset = load<SimdInt32>(offset_);
+ ref0 = load<SimdReal>(val0_);
+ ref1 = load<SimdReal>(val1_);
if (align == 1)
{
real sum0, sum1, sum2, sum3, tstsum;
FloatingPointTolerance tolerance(defaultRealTolerance());
- v0 = load(val0_);
- v1 = load(val1_);
- v2 = load(val2_);
- v3 = load(val3_);
+ v0 = load<SimdReal>(val0_);
+ v1 = load<SimdReal>(val1_);
+ v2 = load<SimdReal>(val2_);
+ v3 = load<SimdReal>(val3_);
sum0 = sum1 = sum2 = sum3 = 0;
for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
// Point p to the upper half of val0_
real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
- v0 = load(val0_);
+ v0 = load<SimdReal>(val0_);
v1 = loadDualHsimd(val0_, p);
GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
p[i] = val0_[i];
}
- v0 = load(val0_);
+ v0 = load<SimdReal>(val0_);
v1 = loadDuplicateHsimd(val0_);
GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
p[i] = data[1];
}
- v0 = load(val0_);
+ v0 = load<SimdReal>(val0_);
v1 = load1DualHsimd(data);
GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
// Point p to the upper half of val0_
real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
- v0 = load(val2_);
+ v0 = load<SimdReal>(val2_);
storeDualHsimd(val0_, p, v0);
for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
// Point p to the upper half of val0_
real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
- v0 = load(val2_);
+ v0 = load<SimdReal>(val2_);
incrDualHsimd(val0_, p, v0);
for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
reference[i] = val0_[i] + val2_[i] + val2_[GMX_SIMD_REAL_WIDTH/2+i];
}
- v0 = load(val2_);
+ v0 = load<SimdReal>(val2_);
incrDualHsimd(val0_, val0_, v0);
for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH/2; i++)
ref[i] = val0_[i] - ( val1_[i] + p[i] );
}
- v0 = load(val1_);
+ v0 = load<SimdReal>(val1_);
decrHsimd(val0_, v0);
for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
}
- ref0 = load(val0_);
- ref1 = load(val1_);
+ ref0 = load<SimdReal>(val0_);
+ ref1 = load<SimdReal>(val1_);
if (align == 2)
{
FloatingPointTolerance tolerance(defaultRealTolerance());
// Use the half-SIMD storage in memory val0_ and val1_.
- v0 = load(val0_);
- v1 = load(val1_);
+ v0 = load<SimdReal>(val0_);
+ v1 = load<SimdReal>(val1_);
sum0 = sum1 = sum2 = sum3 = 0;
for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
{
idata[i] = i+1;
}
- simd = load(idata);
+ simd = load<SimdInt32>(idata);
/* We cannot do a loop here, since
* - C++ gets confused about signed/unsigned if SSE macros are used in EXPECT_EQ()
// Make position 1 incorrect if width>=2, otherwise position 0
// range.first-GMX_REAL_EPS is not invalid. See comment in table.
alignedMem[ (GMX_SIMD_REAL_WIDTH >= 2) ? 1 : 0] = -GMX_REAL_EPS;
- x = load(alignedMem);
+ x = load<SimdReal>(alignedMem);
EXPECT_THROW_GMX(table.evaluateFunctionAndDerivative(x, &func, &der), gmx::RangeError);
// Make position 1 incorrect if width>=2, otherwise position 0
alignedMem[ (GMX_SIMD_REAL_WIDTH >= 2) ? 1 : 0] = range.second;
- x = load(alignedMem);
+ x = load<SimdReal>(alignedMem);
EXPECT_THROW_GMX(table.evaluateFunctionAndDerivative(x, &func, &der), gmx::RangeError);
}
{
alignedMem[i] = range.second*(1.0-GMX_REAL_EPS)*i/(GMX_SIMD_REAL_WIDTH-1);
}
- x = load(alignedMem);
+ x = load<SimdReal>(alignedMem);
EXPECT_NO_THROW_GMX(table.evaluateFunctionAndDerivative(x, &func, &der));
}