Require template parameter for load function
authorRoland Schulz <roland.schulz@intel.com>
Fri, 6 Oct 2017 23:36:50 +0000 (16:36 -0700)
committerRoland Schulz <roland.schulz@intel.com>
Wed, 11 Oct 2017 23:22:14 +0000 (01:22 +0200)
The implicit conversion from load(float*) to both float
and SimdFloat caused multiple issues. The primary ones:
- Extra complexity in the implementation of traits, ArrayRef, SimdReference
- required compiler tests for ambiguity
- SimdReal x = f(load(m)) //confusing broadcast if f is scalar function
- x = s*load(m) //error-prone scalar multiply if s is scalar

New syntax in templated function is load<T>(m) and in non-templated function
load<SimdReal>(m). While this is slightly longer by itself, it is clearer
and doesn't require storing values in temporaries (no ambiguous overload errors).

Also avoids the need for the load proxies.

Change-Id: I8109e9365e956aaea428ec338b6a810444e03d77

25 files changed:
src/gromacs/ewald/pme-solve.cpp
src/gromacs/listed-forces/bonded.cpp
src/gromacs/listed-forces/pairs.cpp
src/gromacs/mdlib/clincs.cpp
src/gromacs/mdlib/csettle.cpp
src/gromacs/mdlib/nbnxn_atomdata.cpp
src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_outer.h
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_outer.h
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_prune.cpp
src/gromacs/mdlib/nbnxn_search_simd_2xnn.h
src/gromacs/mdlib/nbnxn_search_simd_4xn.h
src/gromacs/pbcutil/pbc-simd.h
src/gromacs/simd/simd.h
src/gromacs/simd/simd_math.h
src/gromacs/simd/tests/CMakeLists.txt
src/gromacs/simd/tests/bootstrap_loadstore.cpp
src/gromacs/simd/tests/scalar.cpp
src/gromacs/simd/tests/simd.cpp
src/gromacs/simd/tests/simd_ambiguous.cpp [deleted file]
src/gromacs/simd/tests/simd_floatingpoint.cpp
src/gromacs/simd/tests/simd_floatingpoint_util.cpp
src/gromacs/simd/tests/simd_integer.cpp
src/gromacs/tables/tests/splinetable.cpp

index e250581ca9e1d01a45e225bd15136ecf104b21ee..e2e33577499faf6c7ed4a9e75288553a6ab62e5d 100644 (file)
@@ -231,8 +231,8 @@ gmx_inline static void calc_exponentials_q(int gmx_unused start, int end, real f
          */
         for (kx = 0; kx < end; kx += GMX_SIMD_REAL_WIDTH)
         {
-            tmp_d1   = load(d_aligned+kx);
-            tmp_r    = load(r_aligned+kx);
+            tmp_d1   = load<SimdReal>(d_aligned+kx);
+            tmp_r    = load<SimdReal>(r_aligned+kx);
             tmp_r    = gmx::exp(tmp_r);
             tmp_e    = f_simd / tmp_d1;
             tmp_e    = tmp_e * tmp_r;
@@ -271,13 +271,13 @@ gmx_inline static void calc_exponentials_lj(int gmx_unused start, int end, real
         /* We only need to calculate from start. But since start is 0 or 1
          * and we want to use aligned loads/stores, we always start from 0.
          */
-        tmp_d = load(d_aligned+kx);
+        tmp_d = load<SimdReal>(d_aligned+kx);
         d_inv = SimdReal(1.0) / tmp_d;
         store(d_aligned+kx, d_inv);
-        tmp_r = load(r_aligned+kx);
+        tmp_r = load<SimdReal>(r_aligned+kx);
         tmp_r = gmx::exp(tmp_r);
         store(r_aligned+kx, tmp_r);
-        tmp_mk  = load(factor_aligned+kx);
+        tmp_mk  = load<SimdReal>(factor_aligned+kx);
         tmp_fac = sqr_PI * tmp_mk * erfc(tmp_mk);
         store(factor_aligned+kx, tmp_fac);
     }
index 4b05207a8015b27bcf0eb1ba777834b056e09ba0..90512069e9bede4a7b9c5681af366580db7e72dd 100644 (file)
@@ -1052,8 +1052,8 @@ angles_noener_simd(int nbonds,
         rkjy_S = yk_S - yj_S;
         rkjz_S = zk_S - zj_S;
 
-        k_S       = load(coeff);
-        theta0_S  = load(coeff+GMX_SIMD_REAL_WIDTH) * deg2rad_S;
+        k_S       = load<SimdReal>(coeff);
+        theta0_S  = load<SimdReal>(coeff+GMX_SIMD_REAL_WIDTH) * deg2rad_S;
 
         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
@@ -1926,9 +1926,9 @@ pdihs_noener_simd(int nbonds,
                        &nrkj_n2_S,
                        &p_S, &q_S);
 
-        cp_S     = load(cp);
-        phi0_S   = load(phi0) * deg2rad_S;
-        mult_S   = load(mult);
+        cp_S     = load<SimdReal>(cp);
+        phi0_S   = load<SimdReal>(phi0) * deg2rad_S;
+        mult_S   = load<SimdReal>(mult);
 
         mdphi_S  = fms(mult_S, phi_S, phi0_S);
 
@@ -2054,7 +2054,7 @@ rbdihs_noener_simd(int nbonds,
         cosfac_S  = one_S;
         for (j = 1; j < NR_RBDIHS; j++)
         {
-            parm_S   = load(parm + j*GMX_SIMD_REAL_WIDTH);
+            parm_S   = load<SimdReal>(parm + j*GMX_SIMD_REAL_WIDTH);
             ddphi_S  = fma(c_S * parm_S, cosfac_S, ddphi_S);
             cosfac_S = cosfac_S * cos_S;
             c_S      = c_S + one_S;
index 7835e4589ed834e20ddf2f8dbb387a16f7854234..6d0d1794de6fdb08f421e30662446194bc476d96 100644 (file)
@@ -597,9 +597,9 @@ do_pairs_simple(int nbonds,
         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ai, &xi[XX], &xi[YY], &xi[ZZ]);
         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), aj, &xj[XX], &xj[YY], &xj[ZZ]);
 
-        T c6    = load(coeff + 0*pack_size);
-        T c12   = load(coeff + 1*pack_size);
-        T qq    = load(coeff + 2*pack_size);
+        T c6    = load<T>(coeff + 0*pack_size);
+        T c12   = load<T>(coeff + 1*pack_size);
+        T qq    = load<T>(coeff + 2*pack_size);
 
         /* We could save these operations by storing 6*C6,12*C12 */
         c6             = six*c6;
index 7a0d823b96be4757fb6a27ea86d2c469ed202a47..336e75b4aa9858105d64dc0828fd36bc2352dcfd 100644 (file)
@@ -488,7 +488,7 @@ calc_dr_x_f_simd(int                       b0,
 
         ip_S  = iprod(rx_S, ry_S, rz_S, fx_S, fy_S, fz_S);
 
-        rhs_S = load(blc + bs) * ip_S;
+        rhs_S = load<SimdReal>(blc + bs) * ip_S;
 
         store(rhs + bs, rhs_S);
         store(sol + bs, rhs_S);
@@ -744,7 +744,7 @@ calc_dr_x_xp_simd(int                       b0,
 
         ip_S  = iprod(rx_S, ry_S, rz_S, rxp_S, ryp_S, rzp_S);
 
-        rhs_S = load(blc + bs) * (ip_S - load(bllen + bs));
+        rhs_S = load<SimdReal>(blc + bs) * (ip_S - load<SimdReal>(bllen + bs));
 
         store(rhs + bs, rhs_S);
         store(sol + bs, rhs_S);
@@ -854,7 +854,7 @@ calc_dist_iter_simd(int                       b0,
 
         n2_S    = norm2(rx_S, ry_S, rz_S);
 
-        len_S   = load(bllen + bs);
+        len_S   = load<SimdReal>(bllen + bs);
         len2_S  = len_S * len_S;
 
         dlen2_S = fms(two_S, len2_S, n2_S);
@@ -869,7 +869,7 @@ calc_dist_iter_simd(int                       b0,
 
         lc_S    = fnma(dlen2_S, invsqrt(dlen2_S), len_S);
 
-        blc_S   = load(blc + bs);
+        blc_S   = load<SimdReal>(blc + bs);
 
         lc_S    = blc_S * lc_S;
 
@@ -1010,8 +1010,8 @@ static void do_lincs(rvec *x, rvec *xp, matrix box, t_pbc *pbc,
 #if GMX_SIMD_HAVE_REAL
     for (b = b0; b < b1; b += GMX_SIMD_REAL_WIDTH)
     {
-        SimdReal t1 = load(blc + b);
-        SimdReal t2 = load(sol + b);
+        SimdReal t1 = load<SimdReal>(blc + b);
+        SimdReal t2 = load<SimdReal>(sol + b);
         store(mlambda + b, t1 * t2);
     }
 #else
@@ -1068,11 +1068,11 @@ static void do_lincs(rvec *x, rvec *xp, matrix box, t_pbc *pbc,
 #if GMX_SIMD_HAVE_REAL
         for (b = b0; b < b1; b += GMX_SIMD_REAL_WIDTH)
         {
-            SimdReal t1  = load(blc + b);
-            SimdReal t2  = load(sol + b);
+            SimdReal t1  = load<SimdReal>(blc + b);
+            SimdReal t2  = load<SimdReal>(sol + b);
             SimdReal mvb = t1 * t2;
             store(blc_sol + b, mvb);
-            store(mlambda + b, load(mlambda + b) + mvb);
+            store(mlambda + b, load<SimdReal>(mlambda + b) + mvb);
         }
 #else
         for (b = b0; b < b1; b++)
index 1dbaff73fae5746eca598ffd21d196c57a440ae3..354c475b95e16b9ae0731e285db15b113c672065 100644 (file)
@@ -717,7 +717,7 @@ static void settleTemplate(const gmx_settledata_t settled,
             if (bCalcVirial)
             {
                 /* Filter out the non-local settles */
-                T filter = load(settled->virfac + i);
+                T filter = load<T>(settled->virfac + i);
                 T mOf    = filter*mO;
                 T mHf    = filter*mH;
 
index 2a66fad79b8a9c356d39d81cca04b5bc43e5bf6b..ed10300eedb16fe59c8011783450345d047399fe 100644 (file)
@@ -1191,10 +1191,10 @@ nbnxn_atomdata_reduce_reals_simd(real gmx_unused * gmx_restrict dest,
     {
         for (int i = i0; i < i1; i += GMX_SIMD_REAL_WIDTH)
         {
-            dest_SSE = load(dest+i);
+            dest_SSE = load<SimdReal>(dest+i);
             for (int s = 0; s < nsrc; s++)
             {
-                src_SSE  = load(src[s]+i);
+                src_SSE  = load<SimdReal>(src[s]+i);
                 dest_SSE = dest_SSE + src_SSE;
             }
             store(dest+i, dest_SSE);
@@ -1204,10 +1204,10 @@ nbnxn_atomdata_reduce_reals_simd(real gmx_unused * gmx_restrict dest,
     {
         for (int i = i0; i < i1; i += GMX_SIMD_REAL_WIDTH)
         {
-            dest_SSE = load(src[0]+i);
+            dest_SSE = load<SimdReal>(src[0]+i);
             for (int s = 1; s < nsrc; s++)
             {
-                src_SSE  = load(src[s]+i);
+                src_SSE  = load<SimdReal>(src[s]+i);
                 dest_SSE = dest_SSE + src_SSE;
             }
             store(dest+i, dest_SSE);
index c346e7a9de3561285352b732abb36eba67a0834b..337f9860c48378a1c2c933150609789b522c1c80 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 #endif
 
     /* Load j-i for the first i */
-    diagonal_jmi_S    = load(nbat->simd_2xnn_diagonal_j_minus_i);
+    diagonal_jmi_S    = load<SimdReal>(nbat->simd_2xnn_diagonal_j_minus_i);
     /* Generate all the diagonal masks as comparison results */
 #if UNROLLI == UNROLLJ
     diagonal_mask_S0  = (zero_S < diagonal_jmi_S);
      * matter, as long as both filter and mask data are treated the same way.
      */
 #if GMX_SIMD_HAVE_INT32_LOGICAL
-    filter_S0 = load(reinterpret_cast<const int *>(exclusion_filter + 0*UNROLLJ));
-    filter_S2 = load(reinterpret_cast<const int *>(exclusion_filter + 2*UNROLLJ));
+    filter_S0 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 0*UNROLLJ));
+    filter_S2 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 2*UNROLLJ));
 #else
-    filter_S0 = load(reinterpret_cast<const real *>(exclusion_filter + 0*UNROLLJ));
-    filter_S2 = load(reinterpret_cast<const real *>(exclusion_filter + 2*UNROLLJ));
+    filter_S0 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 0*UNROLLJ));
+    filter_S2 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 2*UNROLLJ));
 #endif
 
 #ifdef CALC_COUL_RF
         pvdw_c12[2*UNROLLJ+jp] = nbat->nbfp[0*2+1];
         pvdw_c12[3*UNROLLJ+jp] = nbat->nbfp[0*2+1];
     }
-    SimdReal c6_S0  = load(pvdw_c6 +0*UNROLLJ);
-    SimdReal c6_S1  = load(pvdw_c6 +1*UNROLLJ);
-    SimdReal c6_S2  = load(pvdw_c6 +2*UNROLLJ);
-    SimdReal c6_S3  = load(pvdw_c6 +3*UNROLLJ);
-
-    SimdReal c12_S0 = load(pvdw_c12+0*UNROLLJ);
-    SimdReal c12_S1 = load(pvdw_c12+1*UNROLLJ);
-    SimdReal c12_S2 = load(pvdw_c12+2*UNROLLJ);
-    SimdReal c12_S3 = load(pvdw_c12+3*UNROLLJ);
+    SimdReal c6_S0  = load<SimdReal>(pvdw_c6 +0*UNROLLJ);
+    SimdReal c6_S1  = load<SimdReal>(pvdw_c6 +1*UNROLLJ);
+    SimdReal c6_S2  = load<SimdReal>(pvdw_c6 +2*UNROLLJ);
+    SimdReal c6_S3  = load<SimdReal>(pvdw_c6 +3*UNROLLJ);
+
+    SimdReal c12_S0 = load<SimdReal>(pvdw_c12+0*UNROLLJ);
+    SimdReal c12_S1 = load<SimdReal>(pvdw_c12+1*UNROLLJ);
+    SimdReal c12_S2 = load<SimdReal>(pvdw_c12+2*UNROLLJ);
+    SimdReal c12_S3 = load<SimdReal>(pvdw_c12+3*UNROLLJ);
 #endif /* FIX_LJ_C */
 
 #ifdef ENERGY_GROUPS
index 9833f7a6a171b92b776353c8cf60cae24547ddb4..4b1f3646a20cfd618b1ef164de54b0ef32aef9be 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -85,7 +85,7 @@ static gmx_inline void add_ener_grp(SimdReal e_S, real *v, const int *offset_jj)
     {
         SimdReal v_S;
 
-        v_S = load(v+offset_jj[jj]+jj*GMX_SIMD_REAL_WIDTH);
+        v_S = load<SimdReal>(v+offset_jj[jj]+jj*GMX_SIMD_REAL_WIDTH);
         store(v+offset_jj[jj]+jj*GMX_SIMD_REAL_WIDTH, v_S + e_S);
     }
 }
@@ -138,10 +138,10 @@ gmx_load_simd_4xn_interactions(int                               excl,
     // Neither real or integer bitwise logical operations supported.
     // Load masks from memory instead.
     SimdReal      zero = setZero();
-    *interact_S0  = ( zero < load( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (0 * UNROLLJ)) & 0xF) ) );
-    *interact_S1  = ( zero < load( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (1 * UNROLLJ)) & 0xF) ) );
-    *interact_S2  = ( zero < load( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (2 * UNROLLJ)) & 0xF) ) );
-    *interact_S3  = ( zero < load( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (3 * UNROLLJ)) & 0xF) ) );
+    *interact_S0  = ( zero < load<SimdReal>( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (0 * UNROLLJ)) & 0xF) ) );
+    *interact_S1  = ( zero < load<SimdReal>( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (1 * UNROLLJ)) & 0xF) ) );
+    *interact_S2  = ( zero < load<SimdReal>( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (2 * UNROLLJ)) & 0xF) ) );
+    *interact_S3  = ( zero < load<SimdReal>( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (3 * UNROLLJ)) & 0xF) ) );
 #endif
 }
 
index bbd25513eefdccdd5a7a750fd408117fbdf785f0..f0b0b7a8bc66ecefd3207ce4db75377b391a0248 100644 (file)
 #endif /* CHECK_EXCLS */
 
     /* load j atom coordinates */
-    jx_S        = load(x+ajx);
-    jy_S        = load(x+ajy);
-    jz_S        = load(x+ajz);
+    jx_S        = load<SimdReal>(x+ajx);
+    jy_S        = load<SimdReal>(x+ajy);
+    jz_S        = load<SimdReal>(x+ajz);
 
     /* Calculate distance */
     dx_S0       = ix_S0 - jx_S;
 
 #ifdef CALC_COULOMB
     /* Load parameters for j atom */
-    jq_S        = load(q+aj);
+    jq_S        = load<SimdReal>(q+aj);
     qq_S0       = iq_S0 * jq_S;
     qq_S1       = iq_S1 * jq_S;
     qq_S2       = iq_S2 * jq_S;
 #endif /* not defined any LJ rule */
 
 #ifdef LJ_COMB_GEOM
-    c6s_j_S     = load(ljc+aj2+0);
-    c12s_j_S    = load(ljc+aj2+STRIDE);
+    c6s_j_S     = load<SimdReal>(ljc+aj2+0);
+    c12s_j_S    = load<SimdReal>(ljc+aj2+STRIDE);
     SimdReal c6_S0  = c6s_S0 * c6s_j_S;
     SimdReal c6_S1  = c6s_S1 * c6s_j_S;
 #ifndef HALF_LJ
 #endif /* LJ_COMB_GEOM */
 
 #ifdef LJ_COMB_LB
-    hsig_j_S    = load(ljc+aj2+0);
-    seps_j_S    = load(ljc+aj2+STRIDE);
+    hsig_j_S    = load<SimdReal>(ljc+aj2+0);
+    seps_j_S    = load<SimdReal>(ljc+aj2+STRIDE);
 
     sig_S0      = hsig_i_S0 + hsig_j_S;
     sig_S1      = hsig_i_S1 + hsig_j_S;
 #endif
 
         /* Determine C6 for the grid using the geometric combination rule */
-        c6s_j_S         = load(ljc+aj2+0);
+        c6s_j_S         = load<SimdReal>(ljc+aj2+0);
         c6grid_S0       = c6s_S0 * c6s_j_S;
         c6grid_S1       = c6s_S1 * c6s_j_S;
 #ifndef HALF_LJ
     fiz_S3      = fiz_S3 + tz_S3;
 
     /* Decrement j atom force */
-    store(f+ajx, load(f+ajx) - (tx_S0 + tx_S1 + tx_S2 + tx_S3));
-    store(f+ajy, load(f+ajy) - (ty_S0 + ty_S1 + ty_S2 + ty_S3));
-    store(f+ajz, load(f+ajz) - (tz_S0 + tz_S1 + tz_S2 + tz_S3));
+    store(f+ajx, load<SimdReal>(f+ajx) - (tx_S0 + tx_S1 + tx_S2 + tx_S3));
+    store(f+ajy, load<SimdReal>(f+ajy) - (ty_S0 + ty_S1 + ty_S2 + ty_S3));
+    store(f+ajz, load<SimdReal>(f+ajz) - (tz_S0 + tz_S1 + tz_S2 + tz_S3));
 }
 
 #undef  rinv_ex_S0
index e7e5842086a0487e5e3efb6eaa49f4cfbe8afc41..167379c455f1ca241dca3abcbed6bf492036a517 100644 (file)
 #endif
 
     /* Load j-i for the first i */
-    diagonal_jmi_S    = load(nbat->simd_4xn_diagonal_j_minus_i);
+    diagonal_jmi_S    = load<SimdReal>(nbat->simd_4xn_diagonal_j_minus_i);
     /* Generate all the diagonal masks as comparison results */
 #if UNROLLI == UNROLLJ
     diagonal_mask_S0  = (zero_S < diagonal_jmi_S);
 
 #if UNROLLI == 2*UNROLLJ
     /* Load j-i for the second half of the j-cluster */
-    diagonal_jmi_S    = load(nbat->simd_4xn_diagonal_j_minus_i + UNROLLJ);
+    diagonal_jmi_S    = load<SimdReal>(nbat->simd_4xn_diagonal_j_minus_i + UNROLLJ);
 #endif
 
     diagonal_mask1_S0 = (zero_S < diagonal_jmi_S);
      * matter, as long as both filter and mask data are treated the same way.
      */
 #if GMX_SIMD_HAVE_INT32_LOGICAL
-    filter_S0 = load(reinterpret_cast<const int *>(exclusion_filter + 0*UNROLLJ));
-    filter_S1 = load(reinterpret_cast<const int *>(exclusion_filter + 1*UNROLLJ));
-    filter_S2 = load(reinterpret_cast<const int *>(exclusion_filter + 2*UNROLLJ));
-    filter_S3 = load(reinterpret_cast<const int *>(exclusion_filter + 3*UNROLLJ));
+    filter_S0 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 0*UNROLLJ));
+    filter_S1 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 1*UNROLLJ));
+    filter_S2 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 2*UNROLLJ));
+    filter_S3 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 3*UNROLLJ));
 #else
-    filter_S0 = load(reinterpret_cast<const real *>(exclusion_filter + 0*UNROLLJ));
-    filter_S1 = load(reinterpret_cast<const real *>(exclusion_filter + 1*UNROLLJ));
-    filter_S2 = load(reinterpret_cast<const real *>(exclusion_filter + 2*UNROLLJ));
-    filter_S3 = load(reinterpret_cast<const real *>(exclusion_filter + 3*UNROLLJ));
+    filter_S0 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 0*UNROLLJ));
+    filter_S1 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 1*UNROLLJ));
+    filter_S2 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 2*UNROLLJ));
+    filter_S3 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 3*UNROLLJ));
 #endif
 
 #ifdef CALC_COUL_RF
         pvdw_c12[2*UNROLLJ+jp] = nbat->nbfp[0*2+1];
         pvdw_c12[3*UNROLLJ+jp] = nbat->nbfp[0*2+1];
     }
-    SimdReal c6_S0  = simdLoad(pvdw_c6 +0*UNROLLJ);
-    SimdReal c6_S1  = simdLoad(pvdw_c6 +1*UNROLLJ);
-    SimdReal c6_S2  = simdLoad(pvdw_c6 +2*UNROLLJ);
-    SimdReal c6_S3  = simdLoad(pvdw_c6 +3*UNROLLJ);
-
-    SimdReal c12_S0 = simdLoad(pvdw_c12+0*UNROLLJ);
-    SimdReal c12_S1 = simdLoad(pvdw_c12+1*UNROLLJ);
-    SimdReal c12_S2 = simdLoad(pvdw_c12+2*UNROLLJ);
-    SimdReal c12_S3 = simdLoad(pvdw_c12+3*UNROLLJ);
+    SimdReal c6_S0  = load<SimdReal>(pvdw_c6 +0*UNROLLJ);
+    SimdReal c6_S1  = load<SimdReal>(pvdw_c6 +1*UNROLLJ);
+    SimdReal c6_S2  = load<SimdReal>(pvdw_c6 +2*UNROLLJ);
+    SimdReal c6_S3  = load<SimdReal>(pvdw_c6 +3*UNROLLJ);
+
+    SimdReal c12_S0 = load<SimdReal>(pvdw_c12+0*UNROLLJ);
+    SimdReal c12_S1 = load<SimdReal>(pvdw_c12+1*UNROLLJ);
+    SimdReal c12_S2 = load<SimdReal>(pvdw_c12+2*UNROLLJ);
+    SimdReal c12_S3 = load<SimdReal>(pvdw_c12+3*UNROLLJ);
 #endif /* FIX_LJ_C */
 
 #ifdef ENERGY_GROUPS
index d6b42038a55a84d22ff56fb36bc5de9e1d223cc0..d59d88e94df330a9c5cddee359973184b41d1dda 100644 (file)
@@ -127,9 +127,9 @@ nbnxn_kernel_prune_4xn(nbnxn_pairlist_t *         nbl,
             int ajz     = ajy + STRIDE;
 
             /* load j atom coordinates */
-            SimdReal jx_S   = load(x + ajx);
-            SimdReal jy_S   = load(x + ajy);
-            SimdReal jz_S   = load(x + ajz);
+            SimdReal jx_S   = load<SimdReal>(x + ajx);
+            SimdReal jy_S   = load<SimdReal>(x + ajy);
+            SimdReal jz_S   = load<SimdReal>(x + ajz);
 
             /* Calculate distance */
             SimdReal dx_S0  = ix_S0 - jx_S;
index 70b89f37a98f692d9ad85222ab307e9fdcdd2db7..733c030f678ee67527c9b7a39a93d9658516ad48 100644 (file)
@@ -144,12 +144,12 @@ makeClusterListSimd2xnn(const nbnxn_grid_t *      gridj,
             jz_S  = loadDuplicateHsimd(x_j + xind_f + 2*STRIDE_S);
 
             /* Calculate distance */
-            dx_S0            = load(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S0            = load(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S0            = load(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
-            dx_S2            = load(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S2            = load(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S2            = load(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S0            = load<SimdReal>(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S0            = load<SimdReal>(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S0            = load<SimdReal>(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S2            = load<SimdReal>(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S2            = load<SimdReal>(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S2            = load<SimdReal>(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
 
             /* rsq = dx*dx+dy*dy+dz*dz */
             rsq_S0           = norm2(dx_S0, dy_S0, dz_S0);
@@ -202,12 +202,12 @@ makeClusterListSimd2xnn(const nbnxn_grid_t *      gridj,
             jz_S  = loadDuplicateHsimd(x_j + xind_l + 2*STRIDE_S);
 
             /* Calculate distance */
-            dx_S0            = load(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S0            = load(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S0            = load(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
-            dx_S2            = load(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S2            = load(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S2            = load(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S0            = load<SimdReal>(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S0            = load<SimdReal>(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S0            = load<SimdReal>(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S2            = load<SimdReal>(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S2            = load<SimdReal>(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S2            = load<SimdReal>(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
 
             /* rsq = dx*dx+dy*dy+dz*dz */
             rsq_S0           = norm2(dx_S0, dy_S0, dz_S0);
index afe21147c6b8bf716d0502a1914971c4a058f8a9..5432c3ccfb27b264992ae40660af6093ad4df7a6 100644 (file)
@@ -158,24 +158,24 @@ makeClusterListSimd4xn(const nbnxn_grid_t *      gridj,
         {
             xind_f  = xIndexFromCj<NbnxnLayout::Simd4xN>(cjFromCi<NbnxnLayout::Simd4xN>(gridj->cell0) + jclusterFirst);
 
-            jx_S  = load(x_j + xind_f + 0*STRIDE_S);
-            jy_S  = load(x_j + xind_f + 1*STRIDE_S);
-            jz_S  = load(x_j + xind_f + 2*STRIDE_S);
+            jx_S  = load<SimdReal>(x_j + xind_f + 0*STRIDE_S);
+            jy_S  = load<SimdReal>(x_j + xind_f + 1*STRIDE_S);
+            jz_S  = load<SimdReal>(x_j + xind_f + 2*STRIDE_S);
 
 
             /* Calculate distance */
-            dx_S0            = load(x_ci_simd +  0*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S0            = load(x_ci_simd +  1*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S0            = load(x_ci_simd +  2*GMX_SIMD_REAL_WIDTH) - jz_S;
-            dx_S1            = load(x_ci_simd +  3*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S1            = load(x_ci_simd +  4*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S1            = load(x_ci_simd +  5*GMX_SIMD_REAL_WIDTH) - jz_S;
-            dx_S2            = load(x_ci_simd +  6*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S2            = load(x_ci_simd +  7*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S2            = load(x_ci_simd +  8*GMX_SIMD_REAL_WIDTH) - jz_S;
-            dx_S3            = load(x_ci_simd +  9*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S3            = load(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S3            = load(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S0            = load<SimdReal>(x_ci_simd +  0*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S0            = load<SimdReal>(x_ci_simd +  1*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S0            = load<SimdReal>(x_ci_simd +  2*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S1            = load<SimdReal>(x_ci_simd +  3*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S1            = load<SimdReal>(x_ci_simd +  4*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S1            = load<SimdReal>(x_ci_simd +  5*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S2            = load<SimdReal>(x_ci_simd +  6*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S2            = load<SimdReal>(x_ci_simd +  7*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S2            = load<SimdReal>(x_ci_simd +  8*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S3            = load<SimdReal>(x_ci_simd +  9*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S3            = load<SimdReal>(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S3            = load<SimdReal>(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
 
             /* rsq = dx*dx+dy*dy+dz*dz */
             rsq_S0           = norm2(dx_S0, dy_S0, dz_S0);
@@ -229,23 +229,23 @@ makeClusterListSimd4xn(const nbnxn_grid_t *      gridj,
         {
             xind_l  = xIndexFromCj<NbnxnLayout::Simd4xN>(cjFromCi<NbnxnLayout::Simd4xN>(gridj->cell0) + jclusterLast);
 
-            jx_S  = load(x_j +xind_l + 0*STRIDE_S);
-            jy_S  = load(x_j +xind_l + 1*STRIDE_S);
-            jz_S  = load(x_j +xind_l + 2*STRIDE_S);
+            jx_S  = load<SimdReal>(x_j +xind_l + 0*STRIDE_S);
+            jy_S  = load<SimdReal>(x_j +xind_l + 1*STRIDE_S);
+            jz_S  = load<SimdReal>(x_j +xind_l + 2*STRIDE_S);
 
             /* Calculate distance */
-            dx_S0            = load(x_ci_simd +  0*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S0            = load(x_ci_simd +  1*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S0            = load(x_ci_simd +  2*GMX_SIMD_REAL_WIDTH) - jz_S;
-            dx_S1            = load(x_ci_simd +  3*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S1            = load(x_ci_simd +  4*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S1            = load(x_ci_simd +  5*GMX_SIMD_REAL_WIDTH) - jz_S;
-            dx_S2            = load(x_ci_simd +  6*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S2            = load(x_ci_simd +  7*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S2            = load(x_ci_simd +  8*GMX_SIMD_REAL_WIDTH) - jz_S;
-            dx_S3            = load(x_ci_simd +  9*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S3            = load(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S3            = load(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S0            = load<SimdReal>(x_ci_simd +  0*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S0            = load<SimdReal>(x_ci_simd +  1*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S0            = load<SimdReal>(x_ci_simd +  2*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S1            = load<SimdReal>(x_ci_simd +  3*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S1            = load<SimdReal>(x_ci_simd +  4*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S1            = load<SimdReal>(x_ci_simd +  5*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S2            = load<SimdReal>(x_ci_simd +  6*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S2            = load<SimdReal>(x_ci_simd +  7*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S2            = load<SimdReal>(x_ci_simd +  8*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S3            = load<SimdReal>(x_ci_simd +  9*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S3            = load<SimdReal>(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S3            = load<SimdReal>(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
 
             /* rsq = dx*dx+dy*dy+dz*dz */
             rsq_S0           = norm2(dx_S0, dy_S0, dz_S0);
index 893f20296cf4a2948811ed0ce2a9acc951d4f290..709901e113f8a0fcf6f38e5f9d3d3d864ef85247 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2015,2016,2017, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -91,17 +91,17 @@ pbc_correct_dx_simd(SimdReal         *dx,
 {
     SimdReal shz, shy, shx;
 
-    shz = round(*dz * load(pbc_simd+0*GMX_SIMD_REAL_WIDTH)); // load inv_bzz
-    *dx = *dx - shz * load(pbc_simd+1*GMX_SIMD_REAL_WIDTH);  // load bzx
-    *dy = *dy - shz * load(pbc_simd+2*GMX_SIMD_REAL_WIDTH);  // load bzy
-    *dz = *dz - shz * load(pbc_simd+3*GMX_SIMD_REAL_WIDTH);  // load bzz
+    shz = round(*dz * load<SimdReal>(pbc_simd+0*GMX_SIMD_REAL_WIDTH)); // load inv_bzz
+    *dx = *dx - shz * load<SimdReal>(pbc_simd+1*GMX_SIMD_REAL_WIDTH);  // load bzx
+    *dy = *dy - shz * load<SimdReal>(pbc_simd+2*GMX_SIMD_REAL_WIDTH);  // load bzy
+    *dz = *dz - shz * load<SimdReal>(pbc_simd+3*GMX_SIMD_REAL_WIDTH);  // load bzz
 
-    shy = round(*dy * load(pbc_simd+4*GMX_SIMD_REAL_WIDTH)); // load inv_byy
-    *dx = *dx - shy * load(pbc_simd+5*GMX_SIMD_REAL_WIDTH);  // load byx
-    *dy = *dy - shy * load(pbc_simd+6*GMX_SIMD_REAL_WIDTH);  // load byy
+    shy = round(*dy * load<SimdReal>(pbc_simd+4*GMX_SIMD_REAL_WIDTH)); // load inv_byy
+    *dx = *dx - shy * load<SimdReal>(pbc_simd+5*GMX_SIMD_REAL_WIDTH);  // load byx
+    *dy = *dy - shy * load<SimdReal>(pbc_simd+6*GMX_SIMD_REAL_WIDTH);  // load byy
 
-    shx = round(*dx * load(pbc_simd+7*GMX_SIMD_REAL_WIDTH)); // load inv_bxx
-    *dx = *dx - shx * load(pbc_simd+8*GMX_SIMD_REAL_WIDTH);  // load bxx
+    shx = round(*dx * load<SimdReal>(pbc_simd+7*GMX_SIMD_REAL_WIDTH)); // load inv_bxx
+    *dx = *dx - shx * load<SimdReal>(pbc_simd+8*GMX_SIMD_REAL_WIDTH);  // load bxx
 
 }
 
index b5ea65038257154808d7d6ebf1aead93cfe6696f..0871aac1a565a38f07c9d8ee6704d61fd9d79abf 100644 (file)
@@ -398,194 +398,116 @@ typedef Simd4FBool                Simd4Bool;
 
 //! \}  end of name-group describing SIMD data types
 
-//Traits of Simd. Works for float and double but NOT for int.
+/*! \name High-level SIMD proxy objects to disambiguate load/set operations
+ * \{
+ */
+
+/*! \libinternal \brief Simd traits */
 template<typename T>
 struct SimdTraits {};
-//This does not work for int because int32_t maps to two simd types and we can't base
-//it on the SIMD type because it doesn't exist if there is no support
 
+#if GMX_SIMD_HAVE_FLOAT
 template<>
-struct SimdTraits<float>
+struct SimdTraits<SimdFloat>
 {
-#if GMX_SIMD_HAVE_FLOAT
+    using type = float;
     static constexpr int width = GMX_SIMD_FLOAT_WIDTH;
-    using type                 = SimdFloat;
-#else
-    static constexpr int width = 1;
-#endif
+    using tag = SimdFloatTag;
 };
-
+#endif
+#if GMX_SIMD_HAVE_DOUBLE
 template<>
-struct SimdTraits<double>
+struct SimdTraits<SimdDouble>
 {
-#if GMX_SIMD_HAVE_DOUBLE
+    using type = double;
     static constexpr int width = GMX_SIMD_DOUBLE_WIDTH;
-    using type                 = SimdDouble;
-#else
-    static constexpr int width = 1;
-#endif
+    using tag = SimdDoubleTag;
 };
-
-template<typename T>
-struct SimdTraits<const T> : public SimdTraits<T> {};
-
-/*! \name High-level SIMD proxy objects to disambiguate load/set operations
- * \{
- */
-template <typename T> //can be either float/double/int, each const or non-const
-class SimdLoadProxyInternal;
-
-template<typename T>
-static inline const SimdLoadProxyInternal<T> gmx_simdcall
-load(T *m);
-
-template <typename T, size_t N>
-static inline const SimdLoadProxyInternal<const T> gmx_simdcall
-load(const AlignedArray<T, N> &m);
-
-/*! \libinternal \brief Proxy object to enable load() for SIMD and equivalent basic type
- *
- * This object is returned by the load() function that takes a single pointer
- * to a float/double. When the result is assigned to either SimdFloat/Double or float/double/int,
- * the appropriate conversion method will be executed, which in turn calls
- * the correct low-level load function.
- * In practice this simply means you can use load() regardless for both SIMD
- * and non-SIMD data in templated functions.
- *
- * This is an internal class which should never be constructed directly. The constructor is private
- * so that only the load function can construct it.
- */
-template <typename T>
-class SimdLoadProxyInternal
-{
-    template<typename U>
-    using IsIntType = std::is_same<std::int32_t, typename std::remove_const<U>::type>;
-
-    public:
-        //! \brief Conversion method that will execute load of scalar basic type
-        operator T() const { return *m_; }
-        //! \brief Conversion method that will execute load of SimdFloat/Double
-        template<typename U = T>  //Always U=T. Indirection needed for SFINAE
-                                  //Disabled if type doesn't exist (unsupported or int)
-        operator typename SimdTraits<U>::type() const { return simdLoad(m_); }
-
+#endif
 #if GMX_SIMD_HAVE_FLOAT
-        //! \brief Conversion method that will execute load of SimdFInt32
-        template<typename U = T, typename = typename std::enable_if<IsIntType<U>::value>::type>
-        operator SimdFInt32() const { return simdLoad(m_, SimdFInt32Tag()); }
+template<>
+struct SimdTraits<SimdFInt32>
+{
+    using type = int;
+    static constexpr int width = GMX_SIMD_FINT32_WIDTH;
+    using tag = SimdFInt32Tag;
+};
 #endif
 #if GMX_SIMD_HAVE_DOUBLE
-        //! \brief Conversion method that will execute load of SimdDInt32
-        template<typename U = T, typename = typename std::enable_if<IsIntType<U>::value>::type>
-        operator SimdDInt32() const { return simdLoad(m_, SimdDInt32Tag()); }
+template<>
+struct SimdTraits<SimdDInt32>
+{
+    using type = int;
+    static constexpr int width = GMX_SIMD_DINT32_WIDTH;
+    using tag = SimdDInt32Tag;
+};
 #endif
 
-    private:
-        //! \brief Private constructor can only be called from load()
-        SimdLoadProxyInternal(T *m) : m_(m) {}
-
-        template<typename U>
-        friend const SimdLoadProxyInternal<U> gmx_simdcall
-        load(U *m);
-        template <typename U, size_t N>
-        friend const SimdLoadProxyInternal<const U> gmx_simdcall
-        load(const AlignedArray<U, N> &m);
-
-        T* const m_; //!< The pointer used to load memory
+template<typename T>
+struct SimdTraits<const T>
+{
+    using type = const typename SimdTraits<T>::type;
+    static constexpr int width = SimdTraits<T>::width;
+    using tag = typename SimdTraits<T>::tag;
 };
 
-/*! \brief Load function that returns proxy object for SimdFloat/Double/Int and basic type
+/*! \brief Load function that returns SIMD or scalar
  *
- * \param m Pointer to load memory
- * \return Proxy object that will call the actual load for either SimdFloat/Double/Int
- *         or basic scalar type when you assign it and the conversion method is called.
+ * \tparam T Type to load (type is always mandatory)
+ * \param  m Pointer to aligned memory
+ * \return   Loaded value
  */
 template<typename T>
-static inline const SimdLoadProxyInternal<T> gmx_simdcall
-load(T *m)
+static inline T
+load(const typename SimdTraits<T>::type *m) //disabled by SFINAE for non-SIMD types
 {
-    return {
-               m
-    };
+    return simdLoad(m, typename SimdTraits<T>::tag());
 }
 
-template <typename T, size_t N>
-static inline const SimdLoadProxyInternal<const T> gmx_simdcall
-load(const AlignedArray<T, N> &m)
+template<typename T>
+static inline T
+/* the enable_if serves to prevent two different types of misuse:
+ * 1) load<SimdReal>(SimdReal*); should only be called on real* or int*
+ * 2) load(real*); template parameter is mandatory because otherwise ambiguity is
+ *    created. The dependent type disables type deduction.
+ */
+load(const typename std::enable_if<std::is_arithmetic<T>::value, T>::type *m)
 {
-    return {
-               m.data()
-    };
+    return *m;
 }
 
-template <typename T> //can be either float/double/int, each const or non-const
-class SimdLoadUProxyInternal;
-
-template<typename T>
-static inline const SimdLoadUProxyInternal<T> gmx_simdcall
-loadU(T *m);
+template <typename T, size_t N>
+static inline T gmx_simdcall
+load(const AlignedArray<typename SimdTraits<T>::type, N> &m)
+{
+    return simdLoad(m.data(), typename SimdTraits<T>::tag());
+}
 
-/*! \libinternal \brief Proxy object to enable loadU() for SIMD and equivalent basic type
- *
- * This object is returned by the loadU() function that takes a single pointer
- * to a float/double. When the result is assigned to either SimdFloat/Double or float/double/int,
- * the appropriate conversion method will be executed, which in turn calls
- * the correct low-level load function.
- * In practice this simply means you can use load() regardless for both SIMD
- * and non-SIMD data in templated functions.
+/*! \brief Load function that returns SIMD or scalar based on template argument
  *
- * This is an internal class which should never be constructed directly. The constructor is private
- * so that only the load function can construct it.
+ * \tparam T Type to load (type is always mandatory)
+ * \param m Pointer to unaligned memory
+ * \return Loaded SimdFloat/Double/Int or basic scalar type
  */
-template <typename T>
-class SimdLoadUProxyInternal
+template<typename T>
+static inline T
+loadU(const typename SimdTraits<T>::type *m)
 {
-    template<typename U>
-    using IsIntType = std::is_same<std::int32_t, typename std::remove_const<U>::type>;
-
-    public:
-        //! \brief Conversion method that will execute loadU of scalar basic type
-        operator T() const { return *m_; }
-        //! \brief Conversion method that will execute loadU of SimdFloat/Double
-        template<typename U = T>  //Always U=T. Indirection needed for SFINAE.
-                                  //Disabled if type doesn't exist (unsupported or int)
-        operator typename SimdTraits<U>::type() const { return simdLoadU(m_); }
-
-#if GMX_SIMD_HAVE_FLOAT
-        //! \brief Conversion method that will execute loadU of SimdFInt32
-        template<typename U = T, typename = typename std::enable_if<IsIntType<U>::value>::type>
-        operator SimdFInt32() const { return simdLoadU(m_, SimdFInt32Tag()); }
-#endif
-#if GMX_SIMD_HAVE_DOUBLE
-        //! \brief Conversion method that will execute loadU of SimdDInt32
-        template<typename U = T, typename = typename std::enable_if<IsIntType<U>::value>::type>
-        operator SimdDInt32() const { return simdLoadU(m_, SimdDInt32Tag()); }
-#endif
-
-    private:
-        //! \brief Private constructor can only be called from loadU()
-        SimdLoadUProxyInternal(T *m) : m_(m) {}
-
-        template<typename U>
-        friend const SimdLoadUProxyInternal<U> gmx_simdcall
-        loadU(U *m);
-
-        T* const m_; //!< The pointer used to load memory
-};
+    return simdLoadU(m, typename SimdTraits<T>::tag());
+}
 
-/*! \brief LoadU function that returns proxy object for SimdFloat/Double/Int and basic type
- *
- * \param m Pointer to load memory
- * \return Proxy object that will call the actual unaligned load for either SimdFloat/Double/Int
- *         or basic scalar type when you assign it and the conversion method is called.
- */
 template<typename T>
-static inline const SimdLoadUProxyInternal<T> gmx_simdcall
-loadU(T *m)
+static inline T
+loadU(const typename std::enable_if<std::is_arithmetic<T>::value, T>::type *m)
+{
+    return *m;
+}
+
+template <typename T, size_t N>
+static inline T gmx_simdcall
+loadU(const AlignedArray<typename SimdTraits<T>::type, N> &m)
 {
-    return {
-               m
-    };
+    return simdLoadU(m.data(), typename SimdTraits<T>::tag());
 }
 
 class SimdSetZeroProxyInternal;
index 22a7232dfc3e6588c14fc43926b8124ea86c791c..ad519574b1683cb8bd410d2ceece516f34bc64d0 100644 (file)
@@ -806,7 +806,7 @@ erfc(SimdFloat x)
         conv.i  = conv.i & isieve;
         mem[i]  = conv.f;
     }
-    z = load(mem);
+    z = load<SimdFloat>(mem);
 #endif
     q       = (z-y) * (z+y);
     corr    = fma(CD4, q, CD3);
index f7c1c8a2714704ce6a8a0e5629a22995a1025c5e..b651f5f4e1c2ea7217627dd09aa4b6e7679f852a 100644 (file)
@@ -49,25 +49,3 @@ gmx_add_unit_test(SimdUnitTests simd-test
                   scalar.cpp
                   scalar_util.cpp
                   scalar_math.cpp)
-
-# Add tests for expressions which are supposed to not compile when the
-# build configuration supports a real implementation of the SIMD
-# module. See tests/simd_ambiguous.cpp for documentation.
-set(AMBIGUOUS_COMPILATION_EXPECTED_TO_FAIL TRUE)
-if(GMX_SIMD_ACTIVE STREQUAL "NONE")
-    set(AMBIGUOUS_COMPILATION_EXPECTED_TO_FAIL FALSE)
-endif()
-foreach(TEST_PREC float double)
-    foreach(TEST_FUNC exp exp2 log inv cos sin sqrt)
-        set(TEST_NAME simd_ambiguous_${TEST_PREC}_${TEST_FUNC})
-        add_executable(${TEST_NAME} simd_ambiguous.cpp)
-        set_target_properties(${TEST_NAME} PROPERTIES
-                              EXCLUDE_FROM_ALL TRUE
-                              EXCLUDE_FROM_DEFAULT_BUILD TRUE)
-        target_compile_definitions(${TEST_NAME} PRIVATE TEST_FUNC=${TEST_FUNC} TEST_PREC=${TEST_PREC})
-        add_test(NAME ${TEST_NAME}
-            COMMAND ${CMAKE_COMMAND} --build . --target ${TEST_NAME} --config $<CONFIGURATION>
-            WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
-        set_tests_properties(${TEST_NAME} PROPERTIES WILL_FAIL ${AMBIGUOUS_COMPILATION_EXPECTED_TO_FAIL})
-    endforeach()
-endforeach()
index fd1f68c64f3b93d6b98a60a57993c0174b1d2040..5bbdb1d35f80ea3d75806e08c5add85c7d33891e 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -134,7 +134,7 @@ loadStoreTester(TSimd gmx_simdcall loadFn(const T* mem), void gmx_simdcall store
  * \param  m      Memory address to load from
  */
 template <typename T, typename TSimd> TSimd gmx_simdcall
-loadWrapper(const T * m) { return load(m); }
+loadWrapper(const T * m) { return load<TSimd>(m); }
 
 /*! \brief Wrapper to handle proxy objects returned by some loadU functions.
  *
@@ -143,7 +143,7 @@ loadWrapper(const T * m) { return load(m); }
  * \param  m      Memory address to load from
  */
 template <typename T, typename TSimd> TSimd gmx_simdcall
-loadUWrapper(const T * m) { return loadU(m); }
+loadUWrapper(const T * m) { return loadU<TSimd>(m); }
 
 
 #if GMX_SIMD_HAVE_REAL
index 540d3b1398795aa0af7187cc792bf52fd8fecdb3..cfd77f153b95f21248398826d51a7adaa3b609e8 100644 (file)
@@ -62,14 +62,14 @@ namespace
 
 TEST(SimdScalarTest, load)
 {
-    real val = load(&c1);
+    real val = load<real>(&c1);
 
     EXPECT_EQ(c1, val);
 }
 
 TEST(SimdScalarTest, loadU)
 {
-    real val = loadU(&c1);
+    real val = loadU<real>(&c1);
 
     EXPECT_EQ(c1, val);
 }
@@ -253,7 +253,7 @@ TEST(SimdScalarTest, cvtD2D)
 TEST(SimdScalarTest, loadI)
 {
     std::int32_t ref = 42;
-    std::int32_t val = load(&ref);
+    std::int32_t val = load<int32_t>(&ref);
 
     EXPECT_EQ(ref, val);
 }
@@ -261,7 +261,7 @@ TEST(SimdScalarTest, loadI)
 TEST(SimdScalarTest, loadUI)
 {
     std::int32_t ref = 42;
-    std::int32_t val = load(&ref);
+    std::int32_t val = loadU<int32_t>(&ref);
 
     EXPECT_EQ(ref, val);
 }
index 6aa0fa04ff311658760d41b9d77ebf203ca1ebfc..2dcc77214bb47320e7d8950e5c15051da763db1d 100644 (file)
@@ -141,7 +141,7 @@ vector2SimdReal(const std::vector<real> &v)
     {
         mem[i] = v[i % v.size()];  // repeat vector contents to fill simd width
     }
-    return load(mem);
+    return load<SimdReal>(mem);
 }
 
 SimdReal
@@ -199,7 +199,7 @@ vector2SimdInt(const std::vector<int> &v)
     {
         mem[i] = v[i % v.size()];  // repeat vector contents to fill simd width
     }
-    return load(mem);
+    return load<SimdInt32>(mem);
 }
 
 SimdInt32
diff --git a/src/gromacs/simd/tests/simd_ambiguous.cpp b/src/gromacs/simd/tests/simd_ambiguous.cpp
deleted file mode 100644 (file)
index 072cf9e..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include "gromacs/simd/simd.h"
-#include "gromacs/simd/simd_math.h"
-
-/* Test that math functions which can be used both with scalar and SIMD
- * are ambiguous when applied to value returned from load.
- *
- * gmx::load returns a proxy/reference object which can be casted to either
- * a scalar (e.g. float) or a SIMD value (e.g. SIMDFloat). The gmx math
- * functions (e.g. sqrt) take both a scalar and a SIMD value as an argument.
- * Thus e.g. load(sqrt(m)) should be ambiguous. This test makes sure that
- * this does not compile. This got previously broken by introducing templates
- * which influenced the overload resolution.
- *
- * The test execution code in CMakeLists.txt tests that the code doesn't
- * compile with a SIMD implementation. To test that this code does correctly
- * compile besides causing the ambiguous overload error, it expects to
- * correctly compile for a a non-simd build. For such a build the
- * code is non-ambiguous because only the scalar version exists.
- *
- * The test execution code passes either float/double as TEST_PREC and the math
- * function to test as TEST_FUNC. Both are passed as compile definitions.
- * The file is compiled once for each combination when executing ctest and
- * the test fails if the file compiles.
- *
- * Possible extensions: Test all other math functions including those taking
- * multiple arguments.
- */
-int main()
-{
-    /* We cannot check for SIMD float or double support at cmake,
-     * only for general SIMD support. Therefore with SIMD, but without
-     * SIMD float or double support we make the compilation fail with
-     * a static_assert instead of the ambiguous overload error
-     */
-    constexpr bool testFloat       = std::is_same<TEST_PREC, float>::value;
-    constexpr bool testDouble      = std::is_same<TEST_PREC, double>::value;
-    constexpr bool haveSimdSupport = GMX_SIMD;
-    constexpr bool haveSimdFloat   = GMX_SIMD_HAVE_FLOAT;
-    constexpr bool haveSimdDouble  = GMX_SIMD_HAVE_DOUBLE;
-    static_assert(!haveSimdSupport || !testFloat || haveSimdFloat, "Assertion failure to make test fail without SIMD float support");
-    static_assert(!haveSimdSupport || !testDouble || haveSimdDouble, "Assertion failure to make test fail without SIMD double support");
-
-    TEST_PREC  d = 0;
-    TEST_PREC *m = &d;
-    gmx::TEST_FUNC(gmx::load(m));
-}
index fe728444ee11d0d4cd625e1fa4169e57f80cb53f..f8bbc56098d5a8a3253ad86fe0331a1ccbe88d71 100644 (file)
@@ -464,7 +464,7 @@ TEST_F(SimdFloatingpointTest, cvtFloat2Double)
         f[i] = i * (1.0 + 100*GMX_FLOAT_EPS);
     }
 
-    vf = load(f);
+    vf = load<SimdFloat>(f);
 #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
     SimdDouble vd1;
     cvtF2DD(vf, &vd0, &vd1);
@@ -499,9 +499,9 @@ TEST_F(SimdFloatingpointTest, cvtDouble2Float)
         d[i] = i * (1.0 + 100*GMX_FLOAT_EPS);
     }
 
-    vd0 = load(d);
+    vd0 = load<SimdDouble>(d);
 #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
-    SimdDouble vd1 = load(d + GMX_SIMD_DOUBLE_WIDTH); // load upper half of data
+    SimdDouble vd1 = load<SimdDouble>(d + GMX_SIMD_DOUBLE_WIDTH); // load upper half of data
     vf = cvtDD2F(vd0, vd1);
 #elif (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
     vf = cvtD2F(vd0);
index 8d288f77c69be35d16996814f26aed1aa1906faf..e9205eab0e55fb0af9e76dd52c6dc219034a9052 100644 (file)
@@ -137,10 +137,10 @@ TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose4)
             mem0_[align * offset_[j] + 3] = val3_[j];
         }
 
-        ref0 = load(val0_);
-        ref1 = load(val1_);
-        ref2 = load(val2_);
-        ref3 = load(val3_);
+        ref0 = load<SimdReal>(val0_);
+        ref1 = load<SimdReal>(val1_);
+        ref2 = load<SimdReal>(val2_);
+        ref3 = load<SimdReal>(val3_);
 
         if (align == 4)
         {
@@ -185,8 +185,8 @@ TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose2)
             mem0_[align * offset_[j] + 1] = val1_[j];
         }
 
-        ref0 = load(val0_);
-        ref1 = load(val1_);
+        ref0 = load<SimdReal>(val0_);
+        ref1 = load<SimdReal>(val1_);
 
         if (align == 2)
         {
@@ -228,9 +228,9 @@ TEST_F(SimdFloatingpointUtilTest, gatherLoadUTranspose3)
             mem0_[align * offset_[j] + 2] = val2_[j];
         }
 
-        ref0 = load(val0_);
-        ref1 = load(val1_);
-        ref2 = load(val2_);
+        ref0 = load<SimdReal>(val0_);
+        ref1 = load<SimdReal>(val1_);
+        ref2 = load<SimdReal>(val2_);
 
         if (align == 3)
         {
@@ -279,9 +279,9 @@ TEST_F(SimdFloatingpointUtilTest, transposeScatterStoreU3)
             refmem[align * offset_[j] + 2] = val2_[j];
         }
 
-        v0 = load(val0_);
-        v1 = load(val1_);
-        v2 = load(val2_);
+        v0 = load<SimdReal>(val0_);
+        v1 = load<SimdReal>(val1_);
+        v2 = load<SimdReal>(val2_);
 
         if (align == 3)
         {
@@ -331,9 +331,9 @@ TEST_F(SimdFloatingpointUtilTest, transposeScatterIncrU3)
             refmem[align * offset_[j] + 2] += val2_[j];
         }
 
-        v0 = load(val0_);
-        v1 = load(val1_);
-        v2 = load(val2_);
+        v0 = load<SimdReal>(val0_);
+        v1 = load<SimdReal>(val1_);
+        v2 = load<SimdReal>(val2_);
 
         if (align == 3)
         {
@@ -382,9 +382,9 @@ TEST_F(SimdFloatingpointUtilTest, transposeScatterIncrU3Overlapping)
         refmem[3 * offset_[j] + 2] += val2_[j];
     }
 
-    v0 = load(val0_);
-    v1 = load(val1_);
-    v2 = load(val2_);
+    v0 = load<SimdReal>(val0_);
+    v1 = load<SimdReal>(val1_);
+    v2 = load<SimdReal>(val2_);
 
     transposeScatterIncrU<3>(mem0_, offset_, v0, v1, v2);
 
@@ -422,9 +422,9 @@ TEST_F(SimdFloatingpointUtilTest, transposeScatterDecrU3)
             refmem[align * offset_[j] + 2] -= val2_[j];
         }
 
-        v0 = load(val0_);
-        v1 = load(val1_);
-        v2 = load(val2_);
+        v0 = load<SimdReal>(val0_);
+        v1 = load<SimdReal>(val1_);
+        v2 = load<SimdReal>(val2_);
 
         if (align == 3)
         {
@@ -473,9 +473,9 @@ TEST_F(SimdFloatingpointUtilTest, transposeScatterDecrU3Overlapping)
         refmem[3 * offset_[j] + 2] -= val2_[j];
     }
 
-    v0 = load(val0_);
-    v1 = load(val1_);
-    v2 = load(val2_);
+    v0 = load<SimdReal>(val0_);
+    v1 = load<SimdReal>(val1_);
+    v2 = load<SimdReal>(val2_);
 
     transposeScatterDecrU<3>(mem0_, offset_, v0, v1, v2);
 
@@ -495,7 +495,7 @@ TEST_F(SimdFloatingpointUtilTest, expandScalarsToTriplets)
         mem0_[i] = i;
     }
 
-    vs = load(mem0_);
+    vs = load<SimdReal>(mem0_);
 
     expandScalarsToTriplets(vs, &v0, &v1, &v2);
 
@@ -532,11 +532,11 @@ TEST_F(SimdFloatingpointUtilTest, gatherLoadBySimdIntTranspose4)
             mem0_[align * offset_[j] + 3] = val3_[j];
         }
 
-        simdoffset = load(offset_);
-        ref0       = load(val0_);
-        ref1       = load(val1_);
-        ref2       = load(val2_);
-        ref3       = load(val3_);
+        simdoffset = load<SimdInt32>(offset_);
+        ref0       = load<SimdReal>(val0_);
+        ref1       = load<SimdReal>(val1_);
+        ref2       = load<SimdReal>(val2_);
+        ref3       = load<SimdReal>(val3_);
 
         if (align == 4)
         {
@@ -581,9 +581,9 @@ TEST_F(SimdFloatingpointUtilTest, gatherLoadBySimdIntTranspose2)
             mem0_[align * offset_[j] + 1] = val1_[j];
         }
 
-        simdoffset = load(offset_);
-        ref0       = load(val0_);
-        ref1       = load(val1_);
+        simdoffset = load<SimdInt32>(offset_);
+        ref0       = load<SimdReal>(val0_);
+        ref1       = load<SimdReal>(val1_);
 
         if (align == 4)
         {
@@ -626,9 +626,9 @@ TEST_F(SimdFloatingpointUtilTest, gatherLoadUBySimdIntTranspose2)
             mem0_[align * offset_[j] + 1] = val1_[j];
         }
 
-        simdoffset = load(offset_);
-        ref0       = load(val0_);
-        ref1       = load(val1_);
+        simdoffset = load<SimdInt32>(offset_);
+        ref0       = load<SimdReal>(val0_);
+        ref1       = load<SimdReal>(val1_);
 
         if (align == 1)
         {
@@ -660,10 +660,10 @@ TEST_F(SimdFloatingpointUtilTest, reduceIncr4Sum)
     real                              sum0, sum1, sum2, sum3, tstsum;
     FloatingPointTolerance            tolerance(defaultRealTolerance());
 
-    v0 = load(val0_);
-    v1 = load(val1_);
-    v2 = load(val2_);
-    v3 = load(val3_);
+    v0 = load<SimdReal>(val0_);
+    v1 = load<SimdReal>(val1_);
+    v2 = load<SimdReal>(val2_);
+    v3 = load<SimdReal>(val3_);
 
     sum0 = sum1 = sum2 = sum3 = 0;
     for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
@@ -699,7 +699,7 @@ TEST_F(SimdFloatingpointUtilTest, loadDualHsimd)
     // Point p to the upper half of val0_
     real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
 
-    v0 = load(val0_);
+    v0 = load<SimdReal>(val0_);
     v1 = loadDualHsimd(val0_, p);
 
     GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
@@ -717,7 +717,7 @@ TEST_F(SimdFloatingpointUtilTest, loadDuplicateHsimd)
         p[i] = val0_[i];
     }
 
-    v0 = load(val0_);
+    v0 = load<SimdReal>(val0_);
     v1 = loadDuplicateHsimd(val0_);
 
     GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
@@ -739,7 +739,7 @@ TEST_F(SimdFloatingpointUtilTest, load1DualHsimd)
         p[i]     = data[1];
     }
 
-    v0 = load(val0_);
+    v0 = load<SimdReal>(val0_);
     v1 = load1DualHsimd(data);
 
     GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
@@ -754,7 +754,7 @@ TEST_F(SimdFloatingpointUtilTest, storeDualHsimd)
     // Point p to the upper half of val0_
     real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
 
-    v0 = load(val2_);
+    v0 = load<SimdReal>(val2_);
     storeDualHsimd(val0_, p, v0);
 
     for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
@@ -777,7 +777,7 @@ TEST_F(SimdFloatingpointUtilTest, incrDualHsimd)
     // Point p to the upper half of val0_
     real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
 
-    v0 = load(val2_);
+    v0 = load<SimdReal>(val2_);
     incrDualHsimd(val0_, p, v0);
 
     for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
@@ -797,7 +797,7 @@ TEST_F(SimdFloatingpointUtilTest, incrDualHsimdOverlapping)
         reference[i] = val0_[i] + val2_[i] + val2_[GMX_SIMD_REAL_WIDTH/2+i];
     }
 
-    v0 = load(val2_);
+    v0 = load<SimdReal>(val2_);
     incrDualHsimd(val0_, val0_, v0);
 
     for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH/2; i++)
@@ -820,7 +820,7 @@ TEST_F(SimdFloatingpointUtilTest, decrHsimd)
         ref[i] = val0_[i] - ( val1_[i] + p[i] );
     }
 
-    v0 = load(val1_);
+    v0 = load<SimdReal>(val1_);
     decrHsimd(val0_, v0);
 
     for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
@@ -853,8 +853,8 @@ TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose2Hsimd)
 
         }
 
-        ref0 = load(val0_);
-        ref1 = load(val1_);
+        ref0 = load<SimdReal>(val0_);
+        ref1 = load<SimdReal>(val1_);
 
         if (align == 2)
         {
@@ -887,8 +887,8 @@ TEST_F(SimdFloatingpointUtilTest, reduceIncr4SumHsimd)
     FloatingPointTolerance            tolerance(defaultRealTolerance());
 
     // Use the half-SIMD storage in memory val0_ and val1_.
-    v0 = load(val0_);
-    v1 = load(val1_);
+    v0 = load<SimdReal>(val0_);
+    v1 = load<SimdReal>(val1_);
 
     sum0 = sum1 = sum2 = sum3 = 0;
     for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
index 7dd555bbbd15daee5f37010d9f710dd174daf618..f468af1f190113c06a05a54020dd9e8777abfeca 100644 (file)
@@ -147,7 +147,7 @@ TEST_F(SimdIntegerTest, extract)
     {
         idata[i] = i+1;
     }
-    simd = load(idata);
+    simd = load<SimdInt32>(idata);
 
     /* We cannot do a loop here, since
      * - C++ gets confused about signed/unsigned if SSE macros are used in EXPECT_EQ()
index 02b7ea73a2d13e299add9836e9ac78d7f9845bb4..e54a52d0d6971be7955ce295960926a85961b6f2 100644 (file)
@@ -725,13 +725,13 @@ TYPED_TEST(SplineTableTest, CatchesOutOfRangeValuesSimd)
     // Make position 1 incorrect if width>=2, otherwise position 0
     // range.first-GMX_REAL_EPS is not invalid. See comment in table.
     alignedMem[ (GMX_SIMD_REAL_WIDTH >= 2) ? 1 : 0] = -GMX_REAL_EPS;
-    x = load(alignedMem);
+    x = load<SimdReal>(alignedMem);
 
     EXPECT_THROW_GMX(table.evaluateFunctionAndDerivative(x, &func, &der), gmx::RangeError);
 
     // Make position 1 incorrect if width>=2, otherwise position 0
     alignedMem[ (GMX_SIMD_REAL_WIDTH >= 2) ? 1 : 0] = range.second;
-    x = load(alignedMem);
+    x = load<SimdReal>(alignedMem);
 
     EXPECT_THROW_GMX(table.evaluateFunctionAndDerivative(x, &func, &der), gmx::RangeError);
 }
@@ -749,7 +749,7 @@ TYPED_TEST(SplineTableTest, AcceptsInRangeValuesSimd)
     {
         alignedMem[i] = range.second*(1.0-GMX_REAL_EPS)*i/(GMX_SIMD_REAL_WIDTH-1);
     }
-    x = load(alignedMem);
+    x = load<SimdReal>(alignedMem);
 
     EXPECT_NO_THROW_GMX(table.evaluateFunctionAndDerivative(x, &func, &der));
 }