Require template parameter for load function

author Roland Schulz <roland.schulz@intel.com>

Fri, 6 Oct 2017 23:36:50 +0000 (16:36 -0700)

committer Roland Schulz <roland.schulz@intel.com>

Wed, 11 Oct 2017 23:22:14 +0000 (01:22 +0200)
diff --git a/src/gromacs/ewald/pme-solve.cpp b/src/gromacs/ewald/pme-solve.cpp

index e250581ca9e1d01a45e225bd15136ecf104b21ee..e2e33577499faf6c7ed4a9e75288553a6ab62e5d 100644 (file)
--- a/src/gromacs/ewald/pme-solve.cpp
+++ b/src/gromacs/ewald/pme-solve.cpp
@@ -231,8 +231,8 @@ gmx_inline static void calc_exponentials_q(int gmx_unused start, int end, real f
           */
          for (kx = 0; kx < end; kx += GMX_SIMD_REAL_WIDTH)
          {
-            tmp_d1   = load(d_aligned+kx);
-            tmp_r    = load(r_aligned+kx);
+            tmp_d1   = load<SimdReal>(d_aligned+kx);
+            tmp_r    = load<SimdReal>(r_aligned+kx);
              tmp_r    = gmx::exp(tmp_r);
              tmp_e    = f_simd / tmp_d1;
              tmp_e    = tmp_e * tmp_r;
@@ -271,13 +271,13 @@ gmx_inline static void calc_exponentials_lj(int gmx_unused start, int end, real
          /* We only need to calculate from start. But since start is 0 or 1
           * and we want to use aligned loads/stores, we always start from 0.
           */
-        tmp_d = load(d_aligned+kx);
+        tmp_d = load<SimdReal>(d_aligned+kx);
          d_inv = SimdReal(1.0) / tmp_d;
          store(d_aligned+kx, d_inv);
-        tmp_r = load(r_aligned+kx);
+        tmp_r = load<SimdReal>(r_aligned+kx);
          tmp_r = gmx::exp(tmp_r);
          store(r_aligned+kx, tmp_r);
-        tmp_mk  = load(factor_aligned+kx);
+        tmp_mk  = load<SimdReal>(factor_aligned+kx);
          tmp_fac = sqr_PI * tmp_mk * erfc(tmp_mk);
          store(factor_aligned+kx, tmp_fac);
      }
diff --git a/src/gromacs/listed-forces/bonded.cpp b/src/gromacs/listed-forces/bonded.cpp

index 4b05207a8015b27bcf0eb1ba777834b056e09ba0..90512069e9bede4a7b9c5681af366580db7e72dd 100644 (file)
--- a/src/gromacs/listed-forces/bonded.cpp
+++ b/src/gromacs/listed-forces/bonded.cpp
@@ -1052,8 +1052,8 @@ angles_noener_simd(int nbonds,
          rkjy_S = yk_S - yj_S;
          rkjz_S = zk_S - zj_S;
  
-        k_S       = load(coeff);
-        theta0_S  = load(coeff+GMX_SIMD_REAL_WIDTH) * deg2rad_S;
+        k_S       = load<SimdReal>(coeff);
+        theta0_S  = load<SimdReal>(coeff+GMX_SIMD_REAL_WIDTH) * deg2rad_S;
  
          pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
          pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
@@ -1926,9 +1926,9 @@ pdihs_noener_simd(int nbonds,
                         &nrkj_n2_S,
                         &p_S, &q_S);
  
-        cp_S     = load(cp);
-        phi0_S   = load(phi0) * deg2rad_S;
-        mult_S   = load(mult);
+        cp_S     = load<SimdReal>(cp);
+        phi0_S   = load<SimdReal>(phi0) * deg2rad_S;
+        mult_S   = load<SimdReal>(mult);
  
          mdphi_S  = fms(mult_S, phi_S, phi0_S);
  
@@ -2054,7 +2054,7 @@ rbdihs_noener_simd(int nbonds,
          cosfac_S  = one_S;
          for (j = 1; j < NR_RBDIHS; j++)
          {
-            parm_S   = load(parm + j*GMX_SIMD_REAL_WIDTH);
+            parm_S   = load<SimdReal>(parm + j*GMX_SIMD_REAL_WIDTH);
              ddphi_S  = fma(c_S * parm_S, cosfac_S, ddphi_S);
              cosfac_S = cosfac_S * cos_S;
              c_S      = c_S + one_S;
diff --git a/src/gromacs/listed-forces/pairs.cpp b/src/gromacs/listed-forces/pairs.cpp

index 7835e4589ed834e20ddf2f8dbb387a16f7854234..6d0d1794de6fdb08f421e30662446194bc476d96 100644 (file)
--- a/src/gromacs/listed-forces/pairs.cpp
+++ b/src/gromacs/listed-forces/pairs.cpp
@@ -597,9 +597,9 @@ do_pairs_simple(int nbonds,
          gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ai, &xi[XX], &xi[YY], &xi[ZZ]);
          gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), aj, &xj[XX], &xj[YY], &xj[ZZ]);
  
-        T c6    = load(coeff + 0*pack_size);
-        T c12   = load(coeff + 1*pack_size);
-        T qq    = load(coeff + 2*pack_size);
+        T c6    = load<T>(coeff + 0*pack_size);
+        T c12   = load<T>(coeff + 1*pack_size);
+        T qq    = load<T>(coeff + 2*pack_size);
  
          /* We could save these operations by storing 6*C6,12*C12 */
          c6             = six*c6;
diff --git a/src/gromacs/mdlib/clincs.cpp b/src/gromacs/mdlib/clincs.cpp

index 7a0d823b96be4757fb6a27ea86d2c469ed202a47..336e75b4aa9858105d64dc0828fd36bc2352dcfd 100644 (file)
--- a/src/gromacs/mdlib/clincs.cpp
+++ b/src/gromacs/mdlib/clincs.cpp
@@ -488,7 +488,7 @@ calc_dr_x_f_simd(int                       b0,
  
          ip_S  = iprod(rx_S, ry_S, rz_S, fx_S, fy_S, fz_S);
  
-        rhs_S = load(blc + bs) * ip_S;
+        rhs_S = load<SimdReal>(blc + bs) * ip_S;
  
          store(rhs + bs, rhs_S);
          store(sol + bs, rhs_S);
@@ -744,7 +744,7 @@ calc_dr_x_xp_simd(int                       b0,
  
          ip_S  = iprod(rx_S, ry_S, rz_S, rxp_S, ryp_S, rzp_S);
  
-        rhs_S = load(blc + bs) * (ip_S - load(bllen + bs));
+        rhs_S = load<SimdReal>(blc + bs) * (ip_S - load<SimdReal>(bllen + bs));
  
          store(rhs + bs, rhs_S);
          store(sol + bs, rhs_S);
@@ -854,7 +854,7 @@ calc_dist_iter_simd(int                       b0,
  
          n2_S    = norm2(rx_S, ry_S, rz_S);
  
-        len_S   = load(bllen + bs);
+        len_S   = load<SimdReal>(bllen + bs);
          len2_S  = len_S * len_S;
  
          dlen2_S = fms(two_S, len2_S, n2_S);
@@ -869,7 +869,7 @@ calc_dist_iter_simd(int                       b0,
  
          lc_S    = fnma(dlen2_S, invsqrt(dlen2_S), len_S);
  
-        blc_S   = load(blc + bs);
+        blc_S   = load<SimdReal>(blc + bs);
  
          lc_S    = blc_S * lc_S;
  
@@ -1010,8 +1010,8 @@ static void do_lincs(rvec *x, rvec *xp, matrix box, t_pbc *pbc,
  #if GMX_SIMD_HAVE_REAL
      for (b = b0; b < b1; b += GMX_SIMD_REAL_WIDTH)
      {
-        SimdReal t1 = load(blc + b);
-        SimdReal t2 = load(sol + b);
+        SimdReal t1 = load<SimdReal>(blc + b);
+        SimdReal t2 = load<SimdReal>(sol + b);
          store(mlambda + b, t1 * t2);
      }
  #else
@@ -1068,11 +1068,11 @@ static void do_lincs(rvec *x, rvec *xp, matrix box, t_pbc *pbc,
  #if GMX_SIMD_HAVE_REAL
          for (b = b0; b < b1; b += GMX_SIMD_REAL_WIDTH)
          {
-            SimdReal t1  = load(blc + b);
-            SimdReal t2  = load(sol + b);
+            SimdReal t1  = load<SimdReal>(blc + b);
+            SimdReal t2  = load<SimdReal>(sol + b);
              SimdReal mvb = t1 * t2;
              store(blc_sol + b, mvb);
-            store(mlambda + b, load(mlambda + b) + mvb);
+            store(mlambda + b, load<SimdReal>(mlambda + b) + mvb);
          }
  #else
          for (b = b0; b < b1; b++)
diff --git a/src/gromacs/mdlib/csettle.cpp b/src/gromacs/mdlib/csettle.cpp

index 1dbaff73fae5746eca598ffd21d196c57a440ae3..354c475b95e16b9ae0731e285db15b113c672065 100644 (file)
--- a/src/gromacs/mdlib/csettle.cpp
+++ b/src/gromacs/mdlib/csettle.cpp
@@ -717,7 +717,7 @@ static void settleTemplate(const gmx_settledata_t settled,
              if (bCalcVirial)
              {
                  /* Filter out the non-local settles */
-                T filter = load(settled->virfac + i);
+                T filter = load<T>(settled->virfac + i);
                  T mOf    = filter*mO;
                  T mHf    = filter*mH;
  
diff --git a/src/gromacs/mdlib/nbnxn_atomdata.cpp b/src/gromacs/mdlib/nbnxn_atomdata.cpp

index 2a66fad79b8a9c356d39d81cca04b5bc43e5bf6b..ed10300eedb16fe59c8011783450345d047399fe 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_atomdata.cpp
+++ b/src/gromacs/mdlib/nbnxn_atomdata.cpp
@@ -1191,10 +1191,10 @@ nbnxn_atomdata_reduce_reals_simd(real gmx_unused * gmx_restrict dest,
      {
          for (int i = i0; i < i1; i += GMX_SIMD_REAL_WIDTH)
          {
-            dest_SSE = load(dest+i);
+            dest_SSE = load<SimdReal>(dest+i);
              for (int s = 0; s < nsrc; s++)
              {
-                src_SSE  = load(src[s]+i);
+                src_SSE  = load<SimdReal>(src[s]+i);
                  dest_SSE = dest_SSE + src_SSE;
              }
              store(dest+i, dest_SSE);
@@ -1204,10 +1204,10 @@ nbnxn_atomdata_reduce_reals_simd(real gmx_unused * gmx_restrict dest,
      {
          for (int i = i0; i < i1; i += GMX_SIMD_REAL_WIDTH)
          {
-            dest_SSE = load(src[0]+i);
+            dest_SSE = load<SimdReal>(src[0]+i);
              for (int s = 1; s < nsrc; s++)
              {
-                src_SSE  = load(src[s]+i);
+                src_SSE  = load<SimdReal>(src[s]+i);
                  dest_SSE = dest_SSE + src_SSE;
              }
              store(dest+i, dest_SSE);
diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_outer.h b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_outer.h

index c346e7a9de3561285352b732abb36eba67a0834b..337f9860c48378a1c2c933150609789b522c1c80 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_outer.h
+++ b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_outer.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -169,7 +169,7 @@
  #endif
  
      /* Load j-i for the first i */
-    diagonal_jmi_S    = load(nbat->simd_2xnn_diagonal_j_minus_i);
+    diagonal_jmi_S    = load<SimdReal>(nbat->simd_2xnn_diagonal_j_minus_i);
      /* Generate all the diagonal masks as comparison results */
  #if UNROLLI == UNROLLJ
      diagonal_mask_S0  = (zero_S < diagonal_jmi_S);
@@ -204,11 +204,11 @@
       * matter, as long as both filter and mask data are treated the same way.
       */
  #if GMX_SIMD_HAVE_INT32_LOGICAL
-    filter_S0 = load(reinterpret_cast<const int *>(exclusion_filter + 0*UNROLLJ));
-    filter_S2 = load(reinterpret_cast<const int *>(exclusion_filter + 2*UNROLLJ));
+    filter_S0 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 0*UNROLLJ));
+    filter_S2 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 2*UNROLLJ));
  #else
-    filter_S0 = load(reinterpret_cast<const real *>(exclusion_filter + 0*UNROLLJ));
-    filter_S2 = load(reinterpret_cast<const real *>(exclusion_filter + 2*UNROLLJ));
+    filter_S0 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 0*UNROLLJ));
+    filter_S2 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 2*UNROLLJ));
  #endif
  
  #ifdef CALC_COUL_RF
@@ -326,15 +326,15 @@
          pvdw_c12[2*UNROLLJ+jp] = nbat->nbfp[0*2+1];
          pvdw_c12[3*UNROLLJ+jp] = nbat->nbfp[0*2+1];
      }
-    SimdReal c6_S0  = load(pvdw_c6 +0*UNROLLJ);
-    SimdReal c6_S1  = load(pvdw_c6 +1*UNROLLJ);
-    SimdReal c6_S2  = load(pvdw_c6 +2*UNROLLJ);
-    SimdReal c6_S3  = load(pvdw_c6 +3*UNROLLJ);
-
-    SimdReal c12_S0 = load(pvdw_c12+0*UNROLLJ);
-    SimdReal c12_S1 = load(pvdw_c12+1*UNROLLJ);
-    SimdReal c12_S2 = load(pvdw_c12+2*UNROLLJ);
-    SimdReal c12_S3 = load(pvdw_c12+3*UNROLLJ);
+    SimdReal c6_S0  = load<SimdReal>(pvdw_c6 +0*UNROLLJ);
+    SimdReal c6_S1  = load<SimdReal>(pvdw_c6 +1*UNROLLJ);
+    SimdReal c6_S2  = load<SimdReal>(pvdw_c6 +2*UNROLLJ);
+    SimdReal c6_S3  = load<SimdReal>(pvdw_c6 +3*UNROLLJ);
+
+    SimdReal c12_S0 = load<SimdReal>(pvdw_c12+0*UNROLLJ);
+    SimdReal c12_S1 = load<SimdReal>(pvdw_c12+1*UNROLLJ);
+    SimdReal c12_S2 = load<SimdReal>(pvdw_c12+2*UNROLLJ);
+    SimdReal c12_S3 = load<SimdReal>(pvdw_c12+3*UNROLLJ);
  #endif /* FIX_LJ_C */
  
  #ifdef ENERGY_GROUPS
diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h

index 9833f7a6a171b92b776353c8cf60cae24547ddb4..4b1f3646a20cfd618b1ef164de54b0ef32aef9be 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h
+++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -85,7 +85,7 @@ static gmx_inline void add_ener_grp(SimdReal e_S, real *v, const int *offset_jj)
      {
          SimdReal v_S;
  
-        v_S = load(v+offset_jj[jj]+jj*GMX_SIMD_REAL_WIDTH);
+        v_S = load<SimdReal>(v+offset_jj[jj]+jj*GMX_SIMD_REAL_WIDTH);
          store(v+offset_jj[jj]+jj*GMX_SIMD_REAL_WIDTH, v_S + e_S);
      }
  }
@@ -138,10 +138,10 @@ gmx_load_simd_4xn_interactions(int                               excl,
      // Neither real or integer bitwise logical operations supported.
      // Load masks from memory instead.
      SimdReal      zero = setZero();
-    *interact_S0  = ( zero < load( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (0 * UNROLLJ)) & 0xF) ) );
-    *interact_S1  = ( zero < load( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (1 * UNROLLJ)) & 0xF) ) );
-    *interact_S2  = ( zero < load( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (2 * UNROLLJ)) & 0xF) ) );
-    *interact_S3  = ( zero < load( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (3 * UNROLLJ)) & 0xF) ) );
+    *interact_S0  = ( zero < load<SimdReal>( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (0 * UNROLLJ)) & 0xF) ) );
+    *interact_S1  = ( zero < load<SimdReal>( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (1 * UNROLLJ)) & 0xF) ) );
+    *interact_S2  = ( zero < load<SimdReal>( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (2 * UNROLLJ)) & 0xF) ) );
+    *interact_S3  = ( zero < load<SimdReal>( simd_interaction_array + GMX_SIMD_REAL_WIDTH*((excl >> (3 * UNROLLJ)) & 0xF) ) );
  #endif
  }
  
diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h

index bbd25513eefdccdd5a7a750fd408117fbdf785f0..f0b0b7a8bc66ecefd3207ce4db75377b391a0248 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h
+++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h
@@ -277,9 +277,9 @@
  #endif /* CHECK_EXCLS */
  
      /* load j atom coordinates */
-    jx_S        = load(x+ajx);
-    jy_S        = load(x+ajy);
-    jz_S        = load(x+ajz);
+    jx_S        = load<SimdReal>(x+ajx);
+    jy_S        = load<SimdReal>(x+ajy);
+    jz_S        = load<SimdReal>(x+ajz);
  
      /* Calculate distance */
      dx_S0       = ix_S0 - jx_S;
@@ -399,7 +399,7 @@
  
  #ifdef CALC_COULOMB
      /* Load parameters for j atom */
-    jq_S        = load(q+aj);
+    jq_S        = load<SimdReal>(q+aj);
      qq_S0       = iq_S0 * jq_S;
      qq_S1       = iq_S1 * jq_S;
      qq_S2       = iq_S2 * jq_S;
@@ -419,8 +419,8 @@
  #endif /* not defined any LJ rule */
  
  #ifdef LJ_COMB_GEOM
-    c6s_j_S     = load(ljc+aj2+0);
-    c12s_j_S    = load(ljc+aj2+STRIDE);
+    c6s_j_S     = load<SimdReal>(ljc+aj2+0);
+    c12s_j_S    = load<SimdReal>(ljc+aj2+STRIDE);
      SimdReal c6_S0  = c6s_S0 * c6s_j_S;
      SimdReal c6_S1  = c6s_S1 * c6s_j_S;
  #ifndef HALF_LJ
@@ -436,8 +436,8 @@
  #endif /* LJ_COMB_GEOM */
  
  #ifdef LJ_COMB_LB
-    hsig_j_S    = load(ljc+aj2+0);
-    seps_j_S    = load(ljc+aj2+STRIDE);
+    hsig_j_S    = load<SimdReal>(ljc+aj2+0);
+    seps_j_S    = load<SimdReal>(ljc+aj2+STRIDE);
  
      sig_S0      = hsig_i_S0 + hsig_j_S;
      sig_S1      = hsig_i_S1 + hsig_j_S;
@@ -946,7 +946,7 @@
  #endif
  
          /* Determine C6 for the grid using the geometric combination rule */
-        c6s_j_S         = load(ljc+aj2+0);
+        c6s_j_S         = load<SimdReal>(ljc+aj2+0);
          c6grid_S0       = c6s_S0 * c6s_j_S;
          c6grid_S1       = c6s_S1 * c6s_j_S;
  #ifndef HALF_LJ
@@ -1172,9 +1172,9 @@
      fiz_S3      = fiz_S3 + tz_S3;
  
      /* Decrement j atom force */
-    store(f+ajx, load(f+ajx) - (tx_S0 + tx_S1 + tx_S2 + tx_S3));
-    store(f+ajy, load(f+ajy) - (ty_S0 + ty_S1 + ty_S2 + ty_S3));
-    store(f+ajz, load(f+ajz) - (tz_S0 + tz_S1 + tz_S2 + tz_S3));
+    store(f+ajx, load<SimdReal>(f+ajx) - (tx_S0 + tx_S1 + tx_S2 + tx_S3));
+    store(f+ajy, load<SimdReal>(f+ajy) - (ty_S0 + ty_S1 + ty_S2 + ty_S3));
+    store(f+ajz, load<SimdReal>(f+ajz) - (tz_S0 + tz_S1 + tz_S2 + tz_S3));
  }
  
  #undef  rinv_ex_S0
diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_outer.h b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_outer.h

index e7e5842086a0487e5e3efb6eaa49f4cfbe8afc41..167379c455f1ca241dca3abcbed6bf492036a517 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_outer.h
+++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_outer.h
@@ -176,7 +176,7 @@
  #endif
  
      /* Load j-i for the first i */
-    diagonal_jmi_S    = load(nbat->simd_4xn_diagonal_j_minus_i);
+    diagonal_jmi_S    = load<SimdReal>(nbat->simd_4xn_diagonal_j_minus_i);
      /* Generate all the diagonal masks as comparison results */
  #if UNROLLI == UNROLLJ
      diagonal_mask_S0  = (zero_S < diagonal_jmi_S);
@@ -199,7 +199,7 @@
  
  #if UNROLLI == 2*UNROLLJ
      /* Load j-i for the second half of the j-cluster */
-    diagonal_jmi_S    = load(nbat->simd_4xn_diagonal_j_minus_i + UNROLLJ);
+    diagonal_jmi_S    = load<SimdReal>(nbat->simd_4xn_diagonal_j_minus_i + UNROLLJ);
  #endif
  
      diagonal_mask1_S0 = (zero_S < diagonal_jmi_S);
@@ -223,15 +223,15 @@
       * matter, as long as both filter and mask data are treated the same way.
       */
  #if GMX_SIMD_HAVE_INT32_LOGICAL
-    filter_S0 = load(reinterpret_cast<const int *>(exclusion_filter + 0*UNROLLJ));
-    filter_S1 = load(reinterpret_cast<const int *>(exclusion_filter + 1*UNROLLJ));
-    filter_S2 = load(reinterpret_cast<const int *>(exclusion_filter + 2*UNROLLJ));
-    filter_S3 = load(reinterpret_cast<const int *>(exclusion_filter + 3*UNROLLJ));
+    filter_S0 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 0*UNROLLJ));
+    filter_S1 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 1*UNROLLJ));
+    filter_S2 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 2*UNROLLJ));
+    filter_S3 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 3*UNROLLJ));
  #else
-    filter_S0 = load(reinterpret_cast<const real *>(exclusion_filter + 0*UNROLLJ));
-    filter_S1 = load(reinterpret_cast<const real *>(exclusion_filter + 1*UNROLLJ));
-    filter_S2 = load(reinterpret_cast<const real *>(exclusion_filter + 2*UNROLLJ));
-    filter_S3 = load(reinterpret_cast<const real *>(exclusion_filter + 3*UNROLLJ));
+    filter_S0 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 0*UNROLLJ));
+    filter_S1 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 1*UNROLLJ));
+    filter_S2 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 2*UNROLLJ));
+    filter_S3 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 3*UNROLLJ));
  #endif
  
  #ifdef CALC_COUL_RF
@@ -349,15 +349,15 @@
          pvdw_c12[2*UNROLLJ+jp] = nbat->nbfp[0*2+1];
          pvdw_c12[3*UNROLLJ+jp] = nbat->nbfp[0*2+1];
      }
-    SimdReal c6_S0  = simdLoad(pvdw_c6 +0*UNROLLJ);
-    SimdReal c6_S1  = simdLoad(pvdw_c6 +1*UNROLLJ);
-    SimdReal c6_S2  = simdLoad(pvdw_c6 +2*UNROLLJ);
-    SimdReal c6_S3  = simdLoad(pvdw_c6 +3*UNROLLJ);
-
-    SimdReal c12_S0 = simdLoad(pvdw_c12+0*UNROLLJ);
-    SimdReal c12_S1 = simdLoad(pvdw_c12+1*UNROLLJ);
-    SimdReal c12_S2 = simdLoad(pvdw_c12+2*UNROLLJ);
-    SimdReal c12_S3 = simdLoad(pvdw_c12+3*UNROLLJ);
+    SimdReal c6_S0  = load<SimdReal>(pvdw_c6 +0*UNROLLJ);
+    SimdReal c6_S1  = load<SimdReal>(pvdw_c6 +1*UNROLLJ);
+    SimdReal c6_S2  = load<SimdReal>(pvdw_c6 +2*UNROLLJ);
+    SimdReal c6_S3  = load<SimdReal>(pvdw_c6 +3*UNROLLJ);
+
+    SimdReal c12_S0 = load<SimdReal>(pvdw_c12+0*UNROLLJ);
+    SimdReal c12_S1 = load<SimdReal>(pvdw_c12+1*UNROLLJ);
+    SimdReal c12_S2 = load<SimdReal>(pvdw_c12+2*UNROLLJ);
+    SimdReal c12_S3 = load<SimdReal>(pvdw_c12+3*UNROLLJ);
  #endif /* FIX_LJ_C */
  
  #ifdef ENERGY_GROUPS
diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_prune.cpp b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_prune.cpp

index d6b42038a55a84d22ff56fb36bc5de9e1d223cc0..d59d88e94df330a9c5cddee359973184b41d1dda 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_prune.cpp
+++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_prune.cpp
@@ -127,9 +127,9 @@ nbnxn_kernel_prune_4xn(nbnxn_pairlist_t *         nbl,
              int ajz     = ajy + STRIDE;
  
              /* load j atom coordinates */
-            SimdReal jx_S   = load(x + ajx);
-            SimdReal jy_S   = load(x + ajy);
-            SimdReal jz_S   = load(x + ajz);
+            SimdReal jx_S   = load<SimdReal>(x + ajx);
+            SimdReal jy_S   = load<SimdReal>(x + ajy);
+            SimdReal jz_S   = load<SimdReal>(x + ajz);
  
              /* Calculate distance */
              SimdReal dx_S0  = ix_S0 - jx_S;
diff --git a/src/gromacs/mdlib/nbnxn_search_simd_2xnn.h b/src/gromacs/mdlib/nbnxn_search_simd_2xnn.h

index 70b89f37a98f692d9ad85222ab307e9fdcdd2db7..733c030f678ee67527c9b7a39a93d9658516ad48 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_search_simd_2xnn.h
+++ b/src/gromacs/mdlib/nbnxn_search_simd_2xnn.h
@@ -144,12 +144,12 @@ makeClusterListSimd2xnn(const nbnxn_grid_t *      gridj,
              jz_S  = loadDuplicateHsimd(x_j + xind_f + 2*STRIDE_S);
  
              /* Calculate distance */
-            dx_S0            = load(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S0            = load(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S0            = load(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
-            dx_S2            = load(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S2            = load(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S2            = load(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S0            = load<SimdReal>(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S0            = load<SimdReal>(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S0            = load<SimdReal>(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S2            = load<SimdReal>(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S2            = load<SimdReal>(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S2            = load<SimdReal>(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
  
              /* rsq = dx*dx+dy*dy+dz*dz */
              rsq_S0           = norm2(dx_S0, dy_S0, dz_S0);
@@ -202,12 +202,12 @@ makeClusterListSimd2xnn(const nbnxn_grid_t *      gridj,
              jz_S  = loadDuplicateHsimd(x_j + xind_l + 2*STRIDE_S);
  
              /* Calculate distance */
-            dx_S0            = load(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S0            = load(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S0            = load(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
-            dx_S2            = load(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S2            = load(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S2            = load(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S0            = load<SimdReal>(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S0            = load<SimdReal>(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S0            = load<SimdReal>(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S2            = load<SimdReal>(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S2            = load<SimdReal>(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S2            = load<SimdReal>(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
  
              /* rsq = dx*dx+dy*dy+dz*dz */
              rsq_S0           = norm2(dx_S0, dy_S0, dz_S0);
diff --git a/src/gromacs/mdlib/nbnxn_search_simd_4xn.h b/src/gromacs/mdlib/nbnxn_search_simd_4xn.h

index afe21147c6b8bf716d0502a1914971c4a058f8a9..5432c3ccfb27b264992ae40660af6093ad4df7a6 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_search_simd_4xn.h
+++ b/src/gromacs/mdlib/nbnxn_search_simd_4xn.h
@@ -158,24 +158,24 @@ makeClusterListSimd4xn(const nbnxn_grid_t *      gridj,
          {
              xind_f  = xIndexFromCj<NbnxnLayout::Simd4xN>(cjFromCi<NbnxnLayout::Simd4xN>(gridj->cell0) + jclusterFirst);
  
-            jx_S  = load(x_j + xind_f + 0*STRIDE_S);
-            jy_S  = load(x_j + xind_f + 1*STRIDE_S);
-            jz_S  = load(x_j + xind_f + 2*STRIDE_S);
+            jx_S  = load<SimdReal>(x_j + xind_f + 0*STRIDE_S);
+            jy_S  = load<SimdReal>(x_j + xind_f + 1*STRIDE_S);
+            jz_S  = load<SimdReal>(x_j + xind_f + 2*STRIDE_S);
  
  
              /* Calculate distance */
-            dx_S0            = load(x_ci_simd +  0*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S0            = load(x_ci_simd +  1*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S0            = load(x_ci_simd +  2*GMX_SIMD_REAL_WIDTH) - jz_S;
-            dx_S1            = load(x_ci_simd +  3*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S1            = load(x_ci_simd +  4*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S1            = load(x_ci_simd +  5*GMX_SIMD_REAL_WIDTH) - jz_S;
-            dx_S2            = load(x_ci_simd +  6*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S2            = load(x_ci_simd +  7*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S2            = load(x_ci_simd +  8*GMX_SIMD_REAL_WIDTH) - jz_S;
-            dx_S3            = load(x_ci_simd +  9*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S3            = load(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S3            = load(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S0            = load<SimdReal>(x_ci_simd +  0*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S0            = load<SimdReal>(x_ci_simd +  1*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S0            = load<SimdReal>(x_ci_simd +  2*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S1            = load<SimdReal>(x_ci_simd +  3*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S1            = load<SimdReal>(x_ci_simd +  4*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S1            = load<SimdReal>(x_ci_simd +  5*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S2            = load<SimdReal>(x_ci_simd +  6*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S2            = load<SimdReal>(x_ci_simd +  7*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S2            = load<SimdReal>(x_ci_simd +  8*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S3            = load<SimdReal>(x_ci_simd +  9*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S3            = load<SimdReal>(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S3            = load<SimdReal>(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
  
              /* rsq = dx*dx+dy*dy+dz*dz */
              rsq_S0           = norm2(dx_S0, dy_S0, dz_S0);
@@ -229,23 +229,23 @@ makeClusterListSimd4xn(const nbnxn_grid_t *      gridj,
          {
              xind_l  = xIndexFromCj<NbnxnLayout::Simd4xN>(cjFromCi<NbnxnLayout::Simd4xN>(gridj->cell0) + jclusterLast);
  
-            jx_S  = load(x_j +xind_l + 0*STRIDE_S);
-            jy_S  = load(x_j +xind_l + 1*STRIDE_S);
-            jz_S  = load(x_j +xind_l + 2*STRIDE_S);
+            jx_S  = load<SimdReal>(x_j +xind_l + 0*STRIDE_S);
+            jy_S  = load<SimdReal>(x_j +xind_l + 1*STRIDE_S);
+            jz_S  = load<SimdReal>(x_j +xind_l + 2*STRIDE_S);
  
              /* Calculate distance */
-            dx_S0            = load(x_ci_simd +  0*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S0            = load(x_ci_simd +  1*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S0            = load(x_ci_simd +  2*GMX_SIMD_REAL_WIDTH) - jz_S;
-            dx_S1            = load(x_ci_simd +  3*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S1            = load(x_ci_simd +  4*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S1            = load(x_ci_simd +  5*GMX_SIMD_REAL_WIDTH) - jz_S;
-            dx_S2            = load(x_ci_simd +  6*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S2            = load(x_ci_simd +  7*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S2            = load(x_ci_simd +  8*GMX_SIMD_REAL_WIDTH) - jz_S;
-            dx_S3            = load(x_ci_simd +  9*GMX_SIMD_REAL_WIDTH) - jx_S;
-            dy_S3            = load(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
-            dz_S3            = load(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S0            = load<SimdReal>(x_ci_simd +  0*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S0            = load<SimdReal>(x_ci_simd +  1*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S0            = load<SimdReal>(x_ci_simd +  2*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S1            = load<SimdReal>(x_ci_simd +  3*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S1            = load<SimdReal>(x_ci_simd +  4*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S1            = load<SimdReal>(x_ci_simd +  5*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S2            = load<SimdReal>(x_ci_simd +  6*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S2            = load<SimdReal>(x_ci_simd +  7*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S2            = load<SimdReal>(x_ci_simd +  8*GMX_SIMD_REAL_WIDTH) - jz_S;
+            dx_S3            = load<SimdReal>(x_ci_simd +  9*GMX_SIMD_REAL_WIDTH) - jx_S;
+            dy_S3            = load<SimdReal>(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
+            dz_S3            = load<SimdReal>(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
  
              /* rsq = dx*dx+dy*dy+dz*dz */
              rsq_S0           = norm2(dx_S0, dy_S0, dz_S0);
diff --git a/src/gromacs/pbcutil/pbc-simd.h b/src/gromacs/pbcutil/pbc-simd.h

index 893f20296cf4a2948811ed0ce2a9acc951d4f290..709901e113f8a0fcf6f38e5f9d3d3d864ef85247 100644 (file)
--- a/src/gromacs/pbcutil/pbc-simd.h
+++ b/src/gromacs/pbcutil/pbc-simd.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2015,2016,2017, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -91,17 +91,17 @@ pbc_correct_dx_simd(SimdReal         *dx,
  {
      SimdReal shz, shy, shx;
  
-    shz = round(*dz * load(pbc_simd+0*GMX_SIMD_REAL_WIDTH)); // load inv_bzz
-    *dx = *dx - shz * load(pbc_simd+1*GMX_SIMD_REAL_WIDTH);  // load bzx
-    *dy = *dy - shz * load(pbc_simd+2*GMX_SIMD_REAL_WIDTH);  // load bzy
-    *dz = *dz - shz * load(pbc_simd+3*GMX_SIMD_REAL_WIDTH);  // load bzz
+    shz = round(*dz * load<SimdReal>(pbc_simd+0*GMX_SIMD_REAL_WIDTH)); // load inv_bzz
+    *dx = *dx - shz * load<SimdReal>(pbc_simd+1*GMX_SIMD_REAL_WIDTH);  // load bzx
+    *dy = *dy - shz * load<SimdReal>(pbc_simd+2*GMX_SIMD_REAL_WIDTH);  // load bzy
+    *dz = *dz - shz * load<SimdReal>(pbc_simd+3*GMX_SIMD_REAL_WIDTH);  // load bzz
  
-    shy = round(*dy * load(pbc_simd+4*GMX_SIMD_REAL_WIDTH)); // load inv_byy
-    *dx = *dx - shy * load(pbc_simd+5*GMX_SIMD_REAL_WIDTH);  // load byx
-    *dy = *dy - shy * load(pbc_simd+6*GMX_SIMD_REAL_WIDTH);  // load byy
+    shy = round(*dy * load<SimdReal>(pbc_simd+4*GMX_SIMD_REAL_WIDTH)); // load inv_byy
+    *dx = *dx - shy * load<SimdReal>(pbc_simd+5*GMX_SIMD_REAL_WIDTH);  // load byx
+    *dy = *dy - shy * load<SimdReal>(pbc_simd+6*GMX_SIMD_REAL_WIDTH);  // load byy
  
-    shx = round(*dx * load(pbc_simd+7*GMX_SIMD_REAL_WIDTH)); // load inv_bxx
-    *dx = *dx - shx * load(pbc_simd+8*GMX_SIMD_REAL_WIDTH);  // load bxx
+    shx = round(*dx * load<SimdReal>(pbc_simd+7*GMX_SIMD_REAL_WIDTH)); // load inv_bxx
+    *dx = *dx - shx * load<SimdReal>(pbc_simd+8*GMX_SIMD_REAL_WIDTH);  // load bxx
  
  }
  
diff --git a/src/gromacs/simd/simd.h b/src/gromacs/simd/simd.h

index b5ea65038257154808d7d6ebf1aead93cfe6696f..0871aac1a565a38f07c9d8ee6704d61fd9d79abf 100644 (file)
--- a/src/gromacs/simd/simd.h
+++ b/src/gromacs/simd/simd.h
@@ -398,194 +398,116 @@ typedef Simd4FBool                Simd4Bool;
  
  //! \}  end of name-group describing SIMD data types
  
-//Traits of Simd. Works for float and double but NOT for int.
+/*! \name High-level SIMD proxy objects to disambiguate load/set operations
+ * \{
+ */
+
+/*! \libinternal \brief Simd traits */
  template<typename T>
  struct SimdTraits {};
-//This does not work for int because int32_t maps to two simd types and we can't base
-//it on the SIMD type because it doesn't exist if there is no support
  
+#if GMX_SIMD_HAVE_FLOAT
  template<>
-struct SimdTraits<float>
+struct SimdTraits<SimdFloat>
  {
-#if GMX_SIMD_HAVE_FLOAT
+    using type = float;
      static constexpr int width = GMX_SIMD_FLOAT_WIDTH;
-    using type                 = SimdFloat;
-#else
-    static constexpr int width = 1;
-#endif
+    using tag = SimdFloatTag;
  };
-
+#endif
+#if GMX_SIMD_HAVE_DOUBLE
  template<>
-struct SimdTraits<double>
+struct SimdTraits<SimdDouble>
  {
-#if GMX_SIMD_HAVE_DOUBLE
+    using type = double;
      static constexpr int width = GMX_SIMD_DOUBLE_WIDTH;
-    using type                 = SimdDouble;
-#else
-    static constexpr int width = 1;
-#endif
+    using tag = SimdDoubleTag;
  };
-
-template<typename T>
-struct SimdTraits<const T> : public SimdTraits<T> {};
-
-/*! \name High-level SIMD proxy objects to disambiguate load/set operations
- * \{
- */
-template <typename T> //can be either float/double/int, each const or non-const
-class SimdLoadProxyInternal;
-
-template<typename T>
-static inline const SimdLoadProxyInternal<T> gmx_simdcall
-load(T *m);
-
-template <typename T, size_t N>
-static inline const SimdLoadProxyInternal<const T> gmx_simdcall
-load(const AlignedArray<T, N> &m);
-
-/*! \libinternal \brief Proxy object to enable load() for SIMD and equivalent basic type
- *
- * This object is returned by the load() function that takes a single pointer
- * to a float/double. When the result is assigned to either SimdFloat/Double or float/double/int,
- * the appropriate conversion method will be executed, which in turn calls
- * the correct low-level load function.
- * In practice this simply means you can use load() regardless for both SIMD
- * and non-SIMD data in templated functions.
- *
- * This is an internal class which should never be constructed directly. The constructor is private
- * so that only the load function can construct it.
- */
-template <typename T>
-class SimdLoadProxyInternal
-{
-    template<typename U>
-    using IsIntType = std::is_same<std::int32_t, typename std::remove_const<U>::type>;
-
-    public:
-        //! \brief Conversion method that will execute load of scalar basic type
-        operator T() const { return *m_; }
-        //! \brief Conversion method that will execute load of SimdFloat/Double
-        template<typename U = T>  //Always U=T. Indirection needed for SFINAE
-                                  //Disabled if type doesn't exist (unsupported or int)
-        operator typename SimdTraits<U>::type() const { return simdLoad(m_); }
-
+#endif
  #if GMX_SIMD_HAVE_FLOAT
-        //! \brief Conversion method that will execute load of SimdFInt32
-        template<typename U = T, typename = typename std::enable_if<IsIntType<U>::value>::type>
-        operator SimdFInt32() const { return simdLoad(m_, SimdFInt32Tag()); }
+template<>
+struct SimdTraits<SimdFInt32>
+{
+    using type = int;
+    static constexpr int width = GMX_SIMD_FINT32_WIDTH;
+    using tag = SimdFInt32Tag;
+};
  #endif
  #if GMX_SIMD_HAVE_DOUBLE
-        //! \brief Conversion method that will execute load of SimdDInt32
-        template<typename U = T, typename = typename std::enable_if<IsIntType<U>::value>::type>
-        operator SimdDInt32() const { return simdLoad(m_, SimdDInt32Tag()); }
+template<>
+struct SimdTraits<SimdDInt32>
+{
+    using type = int;
+    static constexpr int width = GMX_SIMD_DINT32_WIDTH;
+    using tag = SimdDInt32Tag;
+};
  #endif
  
-    private:
-        //! \brief Private constructor can only be called from load()
-        SimdLoadProxyInternal(T *m) : m_(m) {}
-
-        template<typename U>
-        friend const SimdLoadProxyInternal<U> gmx_simdcall
-        load(U *m);
-        template <typename U, size_t N>
-        friend const SimdLoadProxyInternal<const U> gmx_simdcall
-        load(const AlignedArray<U, N> &m);
-
-        T* const m_; //!< The pointer used to load memory
+template<typename T>
+struct SimdTraits<const T>
+{
+    using type = const typename SimdTraits<T>::type;
+    static constexpr int width = SimdTraits<T>::width;
+    using tag = typename SimdTraits<T>::tag;
  };
  
-/*! \brief Load function that returns proxy object for SimdFloat/Double/Int and basic type
+/*! \brief Load function that returns SIMD or scalar
   *
- * \param m Pointer to load memory
- * \return Proxy object that will call the actual load for either SimdFloat/Double/Int
- *         or basic scalar type when you assign it and the conversion method is called.
+ * \tparam T Type to load (type is always mandatory)
+ * \param  m Pointer to aligned memory
+ * \return   Loaded value
   */
  template<typename T>
-static inline const SimdLoadProxyInternal<T> gmx_simdcall
-load(T *m)
+static inline T
+load(const typename SimdTraits<T>::type *m) //disabled by SFINAE for non-SIMD types
  {
-    return {
-               m
-    };
+    return simdLoad(m, typename SimdTraits<T>::tag());
  }
  
-template <typename T, size_t N>
-static inline const SimdLoadProxyInternal<const T> gmx_simdcall
-load(const AlignedArray<T, N> &m)
+template<typename T>
+static inline T
+/* the enable_if serves to prevent two different type of misuse:
+ * 1) load<SimdReal>(SimdReal*); should only be called on real* or int*
+ * 2) load(real*); template parameter is mandatory because otherwise ambiguity is
+ *    created. The dependent type disables type deduction.
+ */
+load(const typename std::enable_if<std::is_arithmetic<T>::value, T>::type *m)
  {
-    return {
-               m.data()
-    };
+    return *m;
  }
  
-template <typename T> //can be either float/double/int, each const or non-const
-class SimdLoadUProxyInternal;
-
-template<typename T>
-static inline const SimdLoadUProxyInternal<T> gmx_simdcall
-loadU(T *m);
+template <typename T, size_t N>
+static inline T gmx_simdcall
+load(const AlignedArray<typename SimdTraits<T>::type, N> &m)
+{
+    return simdLoad(m.data(), typename SimdTraits<T>::tag());
+}
  
-/*! \libinternal \brief Proxy object to enable loadU() for SIMD and equivalent basic type
- *
- * This object is returned by the loadU() function that takes a single pointer
- * to a float/double. When the result is assigned to either SimdFloat/Double or float/double/int,
- * the appropriate conversion method will be executed, which in turn calls
- * the correct low-level load function.
- * In practice this simply means you can use load() regardless for both SIMD
- * and non-SIMD data in templated functions.
+/*! \brief Load function that returns SIMD or scalar based on template argument
   *
- * This is an internal class which should never be constructed directly. The constructor is private
- * so that only the load function can construct it.
+ * \tparam T Type to load (type is always mandatory)
+ * \param m Pointer to unaligned memory
+ * \return Loaded SimdFloat/Double/Int or basic scalar type
   */
-template <typename T>
-class SimdLoadUProxyInternal
+template<typename T>
+static inline T
+loadU(const typename SimdTraits<T>::type *m)
  {
-    template<typename U>
-    using IsIntType = std::is_same<std::int32_t, typename std::remove_const<U>::type>;
-
-    public:
-        //! \brief Conversion method that will execute loadU of scalar basic type
-        operator T() const { return *m_; }
-        //! \brief Conversion method that will execute loadU of SimdFloat/Double
-        template<typename U = T>  //Always U=T. Indirection needed for SFINAE.
-                                  //Disabled if type doesn't exist (unsupported or int)
-        operator typename SimdTraits<U>::type() const { return simdLoadU(m_); }
-
-#if GMX_SIMD_HAVE_FLOAT
-        //! \brief Conversion method that will execute loadU of SimdFInt32
-        template<typename U = T, typename = typename std::enable_if<IsIntType<U>::value>::type>
-        operator SimdFInt32() const { return simdLoadU(m_, SimdFInt32Tag()); }
-#endif
-#if GMX_SIMD_HAVE_DOUBLE
-        //! \brief Conversion method that will execute loadU of SimdDInt32
-        template<typename U = T, typename = typename std::enable_if<IsIntType<U>::value>::type>
-        operator SimdDInt32() const { return simdLoadU(m_, SimdDInt32Tag()); }
-#endif
-
-    private:
-        //! \brief Private constructor can only be called from loadU()
-        SimdLoadUProxyInternal(T *m) : m_(m) {}
-
-        template<typename U>
-        friend const SimdLoadUProxyInternal<U> gmx_simdcall
-        loadU(U *m);
-
-        T* const m_; //!< The pointer used to load memory
-};
+    return simdLoadU(m, typename SimdTraits<T>::tag());
+}
  
-/*! \brief LoadU function that returns proxy object for SimdFloat/Double/Int and basic type
- *
- * \param m Pointer to load memory
- * \return Proxy object that will call the actual unaligned load for either SimdFloat/Double/Int
- *         or basic scalar type when you assign it and the conversion method is called.
- */
  template<typename T>
-static inline const SimdLoadUProxyInternal<T> gmx_simdcall
-loadU(T *m)
+static inline T
+loadU(const typename std::enable_if<std::is_arithmetic<T>::value, T>::type *m)
+{
+    return *m;
+}
+
+template <typename T, size_t N>
+static inline T gmx_simdcall
+loadU(const AlignedArray<typename SimdTraits<T>::type, N> &m)
  {
-    return {
-               m
-    };
+    return simdLoadU(m.data(), typename SimdTraits<T>::tag());
  }
  
  class SimdSetZeroProxyInternal;
diff --git a/src/gromacs/simd/simd_math.h b/src/gromacs/simd/simd_math.h

index 22a7232dfc3e6588c14fc43926b8124ea86c791c..ad519574b1683cb8bd410d2ceece516f34bc64d0 100644 (file)
--- a/src/gromacs/simd/simd_math.h
+++ b/src/gromacs/simd/simd_math.h
@@ -806,7 +806,7 @@ erfc(SimdFloat x)
          conv.i  = conv.i & isieve;
          mem[i]  = conv.f;
      }
-    z = load(mem);
+    z = load<SimdFloat>(mem);
  #endif
      q       = (z-y) * (z+y);
      corr    = fma(CD4, q, CD3);
diff --git a/src/gromacs/simd/tests/CMakeLists.txt b/src/gromacs/simd/tests/CMakeLists.txt

index f7c1c8a2714704ce6a8a0e5629a22995a1025c5e..b651f5f4e1c2ea7217627dd09aa4b6e7679f852a 100644 (file)
--- a/src/gromacs/simd/tests/CMakeLists.txt
+++ b/src/gromacs/simd/tests/CMakeLists.txt
@@ -49,25 +49,3 @@ gmx_add_unit_test(SimdUnitTests simd-test
                    scalar.cpp
                    scalar_util.cpp
                    scalar_math.cpp)
-
-# Add tests for expressions which are supposed to not compile when the
-# build configuration supports a real implementation of the SIMD
-# module. See tests/simd_ambiguous.cpp for documentation.
-set(AMBIGUOUS_COMPILATION_EXPECTED_TO_FAIL TRUE)
-if(GMX_SIMD_ACTIVE STREQUAL "NONE")
-    set(AMBIGUOUS_COMPILATION_EXPECTED_TO_FAIL FALSE)
-endif()
-foreach(TEST_PREC float double)
-    foreach(TEST_FUNC exp exp2 log inv cos sin sqrt)
-        set(TEST_NAME simd_ambiguous_${TEST_PREC}_${TEST_FUNC})
-        add_executable(${TEST_NAME} simd_ambiguous.cpp)
-        set_target_properties(${TEST_NAME} PROPERTIES
-                              EXCLUDE_FROM_ALL TRUE
-                              EXCLUDE_FROM_DEFAULT_BUILD TRUE)
-        target_compile_definitions(${TEST_NAME} PRIVATE TEST_FUNC=${TEST_FUNC} TEST_PREC=${TEST_PREC})
-        add_test(NAME ${TEST_NAME}
-            COMMAND ${CMAKE_COMMAND} --build . --target ${TEST_NAME} --config $<CONFIGURATION>
-            WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
-        set_tests_properties(${TEST_NAME} PROPERTIES WILL_FAIL ${AMBIGUOUS_COMPILATION_EXPECTED_TO_FAIL})
-    endforeach()
-endforeach()
diff --git a/src/gromacs/simd/tests/bootstrap_loadstore.cpp b/src/gromacs/simd/tests/bootstrap_loadstore.cpp

index fd1f68c64f3b93d6b98a60a57993c0174b1d2040..5bbdb1d35f80ea3d75806e08c5add85c7d33891e 100644 (file)
--- a/src/gromacs/simd/tests/bootstrap_loadstore.cpp
+++ b/src/gromacs/simd/tests/bootstrap_loadstore.cpp
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -134,7 +134,7 @@ loadStoreTester(TSimd gmx_simdcall loadFn(const T* mem), void gmx_simdcall store
   * \param  m      Memory address to load from
   */
  template <typename T, typename TSimd> TSimd gmx_simdcall
-loadWrapper(const T * m) { return load(m); }
+loadWrapper(const T * m) { return load<TSimd>(m); }
  
  /*! \brief Wrapper to handle proxy objects returned by some loadU functions.
   *
@@ -143,7 +143,7 @@ loadWrapper(const T * m) { return load(m); }
   * \param  m      Memory address to load from
   */
  template <typename T, typename TSimd> TSimd gmx_simdcall
-loadUWrapper(const T * m) { return loadU(m); }
+loadUWrapper(const T * m) { return loadU<TSimd>(m); }
  
  
  #if GMX_SIMD_HAVE_REAL
diff --git a/src/gromacs/simd/tests/scalar.cpp b/src/gromacs/simd/tests/scalar.cpp

index 540d3b1398795aa0af7187cc792bf52fd8fecdb3..cfd77f153b95f21248398826d51a7adaa3b609e8 100644 (file)
--- a/src/gromacs/simd/tests/scalar.cpp
+++ b/src/gromacs/simd/tests/scalar.cpp
@@ -62,14 +62,14 @@ namespace
  
  TEST(SimdScalarTest, load)
  {
-    real val = load(&c1);
+    real val = load<real>(&c1);
  
      EXPECT_EQ(c1, val);
  }
  
  TEST(SimdScalarTest, loadU)
  {
-    real val = loadU(&c1);
+    real val = loadU<real>(&c1);
  
      EXPECT_EQ(c1, val);
  }
@@ -253,7 +253,7 @@ TEST(SimdScalarTest, cvtD2D)
  TEST(SimdScalarTest, loadI)
  {
      std::int32_t ref = 42;
-    std::int32_t val = load(&ref);
+    std::int32_t val = load<int32_t>(&ref);
  
      EXPECT_EQ(ref, val);
  }
@@ -261,7 +261,7 @@ TEST(SimdScalarTest, loadI)
  TEST(SimdScalarTest, loadUI)
  {
      std::int32_t ref = 42;
-    std::int32_t val = load(&ref);
+    std::int32_t val = loadU<int32_t>(&ref);
  
      EXPECT_EQ(ref, val);
  }
diff --git a/src/gromacs/simd/tests/simd.cpp b/src/gromacs/simd/tests/simd.cpp

index 6aa0fa04ff311658760d41b9d77ebf203ca1ebfc..2dcc77214bb47320e7d8950e5c15051da763db1d 100644 (file)
--- a/src/gromacs/simd/tests/simd.cpp
+++ b/src/gromacs/simd/tests/simd.cpp
@@ -141,7 +141,7 @@ vector2SimdReal(const std::vector<real> &v)
      {
          mem[i] = v[i % v.size()];  // repeat vector contents to fill simd width
      }
-    return load(mem);
+    return load<SimdReal>(mem);
  }
  
  SimdReal
@@ -199,7 +199,7 @@ vector2SimdInt(const std::vector<int> &v)
      {
          mem[i] = v[i % v.size()];  // repeat vector contents to fill simd width
      }
-    return load(mem);
+    return load<SimdInt32>(mem);
  }
  
  SimdInt32
diff --git a/src/gromacs/simd/tests/simd_ambiguous.cpp b/src/gromacs/simd/tests/simd_ambiguous.cpp

deleted file mode 100644 (file)

index 072cf9e..0000000
--- a/src/gromacs/simd/tests/simd_ambiguous.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2017, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include "gromacs/simd/simd.h"
-#include "gromacs/simd/simd_math.h"
-
-/* Test that math functions which can be used both with scalar and SIMD
- * are ambiguous when applied to value returned from load.
- *
- * gmx::load returns a proxy/reference object which can be casted to either
- * a scalar (e.g. float) or a SIMD value (e.g. SIMDFloat). The gmx math
- * functions (e.g. sqrt) take both a scalar and a SIMD value as an argument.
- * Thus e.g. load(sqrt(m)) should be ambiguous. This test makes sure that
- * this does not compile. This got previously broken by introducing templates
- * which influenced the overload resolution.
- *
- * The test execution code in CMakeLists.txt tests that the code doesn't
- * compile with a SIMD implementation. To test that this code does correctly
- * compile besides causing the ambiguous overload error, it expects to
- * correctly compile for a a non-simd build. For such a build the
- * code is non-ambiguous because only the scalar version exists.
- *
- * The test execution code passes either float/double as TEST_PREC and the math
- * function to test as TEST_FUNC. Both are passed as compile definitions.
- * The file is compiled once for each combination when executing ctest and
- * the test fails if the file compiles.
- *
- * Possible extensions: Test all other math functions including those taking
- * multiple arguments.
- */
-int main()
-{
-    /* We cannot check for SIMD float or double support at cmake,
-     * only for general SIMD support. Therefore with SIMD, but without
-     * SIMD float or double support we make the compilation fail with
-     * a static_assert instead of the ambiguous overload error
-     */
-    constexpr bool testFloat       = std::is_same<TEST_PREC, float>::value;
-    constexpr bool testDouble      = std::is_same<TEST_PREC, double>::value;
-    constexpr bool haveSimdSupport = GMX_SIMD;
-    constexpr bool haveSimdFloat   = GMX_SIMD_HAVE_FLOAT;
-    constexpr bool haveSimdDouble  = GMX_SIMD_HAVE_DOUBLE;
-    static_assert(!haveSimdSupport || !testFloat || haveSimdFloat, "Assertion failure to make test fail without SIMD float support");
-    static_assert(!haveSimdSupport || !testDouble || haveSimdDouble, "Assertion failure to make test fail without SIMD double support");
-
-    TEST_PREC  d = 0;
-    TEST_PREC *m = &d;
-    gmx::TEST_FUNC(gmx::load(m));
-}
diff --git a/src/gromacs/simd/tests/simd_floatingpoint.cpp b/src/gromacs/simd/tests/simd_floatingpoint.cpp

index fe728444ee11d0d4cd625e1fa4169e57f80cb53f..f8bbc56098d5a8a3253ad86fe0331a1ccbe88d71 100644 (file)
--- a/src/gromacs/simd/tests/simd_floatingpoint.cpp
+++ b/src/gromacs/simd/tests/simd_floatingpoint.cpp
@@ -464,7 +464,7 @@ TEST_F(SimdFloatingpointTest, cvtFloat2Double)
          f[i] = i * (1.0 + 100*GMX_FLOAT_EPS);
      }
  
-    vf = load(f);
+    vf = load<SimdFloat>(f);
  #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
      SimdDouble vd1;
      cvtF2DD(vf, &vd0, &vd1);
@@ -499,9 +499,9 @@ TEST_F(SimdFloatingpointTest, cvtDouble2Float)
          d[i] = i * (1.0 + 100*GMX_FLOAT_EPS);
      }
  
-    vd0 = load(d);
+    vd0 = load<SimdDouble>(d);
  #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
-    SimdDouble vd1 = load(d + GMX_SIMD_DOUBLE_WIDTH); // load upper half of data
+    SimdDouble vd1 = load<SimdDouble>(d + GMX_SIMD_DOUBLE_WIDTH); // load upper half of data
      vf = cvtDD2F(vd0, vd1);
  #elif (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
      vf = cvtD2F(vd0);
diff --git a/src/gromacs/simd/tests/simd_floatingpoint_util.cpp b/src/gromacs/simd/tests/simd_floatingpoint_util.cpp

index 8d288f77c69be35d16996814f26aed1aa1906faf..e9205eab0e55fb0af9e76dd52c6dc219034a9052 100644 (file)
--- a/src/gromacs/simd/tests/simd_floatingpoint_util.cpp
+++ b/src/gromacs/simd/tests/simd_floatingpoint_util.cpp
@@ -137,10 +137,10 @@ TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose4)
              mem0_[align * offset_[j] + 3] = val3_[j];
          }
  
-        ref0 = load(val0_);
-        ref1 = load(val1_);
-        ref2 = load(val2_);
-        ref3 = load(val3_);
+        ref0 = load<SimdReal>(val0_);
+        ref1 = load<SimdReal>(val1_);
+        ref2 = load<SimdReal>(val2_);
+        ref3 = load<SimdReal>(val3_);
  
          if (align == 4)
          {
@@ -185,8 +185,8 @@ TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose2)
              mem0_[align * offset_[j] + 1] = val1_[j];
          }
  
-        ref0 = load(val0_);
-        ref1 = load(val1_);
+        ref0 = load<SimdReal>(val0_);
+        ref1 = load<SimdReal>(val1_);
  
          if (align == 2)
          {
@@ -228,9 +228,9 @@ TEST_F(SimdFloatingpointUtilTest, gatherLoadUTranspose3)
              mem0_[align * offset_[j] + 2] = val2_[j];
          }
  
-        ref0 = load(val0_);
-        ref1 = load(val1_);
-        ref2 = load(val2_);
+        ref0 = load<SimdReal>(val0_);
+        ref1 = load<SimdReal>(val1_);
+        ref2 = load<SimdReal>(val2_);
  
          if (align == 3)
          {
@@ -279,9 +279,9 @@ TEST_F(SimdFloatingpointUtilTest, transposeScatterStoreU3)
              refmem[align * offset_[j] + 2] = val2_[j];
          }
  
-        v0 = load(val0_);
-        v1 = load(val1_);
-        v2 = load(val2_);
+        v0 = load<SimdReal>(val0_);
+        v1 = load<SimdReal>(val1_);
+        v2 = load<SimdReal>(val2_);
  
          if (align == 3)
          {
@@ -331,9 +331,9 @@ TEST_F(SimdFloatingpointUtilTest, transposeScatterIncrU3)
              refmem[align * offset_[j] + 2] += val2_[j];
          }
  
-        v0 = load(val0_);
-        v1 = load(val1_);
-        v2 = load(val2_);
+        v0 = load<SimdReal>(val0_);
+        v1 = load<SimdReal>(val1_);
+        v2 = load<SimdReal>(val2_);
  
          if (align == 3)
          {
@@ -382,9 +382,9 @@ TEST_F(SimdFloatingpointUtilTest, transposeScatterIncrU3Overlapping)
          refmem[3 * offset_[j] + 2] += val2_[j];
      }
  
-    v0 = load(val0_);
-    v1 = load(val1_);
-    v2 = load(val2_);
+    v0 = load<SimdReal>(val0_);
+    v1 = load<SimdReal>(val1_);
+    v2 = load<SimdReal>(val2_);
  
      transposeScatterIncrU<3>(mem0_, offset_, v0, v1, v2);
  
@@ -422,9 +422,9 @@ TEST_F(SimdFloatingpointUtilTest, transposeScatterDecrU3)
              refmem[align * offset_[j] + 2] -= val2_[j];
          }
  
-        v0 = load(val0_);
-        v1 = load(val1_);
-        v2 = load(val2_);
+        v0 = load<SimdReal>(val0_);
+        v1 = load<SimdReal>(val1_);
+        v2 = load<SimdReal>(val2_);
  
          if (align == 3)
          {
@@ -473,9 +473,9 @@ TEST_F(SimdFloatingpointUtilTest, transposeScatterDecrU3Overlapping)
          refmem[3 * offset_[j] + 2] -= val2_[j];
      }
  
-    v0 = load(val0_);
-    v1 = load(val1_);
-    v2 = load(val2_);
+    v0 = load<SimdReal>(val0_);
+    v1 = load<SimdReal>(val1_);
+    v2 = load<SimdReal>(val2_);
  
      transposeScatterDecrU<3>(mem0_, offset_, v0, v1, v2);
  
@@ -495,7 +495,7 @@ TEST_F(SimdFloatingpointUtilTest, expandScalarsToTriplets)
          mem0_[i] = i;
      }
  
-    vs = load(mem0_);
+    vs = load<SimdReal>(mem0_);
  
      expandScalarsToTriplets(vs, &v0, &v1, &v2);
  
@@ -532,11 +532,11 @@ TEST_F(SimdFloatingpointUtilTest, gatherLoadBySimdIntTranspose4)
              mem0_[align * offset_[j] + 3] = val3_[j];
          }
  
-        simdoffset = load(offset_);
-        ref0       = load(val0_);
-        ref1       = load(val1_);
-        ref2       = load(val2_);
-        ref3       = load(val3_);
+        simdoffset = load<SimdInt32>(offset_);
+        ref0       = load<SimdReal>(val0_);
+        ref1       = load<SimdReal>(val1_);
+        ref2       = load<SimdReal>(val2_);
+        ref3       = load<SimdReal>(val3_);
  
          if (align == 4)
          {
@@ -581,9 +581,9 @@ TEST_F(SimdFloatingpointUtilTest, gatherLoadBySimdIntTranspose2)
              mem0_[align * offset_[j] + 1] = val1_[j];
          }
  
-        simdoffset = load(offset_);
-        ref0       = load(val0_);
-        ref1       = load(val1_);
+        simdoffset = load<SimdInt32>(offset_);
+        ref0       = load<SimdReal>(val0_);
+        ref1       = load<SimdReal>(val1_);
  
          if (align == 4)
          {
@@ -626,9 +626,9 @@ TEST_F(SimdFloatingpointUtilTest, gatherLoadUBySimdIntTranspose2)
              mem0_[align * offset_[j] + 1] = val1_[j];
          }
  
-        simdoffset = load(offset_);
-        ref0       = load(val0_);
-        ref1       = load(val1_);
+        simdoffset = load<SimdInt32>(offset_);
+        ref0       = load<SimdReal>(val0_);
+        ref1       = load<SimdReal>(val1_);
  
          if (align == 1)
          {
@@ -660,10 +660,10 @@ TEST_F(SimdFloatingpointUtilTest, reduceIncr4Sum)
      real                              sum0, sum1, sum2, sum3, tstsum;
      FloatingPointTolerance            tolerance(defaultRealTolerance());
  
-    v0 = load(val0_);
-    v1 = load(val1_);
-    v2 = load(val2_);
-    v3 = load(val3_);
+    v0 = load<SimdReal>(val0_);
+    v1 = load<SimdReal>(val1_);
+    v2 = load<SimdReal>(val2_);
+    v3 = load<SimdReal>(val3_);
  
      sum0 = sum1 = sum2 = sum3 = 0;
      for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
@@ -699,7 +699,7 @@ TEST_F(SimdFloatingpointUtilTest, loadDualHsimd)
      // Point p to the upper half of val0_
      real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
  
-    v0 = load(val0_);
+    v0 = load<SimdReal>(val0_);
      v1 = loadDualHsimd(val0_, p);
  
      GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
@@ -717,7 +717,7 @@ TEST_F(SimdFloatingpointUtilTest, loadDuplicateHsimd)
          p[i] = val0_[i];
      }
  
-    v0 = load(val0_);
+    v0 = load<SimdReal>(val0_);
      v1 = loadDuplicateHsimd(val0_);
  
      GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
@@ -739,7 +739,7 @@ TEST_F(SimdFloatingpointUtilTest, load1DualHsimd)
          p[i]     = data[1];
      }
  
-    v0 = load(val0_);
+    v0 = load<SimdReal>(val0_);
      v1 = load1DualHsimd(data);
  
      GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
@@ -754,7 +754,7 @@ TEST_F(SimdFloatingpointUtilTest, storeDualHsimd)
      // Point p to the upper half of val0_
      real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
  
-    v0 = load(val2_);
+    v0 = load<SimdReal>(val2_);
      storeDualHsimd(val0_, p, v0);
  
      for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
@@ -777,7 +777,7 @@ TEST_F(SimdFloatingpointUtilTest, incrDualHsimd)
      // Point p to the upper half of val0_
      real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
  
-    v0 = load(val2_);
+    v0 = load<SimdReal>(val2_);
      incrDualHsimd(val0_, p, v0);
  
      for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
@@ -797,7 +797,7 @@ TEST_F(SimdFloatingpointUtilTest, incrDualHsimdOverlapping)
          reference[i] = val0_[i] + val2_[i] + val2_[GMX_SIMD_REAL_WIDTH/2+i];
      }
  
-    v0 = load(val2_);
+    v0 = load<SimdReal>(val2_);
      incrDualHsimd(val0_, val0_, v0);
  
      for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH/2; i++)
@@ -820,7 +820,7 @@ TEST_F(SimdFloatingpointUtilTest, decrHsimd)
          ref[i] = val0_[i] - ( val1_[i] + p[i] );
      }
  
-    v0 = load(val1_);
+    v0 = load<SimdReal>(val1_);
      decrHsimd(val0_, v0);
  
      for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
@@ -853,8 +853,8 @@ TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose2Hsimd)
  
          }
  
-        ref0 = load(val0_);
-        ref1 = load(val1_);
+        ref0 = load<SimdReal>(val0_);
+        ref1 = load<SimdReal>(val1_);
  
          if (align == 2)
          {
@@ -887,8 +887,8 @@ TEST_F(SimdFloatingpointUtilTest, reduceIncr4SumHsimd)
      FloatingPointTolerance            tolerance(defaultRealTolerance());
  
      // Use the half-SIMD storage in memory val0_ and val1_.
-    v0 = load(val0_);
-    v1 = load(val1_);
+    v0 = load<SimdReal>(val0_);
+    v1 = load<SimdReal>(val1_);
  
      sum0 = sum1 = sum2 = sum3 = 0;
      for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
diff --git a/src/gromacs/simd/tests/simd_integer.cpp b/src/gromacs/simd/tests/simd_integer.cpp

index 7dd555bbbd15daee5f37010d9f710dd174daf618..f468af1f190113c06a05a54020dd9e8777abfeca 100644 (file)
--- a/src/gromacs/simd/tests/simd_integer.cpp
+++ b/src/gromacs/simd/tests/simd_integer.cpp
@@ -147,7 +147,7 @@ TEST_F(SimdIntegerTest, extract)
      {
          idata[i] = i+1;
      }
-    simd = load(idata);
+    simd = load<SimdInt32>(idata);
  
      /* We cannot do a loop here, since
       * - C++ gets confused about signed/unsigned if SSE macros are used in EXPECT_EQ()
diff --git a/src/gromacs/tables/tests/splinetable.cpp b/src/gromacs/tables/tests/splinetable.cpp

index 02b7ea73a2d13e299add9836e9ac78d7f9845bb4..e54a52d0d6971be7955ce295960926a85961b6f2 100644 (file)
--- a/src/gromacs/tables/tests/splinetable.cpp
+++ b/src/gromacs/tables/tests/splinetable.cpp
@@ -725,13 +725,13 @@ TYPED_TEST(SplineTableTest, CatchesOutOfRangeValuesSimd)
      // Make position 1 incorrect if width>=2, otherwise position 0
      // range.first-GMX_REAL_EPS is not invalid. See comment in table.
      alignedMem[ (GMX_SIMD_REAL_WIDTH >= 2) ? 1 : 0] = -GMX_REAL_EPS;
-    x = load(alignedMem);
+    x = load<SimdReal>(alignedMem);
  
      EXPECT_THROW_GMX(table.evaluateFunctionAndDerivative(x, &func, &der), gmx::RangeError);
  
      // Make position 1 incorrect if width>=2, otherwise position 0
      alignedMem[ (GMX_SIMD_REAL_WIDTH >= 2) ? 1 : 0] = range.second;
-    x = load(alignedMem);
+    x = load<SimdReal>(alignedMem);
  
      EXPECT_THROW_GMX(table.evaluateFunctionAndDerivative(x, &func, &der), gmx::RangeError);
  }
@@ -749,7 +749,7 @@ TYPED_TEST(SplineTableTest, AcceptsInRangeValuesSimd)
      {
          alignedMem[i] = range.second*(1.0-GMX_REAL_EPS)*i/(GMX_SIMD_REAL_WIDTH-1);
      }
-    x = load(alignedMem);
+    x = load<SimdReal>(alignedMem);
  
      EXPECT_NO_THROW_GMX(table.evaluateFunctionAndDerivative(x, &func, &der));
  }
author	Roland Schulz <roland.schulz@intel.com>
	Fri, 6 Oct 2017 23:36:50 +0000 (16:36 -0700)
committer	Roland Schulz <roland.schulz@intel.com>
	Wed, 11 Oct 2017 23:22:14 +0000 (01:22 +0200)
src/gromacs/ewald/pme-solve.cpp		patch \| blob \| history
src/gromacs/listed-forces/bonded.cpp		patch \| blob \| history
src/gromacs/listed-forces/pairs.cpp		patch \| blob \| history
src/gromacs/mdlib/clincs.cpp		patch \| blob \| history
src/gromacs/mdlib/csettle.cpp		patch \| blob \| history
src/gromacs/mdlib/nbnxn_atomdata.cpp		patch \| blob \| history
src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_outer.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_outer.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_prune.cpp		patch \| blob \| history
src/gromacs/mdlib/nbnxn_search_simd_2xnn.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_search_simd_4xn.h		patch \| blob \| history
src/gromacs/pbcutil/pbc-simd.h		patch \| blob \| history
src/gromacs/simd/simd.h		patch \| blob \| history
src/gromacs/simd/simd_math.h		patch \| blob \| history
src/gromacs/simd/tests/CMakeLists.txt		patch \| blob \| history
src/gromacs/simd/tests/bootstrap_loadstore.cpp		patch \| blob \| history
src/gromacs/simd/tests/scalar.cpp		patch \| blob \| history
src/gromacs/simd/tests/simd.cpp		patch \| blob \| history
src/gromacs/simd/tests/simd_ambiguous.cpp	[deleted file]	patch \| blob \| history
src/gromacs/simd/tests/simd_floatingpoint.cpp		patch \| blob \| history
src/gromacs/simd/tests/simd_floatingpoint_util.cpp		patch \| blob \| history
src/gromacs/simd/tests/simd_integer.cpp		patch \| blob \| history
src/gromacs/tables/tests/splinetable.cpp		patch \| blob \| history