const real *const gmx_restrict dthy = spline->dtheta[YY] + norder;
const real *const gmx_restrict dthz = spline->dtheta[ZZ] + norder;
- SimdReal fx_S = setZero();
- SimdReal fy_S = setZero();
- SimdReal fz_S = setZero();
+ Simd4NReal fx_S = setZero();
+ Simd4NReal fy_S = setZero();
+ Simd4NReal fz_S = setZero();
/* With order 4 the z-spline is actually aligned */
- const SimdReal tz_S = load4DuplicateN(thz);
- const SimdReal dz_S = load4DuplicateN(dthz);
+ const Simd4NReal tz_S = load4DuplicateN(thz);
+ const Simd4NReal dz_S = load4DuplicateN(dthz);
for (int ithx = 0; ithx < 4; ithx++)
{
- const int index_x = (idxX + ithx)*gridNY*gridNZ;
- const SimdReal tx_S = SimdReal(thx[ithx]);
- const SimdReal dx_S = SimdReal(dthx[ithx]);
+ const int index_x = (idxX + ithx)*gridNY*gridNZ;
+ const Simd4NReal tx_S = Simd4NReal(thx[ithx]);
+ const Simd4NReal dx_S = Simd4NReal(dthx[ithx]);
- for (int ithy = 0; ithy < 4; ithy += GMX_SIMD_REAL_WIDTH/4)
+ for (int ithy = 0; ithy < 4; ithy += GMX_SIMD4N_REAL_WIDTH/4)
{
- const int index_xy = index_x + (idxY+ithy)*gridNZ;
+ const int index_xy = index_x + (idxY+ithy)*gridNZ;
- const SimdReal ty_S = loadUNDuplicate4(thy +ithy);
- const SimdReal dy_S = loadUNDuplicate4(dthy+ithy);
+ const Simd4NReal ty_S = loadUNDuplicate4(thy +ithy);
+ const Simd4NReal dy_S = loadUNDuplicate4(dthy+ithy);
- const SimdReal gval_S = loadU4NOffset(grid+index_xy+idxZ, gridNZ);
+ const Simd4NReal gval_S = loadU4NOffset(grid+index_xy+idxZ, gridNZ);
- const SimdReal fxy1_S = tz_S * gval_S;
- const SimdReal fz1_S = dz_S * gval_S;
+ const Simd4NReal fxy1_S = tz_S * gval_S;
+ const Simd4NReal fz1_S = dz_S * gval_S;
fx_S = fma(dx_S * ty_S, fxy1_S, fx_S);
fy_S = fma(tx_S * dy_S, fxy1_S, fy_S);
* \{
*/
-/*! \libinternal \brief Simd traits */
+namespace internal
+{
+/*! \libinternal \brief Simd traits
+ *
+ * These traits are used to query data about SIMD types. Currently provided
+ * data is useful for SIMD loads (load function and helper classes for
+ * ArrayRef<> in simd_memory.h). Provided data:
+ * - type: scalar type corresponding to the SIMD type
+ * - width: SIMD width
+ * - tag: tag used for type dispatch of load function
+ */
template<typename T>
struct SimdTraits {};
static constexpr int width = SimdTraits<T>::width;
using tag = typename SimdTraits<T>::tag;
};
+} //namespace internal
/*! \brief Load function that returns SIMD or scalar
*
*/
template<typename T>
static inline T
-load(const typename SimdTraits<T>::type *m) //disabled by SFINAE for non-SIMD types
+load(const typename internal::SimdTraits<T>::type *m) //disabled by SFINAE for non-SIMD types
{
- return simdLoad(m, typename SimdTraits<T>::tag());
+ return simdLoad(m, typename internal::SimdTraits<T>::tag());
}
template<typename T>
template <typename T, size_t N>
static inline T gmx_simdcall
-load(const AlignedArray<typename SimdTraits<T>::type, N> &m)
+load(const AlignedArray<typename internal::SimdTraits<T>::type, N> &m)
{
- return simdLoad(m.data(), typename SimdTraits<T>::tag());
+ return simdLoad(m.data(), typename internal::SimdTraits<T>::tag());
}
/*! \brief Load function that returns SIMD or scalar based on template argument
*/
template<typename T>
static inline T
-loadU(const typename SimdTraits<T>::type *m)
+loadU(const typename internal::SimdTraits<T>::type *m)
{
- return simdLoadU(m, typename SimdTraits<T>::tag());
+ return simdLoadU(m, typename internal::SimdTraits<T>::tag());
}
template<typename T>
template <typename T, size_t N>
static inline T gmx_simdcall
-loadU(const AlignedArray<typename SimdTraits<T>::type, N> &m)
+loadU(const AlignedArray<typename internal::SimdTraits<T>::type, N> &m)
{
- return simdLoadU(m.data(), typename SimdTraits<T>::tag());
+ return simdLoadU(m.data(), typename internal::SimdTraits<T>::tag());
}
class SimdSetZeroProxyInternal;
return {};
}
+namespace internal
+{
+//TODO: Don't forward functions but properly rename them and use proper traits
+template<typename T>
+struct Simd4Traits {}; //primary template has no members; only the specializations below provide ::type
+
+#if GMX_SIMD4_HAVE_FLOAT
+template<>
+struct Simd4Traits<Simd4Float>
+{
+ using type = float; //element type of the pointer accepted by load/loadU for Simd4Float
+};
+#endif
+
+#if GMX_SIMD4_HAVE_DOUBLE
+template<>
+struct Simd4Traits<Simd4Double>
+{
+ using type = double; //element type of the pointer accepted by load/loadU for Simd4Double
+};
+#endif
+} //namespace internal
+
+#if GMX_SIMD4_HAVE_REAL
+template<typename T>
+T load(const typename internal::Simd4Traits<T>::type* m) //substitution fails unless Simd4Traits<T> is specialized, i.e. T is a Simd4 type
+{
+ return load4(m); //forwards to load4; T must be given explicitly, it cannot be deduced from m
+}
+template<typename T>
+T loadU(const typename internal::Simd4Traits<T>::type* m) //substitution fails unless Simd4Traits<T> is specialized, i.e. T is a Simd4 type
+{
+ return load4U(m); //forwards to load4U; T must be given explicitly, it cannot be deduced from m
+}
+#endif
+
/* Implement most of 4xn functions by forwarding them to other functions when possible.
* The functions forwarded here don't need to be implemented by each implementation.
* For width=4 all functions are forwarded and for width=8 all but loadU4NOffset are forwarded.
*/
#if GMX_SIMD_HAVE_FLOAT
-#if GMX_SIMD_FLOAT_WIDTH < 4 || !GMX_SIMD_HAVE_LOADU
-#define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT 0
-#elif GMX_SIMD_FLOAT_WIDTH == 4 && GMX_SIMD_HAVE_LOADU
-#define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT 1
+#if GMX_SIMD_FLOAT_WIDTH < 4
+#define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT (GMX_SIMD_HAVE_LOADU && GMX_SIMD4_HAVE_FLOAT)
+#elif GMX_SIMD_FLOAT_WIDTH == 4
+#define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT GMX_SIMD_HAVE_LOADU
//For GMX_SIMD_FLOAT_WIDTH>4 it is the reponsibility of the implementation to set
//GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
#endif
-#if GMX_SIMD_FLOAT_WIDTH == 4 && GMX_SIMD_HAVE_LOADU
-static inline SimdFloat gmx_simdcall
+#if GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
+#if GMX_SIMD_FLOAT_WIDTH < 4
+using Simd4NFloat = Simd4Float; //SIMD width below 4: the 4N type is the Simd4 type and GMX_SIMD4N_FLOAT_WIDTH is fixed at 4
+#define GMX_SIMD4N_FLOAT_WIDTH 4
+#else
+using Simd4NFloat = SimdFloat; //SIMD width of 4 or more: the 4N type is the regular SIMD type with its native width
+#define GMX_SIMD4N_FLOAT_WIDTH GMX_SIMD_FLOAT_WIDTH
+#endif
+
+#if GMX_SIMD_FLOAT_WIDTH <= 4
+static inline Simd4NFloat gmx_simdcall
loadUNDuplicate4(const float* f)
{
- return SimdFloat(*f);
+ return Simd4NFloat(*f);
}
-static inline SimdFloat gmx_simdcall
+static inline Simd4NFloat gmx_simdcall
load4DuplicateN(const float* f)
{
- return load<SimdFloat>(f);
+ return load<Simd4NFloat>(f);
}
-static inline SimdFloat gmx_simdcall
+static inline Simd4NFloat gmx_simdcall
loadU4NOffset(const float* f, int)
{
- return loadU<SimdFloat>(f);
+ return loadU<Simd4NFloat>(f);
}
-#elif GMX_SIMD_FLOAT_WIDTH == 8 && GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT && GMX_SIMD_HAVE_LOADU
-static inline SimdFloat gmx_simdcall
+#elif GMX_SIMD_FLOAT_WIDTH == 8
+static inline Simd4NFloat gmx_simdcall
loadUNDuplicate4(const float* f)
{
return loadU1DualHsimd(f);
}
-static inline SimdFloat gmx_simdcall
+static inline Simd4NFloat gmx_simdcall
load4DuplicateN(const float* f)
{
return loadDuplicateHsimd(f);
}
#endif
-#else //GMX_SIMD_HAVE_FLOAT
+#endif //GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
+#else //GMX_SIMD_HAVE_FLOAT
#define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT 0
#endif
#if GMX_SIMD_HAVE_DOUBLE
-#if GMX_SIMD_DOUBLE_WIDTH < 4 || !GMX_SIMD_HAVE_LOADU
-#define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE 0
-#elif GMX_SIMD_DOUBLE_WIDTH == 4 && GMX_SIMD_HAVE_LOADU
-#define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE 1
+#if GMX_SIMD_DOUBLE_WIDTH < 4
+#define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE (GMX_SIMD_HAVE_LOADU && GMX_SIMD4_HAVE_DOUBLE)
+#elif GMX_SIMD_DOUBLE_WIDTH == 4
+#define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE GMX_SIMD_HAVE_LOADU
//For GMX_SIMD_DOUBLE_WIDTH>4 it is the reponsibility of the implementation to set
//GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
#endif
-#if GMX_SIMD_DOUBLE_WIDTH == 4 && GMX_SIMD_HAVE_LOADU
-static inline SimdDouble gmx_simdcall
+#if GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
+#if GMX_SIMD_DOUBLE_WIDTH < 4
+using Simd4NDouble = Simd4Double; //SIMD width below 4: the 4N type is the Simd4 type and GMX_SIMD4N_DOUBLE_WIDTH is fixed at 4
+#define GMX_SIMD4N_DOUBLE_WIDTH 4
+#else
+using Simd4NDouble = SimdDouble; //SIMD width of 4 or more: the 4N type is the regular SIMD type with its native width
+#define GMX_SIMD4N_DOUBLE_WIDTH GMX_SIMD_DOUBLE_WIDTH
+#endif
+
+#if GMX_SIMD_DOUBLE_WIDTH <= 4
+static inline Simd4NDouble gmx_simdcall
loadUNDuplicate4(const double* f)
{
- return SimdDouble(*f);
+ return Simd4NDouble(*f);
}
-static inline SimdDouble gmx_simdcall
+static inline Simd4NDouble gmx_simdcall
load4DuplicateN(const double* f)
{
- return load<SimdDouble>(f);
+ return load<Simd4NDouble>(f);
}
-static inline SimdDouble gmx_simdcall
+static inline Simd4NDouble gmx_simdcall
loadU4NOffset(const double* f, int)
{
- return loadU<SimdDouble>(f);
+ return loadU<Simd4NDouble>(f);
}
-#elif GMX_SIMD_DOUBLE_WIDTH == 8 && GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE && GMX_SIMD_HAVE_LOADU
-static inline SimdDouble gmx_simdcall
+#elif GMX_SIMD_DOUBLE_WIDTH == 8
+static inline Simd4NDouble gmx_simdcall
loadUNDuplicate4(const double* f)
{
return loadU1DualHsimd(f);
}
-static inline SimdDouble gmx_simdcall
+static inline Simd4NDouble gmx_simdcall
load4DuplicateN(const double* f)
{
return loadDuplicateHsimd(f);
}
#endif
-#else //GMX_SIMD_HAVE_DOUBLE
+#endif //GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
+#else //GMX_SIMD_HAVE_DOUBLE
#define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE 0
#endif
#define GMX_SIMD_HAVE_4NSIMD_UTIL_REAL GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
#endif
+#if GMX_SIMD_HAVE_4NSIMD_UTIL_REAL
+#if GMX_DOUBLE
+using Simd4NReal = Simd4NDouble; //GMX_DOUBLE set: the real 4N type and width follow the double variants
+#define GMX_SIMD4N_REAL_WIDTH GMX_SIMD4N_DOUBLE_WIDTH
+#else
+using Simd4NReal = Simd4NFloat; //GMX_DOUBLE not set: the real 4N type and width follow the float variants
+#define GMX_SIMD4N_REAL_WIDTH GMX_SIMD4N_FLOAT_WIDTH
+#endif
+#endif
+
//! \} end of name-group proxy objects
} // namespace gmx
#endif // GMX_SIMD_HAVE_HSIMD_UTIL_REAL
-#if GMX_SIMD_HAVE_4NSIMD_UTIL_REAL
+//Test currently doesn't work for GMX_SIMD_REAL_WIDTH<4. Should be fixed by having a GMX_EXPECT_SIMD_REAL_EQ that works for both Simd and Simd4
+#if GMX_SIMD_HAVE_4NSIMD_UTIL_REAL && GMX_SIMD_REAL_WIDTH >= 4
TEST_F(SimdFloatingpointUtilTest, loadUNDuplicate4)
{
- SimdReal v0, v1;
+ Simd4NReal v0, v1;
int i;
real data[GMX_SIMD_REAL_WIDTH/4];
std::iota(data, data+GMX_SIMD_REAL_WIDTH/4, 1);
val0_[i*4] = val0_[i*4+1] = val0_[i*4+2] = val0_[i*4+3] = data[i];
}
- v0 = load<SimdReal>(val0_);
+ v0 = load<Simd4NReal>(val0_);
v1 = loadUNDuplicate4(data);
GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
TEST_F(SimdFloatingpointUtilTest, load4DuplicateN)
{
- SimdReal v0, v1;
- int i;
- real data[4] = { 1, 2, 3, 4};
+ Simd4NReal v0, v1;
+ int i;
+ real data[4] = { 1, 2, 3, 4};
for (i = 0; i < GMX_SIMD_REAL_WIDTH / 4; i++)
{
val0_[i*4+3] = data[3];
}
- v0 = load<SimdReal>(val0_);
+ v0 = load<Simd4NReal>(val0_);
v1 = load4DuplicateN(val0_);
GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
val0_[i*4+3] = data[3+offset*i];
}
- const SimdReal v0 = load<SimdReal>(val0_);
- const SimdReal v1 = loadU4NOffset(data, offset);
+ const Simd4NReal v0 = load<Simd4NReal>(val0_);
+ const Simd4NReal v1 = loadU4NOffset(data, offset);
GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
}