#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
set(OUTPUT_SIMD "IBM_QPX")
elseif(OUTPUT_TMP MATCHES " neon_asimd ")
set(OUTPUT_SIMD "ARM_NEON_ASIMD")
- elseif(OUTPUT_TMP MATCHES " neon ")
+ elseif(OUTPUT_TMP MATCHES " neon " AND NOT GMX_DOUBLE)
set(OUTPUT_SIMD "ARM_NEON")
endif()
endif()
elseif(GMX_SIMD STREQUAL "ARM_NEON")
+ if (GMX_DOUBLE)
+ message(FATAL_ERROR "ARM_NEON SIMD support is not available for a double precision build because the architecture lacks double-precision support")
+ endif()
+
gmx_find_flags(
"#include<arm_neon.h>
int main(){float32x4_t x=vdupq_n_f32(0.5);x=vmlaq_f32(x,x,x);return vgetq_lane_f32(x,0)>0;}"
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2016,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
static inline SimdFloat gmx_simdcall
maskzRsqrt(SimdFloat x, SimdFBool m)
{
+ // The result will always be correct since we mask the result with m, but
+ // for debug builds we also want to make sure not to generate FP exceptions,
+ // so lanes not selected by m are replaced with 1.0 before the rsqrt estimate.
#ifndef NDEBUG
- x.simdInternal_ = vbslq_f32(m, vdupq_n_f32(1.0f), x.simdInternal_);
+ x.simdInternal_ = vbslq_f32(m.simdInternal_, x.simdInternal_, vdupq_n_f32(1.0f));
#endif
return {
vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(vrsqrteq_f32(x.simdInternal_)),
static inline SimdFloat gmx_simdcall
maskzRcp(SimdFloat x, SimdFBool m)
{
+ // The result will always be correct since we mask the result with m, but
+ // for debug builds we also want to make sure not to generate FP exceptions,
+ // so lanes not selected by m are replaced with 1.0 before the reciprocal estimate.
#ifndef NDEBUG
- x.simdInternal_ = vbslq_f32(m, vdupq_n_f32(1.0f), x.simdInternal_);
+ x.simdInternal_ = vbslq_f32(m.simdInternal_, x.simdInternal_, vdupq_n_f32(1.0f));
#endif
return {
vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(vrecpeq_f32(x.simdInternal_)),
operator<<(SimdFInt32 a, int n)
{
return {
- vshlq_n_s32(a.simdInternal_, n)
+ vshlq_s32(a.simdInternal_, vdupq_n_s32(n >= 32 ? 32 : n))
};
}
operator>>(SimdFInt32 a, int n)
{
return {
- vshrq_n_s32(a.simdInternal_, n)
+ vshlq_s32(a.simdInternal_, vdupq_n_s32(n >= 32 ? -32 : -n))
};
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2016,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
// The result will always be correct since we mask the result with m, but
// for debug builds we also want to make sure not to generate FP exceptions
#ifndef NDEBUG
- x.simdInternal_ = vbslq_f64(m.simdInternal_, vdupq_n_f64(1.0, x.simdInternal_);
+ x.simdInternal_ = vbslq_f64(m.simdInternal_, x.simdInternal_, vdupq_n_f64(1.0f));
#endif
return {
float64x2_t(vandq_u64(uint64x2_t(vrsqrteq_f64(x.simdInternal_)), m.simdInternal_))
// The result will always be correct since we mask the result with m, but
// for debug builds we also want to make sure not to generate FP exceptions
#ifndef NDEBUG
- x.simdInternal_ = vbslq_f64(m.simdInternal_, vdupq_n_f64(1.0, x.simdInternal_);
+ x.simdInternal_ = vbslq_f64(m.simdInternal_, x.simdInternal_, vdupq_n_f64(1.0f));
#endif
return {
float64x2_t(vandq_u64(uint64x2_t(vrecpeq_f64(x.simdInternal_)), m.simdInternal_))
operator<<(SimdDInt32 a, int n)
{
return {
- vshl_n_s32(a.simdInternal_, n)
+ vshl_s32(a.simdInternal_, vdup_n_s32(n >= 32 ? 32 : n))
};
}
operator>>(SimdDInt32 a, int n)
{
return {
- vshr_n_s32(a.simdInternal_, n)
+ vshl_s32(a.simdInternal_, vdup_n_s32(n >= 32 ? -32 : -n))
};
}