Fix compilation issues with ARM SIMD

author Mark Abraham <mark.j.abraham@gmail.com>

Thu, 13 Jul 2017 11:23:52 +0000 (11:23 +0000)

committer Mark Abraham <mark.j.abraham@gmail.com>

Mon, 21 Aug 2017 12:45:11 +0000 (14:45 +0200)
author Mark Abraham <mark.j.abraham@gmail.com>
Thu, 13 Jul 2017 11:23:52 +0000 (11:23 +0000)
committer Mark Abraham <mark.j.abraham@gmail.com>
Mon, 21 Aug 2017 12:45:11 +0000 (14:45 +0200)
diff --git a/cmake/gmxDetectSimd.cmake b/cmake/gmxDetectSimd.cmake

index fdf64df5f8744edc6c216eea359ae7873dc4b0a7..c0cf2f56ac17db11670d50fc40101c7a3c223233 100644 (file)
--- a/cmake/gmxDetectSimd.cmake
+++ b/cmake/gmxDetectSimd.cmake
@@ -1,7 +1,7 @@
  #
  # This file is part of the GROMACS molecular simulation package.
  #
-# Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
  # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  # and including many others, as listed in the AUTHORS file in the
  # top-level source directory and at http://www.gromacs.org.
@@ -136,7 +136,7 @@ function(gmx_suggest_simd _suggested_simd)
                              set(OUTPUT_SIMD "IBM_QPX")
                          elseif(OUTPUT_TMP MATCHES " neon_asimd ")
                              set(OUTPUT_SIMD "ARM_NEON_ASIMD")
-                        elseif(OUTPUT_TMP MATCHES " neon ")
+                        elseif(OUTPUT_TMP MATCHES " neon " AND NOT GMX_DOUBLE)
                              set(OUTPUT_SIMD "ARM_NEON")
                          endif()
                      endif()
diff --git a/cmake/gmxManageSimd.cmake b/cmake/gmxManageSimd.cmake

index 8efcff425782bec6cadbb2bb4ec11d2a5e8b5bc7..b1b76a51d3844d420948c455d4640fbff960481e 100644 (file)
--- a/cmake/gmxManageSimd.cmake
+++ b/cmake/gmxManageSimd.cmake
@@ -346,6 +346,10 @@ elseif(GMX_SIMD STREQUAL "AVX_512_KNL")
  
  elseif(GMX_SIMD STREQUAL "ARM_NEON")
  
+    if (GMX_DOUBLE)
+        message(FATAL_ERROR "ARM_NEON SIMD support is not available for a double precision build because the architecture lacks double-precision support")
+    endif()
+
      gmx_find_flags(
          "#include<arm_neon.h>
           int main(){float32x4_t x=vdupq_n_f32(0.5);x=vmlaq_f32(x,x,x);return vgetq_lane_f32(x,0)>0;}"
diff --git a/src/gromacs/simd/impl_arm_neon/impl_arm_neon_simd_float.h b/src/gromacs/simd/impl_arm_neon/impl_arm_neon_simd_float.h

index 635b40ab0c5aec7b9d67c13b24741b95ef879f8d..4920df2c047d2f6990c8c9549fa06b6e005c75d9 100644 (file)
--- a/src/gromacs/simd/impl_arm_neon/impl_arm_neon_simd_float.h
+++ b/src/gromacs/simd/impl_arm_neon/impl_arm_neon_simd_float.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2016,2017, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -372,8 +372,10 @@ maskzFma(SimdFloat a, SimdFloat b, SimdFloat c, SimdFBool m)
  static inline SimdFloat gmx_simdcall
  maskzRsqrt(SimdFloat x, SimdFBool m)
  {
+    // The result will always be correct since we mask the result with m, but
+    // for debug builds we also want to make sure not to generate FP exceptions
  #ifndef NDEBUG
-    x.simdInternal_ = vbslq_f32(m, vdupq_n_f32(1.0f), x.simdInternal_);
+    x.simdInternal_ = vbslq_f32(m.simdInternal_, x.simdInternal_, vdupq_n_f32(1.0f));
  #endif
      return {
                 vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(vrsqrteq_f32(x.simdInternal_)),
@@ -384,8 +386,10 @@ maskzRsqrt(SimdFloat x, SimdFBool m)
  static inline SimdFloat gmx_simdcall
  maskzRcp(SimdFloat x, SimdFBool m)
  {
+    // The result will always be correct since we mask the result with m, but
+    // for debug builds we also want to make sure not to generate FP exceptions
  #ifndef NDEBUG
-    x.simdInternal_ = vbslq_f32(m, vdupq_n_f32(1.0f), x.simdInternal_);
+    x.simdInternal_ = vbslq_f32(m.simdInternal_, x.simdInternal_, vdupq_n_f32(1.0f));
  #endif
      return {
                 vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(vrecpeq_f32(x.simdInternal_)),
@@ -572,7 +576,7 @@ static inline SimdFInt32 gmx_simdcall
  operator<<(SimdFInt32 a, int n)
  {
      return {
-               vshlq_n_s32(a.simdInternal_, n)
+               vshlq_s32(a.simdInternal_, vdupq_n_s32(n >= 32 ? 32 : n))
      };
  }
  
@@ -580,7 +584,7 @@ static inline SimdFInt32 gmx_simdcall
  operator>>(SimdFInt32 a, int n)
  {
      return {
-               vshrq_n_s32(a.simdInternal_, n)
+               vshlq_s32(a.simdInternal_, vdupq_n_s32(n >= 32 ? -32 : -n))
      };
  }
  
diff --git a/src/gromacs/simd/impl_arm_neon_asimd/impl_arm_neon_asimd_simd_double.h b/src/gromacs/simd/impl_arm_neon_asimd/impl_arm_neon_asimd_simd_double.h

index 38bed249af330100adf9b5eebbc1344f87deb138..0787d52af08f687c9283788ecdca7cc1fe2443b2 100644 (file)
--- a/src/gromacs/simd/impl_arm_neon_asimd/impl_arm_neon_asimd_simd_double.h
+++ b/src/gromacs/simd/impl_arm_neon_asimd/impl_arm_neon_asimd_simd_double.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2016,2017, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -345,7 +345,7 @@ maskzRsqrt(SimdDouble x, SimdDBool m)
      // The result will always be correct since we mask the result with m, but
      // for debug builds we also want to make sure not to generate FP exceptions
  #ifndef NDEBUG
-    x.simdInternal_ = vbslq_f64(m.simdInternal_, vdupq_n_f64(1.0, x.simdInternal_);
+    x.simdInternal_ = vbslq_f64(m.simdInternal_, x.simdInternal_, vdupq_n_f64(1.0f));
  #endif
      return {
                 float64x2_t(vandq_u64(uint64x2_t(vrsqrteq_f64(x.simdInternal_)), m.simdInternal_))
@@ -358,7 +358,7 @@ maskzRcp(SimdDouble x, SimdDBool m)
      // The result will always be correct since we mask the result with m, but
      // for debug builds we also want to make sure not to generate FP exceptions
  #ifndef NDEBUG
-    x.simdInternal_ = vbslq_f64(m.simdInternal_, vdupq_n_f64(1.0, x.simdInternal_);
+    x.simdInternal_ = vbslq_f64(m.simdInternal_, x.simdInternal_, vdupq_n_f64(1.0f));
  #endif
      return {
                 float64x2_t(vandq_u64(uint64x2_t(vrecpeq_f64(x.simdInternal_)), m.simdInternal_))
@@ -535,7 +535,7 @@ static inline SimdDInt32 gmx_simdcall
  operator<<(SimdDInt32 a, int n)
  {
      return {
-        vshl_n_s32(a.simdInternal_, n)
+        vshl_s32(a.simdInternal_, vdup_n_s32(n >= 32 ? 32 : n))
      };
  }
  
@@ -543,7 +543,7 @@ static inline SimdDInt32 gmx_simdcall
  operator>>(SimdDInt32 a, int n)
  {
      return {
-        vshr_n_s32(a.simdInternal_, n)
+        vshl_s32(a.simdInternal_, vdup_n_s32(n >= 32 ? -32 : -n))
      };
  }
author	Mark Abraham <mark.j.abraham@gmail.com>
	Thu, 13 Jul 2017 11:23:52 +0000 (11:23 +0000)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Mon, 21 Aug 2017 12:45:11 +0000 (14:45 +0200)
cmake/gmxDetectSimd.cmake		patch \| blob \| history
cmake/gmxManageSimd.cmake		patch \| blob \| history
src/gromacs/simd/impl_arm_neon/impl_arm_neon_simd_float.h		patch \| blob \| history
src/gromacs/simd/impl_arm_neon_asimd/impl_arm_neon_asimd_simd_double.h		patch \| blob \| history