elseif(GMX_SIMD STREQUAL "ARM_NEON")
- gmx_find_cflag_for_source(CFLAGS_ARM_NEON "C compiler 32-bit ARM NEON flag"
+ gmx_find_cflag_for_source(CFLAGS_ARM_NEON "C compiler ARM NEON flag"
"#include<arm_neon.h>
int main(){float32x4_t x=vdupq_n_f32(0.5);x=vmlaq_f32(x,x,x);return vgetq_lane_f32(x,0)>0;}"
SIMD_C_FLAGS
- "-mfpu=neon" "")
- gmx_find_cxxflag_for_source(CXXFLAGS_ARM_NEON "C++ compiler 32-bit ARM NEON flag"
+ "-mfpu=neon-vfpv4" "-mfpu=neon" "")
+ gmx_find_cxxflag_for_source(CXXFLAGS_ARM_NEON "C++ compiler ARM NEON flag"
"#include<arm_neon.h>
int main(){float32x4_t x=vdupq_n_f32(0.5);x=vmlaq_f32(x,x,x);return vgetq_lane_f32(x,0)>0;}"
SIMD_CXX_FLAGS
- "-mfpu=neon" "-D__STDC_CONSTANT_MACROS" "")
+ "-mfpu=neon-vfpv4" "-mfpu=neon" "-D__STDC_CONSTANT_MACROS" "")
if(NOT CFLAGS_ARM_NEON OR NOT CXXFLAGS_ARM_NEON)
- gmx_give_fatal_error_when_simd_support_not_found("ARM 32-bit NEON" "disable SIMD support (slower)" "${SUGGEST_BINUTILS_UPDATE}")
+ gmx_give_fatal_error_when_simd_support_not_found("ARM NEON" "disable SIMD support (slower)" "${SUGGEST_BINUTILS_UPDATE}")
endif()
set(GMX_SIMD_ARM_NEON 1)
set(SIMD_STATUS_MESSAGE "Enabling 32-bit ARM NEON SIMD instructions")
elseif(GMX_SIMD STREQUAL "ARM_NEON_ASIMD")
- # Gcc-4.8.1 appears to have a bug where the c++ compiler requires
- # -D__STDC_CONSTANT_MACROS if we include arm_neon.h
gmx_find_cflag_for_source(CFLAGS_ARM_NEON_ASIMD "C compiler ARM NEON Advanced SIMD flag"
"#include<arm_neon.h>
- int main(){float64x2_t x=vdupq_n_f64(0.5);x=vfmaq_f64(x,x,x);return vgetq_lane_f64(x,0)>0;}"
+ int main(){float64x2_t x=vdupq_n_f64(0.5);x=vfmaq_f64(x,x,x);x=vrndnq_f64(x);return vgetq_lane_f64(x,0)>0;}"
SIMD_C_FLAGS
"")
gmx_find_cxxflag_for_source(CXXFLAGS_ARM_NEON_ASIMD "C++ compiler ARM NEON Advanced SIMD flag"
"#include<arm_neon.h>
- int main(){float64x2_t x=vdupq_n_f64(0.5);x=vfmaq_f64(x,x,x);return vgetq_lane_f64(x,0)>0;}"
+ int main(){float64x2_t x=vdupq_n_f64(0.5);x=vfmaq_f64(x,x,x);x=vrndnq_f64(x);return vgetq_lane_f64(x,0)>0;}"
SIMD_CXX_FLAGS
- "-D__STDC_CONSTANT_MACROS" "")
+ "")
if(NOT CFLAGS_ARM_NEON_ASIMD OR NOT CXXFLAGS_ARM_NEON_ASIMD)
- gmx_give_fatal_error_when_simd_support_not_found("ARM (AArch64) NEON Advanced SIMD" "disable SIMD support (slower)" "${SUGGEST_BINUTILS_UPDATE}")
- endif()
-
- if(CMAKE_C_COMPILER_ID MATCHES "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS "4.9")
- message(WARNING "At least gcc-4.8.1 has many bugs for ARM (AArch64) NEON Advanced SIMD compilation. You might need gcc version 4.9 or later.")
+ gmx_give_fatal_error_when_simd_support_not_found("ARM (AArch64) NEON Advanced SIMD" "particularly gcc version 4.9 or later, or disable SIMD support (slower)" "${SUGGEST_BINUTILS_UPDATE}")
endif()
if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_LESS "3.4")
elseif(GMX_SIMD STREQUAL "IBM_VSX")
- # Altivec was originally single-only, and it took a while for compilers
- # to support the double-precision features in VSX.
- if(GMX_DOUBLE AND CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9")
- message(FATAL_ERROR "Using VSX SIMD in double precision with GCC requires GCC-4.9 or later.")
+ # GNU-specific tuning for VSX. The block is entered when either the C or the C++ compiler is GNU.
+ # The compiler-ID variables are passed by name (not as ${...} expansions) so that an empty ID
+ # cannot produce a malformed if() expression and the expanded value is never dereferenced twice.
+ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "GNU")
+ # VSX uses the same function API as Altivec/VMX, so make sure we tune for the current CPU and not VMX.
+ # By putting these flags here rather than in the general compiler flags file we can safely assume
+ # that we are at least on Power7 since that is when VSX appeared.
+ if(BUILD_CPU_BRAND MATCHES "POWER7")
+ gmx_test_cflag(GNU_C_VSX_POWER7 "-mcpu=power7 -mtune=power7" SIMD_C_FLAGS)
+ gmx_test_cflag(GNU_CXX_VSX_POWER7 "-mcpu=power7 -mtune=power7" SIMD_CXX_FLAGS)
+ else()
+ # Enable power8 vector extensions on all platforms except old Power7.
+ gmx_test_cflag(GNU_C_VSX_POWER8 "-mcpu=power8 -mpower8-vector -mpower8-fusion -mdirect-move" SIMD_C_FLAGS)
+ gmx_test_cflag(GNU_CXX_VSX_POWER8 "-mcpu=power8 -mpower8-vector -mpower8-fusion -mdirect-move" SIMD_CXX_FLAGS)
+ endif()
+ # Altivec was originally single-only, and it took a while for compilers
+ # to support the double-precision features in VSX.
+ # The version test only applies to a GNU C++ compiler: this block is also entered when just the
+ # C compiler is GNU, and another C++ compiler's version number must not be compared against 4.9.
+ if(GMX_DOUBLE AND CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9")
+ message(FATAL_ERROR "Using VSX SIMD in double precision with GCC requires GCC-4.9 or later.")
+ endif()
endif()
gmx_find_cflag_for_source(CFLAGS_IBM_VSX "C compiler IBM VSX SIMD flag"
# Check if the compiler supports one of these, and in that case set gmx_simdcall
# to that string. If we do not have any such calling convention modifier, set it
# to an empty string.
+#
+# Update 2015-11-04: As of version 3.6, clang has added support for __vectorcall
+# (also on Linux). This appears to be buggy for the reference SIMD
+# implementation when using the Debug build (when functions are not inlined)
+# while it seems to work fine for the actual SIMD implementations. This is likely
+# because the reference build ends up passing lots of structures with arrays
+# rather than actual vector data. For now we disable __vectorcall with clang
+# when using the reference build.
+#
if(NOT DEFINED GMX_SIMD_CALLING_CONVENTION)
- foreach(callconv __vectorcall __regcall "")
+ if(GMX_TARGET_BGQ)
+ set(CALLCONV_LIST " ")
+ elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND GMX_SIMD STREQUAL "REFERENCE")
+ set(CALLCONV_LIST __regcall " ")
+ else()
+ set(CALLCONV_LIST __vectorcall __regcall " ")
+ endif()
+ foreach(callconv ${CALLCONV_LIST})
set(callconv_compile_var "_callconv_${callconv}")
check_c_source_compiles("int ${callconv} f(int i) {return i;} int main(void) {return f(0);}" ${callconv_compile_var})
if(${callconv_compile_var})