cmake/gmxManageSimd.cmake

   1 #
   2 # This file is part of the GROMACS molecular simulation package.
   3 #
   4 # Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
   5 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6 # and including many others, as listed in the AUTHORS file in the
   7 # top-level source directory and at http://www.gromacs.org.
   8 #
   9 # GROMACS is free software; you can redistribute it and/or
  10 # modify it under the terms of the GNU Lesser General Public License
  11 # as published by the Free Software Foundation; either version 2.1
  12 # of the License, or (at your option) any later version.
  13 #
  14 # GROMACS is distributed in the hope that it will be useful,
  15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 # Lesser General Public License for more details.
  18 #
  19 # You should have received a copy of the GNU Lesser General Public
  20 # License along with GROMACS; if not, see
  21 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23 #
  24 # If you want to redistribute modifications to GROMACS, please
  25 # consider that scientific software is very special. Version
  26 # control is crucial - bugs must be traceable. We will be happy to
  27 # consider code for inclusion in the official distribution, but
  28 # derived work must not be called official GROMACS. Details are found
  29 # in the README & COPYING files - if they are missing, get the
  30 # official version at http://www.gromacs.org.
  31 #
  32 # To help us fund GROMACS development, we humbly ask that you cite
  33 # the research papers on the package. Check out http://www.gromacs.org.
  34
  35 include(gmxDetectCpu)
  36 include(gmxFindFlagsForSource)
  37
  38 # Macro that manages setting the respective C and C++ toolchain
  39 # variables so that subsequent tests for SIMD support can work.
  40 macro(prepare_x86_toolchain TOOLCHAIN_C_FLAGS_VARIABLE TOOLCHAIN_CXX_FLAGS_VARIABLE)
  41     # On OS X, we often want to use gcc instead of clang, since gcc
  42     # supports OpenMP (until clang 3.8, or so, plus whenever Apple
  43     # support it in their version). However, by default gcc uses the
  44     # external system assembler, which does not support AVX, so we
  45     # need to tell the linker to use the clang compilers assembler
  46     # instead - and this has to happen before we detect AVX flags.
  47     if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "GNU")
  48         gmx_test_cflag(GNU_C_USE_CLANG_AS "-Wa,-q" ${TOOLCHAIN_C_FLAGS_VARIABLE})
  49     endif()
  50     if(APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  51         gmx_test_cxxflag(GNU_CXX_USE_CLANG_AS "-Wa,-q" ${TOOLCHAIN_CXX_FLAGS_VARIABLE})
  52     endif()
  53 endmacro()
  54
  55 # Macro that manages setting the respective C and C++ toolchain
  56 # variables so that subsequent tests for SIMD support can work.
  57 macro(prepare_power_vsx_toolchain TOOLCHAIN_C_FLAGS_VARIABLE TOOLCHAIN_CXX_FLAGS_VARIABLE)
  58     if(${CMAKE_CXX_COMPILER_ID} MATCHES "GNU" OR ${CMAKE_C_COMPILER_ID} MATCHES "GNU")
  59         # VSX uses the same function API as Altivec/VMX, so make sure we tune for the current CPU and not VMX.
  60         # By putting these flags here rather than in the general compiler flags file we can safely assume
  61         # that we are at least on Power7 since that is when VSX appeared.
  62         gmx_run_cpu_detection(brand)
  63         if(CPU_DETECTION_BRAND MATCHES "POWER7")
  64             gmx_test_cflag(GNU_C_VSX_POWER7   "-mcpu=power7 -mtune=power7" ${TOOLCHAIN_C_FLAGS_VARIABLE})
  65             gmx_test_cflag(GNU_CXX_VSX_POWER7 "-mcpu=power7 -mtune=power7" ${TOOLCHAIN_CXX_FLAGS_VARIABLE})
  66         else()
  67             # Enable power8 vector extensions on all platforms except old Power7.
  68             gmx_test_cflag(GNU_C_VSX_POWER8   "-mcpu=power8 -mpower8-vector -mpower8-fusion -mdirect-move" ${TOOLCHAIN_C_FLAGS_VARIABLE})
  69             gmx_test_cflag(GNU_CXX_VSX_POWER8 "-mcpu=power8 -mpower8-vector -mpower8-fusion -mdirect-move" ${TOOLCHAIN_CXX_FLAGS_VARIABLE})
  70         endif()
  71         # Altivec was originally single-only, and it took a while for compilers
  72         # to support the double-precision features in VSX.
  73         if(GMX_DOUBLE AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9")
  74             message(FATAL_ERROR "Using VSX SIMD in double precision with GCC requires GCC-4.9 or later.")
  75         endif()
  76     endif()
  77     if(${CMAKE_CXX_COMPILER_ID} MATCHES "XL" OR ${CMAKE_C_COMPILER_ID} MATCHES "XL")
  78         if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "13.1.5" OR CMAKE_C_COMPILER_VERSION VERSION_LESS "13.1.5")
  79             message(FATAL_ERROR "Using VSX SIMD requires XL compiler version 13.1.5 or later.")
  80         endif()
  81     endif()
  82 endmacro()
  83
  84 # Issue a fatal error with an appropriate message, when the toolchain
  85 # was not able to compile code for SIMD support.
  86 #
  87 # Inputs:
  88 #  SIMD_STRING              A string describing the kind of SIMD support that didn't work.
  89 #  ALTERNATIVE_SUGGESTION   A string describing anything the user could try other than getting a new compiler.
  90 #  SUGGEST_BINUTILS_UPDATE  True when there's information that the compiler was OK, but something else was not.
  91 function(gmx_give_fatal_error_when_simd_support_not_found SIMD_STRING ALTERNATIVE_SUGGESTION SUGGEST_BINUTILS_UPDATE)
  92     if(SUGGEST_BINUTILS_UPDATE)
  93         set(_msg "Found a compiler flag for ${SIMD_STRING} support, but some other problem exists. Update your assembler and/or linker, e.g. in the binutils package of your distribution.")
  94     else()
  95         set(_msg "Cannot find ${SIMD_STRING} compiler flag. Use a newer compiler, or ${ALTERNATIVE_SUGGESTION}.")
  96     endif()
  97     message(FATAL_ERROR ${_msg})
  98 endfunction()
  99
 100 macro(gmx_manage_simd)
 101
 102 set(GMX_SIMD_ACCURACY_BITS_SINGLE 22 CACHE STRING "Target mantissa bits for SIMD single math")
 103 #
 104 # Note that we typically restrict double precision target accuracy to be twice that
 105 # of single. This means we only need one more N-R iteration for 1/sqrt(x) and 1(x),
 106 # and the first iteration can sometimes be done as a pair in single precision. This should
 107 # be plenty enough for Molecular Dynamics applications. Many of our double precision math
 108 # functions still achieve very close to full double precision, but we do not guarantee that
 109 # they will be able to achieve higher accuracy if you set this beyond 44 bits. GROMACS will
 110 # work - but some unit tests might fail.
 111 #
 112 set(GMX_SIMD_ACCURACY_BITS_DOUBLE 44 CACHE STRING "Target mantissa bits for SIMD double math")
 113 mark_as_advanced(GMX_SIMD_ACCURACY_BITS_SINGLE)
 114 mark_as_advanced(GMX_SIMD_ACCURACY_BITS_DOUBLE)
 115
 116 if(${GMX_SIMD_ACCURACY_BITS_SINGLE} GREATER 22)
 117     message(STATUS "Note: Full mantissa accuracy (including least significant bit) requested for SIMD single math. Presently we cannot get the least significant bit correct since that would require different algorithms - reducing to 22 bits.")
 118     set(GMX_SIMD_ACCURACY_BITS_SINGLE 22 CACHE STRING "Target mantissa bits for SIMD single math" FORCE)
 119 endif()
 120
 121 if(${GMX_SIMD_ACCURACY_BITS_DOUBLE} GREATER 51)
 122     message(STATUS "Note: Full mantissa accuracy (including least significant bit) requested for SIMD double math. Presently we cannot get the least significant bit correct since that would require different algorithms - reducing to 51 bits.")
 123     set(GMX_SIMD_ACCURACY_BITS_DOUBLE 51 CACHE STRING "Target mantissa bits for SIMD double math" FORCE)
 124 endif()
 125
 126 #
 127 # Section to set (and test) compiler flags for SIMD.
 128 #
 129 # If the user chose the (default) automatic behaviour, then detection
 130 # is run to suggest a SIMD choice suitable for the build
 131 # host. Otherwise, the users's choice is always honoured. The compiler
 132 # flags will be set based on that choice.
 133 #
 134
 135 set(GMX_SIMD_ACTIVE ${GMX_SIMD})
 136 if(GMX_SIMD STREQUAL "AUTO")
 137     include(gmxDetectSimd)
 138     gmx_detect_simd(GMX_SUGGESTED_SIMD)
 139     set(GMX_SIMD_ACTIVE ${GMX_SUGGESTED_SIMD})
 140 endif()
 141
 142 if(GMX_SIMD_ACTIVE STREQUAL "NONE")
 143     # nothing to do configuration-wise
 144     set(SIMD_STATUS_MESSAGE "SIMD instructions disabled")
 145 elseif(GMX_SIMD_ACTIVE STREQUAL "SSE2")
 146
 147     gmx_find_flags(
 148         "#include<xmmintrin.h>
 149          int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_rsqrt_ps(x);return _mm_movemask_ps(x);}"
 150         TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
 151         SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS
 152         "-msse2" "/arch:SSE2" "-hgnu")
 153
 154     if(NOT SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS OR NOT SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS)
 155         gmx_give_fatal_error_when_simd_support_not_found("SSE2" "disable SIMD support (slow)" "${SUGGEST_BINUTILS_UPDATE}")
 156     endif()
 157
 158     set(SIMD_C_FLAGS "${TOOLCHAIN_C_FLAGS}")
 159     set(SIMD_CXX_FLAGS "${TOOLCHAIN_CXX_FLAGS}")
 160     set(GMX_SIMD_X86_${GMX_SIMD_ACTIVE} 1)
 161     set(SIMD_STATUS_MESSAGE "Enabling SSE2 SIMD instructions")
 162
 163 elseif(GMX_SIMD_ACTIVE STREQUAL "SSE4.1")
 164
 165     # Note: MSVC enables SSE4.1 with the SSE2 flag, so we include that in testing.
 166     gmx_find_flags(
 167         "#include<smmintrin.h>
 168         int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_dp_ps(x,x,0x77);return _mm_movemask_ps(x);}"
 169         TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
 170         SIMD_SSE_4_1_C_FLAGS SIMD_SSE_4_1_CXX_FLAGS
 171         "-msse4.1" "/arch:SSE4.1" "/arch:SSE2" "-hgnu")
 172
 173     if(NOT SIMD_SSE_4_1_C_FLAGS OR NOT SIMD_SSE_4_1_CXX_FLAGS)
 174         gmx_give_fatal_error_when_simd_support_not_found("SSE4.1" "choose SSE2 SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 175     endif()
 176
 177     set(SIMD_C_FLAGS "${TOOLCHAIN_C_FLAGS}")
 178     set(SIMD_CXX_FLAGS "${TOOLCHAIN_CXX_FLAGS}")
 179     set(GMX_SIMD_X86_SSE4_1 1)
 180     set(SIMD_STATUS_MESSAGE "Enabling SSE4.1 SIMD instructions")
 181
 182 elseif(GMX_SIMD_ACTIVE STREQUAL "AVX_128_FMA")
 183
 184     prepare_x86_toolchain(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)
 185
 186     # We don't have the full compiler version string yet (BUILD_C_COMPILER),
 187     # so we can't distinguish vanilla from Apple clang versions, but catering for a few rare AMD
 188     # hackintoshes is not worth the effort.
 189     if (APPLE AND (CMAKE_C_COMPILER_ID STREQUAL "Clang" OR
 190                 CMAKE_CXX_COMPILER_ID STREQUAL "Clang"))
 191         message(WARNING "Due to a known compiler bug, Clang up to version 3.2 (and Apple Clang up to version 4.1) produces incorrect code with AVX_128_FMA SIMD. As we cannot work around this bug on OS X, you will have to select a different compiler or SIMD instruction set.")
 192     endif()
 193
 194     # clang <=3.2 contains a bug that causes incorrect code to be generated for the
 195     # vfmaddps instruction and therefore the bug is triggered with AVX_128_FMA.
 196     # (see: http://llvm.org/bugs/show_bug.cgi?id=15040).
 197     # We can work around this by not using the integrated assembler (except on OS X
 198     # which has an outdated assembler that does not support AVX instructions).
 199     if (CMAKE_C_COMPILER_ID MATCHES "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_LESS "3.3")
 200         # we assume that we have an external assembler that supports AVX
 201         message(STATUS "Clang ${CMAKE_C_COMPILER_VERSION} detected, enabling FMA bug workaround")
 202         set(TOOLCHAIN_C_FLAGS "${TOOLCHAIN_C_FLAGS} -no-integrated-as")
 203     endif()
 204     if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "3.3")
 205         # we assume that we have an external assembler that supports AVX
 206         message(STATUS "Clang ${CMAKE_CXX_COMPILER_VERSION} detected, enabling FMA bug workaround")
 207         set(TOOLCHAIN_CXX_FLAGS "${TOOLCHAIN_CXX_FLAGS} -no-integrated-as")
 208     endif()
 209
 210     # AVX128/FMA on AMD is a bit complicated. We need to do detection in three stages:
 211     # 1) Find the flags required for generic AVX support
 212     # 2) Find the flags necessary to enable fused-multiply add support
 213     # 3) Optional: Find a flag to enable the AMD XOP instructions
 214
 215     ### STAGE 1: Find the generic AVX flag
 216     gmx_find_flags(
 217         "#include<immintrin.h>
 218         int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_permute_ps(x,1);return 0;}"
 219         TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
 220         SIMD_GENERIC_AVX_C_FLAGS SIMD_GENERIC_AVX_CXX_FLAGS
 221         "-mavx" "/arch:AVX" "-hgnu")
 222
 223     ### STAGE 2: Find the fused-multiply add flag.
 224     # GCC requires x86intrin.h for FMA support. MSVC 2010 requires intrin.h for FMA support.
 225     check_include_file(x86intrin.h HAVE_X86INTRIN_H ${SIMD_C_FLAGS})
 226     check_include_file(intrin.h HAVE_INTRIN_H ${SIMD_C_FLAGS})
 227     if(HAVE_X86INTRIN_H)
 228         set(INCLUDE_X86INTRIN_H "#include <x86intrin.h>")
 229     endif()
 230     if(HAVE_INTRIN_H)
 231         set(INCLUDE_INTRIN_H "#include <xintrin.h>")
 232     endif()
 233
 234     gmx_find_flags(
 235         "#include<immintrin.h>
 236         ${INCLUDE_X86INTRIN_H}
 237         ${INCLUDE_INTRIN_H}
 238         int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_macc_ps(x,x,x);return _mm_movemask_ps(x);}"
 239         TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
 240         SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS
 241         "-mfma4" "-hgnu")
 242
 243     # We only need to check the last (FMA) test; that will always fail if the generic AVX test failed
 244     if(NOT SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS OR NOT SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS)
 245         gmx_give_fatal_error_when_simd_support_not_found("128-bit AVX with FMA support" "choose SSE4.1 SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 246     endif()
 247
 248     ### STAGE 3: Optional: Find the XOP instruction flag (No point in yelling if this does not work)
 249     gmx_find_flags(
 250         "#include<immintrin.h>
 251         ${INCLUDE_X86INTRIN_H}
 252         ${INCLUDE_INTRIN_H}
 253         int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_frcz_ps(x);return _mm_movemask_ps(x);}"
 254         TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
 255         SIMD_AVX_128_XOP_C_FLAGS SIMD_AVX_128_XOP_CXX_FLAGS
 256         "-mxop")
 257
 258     set(SIMD_C_FLAGS "${TOOLCHAIN_C_FLAGS}")
 259     set(SIMD_CXX_FLAGS "${TOOLCHAIN_CXX_FLAGS}")
 260     set(GMX_SIMD_X86_${GMX_SIMD_ACTIVE} 1)
 261     set(SIMD_STATUS_MESSAGE "Enabling 128-bit AVX SIMD GROMACS SIMD (with fused-multiply add)")
 262
 263 elseif(GMX_SIMD_ACTIVE STREQUAL "AVX_256")
 264
 265     prepare_x86_toolchain(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)
 266
 267     gmx_find_flags(
 268         "#include<immintrin.h>
 269          int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_add_ps(x,x);return _mm256_movemask_ps(x);}"
 270         TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
 271         SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS
 272         "-mavx" "/arch:AVX" "-hgnu")
 273
 274     if(NOT SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS OR NOT SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS)
 275         gmx_give_fatal_error_when_simd_support_not_found("AVX" "choose SSE4.1 SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 276     endif()
 277
 278     set(SIMD_C_FLAGS "${TOOLCHAIN_C_FLAGS}")
 279     set(SIMD_CXX_FLAGS "${TOOLCHAIN_CXX_FLAGS}")
 280     set(GMX_SIMD_X86_${GMX_SIMD_ACTIVE} 1)
 281     set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX SIMD instructions")
 282
 283 elseif(GMX_SIMD_ACTIVE MATCHES "AVX2_")
 284
 285     prepare_x86_toolchain(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)
 286
 287     gmx_find_flags(
 288         "#include<immintrin.h>
 289          int main(){__m256i x=_mm256_set1_epi32(5);x=_mm256_add_epi32(x,x);return _mm256_movemask_epi8(x);}"
 290         TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
 291         SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS
 292         "-march=core-avx2" "-mavx2" "/arch:AVX" "-hgnu") # no AVX2-specific flag for MSVC yet
 293
 294     if(NOT SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS OR NOT SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS)
 295         gmx_give_fatal_error_when_simd_support_not_found("AVX2" "choose AVX SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 296     endif()
 297
 298     set(SIMD_C_FLAGS "${TOOLCHAIN_C_FLAGS}")
 299     set(SIMD_CXX_FLAGS "${TOOLCHAIN_CXX_FLAGS}")
 300     set(GMX_SIMD_X86_${GMX_SIMD_ACTIVE} 1)
 301
 302     if(GMX_SIMD_ACTIVE STREQUAL "AVX2_128")
 303         set(SIMD_STATUS_MESSAGE "Enabling 128-bit AVX2 SIMD instructions")
 304     else()
 305         set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX2 SIMD instructions")
 306     endif()
 307
 308 elseif(GMX_SIMD_ACTIVE STREQUAL "MIC")
 309
 310     # No flags needed. Not testing.
 311     set(GMX_SIMD_X86_MIC 1)
 312     set(SIMD_STATUS_MESSAGE "Enabling MIC (Xeon Phi) SIMD instructions")
 313
 314 elseif(GMX_SIMD_ACTIVE STREQUAL "AVX_512")
 315
 316     prepare_x86_toolchain(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)
 317
 318     gmx_find_flags(
 319         "#include<immintrin.h>
 320          int main(){__m512 y,x=_mm512_set1_ps(0.5);y=_mm512_fmadd_ps(x,x,x);return (int)_mm512_cmp_ps_mask(x,y,_CMP_LT_OS);}"
 321         TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
 322         SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS
 323         "-xCORE-AVX512" "-mavx512f -mfma" "-mavx512f" "/arch:AVX" "-hgnu") # no AVX_512F flags known for MSVC yet
 324
 325     if(NOT SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS OR NOT SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS)
 326         gmx_give_fatal_error_when_simd_support_not_found("AVX 512F" "choose a lower level of SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 327     endif()
 328
 329     set(SIMD_C_FLAGS "${TOOLCHAIN_C_FLAGS}")
 330     set(SIMD_CXX_FLAGS "${TOOLCHAIN_CXX_FLAGS}")
 331     set(GMX_SIMD_X86_${GMX_SIMD_ACTIVE} 1)
 332     set(SIMD_STATUS_MESSAGE "Enabling 512-bit AVX-512 SIMD instructions")
 333
 334 elseif(GMX_SIMD_ACTIVE STREQUAL "AVX_512_KNL")
 335
 336     prepare_x86_toolchain(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)
 337
 338     gmx_find_flags(
 339         "#include<immintrin.h>
 340         int main(){__m512 y,x=_mm512_set1_ps(0.5);y=_mm512_rsqrt28_ps(x);return (int)_mm512_cmp_ps_mask(x,y,_CMP_LT_OS);}"
 341         TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
 342         SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS
 343         "-xMIC-AVX512" "-mavx512er -mfma" "-mavx512er" "/arch:AVX" "-hgnu") # no AVX_512ER flags known for MSVC yet
 344
 345     if(NOT SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS OR NOT SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS)
 346         gmx_give_fatal_error_when_simd_support_not_found("AVX 512ER" "choose a lower level of SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 347     endif()
 348
 349     set(SIMD_C_FLAGS "${TOOLCHAIN_C_FLAGS}")
 350     set(SIMD_CXX_FLAGS "${TOOLCHAIN_CXX_FLAGS}")
 351     set(GMX_SIMD_X86_${GMX_SIMD_ACTIVE} 1)
 352     set(SIMD_STATUS_MESSAGE "Enabling 512-bit AVX-512-KNL SIMD instructions")
 353
 354 elseif(GMX_SIMD_ACTIVE STREQUAL "ARM_NEON")
 355
 356     gmx_find_flags(
 357         "#include<arm_neon.h>
 358          int main(){float32x4_t x=vdupq_n_f32(0.5);x=vmlaq_f32(x,x,x);return vgetq_lane_f32(x,0)>0;}"
 359         TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
 360         SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS
 361         "-mfpu=neon-vfpv4" "-mfpu=neon" "")
 362
 363     if(NOT SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS OR NOT SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS)
 364         gmx_give_fatal_error_when_simd_support_not_found("ARM NEON" "disable SIMD support (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 365     endif()
 366
 367     set(SIMD_C_FLAGS "${TOOLCHAIN_C_FLAGS}")
 368     set(SIMD_CXX_FLAGS "${TOOLCHAIN_CXX_FLAGS}")
 369     set(GMX_SIMD_${GMX_SIMD_ACTIVE} 1)
 370     set(SIMD_STATUS_MESSAGE "Enabling 32-bit ARM NEON SIMD instructions")
 371
 372 elseif(GMX_SIMD_ACTIVE STREQUAL "ARM_NEON_ASIMD")
 373
 374     gmx_find_flags(
 375         "#include<arm_neon.h>
 376          int main(){float64x2_t x=vdupq_n_f64(0.5);x=vfmaq_f64(x,x,x);x=vrndnq_f64(x);return vgetq_lane_f64(x,0)>0;}"
 377         TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
 378         SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS
 379         "")
 380
 381     if(NOT SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS OR NOT SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS)
 382         gmx_give_fatal_error_when_simd_support_not_found("ARM (AArch64) NEON Advanced SIMD" "particularly gcc version 4.9 or later, or disable SIMD support (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 383     endif()
 384
 385     set(SIMD_C_FLAGS "${TOOLCHAIN_C_FLAGS}")
 386     set(SIMD_CXX_FLAGS "${TOOLCHAIN_CXX_FLAGS}")
 387     set(GMX_SIMD_${GMX_SIMD_ACTIVE} 1)
 388     set(SIMD_STATUS_MESSAGE "Enabling ARM (AArch64) NEON Advanced SIMD instructions")
 389
 390 elseif(GMX_SIMD_ACTIVE STREQUAL "IBM_QPX")
 391
 392     try_compile(TEST_QPX ${CMAKE_BINARY_DIR}
 393         "${CMAKE_SOURCE_DIR}/cmake/TestQPX.c")
 394
 395     if (TEST_QPX)
 396         message(WARNING "IBM QPX SIMD instructions selected. This will work, but SIMD kernels are only available for the Verlet cut-off scheme. The plain C kernels that are used for the group cut-off scheme kernels will be slow, so please consider using the Verlet cut-off scheme.")
 397         set(GMX_SIMD_${GMX_SIMD_ACTIVE} 1)
 398         set(SIMD_STATUS_MESSAGE "Enabling IBM QPX SIMD instructions")
 399
 400     else()
 401         gmx_give_fatal_error_when_simd_support_not_found("IBM QPX" "or 'cmake .. -DCMAKE_TOOLCHAIN_FILE=Platform/BlueGeneQ-static-bgclang-CXX' to set up the tool chain" "${SUGGEST_BINUTILS_UPDATE}")
 402     endif()
 403
 404 elseif(GMX_SIMD_ACTIVE STREQUAL "IBM_VMX")
 405
 406     gmx_find_flags(
 407         "#include<altivec.h>
 408          int main(){vector float x,y=vec_ctf(vec_splat_s32(1),0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
 409         TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
 410         SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS
 411         "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
 412
 413     if(NOT SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS OR NOT SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS)
 414         gmx_give_fatal_error_when_simd_support_not_found("IBM VMX" "disable SIMD support (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 415     endif()
 416
 417     set(SIMD_C_FLAGS "${TOOLCHAIN_C_FLAGS}")
 418     set(SIMD_CXX_FLAGS "${TOOLCHAIN_CXX_FLAGS}")
 419     set(GMX_SIMD_${GMX_SIMD_ACTIVE} 1)
 420     set(SIMD_STATUS_MESSAGE "Enabling IBM VMX SIMD instructions")
 421
 422 elseif(GMX_SIMD_ACTIVE STREQUAL "IBM_VSX")
 423
 424     prepare_power_vsx_toolchain(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)
 425
 426     gmx_find_flags(
 427         "#include<altivec.h>
 428          int main(){vector double x,y=vec_splats(1.0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
 429         TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
 430         SIMD_${GMX_SIMD_ACTIVE}_C_FLAGS SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS
 431         "-mvsx" "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
 432
 433     # Usually we check also for the C compiler here, but a C compiler
 434     # is not required for SIMD support on this platform. cmake through
 435     # at least version 3.7 cannot pass this check with the C compiler
 436     # in the latest xlc 13.1.5, but the C++ compiler has different
 437     # behaviour and is OK. See Redmine #2102.
 438     if(NOT SIMD_${GMX_SIMD_ACTIVE}_CXX_FLAGS)
 439         gmx_give_fatal_error_when_simd_support_not_found("IBM VSX" "disable SIMD support (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 440     endif()
 441
 442     set(SIMD_C_FLAGS "${TOOLCHAIN_C_FLAGS}")
 443     set(SIMD_CXX_FLAGS "${TOOLCHAIN_CXX_FLAGS}")
 444     set(GMX_SIMD_${GMX_SIMD_ACTIVE} 1)
 445     set(SIMD_STATUS_MESSAGE "Enabling IBM VSX SIMD instructions")
 446
 447 elseif(GMX_SIMD_ACTIVE STREQUAL "SPARC64_HPC_ACE")
 448
 449     # Note that GMX_RELAXED_DOUBLE_PRECISION is enabled by default in the top-level CMakeLists.txt
 450
 451     set(GMX_SIMD_${GMX_SIMD_ACTIVE} 1)
 452     set(SIMD_STATUS_MESSAGE "Enabling Sparc64 HPC-ACE SIMD instructions")
 453
 454 elseif(GMX_SIMD_ACTIVE STREQUAL "REFERENCE")
 455
 456     # NB: This file handles settings for the SIMD module, so in the interest
 457     # of proper modularization, please do NOT put any verlet kernel settings in this file.
 458
 459     if(GMX_SIMD_REF_FLOAT_WIDTH)
 460         add_definitions(-DGMX_SIMD_REF_FLOAT_WIDTH=${GMX_SIMD_REF_FLOAT_WIDTH})
 461     endif()
 462     if(GMX_SIMD_REF_DOUBLE_WIDTH)
 463         add_definitions(-DGMX_SIMD_REF_DOUBLE_WIDTH=${GMX_SIMD_REF_DOUBLE_WIDTH})
 464     endif()
 465
 466     set(GMX_SIMD_${GMX_SIMD_ACTIVE} 1)
 467     set(SIMD_STATUS_MESSAGE "Enabling reference (emulated) SIMD instructions.")
 468
 469 else()
 470     gmx_invalid_option_value(GMX_SIMD_ACTIVE)
 471 endif()
 472
 473
 474 gmx_check_if_changed(SIMD_CHANGED GMX_SIMD_ACTIVE)
 475 if (SIMD_CHANGED AND DEFINED SIMD_STATUS_MESSAGE)
 476     message(STATUS "${SIMD_STATUS_MESSAGE}")
 477 endif()
 478
 479 # By default, 32-bit windows cannot pass SIMD (SSE/AVX) arguments in registers,
 480 # and even on 64-bit (all platforms) it is only used for a handful of arguments.
 481 # The __vectorcall (MSVC, from MSVC2013) or __regcall (ICC) calling conventions
 482 # enable this, which is critical to enable 32-bit SIMD and improves performance
 483 # for 64-bit SIMD.
 484 # Check if the compiler supports one of these, and in that case set gmx_simdcall
 485 # to that string. If we do not have any such calling convention modifier, set it
 486 # to an empty string.
 487 #
 488 # Update 2015-11-04: As of version 3.6, clang has added support for __vectorcall
 489 # (also on Linux). This appears to be buggy for the reference SIMD
 490 # implementation when using the Debug build (when functions are not inlined)
 491 # while it seems works fine for the actual SIMD implementations. This is likely
 492 # because the reference build ends up passing lots of structures with arrays
 493 # rather than actual vector data. For now we disable __vectorcall with clang
 494 # when using the reference build.
 495 #
 496 # xlc 13.1.5 does not seem recognize any attribute, and warns about invalid ones
 497 # so we avoid searching for any.
 498 #
 499 if(NOT DEFINED GMX_SIMD_CALLING_CONVENTION)
 500     if(GMX_TARGET_BGQ)
 501         set(CALLCONV_LIST " ")
 502     elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND GMX_SIMD_ACTIVE STREQUAL "REFERENCE")
 503         set(CALLCONV_LIST __regcall " ")
 504    elseif(CMAKE_CXX_COMPILER_ID MATCHES "XL")
 505         set(CALLCONV_LIST " ")
 506     else()
 507         set(CALLCONV_LIST __vectorcall __regcall " ")
 508     endif()
 509     foreach(callconv ${CALLCONV_LIST})
 510         set(callconv_compile_var "_callconv_${callconv}")
 511         check_c_source_compiles("int ${callconv} f(int i) {return i;} int main(void) {return f(0);}" ${callconv_compile_var})
 512         if(${callconv_compile_var})
 513             set(GMX_SIMD_CALLING_CONVENTION "${callconv}" CACHE INTERNAL "Calling convention for SIMD routines" FORCE)
 514             break()
 515         endif()
 516     endforeach()
 517 endif()
 518
 519 if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
 520     # GCC bug 49001, 54412 on Windows (just warn, since it might be fixed in later versions)
 521     if((CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9.0" OR CMAKE_SIZEOF_VOID_P EQUAL 8)
 522             AND (WIN32 OR CYGWIN)
 523             AND (GMX_SIMD_ACTIVE MATCHES "AVX") AND NOT (GMX_SIMD_ACTIVE STREQUAL "AVX_128_FMA"))
 524         message(WARNING "GCC on Windows (GCC older than 4.9 in 32-bit mode, or any version in 64-bit mode) with 256-bit AVX will probably crash. You might want to choose a different GMX_SIMD or a different compiler.")
 525     endif()
 526 endif()
 527
 528 endmacro()
 529