cmake/gmxTestSimd.cmake

   1 #
   2 # This file is part of the GROMACS molecular simulation package.
   3 #
   4 # Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
   5 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6 # and including many others, as listed in the AUTHORS file in the
   7 # top-level source directory and at http://www.gromacs.org.
   8 #
   9 # GROMACS is free software; you can redistribute it and/or
  10 # modify it under the terms of the GNU Lesser General Public License
  11 # as published by the Free Software Foundation; either version 2.1
  12 # of the License, or (at your option) any later version.
  13 #
  14 # GROMACS is distributed in the hope that it will be useful,
  15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 # Lesser General Public License for more details.
  18 #
  19 # You should have received a copy of the GNU Lesser General Public
  20 # License along with GROMACS; if not, see
  21 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23 #
  24 # If you want to redistribute modifications to GROMACS, please
  25 # consider that scientific software is very special. Version
  26 # control is crucial - bugs must be traceable. We will be happy to
  27 # consider code for inclusion in the official distribution, but
  28 # derived work must not be called official GROMACS. Details are found
  29 # in the README & COPYING files - if they are missing, get the
  30 # official version at http://www.gromacs.org.
  31 #
  32 # To help us fund GROMACS development, we humbly ask that you cite
  33 # the research papers on the package. Check out http://www.gromacs.org.
  34
  35 # include avx test source, used if the AVX flags are set below
  36 include(gmxTestAVXMaskload)
  37 include(gmxFindFlagsForSource)
  38
  39
  40 macro(gmx_use_clang_as_with_gnu_compilers_on_osx)
  41     # On OS X, we often want to use gcc instead of clang, since gcc supports
  42     # OpenMP. However, by default gcc uses the external system assembler, which
  43     # does not support AVX, so we need to tell the linker to use the clang
  44     # compilers assembler instead - and this has to happen before we detect AVX
  45     # flags.
  46     if(APPLE AND ${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
  47         gmx_test_cflag(GNU_C_USE_CLANG_AS "-Wa,-q" SIMD_C_FLAGS)
  48     endif()
  49     if(APPLE AND ${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
  50         gmx_test_cxxflag(GNU_CXX_USE_CLANG_AS "-Wa,-q" SIMD_CXX_FLAGS)
  51     endif()
  52 endmacro()
  53
  54
  55 macro(gmx_test_simd)
  56 #
  57 # To improve backward compatibility on x86 SIMD architectures,
  58 # we set the flags for all SIMD instructions that are supported, not only
  59 # the most recent instruction set. I.e., if your machine supports AVX2_256,
  60 # we will set flags both for AVX2_256, AVX_256, SSE4.1, and SSE2 support.
  61
  62 if(${GMX_SIMD} STREQUAL "NONE")
  63     # nothing to do configuration-wise
  64     set(SIMD_STATUS_MESSAGE "SIMD instructions disabled")
  65 elseif(${GMX_SIMD} STREQUAL "SSE2")
  66
  67     gmx_find_cflag_for_source(CFLAGS_SSE2 "C compiler SSE2 flag"
  68                               "#include<xmmintrin.h>
  69                               int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_rsqrt_ps(x);return _mm_movemask_ps(x);}"
  70                               SIMD_C_FLAGS
  71                               "-msse2" "/arch:SSE2" "-hgnu")
  72     gmx_find_cxxflag_for_source(CXXFLAGS_SSE2 "C++ compiler SSE2 flag"
  73                                 "#include<xmmintrin.h>
  74                                 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_rsqrt_ps(x);return _mm_movemask_ps(x);}"
  75                                 SIMD_CXX_FLAGS
  76                                 "-msse2" "/arch:SSE2" "-hgnu")
  77
  78     if(NOT CFLAGS_SSE2 OR NOT CXXFLAGS_SSE2)
  79         message(FATAL_ERROR "Cannot find SSE2 compiler flag. Use a newer compiler, or disable SIMD (slower).")
  80     endif()
  81
  82     set(GMX_SIMD_X86_SSE2 1)
  83     set(SIMD_STATUS_MESSAGE "Enabling SSE2 SIMD instructions")
  84
  85 elseif(${GMX_SIMD} STREQUAL "SSE4.1")
  86
  87     # Note: MSVC enables SSE4.1 with the SSE2 flag, so we include that in testing.
  88     gmx_find_cflag_for_source(CFLAGS_SSE4_1 "C compiler SSE4.1 flag"
  89                               "#include<smmintrin.h>
  90                               int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_dp_ps(x,x,0x77);return _mm_movemask_ps(x);}"
  91                               SIMD_C_FLAGS
  92                               "-msse4.1" "/arch:SSE4.1" "/arch:SSE2" "-hgnu")
  93     gmx_find_cxxflag_for_source(CXXFLAGS_SSE4_1 "C++ compiler SSE4.1 flag"
  94                                 "#include<smmintrin.h>
  95                                 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_dp_ps(x,x,0x77);return _mm_movemask_ps(x);}"
  96                                 SIMD_CXX_FLAGS
  97                                 "-msse4.1" "/arch:SSE4.1" "/arch:SSE2" "-hgnu")
  98
  99     if(NOT CFLAGS_SSE4_1 OR NOT CXXFLAGS_SSE4_1)
 100         message(FATAL_ERROR "Cannot find SSE4.1 compiler flag. "
 101                             "Use a newer compiler, or choose SSE2 SIMD (slower).")
 102     endif()
 103
 104     if(CMAKE_C_COMPILER_ID MATCHES "Intel" AND CMAKE_C_COMPILER_VERSION VERSION_EQUAL "11.1")
 105         message(FATAL_ERROR "You are using Intel compiler version 11.1, which produces incorrect results with SSE4.1 SIMD. You need to use a newer compiler (e.g. icc >= 12.0) or in worst case try a lower level of SIMD if performance is not critical.")
 106     endif()
 107
 108     set(GMX_SIMD_X86_SSE4_1 1)
 109     set(SIMD_STATUS_MESSAGE "Enabling SSE4.1 SIMD instructions")
 110
 111 elseif(${GMX_SIMD} STREQUAL "AVX_128_FMA")
 112
 113     gmx_use_clang_as_with_gnu_compilers_on_osx()
 114
 115     # AVX128/FMA on AMD is a bit complicated. We need to do detection in three stages:
 116     # 1) Find the flags required for generic AVX support
 117     # 2) Find the flags necessary to enable fused-multiply add support
 118     # 3) Optional: Find a flag to enable the AMD XOP instructions
 119
 120     ### STAGE 1: Find the generic AVX flag
 121     gmx_find_cflag_for_source(CFLAGS_AVX_128 "C compiler AVX (128 bit) flag"
 122                               "#include<immintrin.h>
 123                               int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_permute_ps(x,1);return 0;}"
 124                               SIMD_C_FLAGS
 125                               "-mavx" "/arch:AVX" "-hgnu")
 126     gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128 "C++ compiler AVX (128 bit) flag"
 127                                 "#include<immintrin.h>
 128                                 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_permute_ps(x,1);return 0;}"
 129                                 SIMD_CXX_FLAGS
 130                                 "-mavx" "/arch:AVX" "-hgnu")
 131
 132     ### STAGE 2: Find the fused-multiply add flag.
 133     # GCC requires x86intrin.h for FMA support. MSVC 2010 requires intrin.h for FMA support.
 134     check_include_file(x86intrin.h HAVE_X86INTRIN_H ${SIMD_C_FLAGS})
 135     check_include_file(intrin.h HAVE_INTRIN_H ${SIMD_C_FLAGS})
 136     if(HAVE_X86INTRIN_H)
 137         set(INCLUDE_X86INTRIN_H "#include <x86intrin.h>")
 138     endif()
 139     if(HAVE_INTRIN_H)
 140         set(INCLUDE_INTRIN_H "#include <xintrin.h>")
 141     endif()
 142
 143     gmx_find_cflag_for_source(CFLAGS_AVX_128_FMA "C compiler AVX (128 bit) FMA4 flag"
 144 "#include<immintrin.h>
 145 ${INCLUDE_X86INTRIN_H}
 146 ${INCLUDE_INTRIN_H}
 147 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_macc_ps(x,x,x);return _mm_movemask_ps(x);}"
 148                               SIMD_C_FLAGS
 149                               "-mfma4" "-hgnu")
 150     gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128_FMA "C++ compiler AVX (128 bit) FMA4 flag"
 151 "#include<immintrin.h>
 152 ${INCLUDE_X86INTRIN_H}
 153 ${INCLUDE_INTRIN_H}
 154 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_macc_ps(x,x,x);return _mm_movemask_ps(x);}"
 155                                 SIMD_CXX_FLAGS
 156                                 "-mfma4" "-hgnu")
 157
 158     # We only need to check the last (FMA) test; that will always fail if the basic AVX128 test failed
 159     if(NOT CFLAGS_AVX_128_FMA OR NOT CXXFLAGS_AVX_128_FMA)
 160         message(FATAL_ERROR "Cannot find compiler flags for 128 bit AVX with FMA support. Use a newer compiler, or choose SSE4.1 SIMD (slower).")
 161     endif()
 162
 163     ### STAGE 3: Optional: Find the XOP instruction flag (No point in yelling if this does not work)
 164     gmx_find_cflag_for_source(CFLAGS_AVX_128_XOP "C compiler AVX (128 bit) XOP flag"
 165 "#include<immintrin.h>
 166 ${INCLUDE_X86INTRIN_H}
 167 ${INCLUDE_INTRIN_H}
 168 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_frcz_ps(x);return _mm_movemask_ps(x);}"
 169                               SIMD_C_FLAGS
 170                               "-mxop")
 171     gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128_XOP "C++ compiler AVX (128 bit) XOP flag"
 172 "#include<immintrin.h>
 173 ${INCLUDE_X86INTRIN_H}
 174 ${INCLUDE_INTRIN_H}
 175 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_frcz_ps(x);return _mm_movemask_ps(x);}"
 176                                 SIMD_CXX_FLAGS
 177                                 "-mxop")
 178
 179     # We don't have the full compiler version string yet (BUILD_C_COMPILER),
 180     # so we can't distinguish vanilla from Apple clang versions, but catering for a few rare AMD
 181     # hackintoshes is not worth the effort.
 182     if (APPLE AND (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR
 183                 ${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang"))
 184         message(WARNING "Due to a known compiler bug, Clang up to version 3.2 (and Apple Clang up to version 4.1) produces incorrect code with AVX_128_FMA SIMD. As we cannot work around this bug on OS X, you will have to select a different compiler or SIMD instruction set.")
 185     endif()
 186
 187
 188     if (GMX_USE_CLANG_C_FMA_BUG_WORKAROUND)
 189         # we assume that we have an external assembler that supports AVX
 190         message(STATUS "Clang ${CMAKE_C_COMPILER_VERSION} detected, enabling FMA bug workaround")
 191         set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -no-integrated-as")
 192     endif()
 193     if (GMX_USE_CLANG_CXX_FMA_BUG_WORKAROUND)
 194         # we assume that we have an external assembler that supports AVX
 195         message(STATUS "Clang ${CMAKE_CXX_COMPILER_VERSION} detected, enabling FMA bug workaround")
 196         set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -no-integrated-as")
 197     endif()
 198
 199     gmx_test_avx_gcc_maskload_bug(GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG "${SIMD_C_FLAGS}")
 200
 201     set(GMX_SIMD_X86_AVX_128_FMA 1)
 202     set(SIMD_STATUS_MESSAGE "Enabling 128-bit AVX SIMD Gromacs SIMD (with fused-multiply add)")
 203
 204 elseif(${GMX_SIMD} STREQUAL "AVX_256")
 205
 206     gmx_use_clang_as_with_gnu_compilers_on_osx()
 207
 208     gmx_find_cflag_for_source(CFLAGS_AVX "C compiler AVX (256 bit) flag"
 209                               "#include<immintrin.h>
 210                               int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_add_ps(x,x);return _mm256_movemask_ps(x);}"
 211                               SIMD_C_FLAGS
 212                               "-mavx" "/arch:AVX" "-hgnu")
 213     gmx_find_cxxflag_for_source(CXXFLAGS_AVX "C++ compiler AVX (256 bit) flag"
 214                                 "#include<immintrin.h>
 215                                 int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_add_ps(x,x);return _mm256_movemask_ps(x);}"
 216                                 SIMD_CXX_FLAGS
 217                                 "-mavx" "/arch:AVX" "-hgnu")
 218
 219     if(NOT CFLAGS_AVX OR NOT CXXFLAGS_AVX)
 220         message(FATAL_ERROR "Cannot find AVX compiler flag. Use a newer compiler, or choose SSE4.1 SIMD (slower).")
 221     endif()
 222
 223     gmx_test_avx_gcc_maskload_bug(GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG "${SIMD_C_FLAGS}")
 224
 225     set(GMX_SIMD_X86_AVX_256 1)
 226     set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX SIMD instructions")
 227
 228 elseif(${GMX_SIMD} STREQUAL "AVX2_256")
 229
 230     gmx_use_clang_as_with_gnu_compilers_on_osx()
 231
 232     gmx_find_cflag_for_source(CFLAGS_AVX2 "C compiler AVX2 flag"
 233                               "#include<immintrin.h>
 234                               int main(){__m256i x=_mm256_set1_epi32(5);x=_mm256_add_epi32(x,x);return _mm256_movemask_epi8(x);}"
 235                               SIMD_C_FLAGS
 236                               "-march=core-avx2" "-mavx2" "/arch:AVX" "-hgnu") # no AVX2-specific flag for MSVC yet
 237     gmx_find_cxxflag_for_source(CXXFLAGS_AVX2 "C++ compiler AVX2 flag"
 238                                 "#include<immintrin.h>
 239                                 int main(){__m256i x=_mm256_set1_epi32(5);x=_mm256_add_epi32(x,x);return _mm256_movemask_epi8(x);}"
 240                                 SIMD_CXX_FLAGS
 241                                 "-march=core-avx2" "-mavx2" "/arch:AVX" "-hgnu") # no AVX2-specific flag for MSVC yet
 242
 243     if(NOT CFLAGS_AVX2 OR NOT CXXFLAGS_AVX2)
 244         message(FATAL_ERROR "Cannot find AVX2 compiler flag. Use a newer compiler, or choose AVX SIMD (slower).")
 245     endif()
 246
 247     # No need to test for Maskload bug - it was fixed before gcc added AVX2 support
 248
 249     set(GMX_SIMD_X86_AVX2_256 1)
 250     set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX2 SIMD instructions")
 251
 252 elseif(${GMX_SIMD} STREQUAL "ARM_NEON")
 253
 254     gmx_find_cflag_for_source(CFLAGS_ARM_NEON "C compiler 32-bit ARM NEON flag"
 255                               "#include<arm_neon.h>
 256                               int main(){float32x4_t x=vdupq_n_f32(0.5);x=vmlaq_f32(x,x,x);return vgetq_lane_f32(x,0)>0;}"
 257                               SIMD_C_FLAGS
 258                               "-mfpu=neon" "")
 259     gmx_find_cxxflag_for_source(CXXFLAGS_ARM_NEON "C++ compiler 32-bit ARM NEON flag"
 260                                 "#include<arm_neon.h>
 261                                 int main(){float32x4_t x=vdupq_n_f32(0.5);x=vmlaq_f32(x,x,x);return vgetq_lane_f32(x,0)>0;}"
 262                                 SIMD_CXX_FLAGS
 263                                 "-mfpu=neon" "-D__STDC_CONSTANT_MACROS" "")
 264
 265     if(NOT CFLAGS_ARM_NEON OR NOT CXXFLAGS_ARM_NEON)
 266         message(FATAL_ERROR "Cannot find ARM 32-bit NEON compiler flag. Use a newer compiler, or disable NEON SIMD.")
 267     endif()
 268
 269     set(GMX_SIMD_ARM_NEON 1)
 270     set(SIMD_STATUS_MESSAGE "Enabling 32-bit ARM NEON SIMD instructions")
 271
 272 elseif(${GMX_SIMD} STREQUAL "ARM_NEON_ASIMD")
 273     # Gcc-4.8.1 appears to have a bug where the c++ compiler requires
 274     # -D__STDC_CONSTANT_MACROS if we include arm_neon.h
 275
 276     gmx_find_cflag_for_source(CFLAGS_ARM_NEON_ASIMD "C compiler ARM NEON Advanced SIMD flag"
 277                               "#include<arm_neon.h>
 278                               int main(){float64x2_t x=vdupq_n_f64(0.5);x=vfmaq_f64(x,x,x);return vgetq_lane_f64(x,0)>0;}"
 279                               SIMD_C_FLAGS
 280                               "")
 281     gmx_find_cxxflag_for_source(CXXFLAGS_ARM_NEON_ASIMD "C++ compiler ARM NEON Advanced SIMD flag"
 282                                 "#include<arm_neon.h>
 283                                 int main(){float64x2_t x=vdupq_n_f64(0.5);x=vfmaq_f64(x,x,x);return vgetq_lane_f64(x,0)>0;}"
 284                                 SIMD_CXX_FLAGS
 285                                 "-D__STDC_CONSTANT_MACROS" "")
 286
 287     if(NOT CFLAGS_ARM_NEON_ASIMD OR NOT CXXFLAGS_ARM_NEON_ASIMD)
 288         message(FATAL_ERROR "Cannot find ARM (AArch64) NEON Advanced SIMD compiler flag. Use a newer compiler, or disable SIMD.")
 289     endif()
 290
 291     if(CMAKE_C_COMPILER_ID MATCHES "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS "4.9")
 292         message(WARNING "At least gcc-4.8.1 has many bugs for ARM (AArch64) NEON Advanced SIMD compilation. You might need gcc version 4.9 or later.")
 293     endif()
 294
 295     if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_LESS "3.4")
 296         message(FATAL_ERROR "Clang version 3.4 or later is required for ARM (AArch64) NEON Advanced SIMD.")
 297     endif()
 298
 299     set(GMX_SIMD_ARM_NEON_ASIMD 1)
 300     set(SIMD_STATUS_MESSAGE "Enabling ARM (AArch64) NEON Advanced SIMD instructions")
 301
 302 elseif(${GMX_SIMD} STREQUAL "IBM_QPX")
 303
 304     try_compile(TEST_QPX ${CMAKE_BINARY_DIR}
 305         "${CMAKE_SOURCE_DIR}/cmake/TestQPX.c")
 306
 307     if (TEST_QPX)
 308         message(WARNING "IBM QPX SIMD instructions selected. This will work, but SIMD kernels are only available for the Verlet cut-off scheme. The plain C kernels that are used for the group cut-off scheme kernels will be slow, so please consider using the Verlet cut-off scheme.")
 309         set(GMX_SIMD_IBM_QPX 1)
 310         set(SIMD_STATUS_MESSAGE "Enabling IBM QPX SIMD instructions")
 311
 312     else()
 313         message(FATAL_ERROR "Cannot compile the requested IBM QPX intrinsics. If you are compiling for BlueGene/Q with the XL compilers, use 'cmake .. -DCMAKE_TOOLCHAIN_FILE=Platform/BlueGeneQ-static-XL-C' to set up the tool chain.")
 314     endif()
 315
 316 elseif(${GMX_SIMD} STREQUAL "IBM_VMX")
 317
 318     gmx_find_cflag_for_source(CFLAGS_IBM_VMX "C compiler IBM VMX SIMD flag"
 319                               "#include<altivec.h>
 320                               int main(){vector float x,y=vec_ctf(vec_splat_s32(1),0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
 321                               SIMD_C_FLAGS
 322                               "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
 323     gmx_find_cxxflag_for_source(CXXFLAGS_IBM_VMX "C++ compiler IBM VMX SIMD flag"
 324                                 "#include<altivec.h>
 325                                 int main(){vector float x,y=vec_ctf(vec_splat_s32(1),0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
 326                                 SIMD_CXX_FLAGS
 327                                 "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
 328
 329     if(NOT CFLAGS_IBM_VMX OR NOT CXXFLAGS_IBM_VMX)
 330         message(FATAL_ERROR "Cannot find IBM VMX SIMD compiler flag. Use a newer compiler, or disable VMX SIMD.")
 331     endif()
 332
 333     set(GMX_SIMD_IBM_VMX 1)
 334     set(SIMD_STATUS_MESSAGE "Enabling IBM VMX SIMD instructions")
 335
 336 elseif(${GMX_SIMD} STREQUAL "SPARC64_HPC_ACE")
 337
 338     # Note that GMX_RELAXED_DOUBLE_PRECISION is enabled by default in the top-level CMakeLists.txt
 339
 340     set(GMX_SIMD_SPARC64_HPC_ACE 1)
 341     set(SIMD_STATUS_MESSAGE "Enabling Sparc64 HPC-ACE SIMD instructions")
 342
 343 elseif(${GMX_SIMD} STREQUAL "REFERENCE")
 344
 345     # NB: This file handles settings for the SIMD module, so in the interest
 346     # of proper modularization, please do NOT put any verlet kernel settings in this file.
 347
 348     if(GMX_SIMD_REF_FLOAT_WIDTH)
 349         add_definitions(-DGMX_SIMD_REF_FLOAT_WIDTH=${GMX_SIMD_REF_FLOAT_WIDTH})
 350     endif()
 351     if(GMX_SIMD_REF_DOUBLE_WIDTH)
 352         add_definitions(-DGMX_SIMD_REF_DOUBLE_WIDTH=${GMX_SIMD_REF_DOUBLE_WIDTH})
 353     endif()
 354
 355     set(GMX_SIMD_REFERENCE 1)
 356     set(SIMD_STATUS_MESSAGE "Enabling reference (emulated) SIMD instructions.")
 357
 358 else()
 359     gmx_invalid_option_value(GMX_SIMD)
 360 endif()
 361
 362
 363 gmx_check_if_changed(SIMD_CHANGED GMX_SIMD)
 364 if (SIMD_CHANGED AND DEFINED SIMD_STATUS_MESSAGE)
 365     message(STATUS "${SIMD_STATUS_MESSAGE}")
 366 endif()
 367
 368 # By default, 32-bit windows cannot pass SIMD (SSE/AVX) arguments in registers,
 369 # and even on 64-bit (all platforms) it is only used for a handful of arguments.
 370 # The __vectorcall (MSVC, from MSVC2013) or __regcall (ICC) calling conventions
 371 # enable this, which is critical to enable 32-bit SIMD and improves performance
 372 # for 64-bit SIMD.
 373 # Check if the compiler supports one of these, and in that case set gmx_simdcall
 374 # to that string. If we do not have any such calling convention modifier, set it
 375 # to an empty string.
 376 if(NOT DEFINED GMX_SIMD_CALLING_CONVENTION)
 377     foreach(callconv __vectorcall __regcall "")
 378         set(callconv_compile_var "_callconv_${callconv}")
 379         check_c_source_compiles("int ${callconv} f(int i) {return i;} int main(void) {return f(0);}" ${callconv_compile_var})
 380         if(${callconv_compile_var})
 381             set(GMX_SIMD_CALLING_CONVENTION "${callconv}" CACHE INTERNAL "Calling convention for SIMD routines" FORCE)
 382             break()
 383         endif()
 384     endforeach()
 385 endif()
 386
 387 endmacro()
 388