cmake/gmxTestCPUAcceleration.cmake

   1 #
   2 # This file is part of the GROMACS molecular simulation package.
   3 #
   4 # Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
   5 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6 # and including many others, as listed in the AUTHORS file in the
   7 # top-level source directory and at http://www.gromacs.org.
   8 #
   9 # GROMACS is free software; you can redistribute it and/or
  10 # modify it under the terms of the GNU Lesser General Public License
  11 # as published by the Free Software Foundation; either version 2.1
  12 # of the License, or (at your option) any later version.
  13 #
  14 # GROMACS is distributed in the hope that it will be useful,
  15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 # Lesser General Public License for more details.
  18 #
  19 # You should have received a copy of the GNU Lesser General Public
  20 # License along with GROMACS; if not, see
  21 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23 #
  24 # If you want to redistribute modifications to GROMACS, please
  25 # consider that scientific software is very special. Version
  26 # control is crucial - bugs must be traceable. We will be happy to
  27 # consider code for inclusion in the official distribution, but
  28 # derived work must not be called official GROMACS. Details are found
  29 # in the README & COPYING files - if they are missing, get the
  30 # official version at http://www.gromacs.org.
  31 #
  32 # To help us fund GROMACS development, we humbly ask that you cite
  33 # the research papers on the package. Check out http://www.gromacs.org.
  34
  35 # include avx test source, used if the AVX flags are set below
  36 include(gmxTestAVXMaskload)
  37 include(gmxFindFlagsForSource)
  38
  39
  40 macro(gmx_use_clang_as_with_gnu_compilers_on_osx)
  41     # On OS X, we often want to use gcc instead of clang, since gcc supports
  42     # OpenMP. However, by default gcc uses the external system assembler, which
  43     # does not support AVX, so we need to tell the linker to use the clang
  44     # compilers assembler instead - and this has to happen before we detect AVX
  45     # flags.
  46     if(APPLE AND ${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
  47         gmx_test_cflag(GNU_C_USE_CLANG_AS "-Wa,-q" ACCELERATION_C_FLAGS)
  48     endif()
  49     if(APPLE AND ${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
  50         gmx_test_cxxflag(GNU_CXX_USE_CLANG_AS "-Wa,-q" ACCELERATION_CXX_FLAGS)
  51     endif()
  52 endmacro()
  53
  54
  55 macro(gmx_test_cpu_acceleration)
  56 #
  57 # To improve backward compatibility on x86 SIMD architectures,
  58 # we set the flags for all accelerations that are supported, not only
  59 # the most recent instruction set. I.e., if your machine supports AVX2_256,
  60 # we will set flags both for AVX2_256, AVX_256, SSE4.1, and SSE2 support.
  61
  62 if(${GMX_CPU_ACCELERATION} STREQUAL "NONE")
  63     # nothing to do configuration-wise
  64     set(ACCELERATION_STATUS_MESSAGE "CPU SIMD acceleration disabled")
  65 elseif(${GMX_CPU_ACCELERATION} STREQUAL "SSE2")
  66
  67     gmx_find_cflag_for_source(CFLAGS_SSE2 "C compiler SSE2 flag"
  68                               "#include<xmmintrin.h>
  69                               int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_rsqrt_ps(x);return 0;}"
  70                               ACCELERATION_C_FLAGS
  71                               "-msse2" "/arch:SSE2")
  72     gmx_find_cxxflag_for_source(CXXFLAGS_SSE2 "C++ compiler SSE2 flag"
  73                                 "#include<xmmintrin.h>
  74                                 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_rsqrt_ps(x);return 0;}"
  75                                 ACCELERATION_CXX_FLAGS
  76                                 "-msse2" "/arch:SSE2")
  77
  78     if(NOT CFLAGS_SSE2 OR NOT CXXFLAGS_SSE2)
  79         message(FATAL_ERROR "Cannot find SSE2 compiler flag. Use a newer compiler, or disable acceleration (slower).")
  80     endif()
  81
  82     set(GMX_CPU_ACCELERATION_X86_SSE2 1)
  83     set(GMX_X86_SSE2 1)
  84
  85     set(ACCELERATION_STATUS_MESSAGE "Enabling SSE2 SIMD Gromacs acceleration")
  86
  87 elseif(${GMX_CPU_ACCELERATION} STREQUAL "SSE4.1")
  88
  89     # Note: MSVC enables SSE4.1 with the SSE2 flag, so we include that in testing.
  90     gmx_find_cflag_for_source(CFLAGS_SSE4_1 "C compiler SSE4.1 flag"
  91                               "#include<smmintrin.h>
  92                               int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_dp_ps(x,x,0x77);return 0;}"
  93                               ACCELERATION_C_FLAGS
  94                               "-msse4.1" "/arch:SSE4.1" "/arch:SSE2")
  95     gmx_find_cxxflag_for_source(CXXFLAGS_SSE4_1 "C++ compiler SSE4.1 flag"
  96                                 "#include<smmintrin.h>
  97                                 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_dp_ps(x,x,0x77);return 0;}"
  98                                 ACCELERATION_CXX_FLAGS
  99                                 "-msse4.1" "/arch:SSE4.1" "/arch:SSE2")
 100
 101     if(NOT CFLAGS_SSE4_1 OR NOT CXXFLAGS_SSE4_1)
 102         message(FATAL_ERROR "Cannot find SSE4.1 compiler flag. "
 103                             "Use a newer compiler, or choose SSE2 acceleration (slower).")
 104     endif()
 105
 106     if(CMAKE_C_COMPILER_ID MATCHES "Intel" AND CMAKE_C_COMPILER_VERSION VERSION_EQUAL "11.1")
 107         message(FATAL_ERROR "You are using Intel compiler version 11.1, which produces incorrect results with SSE4.1 acceleration. You need to use a newer compiler (e.g. icc >= 12.0) or in worst case try a lower level of acceleration if performance is not critical.")
 108     endif()
 109
 110     set(GMX_CPU_ACCELERATION_X86_SSE4_1 1)
 111     set(GMX_X86_SSE4_1 1)
 112     set(GMX_X86_SSE2   1)
 113     set(ACCELERATION_STATUS_MESSAGE "Enabling SSE4.1 SIMD Gromacs acceleration")
 114
 115 elseif(${GMX_CPU_ACCELERATION} STREQUAL "AVX_128_FMA")
 116
 117     gmx_use_clang_as_with_gnu_compilers_on_osx()
 118
 119     # AVX128/FMA on AMD is a bit complicated. We need to do detection in three stages:
 120     # 1) Find the flags required for generic AVX support
 121     # 2) Find the flags necessary to enable fused-multiply add support
 122     # 3) Optional: Find a flag to enable the AMD XOP instructions
 123
 124     ### STAGE 1: Find the generic AVX flag
 125     gmx_find_cflag_for_source(CFLAGS_AVX_128 "C compiler AVX (128 bit) flag"
 126                               "#include<immintrin.h>
 127                               int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_permute_ps(x,1);return 0;}"
 128                               ACCELERATION_C_FLAGS
 129                               "-mavx" "/arch:AVX")
 130     gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128 "C++ compiler AVX (128 bit) flag"
 131                                 "#include<immintrin.h>
 132                                 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_permute_ps(x,1);return 0;}"
 133                                 ACCELERATION_CXX_FLAGS
 134                                 "-mavx" "/arch:AVX")
 135
 136     ### STAGE 2: Find the fused-multiply add flag.
 137     # GCC requires x86intrin.h for FMA support. MSVC 2010 requires intrin.h for FMA support.
 138     check_include_file(x86intrin.h HAVE_X86INTRIN_H ${ACCELERATION_C_FLAGS})
 139     check_include_file(intrin.h HAVE_INTRIN_H ${ACCELERATION_C_FLAGS})
 140     if(HAVE_X86INTRIN_H)
 141         set(INCLUDE_X86INTRIN_H "#include <x86intrin.h>")
 142     endif()
 143     if(HAVE_INTRIN_H)
 144         set(INCLUDE_INTRIN_H "#include <xintrin.h>")
 145     endif()
 146
 147     gmx_find_cflag_for_source(CFLAGS_AVX_128_FMA "C compiler AVX (128 bit) FMA4 flag"
 148 "#include<immintrin.h>
 149 ${INCLUDE_X86INTRIN_H}
 150 ${INCLUDE_INTRIN_H}
 151 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_macc_ps(x,x,x);return 0;}"
 152                               ACCELERATION_C_FLAGS
 153                               "-mfma4")
 154     gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128_FMA "C++ compiler AVX (128 bit) FMA4 flag"
 155 "#include<immintrin.h>
 156 ${INCLUDE_X86INTRIN_H}
 157 ${INCLUDE_INTRIN_H}
 158 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_macc_ps(x,x,x);return 0;}"
 159                                 ACCELERATION_CXX_FLAGS
 160                                 "-mfma4")
 161
 162     # We only need to check the last (FMA) test; that will always fail if the basic AVX128 test failed
 163     if(NOT CFLAGS_AVX_128_FMA OR NOT CXXFLAGS_AVX_128_FMA)
 164         message(FATAL_ERROR "Cannot find compiler flags for 128 bit AVX with FMA support. Use a newer compiler, or choose SSE4.1 acceleration (slower).")
 165     endif()
 166
 167     ### STAGE 3: Optional: Find the XOP instruction flag (No point in yelling if this does not work)
 168     gmx_find_cflag_for_source(CFLAGS_AVX_128_XOP "C compiler AVX (128 bit) XOP flag"
 169 "#include<immintrin.h>
 170 ${INCLUDE_X86INTRIN_H}
 171 ${INCLUDE_INTRIN_H}
 172 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_frcz_ps(x);return 0;}"
 173                               ACCELERATION_C_FLAGS
 174                               "-mxop")
 175     gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128_XOP "C++ compiler AVX (128 bit) XOP flag"
 176 "#include<immintrin.h>
 177 ${INCLUDE_X86INTRIN_H}
 178 ${INCLUDE_INTRIN_H}
 179 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_frcz_ps(x);return 0;}"
 180                                 ACCELERATION_CXX_FLAGS
 181                                 "-mxop")
 182
 183     # We don't have the full compiler version string yet (BUILD_C_COMPILER),
 184     # so we can't distinguish vanilla from Apple clang versions, but catering for a few rare AMD
 185     # hackintoshes is not worth the effort.
 186     if (APPLE AND (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR
 187                 ${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang"))
 188         message(WARNING "Due to a known compiler bug, Clang up to version 3.2 (and Apple Clang up to version 4.1) produces incorrect code with AVX_128_FMA acceleration. As we cannot work around this bug on OS X, you will have to select a different compiler or CPU acceleration.")
 189     endif()
 190
 191
 192     if (GMX_USE_CLANG_C_FMA_BUG_WORKAROUND)
 193         # we assume that we have an external assembler that supports AVX
 194         message(STATUS "Clang ${CMAKE_C_COMPILER_VERSION} detected, enabling FMA bug workaround")
 195         set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -no-integrated-as")
 196     endif()
 197     if (GMX_USE_CLANG_CXX_FMA_BUG_WORKAROUND)
 198         # we assume that we have an external assembler that supports AVX
 199         message(STATUS "Clang ${CMAKE_CXX_COMPILER_VERSION} detected, enabling FMA bug workaround")
 200         set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -no-integrated-as")
 201     endif()
 202
 203     gmx_test_avx_gcc_maskload_bug(GMX_X86_AVX_GCC_MASKLOAD_BUG "${ACCELERATION_C_FLAGS}")
 204
 205     set(GMX_CPU_ACCELERATION_X86_AVX_128_FMA 1)
 206     set(GMX_X86_AVX_128_FMA 1)
 207     set(GMX_X86_SSE4_1      1)
 208     set(GMX_X86_SSE2        1)
 209
 210     set(ACCELERATION_STATUS_MESSAGE "Enabling 128-bit AVX SIMD Gromacs acceleration (with fused-multiply add)")
 211
 212 elseif(${GMX_CPU_ACCELERATION} STREQUAL "AVX_256")
 213
 214     gmx_use_clang_as_with_gnu_compilers_on_osx()
 215
 216     gmx_find_cflag_for_source(CFLAGS_AVX "C compiler AVX (256 bit) flag"
 217                               "#include<immintrin.h>
 218                               int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_add_ps(x,x);return 0;}"
 219                               ACCELERATION_C_FLAGS
 220                               "-mavx" "/arch:AVX")
 221     gmx_find_cxxflag_for_source(CXXFLAGS_AVX "C++ compiler AVX (256 bit) flag"
 222                                 "#include<immintrin.h>
 223                                 int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_add_ps(x,x);return 0;}"
 224                                 ACCELERATION_CXX_FLAGS
 225                                 "-mavx" "/arch:AVX")
 226
 227     if(NOT CFLAGS_AVX OR NOT CXXFLAGS_AVX)
 228         message(FATAL_ERROR "Cannot find AVX compiler flag. Use a newer compiler, or choose SSE4.1 acceleration (slower).")
 229     endif()
 230
 231     gmx_test_avx_gcc_maskload_bug(GMX_X86_AVX_GCC_MASKLOAD_BUG "${ACCELERATION_C_FLAGS}")
 232
 233     set(GMX_CPU_ACCELERATION_X86_AVX_256 1)
 234     set(GMX_X86_AVX_256  1)
 235     set(GMX_X86_SSE4_1   1)
 236     set(GMX_X86_SSE2     1)
 237
 238     set(ACCELERATION_STATUS_MESSAGE "Enabling 256-bit AVX SIMD Gromacs acceleration")
 239
 240 elseif(${GMX_CPU_ACCELERATION} STREQUAL "AVX2_256")
 241
 242     # Comment out this line for AVX2 development
 243     message(FATAL_ERROR "AVX2_256 is disabled until the implementation has been commited.")
 244
 245     gmx_use_clang_as_with_gnu_compilers_on_osx()
 246
 247     gmx_find_cflag_for_source(CFLAGS_AVX2 "C compiler AVX2 flag"
 248                               "#include<immintrin.h>
 249                               int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_fmadd_ps(x,x,x);return 0;}"
 250                               ACCELERATION_C_FLAGS
 251                               "-march=core-avx2" "-mavx2" "/arch:AVX") # no AVX2-specific flag for MSVC yet
 252     gmx_find_cxxflag_for_source(CXXFLAGS_AVX2 "C++ compiler AVX2 flag"
 253                                 "#include<immintrin.h>
 254                                 int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_fmadd_ps(x,x,x);return 0;}"
 255                                 ACCELERATION_CXX_FLAGS
 256                                 "-march=core-avx2" "-mavx2" "/arch:AVX") # no AVX2-specific flag for MSVC yet
 257
 258     if(NOT CFLAGS_AVX2 OR NOT CXXFLAGS_AVX2)
 259         message(FATAL_ERROR "Cannot find AVX2 compiler flag. Use a newer compiler, or choose AVX acceleration (slower).")
 260     endif()
 261
 262     # No need to test for Maskload bug - it was fixed before gcc added AVX2 support
 263
 264     set(GMX_CPU_ACCELERATION_X86_AVX2_256 1)
 265     set(GMX_X86_AVX2_256 1)
 266     set(GMX_X86_AVX_256  1)
 267     set(GMX_X86_SSE4_1   1)
 268     set(GMX_X86_SSE2     1)
 269
 270     set(ACCELERATION_STATUS_MESSAGE "Enabling 256-bit AVX2 Gromacs acceleration")
 271
 272 elseif(${GMX_CPU_ACCELERATION} STREQUAL "IBM_QPX")
 273
 274     try_compile(TEST_QPX ${CMAKE_BINARY_DIR}
 275         "${CMAKE_SOURCE_DIR}/cmake/TestQPX.c")
 276
 277     if (TEST_QPX)
 278         message(WARNING "IBM QPX acceleration was selected. This will work, but SIMD-accelerated kernels are only available for the Verlet cut-off scheme. The plain C kernels that are used for the group cut-off scheme kernels will be slow, so please consider using the Verlet cut-off scheme.")
 279         set(GMX_CPU_ACCELERATION_IBM_QPX 1)
 280         set(ACCELERATION_STATUS_MESSAGE "Enabling IBM QPX SIMD acceleration")
 281
 282     else()
 283         message(FATAL_ERROR "Cannot compile the requested IBM QPX intrinsics. If you are compiling for BlueGene/Q with the XL compilers, use 'cmake .. -DCMAKE_TOOLCHAIN_FILE=Platform/BlueGeneQ-static-XL-C' to set up the tool chain.")
 284     endif()
 285
 286 elseif(${GMX_CPU_ACCELERATION} STREQUAL "SPARC64_HPC_ACE")
 287
 288     set(GMX_CPU_ACCELERATION_SPARC64_HPC_ACE 1)
 289     set(ACCELERATION_STATUS_MESSAGE "Enabling Sparc64 HPC-ACE SIMD acceleration")
 290
 291 elseif(${GMX_CPU_ACCELERATION} STREQUAL "REFERENCE")
 292
 293     add_definitions(-DGMX_SIMD_REFERENCE_PLAIN_C)
 294     if(${GMX_NBNXN_REF_KERNEL_TYPE} STREQUAL "4xn")
 295         if(${GMX_NBNXN_REF_KERNEL_WIDTH} STREQUAL "2" OR ${GMX_NBNXN_REF_KERNEL_WIDTH} STREQUAL "4" OR ${GMX_NBNXN_REF_KERNEL_WIDTH} STREQUAL "8")
 296             add_definitions(-DGMX_NBNXN_SIMD_4XN -DGMX_SIMD_REF_WIDTH=${GMX_NBNXN_REF_KERNEL_WIDTH})
 297         else()
 298             message(FATAL_ERROR "Unsupported width for 4xn reference kernels")
 299         endif()
 300     elseif(${GMX_NBNXN_REF_KERNEL_TYPE} STREQUAL "2xnn")
 301         if(${GMX_NBNXN_REF_KERNEL_WIDTH} STREQUAL "8" OR ${GMX_NBNXN_REF_KERNEL_WIDTH} STREQUAL "16")
 302             add_definitions(-DGMX_NBNXN_SIMD_2XNN -DGMX_SIMD_REF_WIDTH=${GMX_NBNXN_REF_KERNEL_WIDTH})
 303         else()
 304             message(FATAL_ERROR "Unsupported width for 2xn reference kernels")
 305         endif()
 306     else()
 307         message(FATAL_ERROR "Unsupported kernel type")
 308     endif()
 309
 310 else()
 311     gmx_invalid_option_value(GMX_CPU_ACCELERATION)
 312 endif()
 313
 314
 315 gmx_check_if_changed(ACCELERATION_CHANGED GMX_CPU_ACCELERATION)
 316 if (ACCELERATION_CHANGED AND DEFINED ACCELERATION_STATUS_MESSAGE)
 317     message(STATUS "${ACCELERATION_STATUS_MESSAGE}")
 318 endif()
 319
 320 endmacro()
 321