From d33720e6c87063242fa851ef7908b0720f1e80ae Mon Sep 17 00:00:00 2001 From: Berk Hess Date: Thu, 7 May 2015 10:45:27 +0200 Subject: [PATCH] Changed FFTW warning from AVX to no SSE Changed the cmake FFTW SIMD check warning from complaining about AVX to complaining about missing SSE or SSE2. With FFTW 3.3.4 the performance of FFTW with both SSE and AVX enabled is often a bit better and never much worse than SSE alone. Newer Intel processors probably also perform better with AVX with FFTW 3.3.3 so we should not complain about the combination of SSE(2) and AVX, but only when SSE is missing. Change-Id: I3665a35ec98616f015d05e314c8fbb80a8862092 --- cmake/FindFFTW.cmake | 50 ++++++++++++++++++++++++++----- cmake/gmxManageFFTLibraries.cmake | 12 ++++---- 2 files changed, 49 insertions(+), 13 deletions(-) diff --git a/cmake/FindFFTW.cmake b/cmake/FindFFTW.cmake index 326805a163..dbe8f92c4b 100644 --- a/cmake/FindFFTW.cmake +++ b/cmake/FindFFTW.cmake @@ -1,7 +1,7 @@ # # This file is part of the GROMACS molecular simulation package. # -# Copyright (c) 2012,2013,2014, by the GROMACS development team, led by +# Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. @@ -100,14 +100,50 @@ if (${FFTW}_FOUND) message(FATAL_ERROR "Could not find ${${FFTW}_FUNCTION_PREFIX}_plan_r2r_1d in ${${FFTW}_LIBRARY}, take a look at the error message in ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log to find out what went wrong. If you are using a static lib (.a) make sure you have specified all dependencies of ${${FFTW}_PKG} in ${FFTW}_LIBRARY by hand (e.g. 
-D${FFTW}_LIBRARY='/path/to/lib${${FFTW}_PKG}.so;/path/to/libm.so') !") endif() - # Check for FFTW3 compiled with --enable-avx, which is slower for GROMACS than --enable-sse or --enable-sse2 - foreach(AVX_FUNCTION ${${FFTW}_FUNCTION_PREFIX}_have_simd_avx) + # Check for FFTW3 compiled with --enable-sse + foreach(SSE_FUNCTION ${${FFTW}_FUNCTION_PREFIX}_have_simd_sse) if (FFTW_LIBRARY_CHANGED) - unset(${FFTW}_HAVE_${AVX_FUNCTION} CACHE) + unset(${FFTW}_HAVE_${SSE_FUNCTION} CACHE) endif() - check_library_exists("${${FFTW}_LIBRARIES}" "${AVX_FUNCTION}" "" ${FFTW}_HAVE_${AVX_FUNCTION}) - if(${FFTW}_HAVE_${AVX_FUNCTION}) - set(${FFTW}_HAVE_AVX TRUE) + check_library_exists("${${FFTW}_LIBRARIES}" "${SSE_FUNCTION}" "" ${FFTW}_HAVE_${SSE_FUNCTION}) + if(${FFTW}_HAVE_${SSE_FUNCTION}) + set(${FFTW}_HAVE_SSE TRUE) + break() + endif() + endforeach() + + # Check for FFTW3 compiled with --enable-sse2 + foreach(SSE2_FUNCTION ${${FFTW}_FUNCTION_PREFIX}_have_simd_sse2) + if (FFTW_LIBRARY_CHANGED) + unset(${FFTW}_HAVE_${SSE2_FUNCTION} CACHE) + endif() + check_library_exists("${${FFTW}_LIBRARIES}" "${SSE2_FUNCTION}" "" ${FFTW}_HAVE_${SSE2_FUNCTION}) + if(${FFTW}_HAVE_${SSE2_FUNCTION}) + set(${FFTW}_HAVE_SSE2 TRUE) + break() + endif() + endforeach() + + # Check for FFTW3 with 128-bit AVX compiled with --enable-avx + foreach(AVX_128_FUNCTION ${${FFTW}_FUNCTION_PREFIX}_have_simd_avx_128) + if (FFTW_LIBRARY_CHANGED) + unset(${FFTW}_HAVE_${AVX_128_FUNCTION} CACHE) + endif() + check_library_exists("${${FFTW}_LIBRARIES}" "${AVX_128_FUNCTION}" "" ${FFTW}_HAVE_${AVX_128_FUNCTION}) + if(${FFTW}_HAVE_${AVX_128_FUNCTION}) + set(${FFTW}_HAVE_AVX_128 TRUE) + break() + endif() + endforeach() + + # Check for FFTW3 with 128-bit AVX2 compiled with --enable-avx2 + foreach(AVX2_128_FUNCTION ${${FFTW}_FUNCTION_PREFIX}_have_simd_avx2_128) + if (FFTW_LIBRARY_CHANGED) + unset(${FFTW}_HAVE_${AVX2_128_FUNCTION} CACHE) + endif() + check_library_exists("${${FFTW}_LIBRARIES}" "${AVX2_128_FUNCTION}" "" 
${FFTW}_HAVE_${AVX2_128_FUNCTION}) + if(${FFTW}_HAVE_${AVX2_128_FUNCTION}) + set(${FFTW}_HAVE_AVX2_128 TRUE) break() endif() endforeach() diff --git a/cmake/gmxManageFFTLibraries.cmake b/cmake/gmxManageFFTLibraries.cmake index 104576d4b3..bc66a18d92 100644 --- a/cmake/gmxManageFFTLibraries.cmake +++ b/cmake/gmxManageFFTLibraries.cmake @@ -1,7 +1,7 @@ # # This file is part of the GROMACS molecular simulation package. # -# Copyright (c) 2012,2013,2014, by the GROMACS development team, led by +# Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. @@ -87,11 +87,11 @@ if(${GMX_FFT_LIBRARY} STREQUAL "FFTW3") if ((${GMX_SIMD} MATCHES "SSE" OR ${GMX_SIMD} MATCHES "AVX") AND NOT ${FFTW}_HAVE_SIMD) message(WARNING "The fftw library found is compiled without SIMD support, which makes it slow. Consider recompiling it or contact your admin") - endif() - - if((${GMX_SIMD} MATCHES "SSE" OR ${GMX_SIMD} MATCHES "AVX") AND ${FFTW}_HAVE_AVX) - # If we're not using SIMD instructions, we don't care about FFTW performance on x86 either - message(WARNING "The FFTW library was compiled with --enable-avx to enable AVX SIMD instructions. That might sound like a good idea for your processor, but for FFTW versions up to 3.3.3, these are slower than the SSE/SSE2 SIMD instructions for the way GROMACS uses FFTs. Limitations in the way FFTW allows GROMACS to measure performance make it awkward for either GROMACS or FFTW to make the decision for you based on runtime performance. You should compile a different FFTW library with --enable-sse or --enable-sse2. If you have a more recent FFTW, you may like to compare the performance of GROMACS with FFTW libraries compiled with and without --enable-avx. 
However, the GROMACS developers do not really expect the FFTW AVX optimization to help, because the performance is limited by memory access, not computation.") + else() + if(${GMX_SIMD} MATCHES "AVX" AND NOT (${FFTW}_HAVE_SSE OR ${FFTW}_HAVE_SSE2 OR ${FFTW}_HAVE_AVX_128 OR ${FFTW}_HAVE_AVX2_128)) + # If we end up here, we have an AVX GROMACS build and FFTW with SIMD but no 128-bit SIMD; this means AVX is enabled for FFTW. + message(WARNING "The FFTW library was compiled with neither --enable-sse nor --enable-sse2; those would have enabled SSE(2) SIMD instructions. This will give suboptimal performance. You should (re)compile the FFTW library with both SSE2 and AVX instruction support (use both --enable-sse2 and --enable-avx). The FFTW library will determine at runtime which SIMD instruction set is fastest for different parts of the FFTs.") + endif() endif() set(FFT_STATUS_MESSAGE "Using external FFT library - FFTW3") -- 2.22.0