Added AVX2 detection to cmake and created CPU acceleration macro

author Erik Lindahl <erik@kth.se>

Wed, 11 Dec 2013 21:46:13 +0000 (22:46 +0100)

committer Mark Abraham <mark.j.abraham@gmail.com>

Sat, 1 Feb 2014 23:37:57 +0000 (00:37 +0100)
author Erik Lindahl <erik@kth.se>
Wed, 11 Dec 2013 21:46:13 +0000 (22:46 +0100)
committer Mark Abraham <mark.j.abraham@gmail.com>
Sat, 1 Feb 2014 23:37:57 +0000 (00:37 +0100)
diff --git a/CMakeLists.txt b/CMakeLists.txt

index 96559705fb8b64d4f3e94614bb4f994e4b75e504..424964d04dff12dc63e6c5259688e807b1629179 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -192,12 +192,16 @@ include(gmxDetectTargetArchitecture)
  gmx_detect_target_architecture()
  include(gmxDetectAcceleration)
  gmx_detect_acceleration(GMX_SUGGESTED_CPU_ACCELERATION)
  gmx_detect_target_architecture()
  include(gmxDetectAcceleration)
  gmx_detect_acceleration(GMX_SUGGESTED_CPU_ACCELERATION)
+# Temporary fallback: the AVX2 kernels are not committed yet, so when AVX2_256
+# is detected we suggest AVX_256 instead.
+if("${GMX_SUGGESTED_CPU_ACCELERATION}" STREQUAL "AVX2_256")
+    message(STATUS "Changing acceleration from AVX2 to AVX (until AVX2 patches committed).")
+    set(GMX_SUGGESTED_CPU_ACCELERATION "AVX_256")
+endif()
  
  gmx_option_multichoice(
      GMX_CPU_ACCELERATION
      "Acceleration for CPU kernels and compiler optimization"
      "${GMX_SUGGESTED_CPU_ACCELERATION}"
  
  gmx_option_multichoice(
      GMX_CPU_ACCELERATION
      "Acceleration for CPU kernels and compiler optimization"
      "${GMX_SUGGESTED_CPU_ACCELERATION}"
-    None SSE2 SSE4.1 AVX_128_FMA AVX_256 IBM_QPX Sparc64_HPC_ACE Reference)
+    None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256 IBM_QPX Sparc64_HPC_ACE Reference)
  
  gmx_option_multichoice(
      GMX_FFT_LIBRARY
  
  gmx_option_multichoice(
      GMX_FFT_LIBRARY
@@ -570,231 +574,16 @@ if(NOT GMX_SYSTEM_XDR)
      set(GMX_INTERNAL_XDR 1)
  endif(NOT GMX_SYSTEM_XDR)
  
      set(GMX_INTERNAL_XDR 1)
  endif(NOT GMX_SYSTEM_XDR)
  
-# include avx test source, used if the AVX flags are set below
-include(gmxTestAVXMaskload)
-
-# Process nonbonded accelerated kernels settings
-#
-# Note that for the backward-compatible x86 SIMD architectures, the
-# GMX_CPU_ACCELERATION determines the maximum level of the instruction
-# set used (e.g. GMX_CPU_ACCLERATION=SSE4.1 implies
-# SSE2). Accordingly, there are a set of CMake variables
-# GMX_<arch>_<feature-set> that are exported to the C code to specify
-# CPU features that should be used. This means that the logic for
-# requiring such backward compatibility is all located here.
-if(${GMX_CPU_ACCELERATION} STREQUAL "NONE")
-    # nothing to do
-    set(ACCELERATION_STATUS_MESSAGE "CPU acceleration disabled")
-
-elseif(${GMX_CPU_ACCELERATION} STREQUAL "SSE2")
-
-    GMX_TEST_CFLAG(GNU_SSE2_CFLAG "-msse2" ACCELERATION_C_FLAGS)
-    if(NOT GNU_SSE2_CFLAG AND GMX_NATIVE_WINDOWS)
-        GMX_TEST_CFLAG(MSVC_SSE2_CFLAG "/arch:SSE2" ACCELERATION_C_FLAGS)
-    endif(NOT GNU_SSE2_CFLAG AND GMX_NATIVE_WINDOWS)
-
-    GMX_TEST_CXXFLAG(GNU_SSE2_CXXFLAG "-msse2" ACCELERATION_CXX_FLAGS)
-    if(NOT GNU_SSE2_CXXFLAG AND GMX_NATIVE_WINDOWS)
-        GMX_TEST_CXXFLAG(MSVC_SSE2_CXXFLAG "/arch:SSE2" ACCELERATION_CXX_FLAGS)
-    endif(NOT GNU_SSE2_CXXFLAG AND GMX_NATIVE_WINDOWS)
-
-    # We dont warn for lacking SSE2 flag support, since that is probably standard today.
-
-    # Only test the include after we have tried to add the correct flag for SSE2 support
-    check_include_file(emmintrin.h  HAVE_EMMINTRIN_H ${ACCELERATION_C_FLAGS})
-
-    if(NOT HAVE_EMMINTRIN_H)
-        message(FATAL_ERROR "Cannot find emmintrin.h, which is required for SSE2 intrinsics support.")
-    endif(NOT HAVE_EMMINTRIN_H)
-
-    set(GMX_CPU_ACCELERATION_X86_SSE2 1)
-    # The user should not be able to set this orthogonally to the acceleration
-    set(GMX_X86_SSE2 1)
-    set(ACCELERATION_STATUS_MESSAGE
-        "Enabling SSE2 Gromacs acceleration")
-
-elseif(${GMX_CPU_ACCELERATION} STREQUAL "SSE4.1")
-
-    GMX_TEST_CFLAG(GNU_SSE4_CFLAG "-msse4.1" ACCELERATION_C_FLAGS)
-    if (NOT GNU_SSE4_CFLAG AND GMX_NATIVE_WINDOWS)
-        GMX_TEST_CFLAG(MSVC_SSE4_CFLAG "/arch:SSE4.1" ACCELERATION_C_FLAGS)
-    endif(NOT GNU_SSE4_CFLAG AND GMX_NATIVE_WINDOWS)
-    if (NOT GNU_SSE4_CFLAG AND NOT MSVC_SSE4_CFLAG)
-        # Not surprising if we end up here! MSVC current does not support the SSE4.1 flag. However, it appears to accept SSE4.1
-        # intrinsics when SSE2 support is enabled, so we try that instead first.
-        if (GMX_NATIVE_WINDOWS)
-            GMX_TEST_CFLAG(MSVC_SSE2_CFLAG "/arch:SSE2" ACCELERATION_C_FLAGS)
-            message(WARNING "Neither SSE4.1 or SSE2 seems to be supported by your Windows compiler. Something is likely broken.")
-        else()
-            message(WARNING "No C SSE4.1 flag found. Consider a newer compiler, or use SSE2 for slightly lower performance")
-        endif()
-    endif(NOT GNU_SSE4_CFLAG AND NOT MSVC_SSE4_CFLAG)
-
-    GMX_TEST_CXXFLAG(GNU_SSE4_CXXFLAG "-msse4.1" ACCELERATION_CXX_FLAGS)
-    if (NOT GNU_SSE4_CXXFLAG AND GMX_NATIVE_WINDOWS)
-        GMX_TEST_CXXFLAG(MSVC_SSE4_CXXFLAG "/arch:SSE4.1" ACCELERATION_CXX_FLAGS)
-    endif(NOT GNU_SSE4_CXXFLAG AND GMX_NATIVE_WINDOWS)
-    if (NOT GNU_SSE4_CXXFLAG AND NOT MSVC_SSE4_CXXFLAG)
-        message(WARNING "No C++ SSE4.1 flag found. Consider a newer compiler, or use SSE2 for slightly lower performance.")
-        # Not surprising if we end up here! MSVC current does not support the SSE4.1 flag. However, it appears to accept SSE4.1
-        # intrinsics when SSE2 support is enabled, so we try that instead.
-        if (GMX_NATIVE_WINDOWS)
-            GMX_TEST_CXXFLAG(MSVC_SSE2_CXXFLAG "/arch:SSE2" ACCELERATION_CXX_FLAGS)
-        endif()
-    endif(NOT GNU_SSE4_CXXFLAG AND NOT MSVC_SSE4_CXXFLAG)
-
-    # This must come after we have added the -msse4.1 flag on some platforms.
-    check_include_file(smmintrin.h  HAVE_SMMINTRIN_H ${ACCELERATION_C_FLAGS})
-
-    if(NOT HAVE_SMMINTRIN_H)
-        message(FATAL_ERROR "Cannot find smmintrin.h, which is required for SSE4.1 intrinsics support.")
-    endif(NOT HAVE_SMMINTRIN_H)
-
-    if(CMAKE_C_COMPILER_ID MATCHES "Intel" AND CMAKE_C_COMPILER_VERSION VERSION_EQUAL "11.1")
-        message(FATAL_ERROR "You are using Intel compiler version 11.1, and that compiler is known to produce incorrect results with SSE4.1 acceleration. You need to use another compiler (e.g. icc 12 or newer) or different acceleration (probably slower simulations).")
-    endif()
-
-    set(GMX_CPU_ACCELERATION_X86_SSE4_1 1)
-    # The user should not be able to set this orthogonally to the acceleration
-    set(GMX_X86_SSE4_1 1)
-    set(GMX_X86_SSE2   1)
-    set(ACCELERATION_STATUS_MESSAGE
-        "Enabling SSE4.1 Gromacs acceleration")
-
-elseif(${GMX_CPU_ACCELERATION} STREQUAL "AVX_128_FMA" OR ${GMX_CPU_ACCELERATION} STREQUAL "AVX_256")
-
-    # Set the AVX compiler flag for both these choices!
-
-    GMX_TEST_CFLAG(GNU_AVX_CFLAG "-mavx" ACCELERATION_C_FLAGS)
-    if (NOT GNU_AVX_CFLAG AND GMX_NATIVE_WINDOWS)
-        GMX_TEST_CFLAG(MSVC_AVX_CFLAG "/arch:AVX" ACCELERATION_C_FLAGS)
-    endif (NOT GNU_AVX_CFLAG AND GMX_NATIVE_WINDOWS)
-    if (NOT GNU_AVX_CFLAG AND NOT MSVC_AVX_CFLAG)
-        message(WARNING "No C AVX flag found. Consider a newer compiler, or try SSE4.1 (lower performance) giving the -DGMX_CPU_ACCELERATION=SSE4.1 to cmake.")
-    endif (NOT GNU_AVX_CFLAG AND NOT MSVC_AVX_CFLAG)
-
-    GMX_TEST_CXXFLAG(GNU_AVX_CXXFLAG "-mavx" ACCELERATION_CXX_FLAGS)
-    if (NOT GNU_AVX_CXXFLAG AND GMX_NATIVE_WINDOWS)
-        GMX_TEST_CXXFLAG(MSVC_AVX_CXXFLAG "/arch:AVX" ACCELERATION_CXX_FLAGS)
-    endif (NOT GNU_AVX_CXXFLAG AND GMX_NATIVE_WINDOWS)
-    if (NOT GNU_AVX_CXXFLAG AND NOT MSVC_AVX_CXXFLAG)
-        message(WARNING "No C++ AVX flag found. Consider a newer compiler, or try SSE4.1 (lower performance) giving the -DGMX_CPU_ACCELERATION=SSE4.1 to cmake.")
-    endif (NOT GNU_AVX_CXXFLAG AND NOT MSVC_AVX_CXXFLAG)
-
-    # Set the FMA4 flags (MSVC doesn't require any)
-    if(${GMX_CPU_ACCELERATION} STREQUAL "AVX_128_FMA" AND NOT MSVC)
-        GMX_TEST_CFLAG(GNU_FMA_CFLAG "-mfma4" ACCELERATION_C_FLAGS)
-        if (NOT GNU_FMA_CFLAG)
-            message(WARNING "No C FMA4 flag found. Consider a newer compiler, or try SSE4.1 (lower performance).")
-        endif(NOT GNU_FMA_CFLAG)
-        GMX_TEST_CFLAG(GNU_XOP_CFLAG "-mxop" ACCELERATION_C_FLAGS)
-        # No big deal if we do not have xop, so no point yelling warnings about it.
-        GMX_TEST_CXXFLAG(GNU_FMA_CXXFLAG "-mfma4" ACCELERATION_CXX_FLAGS)
-        if (NOT GNU_FMA_CXXFLAG)
-            message(WARNING "No C++ FMA flag found. Consider a newer compiler, or try SSE4.1 (lower performance).")
-        endif (NOT GNU_FMA_CXXFLAG)
-        GMX_TEST_CXXFLAG(GNU_XOP_CXXFLAG "-mxop" ACCELERATION_CXX_FLAGS)
-        # No big deal if we do not have xop, so no point yelling warnings about it.
-    endif()
  
  
-    # Only test the header after we have tried to add the flag for AVX support
-    check_include_file(immintrin.h  HAVE_IMMINTRIN_H ${ACCELERATION_C_FLAGS})
-
-    if(NOT HAVE_IMMINTRIN_H)
-        message(FATAL_ERROR "Cannot find immintrin.h, which is required for AVX intrinsics support. Consider switching compiler.")
-    endif(NOT HAVE_IMMINTRIN_H)
-
-    if(${GMX_CPU_ACCELERATION} STREQUAL "AVX_256")
-        try_compile(TEST_AVX ${CMAKE_BINARY_DIR}
-            "${CMAKE_SOURCE_DIR}/cmake/TestAVX.c"
-            COMPILE_DEFINITIONS "${ACCELERATION_C_FLAGS}")
-        if(NOT TEST_AVX)
-            message(FATAL_ERROR "Cannot compile AVX intrinsics. Consider switching compiler.")
-        endif()
-    endif()
-
-    # GCC requires x86intrin.h for FMA support. MSVC 2010 requires intrin.h for FMA support.
-    check_include_file(x86intrin.h HAVE_X86INTRIN_H ${ACCELERATION_C_FLAGS})
-    check_include_file(intrin.h HAVE_INTRIN_H ${ACCELERATION_C_FLAGS})
-
-    # The user should not be able to set this orthogonally to the acceleration
-    set(GMX_X86_SSE4_1 1)
-    set(GMX_X86_SSE2   1)
-
-    # But just enable one of the choices internally...
-    if(${GMX_CPU_ACCELERATION} STREQUAL "AVX_128_FMA")
-        # We don't have the full compiler version string yet (BUILD_C_COMPILER),
-        # so we can't distinguish vanilla and Apple clang, but catering for AMD
-        # hackintoshes is not worth the effort.
-        if (APPLE AND (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR
-                    ${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang"))
-            message(WARNING "Due to a known compiler bug, Clang up to version 3.2 (and Apple Clang up to version 4.1) produces incorrect code with AVX_128_FMA acceleration. As we can not work around this bug on OS X, you will have to select a different compiler or CPU acceleration.")
-        endif()
-
-        if (GMX_USE_CLANG_C_FMA_BUG_WORKAROUND)
-            # we assume that we have an external assembler that supports AVX
-            message(STATUS "Clang ${CMAKE_C_COMPILER_VERSION} detected, enabling FMA bug workaround")
-            set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -no-integrated-as")
-        endif()
-        if (GMX_USE_CLANG_CXX_FMA_BUG_WORKAROUND)
-            # we assume that we have an external assembler that supports AVX
-            message(STATUS "Clang ${CMAKE_CXX_COMPILER_VERSION} detected, enabling FMA bug workaround")
-            set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -no-integrated-as")
-        endif()
-
-        set(GMX_CPU_ACCELERATION_X86_AVX_128_FMA 1)
-        set(GMX_X86_AVX_128_FMA 1)
-        set(ACCELERATION_STATUS_MESSAGE
-            "Enabling 128-bit AVX Gromacs acceleration (with fused-multiply add)")
-
-    else()
-        # If we are not doing AVX_128, it must be AVX_256...
-        set(GMX_CPU_ACCELERATION_X86_AVX_256 1)
-        set(GMX_X86_AVX_256 1)
-        set(ACCELERATION_STATUS_MESSAGE
-            "Enabling 256-bit AVX Gromacs acceleration")
-    endif()
-
-    # Unfortunately gcc-4.5.2 and gcc-4.6.0 has a bug where they use the wrong datatype for the formal
-    # parameter of the mask for maskload/maskstore arguments. Check if this is present, since we can work around it.
-    gmx_test_avx_gcc_maskload_bug(${ACCELERATION_C_FLAGS} GMX_X86_AVX_GCC_MASKLOAD_BUG)
-
-elseif(${GMX_CPU_ACCELERATION} STREQUAL "IBM_QPX")
-    try_compile(TEST_QPX ${CMAKE_BINARY_DIR}
-        "${CMAKE_SOURCE_DIR}/cmake/TestQPX.c")
+##################################################
+# Process CPU acceleration settings
+##################################################
+# This checks what flags to add in order to
+# support the SIMD instructions we need, and sets
+# correct defines for the acceleration supported.
+include(gmxTestCPUAcceleration)
+gmx_test_cpu_acceleration()
  
  
-    if (TEST_QPX)
-        message(WARNING "IBM QPX acceleration was selected. This will work, but SIMD-accelerated kernels are only available for the Verlet cut-off scheme. The plain C kernels that are used for the group cut-off scheme kernels will be slow, so please consider using the Verlet cut-off scheme.")
-        set(GMX_CPU_ACCELERATION_IBM_QPX 1)
-    else()
-        message(FATAL_ERROR "Cannot compile the requested IBM QPX intrinsics. If you are compiling for BlueGene/Q with the XL compilers, use 'cmake .. -DCMAKE_TOOLCHAIN_FILE=Platform/BlueGeneQ-static-XL-C' to set up the tool chain.")
-    endif()
-elseif(${GMX_CPU_ACCELERATION} STREQUAL "SPARC64_HPC_ACE")
-    set(GMX_CPU_ACCELERATION_SPARC64_HPC_ACE 1)
-elseif(${GMX_CPU_ACCELERATION} STREQUAL "REFERENCE")
-    add_definitions(-DGMX_SIMD_REFERENCE_PLAIN_C)
-    if(${GMX_NBNXN_REF_KERNEL_TYPE} STREQUAL "4xn")
-        if(${GMX_NBNXN_REF_KERNEL_WIDTH} STREQUAL "2" OR ${GMX_NBNXN_REF_KERNEL_WIDTH} STREQUAL "4" OR ${GMX_NBNXN_REF_KERNEL_WIDTH} STREQUAL "8")
-            add_definitions(-DGMX_NBNXN_SIMD_4XN -DGMX_SIMD_REF_WIDTH=${GMX_NBNXN_REF_KERNEL_WIDTH})
-        else()
-            message(FATAL_ERROR "Unsupported width for 4xn reference kernels")
-        endif()
-    elseif(${GMX_NBNXN_REF_KERNEL_TYPE} STREQUAL "2xnn")
-        if(${GMX_NBNXN_REF_KERNEL_WIDTH} STREQUAL "8" OR ${GMX_NBNXN_REF_KERNEL_WIDTH} STREQUAL "16")
-            add_definitions(-DGMX_NBNXN_SIMD_2XNN -DGMX_SIMD_REF_WIDTH=${GMX_NBNXN_REF_KERNEL_WIDTH})
-        else()
-            message(FATAL_ERROR "Unsupported width for 2xn reference kernels")
-        endif()
-    else()
-        message(FATAL_ERROR "Unsupported kernel type")
-    endif()
-else()
-    gmx_invalid_option_value(GMX_CPU_ACCELERATION)
-endif()
-gmx_check_if_changed(ACCELERATION_CHANGED GMX_CPU_ACCELERATION)
-if (ACCELERATION_CHANGED AND DEFINED ACCELERATION_STATUS_MESSAGE)
-    message(STATUS "${ACCELERATION_STATUS_MESSAGE}")
-endif()
  
  # Process QM/MM Settings
  if(${GMX_QMMM_PROGRAM} STREQUAL "GAUSSIAN")
  
  # Process QM/MM Settings
  if(${GMX_QMMM_PROGRAM} STREQUAL "GAUSSIAN")
diff --git a/cmake/TestAVX.c b/cmake/TestAVX.c

deleted file mode 100644 (file)

index 0878dcf..0000000
--- a/cmake/TestAVX.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#include <immintrin.h>
-
-int main()
-{
-    __m256 x  = _mm256_set_ps(0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5);
-    x = _mm256_rsqrt_ps(x);
-    return 0;
-}
diff --git a/cmake/gmxFindFlagsForSource.cmake b/cmake/gmxFindFlagsForSource.cmake

new file mode 100644 (file)

index 0000000..3ebb57b
--- /dev/null
+++ b/cmake/gmxFindFlagsForSource.cmake
@@ -0,0 +1,99 @@
+#
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2013,2014, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+
+# Helper routine to find the first flag (from a list of candidates) with which
+# a specific C test program compiles, and to append that flag to a flags
+# variable in the caller's scope.
+#
+# VARIABLE            Name of the cached result: set to 1 when the source
+#                     compiled with one of the candidates (or without any
+#                     extra flag), 0 otherwise. The flag that worked is
+#                     cached as ${VARIABLE}_FLAG.
+# DESCRIPTION         Text string describing what flag we are trying to find
+# SOURCE              Source code to test (the program text itself, not a
+#                     file name); it is compiled with the C compiler
+# CFLAGSVAR           Name of the variable (string) holding the flags found so
+#                     far. Its contents are used in every test compile, and
+#                     the flag found here is appended to it.
+# Args 5 through N    Multiple strings with acceleration flags to test, in
+#                     order of preference
+function(gmx_find_cflag_for_source VARIABLE DESCRIPTION SOURCE CFLAGSVAR)
+    # Only run the tests once; later configure runs reuse the cached result.
+    if(NOT DEFINED ${VARIABLE})
+        # Insert a blank element last in the list (try without any flags too).
+        # This must come last, since some compilers (Intel) might try to
+        # emulate AVX instructions with SSE4.1 otherwise.
+        foreach(_testflag ${ARGN} "")
+            message(STATUS "Try ${DESCRIPTION} = [${_testflag}]")
+            set(CMAKE_REQUIRED_FLAGS "${${CFLAGSVAR}} ${_testflag}")
+            # Make a valid variable name from the flag string: replace all
+            # non-alphanumerical chars. check_c_source_compiles() caches its
+            # result under this name and does not recompile when the name is
+            # already defined, so the name has to be unique per test
+            # (VARIABLE) and not only per flag. Otherwise a different source
+            # tried with the same flag (in particular the empty one) would
+            # silently reuse the result of an unrelated test.
+            string(REGEX REPLACE "[^a-zA-Z0-9]+" "_" _compile_variable "${VARIABLE}_C_FLAG_${_testflag}")
+            check_c_source_compiles("${SOURCE}" ${_compile_variable})
+            if(${_compile_variable})
+                set(${VARIABLE}_FLAG "${_testflag}" CACHE INTERNAL "${DESCRIPTION}")
+                set(${VARIABLE} 1 CACHE INTERNAL "Result of test for ${DESCRIPTION}" FORCE)
+                break()
+            else()
+                set(${VARIABLE} 0 CACHE INTERNAL "Result of test for ${DESCRIPTION}" FORCE)
+            endif()
+        endforeach()
+    endif()
+    # Export the accumulated flags to the caller when a working flag is known.
+    if(${VARIABLE})
+        set(${CFLAGSVAR} "${${CFLAGSVAR}} ${${VARIABLE}_FLAG}" PARENT_SCOPE)
+    endif()
+endfunction()
+
+
+# Helper routine to find the first flag (from a list of candidates) with which
+# a specific C++ test program compiles, and to append that flag to a flags
+# variable in the caller's scope.
+#
+# VARIABLE            Name of the cached result: set to 1 when the source
+#                     compiled with one of the candidates (or without any
+#                     extra flag), 0 otherwise. The flag that worked is
+#                     cached as ${VARIABLE}_FLAG.
+# DESCRIPTION         Text string describing what flag we are trying to find
+# SOURCE              Source code to test (the program text itself, not a
+#                     file name); it is compiled with the C++ compiler
+# CXXFLAGSVAR         Name of the variable (string) holding the flags found so
+#                     far. Its contents are used in every test compile, and
+#                     the flag found here is appended to it.
+# Args 5 through N    Multiple strings with acceleration flags to test, in
+#                     order of preference
+function(gmx_find_cxxflag_for_source VARIABLE DESCRIPTION SOURCE CXXFLAGSVAR)
+    # Only run the tests once; later configure runs reuse the cached result.
+    if(NOT DEFINED ${VARIABLE})
+        # Insert a blank element last in the list (try without any flags too).
+        # This must come last, since some compilers (Intel) might try to
+        # emulate AVX instructions with SSE4.1 otherwise.
+        foreach(_testflag ${ARGN} "")
+            message(STATUS "Try ${DESCRIPTION} = [${_testflag}]")
+            set(CMAKE_REQUIRED_FLAGS "${${CXXFLAGSVAR}} ${_testflag}")
+            # Make a valid variable name from the flag string: replace all
+            # non-alphanumerical chars. check_cxx_source_compiles() caches its
+            # result under this name and does not recompile when the name is
+            # already defined, so the name has to be unique per test
+            # (VARIABLE) and not only per flag. Otherwise a different source
+            # tried with the same flag (in particular the empty one) would
+            # silently reuse the result of an unrelated test.
+            string(REGEX REPLACE "[^a-zA-Z0-9]+" "_" _compile_variable "${VARIABLE}_CXX_FLAG_${_testflag}")
+            check_cxx_source_compiles("${SOURCE}" ${_compile_variable})
+            if(${_compile_variable})
+                set(${VARIABLE}_FLAG "${_testflag}" CACHE INTERNAL "${DESCRIPTION}")
+                set(${VARIABLE} 1 CACHE INTERNAL "Result of test for ${DESCRIPTION}" FORCE)
+                break()
+            else()
+                set(${VARIABLE} 0 CACHE INTERNAL "Result of test for ${DESCRIPTION}" FORCE)
+            endif()
+        endforeach()
+    endif()
+    # Export the accumulated flags to the caller when a working flag is known.
+    if(${VARIABLE})
+        set(${CXXFLAGSVAR} "${${CXXFLAGSVAR}} ${${VARIABLE}_FLAG}" PARENT_SCOPE)
+    endif()
+endfunction()
+
diff --git a/cmake/gmxTestAVXMaskload.cmake b/cmake/gmxTestAVXMaskload.cmake

index 659075d3b17fc935da3beaa7a27cf6270bd41010..8b05b12979b490a505882a96678009d9eb4c7b37 100644 (file)
--- a/cmake/gmxTestAVXMaskload.cmake
+++ b/cmake/gmxTestAVXMaskload.cmake
@@ -1,7 +1,7 @@
  #
  # This file is part of the GROMACS molecular simulation package.
  #
  #
  # This file is part of the GROMACS molecular simulation package.
  #
-# Copyright (c) 2012,2013, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
  # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  # and including many others, as listed in the AUTHORS file in the
  # top-level source directory and at http://www.gromacs.org.
  # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  # and including many others, as listed in the AUTHORS file in the
  # top-level source directory and at http://www.gromacs.org.
@@ -32,7 +32,7 @@
  # To help us fund GROMACS development, we humbly ask that you cite
  # the research papers on the package. Check out http://www.gromacs.org.
  
  # To help us fund GROMACS development, we humbly ask that you cite
  # the research papers on the package. Check out http://www.gromacs.org.
  
-#  GMX_TEST_AVX_GCC_MASKLOAD_BUG(VARIABLE)
+#  GMX_TEST_AVX_GCC_MASKLOAD_BUG(VARIABLE AVX_CFLAGS)
  #
  #  VARIABLE will be set if the compiler is a buggy version
  #  of GCC (prior to 4.5.3, and maybe 4.6) that has an incorrect second
  #
  #  VARIABLE will be set if the compiler is a buggy version
  #  of GCC (prior to 4.5.3, and maybe 4.6) that has an incorrect second
@@ -41,9 +41,9 @@
  #  You need to use this variable in a cmakedefine, and then handle
  #  the case separately in your code - no automatic cure, unfortunately.
  #
  #  You need to use this variable in a cmakedefine, and then handle
  #  the case separately in your code - no automatic cure, unfortunately.
  #
-MACRO(GMX_TEST_AVX_GCC_MASKLOAD_BUG AVX_CFLAGS VARIABLE)
+MACRO(GMX_TEST_AVX_GCC_MASKLOAD_BUG VARIABLE AVX_CFLAGS)
      IF(NOT DEFINED ${VARIABLE})
      IF(NOT DEFINED ${VARIABLE})
-        MESSAGE(STATUS "Checking for gcc AVX maskload bug") 
+        MESSAGE(STATUS "Checking for gcc AVX maskload bug")
          # some compilers like clang accept both cases, 
          # so first try a normal compile to avoid flagging those as buggy.
          TRY_COMPILE(${VARIABLE}_COMPILEOK "${CMAKE_BINARY_DIR}"
          # some compilers like clang accept both cases, 
          # so first try a normal compile to avoid flagging those as buggy.
          TRY_COMPILE(${VARIABLE}_COMPILEOK "${CMAKE_BINARY_DIR}"
@@ -65,7 +65,7 @@ MACRO(GMX_TEST_AVX_GCC_MASKLOAD_BUG AVX_CFLAGS VARIABLE)
              ENDIF()
          ENDIF()
      ENDIF(NOT DEFINED ${VARIABLE})
              ENDIF()
          ENDIF()
      ENDIF(NOT DEFINED ${VARIABLE})
-ENDMACRO(GMX_TEST_AVX_GCC_MASKLOAD_BUG VARIABLE)
+ENDMACRO()
  
  
  
  
  
  
diff --git a/cmake/gmxTestCPUAcceleration.cmake b/cmake/gmxTestCPUAcceleration.cmake

new file mode 100644 (file)

index 0000000..537379d
--- /dev/null
+++ b/cmake/gmxTestCPUAcceleration.cmake
@@ -0,0 +1,321 @@
+#
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+
+# include avx test source, used if the AVX flags are set below
+include(gmxTestAVXMaskload)
+include(gmxFindFlagsForSource)
+
+
+macro(gmx_use_clang_as_with_gnu_compilers_on_osx)
+    # On OS X, we often want to use gcc instead of clang, since gcc supports
+    # OpenMP. However, by default gcc uses the external system assembler, which
+    # does not support AVX, so we need to tell gcc (via the assembler option
+    # passed through "-Wa,-q") to use the clang compiler's assembler instead -
+    # and this has to happen before we detect AVX flags.
+    #
+    # Takes no arguments. The flag is tested separately for the C and C++
+    # compilers; presumably gmx_test_cflag/gmx_test_cxxflag append it to
+    # ACCELERATION_C_FLAGS / ACCELERATION_CXX_FLAGS when it is accepted
+    # (those helpers are defined elsewhere - confirm there).
+    #
+    # The compiler IDs are quoted so that an empty or unset ID is compared as
+    # an (empty) string instead of producing a malformed if() expression.
+    if(APPLE AND "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
+        gmx_test_cflag(GNU_C_USE_CLANG_AS "-Wa,-q" ACCELERATION_C_FLAGS)
+    endif()
+    if(APPLE AND "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
+        gmx_test_cxxflag(GNU_CXX_USE_CLANG_AS "-Wa,-q" ACCELERATION_CXX_FLAGS)
+    endif()
+endmacro()
+
+
+macro(gmx_test_cpu_acceleration)
+#
+# To improve backward compatibility on x86 SIMD architectures,
+# we set the flags for all accelerations that are supported, not only
+# the most recent instruction set. I.e., if your machine supports AVX2_256,
+# we will set flags both for AVX2_256, AVX_256, SSE4.1, and SSE2 support.
+
+if(${GMX_CPU_ACCELERATION} STREQUAL "NONE")
+    # nothing to do configuration-wise
+    set(ACCELERATION_STATUS_MESSAGE "CPU SIMD acceleration disabled")
+elseif(${GMX_CPU_ACCELERATION} STREQUAL "SSE2")
+
+    gmx_find_cflag_for_source(CFLAGS_SSE2 "C compiler SSE2 flag"
+                              "#include<xmmintrin.h>
+                              int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_rsqrt_ps(x);return 0;}"
+                              ACCELERATION_C_FLAGS
+                              "-msse2" "/arch:SSE2")
+    gmx_find_cxxflag_for_source(CXXFLAGS_SSE2 "C++ compiler SSE2 flag"
+                                "#include<xmmintrin.h>
+                                int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_rsqrt_ps(x);return 0;}"
+                                ACCELERATION_CXX_FLAGS
+                                "-msse2" "/arch:SSE2")
+
+    if(NOT CFLAGS_SSE2 OR NOT CXXFLAGS_SSE2)
+        message(FATAL_ERROR "Cannot find SSE2 compiler flag. Use a newer compiler, or disable acceleration (slower).")
+    endif()
+
+    set(GMX_CPU_ACCELERATION_X86_SSE2 1)
+    set(GMX_X86_SSE2 1)
+
+    set(ACCELERATION_STATUS_MESSAGE "Enabling SSE2 SIMD Gromacs acceleration")
+
+elseif(${GMX_CPU_ACCELERATION} STREQUAL "SSE4.1")
+
+    # Note: MSVC enables SSE4.1 with the SSE2 flag, so we include that in testing.
+    gmx_find_cflag_for_source(CFLAGS_SSE4_1 "C compiler SSE4.1 flag"
+                              "#include<smmintrin.h>
+                              int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_dp_ps(x,x,0x77);return 0;}"
+                              ACCELERATION_C_FLAGS
+                              "-msse4.1" "/arch:SSE4.1" "/arch:SSE2")
+    gmx_find_cxxflag_for_source(CXXFLAGS_SSE4_1 "C++ compiler SSE4.1 flag"
+                                "#include<smmintrin.h>
+                                int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_dp_ps(x,x,0x77);return 0;}"
+                                ACCELERATION_CXX_FLAGS
+                                "-msse4.1" "/arch:SSE4.1" "/arch:SSE2")
+
+    if(NOT CFLAGS_SSE4_1 OR NOT CXXFLAGS_SSE4_1)
+        message(FATAL_ERROR "Cannot find SSE4.1 compiler flag. "
+                            "Use a newer compiler, or choose SSE2 acceleration (slower).")
+    endif()
+
+    if(CMAKE_C_COMPILER_ID MATCHES "Intel" AND CMAKE_C_COMPILER_VERSION VERSION_EQUAL "11.1")
+        message(FATAL_ERROR "You are using Intel compiler version 11.1, which produces incorrect results with SSE4.1 acceleration. You need to use a newer compiler (e.g. icc >= 12.0) or in worst case try a lower level of acceleration if performance is not critical.")
+    endif()
+
+    set(GMX_CPU_ACCELERATION_X86_SSE4_1 1)
+    set(GMX_X86_SSE4_1 1)
+    set(GMX_X86_SSE2   1)
+    set(ACCELERATION_STATUS_MESSAGE "Enabling SSE4.1 SIMD Gromacs acceleration")
+
+elseif(${GMX_CPU_ACCELERATION} STREQUAL "AVX_128_FMA")
+
+    gmx_use_clang_as_with_gnu_compilers_on_osx()
+
+    # AVX128/FMA on AMD is a bit complicated. We need to do detection in three stages:
+    # 1) Find the flags required for generic AVX support
+    # 2) Find the flags necessary to enable fused-multiply add support
+    # 3) Optional: Find a flag to enable the AMD XOP instructions
+
+    ### STAGE 1: Find the generic AVX flag
+    gmx_find_cflag_for_source(CFLAGS_AVX_128 "C compiler AVX (128 bit) flag"
+                              "#include<immintrin.h>
+                              int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_permute_ps(x,1);return 0;}"
+                              ACCELERATION_C_FLAGS
+                              "-mavx" "/arch:AVX")
+    gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128 "C++ compiler AVX (128 bit) flag"
+                                "#include<immintrin.h>
+                                int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_permute_ps(x,1);return 0;}"
+                                ACCELERATION_CXX_FLAGS
+                                "-mavx" "/arch:AVX")
+
+    ### STAGE 2: Find the fused-multiply add flag.
+    # GCC requires x86intrin.h for FMA support. MSVC 2010 requires intrin.h for FMA support.
+    check_include_file(x86intrin.h HAVE_X86INTRIN_H ${ACCELERATION_C_FLAGS})
+    check_include_file(intrin.h HAVE_INTRIN_H ${ACCELERATION_C_FLAGS})
+    if(HAVE_X86INTRIN_H)
+        set(INCLUDE_X86INTRIN_H "#include <x86intrin.h>")
+    endif()
+    if(HAVE_INTRIN_H)
+        # Include the header whose presence was tested above (intrin.h); the
+        # resulting line is pasted into the FMA/XOP test programs below.
+        set(INCLUDE_INTRIN_H "#include <intrin.h>")
+    endif()
+
+    gmx_find_cflag_for_source(CFLAGS_AVX_128_FMA "C compiler AVX (128 bit) FMA4 flag"
+"#include<immintrin.h>
+${INCLUDE_X86INTRIN_H}
+${INCLUDE_INTRIN_H}
+int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_macc_ps(x,x,x);return 0;}"
+                              ACCELERATION_C_FLAGS
+                              "-mfma4")
+    gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128_FMA "C++ compiler AVX (128 bit) FMA4 flag"
+"#include<immintrin.h>
+${INCLUDE_X86INTRIN_H}
+${INCLUDE_INTRIN_H}
+int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_macc_ps(x,x,x);return 0;}"
+                                ACCELERATION_CXX_FLAGS
+                                "-mfma4")
+
+    # We only need to check the last (FMA) test; that will always fail if the basic AVX128 test failed
+    if(NOT CFLAGS_AVX_128_FMA OR NOT CXXFLAGS_AVX_128_FMA)
+        message(FATAL_ERROR "Cannot find compiler flags for 128 bit AVX with FMA support. Use a newer compiler, or choose SSE4.1 acceleration (slower).")
+    endif()
+
+    ### STAGE 3: Optional: Find the XOP instruction flag (No point in yelling if this does not work)
+    gmx_find_cflag_for_source(CFLAGS_AVX_128_XOP "C compiler AVX (128 bit) XOP flag"
+"#include<immintrin.h>
+${INCLUDE_X86INTRIN_H}
+${INCLUDE_INTRIN_H}
+int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_frcz_ps(x);return 0;}"
+                              ACCELERATION_C_FLAGS
+                              "-mxop")
+    gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128_XOP "C++ compiler AVX (128 bit) XOP flag"
+"#include<immintrin.h>
+${INCLUDE_X86INTRIN_H}
+${INCLUDE_INTRIN_H}
+int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_frcz_ps(x);return 0;}"
+                                ACCELERATION_CXX_FLAGS
+                                "-mxop")
+
+    # We don't have the full compiler version string yet (BUILD_C_COMPILER),
+    # so we can't distinguish vanilla from Apple clang versions, but catering for a few rare AMD
+    # hackintoshes is not worth the effort.
+    # The compiler IDs are quoted so that an empty ID (e.g. a language that is
+    # not enabled) yields a false comparison instead of a malformed if() expression.
+    if (APPLE AND ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR
+                "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang"))
+        message(WARNING "Due to a known compiler bug, Clang up to version 3.2 (and Apple Clang up to version 4.1) produces incorrect code with AVX_128_FMA acceleration. As we cannot work around this bug on OS X, you will have to select a different compiler or CPU acceleration.")
+    endif()
+
+
+    if (GMX_USE_CLANG_C_FMA_BUG_WORKAROUND)
+        # we assume that we have an external assembler that supports AVX
+        message(STATUS "Clang ${CMAKE_C_COMPILER_VERSION} detected, enabling FMA bug workaround")
+        set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -no-integrated-as")
+    endif()
+    if (GMX_USE_CLANG_CXX_FMA_BUG_WORKAROUND)
+        # we assume that we have an external assembler that supports AVX
+        message(STATUS "Clang ${CMAKE_CXX_COMPILER_VERSION} detected, enabling FMA bug workaround")
+        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -no-integrated-as")
+    endif()
+
+    gmx_test_avx_gcc_maskload_bug(GMX_X86_AVX_GCC_MASKLOAD_BUG "${ACCELERATION_C_FLAGS}")
+
+    set(GMX_CPU_ACCELERATION_X86_AVX_128_FMA 1)
+    set(GMX_X86_AVX_128_FMA 1)
+    set(GMX_X86_SSE4_1      1)
+    set(GMX_X86_SSE2        1)
+
+    set(ACCELERATION_STATUS_MESSAGE "Enabling 128-bit AVX SIMD Gromacs acceleration (with fused-multiply add)")
+
+elseif(${GMX_CPU_ACCELERATION} STREQUAL "AVX_256")
+
+    gmx_use_clang_as_with_gnu_compilers_on_osx()
+
+    gmx_find_cflag_for_source(CFLAGS_AVX "C compiler AVX (256 bit) flag"
+                              "#include<immintrin.h>
+                              int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_add_ps(x,x);return 0;}"
+                              ACCELERATION_C_FLAGS
+                              "-mavx" "/arch:AVX")
+    gmx_find_cxxflag_for_source(CXXFLAGS_AVX "C++ compiler AVX (256 bit) flag"
+                                "#include<immintrin.h>
+                                int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_add_ps(x,x);return 0;}"
+                                ACCELERATION_CXX_FLAGS
+                                "-mavx" "/arch:AVX")
+
+    if(NOT CFLAGS_AVX OR NOT CXXFLAGS_AVX)
+        message(FATAL_ERROR "Cannot find AVX compiler flag. Use a newer compiler, or choose SSE4.1 acceleration (slower).")
+    endif()
+
+    gmx_test_avx_gcc_maskload_bug(GMX_X86_AVX_GCC_MASKLOAD_BUG "${ACCELERATION_C_FLAGS}")
+
+    set(GMX_CPU_ACCELERATION_X86_AVX_256 1)
+    set(GMX_X86_AVX_256  1)
+    set(GMX_X86_SSE4_1   1)
+    set(GMX_X86_SSE2     1)
+
+    set(ACCELERATION_STATUS_MESSAGE "Enabling 256-bit AVX SIMD Gromacs acceleration")
+
+elseif(${GMX_CPU_ACCELERATION} STREQUAL "AVX2_256")
+
+    # Comment out this line for AVX2 development
+    message(FATAL_ERROR "AVX2_256 is disabled until the implementation has been commited.")
+
+    gmx_use_clang_as_with_gnu_compilers_on_osx()
+
+    gmx_find_cflag_for_source(CFLAGS_AVX2 "C compiler AVX2 flag"
+                              "#include<immintrin.h>
+                              int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_fmadd_ps(x,x,x);return 0;}"
+                              ACCELERATION_C_FLAGS
+                              "-march=core-avx2" "-mavx2" "/arch:AVX") # no AVX2-specific flag for MSVC yet
+    gmx_find_cxxflag_for_source(CXXFLAGS_AVX2 "C++ compiler AVX2 flag"
+                                "#include<immintrin.h>
+                                int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_fmadd_ps(x,x,x);return 0;}"
+                                ACCELERATION_CXX_FLAGS
+                                "-march=core-avx2" "-mavx2" "/arch:AVX") # no AVX2-specific flag for MSVC yet
+
+    if(NOT CFLAGS_AVX2 OR NOT CXXFLAGS_AVX2)
+        message(FATAL_ERROR "Cannot find AVX2 compiler flag. Use a newer compiler, or choose AVX acceleration (slower).")
+    endif()
+
+    # No need to test for Maskload bug - it was fixed before gcc added AVX2 support
+
+    set(GMX_CPU_ACCELERATION_X86_AVX2_256 1)
+    set(GMX_X86_AVX2_256 1)
+    set(GMX_X86_AVX_256  1)
+    set(GMX_X86_SSE4_1   1)
+    set(GMX_X86_SSE2     1)
+
+    set(ACCELERATION_STATUS_MESSAGE "Enabling 256-bit AVX2 Gromacs acceleration")
+
+elseif(${GMX_CPU_ACCELERATION} STREQUAL "IBM_QPX")
+
+    try_compile(TEST_QPX ${CMAKE_BINARY_DIR}
+        "${CMAKE_SOURCE_DIR}/cmake/TestQPX.c")
+
+    if (TEST_QPX)
+        message(WARNING "IBM QPX acceleration was selected. This will work, but SIMD-accelerated kernels are only available for the Verlet cut-off scheme. The plain C kernels that are used for the group cut-off scheme kernels will be slow, so please consider using the Verlet cut-off scheme.")
+        set(GMX_CPU_ACCELERATION_IBM_QPX 1)
+        set(ACCELERATION_STATUS_MESSAGE "Enabling IBM QPX SIMD acceleration")
+
+    else()
+        message(FATAL_ERROR "Cannot compile the requested IBM QPX intrinsics. If you are compiling for BlueGene/Q with the XL compilers, use 'cmake .. -DCMAKE_TOOLCHAIN_FILE=Platform/BlueGeneQ-static-XL-C' to set up the tool chain.")
+    endif()
+
+elseif(${GMX_CPU_ACCELERATION} STREQUAL "SPARC64_HPC_ACE")
+
+    set(GMX_CPU_ACCELERATION_SPARC64_HPC_ACE 1)
+    set(ACCELERATION_STATUS_MESSAGE "Enabling Sparc64 HPC-ACE SIMD acceleration")
+
+elseif(${GMX_CPU_ACCELERATION} STREQUAL "REFERENCE")
+
+    add_definitions(-DGMX_SIMD_REFERENCE_PLAIN_C)
+    # Validate the reference-kernel layout and SIMD width, then export them as
+    # preprocessor definitions. Accepted combinations:
+    #   4xn  kernels: width 2, 4 or 8
+    #   2xnn kernels: width 8 or 16
+    # Expansions are quoted so unset/empty variables reach the FATAL_ERROR
+    # branches below rather than producing a malformed if() expression.
+    if("${GMX_NBNXN_REF_KERNEL_TYPE}" STREQUAL "4xn")
+        if("${GMX_NBNXN_REF_KERNEL_WIDTH}" STREQUAL "2" OR "${GMX_NBNXN_REF_KERNEL_WIDTH}" STREQUAL "4" OR "${GMX_NBNXN_REF_KERNEL_WIDTH}" STREQUAL "8")
+            add_definitions(-DGMX_NBNXN_SIMD_4XN -DGMX_SIMD_REF_WIDTH=${GMX_NBNXN_REF_KERNEL_WIDTH})
+        else()
+            message(FATAL_ERROR "Unsupported width for 4xn reference kernels")
+        endif()
+    elseif("${GMX_NBNXN_REF_KERNEL_TYPE}" STREQUAL "2xnn")
+        if("${GMX_NBNXN_REF_KERNEL_WIDTH}" STREQUAL "8" OR "${GMX_NBNXN_REF_KERNEL_WIDTH}" STREQUAL "16")
+            add_definitions(-DGMX_NBNXN_SIMD_2XNN -DGMX_SIMD_REF_WIDTH=${GMX_NBNXN_REF_KERNEL_WIDTH})
+        else()
+            # The kernel type checked in this branch is "2xnn"; name it correctly.
+            message(FATAL_ERROR "Unsupported width for 2xnn reference kernels")
+        endif()
+    else()
+        message(FATAL_ERROR "Unsupported kernel type '${GMX_NBNXN_REF_KERNEL_TYPE}' for reference kernels (GMX_NBNXN_REF_KERNEL_TYPE must be 4xn or 2xnn)")
+    endif()
+
+else()
+    gmx_invalid_option_value(GMX_CPU_ACCELERATION)
+endif()
+
+
+# Report the selected acceleration. The message is printed only when
+# gmx_check_if_changed sets ACCELERATION_CHANGED (presumably when
+# GMX_CPU_ACCELERATION differs from the previous configure run - confirm
+# against that helper) and one of the branches above defined a status message.
+gmx_check_if_changed(ACCELERATION_CHANGED GMX_CPU_ACCELERATION)
+if (ACCELERATION_CHANGED AND DEFINED ACCELERATION_STATUS_MESSAGE)
+    message(STATUS "${ACCELERATION_STATUS_MESSAGE}")
+endif()
+
+endmacro()
+
diff --git a/src/gromacs/gmxlib/gmx_cpuid.c b/src/gromacs/gmxlib/gmx_cpuid.c

index 0b56208bbe34381b9b11b758dc3f5c17bcc361ee..0824591f1cbbf88dced57ca3991a6f75ee18b2bc 100644 (file)
--- a/src/gromacs/gmxlib/gmx_cpuid.c
+++ b/src/gromacs/gmxlib/gmx_cpuid.c
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2012,2013, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -142,6 +142,7 @@ gmx_cpuid_acceleration_string[GMX_CPUID_NACCELERATIONS] =
      "SSE4.1",
      "AVX_128_FMA",
      "AVX_256",
      "SSE4.1",
      "AVX_128_FMA",
      "AVX_256",
+    "AVX2_256",
      "Sparc64 HPC-ACE",
      "IBM_QPX"
  };
      "Sparc64 HPC-ACE",
      "IBM_QPX"
  };
@@ -1057,7 +1058,11 @@ gmx_cpuid_acceleration_suggest  (gmx_cpuid_t                 cpuid)
  
      if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_INTEL)
      {
  
      if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_INTEL)
      {
-        if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
+        if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX2))
+        {
+            tmpacc = GMX_CPUID_ACCELERATION_X86_AVX2_256;
+        }
+        else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
          {
              tmpacc = GMX_CPUID_ACCELERATION_X86_AVX_256;
          }
          {
              tmpacc = GMX_CPUID_ACCELERATION_X86_AVX_256;
          }
diff --git a/src/gromacs/legacyheaders/gmx_cpuid.h b/src/gromacs/legacyheaders/gmx_cpuid.h

index 1070e97f02db97d5ef1a3b095160a00f91d092f9..15c2b3bbf356a7696ad1d4cb336bce8a1838c4df 100644 (file)
--- a/src/gromacs/legacyheaders/gmx_cpuid.h
+++ b/src/gromacs/legacyheaders/gmx_cpuid.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2012,2013, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -128,6 +128,7 @@ enum gmx_cpuid_acceleration
      GMX_CPUID_ACCELERATION_X86_SSE4_1,
      GMX_CPUID_ACCELERATION_X86_AVX_128_FMA,
      GMX_CPUID_ACCELERATION_X86_AVX_256,
      GMX_CPUID_ACCELERATION_X86_SSE4_1,
      GMX_CPUID_ACCELERATION_X86_AVX_128_FMA,
      GMX_CPUID_ACCELERATION_X86_AVX_256,
+    GMX_CPUID_ACCELERATION_X86_AVX2_256,
      GMX_CPUID_ACCELERATION_SPARC64_HPC_ACE,
      GMX_CPUID_ACCELERATION_IBM_QPX,
      GMX_CPUID_NACCELERATIONS
      GMX_CPUID_ACCELERATION_SPARC64_HPC_ACE,
      GMX_CPUID_ACCELERATION_IBM_QPX,
      GMX_CPUID_NACCELERATIONS
author	Erik Lindahl <erik@kth.se>
	Wed, 11 Dec 2013 21:46:13 +0000 (22:46 +0100)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Sat, 1 Feb 2014 23:37:57 +0000 (00:37 +0100)
CMakeLists.txt		patch \| blob \| history
cmake/TestAVX.c	[deleted file]	patch \| blob \| history
cmake/gmxFindFlagsForSource.cmake	[new file with mode: 0644]	patch \| blob
cmake/gmxTestAVXMaskload.cmake		patch \| blob \| history
cmake/gmxTestCPUAcceleration.cmake	[new file with mode: 0644]	patch \| blob
src/gromacs/gmxlib/gmx_cpuid.c		patch \| blob \| history
src/gromacs/legacyheaders/gmx_cpuid.h		patch \| blob \| history