include(gmxDetectTargetArchitecture)
gmx_detect_target_architecture()
+
+########################################################################
+# Detect CXX11 support and flags
+########################################################################
+# The cmake/Check{C,CXX}CompilerFlag.cmake files in the GROMACS distribution
+# are used with permission from CMake v3.0.0 so that GROMACS can detect
+# invalid options with the Intel Compilers, and we have added a line
+# to detect warnings with the Fujitsu compilers on K computer and ICC.
+# CMake-3.0 also has a bug where the FAIL_REGEX pattern for AIX contains
+# a semicolon. Since this is also used as a separator in lists inside CMake,
+# that string ends up being split into two separate patterns, and the last
+# part is just a single word that also matches other messages. We solved this
+# by replacing the semicolon with a period that matches any character.
+#
+# These files should be removed from the source tree when a CMake version that
+# includes the features in question becomes required for building GROMACS.
+include(CheckCCompilerFlag)
+include(CheckCXXCompilerFlag)
+
+# This must come early, since some of our configuration flag tests
+# depend on being able to compile C++11 source files
+include(gmxTestCXX11)
+gmx_test_cxx11(GMX_CXX11_SUPPORTED GMX_CXX11_FLAGS)
+if(NOT GMX_CXX11_SUPPORTED)
+message(FATAL_ERROR "This version of GROMACS requires C++11. Please use a newer compiler or use the GROMACS 5.1.x release. Note it might be sufficient to instruct the compiler to use a newer STL version. See the installation guide for details.")
+endif()
+
+# Make sure all tests are run in C++11 mode
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GMX_CXX11_FLAGS}")
+
########################################################################
# User input options #
########################################################################
# These need to be done early (before further tests).
#####################################################################
-# The cmake/Check{C,CXX}CompilerFlag.cmake files in the GROMACS distribution
-# are used with permission from CMake v3.0.0 so that GROMACS can detect
-# invalid options with the Intel Compilers, and we have added a line
-# to detect warnings with the Fujitsu compilers on K computer and ICC.
-# CMake-3.0 also has a bug where the FAIL_REGEX pattern for AIX contains
-# a semicolon. Since this is also used as a separator in lists inside CMake,
-# that string ends up being split into two separate patterns, and the last
-# part is just a single word that also matches other messages. We solved this
-# by replacing the semicolon with a period that matches any character.
-#
-# These files should be removed from the source tree when a CMake version that
-# includes the features in question becomes required for building GROMACS.
-include(CheckCCompilerFlag)
-include(CheckCXXCompilerFlag)
-
include(gmxCFlags)
gmx_c_flags()
include(gmxTestPipes)
gmx_test_pipes(HAVE_PIPES)
-include(gmxTestCXX11)
-gmx_test_cxx11(GMX_CXX11_SUPPORTED GMX_CXX11_FLAGS)
-if(NOT GMX_CXX11_SUPPORTED)
- message(FATAL_ERROR "This version of GROMACS requires C++11. Please use a newer compiler or use the GROMACS 5.1.x release. Note it might be sufficient to instruct the compiler to use a newer STL version. See the installation guide for details.")
-endif()
-set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} ${GMX_CXX11_FLAGS}")
-
-# Now we can test for CXX11_REGEX include file
check_include_file_cxx(regex HAVE_CXX11_REGEX)
include(gmxTestXDR)
message(STATUS "Detecting best SIMD instructions for this CPU")
# Get CPU SIMD properties information
- set(_compile_definitions "${GCC_INLINE_ASM_DEFINE} -I${CMAKE_SOURCE_DIR}/src -DGMX_CPUID_STANDALONE")
- if(GMX_TARGET_X86)
- set(_compile_definitions "${_compile_definitions} -DGMX_TARGET_X86")
- endif()
+ set(_compile_definitions "${GCC_INLINE_ASM_DEFINE} -I${CMAKE_SOURCE_DIR}/src -DGMX_CPUINFO_STANDALONE")
# We need to execute the binary, so this only works if not cross-compiling.
# However, note that we are NOT limited to x86.
if(NOT CMAKE_CROSSCOMPILING)
- try_run(GMX_CPUID_RUN_SIMD GMX_CPUID_COMPILED
+ try_run(GMX_CPUINFO_RUN_SIMD GMX_CPUINFO_COMPILED
${CMAKE_BINARY_DIR}
- ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_cpuid.c
+ ${CMAKE_SOURCE_DIR}/src/gromacs/hardware/cpuinfo.cpp
COMPILE_DEFINITIONS ${_compile_definitions}
RUN_OUTPUT_VARIABLE OUTPUT_TMP
- COMPILE_OUTPUT_VARIABLE GMX_CPUID_COMPILE_OUTPUT
- ARGS "-simd")
+ COMPILE_OUTPUT_VARIABLE GMX_CPUINFO_COMPILE_OUTPUT
+ ARGS "-features")
- if(NOT GMX_CPUID_COMPILED)
- message(WARNING "Cannot compile CPUID code, which means no SIMD instructions.")
- message(STATUS "Compile output: ${GMX_CPUID_COMPILE_OUTPUT}")
+ if(NOT GMX_CPUINFO_COMPILED)
+ message(WARNING "Cannot compile cpuinfo code, which means no SIMD instructions.")
+ message(STATUS "Compile output: ${GMX_CPUINFO_COMPILE_OUTPUT}")
set(OUTPUT_TMP "None")
- elseif(NOT GMX_CPUID_RUN_SIMD EQUAL 0)
- message(WARNING "Cannot run CPUID code, which means no SIMD instructions.")
+ elseif(NOT GMX_CPUINFO_RUN_SIMD EQUAL 0)
+ message(WARNING "Cannot run cpuinfo code, which means no SIMD instructions.")
message(STATUS "Run output: ${OUTPUT_TMP}")
set(OUTPUT_TMP "None")
- endif(NOT GMX_CPUID_COMPILED)
+ endif(NOT GMX_CPUINFO_COMPILED)
- string(STRIP "${OUTPUT_TMP}" OUTPUT_SIMD)
+ # Translate the feature list printed by the cpuinfo test program (held in
+ # OUTPUT_TMP) into the name of the best SIMD instruction set to suggest.
+ #
+ # Start from "None" so that a failed compile/run (OUTPUT_TMP is forced to
+ # "None" above) or a feature list without any recognised SIMD flag still
+ # yields a defined value, instead of leaving OUTPUT_SIMD unset or inherited
+ # from an enclosing scope.
+ set(OUTPUT_SIMD "None")
+ # Every pattern is delimited by spaces so a feature name can only match as a
+ # whole word (" avx " must not match "avx2"). Within each chain the first
+ # matching branch wins, so the more capable instruction set is listed first.
+ # NOTE(review): this relies on the cpuinfo program printing a space before
+ # the first and after the last feature - confirm against cpuinfo.cpp.
+ if(GMX_TARGET_X86)
+     if(OUTPUT_TMP MATCHES " avx512er ")
+         set(OUTPUT_SIMD "AVX_512ER")
+     elseif(OUTPUT_TMP MATCHES " avx512f ")
+         set(OUTPUT_SIMD "AVX_512F")
+     elseif(OUTPUT_TMP MATCHES " avx2 ")
+         set(OUTPUT_SIMD "AVX2_256")
+     elseif(OUTPUT_TMP MATCHES " avx ")
+         if(OUTPUT_TMP MATCHES " fma4 ")
+             # AVX together with FMA4: an AMD CPU that works better with
+             # the 128-bit AVX + FMA flavour
+             set(OUTPUT_SIMD "AVX_128_FMA")
+         else()
+             # AVX without FMA4: Intel, use full 256-bit AVX
+             set(OUTPUT_SIMD "AVX_256")
+         endif()
+     elseif(OUTPUT_TMP MATCHES " sse4.1 ")
+         set(OUTPUT_SIMD "SSE4.1")
+     elseif(OUTPUT_TMP MATCHES " sse2 ")
+         set(OUTPUT_SIMD "SSE2")
+     endif()
+ else()
+     if(OUTPUT_TMP MATCHES " vsx ")
+         set(OUTPUT_SIMD "IBM_VSX")
+     elseif(OUTPUT_TMP MATCHES " vmx ")
+         set(OUTPUT_SIMD "IBM_VMX")
+     elseif(OUTPUT_TMP MATCHES " qpx ")
+         set(OUTPUT_SIMD "IBM_QPX")
+     elseif(OUTPUT_TMP MATCHES " neon_asimd ")
+         # Checked before plain neon so that the more capable set is
+         # chosen if the CPU reports both features.
+         set(OUTPUT_SIMD "ARM_NEON_ASIMD")
+     elseif(OUTPUT_TMP MATCHES " neon ")
+         set(OUTPUT_SIMD "ARM_NEON")
+     endif()
+ endif()
set(${_suggested_simd} "${OUTPUT_SIMD}" PARENT_SCOPE)
message(STATUS "Detected best SIMD instructions for this CPU - ${OUTPUT_SIMD}")
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
if(NOT CMAKE_CROSSCOMPILING)
# Get CPU information, e.g. for deciding what SIMD support exists
- set(_compile_definitions "${GCC_INLINE_ASM_DEFINE} -I${CMAKE_SOURCE_DIR}/src -DGMX_CPUID_STANDALONE")
- if(GMX_TARGET_X86)
- set(_compile_definitions "${_compile_definitions} -DGMX_TARGET_X86")
- endif()
- try_run(GMX_CPUID_RUN_VENDOR GMX_CPUID_COMPILED
+ set(_compile_definitions "${GCC_INLINE_ASM_DEFINE} -I${CMAKE_SOURCE_DIR}/src -DGMX_CPUINFO_STANDALONE")
+ try_run(GMX_CPUINFO_RUN_VENDOR GMX_CPUINFO_COMPILED
${CMAKE_BINARY_DIR}
- ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_cpuid.c
+ ${CMAKE_SOURCE_DIR}/src/gromacs/hardware/cpuinfo.cpp
COMPILE_DEFINITIONS ${_compile_definitions}
RUN_OUTPUT_VARIABLE OUTPUT_CPU_VENDOR ARGS "-vendor")
- try_run(GMX_CPUID_RUN_BRAND GMX_CPUID_COMPILED
+ try_run(GMX_CPUINFO_RUN_BRAND GMX_CPUINFO_COMPILED
${CMAKE_BINARY_DIR}
- ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_cpuid.c
+ ${CMAKE_SOURCE_DIR}/src/gromacs/hardware/cpuinfo.cpp
COMPILE_DEFINITIONS ${_compile_definitions}
RUN_OUTPUT_VARIABLE OUTPUT_CPU_BRAND ARGS "-brand")
- try_run(GMX_CPUID_RUN_FAMILY GMX_CPUID_COMPILED
+ try_run(GMX_CPUINFO_RUN_FAMILY GMX_CPUINFO_COMPILED
${CMAKE_BINARY_DIR}
- ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_cpuid.c
+ ${CMAKE_SOURCE_DIR}/src/gromacs/hardware/cpuinfo.cpp
COMPILE_DEFINITIONS ${_compile_definitions}
RUN_OUTPUT_VARIABLE OUTPUT_CPU_FAMILY ARGS "-family")
- try_run(GMX_CPUID_RUN_MODEL GMX_CPUID_COMPILED
+ try_run(GMX_CPUINFO_RUN_MODEL GMX_CPUINFO_COMPILED
${CMAKE_BINARY_DIR}
- ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_cpuid.c
+ ${CMAKE_SOURCE_DIR}/src/gromacs/hardware/cpuinfo.cpp
COMPILE_DEFINITIONS ${_compile_definitions}
RUN_OUTPUT_VARIABLE OUTPUT_CPU_MODEL ARGS "-model")
- try_run(GMX_CPUID_RUN_STEPPING GMX_CPUID_COMPILED
+ try_run(GMX_CPUINFO_RUN_STEPPING GMX_CPUINFO_COMPILED
${CMAKE_BINARY_DIR}
- ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_cpuid.c
+ ${CMAKE_SOURCE_DIR}/src/gromacs/hardware/cpuinfo.cpp
COMPILE_DEFINITIONS ${_compile_definitions}
RUN_OUTPUT_VARIABLE OUTPUT_CPU_STEPPING ARGS "-stepping")
- try_run(GMX_CPUID_RUN_FEATURES GMX_CPUID_COMPILED
+ try_run(GMX_CPUINFO_RUN_FEATURES GMX_CPUINFO_COMPILED
${CMAKE_BINARY_DIR}
- ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_cpuid.c
+ ${CMAKE_SOURCE_DIR}/src/gromacs/hardware/cpuinfo.cpp
COMPILE_DEFINITIONS ${_compile_definitions}
RUN_OUTPUT_VARIABLE OUTPUT_CPU_FEATURES ARGS "-features")
unset(_compile_definitions)
string(STRIP "${OUTPUT_CPU_STEPPING}" OUTPUT_CPU_STEPPING)
string(STRIP "${OUTPUT_CPU_FEATURES}" OUTPUT_CPU_FEATURES)
- if(GMX_CPUID_RUN_VENDOR EQUAL 0)
+ if(GMX_CPUINFO_RUN_VENDOR EQUAL 0)
set(BUILD_CPU_VENDOR "${OUTPUT_CPU_VENDOR}" CACHE INTERNAL "Build CPU vendor")
else()
set(BUILD_CPU_VENDOR "Unknown, detect failed" CACHE INTERNAL "Build CPU vendor")
endif()
- if(GMX_CPUID_RUN_BRAND EQUAL 0)
+ if(GMX_CPUINFO_RUN_BRAND EQUAL 0)
set(BUILD_CPU_BRAND "${OUTPUT_CPU_BRAND}" CACHE INTERNAL "Build CPU brand")
else()
set(BUILD_CPU_BRAND "Unknown, detect failed" CACHE INTERNAL "Build CPU brand")
endif()
- if(GMX_CPUID_RUN_FAMILY EQUAL 0)
+ if(GMX_CPUINFO_RUN_FAMILY EQUAL 0)
set(BUILD_CPU_FAMILY "${OUTPUT_CPU_FAMILY}" CACHE INTERNAL "Build CPU family")
else()
set(BUILD_CPU_FAMILY "0" CACHE INTERNAL "Build CPU family")
endif()
- if(GMX_CPUID_RUN_MODEL EQUAL 0)
+ if(GMX_CPUINFO_RUN_MODEL EQUAL 0)
set(BUILD_CPU_MODEL "${OUTPUT_CPU_MODEL}" CACHE INTERNAL "Build CPU model")
else()
set(BUILD_CPU_MODEL "0" CACHE INTERNAL "Build CPU model")
endif()
- if(GMX_CPUID_RUN_STEPPING EQUAL 0)
+ if(GMX_CPUINFO_RUN_STEPPING EQUAL 0)
set(BUILD_CPU_STEPPING "${OUTPUT_CPU_STEPPING}" CACHE INTERNAL "Build CPU stepping")
else()
set(BUILD_CPU_STEPPING "0" CACHE INTERNAL "Build CPU stepping")
endif()
- if(GMX_CPUID_RUN_FEATURES EQUAL 0)
+ if(GMX_CPUINFO_RUN_FEATURES EQUAL 0)
set(BUILD_CPU_FEATURES "${OUTPUT_CPU_FEATURES}" CACHE INTERNAL "Build CPU features")
else()
set(BUILD_CPU_FEATURES "" CACHE INTERNAL "Build CPU features")
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2009,2010,2012,2013,2014, by the GROMACS development team, led by
+# Copyright (c) 2009,2010,2012,2013,2014,2015, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
MESSAGE(STATUS "Checking for GCC x86 inline asm")
TRY_COMPILE(${VARIABLE} "${CMAKE_BINARY_DIR}"
- "${CMAKE_SOURCE_DIR}/cmake/TestInlineASM_gcc_x86.c"
+ "${CMAKE_SOURCE_DIR}/cmake/TestInlineASM_gcc_x86.cpp"
OUTPUT_VARIABLE INLINE_ASM_COMPILE_OUTPUT)
if(${VARIABLE})
add_subdirectory(domdec)
add_subdirectory(ewald)
add_subdirectory(fft)
+add_subdirectory(hardware)
add_subdirectory(linearalgebra)
add_subdirectory(math)
add_subdirectory(mdrunutility)
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-/*! \cond */
-#include "gromacs/legacyheaders/gmx_cpuid.h"
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include <ctype.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef GMX_NATIVE_WINDOWS
-/* MSVC definition for __cpuid() */
- #ifdef _MSC_VER
- #include <intrin.h>
- #endif
-/* sysinfo functions */
- #include <windows.h>
-#endif
-#ifdef HAVE_SCHED_H
- #include <sched.h>
-#endif
-#ifdef HAVE_UNISTD_H
-/* sysconf() definition */
- #include <unistd.h>
-#endif
-
-
-/* For convenience, and to enable configure-time invocation, we keep all architectures
- * in a single file, but to avoid repeated ifdefs we set the overall architecture here.
- */
-#ifdef GMX_TARGET_X86
-/* OK, it is x86, but can we execute cpuid? */
-#if defined(GMX_X86_GCC_INLINE_ASM) || ( defined(_MSC_VER) && ( (_MSC_VER > 1500) || (_MSC_VER == 1500 & _MSC_FULL_VER >= 150030729)))
-# define GMX_CPUID_X86
-#endif
-#endif
-
-/* Global constant character strings corresponding to our enumerated types */
-const char *
-gmx_cpuid_vendor_string[GMX_CPUID_NVENDORS] =
-{
- "CannotDetect",
- "Unknown",
- "GenuineIntel",
- "AuthenticAMD",
- "Fujitsu",
- "IBM", /* Used on Power and BlueGene/Q */
- "ARM"
-};
-
-const char *
-gmx_cpuid_vendor_string_alternative[GMX_CPUID_NVENDORS] =
-{
- "CannotDetect",
- "Unknown",
- "GenuineIntel",
- "AuthenticAMD",
- "Fujitsu",
- "ibm", /* Used on Power and BlueGene/Q */
- "AArch64"
-};
-
-const char *
-gmx_cpuid_feature_string[GMX_CPUID_NFEATURES] =
-{
- "CannotDetect",
- "aes",
- "apic",
- "avx",
- "avx2",
- "avx512f",
- "avx512pf",
- "avx512er",
- "avx512cd",
- "clfsh",
- "cmov",
- "cx8",
- "cx16",
- "f16c",
- "fma",
- "fma4",
- "htt",
- "lahf_lm",
- "misalignsse",
- "mmx",
- "msr",
- "nonstop_tsc",
- "pcid",
- "pclmuldq",
- "pdcm",
- "pdpe1gb",
- "popcnt",
- "pse",
- "rdrnd",
- "rdtscp",
- "sse2",
- "sse3",
- "sse4a",
- "sse4.1",
- "sse4.2",
- "ssse3",
- "tdt",
- "x2apic",
- "xop",
- "arm_neon",
- "arm_neon_asimd",
- "QPX",
- "VMX",
- "VSX"
-};
-
-const char *
-gmx_cpuid_simd_string[GMX_CPUID_NSIMD] =
-{
- "CannotDetect",
- "None",
- "Reference",
- "SSE2",
- "SSE4.1",
- "AVX_128_FMA",
- "AVX_256",
- "AVX2_256",
- "AVX_512F",
- "AVX_512ER",
- "Sparc64 HPC-ACE",
- "IBM_QPX",
- "IBM_VMX",
- "IBM_VSX",
- "ARM_NEON",
- "ARM_NEON_ASIMD"
-};
-
-/* Max length of brand string */
-#define GMX_CPUID_STRLEN 256
-
-
-/* Contents of the abstract datatype */
-struct gmx_cpuid
-{
- enum gmx_cpuid_vendor vendor;
- char brand[GMX_CPUID_STRLEN];
- int family;
- int model;
- int stepping;
- char feature[GMX_CPUID_NFEATURES];
-
- /* Basic CPU topology information. For x86 this is a bit complicated since the topology differs between
- * operating systems and sometimes even settings. For most other architectures you can likely just check
- * the documentation and then write static information to these arrays rather than detecting on-the-fly.
- */
- int have_cpu_topology;
- int nproc; /* total number of logical processors from OS */
- int npackages;
- int ncores_per_package;
- int nhwthreads_per_core;
- int * package_id;
- int * core_id; /* Local core id in each package */
- int * hwthread_id; /* Local hwthread id in each core */
- int * locality_order; /* Processor indices sorted in locality order */
-};
-
-
-/* Simple routines to access the data structure. The initialization routine is
- * further down since that needs to call other static routines in this file.
- */
-enum gmx_cpuid_vendor
-gmx_cpuid_vendor (gmx_cpuid_t cpuid)
-{
- return cpuid->vendor;
-}
-
-
-const char *
-gmx_cpuid_brand (gmx_cpuid_t cpuid)
-{
- return cpuid->brand;
-}
-
-int
-gmx_cpuid_family (gmx_cpuid_t cpuid)
-{
- return cpuid->family;
-}
-
-int
-gmx_cpuid_model (gmx_cpuid_t cpuid)
-{
- return cpuid->model;
-}
-
-int
-gmx_cpuid_stepping (gmx_cpuid_t cpuid)
-{
- return cpuid->stepping;
-}
-
-int
-gmx_cpuid_feature (gmx_cpuid_t cpuid,
- enum gmx_cpuid_feature feature)
-{
- return (cpuid->feature[feature] != 0);
-}
-
-
-int
-gmx_cpuid_is_intel_nehalem (const gmx_cpuid_t cpuid)
-{
- return (cpuid->vendor == GMX_CPUID_VENDOR_INTEL &&
- cpuid->family == 6 &&
- (cpuid->model == 0x2E ||
- cpuid->model == 0x1A ||
- cpuid->model == 0x1E ||
- cpuid->model == 0x2F ||
- cpuid->model == 0x2C ||
- cpuid->model == 0x25));
-}
-
-
-/* What type of SIMD was compiled in, if any? */
-#if GMX_SIMD_X86_AVX_512ER
-static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_512ER;
-#elif GMX_SIMD_X86_AVX_512F
-static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_512F;
-#elif GMX_SIMD_X86_AVX2_256
-static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX2_256;
-#elif GMX_SIMD_X86_AVX_256
-static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_256;
-#elif GMX_SIMD_X86_AVX_128_FMA
-static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_128_FMA;
-#elif GMX_SIMD_X86_SSE4_1
-static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE4_1;
-#elif GMX_SIMD_X86_SSE2
-static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE2;
-#elif GMX_SIMD_ARM_NEON
-static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_ARM_NEON;
-#elif GMX_SIMD_ARM_NEON_ASIMD
-static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_ARM_NEON_ASIMD;
-#elif GMX_SIMD_SPARC64_HPC_ACE
-static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_SPARC64_HPC_ACE;
-#elif GMX_SIMD_IBM_QPX
-static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_QPX;
-#elif GMX_SIMD_IBM_VMX
-static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_VMX;
-#elif GMX_SIMD_IBM_VSX
-static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_VSX;
-#elif GMX_SIMD_REFERENCE
-static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_REFERENCE;
-#else
-static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_NONE;
-#endif
-
-
-enum gmx_cpuid_simd
-gmx_compiled_simd()
-{
- return compiled_simd;
-}
-
-
-#ifdef GMX_CPUID_X86
-
-/* Execute CPUID on x86 class CPUs. level sets function to exec, and the
- * contents of register output is returned. See Intel/AMD docs for details.
- *
- * This version supports extended information where we can also have an input
- * value in the ecx register. This is ignored for most levels, but some of them
- * (e.g. level 0xB on Intel) use it.
- */
-static int
-execute_x86cpuid(unsigned int level,
- unsigned int ecxval,
- unsigned int * eax,
- unsigned int * ebx,
- unsigned int * ecx,
- unsigned int * edx)
-{
- int rc = 0;
-
- /* Currently CPUID is only supported (1) if we can use an instruction on MSVC, or (2)
- * if the compiler handles GNU-style inline assembly.
- */
-
-#if (defined _MSC_VER)
- int CPUInfo[4];
-
-#if (_MSC_VER > 1500) || (_MSC_VER == 1500 & _MSC_FULL_VER >= 150030729)
- /* MSVC 9.0 SP1 or later */
- __cpuidex(CPUInfo, level, ecxval);
- rc = 0;
-#else
- __cpuid(CPUInfo, level);
- /* Set an error code if the user wanted a non-zero ecxval, since we did not have cpuidex */
- rc = (ecxval > 0) ? -1 : 0;
-#endif
- *eax = CPUInfo[0];
- *ebx = CPUInfo[1];
- *ecx = CPUInfo[2];
- *edx = CPUInfo[3];
-
-#elif (defined GMX_X86_GCC_INLINE_ASM)
- /* for now this means GMX_X86_GCC_INLINE_ASM should be defined,
- * but there might be more options added in the future.
- */
- *eax = level;
- *ecx = ecxval;
- *ebx = 0;
- *edx = 0;
-#if defined(__i386__) && defined(__PIC__)
- /* Avoid clobbering the global offset table in 32-bit pic code (ebx register) */
- __asm__ __volatile__ ("xchgl %%ebx, %1 \n\t"
- "cpuid \n\t"
- "xchgl %%ebx, %1 \n\t"
- : "+a" (*eax), "+r" (*ebx), "+c" (*ecx), "+d" (*edx));
-#else
- /* i386 without PIC, or x86-64. Things are easy and we can clobber any reg we want :-) */
- __asm__ __volatile__ ("cpuid \n\t"
- : "+a" (*eax), "+b" (*ebx), "+c" (*ecx), "+d" (*edx));
-#endif
- rc = 0;
-#else
- /* Death and horror!
- * Apparently this is an x86 platform where we don't know how to call cpuid.
- *
- * This is REALLY bad, since we will lose all Gromacs SIMD support.
- */
- *eax = 0;
- *ebx = 0;
- *ecx = 0;
- *edx = 0;
-
- rc = -1;
-#endif
- return rc;
-}
-
-
-/* Identify CPU features common to Intel & AMD - mainly brand string,
- * version and some features. Vendor has already been detected outside this.
- */
-static int
-cpuid_check_common_x86(gmx_cpuid_t cpuid)
-{
- int fn, max_stdfn, max_extfn;
- unsigned int eax, ebx, ecx, edx;
- char str[GMX_CPUID_STRLEN];
- char * p;
-
- /* Find largest standard/extended function input value */
- execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
- max_stdfn = eax;
- execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
- max_extfn = eax;
-
- p = str;
- if (max_extfn >= 0x80000005)
- {
- /* Get CPU brand string */
- for (fn = 0x80000002; fn < 0x80000005; fn++)
- {
- execute_x86cpuid(fn, 0, &eax, &ebx, &ecx, &edx);
- memcpy(p, &eax, 4);
- memcpy(p+4, &ebx, 4);
- memcpy(p+8, &ecx, 4);
- memcpy(p+12, &edx, 4);
- p += 16;
- }
- *p = '\0';
-
- /* Remove empty initial space */
- p = str;
- while (isspace(*(p)))
- {
- p++;
- }
- strncpy(cpuid->brand, p, GMX_CPUID_STRLEN);
- }
- else
- {
- strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
- }
-
- /* Find basic CPU properties */
- if (max_stdfn >= 1)
- {
- execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
-
- cpuid->family = ((eax & 0x0FF00000) >> 20) + ((eax & 0x00000F00) >> 8);
- /* Note that extended model should be shifted left 4, so only shift right 12 iso 16. */
- cpuid->model = ((eax & 0x000F0000) >> 12) + ((eax & 0x000000F0) >> 4);
- cpuid->stepping = (eax & 0x0000000F);
-
- /* Feature flags common to AMD and intel */
- cpuid->feature[GMX_CPUID_FEATURE_X86_SSE3] = (ecx & (1 << 0)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_PCLMULDQ] = (ecx & (1 << 1)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_SSSE3] = (ecx & (1 << 9)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_FMA] = (ecx & (1 << 12)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_CX16] = (ecx & (1 << 13)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_1] = (ecx & (1 << 19)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_2] = (ecx & (1 << 20)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_POPCNT] = (ecx & (1 << 23)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_AES] = (ecx & (1 << 25)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_AVX] = (ecx & (1 << 28)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_F16C] = (ecx & (1 << 29)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_RDRND] = (ecx & (1 << 30)) != 0;
-
- cpuid->feature[GMX_CPUID_FEATURE_X86_PSE] = (edx & (1 << 3)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_MSR] = (edx & (1 << 5)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_CX8] = (edx & (1 << 8)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_APIC] = (edx & (1 << 9)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_CMOV] = (edx & (1 << 15)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_CLFSH] = (edx & (1 << 19)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_MMX] = (edx & (1 << 23)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_SSE2] = (edx & (1 << 26)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = (edx & (1 << 28)) != 0;
- }
- else
- {
- cpuid->family = -1;
- cpuid->model = -1;
- cpuid->stepping = -1;
- }
-
- if (max_extfn >= 0x80000001)
- {
- execute_x86cpuid(0x80000001, 0, &eax, &ebx, &ecx, &edx);
- cpuid->feature[GMX_CPUID_FEATURE_X86_LAHF_LM] = (ecx & (1 << 0)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_PDPE1GB] = (edx & (1 << 26)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_RDTSCP] = (edx & (1 << 27)) != 0;
- }
-
- if (max_extfn >= 0x80000007)
- {
- execute_x86cpuid(0x80000007, 0, &eax, &ebx, &ecx, &edx);
- cpuid->feature[GMX_CPUID_FEATURE_X86_NONSTOP_TSC] = (edx & (1 << 8)) != 0;
- }
- return 0;
-}
-
-/* This routine returns the number of unique different elements found in the array,
- * and renumbers these starting from 0. For example, the array {0,1,2,8,9,10,8,9,10,0,1,2}
- * will be rewritten to {0,1,2,3,4,5,3,4,5,0,1,2}, and it returns 6 for the
- * number of unique elements.
- */
-static int
-cpuid_renumber_elements(int *data, int n)
-{
- int *unique;
- int i, j, nunique, found;
-
- unique = malloc(sizeof(int)*n);
-
- nunique = 0;
- for (i = 0; i < n; i++)
- {
- for (j = 0, found = 0; j < nunique && !found; j++)
- {
- found = (data[i] == unique[j]);
- }
- if (!found)
- {
- /* Insert in sorted order! */
- for (j = nunique++; j > 0 && unique[j-1] > data[i]; j--)
- {
- unique[j] = unique[j-1];
- }
- unique[j] = data[i];
- }
- }
- /* renumber */
- for (i = 0; i < n; i++)
- {
- for (j = 0; j < nunique; j++)
- {
- if (data[i] == unique[j])
- {
- data[i] = j;
- }
- }
- }
- free(unique);
- return nunique;
-}
-
-/* APIC IDs, or everything you wanted to know about your x86 cores but were afraid to ask...
- *
- * Raw APIC IDs are unfortunately somewhat dirty. For technical reasons they are assigned
- * in power-of-2 chunks, and even then there are no guarantees about specific numbers - all
- * we know is that the part for each thread/core/package is unique, and how many bits are
- * reserved for that part.
- * This routine does internal renumbering so we get continuous indices, and also
- * decodes the actual number of packages,cores-per-package and hwthreads-per-core.
- * Returns: 0 on success, non-zero on failure.
- */
-static int
-cpuid_x86_decode_apic_id(gmx_cpuid_t cpuid, int *apic_id, int core_bits, int hwthread_bits)
-{
- int i, idx;
- int hwthread_mask, core_mask_after_shift;
-
- cpuid->hwthread_id = malloc(sizeof(int)*cpuid->nproc);
- cpuid->core_id = malloc(sizeof(int)*cpuid->nproc);
- cpuid->package_id = malloc(sizeof(int)*cpuid->nproc);
- cpuid->locality_order = malloc(sizeof(int)*cpuid->nproc);
-
- hwthread_mask = (1 << hwthread_bits) - 1;
- core_mask_after_shift = (1 << core_bits) - 1;
-
- for (i = 0; i < cpuid->nproc; i++)
- {
- cpuid->hwthread_id[i] = apic_id[i] & hwthread_mask;
- cpuid->core_id[i] = (apic_id[i] >> hwthread_bits) & core_mask_after_shift;
- cpuid->package_id[i] = apic_id[i] >> (core_bits + hwthread_bits);
- }
-
- cpuid->npackages = cpuid_renumber_elements(cpuid->package_id, cpuid->nproc);
- cpuid->ncores_per_package = cpuid_renumber_elements(cpuid->core_id, cpuid->nproc);
- cpuid->nhwthreads_per_core = cpuid_renumber_elements(cpuid->hwthread_id, cpuid->nproc);
-
- /* now check for consistency */
- if ( (cpuid->npackages * cpuid->ncores_per_package *
- cpuid->nhwthreads_per_core) != cpuid->nproc)
- {
- /* the packages/cores-per-package/hwthreads-per-core counts are
- inconsistent. */
- return -1;
- }
-
- /* Create a locality order array, i.e. first all resources in package0, which in turn
- * are sorted so we first have all resources in core0, where threads are sorted in order, etc.
- */
-
- for (i = 0; i < cpuid->nproc; i++)
- {
- idx = (cpuid->package_id[i]*cpuid->ncores_per_package + cpuid->core_id[i])*cpuid->nhwthreads_per_core + cpuid->hwthread_id[i];
- cpuid->locality_order[idx] = i;
- }
- return 0;
-}
-
-
-/* Detection of AMD-specific CPU features */
-static int
-cpuid_check_amd_x86(gmx_cpuid_t cpuid)
-{
- int max_stdfn, max_extfn, ret;
- unsigned int eax, ebx, ecx, edx;
- int hwthread_bits, core_bits;
- int * apic_id;
-
- cpuid_check_common_x86(cpuid);
-
- execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
- max_stdfn = eax;
-
- execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
- max_extfn = eax;
-
- if (max_extfn >= 0x80000001)
- {
- execute_x86cpuid(0x80000001, 0, &eax, &ebx, &ecx, &edx);
-
- cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4A] = (ecx & (1 << 6)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_MISALIGNSSE] = (ecx & (1 << 7)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_XOP] = (ecx & (1 << 11)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_FMA4] = (ecx & (1 << 16)) != 0;
- }
-
- /* Query APIC information on AMD */
- if (max_extfn >= 0x80000008)
- {
-#if (defined HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF && defined __linux__)
- /* Linux */
- unsigned int i;
- cpu_set_t cpuset, save_cpuset;
- cpuid->nproc = sysconf(_SC_NPROCESSORS_ONLN);
- apic_id = malloc(sizeof(int)*cpuid->nproc);
- sched_getaffinity(0, sizeof(cpu_set_t), &save_cpuset);
- /* Get APIC id from each core */
- CPU_ZERO(&cpuset);
- for (i = 0; i < cpuid->nproc; i++)
- {
- CPU_SET(i, &cpuset);
- sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
- execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
- apic_id[i] = ebx >> 24;
- CPU_CLR(i, &cpuset);
- }
- /* Reset affinity to the value it had when calling this routine */
- sched_setaffinity(0, sizeof(cpu_set_t), &save_cpuset);
-#define CPUID_HAVE_APIC
-#elif defined GMX_NATIVE_WINDOWS
- /* Windows */
- DWORD_PTR i;
- SYSTEM_INFO sysinfo;
- unsigned int save_affinity, affinity;
- GetSystemInfo( &sysinfo );
- cpuid->nproc = sysinfo.dwNumberOfProcessors;
- apic_id = malloc(sizeof(int)*cpuid->nproc);
- /* Get previous affinity mask */
- save_affinity = SetThreadAffinityMask(GetCurrentThread(), 1);
- for (i = 0; i < cpuid->nproc; i++)
- {
- SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR)1)<<i));
- Sleep(0);
- execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
- apic_id[i] = ebx >> 24;
- }
- SetThreadAffinityMask(GetCurrentThread(), save_affinity);
-#define CPUID_HAVE_APIC
-#endif
-#ifdef CPUID_HAVE_APIC
- /* AMD does not support SMT yet - there are no hwthread bits in apic ID */
- hwthread_bits = 0;
- /* Get number of core bits in apic ID - try modern extended method first */
- execute_x86cpuid(0x80000008, 0, &eax, &ebx, &ecx, &edx);
- core_bits = (ecx >> 12) & 0xf;
- if (core_bits == 0)
- {
- /* Legacy method for old single/dual core AMD CPUs */
- int i = ecx & 0xF;
- for (core_bits = 0; (i>>core_bits) > 0; core_bits++)
- {
- ;
- }
- }
- ret = cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits,
- hwthread_bits);
- cpuid->have_cpu_topology = (ret == 0);
-#endif
- }
- return 0;
-}
-
-/* Detection of Intel-specific CPU features */
-static int
-cpuid_check_intel_x86(gmx_cpuid_t cpuid)
-{
- unsigned int max_stdfn, max_extfn, ret;
- unsigned int eax, ebx, ecx, edx;
- unsigned int max_logical_cores, max_physical_cores;
- int hwthread_bits, core_bits;
- int * apic_id;
-
- cpuid_check_common_x86(cpuid);
-
- execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
- max_stdfn = eax;
-
- execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
- max_extfn = eax;
-
- if (max_stdfn >= 1)
- {
- execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
- cpuid->feature[GMX_CPUID_FEATURE_X86_PDCM] = (ecx & (1 << 15)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_PCID] = (ecx & (1 << 17)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_X2APIC] = (ecx & (1 << 21)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_TDT] = (ecx & (1 << 24)) != 0;
- }
-
- if (max_stdfn >= 7)
- {
- execute_x86cpuid(0x7, 0, &eax, &ebx, &ecx, &edx);
- cpuid->feature[GMX_CPUID_FEATURE_X86_AVX2] = (ebx & (1 << 5)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512F] = (ebx & (1 << 16)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512PF] = (ebx & (1 << 26)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512ER] = (ebx & (1 << 27)) != 0;
- cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512CD] = (ebx & (1 << 28)) != 0;
- }
-
- /* Check whether Hyper-Threading is enabled, not only supported */
- if (cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] && max_stdfn >= 4)
- {
- execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
- max_logical_cores = (ebx >> 16) & 0x0FF;
- execute_x86cpuid(0x4, 0, &eax, &ebx, &ecx, &edx);
- max_physical_cores = ((eax >> 26) & 0x3F) + 1;
-
- /* Clear HTT flag if we only have 1 logical core per physical */
- if (max_logical_cores/max_physical_cores < 2)
- {
- cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = 0;
- }
- }
-
- if (max_stdfn >= 0xB)
- {
- /* Query x2 APIC information from cores */
-#if (defined HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF && defined __linux__)
- /* Linux */
- unsigned int i;
- cpu_set_t cpuset, save_cpuset;
- cpuid->nproc = sysconf(_SC_NPROCESSORS_ONLN);
- apic_id = malloc(sizeof(int)*cpuid->nproc);
- sched_getaffinity(0, sizeof(cpu_set_t), &save_cpuset);
- /* Get x2APIC ID from each hardware thread */
- CPU_ZERO(&cpuset);
- for (i = 0; i < cpuid->nproc; i++)
- {
- CPU_SET(i, &cpuset);
- sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
- execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
- apic_id[i] = edx;
- CPU_CLR(i, &cpuset);
- }
- /* Reset affinity to the value it had when calling this routine */
- sched_setaffinity(0, sizeof(cpu_set_t), &save_cpuset);
-#define CPUID_HAVE_APIC
-#elif defined GMX_NATIVE_WINDOWS
- /* Windows */
- DWORD_PTR i;
- SYSTEM_INFO sysinfo;
- unsigned int save_affinity, affinity;
- GetSystemInfo( &sysinfo );
- cpuid->nproc = sysinfo.dwNumberOfProcessors;
- apic_id = malloc(sizeof(int)*cpuid->nproc);
- /* Get previous affinity mask */
- save_affinity = SetThreadAffinityMask(GetCurrentThread(), 1);
- for (i = 0; i < cpuid->nproc; i++)
- {
- SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR)1)<<i));
- Sleep(0);
- execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
- apic_id[i] = edx;
- }
- SetThreadAffinityMask(GetCurrentThread(), save_affinity);
-#define CPUID_HAVE_APIC
-#endif
-#ifdef CPUID_HAVE_APIC
- execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
- hwthread_bits = eax & 0x1F;
- execute_x86cpuid(0xB, 1, &eax, &ebx, &ecx, &edx);
- core_bits = (eax & 0x1F) - hwthread_bits;
- ret = cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits,
- hwthread_bits);
- cpuid->have_cpu_topology = (ret == 0);
-#endif
- }
- return 0;
-}
-#endif /* GMX_CPUID_X86 */
-
-
-
-static void
-chomp_substring_before_colon(const char *in, char *s, int maxlength)
-{
- char *p;
- strncpy(s, in, maxlength);
- p = strchr(s, ':');
- if (p != NULL)
- {
- *p = '\0';
- while (isspace(*(--p)) && (p >= s))
- {
- *p = '\0';
- }
- }
- else
- {
- *s = '\0';
- }
-}
-
-static void
-chomp_substring_after_colon(const char *in, char *s, int maxlength)
-{
- char *p;
- if ( (p = strchr(in, ':')) != NULL)
- {
- p++;
- while (isspace(*p))
- {
- p++;
- }
- strncpy(s, p, maxlength);
- p = s+strlen(s);
- while (isspace(*(--p)) && (p >= s))
- {
- *p = '\0';
- }
- }
- else
- {
- *s = '\0';
- }
-}
-
-static int
-cpuid_check_arm(gmx_cpuid_t cpuid)
-{
-#if defined(__linux__) || defined(__linux)
- FILE *fp;
- char buffer[GMX_CPUID_STRLEN], buffer2[GMX_CPUID_STRLEN], buffer3[GMX_CPUID_STRLEN];
-
- if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
- {
- while ( (fgets(buffer, sizeof(buffer), fp) != NULL))
- {
- chomp_substring_before_colon(buffer, buffer2, GMX_CPUID_STRLEN);
- chomp_substring_after_colon(buffer, buffer3, GMX_CPUID_STRLEN);
-
- if (!strcmp(buffer2, "Processor"))
- {
- strncpy(cpuid->brand, buffer3, GMX_CPUID_STRLEN);
- }
- else if (!strcmp(buffer2, "CPU architecture"))
- {
- cpuid->family = strtol(buffer3, NULL, 10);
- if (!strcmp(buffer3, "AArch64"))
- {
- cpuid->family = 8;
- }
- }
- else if (!strcmp(buffer2, "CPU part"))
- {
- cpuid->model = strtol(buffer3, NULL, 16);
- }
- else if (!strcmp(buffer2, "CPU revision"))
- {
- cpuid->stepping = strtol(buffer3, NULL, 10);
- }
- else if (!strcmp(buffer2, "Features") && strstr(buffer3, "neon"))
- {
- cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON] = 1;
- }
- else if (!strcmp(buffer2, "Features") && strstr(buffer3, "asimd"))
- {
- cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON_ASIMD] = 1;
- }
- }
- }
- fclose(fp);
-#else
-# ifdef __aarch64__
- /* Strange 64-bit non-linux platform. However, since NEON ASIMD is present on all
- * implementations of AArch64 this far, we assume it is present for now.
- */
- cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON_ASIMD] = 1;
-# else
- /* Strange 32-bit non-linux platform. We cannot assume that neon is present. */
- cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON] = 0;
-# endif
-#endif
- return 0;
-}
-
-
-static int
-cpuid_check_ibm(gmx_cpuid_t cpuid)
-{
-#if defined(__linux__) || defined(__linux)
- FILE *fp;
- char buffer[GMX_CPUID_STRLEN], before_colon[GMX_CPUID_STRLEN], after_colon[GMX_CPUID_STRLEN];
-
- if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
- {
- while ( (fgets(buffer, sizeof(buffer), fp) != NULL))
- {
- chomp_substring_before_colon(buffer, before_colon, GMX_CPUID_STRLEN);
- chomp_substring_after_colon(buffer, after_colon, GMX_CPUID_STRLEN);
-
- if (!strcmp(before_colon, "cpu") || !strcmp(before_colon, "Processor"))
- {
- strncpy(cpuid->brand, after_colon, GMX_CPUID_STRLEN);
- }
- if (!strcmp(before_colon, "model name") ||
- !strcmp(before_colon, "model") ||
- !strcmp(before_colon, "Processor") ||
- !strcmp(before_colon, "cpu"))
- {
- if (strstr(after_colon, "altivec"))
- {
- cpuid->feature[GMX_CPUID_FEATURE_IBM_VMX] = 1;
-
- if (!strstr(after_colon, "POWER6") && !strstr(after_colon, "Power6") &&
- !strstr(after_colon, "power6"))
- {
- cpuid->feature[GMX_CPUID_FEATURE_IBM_VSX] = 1;
- }
- }
- }
- }
- }
- fclose(fp);
-
- if (strstr(cpuid->brand, "A2"))
- {
- /* BlueGene/Q */
- cpuid->feature[GMX_CPUID_FEATURE_IBM_QPX] = 1;
- }
-#else
- strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
- cpuid->feature[GMX_CPUID_FEATURE_IBM_QPX] = 0;
- cpuid->feature[GMX_CPUID_FEATURE_IBM_VMX] = 0;
- cpuid->feature[GMX_CPUID_FEATURE_IBM_VSX] = 0;
-#endif
- return 0;
-}
-
-
-/* Try to find the vendor of the current CPU, so we know what specific
- * detection routine to call.
- */
-static enum gmx_cpuid_vendor
-cpuid_check_vendor(void)
-{
- enum gmx_cpuid_vendor i, vendor;
- /* Register data used on x86 */
- unsigned int eax, ebx, ecx, edx;
- char vendorstring[13];
- FILE * fp;
- char buffer[GMX_CPUID_STRLEN];
- char before_colon[GMX_CPUID_STRLEN];
- char after_colon[GMX_CPUID_STRLEN];
-
- /* Set default first */
- vendor = GMX_CPUID_VENDOR_UNKNOWN;
-
-#ifdef GMX_CPUID_X86
- execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
-
- memcpy(vendorstring, &ebx, 4);
- memcpy(vendorstring+4, &edx, 4);
- memcpy(vendorstring+8, &ecx, 4);
-
- vendorstring[12] = '\0';
-
- for (i = GMX_CPUID_VENDOR_UNKNOWN; i < GMX_CPUID_NVENDORS; i++)
- {
- if (!strncmp(vendorstring, gmx_cpuid_vendor_string[i], 12))
- {
- vendor = i;
- }
- }
-#elif defined(__linux__) || defined(__linux)
- /* General Linux. Try to get CPU vendor from /proc/cpuinfo */
- if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
- {
- while ( (vendor == GMX_CPUID_VENDOR_UNKNOWN) && (fgets(buffer, sizeof(buffer), fp) != NULL))
- {
- chomp_substring_before_colon(buffer, before_colon, sizeof(before_colon));
- /* Intel/AMD use "vendor_id", IBM "vendor", "model", or "cpu". Fujitsu "manufacture".
- * On ARM there does not seem to be a vendor, but ARM or AArch64 is listed in the Processor string.
- * Add others if you have them!
- */
- if (!strcmp(before_colon, "vendor_id")
- || !strcmp(before_colon, "vendor")
- || !strcmp(before_colon, "manufacture")
- || !strcmp(before_colon, "model")
- || !strcmp(before_colon, "Processor")
- || !strcmp(before_colon, "cpu"))
- {
- chomp_substring_after_colon(buffer, after_colon, sizeof(after_colon));
- for (i = GMX_CPUID_VENDOR_UNKNOWN; i < GMX_CPUID_NVENDORS; i++)
- {
- /* Be liberal and accept if we find the vendor
- * string (or alternative string) anywhere. Using
- * strcasestr() would be non-portable. */
- if (strstr(after_colon, gmx_cpuid_vendor_string[i])
- || strstr(after_colon, gmx_cpuid_vendor_string_alternative[i]))
- {
- vendor = i;
- }
- }
- /* If we did not find vendor yet, check if it is IBM:
- * On some Power/PowerPC systems it only says power, not IBM.
- */
- if (vendor == GMX_CPUID_VENDOR_UNKNOWN &&
- ((strstr(after_colon, "POWER") || strstr(after_colon, "Power") ||
- strstr(after_colon, "power"))))
- {
- vendor = GMX_CPUID_VENDOR_IBM;
- }
- }
- }
- }
- fclose(fp);
-#elif defined(__arm__) || defined (__arm) || defined(__aarch64__)
- /* If we are using ARM on something that is not linux we have to trust the compiler,
- * and we cannot get the extra info that might be present in /proc/cpuinfo.
- */
- vendor = GMX_CPUID_VENDOR_ARM;
-#endif
- return vendor;
-}
-
-
-
-int
-gmx_cpuid_topology(gmx_cpuid_t cpuid,
- int * nprocessors,
- int * npackages,
- int * ncores_per_package,
- int * nhwthreads_per_core,
- const int ** package_id,
- const int ** core_id,
- const int ** hwthread_id,
- const int ** locality_order)
-{
- int rc;
-
- if (cpuid->have_cpu_topology)
- {
- *nprocessors = cpuid->nproc;
- *npackages = cpuid->npackages;
- *ncores_per_package = cpuid->ncores_per_package;
- *nhwthreads_per_core = cpuid->nhwthreads_per_core;
- *package_id = cpuid->package_id;
- *core_id = cpuid->core_id;
- *hwthread_id = cpuid->hwthread_id;
- *locality_order = cpuid->locality_order;
- rc = 0;
- }
- else
- {
- rc = -1;
- }
- return rc;
-}
-
-
-enum gmx_cpuid_x86_smt
-gmx_cpuid_x86_smt(gmx_cpuid_t cpuid)
-{
- enum gmx_cpuid_x86_smt rc;
-
- if (cpuid->have_cpu_topology)
- {
- rc = (cpuid->nhwthreads_per_core > 1) ? GMX_CPUID_X86_SMT_ENABLED : GMX_CPUID_X86_SMT_DISABLED;
- }
- else if (cpuid->vendor == GMX_CPUID_VENDOR_AMD || gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_HTT) == 0)
- {
- rc = GMX_CPUID_X86_SMT_DISABLED;
- }
- else
- {
- rc = GMX_CPUID_X86_SMT_CANNOTDETECT;
- }
- return rc;
-}
-
-
-int
-gmx_cpuid_init (gmx_cpuid_t * pcpuid)
-{
- gmx_cpuid_t cpuid;
- int i;
- FILE * fp;
- char buffer[GMX_CPUID_STRLEN], buffer2[GMX_CPUID_STRLEN];
- int found_brand;
-
- cpuid = malloc(sizeof(*cpuid));
-
- *pcpuid = cpuid;
-
- for (i = 0; i < GMX_CPUID_NFEATURES; i++)
- {
- cpuid->feature[i] = 0;
- }
-
- cpuid->have_cpu_topology = 0;
- cpuid->nproc = 0;
- cpuid->npackages = 0;
- cpuid->ncores_per_package = 0;
- cpuid->nhwthreads_per_core = 0;
- cpuid->package_id = NULL;
- cpuid->core_id = NULL;
- cpuid->hwthread_id = NULL;
- cpuid->locality_order = NULL;
-
- cpuid->vendor = cpuid_check_vendor();
-
- switch (cpuid->vendor)
- {
-#ifdef GMX_CPUID_X86
- case GMX_CPUID_VENDOR_INTEL:
- cpuid_check_intel_x86(cpuid);
- break;
- case GMX_CPUID_VENDOR_AMD:
- cpuid_check_amd_x86(cpuid);
- break;
-#endif
- case GMX_CPUID_VENDOR_ARM:
- cpuid_check_arm(cpuid);
- break;
- case GMX_CPUID_VENDOR_IBM:
- cpuid_check_ibm(cpuid);
- break;
- default:
- /* Default value */
- strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
-#if defined(__linux__) || defined(__linux)
- /* General Linux. Try to get CPU type from /proc/cpuinfo */
- if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
- {
- found_brand = 0;
- while ( (found_brand == 0) && (fgets(buffer, sizeof(buffer), fp) != NULL))
- {
- chomp_substring_before_colon(buffer, buffer2, sizeof(buffer2));
- /* Intel uses "model name", Fujitsu and IBM "cpu". */
- if (!strcmp(buffer2, "model name") || !strcmp(buffer2, "cpu"))
- {
- chomp_substring_after_colon(buffer, cpuid->brand, GMX_CPUID_STRLEN);
- found_brand = 1;
- }
- }
- }
- fclose(fp);
-#endif
- cpuid->family = 0;
- cpuid->model = 0;
- cpuid->stepping = 0;
-
- for (i = 0; i < GMX_CPUID_NFEATURES; i++)
- {
- cpuid->feature[i] = 0;
- }
- cpuid->feature[GMX_CPUID_FEATURE_CANNOTDETECT] = 1;
- break;
- }
- return 0;
-}
-
-
-
-void
-gmx_cpuid_done (gmx_cpuid_t cpuid)
-{
- free(cpuid);
-}
-
-
-int
-gmx_cpuid_formatstring (gmx_cpuid_t cpuid,
- char * str,
- int n)
-{
- int c;
- int i;
- enum gmx_cpuid_feature feature;
-
-#ifdef _MSC_VER
- _snprintf(str, n,
- " Vendor: %s\n"
- " Brand: %s\n"
- " Family: %2d model: %2d stepping: %2d\n"
- " CPU features:",
- gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
- gmx_cpuid_brand(cpuid),
- gmx_cpuid_family(cpuid), gmx_cpuid_model(cpuid), gmx_cpuid_stepping(cpuid));
-#else
- snprintf(str, n,
- " Vendor: %s\n"
- " Brand: %s\n"
- " Family: %2d model: %2d stepping: %2d\n"
- " CPU features:",
- gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
- gmx_cpuid_brand(cpuid),
- gmx_cpuid_family(cpuid), gmx_cpuid_model(cpuid), gmx_cpuid_stepping(cpuid));
-#endif
-
- str[n-1] = '\0';
- c = strlen(str);
- n -= c;
- str += c;
-
- for (feature = GMX_CPUID_FEATURE_CANNOTDETECT; feature < GMX_CPUID_NFEATURES; feature++)
- {
- if (gmx_cpuid_feature(cpuid, feature) == 1)
- {
-#ifdef _MSC_VER
- _snprintf(str, n, " %s", gmx_cpuid_feature_string[feature]);
-#else
- snprintf(str, n, " %s", gmx_cpuid_feature_string[feature]);
-#endif
- str[n-1] = '\0';
- c = strlen(str);
- n -= c;
- str += c;
- }
- }
-#ifdef _MSC_VER
- _snprintf(str, n, "\n");
-#else
- snprintf(str, n, "\n");
-#endif
- str[n-1] = '\0';
-
- return 0;
-}
-
-
-
-enum gmx_cpuid_simd
-gmx_cpuid_simd_suggest (gmx_cpuid_t cpuid)
-{
- enum gmx_cpuid_simd tmpsimd;
-
- tmpsimd = GMX_CPUID_SIMD_NONE;
-
- if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_INTEL)
- {
- /* TODO: Add check for AVX-512F & AVX-512ER here as soon as we
- * have implemented verlet kernels for them. Until then,
- * we should pick AVX2 instead for the automatic detection.
- */
- if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX2))
- {
- tmpsimd = GMX_CPUID_SIMD_X86_AVX2_256;
- }
- else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
- {
- tmpsimd = GMX_CPUID_SIMD_X86_AVX_256;
- }
- else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE4_1))
- {
- tmpsimd = GMX_CPUID_SIMD_X86_SSE4_1;
- }
- else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE2))
- {
- tmpsimd = GMX_CPUID_SIMD_X86_SSE2;
- }
- }
- else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_AMD)
- {
- if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
- {
- tmpsimd = GMX_CPUID_SIMD_X86_AVX_128_FMA;
- }
- else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE4_1))
- {
- tmpsimd = GMX_CPUID_SIMD_X86_SSE4_1;
- }
- else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE2))
- {
- tmpsimd = GMX_CPUID_SIMD_X86_SSE2;
- }
- }
- else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_FUJITSU)
- {
- if (strstr(gmx_cpuid_brand(cpuid), "SPARC64"))
- {
- tmpsimd = GMX_CPUID_SIMD_SPARC64_HPC_ACE;
- }
- }
- else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_IBM)
- {
- if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_QPX))
- {
- tmpsimd = GMX_CPUID_SIMD_IBM_QPX;
- }
- else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_VSX))
- {
- /* VSX is better than VMX, so we check it first */
- tmpsimd = GMX_CPUID_SIMD_IBM_VSX;
- }
- else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_VMX))
- {
- tmpsimd = GMX_CPUID_SIMD_IBM_VMX;
- }
- }
- else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_ARM)
- {
- if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_ARM_NEON_ASIMD))
- {
- tmpsimd = GMX_CPUID_SIMD_ARM_NEON_ASIMD;
- }
- else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_ARM_NEON))
- {
- tmpsimd = GMX_CPUID_SIMD_ARM_NEON;
- }
- }
- return tmpsimd;
-}
-
-
-int
-gmx_cpuid_simd_check(enum gmx_cpuid_simd simd_suggest,
- FILE * log,
- int print_to_stderr)
-{
- int rc;
-
- rc = (simd_suggest != compiled_simd);
-
- if (rc != 0)
- {
- if (log != NULL)
- {
- fprintf(log, "\nBinary not matching hardware - you might be losing performance.\n"
- "SIMD instructions most likely to fit this hardware: %s\n"
- "SIMD instructions selected at GROMACS compile time: %s\n\n",
- gmx_cpuid_simd_string[simd_suggest],
- gmx_cpuid_simd_string[compiled_simd]);
- }
- if (print_to_stderr)
- {
- fprintf(stderr, "Compiled SIMD instructions: %s, GROMACS could use %s on this machine, which is better\n\n",
- gmx_cpuid_simd_string[compiled_simd],
- gmx_cpuid_simd_string[simd_suggest]);
- }
- }
- return rc;
-}
-
-
-#ifdef GMX_CPUID_STANDALONE
-/* Stand-alone program to enable queries of CPU features from Cmake.
- * Note that you need to check inline ASM capabilities before compiling and set
- * -DGMX_X86_GCC_INLINE_ASM for the cpuid instruction to work...
- */
-int
-main(int argc, char **argv)
-{
- gmx_cpuid_t cpuid;
- enum gmx_cpuid_simd simd;
- int i, cnt;
-
- if (argc < 2)
- {
- fprintf(stdout,
- "Usage:\n\n%s [flags]\n\n"
- "Available flags:\n"
- "-vendor Print CPU vendor.\n"
- "-brand Print CPU brand string.\n"
- "-family Print CPU family version.\n"
- "-model Print CPU model version.\n"
- "-stepping Print CPU stepping version.\n"
- "-features Print CPU feature flags.\n"
- "-simd Print suggested GROMACS SIMD instructions.\n",
- argv[0]);
- exit(0);
- }
-
- gmx_cpuid_init(&cpuid);
-
- if (!strncmp(argv[1], "-vendor", 3))
- {
- printf("%s\n", gmx_cpuid_vendor_string[cpuid->vendor]);
- }
- else if (!strncmp(argv[1], "-brand", 3))
- {
- printf("%s\n", cpuid->brand);
- }
- else if (!strncmp(argv[1], "-family", 3))
- {
- printf("%d\n", cpuid->family);
- }
- else if (!strncmp(argv[1], "-model", 3))
- {
- printf("%d\n", cpuid->model);
- }
- else if (!strncmp(argv[1], "-stepping", 3))
- {
- printf("%d\n", cpuid->stepping);
- }
- else if (!strncmp(argv[1], "-features", 3))
- {
- cnt = 0;
- for (i = 0; i < GMX_CPUID_NFEATURES; i++)
- {
- if (cpuid->feature[i] == 1)
- {
- if (cnt++ > 0)
- {
- printf(" ");
- }
- printf("%s", gmx_cpuid_feature_string[i]);
- }
- }
- printf("\n");
- }
- else if (!strncmp(argv[1], "-simd", 3))
- {
- simd = gmx_cpuid_simd_suggest(cpuid);
- fprintf(stdout, "%s\n", gmx_cpuid_simd_string[simd]);
- }
-
- gmx_cpuid_done(cpuid);
-
-
- return 0;
-}
-
-#endif
-
-/*! \endcond */
#include "gromacs/gmxlib/md_logging.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
-#include "gromacs/legacyheaders/gmx_cpuid.h"
+#include "gromacs/hardware/cpuinfo.h"
+#include "gromacs/hardware/hardwaretopology.h"
#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/legacyheaders/types/hw_info.h"
#include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/simd/support.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/basenetwork.h"
/* Give a suitable fatal error or warning if the build configuration
and runtime CPU do not match. */
static void
-check_use_of_rdtscp_on_this_cpu(FILE *fplog,
- const t_commrec *cr,
- const gmx_hw_info_t *hwinfo)
+check_use_of_rdtscp_on_this_cpu(FILE *fplog,
+ const t_commrec *cr,
+ const gmx::CpuInfo &cpuInfo)
{
- gmx_bool bCpuHasRdtscp, bBinaryUsesRdtscp;
+ /* Compile-time property of this binary: was it built with rdtscp timing? */
#ifdef HAVE_RDTSCP
- bBinaryUsesRdtscp = TRUE;
+ const bool binaryUsesRdtscp = true;
#else
- bBinaryUsesRdtscp = FALSE;
+ const bool binaryUsesRdtscp = false;
#endif
- bCpuHasRdtscp = gmx_cpuid_feature(hwinfo->cpuid_info, GMX_CPUID_FEATURE_X86_RDTSCP);
-
const char *programName = gmx::getProgramContext().displayName();
- if (!bCpuHasRdtscp && bBinaryUsesRdtscp)
+ /* Without feature-level detection we cannot tell whether the CPU has
+ * rdtscp, so we can only warn instead of deciding between fatal/ok.
+ */
+ if (cpuInfo.supportLevel() < gmx::CpuInfo::SupportLevel::Features)
{
- gmx_fatal(FARGS, "The %s executable was compiled to use the rdtscp CPU instruction. "
- "However, this is not supported by the current hardware and continuing would lead to a crash. "
- "Please rebuild GROMACS with the GMX_USE_RDTSCP=OFF CMake option.",
- programName);
+ if (binaryUsesRdtscp)
+ {
+ md_print_warn(cr, fplog, "The %s executable was compiled to use the rdtscp CPU instruction. "
+ "We cannot detect the features of your current CPU, but will proceed anyway. "
+ "If you get a crash, rebuild GROMACS with the GMX_USE_RDTSCP=OFF CMake option.",
+ programName);
+ }
}
-
- if (bCpuHasRdtscp && !bBinaryUsesRdtscp)
+ else
{
- md_print_warn(cr, fplog, "The current CPU can measure timings more accurately than the code in\n"
- "%s was configured to use. This might affect your simulation\n"
- "speed as accurate timings are needed for load-balancing.\n"
- "Please consider rebuilding %s with the GMX_USE_RDTSCP=ON CMake option.\n",
- programName, programName);
+ const bool cpuHasRdtscp = cpuInfo.feature(gmx::CpuInfo::Feature::X86_Rdtscp);
+
+ /* Binary needs rdtscp but the CPU lacks it: stop before we crash */
+ if (!cpuHasRdtscp && binaryUsesRdtscp)
+ {
+ gmx_fatal(FARGS, "The %s executable was compiled to use the rdtscp CPU instruction. "
+ "However, this is not supported by the current hardware and continuing would lead to a crash. "
+ "Please rebuild GROMACS with the GMX_USE_RDTSCP=OFF CMake option.",
+ programName);
+ }
+
+ /* CPU offers rdtscp but the binary does not use it: only a hint */
+ if (cpuHasRdtscp && !binaryUsesRdtscp)
+ {
+ md_print_warn(cr, fplog, "The current CPU can measure timings more accurately than the code in\n"
+ "%s was configured to use. This might affect your simulation\n"
+ "speed as accurate timings are needed for load-balancing.\n"
+ "Please consider rebuilding %s with the GMX_USE_RDTSCP=ON CMake option.\n",
+ programName, programName);
+ }
}
}
return uniq_count;
}
-static int get_ncores(gmx_cpuid_t cpuid)
+/* Return the number of physical cores found by the hardware topology
+ * detection, or 0 when no socket/core information is available.
+ */
+static int get_ncores(const gmx::HardwareTopology &hwTop)
{
- int nprocessors, npackages, ncores_per_package, nhwthreads_per_core;
- const int *package_id, *core_id, *hwthread_id, *locality_order;
- int rc;
-
- rc = gmx_cpuid_topology(cpuid,
- &nprocessors, &npackages,
- &ncores_per_package, &nhwthreads_per_core,
- &package_id, &core_id,
- &hwthread_id, &locality_order);
-
- if (rc == 0)
+ /* Require Basic support, the same level the thread-affinity code uses
+ * before touching sockets/cores. Testing against SupportLevel::None
+ * (presumably the lowest level - confirm in hardwaretopology.h) would
+ * accept every detection result and make the else branch unreachable.
+ */
+ if (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::Basic)
{
- return npackages*ncores_per_package;
+ /* Count cores, not hardware threads, to match what the replaced
+ * npackages*ncores_per_package expression reported.
+ */
+ int ncores = 0;
+ for (auto &s : hwTop.machine().sockets)
+ {
+ ncores += static_cast<int>(s.cores.size());
+ }
+ return ncores;
}
else
{
- /* We don't have cpuid topology info, return 0 core count */
+ /* We don't have topology info, return 0 core count */
return 0;
}
}
#endif
}
-static void gmx_collect_hardware_mpi()
+static void gmx_collect_hardware_mpi(const gmx::CpuInfo &cpuInfo)
{
#ifdef GMX_LIB_MPI
int rank_id;
buf[0] = ncore;
buf[1] = nhwthread;
buf[2] = ngpu;
- buf[3] = gmx_cpuid_simd_suggest(hwinfo_g->cpuid_info);
+ buf[3] = static_cast<int>(gmx::simdSuggested(cpuInfo));
buf[4] = gpu_hash;
buf[5] = -buf[0];
buf[6] = -buf[1];
hwinfo_g->ngpu_compatible_tot = sum[3];
hwinfo_g->ngpu_compatible_min = -maxmin[7];
hwinfo_g->ngpu_compatible_max = maxmin[2];
- hwinfo_g->simd_suggest_min = static_cast<enum gmx_cpuid_simd>(-maxmin[8]);
- hwinfo_g->simd_suggest_max = static_cast<enum gmx_cpuid_simd>(maxmin[3]);
+ hwinfo_g->simd_suggest_min = -maxmin[8];
+ hwinfo_g->simd_suggest_max = maxmin[3];
hwinfo_g->bIdenticalGPUs = (maxmin[4] == -maxmin[9]);
#else
/* All ranks use the same pointer, protect it with a mutex */
hwinfo_g->ngpu_compatible_tot = hwinfo_g->gpu_info.n_dev_compatible;
hwinfo_g->ngpu_compatible_min = hwinfo_g->gpu_info.n_dev_compatible;
hwinfo_g->ngpu_compatible_max = hwinfo_g->gpu_info.n_dev_compatible;
- hwinfo_g->simd_suggest_min = gmx_cpuid_simd_suggest(hwinfo_g->cpuid_info);
- hwinfo_g->simd_suggest_max = gmx_cpuid_simd_suggest(hwinfo_g->cpuid_info);
+ hwinfo_g->simd_suggest_min = static_cast<int>(simdSuggested(cpuInfo));
+ hwinfo_g->simd_suggest_max = static_cast<int>(simdSuggested(cpuInfo));
hwinfo_g->bIdenticalGPUs = TRUE;
tMPI_Thread_mutex_unlock(&hw_info_lock);
#endif
{
snew(hwinfo_g, 1);
- /* detect CPUID info; no fuss, we don't detect system-wide
- * -- sloppy, but that's it for now */
- if (gmx_cpuid_init(&hwinfo_g->cpuid_info) != 0)
- {
- gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
- "CPUID detection failed!");
- }
+ hwinfo_g->pCpuInfo = reinterpret_cast<struct CxxObject *>(new gmx::CpuInfo(gmx::CpuInfo::detect()));
+
+ gmx::HardwareTopology * hwTop = new gmx::HardwareTopology(gmx::HardwareTopology::detect());
+ hwinfo_g->pHardwareTopology = reinterpret_cast<struct CxxObject *>(hwTop);
+
/* get the number of cores, will be 0 when not detected */
- hwinfo_g->ncore = get_ncores(hwinfo_g->cpuid_info);
+ hwinfo_g->ncore = get_ncores(*hwTop);
/* detect number of hardware threads */
hwinfo_g->nthreads_hw_avail = get_nthreads_hw_avail(fplog, cr);
gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno));
}
- gmx_collect_hardware_mpi();
+ gmx_collect_hardware_mpi(*reinterpret_cast<gmx::CpuInfo *>(hwinfo_g->pCpuInfo));
return hwinfo_g;
}
static std::string detected_hardware_string(const gmx_hw_info_t *hwinfo,
bool bFullCpuInfo)
{
- std::string s;
+ std::string s;
+
+ /* Read the CPU information from the structure we were handed, like the
+ * rest of this function does, rather than from the file-scope hwinfo_g.
+ */
+ const gmx::CpuInfo &cpuInfo = *reinterpret_cast<gmx::CpuInfo *>(hwinfo->pCpuInfo);
s = gmx::formatString("\n");
s += gmx::formatString("Running on %d node%s with total",
s += gmx::formatString("Hardware detected:\n");
#endif
s += gmx::formatString(" CPU info:\n");
- if (bFullCpuInfo)
- {
- char buf[1024];
- gmx_cpuid_formatstring(hwinfo->cpuid_info, buf, 1023);
- buf[1023] = '\0';
+ s += gmx::formatString(" Vendor: %s\n", cpuInfo.vendorString().c_str());
- s += gmx::formatString("%s", buf);
- }
- else
+ s += gmx::formatString(" Brand: %s\n", cpuInfo.brandString().c_str());
+
+ /* Family/model/stepping and the feature list are only printed on request */
+ if (bFullCpuInfo)
{
- s += gmx::formatString(" Vendor: %s\n",
- gmx_cpuid_vendor_string[gmx_cpuid_vendor(hwinfo->cpuid_info)]);
- s += gmx::formatString(" Brand: %s\n",
- gmx_cpuid_brand(hwinfo->cpuid_info));
+ s += gmx::formatString(" Family: %d Model: %d Stepping: %d\n",
+ cpuInfo.family(), cpuInfo.model(), cpuInfo.stepping());
+
+ s += gmx::formatString(" Features:");
+ for (auto &f : cpuInfo.featureSet())
+ {
+ s += gmx::formatString(" %s", cpuInfo.featureString(f).c_str());
+ }
+ s += gmx::formatString("\n");
}
+
s += gmx::formatString(" SIMD instructions most likely to fit this hardware: %s",
- gmx_cpuid_simd_string[hwinfo->simd_suggest_min]);
+ gmx::simdString(static_cast<gmx::SimdType>(hwinfo->simd_suggest_min)).c_str());
+
if (hwinfo->simd_suggest_max > hwinfo->simd_suggest_min)
{
- s += gmx::formatString(" - %s",
- gmx_cpuid_simd_string[hwinfo->simd_suggest_max]);
+ s += gmx::formatString(" - %s", gmx::simdString(static_cast<gmx::SimdType>(hwinfo->simd_suggest_max)).c_str());
}
s += gmx::formatString("\n");
+
s += gmx::formatString(" SIMD instructions selected at GROMACS compile time: %s\n",
- gmx_cpuid_simd_string[gmx_compiled_simd()]);
+ gmx::simdString(gmx::simdCompiled()).c_str());
+
if (bGPUBinary && (hwinfo->ngpu_compatible_tot > 0 ||
hwinfo->gpu_info.n_dev > 0))
{
s += gmx::formatString("%s\n", buf);
}
}
-
return s;
}
void gmx_print_detected_hardware(FILE *fplog, const t_commrec *cr,
const gmx_hw_info_t *hwinfo)
{
+ /* Use the hwinfo argument (not the file-scope hwinfo_g) so that the SIMD
+ * and RDTSCP checks below refer to the same structure as simd_suggest_min.
+ */
+ const gmx::CpuInfo &cpuInfo = *reinterpret_cast<gmx::CpuInfo *>(hwinfo->pCpuInfo);
+
if (fplog != NULL)
{
std::string detected;
}
/* Check the compiled SIMD instruction set against that of the node
- * with the lowest SIMD level support.
+ * with the lowest SIMD level support (skip if SIMD detection did not work)
*/
- gmx_cpuid_simd_check(hwinfo->simd_suggest_min, fplog, MULTIMASTER(cr));
+ if (cpuInfo.supportLevel() >= gmx::CpuInfo::SupportLevel::Features)
+ {
+ gmx::simdCheck(static_cast<gmx::SimdType>(hwinfo->simd_suggest_min), fplog, MULTIMASTER(cr));
+ }
/* For RDTSCP we only check on our local node and skip the MPI reduction */
- check_use_of_rdtscp_on_this_cpu(fplog, cr, hwinfo);
+ check_use_of_rdtscp_on_this_cpu(fplog, cr, cpuInfo);
}
//! \brief Return if any GPU ID (e.g in a user-supplied string) is repeated
if (n_hwinfo == 0)
{
- gmx_cpuid_done(hwinfo_g->cpuid_info);
+ gmx::CpuInfo * pCpuInfo = reinterpret_cast<gmx::CpuInfo *>(hwinfo_g->pCpuInfo);
+ gmx::HardwareTopology * pHwTop = reinterpret_cast<gmx::HardwareTopology *>(hwinfo_g->pHardwareTopology);
+
+ delete pCpuInfo;
+ delete pHwTop;
free_gpu_info(&hwinfo_g->gpu_info);
sfree(hwinfo_g);
}
#ifndef GMX_GMXLIB_GPU_UTILS_GPU_UTILS_H
#define GMX_GMXLIB_GPU_UTILS_GPU_UTILS_H
+#include <cstdio>
+
#include "gromacs/gmxlib/gpu_utils/gpu_macros.h"
#include "gromacs/legacyheaders/types/hw_info.h"
#ifndef _nb_kernel_h_
#define _nb_kernel_h_
+#include <stdio.h>
+
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/legacyheaders/types/forcerec.h"
#include "gromacs/math/vectypes.h"
#include "gromacs/gmxlib/gmx_omp_nthreads.h"
#include "gromacs/gmxlib/md_logging.h"
-#include "gromacs/legacyheaders/gmx_cpuid.h"
+#include "gromacs/hardware/hardwaretopology.h"
#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/legacyheaders/types/hw_info.h"
#include "gromacs/utility/basenetwork.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/gmxomp.h"
#include "gromacs/utility/programcontext.h"
+#include "gromacs/utility/scoped_cptr.h"
#include "gromacs/utility/smalloc.h"
+
static int
get_thread_affinity_layout(FILE *fplog,
const t_commrec *cr,
const gmx_hw_info_t * hwinfo,
- int nthreads,
+ int threads,
int pin_offset, int * pin_stride,
- const int **locality_order)
+ int **localityOrder)
{
- int nhwthreads, npkg, ncores, nhwthreads_per_core, rc;
- const int * pkg_id;
- const int * core_id;
- const int * hwthread_id;
- gmx_bool bPickPinStride;
+ int hwThreads;
+ int hwThreadsPerCore = 0;
+ bool bPickPinStride;
+ bool haveTopology;
+
+ const gmx::HardwareTopology &hwTop = *reinterpret_cast<gmx::HardwareTopology *>(hwinfo->pHardwareTopology);
+
+ haveTopology = (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::Basic);
if (pin_offset < 0)
{
gmx_fatal(FARGS, "Negative thread pinning stride requested");
}
- rc = gmx_cpuid_topology(hwinfo->cpuid_info, &nhwthreads, &npkg, &ncores,
- &nhwthreads_per_core,
- &pkg_id, &core_id, &hwthread_id, locality_order);
-
- if (rc != 0)
+ if (haveTopology)
+ {
+ hwThreads = hwTop.machine().logicalProcessorCount;
+ // Just use the value for the first core
+ hwThreadsPerCore = hwTop.machine().sockets[0].cores[0].hwThreads.size();
+ snew(*localityOrder, hwThreads);
+ int i = 0;
+ for (auto &s : hwTop.machine().sockets)
+ {
+ for (auto &c : s.cores)
+ {
+ for (auto &t : c.hwThreads)
+ {
+ (*localityOrder)[i++] = t.logicalProcessorId;
+ }
+ }
+ }
+ }
+ else
{
/* topology information not available or invalid, ignore it */
- nhwthreads = hwinfo->nthreads_hw_avail;
- *locality_order = NULL;
+ hwThreads = hwinfo->nthreads_hw_avail;
+ *localityOrder = NULL;
- if (nhwthreads <= 0)
+ if (hwThreads <= 0)
{
/* We don't know anything about the hardware, don't pin */
md_print_warn(cr, fplog,
}
}
- if (nthreads > nhwthreads)
+ if (threads > hwThreads)
{
/* We are oversubscribing, don't pin */
md_print_warn(NULL, fplog,
return -1;
}
- if (pin_offset + nthreads > nhwthreads)
+ if (pin_offset + threads > hwThreads)
{
/* We are oversubscribing, don't pin */
md_print_warn(NULL, fplog,
if (bPickPinStride)
{
- if (rc == 0 && pin_offset + nthreads*nhwthreads_per_core <= nhwthreads)
+ if (haveTopology && pin_offset + threads*hwThreadsPerCore <= hwThreads)
{
/* Put one thread on each physical core */
- *pin_stride = nhwthreads_per_core;
+ *pin_stride = hwThreadsPerCore;
}
else
{
* and probably threads are already pinned by the queuing system,
* so we wouldn't end up here in the first place.
*/
- *pin_stride = (nhwthreads - pin_offset)/nthreads;
+ *pin_stride = (hwThreads - pin_offset)/threads;
}
}
else
{
/* Check the placement of the thread with the largest index to make sure
* that the offset & stride doesn't cause pinning beyond the last hardware thread. */
- if (pin_offset + (nthreads-1)*(*pin_stride) >= nhwthreads)
+ if (pin_offset + (threads-1)*(*pin_stride) >= hwThreads)
{
/* We are oversubscribing, don't pin */
md_print_warn(NULL, fplog,
int nth_affinity_set, thread0_id_node,
nthread_local, nthread_node;
int offset;
- const int *locality_order;
+ int * localityOrder = nullptr;
int rc;
if (hw_opt->thread_affinity == threadaffOFF)
rc = get_thread_affinity_layout(fplog, cr, hwinfo,
nthread_node,
offset, &core_pinning_stride,
- &locality_order);
+ &localityOrder);
+ gmx::scoped_guard_sfree localityOrderGuard(localityOrder);
if (rc != 0)
{
thread_id = gmx_omp_get_thread_num();
thread_id_node = thread0_id_node + thread_id;
index = offset + thread_id_node*core_pinning_stride;
- if (locality_order != NULL)
+ if (localityOrder != nullptr)
{
- core = locality_order[index];
+ core = localityOrder[index];
}
else
{
--- /dev/null
+#
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2015, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+
+# Collect every C++ source file in this directory and append the list to
+# LIBGROMACS_SOURCES in the parent scope, so the parent directory sees them.
+file(GLOB HARDWARE_SOURCES *.cpp)
+set(LIBGROMACS_SOURCES ${LIBGROMACS_SOURCES} ${HARDWARE_SOURCES} PARENT_SCOPE)
+
+# The tests subdirectory is only processed when testing is enabled.
+if (BUILD_TESTING)
+ add_subdirectory(tests)
+endif()
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \internal \file
+ * \brief
+ * Implements gmx::CpuInfo.
+ *
+ * We need to be able to compile this file in stand-alone mode to use basic
+ * CPU feature detection to set the SIMD acceleration and similar things in
+ * CMake, while we still want to use more features that enable topology
+ * detection when config.h is present.
+ *
+ * We solve this by skipping the advanced stuff when the preprocessor
+ * macro GMX_CPUINFO_STANDALONE is defined. In this case you likely also need to
+ * define GMX_X86_GCC_INLINE_ASM if you are on x86; without inline assembly
+ * support it is not possible to perform the actual detection on Linux/Mac.
+ * Since these macros are specific to this file, they do not use the GMX prefix.
+ *
+ * The remaining defines (GMX_NATIVE_WINDOWS,HAVE_UNISTD_H,HAVE_SCHED_H,
+ * HAVE_SYSCONF, HAVE_SCHED_AFFINITY) are only used to determine the topology on
+ * x86, and for this we rely on including config.h.
+ *
+ * \author Erik Lindahl <erik.lindahl@gmail.com>
+ * \ingroup module_hardware
+ */
+
+#ifndef GMX_CPUINFO_STANDALONE
+# include "gmxpre.h"
+#endif
+
+#include "cpuinfo.h"
+
+#ifndef GMX_CPUINFO_STANDALONE
+# include "config.h"
+#endif
+
+#if defined _MSC_VER
+# include <intrin.h> // __cpuid()
+#endif
+
+#ifdef GMX_NATIVE_WINDOWS
+# include <windows.h> // sysinfo(), necessary for topology stuff
+#endif
+
+#ifdef HAVE_SCHED_H
+# include <sched.h> // sched_getaffinity(), sched_setaffinity()
+#endif
+#ifdef HAVE_UNISTD_H
+# include <unistd.h> // sysconf()
+#endif
+
+#include <cctype>
+#include <cstdlib>
+
+#include <algorithm>
+#include <fstream>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+
+#ifdef GMX_CPUINFO_STANDALONE
+# define gmx_unused
+#else
+# include "gromacs/utility/basedefinitions.h"
+#endif
+
+namespace gmx
+{
+
+namespace
+{
+
+/*! \cond internal */
+
+/******************************************************************************
+ * *
+ * Utility functions to make this file independent of the GROMACS library *
+ * *
+ ******************************************************************************/
+
+/*! \brief Remove initial and trailing whitespace from string
+ *
+ * \param s  Pointer to string where whitespace will be removed
+ *
+ * A string that consists only of whitespace becomes empty.
+ */
+void
+trimString(std::string * s)
+{
+    // std::isspace() has undefined behavior for negative arguments other than
+    // EOF, so go through unsigned char to stay safe for non-ASCII bytes on
+    // platforms where plain char is signed.
+    auto notSpace = [](char c) -> bool { return !std::isspace(static_cast<unsigned char>(c)); };
+
+    // leading
+    s->erase(s->begin(), std::find_if(s->begin(), s->end(), notSpace));
+    // trailing
+    s->erase(std::find_if(s->rbegin(), s->rend(), notSpace).base(), s->end());
+}
+
+
+/******************************************************************************
+ * *
+ * x86 detection functions *
+ * *
+ ******************************************************************************/
+
+/*! \brief execute x86 cpuid instructions with custom level and extended level
+ *
+ *  \param level   The main cpuid level (input argument for eax register)
+ *  \param ecxval  Extended level (input argument for ecx register)
+ *  \param eax     Output in eax register
+ *  \param ebx     Output in ebx register
+ *  \param ecx     Output in ecx register
+ *  \param edx     Output in edx register
+ *
+ *  \return 0 on success, or non-zero if the instruction could not execute.
+ *          When non-zero is returned (no compiler support for cpuid, or the
+ *          target is not x86) all four output registers are set to zero.
+ */
+int
+executeX86CpuID(unsigned int gmx_unused level,
+                unsigned int gmx_unused ecxval,
+                unsigned int *          eax,
+                unsigned int *          ebx,
+                unsigned int *          ecx,
+                unsigned int *          edx)
+{
+#if defined __i386__ || defined __i386 || defined _X86_ || defined _M_IX86 || \
+    defined __x86_64__ || defined __amd64__ || defined _M_X64 || defined _M_AMD64
+
+#    if defined __GNUC__ || defined GMX_X86_GCC_INLINE_ASM
+
+    // any compiler that understands gcc inline assembly
+    *eax = level;
+    *ecx = ecxval;
+    *ebx = 0;
+    *edx = 0;
+
+#        if (defined __i386__ || defined __i386 || defined _X86_ || defined _M_IX86) && defined(__PIC__)
+    // Avoid clobbering the global offset table in 32-bit pic code (ebx register)
+    __asm__ __volatile__ ("xchgl %%ebx, %1  \n\t"
+                          "cpuid            \n\t"
+                          "xchgl %%ebx, %1  \n\t"
+                          : "+a" (*eax), "+r" (*ebx), "+c" (*ecx), "+d" (*edx));
+#        else
+    // i386 without PIC, or x86-64. Things are easy and we can clobber any reg we want
+    __asm__ __volatile__ ("cpuid            \n\t"
+                          : "+a" (*eax), "+b" (*ebx), "+c" (*ecx), "+d" (*edx));
+#        endif
+    return 0;
+
+#    elif defined _MSC_VER
+
+    // MSVC (and icc on windows) on ia32 or x86-64
+    int cpuInfo[4];
+    __cpuidex(cpuInfo, level, ecxval);
+    *eax = static_cast<unsigned int>(cpuInfo[0]);
+    *ebx = static_cast<unsigned int>(cpuInfo[1]);
+    *ecx = static_cast<unsigned int>(cpuInfo[2]);
+    *edx = static_cast<unsigned int>(cpuInfo[3]);
+    return 0;
+
+#    else
+
+    // No compiler support for cpuid if we get here
+    *eax = 0;
+    *ebx = 0;
+    *ecx = 0;
+    *edx = 0;
+    return 1;
+
+#    endif  // gcc inline asm, msvc, or no cpuid support
+
+#else   // not x86
+
+    // This function is still compiled on other architectures. Without this
+    // branch control would fall off the end of a non-void function and the
+    // outputs would be left uninitialized.
+    *eax = 0;
+    *ebx = 0;
+    *ecx = 0;
+    *edx = 0;
+    return 1;
+
+#endif  // x86
+}
+
+
+/*! \brief Detect x86 vendors by using the cpuid assembly instructions
+ *
+ * If support for the cpuid instruction is present, we check for Intel
+ * or AMD vendors.
+ *
+ * \return gmx::CpuInfo::Vendor::Intel, gmx::CpuInfo::Vendor::Amd. If neither
+ * Intel nor Amd can be identified, or if the code fails to execute,
+ * gmx::CpuInfo::Vendor::Unknown is returned.
+ */
+CpuInfo::Vendor
+detectX86Vendor()
+{
+    unsigned int regA, regB, regC, regD;
+
+    // Level 0 returns the vendor identification string spread over ebx/edx/ecx
+    if (executeX86CpuID(0x0, 0, &regA, &regB, &regC, &regD) != 0)
+    {
+        return CpuInfo::Vendor::Unknown;
+    }
+
+    // ebx=='uneG', ecx=='letn', edx=='Ieni'
+    const bool isIntel = (regB == 0x756e6547 && regC == 0x6c65746e && regD == 0x49656e69);
+    if (isIntel)
+    {
+        return CpuInfo::Vendor::Intel;
+    }
+
+    // ebx=='htuA', ecx=='DMAc', edx=='itne'
+    const bool isAmd = (regB == 0x68747541 && regC == 0x444d4163 && regD == 0x69746e65);
+    if (isAmd)
+    {
+        return CpuInfo::Vendor::Amd;
+    }
+
+    return CpuInfo::Vendor::Unknown;
+}
+
+/*! \brief Simple utility function to set a feature in a set
+ *
+ *  \param featureSet    Pointer to the feature set to update
+ *  \param feature       The specific feature to set
+ *  \param registerValue Register value (returned from cpuid)
+ *  \param bit           Bit to check in registerValue (0-31). The feature will
+ *                       be added to the featureSet if this bit is set.
+ *
+ *  \note Nothing is done if the bit is not set. In particular, this will not
+ *        erase anything if the feature already exists in the set.
+ */
+void
+setFeatureFromBit(std::set<CpuInfo::Feature> *   featureSet,
+                  CpuInfo::Feature               feature,
+                  unsigned int                   registerValue,
+                  unsigned char                  bit)
+{
+    // Shift an unsigned constant: shifting a signed 1 into bit 31 overflows int.
+    if (registerValue & (1u << bit))
+    {
+        featureSet->insert(feature);
+    }
+}
+
+/*! \brief Process x86 cpuinfo features that are common to Intel and AMD CPUs
+ *
+ *  \param[out] brand      String where to write the x86 brand string
+ *  \param[out] family     Major version of processor
+ *  \param[out] model      Middle version of processor
+ *  \param[out] stepping   Minor version of processor
+ *  \param[out] features   Feature set where supported features are inserted
+ *
+ *  Outputs are only written for the cpuid levels the processor reports as
+ *  supported; everything else is left untouched. If cpuid cannot be executed
+ *  at all, nothing is modified.
+ */
+void
+detectX86Features(std::string *                  brand,
+                  int *                          family,
+                  int *                          model,
+                  int *                          stepping,
+                  std::set<CpuInfo::Feature> *   features)
+{
+    unsigned int eax, ebx, ecx, edx;
+
+    // Return if we cannot execute any levels
+    if (executeX86CpuID(0x0, 0, &eax, &ebx, &ecx, &edx) != 0)
+    {
+        return;
+    }
+    unsigned int maxStdLevel = eax;
+
+    if (maxStdLevel >= 0x1)
+    {
+        executeX86CpuID(0x1, 0, &eax, &ebx, &ecx, &edx);
+
+        // Extended family/model fields are combined with the base fields
+        *family   = ((eax & 0x0ff00000) >> 20) + ((eax & 0x00000f00) >> 8);
+        *model    = ((eax & 0x000f0000) >> 12) + ((eax & 0x000000f0) >> 4);
+        *stepping = (eax & 0x0000000f);
+
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Sse3,     ecx,  0 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Pclmuldq, ecx,  1 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Ssse3,    ecx,  9 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Fma,      ecx, 12 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Cx16,     ecx, 13 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Pdcm,     ecx, 15 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Pcid,     ecx, 17 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Sse4_1,   ecx, 19 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Sse4_2,   ecx, 20 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_X2Apic,   ecx, 21 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Popcnt,   ecx, 23 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Tdt,      ecx, 24 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Aes,      ecx, 25 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Avx,      ecx, 28 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_F16C,     ecx, 29 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Rdrnd,    ecx, 30 );
+
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Pse,      edx,  3 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Msr,      edx,  5 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Cx8,      edx,  8 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Apic,     edx,  9 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Cmov,     edx, 15 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Clfsh,    edx, 19 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Mmx,      edx, 23 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Sse2,     edx, 26 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Htt,      edx, 28 );
+    }
+
+    if (maxStdLevel >= 0x7)
+    {
+        executeX86CpuID(0x7, 0, &eax, &ebx, &ecx, &edx);
+
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Hle,      ebx,  4 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Avx2,     ebx,  5 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Rtm,      ebx, 11 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512F,  ebx, 16 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512PF, ebx, 26 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512ER, ebx, 27 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512CD, ebx, 28 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Sha,      ebx, 29 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512BW, ebx, 30 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512VL, ebx, 31 );
+    }
+
+    // Check whether Hyper-threading is really possible to enable in the hardware,
+    // not just technically supported by this generation of processors
+    if (features->count(CpuInfo::Feature::X86_Htt) && maxStdLevel >= 0x4)
+    {
+        executeX86CpuID(0x1, 0, &eax, &ebx, &ecx, &edx);
+        unsigned int maxLogicalCores  = (ebx >> 16) & 0x0ff;
+        executeX86CpuID(0x4, 0, &eax, &ebx, &ecx, &edx);
+        unsigned int maxPhysicalCores = ((eax >> 26) & 0x3f) + 1;
+        if (maxLogicalCores/maxPhysicalCores < 2)
+        {
+            features->erase(CpuInfo::Feature::X86_Htt);
+        }
+    }
+
+    if (executeX86CpuID(0x80000000, 0, &eax, &ebx, &ecx, &edx) != 0)
+    {
+        // No point in continuing if we don't support any extended levels
+        return;
+    }
+    unsigned int maxExtLevel = eax;
+
+    if (maxExtLevel >= 0x80000001)
+    {
+        executeX86CpuID(0x80000001, 0, &eax, &ebx, &ecx, &edx);
+
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Lahf,        ecx,  0 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Sse4A,       ecx,  6 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_MisalignSse, ecx,  7 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Xop,         ecx, 11 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Fma4,        ecx, 16 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_PDPE1GB,     edx, 26 );
+        setFeatureFromBit(features, CpuInfo::Feature::X86_Rdtscp,      edx, 27 );
+    }
+
+    // The brand string lives in levels 0x80000002-0x80000004, so the last of
+    // those is the highest level that must be supported to read it.
+    if (maxExtLevel >= 0x80000004)
+    {
+        // Get the x86 CPU brand string (3 levels, 16 bytes in each)
+        brand->clear();
+        for (unsigned int level = 0x80000002; level < 0x80000005; level++)
+        {
+            executeX86CpuID(level, 0, &eax, &ebx, &ecx, &edx);
+            // Add eax, ebx, ecx, edx contents as 4 chars each to the brand string
+            brand->append(reinterpret_cast<const char *>(&eax), sizeof(eax));
+            brand->append(reinterpret_cast<const char *>(&ebx), sizeof(ebx));
+            brand->append(reinterpret_cast<const char *>(&ecx), sizeof(ecx));
+            brand->append(reinterpret_cast<const char *>(&edx), sizeof(edx));
+        }
+        // The 48-byte buffer is a NUL-terminated C string. Cut at the first
+        // NUL so the terminator and any padding after it do not end up as
+        // embedded characters in the std::string.
+        brand->erase(std::find(brand->begin(), brand->end(), '\0'), brand->end());
+        trimString(brand);
+    }
+
+    if (maxExtLevel >= 0x80000007)
+    {
+        executeX86CpuID(0x80000007, 0, &eax, &ebx, &ecx, &edx);
+
+        setFeatureFromBit(features, CpuInfo::Feature::X86_NonstopTsc,  edx,  8 );
+    }
+}
+
+
+/*! \brief Return a vector with x86 APIC IDs for all threads
+ *
+ *  \param haveX2Apic  True if the processors supports x2APIC, otherwise vanilla APIC.
+ *
+ *  \returns A new std::vector of unsigned integer APIC IDs, one for each
+ *           logical processor in the system. The vector is empty if the
+ *           platform offers no way to pin the calling thread, if the number
+ *           of processors cannot be determined, or if pinning to any single
+ *           processor fails (a partial list would silently contain APIC IDs
+ *           read on the wrong processor).
+ *
+ *  The affinity of the calling thread is changed temporarily and restored
+ *  before returning.
+ */
+const std::vector<unsigned int>
+detectX86ApicIDs(bool gmx_unused haveX2Apic)
+{
+    std::vector<unsigned int>  apicID;
+
+    // We cannot just ask for all APIC IDs, but must force execution on each
+    // hardware thread and extract the APIC id there.
+#if defined HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF
+    unsigned int   eax, ebx, ecx, edx;
+    const long     nOnline = sysconf(_SC_NPROCESSORS_ONLN);
+    cpu_set_t      saveCpuSet;
+
+    // sysconf() returns -1 on error, which must not be turned into a huge
+    // unsigned loop bound. Indices at or above CPU_SETSIZE cannot be
+    // represented in a fixed-size cpu_set_t either.
+    if (nOnline <= 0 || nOnline > CPU_SETSIZE ||
+        sched_getaffinity(0, sizeof(cpu_set_t), &saveCpuSet) != 0)
+    {
+        return apicID;
+    }
+
+    const unsigned int nApic = static_cast<unsigned int>(nOnline);
+    cpu_set_t          cpuSet;
+    CPU_ZERO(&cpuSet);
+    for (unsigned int i = 0; i < nApic; i++)
+    {
+        CPU_SET(i, &cpuSet);
+        if (sched_setaffinity(0, sizeof(cpu_set_t), &cpuSet) != 0)
+        {
+            // We are not allowed to run on this processor, so cpuid would
+            // execute somewhere else and report a wrong/duplicate APIC ID.
+            apicID.clear();
+            break;
+        }
+        if (haveX2Apic)
+        {
+            executeX86CpuID(0xb, 0, &eax, &ebx, &ecx, &edx);
+            apicID.push_back(edx);
+        }
+        else
+        {
+            executeX86CpuID(0x1, 0, &eax, &ebx, &ecx, &edx);
+            apicID.push_back(ebx >> 24);
+        }
+        CPU_CLR(i, &cpuSet);
+    }
+    // Best effort: nothing sensible can be done here if restoring fails
+    sched_setaffinity(0, sizeof(cpu_set_t), &saveCpuSet);
+#elif defined GMX_NATIVE_WINDOWS
+    unsigned int   eax, ebx, ecx, edx;
+    SYSTEM_INFO    sysinfo;
+    GetSystemInfo( &sysinfo );
+    const unsigned int nApic = sysinfo.dwNumberOfProcessors;
+    // The previous mask is a DWORD_PTR; storing it in an unsigned int would
+    // drop the upper 32 bits on 64-bit Windows. A return value of 0 is failure.
+    const DWORD_PTR    saveAffinity = SetThreadAffinityMask(GetCurrentThread(), 1);
+    if (saveAffinity == 0)
+    {
+        return apicID;
+    }
+    for (DWORD_PTR i = 0; i < nApic; i++)
+    {
+        if (SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR)1)<<i)) == 0)
+        {
+            // Could not move to this processor; a partial list is useless
+            apicID.clear();
+            break;
+        }
+        Sleep(0);
+        if (haveX2Apic)
+        {
+            executeX86CpuID(0xb, 0, &eax, &ebx, &ecx, &edx);
+            apicID.push_back(edx);
+        }
+        else
+        {
+            executeX86CpuID(0x1, 0, &eax, &ebx, &ecx, &edx);
+            apicID.push_back(ebx >> 24);
+        }
+    }
+    SetThreadAffinityMask(GetCurrentThread(), saveAffinity);
+#endif
+    return apicID;
+}
+
+
+/*! \brief Utility to renumber indices extracted from APIC IDs
+ *
+ * \param v  Vector with unsigned integer indices; modified in place
+ *
+ * Each element is replaced by its rank among the distinct values present in
+ * the vector, so the result uses the contiguous range starting from 0 while
+ * preserving the relative order of the original values. For example, the
+ * vector {0,1,2,8,9,10,8,9,10,0,1,2} will be rewritten to
+ * {0,1,2,3,4,5,3,4,5,0,1,2}. The function does not return anything; the
+ * number of unique elements is one more than the largest value in the
+ * rewritten (non-empty) vector.
+ */
+void
+renumberIndex(std::vector<unsigned int> * v)
+{
+    // Sorted list of the distinct values found in the input
+    std::vector<unsigned int> uniqueSortedV(*v);
+    std::sort(uniqueSortedV.begin(), uniqueSortedV.end());
+    uniqueSortedV.erase(std::unique(uniqueSortedV.begin(), uniqueSortedV.end()), uniqueSortedV.end());
+
+    // The new index of a value is its position in the sorted unique list.
+    // A binary search per element avoids one full pass per unique value.
+    for (auto &c : *v)
+    {
+        auto pos = std::lower_bound(uniqueSortedV.begin(), uniqueSortedV.end(), c);
+        c = static_cast<unsigned int>(std::distance(uniqueSortedV.begin(), pos));
+    }
+}
+
+
+/*! \brief Try to detect basic CPU topology information using x86 cpuid
+ *
+ *  If x2APIC support is present, this is our first choice, otherwise we
+ *  attempt to use old vanilla APIC.
+ *
+ *  \return A new vector of entries with socket, core, hwthread information
+ *          for each logical processor. The vector is empty if APIC IDs are
+ *          not available or if the decoded layout is not self-consistent.
+ */
+std::vector<CpuInfo::LogicalProcessor>
+detectX86LogicalProcessors()
+{
+    unsigned int   eax;
+    unsigned int   ebx;
+    unsigned int   ecx;
+    unsigned int   edx;
+    unsigned int   maxStdLevel;
+    unsigned int   maxExtLevel;
+    bool           haveApic   = false;
+    bool           haveX2Apic = false;
+
+    std::vector<CpuInfo::LogicalProcessor> logicalProcessors;
+
+    // Find largest standard & extended level input values allowed
+    executeX86CpuID(0x0, 0, &eax, &ebx, &ecx, &edx);
+    maxStdLevel = eax;
+    executeX86CpuID(0x80000000, 0, &eax, &ebx, &ecx, &edx);
+    maxExtLevel = eax;
+
+    if (maxStdLevel >= 0x1)
+    {
+        executeX86CpuID(0x1, 0, &eax, &ebx, &ecx, &edx);
+        haveX2Apic = (ecx & (1u << 21)) && maxStdLevel >= 0xb;
+        haveApic   = (edx & (1u << 9)) && maxExtLevel >= 0x80000008;
+    }
+
+    if (haveX2Apic || haveApic)
+    {
+        unsigned int   hwThreadBits;
+        unsigned int   coreBits;
+        // Get bits for cores and hardware threads
+        if (haveX2Apic)
+        {
+            executeX86CpuID(0xb, 0, &eax, &ebx, &ecx, &edx);
+            hwThreadBits = eax & 0x1f;
+            executeX86CpuID(0xb, 1, &eax, &ebx, &ecx, &edx);
+            const unsigned int coreAndThreadBits = eax & 0x1f;
+            if (coreAndThreadBits < hwThreadBits)
+            {
+                // Inconsistent data (e.g. sub-level 1 reported as zero). The
+                // unsigned subtraction below would wrap around and produce an
+                // out-of-range shift count, so give up on topology instead.
+                return logicalProcessors;
+            }
+            coreBits     = coreAndThreadBits - hwThreadBits;
+        }
+        else    // haveApic
+        {
+            // AMD without x2APIC does not support SMT - there are no hwthread bits in apic ID
+            hwThreadBits = 0;
+            // Get number of core bits in apic ID - try modern extended method first
+            executeX86CpuID(0x80000008, 0, &eax, &ebx, &ecx, &edx);
+            coreBits = (ecx >> 12) & 0xf;
+            if (coreBits == 0)
+            {
+                // Legacy method for old single/dual core AMD CPUs
+                unsigned int i = ecx & 0xf;
+                while (i >> coreBits)
+                {
+                    coreBits++;
+                }
+            }
+        }
+
+        std::vector<unsigned int>  apicID = detectX86ApicIDs(haveX2Apic);
+
+        if (!apicID.empty())
+        {
+            // APIC IDs can be buggy, and it is always a mess. Typically more bits are
+            // reserved than needed, and the numbers might not increment by 1 even in
+            // a single socket or core. Extract, renumber, and check that things make sense.
+            unsigned int               hwThreadMask  = (1u << hwThreadBits) - 1;
+            unsigned int               coreMask      = (1u << coreBits) - 1;
+            std::vector<unsigned int>  hwThreads;
+            std::vector<unsigned int>  cores;
+            std::vector<unsigned int>  sockets;
+
+            for (auto a : apicID)
+            {
+                hwThreads.push_back( a & hwThreadMask );
+                cores.push_back( ( a >> hwThreadBits ) & coreMask );
+                sockets.push_back( a >> ( coreBits + hwThreadBits ) );
+            }
+
+            renumberIndex(&hwThreads);
+            renumberIndex(&cores);
+            renumberIndex(&sockets);
+
+            unsigned int  nHwThreads = 1 + *std::max_element(hwThreads.begin(), hwThreads.end());
+            unsigned int  nCores     = 1 + *std::max_element(cores.begin(), cores.end());
+            unsigned int  nSockets   = 1 + *std::max_element(sockets.begin(), sockets.end());
+
+            if (nSockets * nCores * nHwThreads == apicID.size() )
+            {
+                // Alright, everything looks consistent, so put it in the result
+                for (std::size_t i = 0; i < apicID.size(); i++)
+                {
+                    // While the internal APIC IDs are always unsigned integers, we also cast to
+                    // plain integers for the externally exposed vectors, since that will make
+                    // it possible to use '-1' for invalid entries in the future.
+                    logicalProcessors.push_back( { int(sockets[i]), int(cores[i]), int(hwThreads[i]) } );
+                }
+            }
+        }
+    }
+    return logicalProcessors; // Will only have contents if everything worked
+}
+
+
+/******************************************************************************
+ * *
+ * Generic Linux detection by parsing /proc/cpuinfo *
+ * *
+ ******************************************************************************/
+
+/*! \brief Parse /proc/cpuinfo into a simple string map
+ *
+ * Every non-empty line of /proc/cpuinfo is split at the first colon. The
+ * (trimmed) text to the left of the colon becomes the key, and the (trimmed)
+ * text to the right becomes the value stored under that key. A line without
+ * any colon is stored with the whole line as key and an empty value.
+ * For multi-processor systems where lines are repeated, later lines
+ * overwrite the earlier occurrences.
+ *
+ * \return New map with the contents. If the file is not available, the returned
+ *         map will be empty.
+ */
+const std::map<std::string, std::string>
+parseProcCpuInfo()
+{
+    std::map<std::string, std::string>  entries;
+    std::ifstream                       input("/proc/cpuinfo");
+
+    for (std::string line; std::getline(input, line); )
+    {
+        if (line.empty())
+        {
+            continue;
+        }
+
+        const std::string::size_type colon = line.find(':');
+        // part before colon (or everything, when there is no colon)
+        std::string                  label = line.substr(0, colon);
+        // part after colon (empty when there is no colon)
+        std::string                  content;
+        if (colon != std::string::npos)
+        {
+            content = line.substr(colon + 1);
+        }
+        trimString(&label);
+        trimString(&content);
+        // Repeated labels from later processors replace earlier ones; that is fine.
+        entries[label] = content;
+    }
+    return entries;
+}
+
+
+/*! \brief Try to detect vendor from /proc/cpuinfo
+ *
+ *  \param cpuInfo  Map returned from parseProcCpuInfo()
+ *
+ *  \return The first vendor whose name is a case-insensitive prefix of the
+ *          value of one of a few common labels, or
+ *          gmx::CpuInfo::Vendor::Unknown if nothing matches.
+ *
+ *  This routine tries to match a few common labels in /proc/cpuinfo to see if
+ *  they begin with the name of a standard vendor. If the map is empty
+ *  or if no match is found, we return gmx::CpuInfo::Vendor::Unknown.
+ */
+CpuInfo::Vendor
+detectProcCpuInfoVendor(const std::map<std::string, std::string> &cpuInfo)
+{
+    const std::map<std::string, CpuInfo::Vendor> testVendors =
+    {
+        { "GenuineIntel", CpuInfo::Vendor::Intel   },
+        { "Intel",        CpuInfo::Vendor::Intel   },
+        { "AuthenticAmd", CpuInfo::Vendor::Amd     },
+        { "AMD",          CpuInfo::Vendor::Amd     },
+        { "ARM",          CpuInfo::Vendor::Arm     },
+        { "AArch64",      CpuInfo::Vendor::Arm     },
+        { "Fujitsu",      CpuInfo::Vendor::Fujitsu },
+        { "IBM",          CpuInfo::Vendor::Ibm     },
+        { "POWER",        CpuInfo::Vendor::Ibm     }
+    };
+
+    // Case-insensitive character comparison. std::tolower() is only defined
+    // for values representable as unsigned char, hence the casts.
+    auto sameLetter = [](char x, char y) -> bool
+    {
+        return std::tolower(static_cast<unsigned char>(x)) == std::tolower(static_cast<unsigned char>(y));
+    };
+
+    // For each label in /proc/cpuinfo, compare the value to the name in the
+    // testVendors map above, and if it's a match return the vendor.
+    for (auto &l : { "vendor_id", "vendor", "manufacture", "model", "processor", "cpu" })
+    {
+        auto entry = cpuInfo.find(l);
+        if (entry == cpuInfo.end())
+        {
+            continue;
+        }
+        // there was a line with this left-hand side in /proc/cpuinfo
+        const std::string &s1 = entry->second;
+
+        for (auto &t : testVendors)
+        {
+            const std::string &s2 = t.first;
+
+            // If the entire name we are testing (s2) matches the first part of
+            // the string after the colon in /proc/cpuinfo (s1) we found our vendor.
+            // The length check is required: std::equal() would otherwise read
+            // past the end of s1 when the value is shorter than the vendor
+            // name (e.g. "processor : 0").
+            if (s1.size() >= s2.size() &&
+                std::equal(s2.begin(), s2.end(), s1.begin(), sameLetter))
+            {
+                return t.second;
+            }
+        }
+    }
+    return CpuInfo::Vendor::Unknown;
+}
+
+
+/*! \brief Detect IBM processor name and features from /proc/cpuinfo
+ *
+ *  \param      cpuInfo    Map returned from parseProcCpuInfo()
+ *  \param[out] brand      String where to write the brand string
+ *  \param[out] features   Feature set where supported features are inserted
+ *
+ *  This routine tries to match a few common labels in /proc/cpuinfo to see if
+ *  we can find the processor name and features. It is likely fragile.
+ *  The brand string is left untouched when neither the 'cpu' nor the
+ *  'Processor' label is present.
+ */
+void
+detectProcCpuInfoIbm(const std::map<std::string, std::string> &cpuInfo,
+                     std::string *                             brand,
+                     std::set<CpuInfo::Feature> *              features)
+{
+    // Get brand string from 'cpu' label if present, otherwise 'Processor'
+    if (cpuInfo.count("cpu"))
+    {
+        *brand = cpuInfo.at("cpu");
+    }
+    else if (cpuInfo.count("Processor"))
+    {
+        *brand = cpuInfo.at("Processor");
+    }
+
+    if (brand->find("A2") != std::string::npos)
+    {
+        // If the processor identification contains "A2", this is BlueGene/Q with QPX
+        features->insert(CpuInfo::Feature::Ibm_Qpx);
+    }
+
+    for (auto &l : { "model name", "model", "Processor", "cpu" })
+    {
+        if (cpuInfo.count(l))
+        {
+            std::string s1 = cpuInfo.at(l);
+            // std::tolower() must not be handed a negative char value, so
+            // convert through unsigned char instead of passing ::tolower directly.
+            std::transform(s1.begin(), s1.end(), s1.begin(),
+                           [](char c) -> char { return static_cast<char>(std::tolower(static_cast<unsigned char>(c))); });
+
+            if (s1.find("altivec") != std::string::npos)
+            {
+                features->insert(CpuInfo::Feature::Ibm_Vmx);
+                // If this is a power6, we only have VMX. All later processors have VSX.
+                if (s1.find("power6") == std::string::npos)
+                {
+                    features->insert(CpuInfo::Feature::Ibm_Vsx);
+                }
+            }
+        }
+    }
+}
+
+
+/*! \brief Detect ARM processor name and features from /proc/cpuinfo
+ *
+ *  \param      cpuInfo    Map returned from parseProcCpuInfo()
+ *  \param[out] brand      String where to write the brand string
+ *  \param[out] family     Major version of processor
+ *  \param[out] model      Middle version of processor
+ *  \param[out] stepping   Minor version of processor
+ *  \param[out] features   Feature set where supported features are inserted
+ *
+ *  Each output is only written when the corresponding label exists in the
+ *  map. The label matching is simple string lookup and likely fragile.
+ */
+void
+detectProcCpuInfoArm(const std::map<std::string, std::string>   &cpuInfo,
+                     std::string *                               brand,
+                     int *                                       family,
+                     int *                                       model,
+                     int *                                       stepping,
+                     std::set<CpuInfo::Feature> *                features)
+{
+    // Small lookup helper: pointer to the value for a label, or nullptr if absent
+    auto valueOf = [&cpuInfo](const char *label) -> const std::string *
+    {
+        auto it = cpuInfo.find(label);
+        return (it != cpuInfo.end()) ? &it->second : nullptr;
+    };
+
+    if (const std::string *processor = valueOf("Processor"))
+    {
+        *brand = *processor;
+    }
+
+    if (const std::string *architecture = valueOf("CPU architecture"))
+    {
+        *family = std::strtol(architecture->c_str(), NULL, 10);
+        // Some 64-bit CPUs report the text 'AArch64' rather than a number
+        const bool saysAArch64 = (architecture->find("AArch64") != std::string::npos);
+        if (*family == 0 && saysAArch64)
+        {
+            *family = 8;  // fragile - no idea how a future ARMv9 will be represented in this case
+        }
+    }
+
+    if (const std::string *variant = valueOf("CPU variant"))
+    {
+        // The variant is parsed as a hexadecimal number
+        *model = std::strtol(variant->c_str(), NULL, 16);
+    }
+
+    if (const std::string *revision = valueOf("CPU revision"))
+    {
+        *stepping = std::strtol(revision->c_str(), NULL, 10);
+    }
+
+    if (const std::string *flags = valueOf("Features"))
+    {
+        if (flags->find("neon") != std::string::npos)
+        {
+            features->insert(CpuInfo::Feature::Arm_Neon);
+        }
+        if (flags->find("asimd") != std::string::npos)
+        {
+            features->insert(CpuInfo::Feature::Arm_NeonAsimd);
+        }
+    }
+}
+
+
+/*! \brief Try to detect vendor, cpu and features from /proc/cpuinfo
+ *
+ *  \param[in,out] vendor  Hardware vendor; only detected here if it is
+ *                         Unknown on entry
+ *  \param[out] brand      String where to write the brand string
+ *  \param[out] family     Major version of processor
+ *  \param[out] model      Middle version of processor
+ *  \param[out] stepping   Minor version of processor
+ *  \param[out] features   Feature set where supported features are inserted
+ *
+ *  The file is parsed into a key/value map, and vendor-specific helpers then
+ *  look for known keys and values. This is much more fragile than the x86
+ *  detection, but it does not depend on specific system calls, intrinsics
+ *  or assembly instructions.
+ */
+void
+detectProcCpuInfo(CpuInfo::Vendor *              vendor,
+                  std::string   *                brand,
+                  int   *                        family,
+                  int   *                        model,
+                  int   *                        stepping,
+                  std::set<CpuInfo::Feature> *   features)
+{
+    const std::map<std::string, std::string> parsed(parseProcCpuInfo());
+
+    const bool vendorMissing = (*vendor == CpuInfo::Vendor::Unknown);
+    if (vendorMissing)
+    {
+        *vendor = detectProcCpuInfoVendor(parsed);
+    }
+
+    // There is no standard for the contents of /proc/cpuinfo: a label such as
+    // 'cpu' can be either a name or an index depending on the platform, so
+    // each vendor gets its own parsing routine.
+    if (*vendor == CpuInfo::Vendor::Ibm)
+    {
+        detectProcCpuInfoIbm(parsed, brand, features);
+    }
+    else if (*vendor == CpuInfo::Vendor::Arm)
+    {
+        detectProcCpuInfoArm(parsed, brand, family, model, stepping, features);
+    }
+    else
+    {
+        // We only have a single check for fujitsu for now
+#ifdef __HPC_ACE__
+        features->insert(CpuInfo::Feature::Fujitsu_HpcAce);
+#endif
+    }
+}
+/*! \endcond */
+} // namespace anonymous
+
+
+// static
+CpuInfo CpuInfo::detect()
+{
+    CpuInfo cpu;
+
+#if defined __i386__ || defined __i386 || defined _X86_ || defined _M_IX86 || \
+    defined __x86_64__ || defined __amd64__ || defined _M_X64 || defined _M_AMD64
+
+    // x86: everything comes from the cpuid instruction
+    cpu.vendor_ = detectX86Vendor();
+    detectX86Features(&cpu.brandString_, &cpu.family_, &cpu.model_,
+                      &cpu.stepping_, &cpu.features_);
+    cpu.logicalProcessors_ = detectX86LogicalProcessors();
+
+#else   // not x86
+
+    // Start from what the compiler's predefined macros tell us
+#    if defined __arm__ || defined __arm || defined _M_ARM || defined __aarch64__
+    cpu.vendor_  = CpuInfo::Vendor::Arm;
+#    elif defined __powerpc__ || defined __ppc__ || defined __PPC__
+    cpu.vendor_  = CpuInfo::Vendor::Ibm;
+#    endif
+
+#    if defined __aarch64__ || ( defined _M_ARM && _M_ARM >= 8 )
+    cpu.features_.insert(Feature::Arm_Neon);      // ARMv8 always has Neon
+    cpu.features_.insert(Feature::Arm_NeonAsimd); // ARMv8 always has Neon-asimd
+#    endif
+
+    // On Linux /proc/cpuinfo might provide more. A vendor that is already
+    // known at this point is not re-detected by this routine.
+    detectProcCpuInfo(&cpu.vendor_, &cpu.brandString_, &cpu.family_,
+                      &cpu.model_, &cpu.stepping_, &cpu.features_);
+
+#endif  // x86 or not
+
+    // Work upwards from the weakest support level; each later test that
+    // succeeds overrides the previous one, so the strongest level wins.
+    cpu.supportLevel_ = CpuInfo::SupportLevel::None;
+    if (cpu.vendor_ != CpuInfo::Vendor::Unknown
+        || cpu.brandString_ != "Unknown CPU brand")
+    {
+        cpu.supportLevel_ = CpuInfo::SupportLevel::Name;
+    }
+    if (!cpu.features_.empty())
+    {
+        cpu.supportLevel_ = CpuInfo::SupportLevel::Features;
+    }
+    if (!cpu.logicalProcessors_.empty())
+    {
+        cpu.supportLevel_ = CpuInfo::SupportLevel::LogicalProcessorInfo;
+    }
+
+    return cpu;
+}
+
+
+/* Construct an object describing an unknown CPU.
+ *
+ * Every member gets a defined value, including supportLevel_, so the object
+ * is in a consistent "nothing detected" state even before detect() has
+ * filled anything in. supportLevel_ comes first since it is the first
+ * member declared in the class.
+ */
+CpuInfo::CpuInfo()
+    : supportLevel_(CpuInfo::SupportLevel::None),
+      vendor_(CpuInfo::Vendor::Unknown), brandString_("Unknown CPU brand"),
+      family_(0), model_(0), stepping_(0)
+{
+}
+
+
+const std::map<CpuInfo::Vendor, std::string>
+CpuInfo::s_vendorStrings_ = // Text returned by vendorString() for each Vendor value
+{ // vendorString() uses at(), so a Vendor enumerator without an entry here makes it throw std::out_of_range
+ { CpuInfo::Vendor::Unknown, "Unknown vendor" },
+ { CpuInfo::Vendor::Intel, "Intel" },
+ { CpuInfo::Vendor::Amd, "AMD" },
+ { CpuInfo::Vendor::Fujitsu, "Fujitsu" },
+ { CpuInfo::Vendor::Ibm, "IBM" },
+ { CpuInfo::Vendor::Arm, "ARM" }
+};
+
+
+const std::map<CpuInfo::Feature, std::string>
+CpuInfo::s_featureStrings_ = // Text returned by featureString() for each Feature value; also what the standalone "-features" flag prints
+{ // featureString() uses at(), so a Feature enumerator without an entry here makes it throw std::out_of_range
+ { CpuInfo::Feature::X86_Aes, "aes" },
+ { CpuInfo::Feature::X86_Apic, "apic" },
+ { CpuInfo::Feature::X86_Avx, "avx" },
+ { CpuInfo::Feature::X86_Avx2, "avx2" },
+ { CpuInfo::Feature::X86_Avx512F, "avx512f" },
+ { CpuInfo::Feature::X86_Avx512PF, "avx512pf" },
+ { CpuInfo::Feature::X86_Avx512ER, "avx512er" },
+ { CpuInfo::Feature::X86_Avx512CD, "avx512cd" },
+ { CpuInfo::Feature::X86_Avx512BW, "avx512bw" },
+ { CpuInfo::Feature::X86_Avx512VL, "avx512vl" },
+ { CpuInfo::Feature::X86_Clfsh, "clfsh" },
+ { CpuInfo::Feature::X86_Cmov, "cmov" },
+ { CpuInfo::Feature::X86_Cx8, "cx8" },
+ { CpuInfo::Feature::X86_Cx16, "cx16" },
+ { CpuInfo::Feature::X86_F16C, "f16c" },
+ { CpuInfo::Feature::X86_Fma, "fma" },
+ { CpuInfo::Feature::X86_Fma4, "fma4" },
+ { CpuInfo::Feature::X86_Hle, "hle" },
+ { CpuInfo::Feature::X86_Htt, "htt" },
+ { CpuInfo::Feature::X86_Lahf, "lahf" },
+ { CpuInfo::Feature::X86_MisalignSse, "misalignsse" },
+ { CpuInfo::Feature::X86_Mmx, "mmx" },
+ { CpuInfo::Feature::X86_Msr, "msr" },
+ { CpuInfo::Feature::X86_NonstopTsc, "nonstop_tsc" },
+ { CpuInfo::Feature::X86_Pcid, "pcid" },
+ { CpuInfo::Feature::X86_Pclmuldq, "pclmuldq" },
+ { CpuInfo::Feature::X86_Pdcm, "pdcm" },
+ { CpuInfo::Feature::X86_PDPE1GB, "pdpe1gb" },
+ { CpuInfo::Feature::X86_Popcnt, "popcnt" },
+ { CpuInfo::Feature::X86_Pse, "pse" },
+ { CpuInfo::Feature::X86_Rdrnd, "rdrnd" },
+ { CpuInfo::Feature::X86_Rdtscp, "rdtscp" },
+ { CpuInfo::Feature::X86_Rtm, "rtm" },
+ { CpuInfo::Feature::X86_Sha, "sha" },
+ { CpuInfo::Feature::X86_Sse2, "sse2" },
+ { CpuInfo::Feature::X86_Sse3, "sse3" },
+ { CpuInfo::Feature::X86_Sse4A, "sse4a" },
+ { CpuInfo::Feature::X86_Sse4_1, "sse4.1" },
+ { CpuInfo::Feature::X86_Sse4_2, "sse4.2" },
+ { CpuInfo::Feature::X86_Ssse3, "ssse3" },
+ { CpuInfo::Feature::X86_Tdt, "tdt" },
+ { CpuInfo::Feature::X86_X2Apic, "x2apic" },
+ { CpuInfo::Feature::X86_Xop, "xop" },
+ { CpuInfo::Feature::Arm_Neon, "neon" },
+ { CpuInfo::Feature::Arm_NeonAsimd, "neon_asimd" },
+ { CpuInfo::Feature::Ibm_Qpx, "qpx" },
+ { CpuInfo::Feature::Ibm_Vmx, "vmx" },
+ { CpuInfo::Feature::Ibm_Vsx, "vsx" },
+ { CpuInfo::Feature::Fujitsu_HpcAce, "hpc-ace" }
+};
+
+
+/* Returns true when cpuInfo describes an Intel CPU of family 6 whose model
+ * number is one of the values this routine treats as Nehalem.
+ */
+bool
+cpuIsX86Nehalem(const CpuInfo &cpuInfo)
+{
+    // Anything that is not Intel family 6 can be rejected right away
+    if (cpuInfo.vendor() != gmx::CpuInfo::Vendor::Intel || cpuInfo.family() != 6)
+    {
+        return false;
+    }
+
+    switch (cpuInfo.model())
+    {
+        case 0x1A:
+        case 0x1E:
+        case 0x25:
+        case 0x2C:
+        case 0x2E:
+        case 0x2F:
+            return true;
+
+        default:
+            return false;
+    }
+}
+
+} // namespace gmx
+
+#ifdef GMX_CPUINFO_STANDALONE
+/* Entry point of the standalone cpuinfo program.
+ *
+ * Detects the CPU and prints the single item selected by the first
+ * command-line flag on stdout. Without any flag a usage text is printed
+ * and the program exits with status 1. Only argv[1] is examined; a flag
+ * that is not recognized prints nothing and still returns 0.
+ */
+int
+main(int argc, char **argv)
+{
+    if (argc < 2)
+    {
+        fprintf(stdout,
+                "Usage:\n\n%s [flags]\n\n"
+                "Available flags:\n"
+                "-vendor Print CPU vendor.\n"
+                "-brand Print CPU brand string.\n"
+                "-family Print CPU family version.\n"
+                "-model Print CPU model version.\n"
+                "-stepping Print CPU stepping version.\n"
+                "-features Print CPU feature flags.\n",
+                argv[0]);
+        exit(1);
+    }
+
+    std::string  arg(argv[1]);
+    gmx::CpuInfo cpuInfo(gmx::CpuInfo::detect());
+
+    if (arg == "-vendor")
+    {
+        printf("%s\n", cpuInfo.vendorString().c_str());
+    }
+    else if (arg == "-brand")
+    {
+        printf("%s\n", cpuInfo.brandString().c_str());
+    }
+    else if (arg == "-family")
+    {
+        printf("%d\n", cpuInfo.family());
+    }
+    else if (arg == "-model")
+    {
+        printf("%d\n", cpuInfo.model());
+    }
+    else if (arg == "-stepping")
+    {
+        printf("%d\n", cpuInfo.stepping());
+    }
+    else if (arg == "-features")
+    {
+        for (const auto &f : cpuInfo.featureSet() )
+        {
+            printf(" %s", cpuInfo.featureString(f).c_str());
+        }
+        printf(" \n"); // extra space so we can grep output for " <feature> " in CMake
+    }
+    else if (arg == "-topology")
+    {
+        // Undocumented debug option, usually not present in standalone version
+        for (const auto &t : cpuInfo.logicalProcessors() )
+        {
+            // socket/core/hwThread are plain ints, so they need the signed conversion
+            printf("%3d %3d %3d\n", t.socket, t.core, t.hwThread);
+        }
+    }
+    return 0;
+}
+#endif
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Declares gmx::CpuInfo
+ *
+ * \author Erik Lindahl <erik.lindahl@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_hardware
+ */
+#ifndef GMX_HARDWARE_CPUINFO_H
+#define GMX_HARDWARE_CPUINFO_H
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace gmx
+{
+
+/*! \libinternal \brief Detect CPU capabilities and basic logical processor info
+ *
+ * This class provides a lot of information about x86 CPUs, and some very
+ * limited information about other hardware. The logical processor information
+ * is only available on x86, and is used as a fallback implementation in
+ * the HardwareTopology class.
+ * If you actually need information about the hardware topology, use the much
+ * more general implementation in the HardwareTopology class instead, since
+ * that will both be more portable and contain more information.
+ *
+ * Objects can only be created through the static detect() method.
+ *
+ * \ingroup module_hardware
+ */
+class CpuInfo
+{
+
+ public:
+
+ /*! \brief Amount of cpu information present (incremental) */
+ enum class SupportLevel
+ {
+ None, //!< No cpu information whatsoever. Sorry.
+ Name, //!< Only vendor and/or brand is set
+ Features, //!< Some features are set
+ LogicalProcessorInfo //!< Everything including logical processor information
+ };
+
+ /*! \brief Processor/system vendors */
+ enum class Vendor
+ {
+ Unknown, //!< Unidentified
+ Intel, //!< GenuineIntel
+ Amd, //!< AuthenticAMD
+ Fujitsu, //!< Only works on Linux (parsed from /proc/cpuinfo)
+ Ibm, //!< Set from compiler macros on PowerPC builds; further details only on Linux (/proc/cpuinfo)
+ Arm, //!< Set from compiler macros on Arm builds; further details only on Linux (/proc/cpuinfo)
+ };
+
+ /*! \brief List of CPU features
+ *
+ * These values can be used as arguments to the feature() method
+ * to check whether a specific feature was found on the CPU we are
+ * running on.
+ */
+ enum class Feature
+ {
+ X86_Aes, //!< x86 advanced encryption standard accel.
+ X86_Apic, //!< APIC support
+ X86_Avx, //!< Advanced vector extensions
+ X86_Avx2, //!< AVX2 including gather support (not used yet)
+ X86_Avx512F, //!< Foundation AVX-512 instructions
+ X86_Avx512PF, //!< Extended gather/scatter for AVX-512
+ X86_Avx512ER, //!< AVX-512 exponential and reciprocal extensions
+ X86_Avx512CD, //!< Memory conflict-detection for AVX-512
+ X86_Avx512BW, //!< AVX-512 byte and word instructions
+ X86_Avx512VL, //!< AVX-512 vector length extensions
+ X86_Clfsh, //!< Supports CLFLUSH instruction
+ X86_Cmov, //!< Conditional move insn support
+ X86_Cx8, //!< Supports CMPXCHG8B (8-byte compare-exchange)
+ X86_Cx16, //!< Supports CMPXCHG16B (16-byte compare-exchg)
+ X86_F16C, //!< Supports 16-bit FP conversion instructions
+ X86_Fma, //!< Fused-multiply add support (mainly for AVX)
+ X86_Fma4, //!< 4-operand FMA, only on AMD for now
+ X86_Hle, //!< Hardware lock elision
+ X86_Htt, //!< Hyper-Threading supported (but maybe not enabled)
+ X86_Lahf, //!< LAHF/SAHF support in 64 bits
+ X86_MisalignSse, //!< Support for misaligned SSE data instructions
+ X86_Mmx, //!< MMX registers and instructions
+ X86_Msr, //!< Supports Intel model-specific-registers
+ X86_NonstopTsc, //!< Invariant TSC (constant rate in ACPI states)
+ X86_Pcid, //!< Process context identifier support
+ X86_Pclmuldq, //!< Carry-less 64-bit multiplication supported
+ X86_Pdcm, //!< Perfmon and Debug Capability
+ X86_PDPE1GB, //!< Support for 1GB pages
+ X86_Popcnt, //!< Supports the POPCNT (population count) insn
+ X86_Pse, //!< Supports 4MB-pages (page size extension)
+ X86_Rdrnd, //!< RDRAND high-quality hardware random numbers
+ X86_Rdtscp, //!< Serializing rdtscp instruction available
+ X86_Rtm, //!< Restricted transactional memory
+ X86_Sha, //!< Intel SHA extensions
+ X86_Sse2, //!< SSE 2
+ X86_Sse3, //!< SSE 3
+ X86_Sse4A, //!< SSE 4A
+ X86_Sse4_1, //!< SSE 4.1
+ X86_Sse4_2, //!< SSE 4.2
+ X86_Ssse3, //!< Supplemental SSE3
+ X86_Tdt, //!< TSC deadline timer
+ X86_X2Apic, //!< Extended xAPIC Support
+ X86_Xop, //!< AMD extended instructions, only AMD for now
+ Arm_Neon, //!< 32-bit ARM NEON
+ Arm_NeonAsimd, //!< 64-bit ARM AArch64 Advanced SIMD
+ Ibm_Qpx, //!< IBM QPX SIMD (BlueGene/Q and later)
+ Ibm_Vmx, //!< IBM VMX SIMD (Altivec on Power6 and later)
+ Ibm_Vsx, //!< IBM VSX SIMD (Power7 and later)
+ Fujitsu_HpcAce //!< Fujitsu Sparc64 HPC-ACE
+ };
+
+ /*! \libinternal \brief Entry with basic information for a single logical processor */
+ struct LogicalProcessor
+ {
+ int socket; //!< Rank of the current socket in the system
+ int core; //!< Rank of the current core in its socket
+ int hwThread; //!< Rank of logical processor in its core
+ };
+
+ public:
+ /*! \brief Perform detection and construct a CpuInfo class from the results.
+ *
+ * \return A new CpuInfo object; use supportLevel() to see how much
+ * information could be detected.
+ *
+ * \note The detection should generally be performed again in different
+ * contexts. This might seem like overkill, but there
+ * are systems (e.g. Arm) where processors can go completely offline
+ * during deep sleep, so at least in theory it is good to have a
+ * possibility of forcing re-detection if necessary.
+ */
+ static CpuInfo detect();
+
+ /*! \brief Check what cpu information is available
+ *
+ * The amount of cpu information that can be detected depends on the
+ * OS, compiler, and CPU, and on non-x86 platforms it can be fragile.
+ * Before basing decisions on the output or warning the user about
+ * optimizations, you want to check whether it was possible to detect
+ * the information you need.
+ */
+ SupportLevel
+ supportLevel() const { return supportLevel_; }
+
+ /*! \brief Enumerated value for vendor */
+ Vendor
+ vendor() const { return vendor_; }
+
+ /*! \brief String description of vendor
+ *
+ * \throws std::out_of_range if the vendor is not present in the internal
+ * map of vendor names. This can only happen if we extend the enum
+ * type but forget to add the string with the vendor name.
+ */
+ const std::string &
+ vendorString() const
+ {
+ return s_vendorStrings_.at(vendor_);
+ }
+
+ /*! \brief String description of processor */
+ const std::string &
+ brandString() const { return brandString_; }
+
+ /*! \brief Major version/generation of the processor */
+ int
+ family() const { return family_; }
+
+ /*! \brief Middle version of the processor */
+ int
+ model() const { return model_; }
+
+ /*! \brief Minor version of the processor */
+ int
+ stepping() const { return stepping_; }
+
+ /*! \brief Check for availability of specific feature
+ *
+ * \param f feature to query support for
+ *
+ * \return True if the feature is available, otherwise false.
+ */
+ bool
+ feature(Feature f) const
+ {
+ // If the entry is present in the set it is supported
+ return (features_.count(f) != 0);
+ }
+
+ /*! \brief String description of a specific feature
+ *
+ * \param f feature to return the name of
+ *
+ * \return Reference to the name stored in the internal map of feature names.
+ *
+ * \throws std::out_of_range if the feature is not present in the internal
+ * map of feature names. This can only happen if we extend the enum
+ * type but forget to add the string with the feature name.
+ */
+ static const std::string &
+ featureString(Feature f)
+ {
+ return s_featureStrings_.at(f);
+ }
+
+ /*! \brief Set of all supported features on this processor
+ *
+ * This is only intended for logfiles, debugging or similar output when we
+ * need a full list of all the features available on the CPU.
+ */
+ const std::set<Feature> &
+ featureSet() const
+ {
+ return features_;
+ }
+
+ /*! \brief Reference to processing unit topology
+ *
+ * Only a few systems (x86) provide logical processor information in cpuinfo.
+ * This method returns a reference to a vector, whose length will either be
+ * zero (if topology information is not available) or the number of enabled
+ * processing units, as defined by the operating system. In the latter
+ * case, each entry will contain information about the relative rank in the
+ * core and socket of this hardware thread.
+ *
+ * This is only meant to be used as a fallback implementation for our
+ * HardwareTopology class; any user code that needs access to hardware
+ * topology information should use that class instead.
+ *
+ * \note For clarity, it is likely better to use the supportLevel()
+ * method to check if this information is available rather than
+ * relying on the length of the vector.
+ */
+ const std::vector<LogicalProcessor> &
+ logicalProcessors() const { return logicalProcessors_; }
+
+ private:
+ CpuInfo(); // Private: objects are obtained from the static detect() method
+
+ SupportLevel supportLevel_; //!< Available cpuinfo information
+ Vendor vendor_; //!< Value of vendor for current cpu
+ std::string brandString_; //!< Text description of cpu
+ int family_; //!< Major version of current cpu
+ int model_; //!< Middle version of current cpu
+ int stepping_; //!< Minor version of current cpu
+ std::set<Feature> features_; //!< Set of features supported on this cpu
+ std::vector<LogicalProcessor> logicalProcessors_; //!< Simple logical processor topology
+ static const std::map<Vendor, std::string> s_vendorStrings_; //!< Text description of each vendor
+ static const std::map<Feature, std::string> s_featureStrings_; //!< Text description of each feature
+}; // class CpuInfo
+
+/*! \brief Return true if the CPU is an Intel x86 Nehalem
+ *
+ * The decision is based on the vendor, family and model reported by
+ * the cpuInfo object.
+ *
+ * \param cpuInfo Object with cpu information
+ *
+ * \returns True if running on Nehalem CPU
+ */
+bool
+cpuIsX86Nehalem(const CpuInfo &cpuInfo);
+
+} // namespace gmx
+
+#endif // GMX_HARDWARE_CPUINFO_H
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \internal \file
+ * \brief
+ * Implements gmx::HardwareTopology.
+ *
+ * \author Erik Lindahl <erik.lindahl@gmail.com>
+ * \ingroup module_hardware
+ */
+
+#include "gmxpre.h"
+
+#include "hardwaretopology.h"
+
+#include "config.h"
+
+#include <algorithm>
+#include <vector>
+
+#include <thread>
+
+#include "gromacs/hardware/cpuinfo.h"
+
+#ifdef HAVE_UNISTD_H
+# include <unistd.h> // sysconf()
+#endif
+#ifdef GMX_NATIVE_WINDOWS
+# include <windows.h> // GetSystemInfo()
+#endif
+
+namespace gmx
+{
+
+namespace
+{
+
+
+/*! \brief Build the socket/core/hwthread tree from the logical processors in cpuinfo
+ *
+ * \param cpuInfo  CpuInfo object to read the logical processor entries from
+ * \param machine  Machine tree structure that receives the information.
+ *                 It is left untouched when the cpuinfo object does not
+ *                 contain any logical processor entries.
+ */
+void
+parseFromCpuInfo(const gmx::CpuInfo         &cpuInfo,
+                 HardwareTopology::Machine * machine)
+{
+    const auto &detected = cpuInfo.logicalProcessors();
+    if (detected.empty())
+    {
+        // No topology information available
+        return;
+    }
+
+    // Copy the entries while tracking the largest index seen on each level
+    int maxSocket   = 0;
+    int maxCore     = 0;
+    int maxHwThread = 0;
+    for (const auto &entry : detected)
+    {
+        machine->logicalProcessors.push_back( { entry.socket, entry.core, entry.hwThread } );
+        maxSocket   = std::max(maxSocket, entry.socket);
+        maxCore     = std::max(maxCore, entry.core);
+        maxHwThread = std::max(maxHwThread, entry.hwThread);
+    }
+
+    // Indices are zero-based, so every level needs (largest index + 1) slots.
+    // All sockets get the same number of cores, all cores the same number of threads.
+    machine->sockets.resize(maxSocket + 1);
+    for (auto &socket : machine->sockets)
+    {
+        socket.cores.resize(maxCore + 1);
+        for (auto &core : socket.cores)
+        {
+            core.hwThreads.resize(maxHwThread + 1);
+        }
+    }
+
+    // The position in the logicalProcessors vector is the logical processor id
+    int id = 0;
+    for (const auto &lp : machine->logicalProcessors)
+    {
+        machine->sockets[lp.socket].cores[lp.core].hwThreads[lp.hwThread].logicalProcessorId = id;
+        ++id;
+    }
+    machine->logicalProcessorCount = static_cast<int>(machine->logicalProcessors.size());
+}
+
+/*! \brief Try to detect the number of logical processors.
+ *
+ * std::thread::hardware_concurrency() is tried first; if that reports 0
+ * (or is skipped for old gcc) the operating system is asked instead,
+ * through GetSystemInfo() on Windows or sysconf() where available.
+ *
+ * \return The number of hardware processing units, or 0 if it fails.
+ *         The result is never negative, even if the system call
+ *         reports an error.
+ */
+int
+detectLogicalProcessorCount()
+{
+    // Try to use std::thread::hardware_concurrency() first. This result is only
+    // a hint, and it might be 0 if the information is not available.
+    // On Apple this will not compile with gcc-4.6, and since it just returns 0 on other
+    // platforms too we skip it entirely for gcc < 4.7
+#if defined __GNUC__ && (__GNUC__ == 4 && __GNUC_MINOR__ < 7)
+    int count = 0;
+#else
+    int count = static_cast<int>(std::thread::hardware_concurrency());
+#endif
+
+    if (count == 0)
+    {
+#if defined GMX_NATIVE_WINDOWS
+        // Windows
+        SYSTEM_INFO sysinfo;
+        GetSystemInfo( &sysinfo );
+        count = static_cast<int>(sysinfo.dwNumberOfProcessors);
+#elif defined HAVE_SYSCONF
+        // We are probably on Unix. Check if we have the argument to use before executing the call
+#    if defined(_SC_NPROCESSORS_CONF)
+        count = static_cast<int>(sysconf(_SC_NPROCESSORS_CONF));
+#    elif defined(_SC_NPROC_CONF)
+        count = static_cast<int>(sysconf(_SC_NPROC_CONF));
+#    elif defined(_SC_NPROCESSORS_ONLN)
+        count = static_cast<int>(sysconf(_SC_NPROCESSORS_ONLN));
+#    elif defined(_SC_NPROC_ONLN)
+        count = static_cast<int>(sysconf(_SC_NPROC_ONLN));
+#    endif // End of check for sysconf argument values
+
+#else
+        count = 0; // Neither windows nor Unix, and std::thread::hardware_concurrency() failed.
+#endif
+    }
+
+    // sysconf() signals errors with -1; report that as "unknown" (0) as documented
+    return (count > 0) ? count : 0;
+}
+
+} // namespace anonymous
+
+// static
+HardwareTopology HardwareTopology::detect()
+{
+    // Starts at SupportLevel::None; upgraded below depending on what we find
+    HardwareTopology topology;
+    const CpuInfo    cpuInfo(CpuInfo::detect());
+
+    if (!cpuInfo.logicalProcessors().empty())
+    {
+        // cpuInfo knows the socket/core/hwthread layout, so build the tree from it
+        parseFromCpuInfo(cpuInfo, &topology.machine_);
+        topology.supportLevel_ = SupportLevel::Basic;
+        return topology;
+    }
+
+    // No layout available; the best we can do is the number of logical processors
+    const int count = detectLogicalProcessorCount();
+    topology.machine_.logicalProcessorCount = count;
+    if (count > 0)
+    {
+        topology.supportLevel_ = SupportLevel::LogicalProcessorCount;
+    }
+
+    return topology;
+}
+
+
+/* Construct an empty topology with no information available.
+ *
+ * machine_ is value-initialized so logicalProcessorCount starts at 0 (with
+ * empty vectors) instead of holding an indeterminate value.
+ */
+HardwareTopology::HardwareTopology()
+    : supportLevel_(SupportLevel::None), machine_()
+{
+}
+
+} // namespace gmx
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Declares gmx::HardwareTopology
+ *
+ * \author Erik Lindahl <erik.lindahl@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_hardware
+ */
+#ifndef GMX_HARDWARE_HARDWARETOPOLOGY_H
+#define GMX_HARDWARE_HARDWARETOPOLOGY_H
+
+#include <vector>
+
+#include "gromacs/hardware/cpuinfo.h"
+
+
+namespace gmx
+{
+
+/*! \libinternal \brief Information about sockets, cores, hardware threads, numa, and caches
+ *
+ * This class is the main GROMACS interface to provide information about the
+ * hardware of the system we are running on. Internally, it uses either
+ * hwloc for full or almost-full information, or a fallback implementation
+ * that relies on CpuInfo on x86.
+ *
+ * You should always use this class (rather than CpuInfo directly) to query
+ * the hardware layout in user code. Note that you cannot rely on any
+ * information being present, but you must check with the supportLevel()
+ * method before trying to access any information.
+ */
+class HardwareTopology
+{
+ public:
+
+ /*! \brief Amount of topology information present (incremental) */
+ enum class SupportLevel
+ {
+ None, //!< No hardware information whatsoever. Sorry.
+ LogicalProcessorCount, //!< Only machine().logicalProcessorCount is valid
+ Basic, //!< Socket, core and hardware thread info
+ Full, //!< Cache, memory and numa node info
+ FullWithDevices //!< Information about devices on the PCI bus
+ };
+
+ // For now the structures describing the machine are very basic, but they
+ // will grow to include e.g. cache and core-group information in the future.
+
+ /*! \libinternal \brief Information about a single hardware thread in a core */
+ struct HWThread
+ {
+ int logicalProcessorId; //!< Id of the operating system logical processor
+ };
+
+ /*! \libinternal \brief Information about a single core in a socket */
+ struct Core
+ {
+ std::vector<HWThread> hwThreads; //!< All the hardware threads in this core
+ };
+
+ /*! \libinternal \brief Information about a single socket in the system */
+ struct Socket
+ {
+ std::vector<Core> cores; //!< All the cores in this socket
+ };
+
+ /*! \libinternal \brief Information about socket, core and hwthread for a logical processor */
+ struct LogicalProcessor
+ {
+ int socket; //!< Index of socket in machine
+ int core; //!< Index of core in socket
+ int hwThread; //!< Index of hardware thread in core
+ };
+
+ /*! \libinternal \brief Hardware topology information about the entire machine
+ *
+ * The machine structure is a tree with top-down information about all
+ * sockets, cores, and hardware threads in the system. For example, an
+ * operating system logical processor index can be found as
+ * machine.sockets[0].cores[1].hwThreads[2].logicalProcessorId.
+ * In some cases you might need the opposite lookup, i.e. the physical
+ * hardware data for a specific logical processor. This is present in the
+ * logicalProcessors vector for convenience.
+ *
+ * \note The logicalProcessors vector will only have non-zero length if the
+ * support level is SupportLevel::Basic or higher. You cannot use the
+ * size of this vector to query the number of logical processors on
+ * lower support levels.
+ */
+ struct Machine
+ {
+ int logicalProcessorCount; //!< Number of logical processors in system
+ std::vector<LogicalProcessor> logicalProcessors; //!< Map logical processors to socket/core
+ std::vector<Socket> sockets; //!< All the sockets in the system
+ };
+
+ public:
+
+ /*! \brief Detects the hardware topology.
+ *
+ * \return A new HardwareTopology object; use supportLevel() to see how
+ * much of its contents is valid.
+ */
+ static HardwareTopology detect();
+
+ /*! \brief Check what topology information is available and valid
+ *
+ * The amount of hardware topology information that can be detected depends
+ * on both the hardware and whether GROMACS was linked with the external
+ * hwloc library. You cannot assume that any information is present,
+ * although we can almost always provide the number of logical processors.
+ * On x86 we can usually get basic information about how sockets, cores
+ * and hardware threads are ordered even without hwloc.
+ * With the hwloc library we can usually also get information about cache,
+ * memory and concepts such as core groups and ccNUMA nodes.
+ * Finally, if hwloc was built with support for libpci we can also
+ * detect how the PCI devices are connected.
+ */
+ SupportLevel
+ supportLevel() const { return supportLevel_; }
+
+ /*! \brief Return the machine topology tree
+ *
+ * You can always call this routine, but be aware that some or all contents
+ * will not be valid unless supportLevel() returns a sufficient level.
+ *
+ * - With SupportLevel::LogicalProcessorCount, only the field
+ * machine.logicalProcessorCount is valid.
+ * - With SupportLevel::Basic, you can access the vectors of sockets,
+ * cores, and hardware threads, and query what logical processorId
+ * each hardware thread corresponds to.
+ * - SupportLevel::Full adds cache, memory and ccNUMA information.
+ * - SupportLevel::FullWithDevices also adds the PCI express bus.
+ *
+ * While data that is not valid has been initialized to special values,
+ * you should not rely on those but query the supportLevel() method before
+ * accessing it.
+ */
+ const Machine &
+ machine() const { return machine_; }
+
+ private:
+
+ HardwareTopology(); // Private: objects are obtained from the static detect() method
+
+ SupportLevel supportLevel_; //!< Available topology information
+ Machine machine_; //!< The machine map
+};
+
+}
+
+#endif // GMX_HARDWARE_HARDWARETOPOLOGY_H
--- /dev/null
+#
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2015, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+
+# Unit tests for the hardware module: passes the test name HardwareUnitTests,
+# the name hardware-test and the two test sources to gmx_add_unit_test()
+# (helper defined elsewhere in the build system).
+gmx_add_unit_test(HardwareUnitTests hardware-test
+ cpuinfo.cpp
+ hardwaretopology.cpp)
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * Tests for gmx::CpuInfo
+ *
+ * \author Erik Lindahl <erik.lindahl@gmail.com>
+ * \ingroup module_hardware
+ */
+#include "gmxpre.h"
+
+#include "gromacs/hardware/cpuinfo.h"
+
+#include "config.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+TEST(CpuInfoTest, SupportLevel)
+{
+    // Detection results depend on the machine running the test, so no
+    // reference data can be used; we only verify that detection executes
+    // and that whatever it reports is plausible.
+    gmx::CpuInfo      cpuInfo(gmx::CpuInfo::detect());
+
+    const std::string commonMsg =
+        "\nGROMACS might still work, but it will likely hurt your performance."
+        "\nPlease mail gmx-developers@gromacs.org so we can try to fix it.";
+
+    // GROMACS runs fine without cpuinfo, so a failure here is not critical;
+    // it is reported so the detection code can be extended for this platform.
+    EXPECT_GT(cpuInfo.supportLevel(), gmx::CpuInfo::SupportLevel::None)
+        << "No CPU information at all could be detected. " << commonMsg << std::endl;
+
+    // Feature detection is only demanded on the architectures named here
+#if defined __powerpc__ || defined __ppc__ || defined __PPC__ || defined __arm__ || defined __arm || defined GMX_TARGET_X86
+    EXPECT_GE(cpuInfo.supportLevel(), gmx::CpuInfo::SupportLevel::Features)
+        << "No CPU features could be detected. " << commonMsg << std::endl;
+#endif
+
+    // Without per-processor information there is nothing further to verify
+    if (cpuInfo.supportLevel() < gmx::CpuInfo::SupportLevel::LogicalProcessorInfo)
+    {
+        return;
+    }
+
+    // Every index assigned to a logical processor must be non-negative
+    for (const auto &processor : cpuInfo.logicalProcessors())
+    {
+        EXPECT_GE(processor.socket, 0) << "Impossible socket index for logical processor. " << commonMsg << std::endl;
+        EXPECT_GE(processor.core, 0) << "Impossible core index for logical processor. " << commonMsg << std::endl;
+        EXPECT_GE(processor.hwThread, 0) << "Impossible hwthread index for logical processor. " << commonMsg << std::endl;
+    }
+}
+
+} // namespace
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * Tests for gmx::HardwareTopology
+ *
+ * \author Erik Lindahl <erik.lindahl@gmail.com>
+ * \ingroup module_hardware
+ */
+#include "gmxpre.h"
+
+#include "gromacs/hardware/hardwaretopology.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+TEST(HardwareTopologyTest, Execute)
+{
+    // There is no way we can compare to any reference data since that
+    // depends on the architecture, but we can at least make sure that it
+    // works to execute the tests
+
+    gmx::HardwareTopology hwTop(gmx::HardwareTopology::detect());
+
+    std::string           commonMsg =
+        "\nGROMACS might still work, but it will likely hurt your performance."
+        "\nPlease mail gmx-developers@gromacs.org so we can try to fix it.";
+
+    // If we cannot even find the number of logical processors we want to flag it
+    EXPECT_GT(hwTop.supportLevel(), gmx::HardwareTopology::SupportLevel::None)
+        << "Cannot determine number of processors. " << commonMsg << std::endl;
+
+    if (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::Basic)
+    {
+        // The first socket and its first core are used as the template for
+        // the whole machine, so they must exist before they are indexed.
+        // These are fatal assertions: continuing would dereference an
+        // empty container instead of reporting a test failure.
+        int socketsInSystem  = hwTop.machine().sockets.size();
+        ASSERT_GT(socketsInSystem, 0)
+            << "No sockets found in hardware topology. " << commonMsg << std::endl;
+        int coresPerSocket   = hwTop.machine().sockets[0].cores.size();
+        ASSERT_GT(coresPerSocket, 0)
+            << "No cores found in first socket of hardware topology. " << commonMsg << std::endl;
+        int hwThreadsPerCore = hwTop.machine().sockets[0].cores[0].hwThreads.size();
+
+        // Check that logical processor information is reasonable
+        for (auto &l : hwTop.machine().logicalProcessors)
+        {
+            EXPECT_TRUE(l.socket >= 0 && l.socket < socketsInSystem)
+                << "Impossible socket index for logical processor. " << commonMsg << std::endl;
+            EXPECT_TRUE(l.core >= 0 && l.core < coresPerSocket)
+                << "Impossible core index for logical processor. " << commonMsg << std::endl;
+            EXPECT_TRUE(l.hwThread >= 0 && l.hwThread < hwThreadsPerCore)
+                << "Impossible hwthread index for logical processor. " << commonMsg << std::endl;
+        }
+
+        // Double-check that the tree is self-consistent with logical processor info.
+        // NOTE(review): the loop bounds assume every socket has the same number
+        // of cores and every core the same number of hwthreads as the first one.
+        for (int s = 0; s < socketsInSystem; s++)
+        {
+            for (int c = 0; c < coresPerSocket; c++)
+            {
+                for (int t = 0; t < hwThreadsPerCore; t++)
+                {
+                    int        idx        = hwTop.machine().sockets[s].cores[c].hwThreads[t].logicalProcessorId;
+                    const bool idxInRange = (idx >= 0 && idx < hwTop.machine().logicalProcessorCount);
+
+                    EXPECT_TRUE(idxInRange)
+                        << "Inconsistent logical processor index hardware topology. " << commonMsg << std::endl;
+                    if (!idxInRange)
+                    {
+                        // The failure is already recorded; using idx as an array
+                        // index below would read outside logicalProcessors[].
+                        continue;
+                    }
+
+                    EXPECT_EQ(hwTop.machine().logicalProcessors[idx].socket, s)
+                        << "Inconsistent socket index hardware topology. " << commonMsg << std::endl;
+                    EXPECT_EQ(hwTop.machine().logicalProcessors[idx].core, c)
+                        << "Inconsistent core index hardware topology. " << commonMsg << std::endl;
+                    EXPECT_EQ(hwTop.machine().logicalProcessors[idx].hwThread, t)
+                        << "Inconsistent hwthread index hardware topology. " << commonMsg << std::endl;
+                }
+            }
+        }
+    }
+}
+
+} // namespace
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#ifndef GMX_CPUID_H_
-#define GMX_CPUID_H_
-
-#include <stdio.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-} /* fixes auto-indentation problems */
-#endif
-
-
-/* Currently identifiable CPU Vendors */
-enum gmx_cpuid_vendor
-{
- GMX_CPUID_VENDOR_CANNOTDETECT, /* Should only be used if something fails */
- GMX_CPUID_VENDOR_UNKNOWN,
- GMX_CPUID_VENDOR_INTEL,
- GMX_CPUID_VENDOR_AMD,
- GMX_CPUID_VENDOR_FUJITSU,
- GMX_CPUID_VENDOR_IBM,
- GMX_CPUID_VENDOR_ARM,
- GMX_CPUID_NVENDORS
-};
-
-
-/* CPU feature/property list, to be used as indices into the feature array of the
- * gmxcpuid_t data structure.
- *
- * To facilitate looking things up, we keep this list alphabetical.
- * The list is NOT exhaustive - we have basically added stuff that might be
- * useful in an application like Gromacs.
- *
- * AMD and Intel tend to share most architectural elements, and even if the
- * flags might have to be detected in different ways (different cpuid registers),
- * once the flag is present the functions should be identical. Unfortunately the
- * trend right now (2012) seems to be that they are diverging. This means that
- * we need to use specific flags to the compiler to maximize performance, and
- * then the binaries might not be portable between Intel and AMD as they were
- * before when we only needed to check for SSE and/or SSE2 support in Gromacs.
- */
-enum gmx_cpuid_feature
-{
- GMX_CPUID_FEATURE_CANNOTDETECT, /* Flag set if we could not detect on this CPU */
- GMX_CPUID_FEATURE_X86_AES, /* x86 advanced encryption standard accel. */
- GMX_CPUID_FEATURE_X86_APIC, /* APIC support */
- GMX_CPUID_FEATURE_X86_AVX, /* Advanced vector extensions */
- GMX_CPUID_FEATURE_X86_AVX2, /* AVX2 including gather support (not used yet) */
- GMX_CPUID_FEATURE_X86_AVX_512F, /* Foundation AVX-512 instructions */
- GMX_CPUID_FEATURE_X86_AVX_512PF, /* Extended gather/scatter for AVX-512 */
- GMX_CPUID_FEATURE_X86_AVX_512ER, /* Extended-range 1/x and /1sqrt(x) for AVX-512 */
- GMX_CPUID_FEATURE_X86_AVX_512CD, /* Memory conflict-detection for AVX-512 */
- GMX_CPUID_FEATURE_X86_CLFSH, /* Supports CLFLUSH instruction */
- GMX_CPUID_FEATURE_X86_CMOV, /* Conditional move insn support */
- GMX_CPUID_FEATURE_X86_CX8, /* Supports CMPXCHG8B (8-byte compare-exchange) */
- GMX_CPUID_FEATURE_X86_CX16, /* Supports CMPXCHG16B (16-byte compare-exchg) */
- GMX_CPUID_FEATURE_X86_F16C, /* Supports 16-bit FP conversion instructions */
- GMX_CPUID_FEATURE_X86_FMA, /* Fused-multiply add support (mainly for AVX) */
- GMX_CPUID_FEATURE_X86_FMA4, /* 4-operand FMA, only on AMD for now */
- GMX_CPUID_FEATURE_X86_HTT, /* Hyper-Threading supported */
- GMX_CPUID_FEATURE_X86_LAHF_LM, /* LAHF/SAHF support in 64 bits */
- GMX_CPUID_FEATURE_X86_MISALIGNSSE, /* Support for misaligned SSE data instructions */
- GMX_CPUID_FEATURE_X86_MMX, /* MMX registers and instructions */
- GMX_CPUID_FEATURE_X86_MSR, /* Supports Intel model-specific-registers */
- GMX_CPUID_FEATURE_X86_NONSTOP_TSC, /* Invariant TSC (constant rate in ACPI states) */
- GMX_CPUID_FEATURE_X86_PCID, /* Process context identifier support */
- GMX_CPUID_FEATURE_X86_PCLMULDQ, /* Carry-less 64-bit multiplication supported */
- GMX_CPUID_FEATURE_X86_PDCM, /* Perfmon and Debug Capability */
- GMX_CPUID_FEATURE_X86_PDPE1GB, /* Support for 1GB pages */
- GMX_CPUID_FEATURE_X86_POPCNT, /* Supports the POPCNT (population count) insn */
- GMX_CPUID_FEATURE_X86_PSE, /* Supports 4MB-pages (page size extension) */
- GMX_CPUID_FEATURE_X86_RDRND, /* RDRAND high-quality hardware random numbers */
- GMX_CPUID_FEATURE_X86_RDTSCP, /* Serializing rdtscp instruction available */
- GMX_CPUID_FEATURE_X86_SSE2, /* SSE 2 */
- GMX_CPUID_FEATURE_X86_SSE3, /* SSE 3 */
- GMX_CPUID_FEATURE_X86_SSE4A, /* SSE 4A */
- GMX_CPUID_FEATURE_X86_SSE4_1, /* SSE 4.1 */
- GMX_CPUID_FEATURE_X86_SSE4_2, /* SSE 4.2 */
- GMX_CPUID_FEATURE_X86_SSSE3, /* Supplemental SSE3 */
- GMX_CPUID_FEATURE_X86_TDT, /* TSC deadline timer */
- GMX_CPUID_FEATURE_X86_X2APIC, /* Extended xAPIC Support */
- GMX_CPUID_FEATURE_X86_XOP, /* AMD extended instructions, only AMD for now */
- GMX_CPUID_FEATURE_ARM_NEON, /* 32-bit ARM NEON */
- GMX_CPUID_FEATURE_ARM_NEON_ASIMD, /* 64-bit ARM AArch64 Advanced SIMD */
- GMX_CPUID_FEATURE_IBM_QPX, /* IBM QPX SIMD (BlueGene/Q and later) */
- GMX_CPUID_FEATURE_IBM_VMX, /* IBM VMX SIMD (Altivec on Power6 and later) */
- GMX_CPUID_FEATURE_IBM_VSX, /* IBM VSX SIMD (Power7 and later) */
- GMX_CPUID_NFEATURES
-};
-
-
-/* Currently supported SIMD instruction sets, intrinsics or other similar combinations
- * in Gromacs. There is not always a 1-to-1 correspondence with feature flags; on some AMD
- * hardware we prefer to use 128bit AVX instructions (although 256-bit ones could be executed).
- * These are listed in increasing order for sets supported by one CPU.
- * The order is only used for printing "minimum" and "maximum" suggested
- * SIMD instruction sets for nodes in a cluster, so pairs like
- * GMX_CPUID_SIMD_X86_AVX_128_FMA vs GMX_CPUID_SIMD_X86_AVX_256 which strictly
- * speaking can't be ordered are not really an issue.
- */
-enum gmx_cpuid_simd
-{
- GMX_CPUID_SIMD_CANNOTDETECT, /* Should only be used if something fails */
- GMX_CPUID_SIMD_NONE,
- GMX_CPUID_SIMD_REFERENCE,
- GMX_CPUID_SIMD_X86_SSE2,
- GMX_CPUID_SIMD_X86_SSE4_1,
- GMX_CPUID_SIMD_X86_AVX_128_FMA,
- GMX_CPUID_SIMD_X86_AVX_256,
- GMX_CPUID_SIMD_X86_AVX2_256,
- GMX_CPUID_SIMD_X86_AVX_512F,
- GMX_CPUID_SIMD_X86_AVX_512ER,
- GMX_CPUID_SIMD_SPARC64_HPC_ACE,
- GMX_CPUID_SIMD_IBM_QPX,
- GMX_CPUID_SIMD_IBM_VMX,
- GMX_CPUID_SIMD_IBM_VSX,
- GMX_CPUID_SIMD_ARM_NEON,
- GMX_CPUID_SIMD_ARM_NEON_ASIMD,
- GMX_CPUID_NSIMD
-};
-
-/* Text strings corresponding to CPU vendors */
-extern const char *
-gmx_cpuid_vendor_string[GMX_CPUID_NVENDORS];
-
-/* Text strings for CPU feature indices */
-extern const char *
-gmx_cpuid_feature_string[GMX_CPUID_NFEATURES];
-
-/* Text strings for Gromacs SIMD instruction sets */
-extern const char *
-gmx_cpuid_simd_string[GMX_CPUID_NSIMD];
-
-
-/* Abstract data type with CPU detection information. Set by gmx_cpuid_init(). */
-typedef struct gmx_cpuid *
- gmx_cpuid_t;
-
-
-/* Return the SIMD instruction set GROMACS was compiled with. */
-enum gmx_cpuid_simd
-gmx_compiled_simd ();
-
-
-/* Fill the data structure by using CPU detection instructions.
- * Return 0 on success, 1 if something bad happened.
- */
-int
-gmx_cpuid_init (gmx_cpuid_t * cpuid);
-
-
-/* Return the vendor id as enumerated type. Use gmx_cpuid_vendor_string[]
- * to get the corresponding text string.
- */
-enum gmx_cpuid_vendor
-gmx_cpuid_vendor (gmx_cpuid_t cpuid);
-
-
-/* Return a constant pointer to the processor brand string. */
-const char *
-gmx_cpuid_brand (gmx_cpuid_t cpuid);
-
-
-/* Return processor family version. For a chip of version 1.2.3, this is 1 */
-int
-gmx_cpuid_family (gmx_cpuid_t cpuid);
-
-/* Return processor model version, For a chip of version 1.2.3, this is 2. */
-int
-gmx_cpuid_model (gmx_cpuid_t cpuid);
-
-/* Return processor stepping version, For a chip of version 1.2.3, this is 3. */
-int
-gmx_cpuid_stepping (gmx_cpuid_t cpuid);
-
-
-/* Check whether a particular CPUID feature is set.
- * Returns 0 if flag "feature" is not set, 1 if the flag is set.
- */
-int
-gmx_cpuid_feature (gmx_cpuid_t cpuid,
- enum gmx_cpuid_feature feature);
-
-
-/* Check whether the CPU is an Intel with Nehalem microarchitecture.
- * Return 0 if not Intel Nehalem, 1 if Intel Nehalem.
- */
-int
-gmx_cpuid_is_intel_nehalem (const gmx_cpuid_t cpuid);
-
-
-/* Return pointers to cpu topology information.
- *
- * Important: CPU topology requires more OS support than most other
- * functions in this file, including support for thread pinning to hardware.
- * This means it will not work on some platforms, including e.g. Mac OS X.
- * Thus, it is IMPERATIVE that you check the return value from this routine
- * before doing anything with the information. It is only if the return
- * value is zero that the data is valid.
- *
- * For the returned values we have:
- * - nprocessors Total number of logical processors reported by OS
- * - npackages Usually number of CPU sockets
- * - ncores_per_package Number of cores in each package
- * - nhwthreads_per_core Number of hardware threads per core; 2 for hyperthreading.
- * - package_id Array with the package index for each logical cpu
- * - core_id Array with local core index for each logical cpu
- * - hwthread_id Array with local hwthread index for each logical cpu
- * - locality_order Array with logical cpu numbers, sorted in order
- * of physical and logical locality in the system.
- *
- * All arrays are of length nprocessors.
- */
-int
-gmx_cpuid_topology(gmx_cpuid_t cpuid,
- int * nprocessors,
- int * npackages,
- int * ncores_per_package,
- int * nhwthreads_per_core,
- const int ** package_id,
- const int ** core_id,
- const int ** hwthread_id,
- const int ** locality_order);
-
-/* Enumerated values for x86 SMT enabled-status. Note that this does not refer
- * to Hyper-Threading support (that is the flag GMX_CPUID_FEATURE_X86_HTT), but
- * whether Hyper-Threading is _enabled_ and _used_ in bios right now.
- */
-enum gmx_cpuid_x86_smt
-{
- GMX_CPUID_X86_SMT_CANNOTDETECT,
- GMX_CPUID_X86_SMT_DISABLED,
- GMX_CPUID_X86_SMT_ENABLED
-};
-
-/* Returns the status of x86 SMT support. IMPORTANT: There are non-zero
- * return values for this routine that still do not indicate supported and
- * enabled smt/Hyper-Threading. You need to carefully check the return value
- * against the enumerated type values to see what you are getting.
- *
- * Long-term, this functionality will move to a new hardware topology detection
- * layer, but that will require a lot of new code and a working interface to the
- * hwloc library. Surprisingly, there is no simple way to find out that
- * Hyper-Threading is actually turned on without fully enumerating and checking
- * all the cores, which we presently can only do on Linux. This means a couple
- * of things:
- *
- * 1) If you want to know whether your CPU _supports_ Hyper-Threading in the
- * first place, check the GMX_CPUID_FEATURE_X86_HTT flag instead!
- * 2) There are several scenarios where this routine will say that it cannot
- * detect whether SMT is enabled and used right now.
- * 3) If you need support on non-Linux x86, you have to write it :-)
- * 4) Don't invest too much efforts, since this will be replaced with
- * full hardware topology detection in the future.
- * 5) Don't worry if the detection does not work. It is not a catastrophe, but
- * but we get slightly better performance on x86 if we use Hyper-Threading
- * cores in direct space, but not reciprocal space.
- *
- * Since this routine presently only supports Hyper-Threading we say X86_SMT
- * in order not to give the impression we can detect any SMT. We haven't
- * even tested the performance on other SMT implementations, so it is not
- * obvious we shouldn't use SMT there.
- *
- * Note that you can get more complete topology information from
- * gmx_cpuid_topology(), although that requires slightly more OS support.
- */
-enum gmx_cpuid_x86_smt
-gmx_cpuid_x86_smt(gmx_cpuid_t cpuid);
-
-
-/* Formats a text string (up to n characters) from the data structure.
- * The output will have max 80 chars between newline characters.
- */
-int
-gmx_cpuid_formatstring (gmx_cpuid_t cpuid,
- char * s,
- int n);
-
-
-/* Suggests a suitable gromacs SIMD based on the support in the
- * hardware.
- */
-enum gmx_cpuid_simd
-gmx_cpuid_simd_suggest (gmx_cpuid_t cpuid);
-
-
-/* Check if this binary was compiled with the same SIMD instructions as we
- * would suggest for the current hardware. Always print stats to the log file
- * if it is non-NULL, and if we don't have a match, print a warning in log
- * (if non-NULL) and if print_to_stderr!=0 also to stderr.
- * The suggested SIMD instruction set simd_suggest is obtained with
- * gmx_cpuid_simd_suggest(), but with MPI this might be different for
- * different nodes, so it shoul be passed here after parallel reduction.
- */
-int
-gmx_cpuid_simd_check (enum gmx_cpuid_simd simd_suggest,
- FILE * log,
- int print_to_stderr);
-
-
-/* Release resources used by data structure. Note that the pointer to the
- * CPU brand string will no longer be valid once this routine has been called.
- */
-void
-gmx_cpuid_done (gmx_cpuid_t cpuid);
-
-
-
-
-#ifdef __cplusplus
-}
-#endif
-
-
-#endif /* GMX_CPUID_H_ */
#ifndef HWINFO_H
#define HWINFO_H
-#include "gromacs/legacyheaders/gmx_cpuid.h"
#include "gromacs/utility/basedefinitions.h"
#ifdef __cplusplus
/* Data for our local physical node */
struct gmx_gpu_info_t gpu_info; /* Information about GPUs detected in the system */
- gmx_cpuid_t cpuid_info; /* CPUID information about CPU detected;
- NOTE: this will only detect the CPU thread 0 of the
- current process runs on. */
int ncore; /* Number of cores, will be 0 when not detected */
int nthreads_hw_avail; /* Number of hardware threads available; this number
is based on the number of CPUs reported as available
by the OS at the time of detection. */
+ // TODO: Change these to proper C++ objects when this file is no longer included in any C sources
+ struct CxxObject * pCpuInfo; /* Opaque pointer to C++ object (CxxObject does not exist) */
+ struct CxxObject * pHardwareTopology; /* Opaque pointer to C++ object (CxxObject does not exist) */
+
/* Data reduced through MPI over all physical nodes */
int nphysicalnode; /* Number of physical nodes */
int ncore_tot; /* Sum of #cores over all nodes, can be 0 */
int ngpu_compatible_min; /* Min #GPUs over all nodes */
int ngpu_compatible_max; /* Max #GPUs over all nodes */
- /* The values below are only used for printing, so here it's not an issue
- * that stricly speaking SIMD instruction sets can't be uniquely ordered.
- */
- enum gmx_cpuid_simd simd_suggest_min; /* Highest SIMD instruction set supported by all ranks */
- enum gmx_cpuid_simd simd_suggest_max; /* Highest SIMD instruction set supported by at least one rank */
+ int simd_suggest_min; /* Highest SIMD instruction set supported by all ranks */
+ int simd_suggest_max; /* Highest SIMD instruction set supported by at least one rank */
gmx_bool bIdenticalGPUs; /* TRUE if all ranks have the same type(s) and order of GPUs */
} gmx_hw_info_t;
#ifndef GMX_LISTED_FORCES_MANAGE_THREADING_H
#define GMX_LISTED_FORCES_MANAGE_THREADING_H
+#include <cstdio>
+
#include "gromacs/legacyheaders/types/forcerec.h"
#include "gromacs/topology/idef.h"
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2014, by the GROMACS development team, led by
+# Copyright (c) 2014,2015, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
# To help us fund GROMACS development, we humbly ask that you cite
# the research papers on the package. Check out http://www.gromacs.org.
+# Append every .cpp file found in this directory to LIBGROMACS_SOURCES and
+# export the extended list to the parent directory scope.
+# NOTE(review): file(GLOB) is evaluated only when CMake runs, so adding a
+# source file to this directory requires re-running CMake to pick it up.
+file(GLOB SIMD_SOURCES *.cpp)
+set(LIBGROMACS_SOURCES ${LIBGROMACS_SOURCES} ${SIMD_SOURCES} PARENT_SCOPE)
+
if (BUILD_TESTING)
add_subdirectory(tests)
endif()
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \internal \file
+ *
+ * \brief Implements SIMD architecture support query routines
+ *
+ * \author Erik Lindahl <erik.lindahl@scilifelab.se>
+ *
+ * \ingroup module_simd
+ */
+
+#include "gmxpre.h"
+
+#include "support.h"
+
+#include "config.h"
+
+#include <cstdio>
+#include <cstdlib>
+
+#include <map>
+#include <string>
+
+#include "gromacs/hardware/cpuinfo.h"
+
+namespace gmx
+{
+
+/*! \cond libapi */
+
+// Translate a SimdType value into its printable name. The returned
+// reference points into a function-local static table, so it stays valid
+// after the call returns.
+const std::string &
+simdString(SimdType s)
+{
+    using SimdNameTable = std::map<SimdType, std::string>;
+
+    // One entry per SimdType enumerator
+    static const SimdNameTable simdNames =
+    {
+        { SimdType::None,           "None"            },
+        { SimdType::Reference,      "Reference"       },
+        { SimdType::Generic,        "Generic"         },
+        { SimdType::X86_Sse2,       "SSE2"            },
+        { SimdType::X86_Sse4_1,     "SSE4.1"          },
+        { SimdType::X86_Avx128Fma,  "AVX_128_FMA"     },
+        { SimdType::X86_Avx,        "AVX_256"         },
+        { SimdType::X86_Avx2,       "AVX2_256"        },
+        { SimdType::X86_Avx512F,    "AVX_512F"        },
+        { SimdType::X86_Avx512ER,   "AVX_512ER"       },
+        { SimdType::X86_Mic,        "X86_MIC"         },
+        { SimdType::Arm_Neon,       "ARM_NEON"        },
+        { SimdType::Arm_NeonAsimd,  "ARM_NEON_ASIMD"  },
+        { SimdType::Ibm_Qpx,        "IBM_QPX"         },
+        { SimdType::Ibm_Vmx,        "IBM_VMX"         },
+        { SimdType::Ibm_Vsx,        "IBM_VSX"         },
+        { SimdType::Fujitsu_HpcAce, "Fujitsu HPC-ACE" }
+    };
+
+    // at() works on the const table and never inserts a missing key
+    return simdNames.at(s);
+}
+
+// Select the SIMD flavor to recommend for the CPU described by c.
+//
+// Returns SimdType::None unless c reports at least SupportLevel::Features.
+// Otherwise the vendor selects a priority list of feature flags, and the
+// first flag that is set decides the result. Vendors not listed in the
+// switch, or CPUs with none of the tested flags, also yield SimdType::None.
+SimdType
+simdSuggested(const CpuInfo &c)
+{
+    SimdType suggested = SimdType::None;
+
+    if (c.supportLevel() >= CpuInfo::SupportLevel::Features)
+    {
+        switch (c.vendor())
+        {
+            case CpuInfo::Vendor::Intel:
+                if (c.feature(CpuInfo::Feature::X86_Avx2))
+                {
+                    suggested = SimdType::X86_Avx2;
+                }
+                else if (c.feature(CpuInfo::Feature::X86_Avx))
+                {
+                    suggested = SimdType::X86_Avx;
+                }
+                else if (c.feature(CpuInfo::Feature::X86_Sse4_1))
+                {
+                    suggested = SimdType::X86_Sse4_1;
+                }
+                else if (c.feature(CpuInfo::Feature::X86_Sse2))
+                {
+                    suggested = SimdType::X86_Sse2;
+                }
+                break;
+            case CpuInfo::Vendor::Amd:
+                if (c.feature(CpuInfo::Feature::X86_Avx2))
+                {
+                    // When Amd starts supporting Avx2 we assume it will be 256 bits
+                    suggested = SimdType::X86_Avx2;
+                }
+                else if (c.feature(CpuInfo::Feature::X86_Avx))
+                {
+                    // For vanilla Avx, we should use the 128-bit FMA flavor in Amd
+                    suggested = SimdType::X86_Avx128Fma;
+                }
+                else if (c.feature(CpuInfo::Feature::X86_Sse4_1))
+                {
+                    suggested = SimdType::X86_Sse4_1;
+                }
+                else if (c.feature(CpuInfo::Feature::X86_Sse2))
+                {
+                    suggested = SimdType::X86_Sse2;
+                }
+                break;
+            case CpuInfo::Vendor::Arm:
+                if (c.feature(CpuInfo::Feature::Arm_NeonAsimd))
+                {
+                    suggested = SimdType::Arm_NeonAsimd;
+                }
+                else if (c.feature(CpuInfo::Feature::Arm_Neon))
+                {
+                    suggested = SimdType::Arm_Neon;
+                }
+                break;
+            case CpuInfo::Vendor::Ibm:
+                if (c.feature(CpuInfo::Feature::Ibm_Vsx))
+                {
+                    suggested = SimdType::Ibm_Vsx;
+                }
+                else if (c.feature(CpuInfo::Feature::Ibm_Vmx))
+                {
+                    // Fall back to VMX when VSX is absent. This branch used to
+                    // repeat the VSX test and could therefore never be taken.
+                    suggested = SimdType::Ibm_Vmx;
+                }
+                else if (c.feature(CpuInfo::Feature::Ibm_Qpx))
+                {
+                    suggested = SimdType::Ibm_Qpx;
+                }
+                break;
+            case CpuInfo::Vendor::Fujitsu:
+                if (c.feature(CpuInfo::Feature::Fujitsu_HpcAce))
+                {
+                    suggested = SimdType::Fujitsu_HpcAce;
+                }
+                break;
+            default:
+                break;
+        }
+    }
+    return suggested;
+}
+
+// Report which SIMD flavor this source file was compiled for.
+//
+// The GMX_SIMD_* macros are evaluated by value with #if/#elif in the order
+// written below; the first one that is non-zero decides the return value,
+// and SimdType::None is returned when none of them is. The order therefore
+// matters if more than one macro is ever non-zero.
+// NOTE(review): the macros presumably come from config.h - confirm.
+SimdType
+simdCompiled()
+{
+#if GMX_SIMD_X86_AVX_512ER
+ return SimdType::X86_Avx512ER;
+#elif GMX_SIMD_X86_AVX_512F
+ return SimdType::X86_Avx512F;
+#elif GMX_SIMD_X86_MIC
+ return SimdType::X86_Mic;
+#elif GMX_SIMD_X86_AVX2_256
+ return SimdType::X86_Avx2;
+#elif GMX_SIMD_X86_AVX_256
+ return SimdType::X86_Avx;
+#elif GMX_SIMD_X86_AVX_128_FMA
+ return SimdType::X86_Avx128Fma;
+#elif GMX_SIMD_X86_SSE4_1
+ return SimdType::X86_Sse4_1;
+#elif GMX_SIMD_X86_SSE2
+ return SimdType::X86_Sse2;
+#elif GMX_SIMD_ARM_NEON
+ return SimdType::Arm_Neon;
+#elif GMX_SIMD_ARM_NEON_ASIMD
+ return SimdType::Arm_NeonAsimd;
+#elif GMX_SIMD_IBM_QPX
+ return SimdType::Ibm_Qpx;
+#elif GMX_SIMD_IBM_VMX
+ return SimdType::Ibm_Vmx;
+#elif GMX_SIMD_IBM_VSX
+ return SimdType::Ibm_Vsx;
+#elif GMX_SIMD_SPARC64_HPC_ACE
+ return SimdType::Fujitsu_HpcAce;
+#elif GMX_SIMD_REFERENCE
+ return SimdType::Reference;
+#else
+ return SimdType::None;
+#endif
+}
+
+// Compare the SIMD flavor this binary was compiled with (simdCompiled())
+// against the flavor wanted for the hardware.
+//
+// wanted        SIMD type considered the best fit for the hardware.
+// log           If not nullptr, receives the mismatch note when the binary
+//               uses a different (not newer) SIMD type than wanted.
+// warnToStdErr  If true, that mismatch is also reported on stderr.
+//
+// Nothing is printed when the two types are identical. When the compiled
+// type is newer than the hardware supports, a warning always goes to
+// stderr regardless of log and warnToStdErr.
+//
+// Returns true if wanted equals the compiled SIMD type, false otherwise.
+bool
+simdCheck(gmx::SimdType    wanted,
+          FILE *           log,
+          bool             warnToStdErr)
+{
+    SimdType compiled = simdCompiled();
+
+    // Normally it is close to catastrophic if the compiled SIMD type is larger than
+    // the supported one, but AVX128Fma is an exception: AMD CPUs will (strongly) prefer
+    // AVX128Fma, but they will work fine with AVX too. Thus, make an exception for this.
+    if (compiled > wanted && !(compiled == SimdType::X86_Avx && wanted == SimdType::X86_Avx128Fma))
+    {
+        fprintf(stderr, "Warning: SIMD instructions newer than hardware. Program will likely crash.\n"
+                "SIMD instructions most likely to fit this hardware: %s\n"
+                "SIMD instructions selected at GROMACS compile time: %s\n\n",
+                simdString(wanted).c_str(),
+                simdString(compiled).c_str());
+    }
+    else if (wanted != compiled)
+    {
+        // Only reached on a real mismatch; a perfect match must stay silent.
+        // This warning will also occur if compiled is X86_Avx and wanted is X86_Avx128Fma
+
+        if (log != nullptr)
+        {
+            fprintf(log, "\nBinary not matching hardware - you might be losing performance.\n"
+                    "SIMD instructions most likely to fit this hardware: %s\n"
+                    "SIMD instructions selected at GROMACS compile time: %s\n\n",
+                    simdString(wanted).c_str(),
+                    simdString(compiled).c_str());
+        }
+        if (warnToStdErr)
+        {
+            fprintf(stderr, "Compiled SIMD instructions: %s, GROMACS could use %s on this machine, which is better.\n\n",
+                    simdString(compiled).c_str(),
+                    simdString(wanted).c_str());
+        }
+    }
+    return (wanted == compiled);
+}
+
+/*! \endcond */
+
+}
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+#ifndef GMX_SIMD_SUPPORT_H
+#define GMX_SIMD_SUPPORT_H
+
+
+/*! \libinternal \file
+ *
+ * \brief Functions to query compiled and supported SIMD architectures
+ *
+ * \author Erik Lindahl <erik.lindahl@scilifelab.se>
+ *
+ * \inlibraryapi
+ * \ingroup module_simd
+ */
+
+#include "gromacs/hardware/cpuinfo.h"
+
+namespace gmx
+{
+
+/*! \cond libapi */
+
+/*! \brief Enumerated options for SIMD architectures
+ *
+ * The declaration order is significant: simdCheck() compares two values
+ * with operator> to decide whether the compiled SIMD type is newer than
+ * the one wanted for the hardware. Check that comparison before
+ * inserting or reordering entries.
+ */
+enum class SimdType
+{
+ None, //!< Disable all SIMD support
+ Reference, //!< Gromacs reference software SIMD
+ Generic, //!< Placeholder for future support for gcc generic SIMD
+ X86_Sse2, //!< SSE2
+ X86_Sse4_1, //!< SSE4.1
+ X86_Avx128Fma, //!< 128-bit Avx with FMA (Amd)
+ X86_Avx, //!< 256-bit Avx
+ X86_Avx2, //!< AVX2
+ X86_Avx512F, //!< AVX512F
+ X86_Avx512ER, //!< AVX512ER
+ X86_Mic, //!< Knight's corner
+ Arm_Neon, //!< 32-bit ARM NEON
+ Arm_NeonAsimd, //!< 64-bit ARM AArch64 Advanced SIMD
+ Ibm_Qpx, //!< IBM QPX SIMD (BlueGene/Q and later)
+ Ibm_Vmx, //!< IBM VMX SIMD (Altivec on Power6 and later)
+ Ibm_Vsx, //!< IBM VSX SIMD (Power7 and later)
+ Fujitsu_HpcAce //!< Fujitsu K-computer
+};
+
+/*! \libinternal \brief Return a string with the name of a SIMD type
+ *
+ * \param s SIMD type to turn into string
+ */
+const std::string &
+simdString(SimdType s);
+
+/*! \libinternal \brief Return the SIMD type that would fit this hardware best */
+SimdType
+simdSuggested(const CpuInfo &c);
+
+/*! \libinternal \brief Return the SIMD type the library was compiled with */
+SimdType
+simdCompiled();
+
+/*! \libinternal \brief Check if binary was compiled with the provided SIMD type
+ *
+ * \param s SIMD type to query. If this matches the suggested type
+ * for this cpu, the routine returns quietly.
+ * \param log If not nullptr, statistics will be printed to the file.
+ * If we do not have a match there will also be a warning.
+ * \param warnToStdErr If true, warnings will also be printed to stderr.
+ *
+ * \return True if \p s is identical to the SIMD type reported by
+ * simdCompiled(), false otherwise.
+ */
+bool
+simdCheck(SimdType s,
+ FILE * log,
+ bool warnToStdErr);
+
+/*! \endcond */
+
+} // namespace gmx
+
+
+#endif // GMX_SIMD_SUPPORT_H
#ifndef GMX_TABLES_FORCETABLE_H
#define GMX_TABLES_FORCETABLE_H
+#include <cstdio>
+
#include "gromacs/legacyheaders/types/fcdata.h"
#include "gromacs/legacyheaders/types/forcerec.h"
#include "gromacs/legacyheaders/types/interaction_const.h"
#include "gromacs/gmxlib/gmx_detect_hardware.h"
#include "gromacs/gmxlib/gmx_omp_nthreads.h"
#include "gromacs/gmxlib/md_logging.h"
-#include "gromacs/legacyheaders/gmx_cpuid.h"
+#include "gromacs/hardware/cpuinfo.h"
+#include "gromacs/hardware/hardwaretopology.h"
#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/legacyheaders/types/hw_info.h"
#include "gromacs/mdtypes/inputrec.h"
/* Returns the maximum OpenMP thread count for which using a single MPI rank
* should be faster than using multiple ranks with the same total thread count.
*/
-static int nthreads_omp_faster(gmx_cpuid_t cpuid_info, gmx_bool bUseGPU)
+static int nthreads_omp_faster(const gmx::CpuInfo &cpuInfo, gmx_bool bUseGPU)
{
int nth;
- if (gmx_cpuid_vendor(cpuid_info) == GMX_CPUID_VENDOR_INTEL &&
- gmx_cpuid_feature(cpuid_info, GMX_CPUID_FEATURE_X86_AVX))
+ if (cpuInfo.vendor() == gmx::CpuInfo::Vendor::Intel &&
+ cpuInfo.feature(gmx::CpuInfo::Feature::X86_Avx))
{
nth = nthreads_omp_faster_Intel_AVX;
}
- else if (gmx_cpuid_is_intel_nehalem(cpuid_info))
+ else if (gmx::cpuIsX86Nehalem(cpuInfo))
{
+ // Intel Nehalem
nth = nthreads_omp_faster_Nehalem;
}
else
}
/* Returns that maximum OpenMP thread count that passes the efficiency check */
-static int nthreads_omp_efficient_max(int gmx_unused nrank,
- gmx_cpuid_t cpuid_info,
- gmx_bool bUseGPU)
+static int nthreads_omp_efficient_max(int gmx_unused nrank,
+ const gmx::CpuInfo &cpuInfo,
+ gmx_bool bUseGPU)
{
#if defined GMX_OPENMP && defined GMX_MPI
if (nrank > 1)
else
#endif
{
- return nthreads_omp_faster(cpuid_info, bUseGPU);
+ return nthreads_omp_faster(cpuInfo, bUseGPU);
}
}
int nthreads_tot,
int ngpu)
{
- int nrank;
+ int nrank;
+ const gmx::CpuInfo &cpuInfo = *reinterpret_cast<gmx::CpuInfo *>(hwinfo->pCpuInfo);
GMX_RELEASE_ASSERT(nthreads_tot > 0, "There must be at least one thread per rank");
nrank = nthreads_tot;
}
else if (gmx_gpu_sharing_supported() &&
- (nthreads_tot > nthreads_omp_faster(hwinfo->cpuid_info,
- ngpu > 0) ||
+ (nthreads_tot > nthreads_omp_faster(cpuInfo, ngpu > 0) ||
(ngpu > 1 && nthreads_tot/ngpu > nthreads_omp_mpi_target_max)))
{
/* The high OpenMP thread count will likely result in sub-optimal
}
else
{
- if (nthreads_tot <= nthreads_omp_faster(hwinfo->cpuid_info, ngpu > 0))
+ if (nthreads_tot <= nthreads_omp_faster(cpuInfo, ngpu > 0))
{
/* Use pure OpenMP parallelization */
nrank = 1;
#ifdef GMX_THREAD_MPI
+
+
+/* Returns whether the detected hardware topology reports more than one
+ * hardware thread on its first core.
+ *
+ * Only core 0 of socket 0 is inspected. When the topology support level is
+ * below Basic the function returns false without touching the socket list.
+ */
+static bool
+gmxSmtIsEnabled(const gmx::HardwareTopology &hwTop)
+{
+    // The per-core thread list is only consulted at Basic support level or better.
+    if (hwTop.supportLevel() < gmx::HardwareTopology::SupportLevel::Basic)
+    {
+        return false;
+    }
+
+    // More than one hardware thread on the first core is taken to mean SMT is on.
+    return hwTop.machine().sockets[0].cores[0].hwThreads.size() > 1;
+}
+
/* Get the number of MPI ranks to use for thread-MPI based on how many
* were requested, which algorithms we're using,
* and how many particles there are.
FILE *fplog,
gmx_bool bUseGpu)
{
- int nthreads_hw, nthreads_tot_max, nrank, ngpu;
- int min_atoms_per_mpi_rank;
+ int nthreads_hw, nthreads_tot_max, nrank, ngpu;
+ int min_atoms_per_mpi_rank;
+
+ const gmx::CpuInfo &cpuInfo = *reinterpret_cast<gmx::CpuInfo *>(hwinfo->pCpuInfo);
+ const gmx::HardwareTopology &hwTop = *reinterpret_cast<gmx::HardwareTopology *>(hwinfo->pHardwareTopology);
/* Check if an algorithm does not support parallel simulation. */
if (inputrec->eI == eiLBFGS ||
nrank_new = std::max(1, mtop->natoms/min_atoms_per_mpi_rank);
/* Avoid partial use of Hyper-Threading */
- if (gmx_cpuid_x86_smt(hwinfo->cpuid_info) == GMX_CPUID_X86_SMT_ENABLED &&
+ if (gmxSmtIsEnabled(hwTop) &&
nrank_new > nthreads_hw/2 && nrank_new < nthreads_hw)
{
nrank_new = nthreads_hw/2;
*/
int nt_omp_max;
- nt_omp_max = nthreads_omp_efficient_max(nrank, hwinfo->cpuid_info, ngpu >= 1);
+ nt_omp_max = nthreads_omp_efficient_max(nrank, cpuInfo, ngpu >= 1);
if (nrank*nt_omp_max < hwinfo->nthreads_hw_avail)
{
}
else
{
+ const gmx::CpuInfo &cpuInfo = *reinterpret_cast<gmx::CpuInfo *>(hwinfo->pCpuInfo);
+
/* No domain decomposition (or only one domain) */
if (!(ngpu > 0 && !gmx_gpu_sharing_supported()) &&
- nth_omp_max > nthreads_omp_faster(hwinfo->cpuid_info, ngpu > 0))
+ nth_omp_max > nthreads_omp_faster(cpuInfo, ngpu > 0))
{
/* To arrive here, the user/system set #ranks and/or #OMPthreads */
gmx_bool bEnvSet;