/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
}
+int
+gmx_cpuid_is_intel_nehalem (const gmx_cpuid_t cpuid)
+{ /* Report whether cpuid describes a CPU this code treats as Intel Nehalem.
+   *
+   * The expression below yields 1 only when all three conditions hold:
+   * the vendor is GMX_CPUID_VENDOR_INTEL, the family is 6, and the model
+   * equals one of the six listed values (0x2E, 0x1A, 0x1E, 0x2F, 0x2C,
+   * 0x25). In every other case it yields 0.
+   *
+   * cpuid is dereferenced without a NULL check, so the caller must pass
+   * a valid handle.
+   */
+ return (cpuid->vendor == GMX_CPUID_VENDOR_INTEL &&
+ cpuid->family == 6 &&
+ (cpuid->model == 0x2E || /* NOTE(review): model list presumably enumerates the Nehalem-generation parts; confirm against Intel's CPUID model tables */
+ cpuid->model == 0x1A ||
+ cpuid->model == 0x1E ||
+ cpuid->model == 0x2F ||
+ cpuid->model == 0x2C ||
+ cpuid->model == 0x25));
+}
/* What type of SIMD was compiled in, if any? */
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
enum gmx_cpuid_feature feature);
+/* Check whether the CPU is an Intel with Nehalem microarchitecture.
+ * Return 0 if not Intel Nehalem, 1 if Intel Nehalem.
+ */
+int
+gmx_cpuid_is_intel_nehalem (const gmx_cpuid_t cpuid);
+
+
/* Return pointers to cpu topology information.
*
* Important: CPU topology requires more OS support than most other
* two CPUs with HT, so we need a limit<16; thus we use 12.
* A reasonable limit for Intel Sandy and Ivy bridge,
* not knowing the topology, is 16 threads.
+ * Below we check for Intel and AVX, which for now includes
+ * Sandy/Ivy Bridge and Haswell/Broadwell. By checking for AVX instead of
+ * model numbers we ensure that future Intel CPUs are also covered.
*/
const int nthreads_omp_always_faster = 4;
const int nthreads_omp_always_faster_Nehalem = 12;
- const int nthreads_omp_always_faster_SandyBridge = 16;
- const int first_model_Nehalem = 0x1A;
- const int first_model_SandyBridge = 0x2A;
- gmx_bool bIntel_Family6;
+ const int nthreads_omp_always_faster_Intel_AVX = 16;
+ gmx_bool bIntelAVX;
- bIntel_Family6 =
+ bIntelAVX =
(gmx_cpuid_vendor(hwinfo->cpuid_info) == GMX_CPUID_VENDOR_INTEL &&
- gmx_cpuid_family(hwinfo->cpuid_info) == 6);
+ gmx_cpuid_feature(hwinfo->cpuid_info, GMX_CPUID_FEATURE_X86_AVX));
if (nthreads_tot <= nthreads_omp_always_faster ||
- (bIntel_Family6 &&
- ((gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_Nehalem && nthreads_tot <= nthreads_omp_always_faster_Nehalem) ||
- (gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_SandyBridge && nthreads_tot <= nthreads_omp_always_faster_SandyBridge))))
+ ((gmx_cpuid_is_intel_nehalem(hwinfo->cpuid_info) && nthreads_tot <= nthreads_omp_always_faster_Nehalem) ||
+ (bIntelAVX && nthreads_tot <= nthreads_omp_always_faster_Intel_AVX)))
{
/* Use pure OpenMP parallelization */
nthreads_tmpi = 1;