From 54ac4012d62bf1a6f725be153346add6573a6402 Mon Sep 17 00:00:00 2001 From: Berk Hess Date: Thu, 16 Apr 2015 18:07:49 +0200 Subject: [PATCH] Correct -ntmpi auto for intel nehalem and older With thread MPI mdrun would automatically choose 1 MPI rank with up to 16 OpenMP threads on all Intel family 6 CPUs, whereas this was only intended for Sandy Bridge and later. Change-Id: I29882375c8569497b3e309de7cc66a2af4d6fa40 --- src/gromacs/gmxlib/gmx_cpuid.c | 14 +++++++++++++- src/gromacs/legacyheaders/gmx_cpuid.h | 9 ++++++++- src/programs/mdrun/runner.c | 18 +++++++++--------- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/src/gromacs/gmxlib/gmx_cpuid.c b/src/gromacs/gmxlib/gmx_cpuid.c index 020e37aa55..53923a1c9c 100644 --- a/src/gromacs/gmxlib/gmx_cpuid.c +++ b/src/gromacs/gmxlib/gmx_cpuid.c @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -225,6 +225,18 @@ gmx_cpuid_feature (gmx_cpuid_t cpuid, } +int +gmx_cpuid_is_intel_nehalem (const gmx_cpuid_t cpuid) +{ + return (cpuid->vendor == GMX_CPUID_VENDOR_INTEL && + cpuid->family == 6 && + (cpuid->model == 0x2E || + cpuid->model == 0x1A || + cpuid->model == 0x1E || + cpuid->model == 0x2F || + cpuid->model == 0x2C || + cpuid->model == 0x25)); +} /* What type of SIMD was compiled in, if any? */ diff --git a/src/gromacs/legacyheaders/gmx_cpuid.h b/src/gromacs/legacyheaders/gmx_cpuid.h index d595c51a37..9eb261cb83 100644 --- a/src/gromacs/legacyheaders/gmx_cpuid.h +++ b/src/gromacs/legacyheaders/gmx_cpuid.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. 
 * - * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -194,6 +194,13 @@ gmx_cpuid_feature (gmx_cpuid_t cpuid, enum gmx_cpuid_feature feature); +/* Check whether the CPU is an Intel with Nehalem microarchitecture. + * Return 0 if not Intel Nehalem, 1 if Intel Nehalem. + */ +int +gmx_cpuid_is_intel_nehalem (const gmx_cpuid_t cpuid); + + /* Return pointers to cpu topology information. * * Important: CPU topology requires more OS support than most other diff --git a/src/programs/mdrun/runner.c b/src/programs/mdrun/runner.c index c87a06497e..b1e52db1d7 100644 --- a/src/programs/mdrun/runner.c +++ b/src/programs/mdrun/runner.c @@ -310,22 +310,22 @@ static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo, * two CPUs with HT, so we need a limit<16; thus we use 12. * A reasonable limit for Intel Sandy and Ivy bridge, * not knowing the topology, is 16 threads. + * Below we check for Intel and AVX, which for now includes + * Sandy/Ivy Bridge, Haswell/Broadwell. By checking for AVX instead of + * model numbers we ensure that future Intel CPUs are also covered. 
*/ const int nthreads_omp_always_faster = 4; const int nthreads_omp_always_faster_Nehalem = 12; - const int nthreads_omp_always_faster_SandyBridge = 16; - const int first_model_Nehalem = 0x1A; - const int first_model_SandyBridge = 0x2A; - gmx_bool bIntel_Family6; + const int nthreads_omp_always_faster_Intel_AVX = 16; + gmx_bool bIntelAVX; - bIntel_Family6 = + bIntelAVX = (gmx_cpuid_vendor(hwinfo->cpuid_info) == GMX_CPUID_VENDOR_INTEL && - gmx_cpuid_family(hwinfo->cpuid_info) == 6); + gmx_cpuid_feature(hwinfo->cpuid_info, GMX_CPUID_FEATURE_X86_AVX)); if (nthreads_tot <= nthreads_omp_always_faster || - (bIntel_Family6 && - ((gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_Nehalem && nthreads_tot <= nthreads_omp_always_faster_Nehalem) || - (gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_SandyBridge && nthreads_tot <= nthreads_omp_always_faster_SandyBridge)))) + ((gmx_cpuid_is_intel_nehalem(hwinfo->cpuid_info) && nthreads_tot <= nthreads_omp_always_faster_Nehalem) || + (bIntelAVX && nthreads_tot <= nthreads_omp_always_faster_Intel_AVX))) { /* Use pure OpenMP parallelization */ nthreads_tmpi = 1; -- 2.22.0