From c68d61e160d3c805a86af76b6c4d1814aaa516e5 Mon Sep 17 00:00:00 2001 From: Szilard Pall Date: Mon, 24 Sep 2012 23:54:50 +0200 Subject: [PATCH] fix internal/external OpenMP thread affinity clash Thread affinity set by the OpenMP library, either automatically or requested by the user through environment variables, can conflict with the mdrun internal affinity setting. To avoid performance degradation, as Intel OpenMP has affinity setting on by default, we will explicitly disable it unless the user manually set OpenMP affinity through one of the KMP_AFFINITY or GOMP_CPU_AFFINITY environment variables. If any of these variables is set, we honor the externally set affinity and turn off the internal one. Change-Id: I78c6347154d6f11695ee04243db17bbb2e5cb0a7 --- include/gmx_omp.h | 7 +++ src/gmxlib/gmx_omp.c | 110 +++++++++++++++++++++++++++++++++++++++++++ src/kernel/runner.c | 33 ++++++++----- 3 files changed, 138 insertions(+), 12 deletions(-) diff --git a/include/gmx_omp.h b/include/gmx_omp.h index de55a43bf5..01e5208360 100644 --- a/include/gmx_omp.h +++ b/include/gmx_omp.h @@ -25,6 +25,9 @@ #ifndef GMX_OMP_H #define GMX_OMP_H +#include "types/commrec.h" +#include "mdrun.h" + /* This module defines wrappers for OpenMP API functions and enables compiling * code even when OpenMP is turned off in the build system. * Therefore, OpenMP API functions should always be used through these wrappers @@ -49,4 +52,8 @@ int gmx_omp_get_thread_num(void); * by a num_threads clause. Acts as a wrapper for omp_get_max_threads(void). */ void gmx_omp_set_num_threads(int num_threads); +/*! Check for externally set thread affinity to avoid conflicts with GROMACS internal setting. 
*/ +void gmx_omp_check_thread_affinity(FILE *fplog, const t_commrec *cr, + gmx_hw_opt_t *hw_opt); + #endif /* GMX_OMP_H */ diff --git a/src/gmxlib/gmx_omp.c b/src/gmxlib/gmx_omp.c index e5c1f4540b..29622e9a7e 100644 --- a/src/gmxlib/gmx_omp.c +++ b/src/gmxlib/gmx_omp.c @@ -30,6 +30,12 @@ #include #endif +#include + +#include "md_logging.h" +#include "gmx_fatal.h" +#include "statutil.h" +#include "string2.h" #include "gmx_omp.h" int gmx_omp_get_max_threads(void) @@ -67,3 +73,107 @@ void gmx_omp_set_num_threads(int num_threads) return; #endif } + +/*! + * Thread affinity set by the OpenMP library can conflict with the GROMACS + * internal affinity setting. + * + * While GNU OpenMP does not set affinity by default, the Intel OpenMP library + * does. This conflicts with the internal affinity (especially thread-MPI) + * setting, results in incorrectly locked threads, and causes dreadful performance. + * + * The KMP_AFFINITY environment variable is used by Intel, GOMP_CPU_AFFINITY + * by the GNU compilers (Intel also honors it well). If any of the variables + * is set, we honor it, disable the internal pinning, and warn the user. + * When using Intel OpenMP, we will disable affinity if the user did not set it + * manually through one of the aforementioned environment variables. + * + * Note that the Intel OpenMP affinity disabling will only take effect if this + * function is called before the OpenMP library gets initialized which happens + * when the first call is made into a compilation unit that contains OpenMP + * pragmas. + */ +void gmx_omp_check_thread_affinity(FILE *fplog, const t_commrec *cr, + gmx_hw_opt_t *hw_opt) +{ + gmx_bool bKmpAffinitySet, bGompCpuAffinitySet; + char *kmp_env, *gomp_env; + + /* no need to worry if internal thread pinning is turned off */ + if (!hw_opt->bThreadPinning) + { + return; + } + +#if defined(GMX_OPENMP) + + /* We assume that the affinity setting is available on all platforms + * gcc supports. Even if this is not the case (e.g.
Mac OS) the user + * will only get a warning.*/ + bGompCpuAffinitySet = FALSE; + gomp_env = NULL; +#if defined(__GNUC__) + gomp_env = getenv("GOMP_CPU_AFFINITY"); + bGompCpuAffinitySet = (gomp_env != NULL); +#endif /* __GNUC__ */ + + bKmpAffinitySet = FALSE; +#if defined(__INTEL_COMPILER) + kmp_env = getenv("KMP_AFFINITY"); + bKmpAffinitySet = (kmp_env != NULL); + + /* disable Intel OpenMP affinity if neither KMP_AFFINITY nor + * GOMP_CPU_AFFINITY is set (Intel uses the GNU env. var as well) */ + if (!bKmpAffinitySet && !bGompCpuAffinitySet) + { + int retval; + +#ifdef _MSC_VER + /* Windows not POSIX */ + retval = _putenv_s("KMP_AFFINITY", "disabled"); +#else + /* POSIX */ + retval = setenv("KMP_AFFINITY", "disabled", 0); +#endif /* _MSC_VER */ + + if (debug) + { + fprintf(debug, "Disabling Intel OpenMP affinity by setting the KMP_AFFINITY=disabled env. var.\n"); + } + + if (retval != 0) + { + gmx_warning("Disabling Intel OpenMp affinity setting failed!"); + } + } + + /* turn off internal pinning if KMP_AFFINITY != "disabled" */ + if (bKmpAffinitySet && (gmx_strncasecmp(kmp_env, "disabled", 8) != 0)) + { + md_print_warn(cr, fplog, "WARNING: KMP_AFFINITY set, will turn off %s internal affinity\n" + " setting as the two can conflict and cause performance degradation.\n" + " To keep using the %s internal affinity setting, set the\n" + " KMP_AFFINITY=disabled environment variable.", + ShortProgram(), ShortProgram()); + + hw_opt->bThreadPinning = FALSE; + } +#endif /* __INTEL_COMPILER */ + +#if defined(__INTEL_COMPILER) || defined(__GNUC__) + /* turn off internal pinning if GOMP_CPU_AFFINITY is set & non-empty */ + if (bGompCpuAffinitySet && gomp_env != NULL && *gomp_env != '\0') + { + md_print_warn(cr, fplog, + "WARNING: GOMP_CPU_AFFINITY set, will turn off %s internal affinity\n" + " setting as the two can conflict and cause performance degradation.\n" + " To keep using the %s internal affinity setting, unset the\n" + " GOMP_CPU_AFFINITY environment variable.", +
ShortProgram(), ShortProgram()); + + hw_opt->bThreadPinning = FALSE; + } +#endif /* __INTEL_COMPILER || __GNUC__ */ + +#endif /* GMX_OPENMP */ +} diff --git a/src/kernel/runner.c b/src/kernel/runner.c index 2c825d458e..7599238dc5 100644 --- a/src/kernel/runner.c +++ b/src/kernel/runner.c @@ -263,7 +263,8 @@ static t_commrec *mdrunner_start_threads(gmx_hw_opt_t *hw_opt, fflush(stderr); /* now spawn new threads that start mdrunner_start_fn(), while the main thread returns */ - ret=tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi, TMPI_AFFINITY_ALL_CORES, + ret=tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi, + (hw_opt->bThreadPinning ? TMPI_AFFINITY_ALL_CORES : TMPI_AFFINITY_NONE), mdrunner_start_fn, (void*)(mda) ); if (ret!=TMPI_SUCCESS) return NULL; @@ -734,11 +735,21 @@ static void convert_to_verlet_scheme(FILE *fplog, */ static void set_cpu_affinity(FILE *fplog, const t_commrec *cr, - const gmx_hw_opt_t *hw_opt, + gmx_hw_opt_t *hw_opt, int nthreads_pme, const gmx_hw_info_t *hwinfo, const t_inputrec *inputrec) { +#if defined GMX_THREAD_MPI + /* With the number of TMPI threads equal to the number of cores + * we already pinned in thread-MPI, so don't pin again here. + */ + if (hw_opt->nthreads_tmpi == tMPI_Thread_get_hw_number()) + { + return; + } +#endif + #ifdef GMX_OPENMP /* TODO: actually we could do this even without OpenMP?! */ #ifdef __linux /* TODO: only linux? why not everywhere if sched_setaffinity is available */ if (hw_opt->bThreadPinning) @@ -1135,6 +1146,12 @@ int mdrunner(gmx_hw_opt_t *hw_opt, } } + /* Check for externally set OpenMP affinity and turn off internal + * pinning if any is found. We need to do this check early to tell + * thread-MPI whether it should do pinning when spawning threads.
+ */ + gmx_omp_check_thread_affinity(fplog, cr, hw_opt); + #ifdef GMX_THREAD_MPI if (SIMMASTER(cr)) { @@ -1581,16 +1598,8 @@ int mdrunner(gmx_hw_opt_t *hw_opt, snew(pmedata,1); } -#if defined GMX_THREAD_MPI - /* With the number of TMPI threads equal to the number of cores - * we already pinned in thread-MPI, so don't pin again here. - */ - if (hw_opt->nthreads_tmpi != tMPI_Thread_get_hw_number()) -#endif - { - /* Set the CPU affinity */ - set_cpu_affinity(fplog,cr,hw_opt,nthreads_pme,hwinfo,inputrec); - } + /* Set the CPU affinity */ + set_cpu_affinity(fplog,cr,hw_opt,nthreads_pme,hwinfo,inputrec); /* Initiate PME if necessary, * either on all nodes or on dedicated PME nodes only. */ -- 2.22.0