/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012, by the GROMACS development team, led by
- * David van der Spoel, Berk Hess, Erik Lindahl, and including many
- * others, as listed in the AUTHORS file in the top-level source
- * directory and at http://www.gromacs.org.
+ * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
*
* GROMACS is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-#if defined(HAVE_SCHED_H) && defined(HAVE_SCHED_GETAFFINITY)
-#define _GNU_SOURCE
-#include <sched.h>
-#include <sys/syscall.h>
+#include "gmxpre.h"
+
+#include "config.h"
+
+#ifdef HAVE_SCHED_AFFINITY
+# ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+# endif
+# include <sched.h>
+# include <sys/syscall.h>
#endif
-#include <string.h>
-#include <errno.h>
+
+#include "gromacs/legacyheaders/gmx_thread_affinity.h"
+
#include <assert.h>
+#include <errno.h>
#include <stdio.h>
-#include "typedefs.h"
-#include "types/commrec.h"
-#include "types/hw_info.h"
-#include "gmx_cpuid.h"
-#include "gmx_omp.h"
-#include "gmx_omp_nthreads.h"
-#include "mdrun.h"
-#include "md_logging.h"
-#include "statutil.h"
-#include "gmx_thread_affinity.h"
+#include <string.h>
#include "thread_mpi/threads.h"
+#include "gromacs/legacyheaders/copyrite.h"
+#include "gromacs/legacyheaders/gmx_cpuid.h"
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/md_logging.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/legacyheaders/types/commrec.h"
+#include "gromacs/legacyheaders/types/hw_info.h"
+#include "gromacs/utility/basenetwork.h"
+#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/gmxomp.h"
+#include "gromacs/utility/smalloc.h"
static int
get_thread_affinity_layout(FILE *fplog,
gmx_hw_opt_t *hw_opt,
const gmx_hw_info_t *hwinfo)
{
- int nth_affinity_set, thread_id_node, thread_id,
+ int nth_affinity_set, thread0_id_node,
nthread_local, nthread_node, nthread_hw_max, nphyscore;
int offset;
- /* these are inherently global properties that are shared among all threads
- */
- static const int *locality_order;
- static int rc;
- static gmx_bool have_locality_order = FALSE;
- static tMPI_Thread_mutex_t locality_order_mtx =
- TMPI_THREAD_MUTEX_INITIALIZER;
- static tMPI_Thread_cond_t locality_order_cond =
- TMPI_THREAD_COND_INITIALIZER;
+ const int *locality_order;
+ int rc;
if (hw_opt->thread_affinity == threadaffOFF)
{
"Can not set thread affinities on the current platform. On NUMA systems this\n"
"can cause performance degradation. If you think your platform should support\n"
"setting affinities, contact the GROMACS developers.");
-#endif /* __APPLE__ */
+#endif /* __APPLE__ */
return;
}
}
/* map the current process to cores */
- thread_id_node = 0;
- nthread_node = nthread_local;
+ thread0_id_node = 0;
+ nthread_node = nthread_local;
#ifdef GMX_MPI
if (PAR(cr) || MULTISIM(cr))
{
*/
MPI_Comm comm_intra;
- MPI_Comm_split(MPI_COMM_WORLD, gmx_hostname_num(), cr->rank_intranode,
+ MPI_Comm_split(MPI_COMM_WORLD,
+ gmx_physicalnode_id_hash(), cr->rank_intranode,
&comm_intra);
- MPI_Scan(&nthread_local, &thread_id_node, 1, MPI_INT, MPI_SUM, comm_intra);
+ MPI_Scan(&nthread_local, &thread0_id_node, 1, MPI_INT, MPI_SUM, comm_intra);
/* MPI_Scan is inclusive, but here we need exclusive */
- thread_id_node -= nthread_local;
+ thread0_id_node -= nthread_local;
/* Get the total number of threads on this physical node */
MPI_Allreduce(&nthread_local, &nthread_node, 1, MPI_INT, MPI_SUM, comm_intra);
MPI_Comm_free(&comm_intra);
md_print_info(cr, fplog, "Applying core pinning offset %d\n", offset);
}
- /* hw_opt is shared among tMPI threads, so for thread safety we need to do
- * the layout detection only on master as core_pinning_stride is an in-out
- * parameter and gets auto-set depending on its initial value.
- * This
- * This is not thread-safe with multi-simulations, but that's anyway not
- * supported by tMPI. */
- if (SIMMASTER(cr))
- {
- int ret;
- int i;
-
- ret = tMPI_Thread_mutex_lock(&locality_order_mtx);
- if (ret != 0)
- {
- goto locality_order_err;
- }
- rc = get_thread_affinity_layout(fplog, cr, hwinfo,
- nthread_node,
- offset, &hw_opt->core_pinning_stride,
- &locality_order);
- have_locality_order = TRUE;
- ret = tMPI_Thread_cond_broadcast(&locality_order_cond);
- if (ret != 0)
- {
- tMPI_Thread_mutex_unlock(&locality_order_mtx);
- goto locality_order_err;
- }
- ret = tMPI_Thread_mutex_unlock(&locality_order_mtx);
- if (ret != 0)
- {
- goto locality_order_err;
- }
- }
- else
- {
- int ret;
- /* all other threads wait for the locality order data. */
- ret = tMPI_Thread_mutex_lock(&locality_order_mtx);
- if (ret != 0)
- {
- goto locality_order_err;
- }
-
- while (!have_locality_order)
- {
- ret = tMPI_Thread_cond_wait(&locality_order_cond,
- &locality_order_mtx);
- if (ret != 0)
- {
- tMPI_Thread_mutex_unlock(&locality_order_mtx);
- goto locality_order_err;
- }
- }
- ret = tMPI_Thread_mutex_unlock(&locality_order_mtx);
- if (ret != 0)
- {
- goto locality_order_err;
- }
- }
+ rc = get_thread_affinity_layout(fplog, cr, hwinfo,
+ nthread_node,
+ offset, &hw_opt->core_pinning_stride,
+ &locality_order);
if (rc != 0)
{
* of threads on which we succeeded.
*/
nth_affinity_set = 0;
-#pragma omp parallel firstprivate(thread_id_node) num_threads(nthread_local) \
- reduction(+:nth_affinity_set)
+#pragma omp parallel num_threads(nthread_local) reduction(+:nth_affinity_set)
{
+ int thread_id, thread_id_node;
int index, core;
gmx_bool setaffinity_ret;
- thread_id = gmx_omp_get_thread_num();
- thread_id_node += thread_id;
- index = offset + thread_id_node*hw_opt->core_pinning_stride;
+ thread_id = gmx_omp_get_thread_num();
+ thread_id_node = thread0_id_node + thread_id;
+ index = offset + thread_id_node*hw_opt->core_pinning_stride;
if (locality_order != NULL)
{
core = locality_order[index];
if (debug)
{
- fprintf(debug, "On rank %2d, thread %2d, core %2d the affinity setting returned %d\n",
- cr->nodeid, gmx_omp_get_thread_num(), core, setaffinity_ret);
+ fprintf(debug, "On rank %2d, thread %2d, index %2d, core %2d the affinity setting returned %d\n",
+ cr->nodeid, gmx_omp_get_thread_num(), index, core, setaffinity_ret);
}
}
}
}
return;
-
-locality_order_err:
- /* any error in affinity setting shouldn't be fatal, but should generate
- a warning */
- md_print_warn(NULL, fplog,
- "WARNING: Obtaining affinity information failed due to a basic system error: %s.\n"
- " This can cause performance degradation! ",
- strerror(errno));
- return;
}
/* Check the process affinity mask and if it is found to be non-zero,
* Note that this will only work on Linux as we use a GNU feature.
*/
void
-gmx_check_thread_affinity_set(FILE *fplog, const t_commrec *cr,
- gmx_hw_opt_t *hw_opt, int ncpus,
- gmx_bool bAfterOpenmpInit)
+gmx_check_thread_affinity_set(FILE *fplog,
+ const t_commrec *cr,
+ gmx_hw_opt_t *hw_opt,
+ int gmx_unused nthreads_hw_avail,
+ gmx_bool bAfterOpenmpInit)
{
-#ifdef HAVE_SCHED_GETAFFINITY
+#ifdef HAVE_SCHED_AFFINITY
cpu_set_t mask_current;
int i, ret, cpu_count, cpu_set;
gmx_bool bAllSet;
+#endif
assert(hw_opt);
+ if (!bAfterOpenmpInit)
+ {
+ /* Check for externally set OpenMP affinity and turn off internal
+ * pinning if any is found. We need to do this check early to tell
+ * thread-MPI whether it should do pinning when spawning threads.
+ * TODO: the above no longer holds, we should move these checks later
+ */
+ if (hw_opt->thread_affinity != threadaffOFF)
+ {
+ char *message;
+ if (!gmx_omp_check_thread_affinity(&message))
+ {
+ /* TODO: with -pin auto we should only warn when using all cores */
+ md_print_warn(cr, fplog, "%s", message);
+ sfree(message);
+ hw_opt->thread_affinity = threadaffOFF;
+ }
+ }
+
+ /* With thread-MPI this is needed as pinning might get turned off,
+ * which needs to be known before starting thread-MPI.
+ * With thread-MPI hw_opt is processed here on the master rank
+ * and passed to the other ranks later, so we only do this on master.
+ */
+ if (!SIMMASTER(cr))
+ {
+ return;
+ }
+#ifndef GMX_THREAD_MPI
+ return;
+#endif
+ }
+
+#ifdef HAVE_SCHED_GETAFFINITY
if (hw_opt->thread_affinity == threadaffOFF)
{
/* internal affinity setting is off, don't bother checking process affinity */
* detected CPUs is >= the CPUs in the current set.
* We need to check for CPU_COUNT as it was added only in glibc 2.6. */
#ifdef CPU_COUNT
- if (ncpus < CPU_COUNT(&mask_current))
+ if (nthreads_hw_avail < CPU_COUNT(&mask_current))
{
if (debug)
{
- fprintf(debug, "%d CPUs detected, but %d was returned by CPU_COUNT",
- ncpus, CPU_COUNT(&mask_current));
+ fprintf(debug, "%d hardware threads detected, but %d was returned by CPU_COUNT",
+ nthreads_hw_avail, CPU_COUNT(&mask_current));
}
return;
}
#endif /* CPU_COUNT */
bAllSet = TRUE;
- for (i = 0; (i < ncpus && i < CPU_SETSIZE); i++)
+ for (i = 0; (i < nthreads_hw_avail && i < CPU_SETSIZE); i++)
{
bAllSet = bAllSet && (CPU_ISSET(i, &mask_current) != 0);
}
fprintf(debug, "Default affinity mask found\n");
}
}
-#endif /* HAVE_SCHED_GETAFFINITY */
+#endif /* HAVE_SCHED_AFFINITY */
}