Comprehensive hwinfo structure concurrency fix.
author Sander Pronk <pronk@kth.se>
Tue, 4 Jun 2013 20:31:08 +0000 (22:31 +0200)
committer Gerrit Code Review <gerrit@gerrit.gromacs.org>
Mon, 1 Jul 2013 18:03:45 +0000 (20:03 +0200)
The hwinfo structure, and the structures contained therein, are
inherently global to all mdrun processes/ranks. This patch makes sure
that
- The hwinfo structure is shared among all threads
- Only one thread creates the hwinfo structure
- The hwinfo structure is safe for all threads to read once they
    have obtained it

In addition, it fixes the detection of pthread_setaffinity in thread_mpi.

This fixes concurrency issues with thread affinity setting, with or
without MPI, and makes runner.c slightly easier to read because the
concurrency logic is pushed down into gmx_detect_hardware.c.

Fixes #1270, #1254

Note that #1254 issue 3 seems to be an OpenMPI bug.

Change-Id: I236e81923324d7873f3d8633889b91c7c02a7843
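
The locking scheme this patch introduces in gmx_detect_hardware.c is, at
its core, a reference-counted, mutex-guarded singleton. Below is a minimal
standalone sketch of that pattern; it uses plain pthreads instead of
thread_mpi's wrappers, and all names and the stand-in detection value are
illustrative, not code from this patch.

    /* refcounted global init/free, sketched with pthreads */
    #include <pthread.h>
    #include <stdlib.h>

    typedef struct { int nthreads_hw_avail; } hw_info_t;

    static hw_info_t      *hwinfo_g;                /* the shared instance */
    static int             n_hwinfo = 0;            /* reference counter   */
    static pthread_mutex_t hw_lock  = PTHREAD_MUTEX_INITIALIZER;

    hw_info_t *hw_detect(void)
    {
        pthread_mutex_lock(&hw_lock);
        if (n_hwinfo == 0)                /* only the first caller detects */
        {
            hwinfo_g = calloc(1, sizeof(*hwinfo_g));
            hwinfo_g->nthreads_hw_avail = 4;   /* stand-in for detection */
        }
        n_hwinfo++;
        pthread_mutex_unlock(&hw_lock);
        return hwinfo_g;             /* every caller gets the same pointer */
    }

    void hw_free(hw_info_t *hwinfo)
    {
        pthread_mutex_lock(&hw_lock);
        if (--n_hwinfo == 0 && hwinfo == hwinfo_g)   /* last user frees */
        {
            free(hwinfo_g);
            hwinfo_g = NULL;
        }
        pthread_mutex_unlock(&hw_lock);
    }

    int main(void)
    {
        hw_info_t *hw = hw_detect();  /* every rank/thread may call this */
        hw_free(hw);                  /* ...and must pair it with a free */
        return 0;
    }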

cmake/ThreadMPI.cmake
include/gmx_detect_hardware.h
include/nbnxn_cuda_data_mgmt.h
include/types/forcerec.h
include/types/hw_info.h
src/gmxlib/gmx_detect_hardware.c
src/gmxlib/gmx_thread_affinity.c
src/kernel/runner.c
src/mdlib/forcerec.c
src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu

diff --git a/cmake/ThreadMPI.cmake b/cmake/ThreadMPI.cmake
index 84714196f0f5f67b92f7714cd0670fc0419d2820..18a2c3c49fea9df2d12d90accf82dabab6cb3ccd 100644
@@ -71,6 +71,40 @@ endif (CMAKE_USE_PTHREADS_INIT)
 # Turns on thread_mpi core threading functions.
 MACRO(TMPI_ENABLE_CORE INCDIR)
     TMPI_TEST_ATOMICS(${INCDIR})
+
+# affinity checks
+    include(CheckFunctionExists)
+    if (THREAD_PTHREADS)
+        set(CMAKE_REQUIRED_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
+        # check for pthread_setaffinity_np
+        check_c_source_compiles(
+            "#define _GNU_SOURCE
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+    int main(void) { cpu_set_t set;
+        CPU_ZERO(&set);
+        CPU_SET(0, &set);
+        pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
+        return 0;
+    }"
+            PTHREAD_SETAFFINITY
+        )
+        if (PTHREAD_SETAFFINITY)
+            set(HAVE_PTHREAD_SETAFFINITY 1)
+        endif (PTHREAD_SETAFFINITY)
+        set(CMAKE_REQUIRED_LIBRARIES)
+    endif (THREAD_PTHREADS)
+
+
+# this runs on POSIX systems
+    check_include_files(unistd.h        HAVE_UNISTD_H)
+    check_include_files(sched.h         HAVE_SCHED_H)
+    check_include_files(sys/time.h      HAVE_SYS_TIME_H)
+    check_function_exists(sysconf       HAVE_SYSCONF)
+# this runs on windows
+#check_include_files(windows.h         HAVE_WINDOWS_H)
 ENDMACRO(TMPI_ENABLE_CORE)
 
 # enable C++ library build.
@@ -129,40 +163,6 @@ MACRO(TMPI_ENABLE)
     endif (THREAD_MPI_WARNINGS)
 
     include(CheckCSourceCompiles)
-
-# affinity checks
-    include(CheckFunctionExists)
-    if (THREAD_PTHREADS)
-        set(CMAKE_REQUIRED_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
-        # check for sched_setaffinity
-        check_c_source_compiles(
-            "#define _GNU_SOURCE
-#include <pthread.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <errno.h>
-    int main(void) { cpu_set_t set;
-        CPU_ZERO(&set);
-        CPU_SET(0, &set);
-        pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
-        return 0;
-    }"
-            PTHREAD_SETAFFINITY
-        )
-        if (PTHREAD_SETAFFINITY)
-            set(HAVE_PTHREAD_SETAFFINITY 1)
-        endif (PTHREAD_SETAFFINITY)
-        set(CMAKE_REQUIRED_LIBRARIES)
-    endif (THREAD_PTHREADS)
-
-
-# this runs on POSIX systems
-    check_include_files(unistd.h        HAVE_UNISTD_H)
-    check_include_files(sched.h         HAVE_SCHED_H)
-    check_include_files(sys/time.h      HAVE_SYS_TIME_H)
-    check_function_exists(sysconf       HAVE_SYSCONF)
-# this runs on windows
-#check_include_files(windows.h         HAVE_WINDOWS_H)
 ENDMACRO(TMPI_ENABLE)
 
 
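The check above only proves that pthread_setaffinity_np() compiles and
links; the runtime counterpart looks roughly like the sketch below. This
is an illustrative fragment, not code from this patch, and it assumes
HAVE_PTHREAD_SETAFFINITY reaches the source through a generated config
header (compile with -DHAVE_PTHREAD_SETAFFINITY -pthread on glibc).

    /* pin the calling thread to one logical CPU, mirroring the
       configure test above */
    #define _GNU_SOURCE
    #include <pthread.h>
    #include <sched.h>

    static int pin_self_to_core(int core)
    {
    #ifdef HAVE_PTHREAD_SETAFFINITY
        cpu_set_t set;

        CPU_ZERO(&set);
        CPU_SET(core, &set);
        /* returns 0 on success, an errno value on failure */
        return pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
    #else
        (void)core;
        return -1;  /* affinity setting not available on this platform */
    #endif
    }

    int main(void)
    {
        return pin_self_to_core(0) == 0 ? 0 : 1;
    }
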
diff --git a/include/gmx_detect_hardware.h b/include/gmx_detect_hardware.h
index 636e6dc9f54eb544828698207e81f35e71c97294..787fc561f66f70d768589477ab66af1300d31554 100644
@@ -45,19 +45,38 @@ extern "C" {
 } /* fixes auto-indentation problems */
 #endif
 
+/* The init and consistency functions depend on commrec, which may not be
+   usable in CUDA code because MPI types don't exist there.  */
+#ifndef __CUDACC__
+#include "types/commrec.h"
+/* return a pointer to a global hwinfo structure. */
 GMX_LIBGMX_EXPORT
-void gmx_detect_hardware(FILE *fplog, gmx_hw_info_t *hwinfo,
-                         const t_commrec *cr,
-                         gmx_bool bForceUseGPU, gmx_bool bTryUseGPU,
-                         const char *gpu_id);
+gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
+                                   gmx_bool bForceUseGPU, gmx_bool bTryUseGPU,
+                                   const char *gpu_id);
 
 GMX_LIBGMX_EXPORT
 void gmx_hardware_info_free(gmx_hw_info_t *hwinfo);
 
+/* Check the thread count + GPU assignment. This function must
+   either be run by all threads that persist (i.e. all tmpi threads),
+   or be run before they are created.  */
 GMX_LIBGMX_EXPORT
 void gmx_check_hw_runconf_consistency(FILE *fplog, gmx_hw_info_t *hwinfo,
                                       const t_commrec *cr, int ntmpi_requested,
                                       gmx_bool bUseGPU);
+#endif
+
+
+/* Check whether a GPU is shared among ranks, and return the number of shared
+   GPUs
+
+   gpu_info      = the GPU info struct
+
+   returns: The number of same-GPU pairs in the ranks' assignments, or 0 */
+GMX_LIBGMX_EXPORT
+int gmx_count_gpu_dev_shared(const gmx_gpu_info_t *gpu_info);
+
 
 #ifdef __cplusplus
 }
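
Taken together, the declarations above imply the following call sequence.
This is a hedged usage sketch only — error handling and unrelated setup
are elided, and it compiles only inside the GROMACS tree:

    gmx_hw_info_t *hwinfo;

    /* every rank/thread may call this; all receive the same pointer */
    hwinfo = gmx_detect_hardware(fplog, cr, bForceUseGPU, bTryUseGPU, gpu_id);

    /* run by all persistent (tMPI) threads, or before they are spawned */
    gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, ntmpi_requested,
                                     bUseGPU);

    /* ... hwinfo is treated as read-only from here on ... */

    gmx_hardware_info_free(hwinfo);  /* each detect call pairs with a free */
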
diff --git a/include/nbnxn_cuda_data_mgmt.h b/include/nbnxn_cuda_data_mgmt.h
index 1d29225cbaffa92fb954d7c12f4005ec043620dc..f215978a0ed0191a52072931cb6a19612b1e88fe 100644
@@ -60,7 +60,7 @@ extern "C" {
 FUNC_QUALIFIER
 void nbnxn_cuda_init(FILE *fplog,
                      nbnxn_cuda_ptr_t *p_cu_nb,
-                     gmx_gpu_info_t *gpu_info, int my_gpu_index,
+                     const gmx_gpu_info_t *gpu_info, int my_gpu_index,
                      /* true if both local and non-local are done on GPU */
                      gmx_bool bLocalAndNonlocal) FUNC_TERM
 
diff --git a/include/types/forcerec.h b/include/types/forcerec.h
index ec6b6a5746e050267a7f5d89c16d98cdae6c9603..2e78634a43718cd815d38a402a82ff97bd01bdcf 100644
@@ -193,14 +193,14 @@ typedef struct {
     gmx_bool bDomDec;
 
     /* PBC stuff */
-    int            ePBC;
-    gmx_bool       bMolPBC;
-    int            rc_scaling;
-    rvec           posres_com;
-    rvec           posres_comB;
-
-    gmx_hw_info_t *hwinfo;
-    gmx_bool       use_cpu_acceleration;
+    int                  ePBC;
+    gmx_bool             bMolPBC;
+    int                  rc_scaling;
+    rvec                 posres_com;
+    rvec                 posres_comB;
+
+    const gmx_hw_info_t *hwinfo;
+    gmx_bool             use_cpu_acceleration;
 
     /* Interaction for calculated in kernels. In many cases this is similar to
      * the electrostatics settings in the inputrecord, but the difference is that
diff --git a/include/types/hw_info.h b/include/types/hw_info.h
index a8d5c2b75efcbaa4355a013f528a5bf0845783db..f3c9c9284bf79131e597808151b2222d51e4ed20 100644
@@ -67,9 +67,7 @@ static const char * const gpu_detect_res_str[] =
  * The gmx_hardware_detect module initializes it. */
 typedef struct
 {
-    gmx_bool            bUserSet;       /* true if the GPUs in cuda_dev_use are manually provided by the user */
-    gmx_bool            bDevShare;      /* true if any of the devices is shared by
-                                           (t)MPI ranks, with auto-detection always FALSE */
+    gmx_bool             bUserSet;      /* true if the GPUs in cuda_dev_use are manually provided by the user */
 
     int                  ncuda_dev_use; /* number of devices selected to be used */
     int                 *cuda_dev_use;  /* index of the devices selected to be used */
@@ -78,18 +76,23 @@ typedef struct
 } gmx_gpu_info_t;
 
 /* Hardware information structure with CPU and GPU information.
- * It is initialized by gmx_detect_hardware(). */
+ * It is initialized by gmx_detect_hardware().
+ * NOTE: this structure may only contain structures that are globally valid
+ *       (i.e. must be able to be shared among all threads) */
 typedef struct
 {
-    gmx_bool        bCanUseGPU;        /* True if compatible GPUs are detected during hardware detection */
-    gmx_gpu_info_t  gpu_info;          /* Information about GPUs detected in the system */
+    gmx_bool        bCanUseGPU;          /* True if compatible GPUs are detected during hardware detection */
+    gmx_gpu_info_t  gpu_info;            /* Information about GPUs detected in the system */
 
-    gmx_cpuid_t     cpuid_info;        /* CPUID information about CPU detected;
-                                          NOTE: this will only detect the CPU thread 0 of the
-                                          current process runs on. */
-    int             nthreads_hw_avail; /* Number of hardware threads available; this number
-                                          is based on the number of CPUs reported as available
-                                          by the OS at the time of detection. */
+    gmx_cpuid_t     cpuid_info;          /* CPUID information about CPU detected;
+                                            NOTE: this will only detect the CPU that thread 0 of
+                                            the current process runs on. */
+    int             nthreads_hw_avail;   /* Number of hardware threads available; this number
+                                            is based on the number of CPUs reported as available
+                                            by the OS at the time of detection. */
+    gmx_bool        bConsistencyChecked; /* whether
+                                            gmx_check_hw_runconf_consistency()
+                                            has been run with this hw_info */
 } gmx_hw_info_t;
 
 #ifdef __cplusplus
diff --git a/src/gmxlib/gmx_detect_hardware.c b/src/gmxlib/gmx_detect_hardware.c
index 49dd1e0b95f4439642027096f889d26038c6afcc..640d74e85e63b69323adbab447bf231554e7b399 100644
@@ -52,6 +52,8 @@
 #include "main.h"
 #include "md_logging.h"
 
+#include "thread_mpi/threads.h"
+
 #if ((defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) && !(defined (__CYGWIN__) || defined (__CYGWIN32__)))
 #include "windows.h"
 #endif
@@ -66,8 +68,16 @@ static unsigned int max_gpu_ids_user = 64;
 static const char * invalid_gpuid_hint =
     "A delimiter-free sequence of valid numeric IDs of available GPUs is expected.";
 
+/* The globally shared hwinfo structure. */
+static gmx_hw_info_t      *hwinfo_g;
+/* A reference counter for the hwinfo structure */
+static int                 n_hwinfo = 0;
+/* A lock to protect the hwinfo structure */
+static tMPI_Thread_mutex_t hw_info_lock = TMPI_THREAD_MUTEX_INITIALIZER;
+
+
 /* FW decl. */
-void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count);
+static void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count);
 
 static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info, gmx_bool bPrintAll)
 {
@@ -193,226 +203,262 @@ void gmx_check_hw_runconf_consistency(FILE *fplog, gmx_hw_info_t *hwinfo,
                                       const t_commrec *cr, int ntmpi_requested,
                                       gmx_bool bUseGPU)
 {
-    int      npppn, ntmpi_pp, ngpu;
-    char     sbuf[STRLEN], th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
-    char     gpu_plural[2];
-    gmx_bool bGPUBin, btMPI, bMPI, bMaxMpiThreadsSet, bNthreadsAuto, bEmulateGPU;
+    int                        npppn, ntmpi_pp, ngpu;
+    char                       sbuf[STRLEN], th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
+    char                       gpu_plural[2];
+    gmx_bool                   bGPUBin, btMPI, bMPI, bMaxMpiThreadsSet, bNthreadsAuto, bEmulateGPU;
+    int                        ret;
+    static tMPI_Thread_mutex_t cons_lock = TMPI_THREAD_MUTEX_INITIALIZER;
+
 
     assert(hwinfo);
     assert(cr);
 
-    btMPI         = bMPI = FALSE;
-    bNthreadsAuto = FALSE;
+    /* Below we only do consistency checks for PP and GPUs;
+     * this is irrelevant for PME-only nodes, so in that case we return
+     * here.
+     */
+    if (!(cr->duty & DUTY_PP))
+    {
+        return;
+    }
+
+    /* All threads that are alive must run this function so that they get
+       consistent data, but the actual checking must happen only once. We
+       achieve this with mutual exclusion and by skipping the body once the
+       structure has been checked & set */
+    ret = tMPI_Thread_mutex_lock(&cons_lock);
+    if (ret != 0)
+    {
+        gmx_fatal(FARGS, "Error locking cons mutex: %s", strerror(errno));
+    }
+
+    if (!hwinfo->bConsistencyChecked)
+    {
+        btMPI         = bMPI = FALSE;
+        bNthreadsAuto = FALSE;
 #if defined(GMX_THREAD_MPI)
-    btMPI         = TRUE;
-    bNthreadsAuto = (ntmpi_requested < 1);
+        btMPI         = TRUE;
+        bNthreadsAuto = (ntmpi_requested < 1);
 #elif defined(GMX_LIB_MPI)
-    bMPI  = TRUE;
+        bMPI  = TRUE;
 #endif
 
 #ifdef GMX_GPU
-    bGPUBin      = TRUE;
+        bGPUBin      = TRUE;
 #else
-    bGPUBin      = FALSE;
+        bGPUBin      = FALSE;
 #endif
 
-    /* GPU emulation detection is done later, but we need here as well
-     * -- uncool, but there's no elegant workaround */
-    bEmulateGPU       = (getenv("GMX_EMULATE_GPU") != NULL);
-    bMaxMpiThreadsSet = (getenv("GMX_MAX_MPI_THREADS") != NULL);
+        /* GPU emulation detection is done later, but we need it here as well
+         * -- uncool, but there's no elegant workaround */
+        bEmulateGPU       = (getenv("GMX_EMULATE_GPU") != NULL);
+        bMaxMpiThreadsSet = (getenv("GMX_MAX_MPI_THREADS") != NULL);
 
-    if (SIMMASTER(cr))
-    {
-        /* check the acceleration mdrun is compiled with against hardware capabilities */
-        /* TODO: Here we assume homogeneous hardware which is not necessarily the case!
-         *       Might not hurt to add an extra check over MPI. */
+        /* check the acceleration mdrun is compiled with against hardware
+           capabilities */
+        /* TODO: Here we assume homogeneous hardware which is not necessarily
+                 the case! Might not hurt to add an extra check over MPI. */
         gmx_cpuid_acceleration_check(hwinfo->cpuid_info, fplog);
-    }
 
-    /* Below we only do consistency checks for PP and GPUs,
-     * this is irrelevant for PME only nodes, so in that case we return here.
-     */
-    if (!(cr->duty & DUTY_PP))
-    {
-        return;
-    }
-
-    /* Need to ensure that we have enough GPUs:
-     * - need one GPU per PP node
-     * - no GPU oversubscription with tMPI
-     * => keep on the GPU support, otherwise turn off (or bail if forced)
-     * */
-    /* number of PP processes per node */
-    npppn = cr->nrank_pp_intranode;
-
-    pernode[0]           = '\0';
-    th_or_proc_plural[0] = '\0';
-    if (btMPI)
-    {
-        sprintf(th_or_proc, "thread-MPI thread");
-        if (npppn > 1)
+        /* Need to ensure that we have enough GPUs:
+         * - need one GPU per PP node
+         * - no GPU oversubscription with tMPI
+         * => keep on the GPU support, otherwise turn off (or bail if forced)
+         * */
+        /* number of PP processes per node */
+        npppn = cr->nrank_pp_intranode;
+
+        pernode[0]           = '\0';
+        th_or_proc_plural[0] = '\0';
+        if (btMPI)
         {
-            sprintf(th_or_proc_plural, "s");
+            sprintf(th_or_proc, "thread-MPI thread");
+            if (npppn > 1)
+            {
+                sprintf(th_or_proc_plural, "s");
+            }
         }
-    }
-    else if (bMPI)
-    {
-        sprintf(th_or_proc, "MPI process");
-        if (npppn > 1)
+        else if (bMPI)
         {
-            sprintf(th_or_proc_plural, "es");
+            sprintf(th_or_proc, "MPI process");
+            if (npppn > 1)
+            {
+                sprintf(th_or_proc_plural, "es");
+            }
+            sprintf(pernode, " per node");
+        }
+        else
+        {
+            /* neither MPI nor tMPI */
+            sprintf(th_or_proc, "process");
         }
-        sprintf(pernode, " per node");
-    }
-    else
-    {
-        /* neither MPI nor tMPI */
-        sprintf(th_or_proc, "process");
-    }
-
-    if (bGPUBin)
-    {
-        print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);
-    }
 
-    if (bUseGPU && hwinfo->bCanUseGPU && !bEmulateGPU)
-    {
-        ngpu = hwinfo->gpu_info.ncuda_dev_use;
-        sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
+        if (bGPUBin)
+        {
+            print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);
+        }
 
-        /* number of tMPI threads atuo-adjusted */
-        if (btMPI && bNthreadsAuto && SIMMASTER(cr))
+        if (bUseGPU && hwinfo->bCanUseGPU && !bEmulateGPU)
         {
-            if (npppn < ngpu)
+            ngpu = hwinfo->gpu_info.ncuda_dev_use;
+            sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
+
+            /* number of tMPI threads auto-adjusted */
+            if (btMPI && bNthreadsAuto)
             {
-                if (hwinfo->gpu_info.bUserSet)
+                if (npppn < ngpu)
                 {
-                    /* The user manually provided more GPUs than threads we could
-                     * automatically start. */
-                    gmx_fatal(FARGS,
-                              "%d GPU%s provided, but only %d PP thread-MPI thread%s coud be started.\n"
-                              "%s requires one PP tread-MPI thread per GPU; use fewer GPUs%s.",
-                              ngpu, gpu_plural, npppn, th_or_proc_plural,
-                              ShortProgram(), bMaxMpiThreadsSet ? "\nor allow more threads to be used" : "");
-                }
-                else
-                {
-                    /* There are more GPUs than tMPI threads; we have to limit the number GPUs used. */
-                    md_print_warn(cr, fplog,
-                                  "NOTE: %d GPU%s were detected, but only %d PP thread-MPI thread%s can be started.\n"
-                                  "      %s can use one GPU per PP tread-MPI thread, so only %d GPU%s will be used.%s\n",
+                    if (hwinfo->gpu_info.bUserSet)
+                    {
+                        /* The user manually provided more GPUs than threads we
+                           could automatically start. */
+                        gmx_fatal(FARGS,
+                                  "%d GPU%s provided, but only %d PP thread-MPI thread%s could be started.\n"
+                                  "%s requires one PP thread-MPI thread per GPU; use fewer GPUs%s.",
                                   ngpu, gpu_plural, npppn, th_or_proc_plural,
-                                  ShortProgram(), npppn, npppn > 1 ? "s" : "",
-                                  bMaxMpiThreadsSet ? "\n      Also, you can allow more threads to be used by increasing GMX_MAX_MPI_THREADS" : "");
-
-                    if (cr->rank_pp_intranode == 0)
+                                  ShortProgram(), bMaxMpiThreadsSet ? "\nor allow more threads to be used" : "");
+                    }
+                    else
                     {
-                        limit_num_gpus_used(hwinfo, npppn);
-                        ngpu = hwinfo->gpu_info.ncuda_dev_use;
-                        sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
+                        /* There are more GPUs than tMPI threads; we have to
+                           limit the number of GPUs used. */
+                        md_print_warn(cr, fplog,
+                                      "NOTE: %d GPU%s were detected, but only %d PP thread-MPI thread%s can be started.\n"
+                                      "      %s can use one GPU per PP thread-MPI thread, so only %d GPU%s will be used.%s\n",
+                                      ngpu, gpu_plural, npppn,
+                                      th_or_proc_plural,
+                                      ShortProgram(), npppn,
+                                      npppn > 1 ? "s" : "",
+                                      bMaxMpiThreadsSet ? "\n      Also, you can allow more threads to be used by increasing GMX_MAX_MPI_THREADS" : "");
+
+                        if (cr->rank_pp_intranode == 0)
+                        {
+                            limit_num_gpus_used(hwinfo, npppn);
+                            ngpu = hwinfo->gpu_info.ncuda_dev_use;
+                            sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
+                        }
                     }
                 }
             }
-        }
 
-        if (ngpu != npppn)
-        {
-            if (hwinfo->gpu_info.bUserSet)
+            if (ngpu != npppn)
             {
-                gmx_fatal(FARGS,
-                          "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
-                          "%s was started with %d PP %s%s%s, but you provided %d GPU%s.",
-                          th_or_proc, btMPI ? "s" : "es", pernode,
-                          ShortProgram(), npppn, th_or_proc, th_or_proc_plural, pernode, ngpu, gpu_plural);
-            }
-            else
-            {
-                if (ngpu > npppn)
+                if (hwinfo->gpu_info.bUserSet)
                 {
-                    md_print_warn(cr, fplog,
-                                  "NOTE: potentially sub-optimal launch configuration, %s started with less\n"
-                                  "      PP %s%s%s than GPU%s available.\n"
-                                  "      Each PP %s can use only one GPU, %d GPU%s%s will be used.\n",
-                                  ShortProgram(),
-                                  th_or_proc, th_or_proc_plural, pernode, gpu_plural,
-                                  th_or_proc, npppn, gpu_plural, pernode);
-
-                    if (bMPI || (btMPI && cr->rank_pp_intranode == 0))
-                    {
-                        limit_num_gpus_used(hwinfo, npppn);
-                        ngpu = hwinfo->gpu_info.ncuda_dev_use;
-                        sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
-                    }
+                    gmx_fatal(FARGS,
+                              "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
+                              "%s was started with %d PP %s%s%s, but you provided %d GPU%s.",
+                              th_or_proc, btMPI ? "s" : "es", pernode,
+                              ShortProgram(), npppn, th_or_proc,
+                              th_or_proc_plural, pernode, ngpu, gpu_plural);
                 }
                 else
                 {
-                    /* Avoid duplicate error messages.
-                     * Unfortunately we can only do this at the physical node
-                     * level, since the hardware setup and MPI process count
-                     * might be differ over physical nodes.
-                     */
-                    if (cr->rank_pp_intranode == 0)
+                    if (ngpu > npppn)
                     {
-                        gmx_fatal(FARGS,
-                                  "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
-                                  "%s was started with %d PP %s%s%s, but only %d GPU%s were detected.",
-                                  th_or_proc, btMPI ? "s" : "es", pernode,
-                                  ShortProgram(), npppn, th_or_proc, th_or_proc_plural, pernode, ngpu, gpu_plural);
+                        md_print_warn(cr, fplog,
+                                      "NOTE: potentially sub-optimal launch configuration, %s started with fewer\n"
+                                      "      PP %s%s%s than GPU%s available.\n"
+                                      "      Each PP %s can use only one GPU, %d GPU%s%s will be used.\n",
+                                      ShortProgram(), th_or_proc,
+                                      th_or_proc_plural, pernode, gpu_plural,
+                                      th_or_proc, npppn, gpu_plural, pernode);
+
+                        if (bMPI || (btMPI && cr->rank_pp_intranode == 0))
+                        {
+                            limit_num_gpus_used(hwinfo, npppn);
+                            ngpu = hwinfo->gpu_info.ncuda_dev_use;
+                            sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
+                        }
                     }
-#ifdef GMX_MPI
                     else
                     {
-                        /* Avoid other ranks to continue after inconsistency */
-                        MPI_Barrier(cr->mpi_comm_mygroup);
+                        /* Avoid duplicate error messages.
+                         * Unfortunately we can only do this at the physical node
+                         * level, since the hardware setup and MPI process count
+                         * might differ across physical nodes.
+                         */
+                        if (cr->rank_pp_intranode == 0)
+                        {
+                            gmx_fatal(FARGS,
+                                      "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
+                                      "%s was started with %d PP %s%s%s, but only %d GPU%s were detected.",
+                                      th_or_proc, btMPI ? "s" : "es", pernode,
+                                      ShortProgram(), npppn, th_or_proc,
+                                      th_or_proc_plural, pernode, ngpu,
+                                      gpu_plural);
+                        }
                     }
-#endif
                 }
             }
-        }
 
-        hwinfo->gpu_info.bDevShare = FALSE;
-        if (hwinfo->gpu_info.bUserSet && (cr->rank_pp_intranode == 0))
-        {
-            int      i, j, same_count;
-            gmx_bool bSomeSame, bAllDifferent;
+            {
+                int      same_count;
 
-            same_count    = 0; /* number of GPUs shared among ranks */
-            bSomeSame     = FALSE;
-            bAllDifferent = TRUE;
+                same_count = gmx_count_gpu_dev_shared(&(hwinfo->gpu_info));
 
-            for (i = 0; i < ngpu - 1; i++)
-            {
-                for (j = i + 1; j < ngpu; j++)
+                if (btMPI && same_count > 0)
                 {
-                    bSomeSame       |= hwinfo->gpu_info.cuda_dev_use[i] == hwinfo->gpu_info.cuda_dev_use[j];
-                    bAllDifferent   &= hwinfo->gpu_info.cuda_dev_use[i] != hwinfo->gpu_info.cuda_dev_use[j];
-                    same_count      += hwinfo->gpu_info.cuda_dev_use[i] == hwinfo->gpu_info.cuda_dev_use[j];
+                    gmx_fatal(FARGS,
+                              "Invalid GPU assignment: can't share a GPU among multiple thread-MPI threads.\n"
+                              "Use MPI if you are sure that you want to assign a GPU to multiple threads.");
+                }
+
+                if (same_count > 0)
+                {
+                    md_print_warn(cr, fplog,
+                                  "NOTE: Potentially sub-optimal launch configuration: you assigned %s to\n"
+                                  "      multiple %s%s; this should be avoided as it can cause\n"
+                                  "      performance loss.\n",
+                                  same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es");
                 }
             }
+            print_gpu_use_stats(fplog, &hwinfo->gpu_info, cr);
+        }
+        hwinfo->bConsistencyChecked = TRUE;
+    }
 
-            /* store the number of shared/oversubscribed GPUs */
-            hwinfo->gpu_info.bDevShare = bSomeSame;
+    ret = tMPI_Thread_mutex_unlock(&cons_lock);
+    if (ret != 0)
+    {
+        gmx_fatal(FARGS, "Error unlocking cons mutex: %s", strerror(errno));
+    }
 
-            if (btMPI && !bAllDifferent)
-            {
-                gmx_fatal(FARGS,
-                          "Invalid GPU assignment: can't share a GPU among multiple thread-MPI threads.\n"
-                          "Use MPI if you are sure that you want to assign GPU to multiple threads.");
-            }
+#ifdef GMX_MPI
+    if (PAR(cr))
+    {
+        /* Prevent other ranks from continuing after an
+           inconsistency */
+        MPI_Barrier(cr->mpi_comm_mygroup);
+    }
+#endif
+
+}
+
+int gmx_count_gpu_dev_shared(const gmx_gpu_info_t *gpu_info)
+{
+    int      same_count    = 0;
+    int      ngpu          = gpu_info->ncuda_dev_use;
 
-            if (bSomeSame)
+    if (gpu_info->bUserSet)
+    {
+        int      i, j;
+
+        for (i = 0; i < ngpu - 1; i++)
+        {
+            for (j = i + 1; j < ngpu; j++)
             {
-                md_print_warn(cr, fplog,
-                              "NOTE: Potentially sub-optimal launch configuration: you assigned %s to\n"
-                              "      multiple %s%s; this should be avoided as it can cause\n"
-                              "      performance loss.\n",
-                              same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es");
+                same_count      += (gpu_info->cuda_dev_use[i] ==
+                                    gpu_info->cuda_dev_use[j]);
             }
         }
-        print_gpu_use_stats(fplog, &hwinfo->gpu_info, cr);
     }
+
+    return same_count;
 }
 
+
 /* Return the number of hardware threads supported by the current CPU.
  * We assume that this is equal to the number of CPUs reported to be
  * online by the OS at the time of the call.
@@ -464,10 +510,9 @@ static int get_nthreads_hw_avail(FILE *fplog, const t_commrec *cr)
     return ret;
 }
 
-void gmx_detect_hardware(FILE *fplog, gmx_hw_info_t *hwinfo,
-                         const t_commrec *cr,
-                         gmx_bool bForceUseGPU, gmx_bool bTryUseGPU,
-                         const char *gpu_id)
+gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
+                                   gmx_bool bForceUseGPU, gmx_bool bTryUseGPU,
+                                   const char *gpu_id)
 {
     int              i;
     const char      *env;
@@ -475,132 +520,157 @@ void gmx_detect_hardware(FILE *fplog, gmx_hw_info_t *hwinfo,
     gmx_hw_info_t   *hw;
     gmx_gpu_info_t   gpuinfo_auto, gpuinfo_user;
     gmx_bool         bGPUBin;
+    int              ret;
 
-    assert(hwinfo);
-
-    /* detect CPUID info; no fuss, we don't detect system-wide
-     * -- sloppy, but that's it for now */
-    if (gmx_cpuid_init(&hwinfo->cpuid_info) != 0)
+    /* make sure no one else is doing the same thing */
+    ret = tMPI_Thread_mutex_lock(&hw_info_lock);
+    if (ret != 0)
     {
-        gmx_fatal_collective(FARGS, cr, NULL, "CPUID detection failed!");
+        gmx_fatal(FARGS, "Error locking hwinfo mutex: %s", strerror(errno));
     }
 
-    /* detect number of hardware threads */
-    hwinfo->nthreads_hw_avail = get_nthreads_hw_avail(fplog, cr);
+    /* only initialize the hwinfo structure if it is not already initialized */
+    if (n_hwinfo == 0)
+    {
+        snew(hwinfo_g, 1);
+        hwinfo_g->bConsistencyChecked = FALSE;
 
-    /* detect GPUs */
-    hwinfo->gpu_info.ncuda_dev_use  = 0;
-    hwinfo->gpu_info.cuda_dev_use   = NULL;
-    hwinfo->gpu_info.ncuda_dev      = 0;
-    hwinfo->gpu_info.cuda_dev       = NULL;
+        /* detect CPUID info; no fuss, we don't detect system-wide
+         * -- sloppy, but that's it for now */
+        if (gmx_cpuid_init(&hwinfo_g->cpuid_info) != 0)
+        {
+            gmx_fatal_collective(FARGS, cr, NULL, "CPUID detection failed!");
+        }
+
+        /* detect number of hardware threads */
+        hwinfo_g->nthreads_hw_avail = get_nthreads_hw_avail(fplog, cr);
+
+        /* detect GPUs */
+        hwinfo_g->gpu_info.ncuda_dev_use  = 0;
+        hwinfo_g->gpu_info.cuda_dev_use   = NULL;
+        hwinfo_g->gpu_info.ncuda_dev      = 0;
+        hwinfo_g->gpu_info.cuda_dev       = NULL;
 
 #ifdef GMX_GPU
-    bGPUBin      = TRUE;
+        bGPUBin      = TRUE;
 #else
-    bGPUBin      = FALSE;
+        bGPUBin      = FALSE;
 #endif
 
-    /* Bail if binary is not compiled with GPU acceleration, but this is either
-     * explicitly (-nb gpu) or implicitly (gpu ID passed) requested. */
-    if (bForceUseGPU && !bGPUBin)
-    {
-        gmx_fatal(FARGS, "GPU acceleration requested, but %s was compiled without GPU support!", ShortProgram());
-    }
-    if (gpu_id != NULL && !bGPUBin)
-    {
-        gmx_fatal(FARGS, "GPU ID string set, but %s was compiled without GPU support!", ShortProgram());
-    }
-
-    /* run the detection if the binary was compiled with GPU support */
-    if (bGPUBin && getenv("GMX_DISABLE_GPU_DETECTION") == NULL)
-    {
-        char detection_error[STRLEN];
-
-        if (detect_cuda_gpus(&hwinfo->gpu_info, detection_error) != 0)
+        /* Bail if binary is not compiled with GPU acceleration, but this is either
+         * explicitly (-nb gpu) or implicitly (gpu ID passed) requested. */
+        if (bForceUseGPU && !bGPUBin)
         {
-            if (detection_error != NULL && detection_error[0] != '\0')
-            {
-                sprintf(sbuf, ":\n      %s\n", detection_error);
-            }
-            else
-            {
-                sprintf(sbuf, ".");
-            }
-            md_print_warn(cr, fplog,
-                          "NOTE: Error occurred during GPU detection%s"
-                          "      Can not use GPU acceleration, will fall back to CPU kernels.\n",
-                          sbuf);
+            gmx_fatal(FARGS, "GPU acceleration requested, but %s was compiled without GPU support!", ShortProgram());
         }
-    }
-
-    if (bForceUseGPU || bTryUseGPU)
-    {
-        env = getenv("GMX_GPU_ID");
-        if (env != NULL && gpu_id != NULL)
+        if (gpu_id != NULL && !bGPUBin)
         {
-            gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time");
+            gmx_fatal(FARGS, "GPU ID string set, but %s was compiled without GPU support!", ShortProgram());
         }
-        if (env == NULL)
+
+        /* run the detection if the binary was compiled with GPU support */
+        if (bGPUBin && getenv("GMX_DISABLE_GPU_DETECTION") == NULL)
         {
-            env = gpu_id;
+            char detection_error[STRLEN];
+
+            if (detect_cuda_gpus(&hwinfo_g->gpu_info, detection_error) != 0)
+            {
+                if (detection_error != NULL && detection_error[0] != '\0')
+                {
+                    sprintf(sbuf, ":\n      %s\n", detection_error);
+                }
+                else
+                {
+                    sprintf(sbuf, ".");
+                }
+                md_print_warn(cr, fplog,
+                              "NOTE: Error occurred during GPU detection%s"
+                              "      Can not use GPU acceleration, will fall back to CPU kernels.\n",
+                              sbuf);
+            }
         }
 
-        /* parse GPU IDs if the user passed any */
-        if (env != NULL)
+        if (bForceUseGPU || bTryUseGPU)
         {
-            int *gpuid, *checkres;
-            int  nid, res;
+            env = getenv("GMX_GPU_ID");
+            if (env != NULL && gpu_id != NULL)
+            {
+                gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time");
+            }
+            if (env == NULL)
+            {
+                env = gpu_id;
+            }
 
-            snew(gpuid, max_gpu_ids_user);
-            snew(checkres, max_gpu_ids_user);
+            /* parse GPU IDs if the user passed any */
+            if (env != NULL)
+            {
+                int *gpuid, *checkres;
+                int  nid, res;
 
-            parse_gpu_id_plain_string(env, &nid, gpuid);
+                snew(gpuid, max_gpu_ids_user);
+                snew(checkres, max_gpu_ids_user);
 
-            if (nid == 0)
-            {
-                gmx_fatal(FARGS, "Empty GPU ID string encountered.\n%s\n", invalid_gpuid_hint);
-            }
+                parse_gpu_id_plain_string(env, &nid, gpuid);
 
-            res = check_select_cuda_gpus(checkres, &hwinfo->gpu_info, gpuid, nid);
+                if (nid == 0)
+                {
+                    gmx_fatal(FARGS, "Empty GPU ID string encountered.\n%s\n",
+                              invalid_gpuid_hint);
+                }
 
-            if (!res)
-            {
-                print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);
+                res = check_select_cuda_gpus(checkres, &hwinfo_g->gpu_info,
+                                             gpuid, nid);
 
-                sprintf(sbuf, "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n");
-                for (i = 0; i < nid; i++)
+                if (!res)
                 {
-                    if (checkres[i] != egpuCompatible)
+                    print_gpu_detection_stats(fplog, &hwinfo_g->gpu_info, cr);
+
+                    sprintf(sbuf, "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n");
+                    for (i = 0; i < nid; i++)
                     {
-                        sprintf(stmp, "    GPU #%d: %s\n",
-                                gpuid[i], gpu_detect_res_str[checkres[i]]);
-                        strcat(sbuf, stmp);
+                        if (checkres[i] != egpuCompatible)
+                        {
+                            sprintf(stmp, "    GPU #%d: %s\n",
+                                    gpuid[i], gpu_detect_res_str[checkres[i]]);
+                            strcat(sbuf, stmp);
+                        }
                     }
+                    gmx_fatal(FARGS, "%s", sbuf);
                 }
-                gmx_fatal(FARGS, "%s", sbuf);
-            }
 
-            hwinfo->gpu_info.bUserSet = TRUE;
+                hwinfo_g->gpu_info.bUserSet = TRUE;
 
-            sfree(gpuid);
-            sfree(checkres);
-        }
-        else
-        {
-            pick_compatible_gpus(&hwinfo->gpu_info);
-            hwinfo->gpu_info.bUserSet = FALSE;
-        }
+                sfree(gpuid);
+                sfree(checkres);
+            }
+            else
+            {
+                pick_compatible_gpus(&hwinfo_g->gpu_info);
+                hwinfo_g->gpu_info.bUserSet = FALSE;
+            }
 
-        /* decide whether we can use GPU */
-        hwinfo->bCanUseGPU = (hwinfo->gpu_info.ncuda_dev_use > 0);
-        if (!hwinfo->bCanUseGPU && bForceUseGPU)
-        {
-            gmx_fatal(FARGS, "GPU acceleration requested, but no compatible GPUs were detected.");
+            /* decide whether we can use GPU */
+            hwinfo_g->bCanUseGPU = (hwinfo_g->gpu_info.ncuda_dev_use > 0);
+            if (!hwinfo_g->bCanUseGPU && bForceUseGPU)
+            {
+                gmx_fatal(FARGS, "GPU acceleration requested, but no compatible GPUs were detected.");
+            }
         }
     }
+    /* increase the reference counter */
+    n_hwinfo++;
+
+    ret = tMPI_Thread_mutex_unlock(&hw_info_lock);
+    if (ret != 0)
+    {
+        gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno));
+    }
+
+    return hwinfo_g;
 }
 
-void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count)
+static void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count)
 {
     int ndev_use;
 
@@ -628,10 +698,38 @@ void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count)
 
 void gmx_hardware_info_free(gmx_hw_info_t *hwinfo)
 {
-    if (hwinfo)
+    int ret;
+
+    ret = tMPI_Thread_mutex_lock(&hw_info_lock);
+    if (ret != 0)
+    {
+        gmx_fatal(FARGS, "Error locking hwinfo mutex: %s", strerror(errno));
+    }
+
+    /* decrease the reference counter */
+    n_hwinfo--;
+
+
+    if (hwinfo != hwinfo_g)
+    {
+        gmx_incons("hwinfo != hwinfo_g");
+    }
+
+    if (n_hwinfo < 0)
+    {
+        gmx_incons("n_hwinfo < 0");
+    }
+
+    if (n_hwinfo == 0)
+    {
+        gmx_cpuid_done(hwinfo_g->cpuid_info);
+        free_gpu_info(&hwinfo_g->gpu_info);
+        sfree(hwinfo_g);
+    }
+
+    ret = tMPI_Thread_mutex_unlock(&hw_info_lock);
+    if (ret != 0)
     {
-        gmx_cpuid_done(hwinfo->cpuid_info);
-        free_gpu_info(&hwinfo->gpu_info);
-        sfree(hwinfo);
+        gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno));
     }
 }
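
Note that the value gmx_count_gpu_dev_shared() returns is a count of
matching index pairs, not of distinct shared devices. A standalone sketch
with the same double loop, plus a few worked inputs:

    #include <stdio.h>

    /* count pairs (i, j), i < j, that name the same device */
    static int count_shared_pairs(const int *dev_use, int ngpu)
    {
        int i, j, same_count = 0;

        for (i = 0; i < ngpu - 1; i++)
        {
            for (j = i + 1; j < ngpu; j++)
            {
                same_count += (dev_use[i] == dev_use[j]);
            }
        }
        return same_count;
    }

    int main(void)
    {
        int a[] = { 0, 1, 2, 3 };  /* all distinct            -> 0 */
        int b[] = { 0, 0, 1, 2 };  /* two ranks share GPU 0   -> 1 */
        int c[] = { 0, 0, 0, 1 };  /* three ranks share GPU 0 -> 3 */

        printf("%d %d %d\n", count_shared_pairs(a, 4),
               count_shared_pairs(b, 4), count_shared_pairs(c, 4));
        return 0;
    }
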
diff --git a/src/gmxlib/gmx_thread_affinity.c b/src/gmxlib/gmx_thread_affinity.c
index ef96a0bb6bd7dbccaa82a4054d67887594891900..175cfeab3f9d0f5d568c5d495da8a80dd55a73e3 100644
@@ -190,15 +190,8 @@ gmx_set_thread_affinity(FILE                *fplog,
     int        nth_affinity_set, thread_id_node, thread_id,
                nthread_local, nthread_node, nthread_hw_max, nphyscore;
     int        offset;
-    /* these are inherently global properties that are shared among all threads
-     */
-    static const int          *locality_order;
-    static int                 rc;
-    static gmx_bool            have_locality_order = FALSE;
-    static tMPI_Thread_mutex_t locality_order_mtx  =
-        TMPI_THREAD_MUTEX_INITIALIZER;
-    static tMPI_Thread_cond_t  locality_order_cond =
-        TMPI_THREAD_COND_INITIALIZER;
+    const int *locality_order;
+    int        rc;
 
     if (hw_opt->thread_affinity == threadaffOFF)
     {
@@ -219,7 +212,7 @@ gmx_set_thread_affinity(FILE                *fplog,
                       "Can not set thread affinities on the current platform. On NUMA systems this\n"
                       "can cause performance degradation. If you think your platform should support\n"
                       "setting affinities, contact the GROMACS developers.");
-#endif /* __APPLE__ */
+#endif  /* __APPLE__ */
         return;
     }
 
@@ -277,65 +270,10 @@ gmx_set_thread_affinity(FILE                *fplog,
         md_print_info(cr, fplog, "Applying core pinning offset %d\n", offset);
     }
 
-    /* hw_opt is shared among tMPI threads, so for thread safety we need to do
-     * the layout detection only on master as core_pinning_stride is an in-out
-     * parameter and gets auto-set depending on its initial value.
-     * This
-     * This is not thread-safe with multi-simulations, but that's anyway not
-     * supported by tMPI. */
-    if (SIMMASTER(cr))
-    {
-        int ret;
-        int i;
-
-        ret = tMPI_Thread_mutex_lock(&locality_order_mtx);
-        if (ret != 0)
-        {
-            goto locality_order_err;
-        }
-        rc = get_thread_affinity_layout(fplog, cr, hwinfo,
-                                        nthread_node,
-                                        offset, &hw_opt->core_pinning_stride,
-                                        &locality_order);
-        have_locality_order = TRUE;
-        ret                 = tMPI_Thread_cond_broadcast(&locality_order_cond);
-        if (ret != 0)
-        {
-            tMPI_Thread_mutex_unlock(&locality_order_mtx);
-            goto locality_order_err;
-        }
-        ret = tMPI_Thread_mutex_unlock(&locality_order_mtx);
-        if (ret != 0)
-        {
-            goto locality_order_err;
-        }
-    }
-    else
-    {
-        int ret;
-        /* all other threads wait for the locality order data. */
-        ret = tMPI_Thread_mutex_lock(&locality_order_mtx);
-        if (ret != 0)
-        {
-            goto locality_order_err;
-        }
-
-        while (!have_locality_order)
-        {
-            ret = tMPI_Thread_cond_wait(&locality_order_cond,
-                                        &locality_order_mtx);
-            if (ret != 0)
-            {
-                tMPI_Thread_mutex_unlock(&locality_order_mtx);
-                goto locality_order_err;
-            }
-        }
-        ret = tMPI_Thread_mutex_unlock(&locality_order_mtx);
-        if (ret != 0)
-        {
-            goto locality_order_err;
-        }
-    }
+    rc = get_thread_affinity_layout(fplog, cr, hwinfo,
+                                    nthread_node,
+                                    offset, &hw_opt->core_pinning_stride,
+                                    &locality_order);
 
     if (rc != 0)
     {
@@ -424,15 +362,6 @@ gmx_set_thread_affinity(FILE                *fplog,
         }
     }
     return;
-
-locality_order_err:
-    /* any error in affinity setting shouldn't be fatal, but should generate
-       a warning */
-    md_print_warn(NULL, fplog,
-                  "WARNING: Obtaining affinity information failed due to a basic system error: %s.\n"
-                  "         This can cause performance degradation! ",
-                  strerror(errno));
-    return;
 }
 
 /* Check the process affinity mask and if it is found to be non-zero,
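
The block deleted above parked the master's result in function-local
statics and broadcast it via a condition variable. As its own comment
admitted, that is not safe if the function runs more than once in a
process: the statics are never reset. A standalone sketch of that hazard,
with illustrative names:

    #include <stdio.h>

    static int compute_layout(void) { return 42; /* stand-in */ }

    static int get_layout_once(void)
    {
        static int have_layout = 0;  /* process-wide, never reset */
        static int layout;

        if (!have_layout)
        {
            layout      = compute_layout();
            have_layout = 1;
        }
        return layout;  /* later invocations reuse the first result */
    }

    int main(void)
    {
        printf("%d\n", get_layout_once());
        printf("%d\n", get_layout_once());  /* stale if conditions changed */
        return 0;
    }

With hwinfo now shared and read-only, each thread simply calls
get_thread_affinity_layout() itself, and the statics disappear.
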
diff --git a/src/kernel/runner.c b/src/kernel/runner.c
index 66c8d23fb897761a6530a63677513297475bd741..74f6a933ea273c666f91fa04c76ee7142ca7fbc6 100644
@@ -340,7 +340,7 @@ static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo,
  * Thus all options should be internally consistent and consistent
  * with the hardware, except that ntmpi could be larger than #GPU.
  */
-static int get_nthreads_mpi(gmx_hw_info_t *hwinfo,
+static int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
                             gmx_hw_opt_t *hw_opt,
                             t_inputrec *inputrec, gmx_mtop_t *mtop,
                             const t_commrec *cr,
@@ -627,14 +627,14 @@ static void increase_nstlist(FILE *fp, t_commrec *cr,
     }
 }
 
-static void prepare_verlet_scheme(FILE             *fplog,
-                                  gmx_hw_info_t    *hwinfo,
-                                  t_commrec        *cr,
-                                  const char       *nbpu_opt,
-                                  t_inputrec       *ir,
-                                  const gmx_mtop_t *mtop,
-                                  matrix            box,
-                                  gmx_bool         *bUseGPU)
+static void prepare_verlet_scheme(FILE                *fplog,
+                                  const gmx_hw_info_t *hwinfo,
+                                  t_commrec           *cr,
+                                  const char          *nbpu_opt,
+                                  t_inputrec          *ir,
+                                  const gmx_mtop_t    *mtop,
+                                  matrix               box,
+                                  gmx_bool            *bUseGPU)
 {
     /* Here we only check for GPU usage on the MPI master process,
      * as here we don't know how many GPUs we will use yet.
@@ -963,6 +963,12 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
     bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0);
     bTryUseGPU   = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU;
 
+    /* Detect hardware, gather information. This is an operation that is
+     * global for this process (MPI rank). */
+    hwinfo = gmx_detect_hardware(fplog, cr,
+                                 bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);
+
+
     snew(state, 1);
     if (SIMMASTER(cr))
     {
@@ -975,11 +981,6 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
             convert_to_verlet_scheme(fplog, inputrec, mtop, det(state->box));
         }
 
-        /* Detect hardware, gather information. With tMPI only thread 0 does it
-         * and after threads are started broadcasts hwinfo around. */
-        snew(hwinfo, 1);
-        gmx_detect_hardware(fplog, hwinfo, cr,
-                            bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);
 
         minf.cutoff_scheme = inputrec->cutoff_scheme;
         minf.bUseGPU       = FALSE;
@@ -1119,33 +1120,6 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
         pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
     }
 
-#if defined GMX_THREAD_MPI
-    /* With tMPI we detected on thread 0 and we'll just pass the hwinfo pointer
-     * to the other threads  -- slightly uncool, but works fine, just need to
-     * make sure that the data doesn't get freed twice. */
-    if (cr->nnodes > 1)
-    {
-        if (!SIMMASTER(cr))
-        {
-            snew(hwinfo, 1);
-        }
-        gmx_bcast(sizeof(&hwinfo), &hwinfo, cr);
-    }
-#else
-    if (PAR(cr) && !SIMMASTER(cr))
-    {
-        /* now we have inputrec on all nodes, can run the detection */
-        /* TODO: perhaps it's better to propagate within a node instead? */
-        snew(hwinfo, 1);
-        gmx_detect_hardware(fplog, hwinfo, cr,
-                            bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);
-    }
-
-    /* Now do the affinity check with MPI/no-MPI (done earlier with thread-MPI). */
-    gmx_check_thread_affinity_set(fplog, cr,
-                                  hw_opt, hwinfo->nthreads_hw_avail, FALSE);
-#endif
-
     /* now make sure the state is initialized and propagated */
     set_state_entries(state, inputrec, cr->nnodes);
 
@@ -1381,7 +1355,9 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
                           (cr->duty & DUTY_PP) == 0,
                           inputrec->cutoff_scheme == ecutsVERLET);
 
-    gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt->nthreads_tmpi, minf.bUseGPU);
+    /* check consistency and decide on the number of GPUs to use. */
+    gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt->nthreads_tmpi,
+                                     minf.bUseGPU);
 
     /* getting number of PP/PME threads
        PME: env variable should be read only on one node to make sure it is
@@ -1513,7 +1489,8 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
                                       hw_opt, hwinfo->nthreads_hw_avail, TRUE);
 
         /* Set the CPU affinity */
-        gmx_set_thread_affinity(fplog, cr, hw_opt, nthreads_pme, hwinfo, inputrec);
+        gmx_set_thread_affinity(fplog, cr, hw_opt, nthreads_pme, hwinfo,
+                                inputrec);
     }
 
     /* Initiate PME if necessary,
@@ -1660,12 +1637,7 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
         sfree(membed);
     }
 
-#ifdef GMX_THREAD_MPI
-    if (PAR(cr) && SIMMASTER(cr))
-#endif
-    {
-        gmx_hardware_info_free(hwinfo);
-    }
+    gmx_hardware_info_free(hwinfo);
 
     /* Does what it says */
     print_date_and_time(fplog, cr->nodeid, "Finished mdrun", &runtime);
diff --git a/src/mdlib/forcerec.c b/src/mdlib/forcerec.c
index 2ce33fbacd13136b93d16b18da3ca2835e395a16..f608d4c258f5662459fb03ee83a924fa743ea48b 100644
@@ -2051,9 +2051,7 @@ void init_forcerec(FILE              *fp,
          * In mdrun, hwinfo has already been set before calling init_forcerec.
          * Here we ignore GPUs, as tools will not use them anyhow.
          */
-        snew(fr->hwinfo, 1);
-        gmx_detect_hardware(fp, fr->hwinfo, cr,
-                            FALSE, FALSE, NULL);
+        fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE, FALSE, NULL);
     }
 
     /* By default we turn acceleration on, but it might be turned off further down... */
@@ -2218,7 +2216,7 @@ void init_forcerec(FILE              *fp,
      * group kernels are OK. See Redmine #1249. */
     if (fr->bAllvsAll)
     {
-        fr->bAllvsAll = FALSE;
+        fr->bAllvsAll            = FALSE;
         fr->use_cpu_acceleration = FALSE;
         if (fp != NULL)
         {
diff --git a/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu b/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
index f42fa1e688107501cb0604509775bd28b1a38f7a..67e83ff89727db683593a860e4d914265c06f312 100644
@@ -53,6 +53,7 @@
 #include "types/interaction_const.h"
 #include "types/force_flags.h"
 #include "../nbnxn_consts.h"
+#include "gmx_detect_hardware.h"
 
 #include "nbnxn_cuda_types.h"
 #include "../../gmxlib/cuda_tools/cudautils.cuh"
@@ -459,7 +460,7 @@ static int pick_nbnxn_kernel_version(FILE            *fplog,
 
 void nbnxn_cuda_init(FILE *fplog,
                      nbnxn_cuda_ptr_t *p_cu_nb,
-                     gmx_gpu_info_t *gpu_info, int my_gpu_index,
+                     const gmx_gpu_info_t *gpu_info, int my_gpu_index,
                      gmx_bool bLocalAndNonlocal)
 {
     cudaError_t stat;
@@ -560,7 +561,8 @@ void nbnxn_cuda_init(FILE *fplog,
          *   - atomics are available, and
          *   - GPUs are not being shared.
          */
-        bool bShouldUsePollSync = (bX86 && bTMPIAtomics && !gpu_info->bDevShare);
+        bool bShouldUsePollSync = (bX86 && bTMPIAtomics &&
+                                   (gmx_count_gpu_dev_shared(gpu_info) < 1));
 
         if (bStreamSync)
         {