reorganized GPU detection and selection

author Berk Hess <hess@kth.se>

Thu, 10 Oct 2013 15:39:25 +0000 (17:39 +0200)

committer Berk Hess <hess@kth.se>

Thu, 31 Oct 2013 22:56:10 +0000 (23:56 +0100)
author Berk Hess <hess@kth.se>
Thu, 10 Oct 2013 15:39:25 +0000 (17:39 +0200)
committer Berk Hess <hess@kth.se>
Thu, 31 Oct 2013 22:56:10 +0000 (23:56 +0100)
diff --git a/include/gmx_cpuid.h b/include/gmx_cpuid.h

index e623351f38a08e569eec19b11765c80652d02e39..0ddbe358bea7397ddacce9313ef9f636c5be3957 100644 (file)
--- a/include/gmx_cpuid.h
+++ b/include/gmx_cpuid.h
@@ -297,11 +297,13 @@ gmx_cpuid_acceleration_suggest  (gmx_cpuid_t                    cpuid);
  
  /* Check if this binary was compiled with the same acceleration as we
   * would suggest for the current hardware. Always print stats to the log file
- * if it is non-NULL, and print a warning in stdout if we don't have a match.
+ * if it is non-NULL, and if we don't have a match, print a warning in log
+ * (if non-NULL) and if print_to_stderr!=0 also to stderr.
   */
  int
  gmx_cpuid_acceleration_check    (gmx_cpuid_t                cpuid,
-                                 FILE *                     log);
+                                 FILE *                     log,
+                                 int                        print_to_stderr);
  
  
  /* Release resources used by data structure. Note that the pointer to the
diff --git a/include/gmx_detect_hardware.h b/include/gmx_detect_hardware.h

index 787fc561f66f70d768589477ab66af1300d31554..a8cdc7fb0e2b1cde5c111c3903cc4b39d3625322 100644 (file)
--- a/include/gmx_detect_hardware.h
+++ b/include/gmx_detect_hardware.h
@@ -52,18 +52,27 @@ extern "C" {
  /* return a pointer to a global hwinfo structure. */
  GMX_LIBGMX_EXPORT
  gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
-                                   gmx_bool bForceUseGPU, gmx_bool bTryUseGPU,
-                                   const char *gpu_id);
+                                   gmx_bool bDetectGPUs);
  
  GMX_LIBGMX_EXPORT
  void gmx_hardware_info_free(gmx_hw_info_t *hwinfo);
  
-/* Check the thread count + GPU assignment. This function must
-   either be run by all threads that persist (i.e. all tmpi threads),
-   or be run before they are created.  */
  GMX_LIBGMX_EXPORT
-void gmx_check_hw_runconf_consistency(FILE *fplog, gmx_hw_info_t *hwinfo,
-                                      const t_commrec *cr, int ntmpi_requsted,
+void gmx_parse_gpu_ids(gmx_gpu_opt_t *gpu_opt);
+
+GMX_LIBGMX_EXPORT
+void gmx_select_gpu_ids(FILE *fplog, const t_commrec *cr,
+                        const gmx_gpu_info_t *gpu_info,
+                        gmx_bool bForceUseGPU,
+                        gmx_gpu_opt_t *gpu_opt);
+
+/* Check the consistency of hw_opt with hwinfo.
+   This function should be called once on each MPI rank. */
+GMX_LIBGMX_EXPORT
+void gmx_check_hw_runconf_consistency(FILE *fplog,
+                                      const gmx_hw_info_t *hwinfo,
+                                      const t_commrec *cr,
+                                      const gmx_hw_opt_t *hw_opt,
                                        gmx_bool bUseGPU);
  #endif
  
@@ -71,11 +80,11 @@ void gmx_check_hw_runconf_consistency(FILE *fplog, gmx_hw_info_t *hwinfo,
  /* Check whether a GPU is shared among ranks, and return the number of shared
     gpus
  
-   hwinfo        = the hwinfo struct
+   gpu_opt       = the gpu options struct
  
     returns: The number of GPUs shared among ranks, or 0 */
  GMX_LIBGMX_EXPORT
-int gmx_count_gpu_dev_shared(const gmx_gpu_info_t *gpu_info);
+int gmx_count_gpu_dev_shared(const gmx_gpu_opt_t *gpu_opt);
  
  
  #ifdef __cplusplus
diff --git a/include/gpu_utils.h b/include/gpu_utils.h

index 6c678c7d5aa199973d4f3f974976430ffc3ce6c0..68f8f7a2c766e6e330a14f74f47f7b3f515d420b 100644 (file)
--- a/include/gpu_utils.h
+++ b/include/gpu_utils.h
@@ -69,17 +69,21 @@ FUNC_QUALIFIER
  int detect_cuda_gpus(gmx_gpu_info_t *gpu_info, char *err_str) FUNC_TERM_INT
  
  FUNC_QUALIFIER
-void pick_compatible_gpus(gmx_gpu_info_t *gpu_info) FUNC_TERM_VOID
+void pick_compatible_gpus(const gmx_gpu_info_t *gpu_info,
+                          gmx_gpu_opt_t *gpu_opt) FUNC_TERM_VOID
  
  FUNC_QUALIFIER
-gmx_bool check_select_cuda_gpus(int *checkres, gmx_gpu_info_t *gpu_info,
-                                const int *requested_devs, int count) FUNC_TERM_INT
+gmx_bool check_selected_cuda_gpus(int *checkres,
+                                  const gmx_gpu_info_t *gpu_info,
+                                  gmx_gpu_opt_t *gpu_opt) FUNC_TERM_INT
  
  FUNC_QUALIFIER
  void free_gpu_info(const gmx_gpu_info_t *gpu_info) FUNC_TERM_VOID
  
  FUNC_QUALIFIER
-gmx_bool init_gpu(int mygpu, char *result_str, const gmx_gpu_info_t *gpu_info) FUNC_TERM_INT
+gmx_bool init_gpu(int mygpu, char *result_str,
+                  const gmx_gpu_info_t *gpu_info,
+                  const gmx_gpu_opt_t *gpu_opt) FUNC_TERM_INT
  
  FUNC_QUALIFIER
  gmx_bool free_gpu(char *result_str) FUNC_TERM_INT
@@ -89,7 +93,9 @@ FUNC_QUALIFIER
  int get_current_gpu_device_id(void) FUNC_TERM_INT
  
  FUNC_QUALIFIER
-int get_gpu_device_id(const gmx_gpu_info_t *gpu_info, int index) FUNC_TERM_INT
+int get_gpu_device_id(const gmx_gpu_info_t *gpu_info,
+                      const gmx_gpu_opt_t *gpu_opt,
+                      int index) FUNC_TERM_INT
  
  FUNC_QUALIFIER
  void get_gpu_device_info_string(char *s, const gmx_gpu_info_t *gpu_info, int index) FUNC_TERM_VOID
diff --git a/include/mdrun.h b/include/mdrun.h

index 98ff760bc6b2ce821f5e598523a26bd24fa8d006..1e2b9b5189151177ef2a96e52154661ce73bc61d 100644 (file)
--- a/include/mdrun.h
+++ b/include/mdrun.h
@@ -93,22 +93,6 @@ enum {
      ddnoSEL, ddnoINTERLEAVE, ddnoPP_PME, ddnoCARTESIAN, ddnoNR
  };
  
-/* The options for the thread affinity setting, default: auto */
-enum {
-    threadaffSEL, threadaffAUTO, threadaffON, threadaffOFF, threadaffNR
-};
-
-typedef struct {
-    int      nthreads_tot;        /* Total number of threads requested (TMPI) */
-    int      nthreads_tmpi;       /* Number of TMPI threads requested         */
-    int      nthreads_omp;        /* Number of OpenMP threads requested       */
-    int      nthreads_omp_pme;    /* As nthreads_omp, but for PME only nodes  */
-    int      thread_affinity;     /* Thread affinity switch, see enum above   */
-    int      core_pinning_stride; /* Logical core pinning stride              */
-    int      core_pinning_offset; /* Logical core pinning offset              */
-    char    *gpu_id;              /* GPU id's to use, each specified as chars */
-} gmx_hw_opt_t;
-
  /* Variables for temporary use with the deform option,
   * used in runner.c and md.c.
   * (These variables should be stored in the tpx file.)
diff --git a/include/nbnxn_cuda_data_mgmt.h b/include/nbnxn_cuda_data_mgmt.h

index f215978a0ed0191a52072931cb6a19612b1e88fe..02af68fa4ea9d610227dba0a4c1a72ca290ae488 100644 (file)
--- a/include/nbnxn_cuda_data_mgmt.h
+++ b/include/nbnxn_cuda_data_mgmt.h
@@ -60,7 +60,9 @@ extern "C" {
  FUNC_QUALIFIER
  void nbnxn_cuda_init(FILE *fplog,
                       nbnxn_cuda_ptr_t *p_cu_nb,
-                     const gmx_gpu_info_t *gpu_info, int my_gpu_index,
+                     const gmx_gpu_info_t *gpu_info,
+                     const gmx_gpu_opt_t *gpu_opt,
+                     int my_gpu_index,
                       /* true of both local and non-local are don on GPU */
                       gmx_bool bLocalAndNonlocal) FUNC_TERM
  
diff --git a/include/types/forcerec.h b/include/types/forcerec.h

index 2e78634a43718cd815d38a402a82ff97bd01bdcf..920a81db5432cb34f9a367938c9332df2a79e750 100644 (file)
--- a/include/types/forcerec.h
+++ b/include/types/forcerec.h
@@ -200,6 +200,7 @@ typedef struct {
      rvec                 posres_comB;
  
      const gmx_hw_info_t *hwinfo;
+    const gmx_gpu_opt_t *gpu_opt;
      gmx_bool             use_cpu_acceleration;
  
      /* Interaction for calculated in kernels. In many cases this is similar to
diff --git a/include/types/hw_info.h b/include/types/hw_info.h

index f3c9c9284bf79131e597808151b2222d51e4ed20..db9ce3b7fcf88fdf360b73bfb20602750ccde3bd 100644 (file)
--- a/include/types/hw_info.h
+++ b/include/types/hw_info.h
@@ -67,12 +67,10 @@ static const char * const gpu_detect_res_str[] =
   * The gmx_hardware_detect module initializes it. */
  typedef struct
  {
-    gmx_bool             bUserSet;      /* true if the GPUs in cuda_dev_use are manually provided by the user */
-
-    int                  ncuda_dev_use; /* number of devices selected to be used */
-    int                 *cuda_dev_use;  /* index of the devices selected to be used */
+    gmx_bool             bDetectGPUs;   /* Did we try to detect GPUs? */
      int                  ncuda_dev;     /* total number of devices detected */
      cuda_dev_info_ptr_t  cuda_dev;      /* devices detected in the system (per node) */
+    int                  ncuda_dev_compatible; /* number of compatible GPUs */
  } gmx_gpu_info_t;
  
  /* Hardware information structure with CPU and GPU information.
@@ -81,7 +79,6 @@ typedef struct
   *       (i.e. must be able to be shared among all threads) */
  typedef struct
  {
-    gmx_bool        bCanUseGPU;          /* True if compatible GPUs are detected during hardware detection */
      gmx_gpu_info_t  gpu_info;            /* Information about GPUs detected in the system */
  
      gmx_cpuid_t     cpuid_info;          /* CPUID information about CPU detected;
@@ -90,11 +87,37 @@ typedef struct
      int             nthreads_hw_avail;   /* Number of hardware threads available; this number
                                              is based on the number of CPUs reported as available
                                              by the OS at the time of detection. */
-    gmx_bool        bConsistencyChecked; /* whether
-                                            gmx_check_hw_runconf_consistency()
-                                            has been run with this hw_info */
  } gmx_hw_info_t;
  
+
+/* The options for the thread affinity setting, default: auto */
+enum {
+    threadaffSEL, threadaffAUTO, threadaffON, threadaffOFF, threadaffNR
+};
+
+/* GPU device selection information -- for now with only CUDA devices */
+typedef struct
+{
+    char     *gpu_id;        /* GPU id's to use, each specified as chars */
+    gmx_bool  bUserSet;      /* true if the GPUs in cuda_dev_use are manually provided by the user */
+
+    int       ncuda_dev_use; /* number of devices selected to be used */
+    int      *cuda_dev_use;  /* index of the devices selected to be used */
+} gmx_gpu_opt_t;
+
+/* Threading and GPU options, can be set automatically or by the user */
+typedef struct {
+    int      nthreads_tot;        /* Total number of threads requested (TMPI) */
+    int      nthreads_tmpi;       /* Number of TMPI threads requested         */
+    int      nthreads_omp;        /* Number of OpenMP threads requested       */
+    int      nthreads_omp_pme;    /* As nthreads_omp, but for PME only nodes  */
+    int      thread_affinity;     /* Thread affinity switch, see enum above   */
+    int      core_pinning_stride; /* Logical core pinning stride              */
+    int      core_pinning_offset; /* Logical core pinning offset              */
+
+    gmx_gpu_opt_t gpu_opt;        /* The GPU options                          */
+} gmx_hw_opt_t;
+
  #ifdef __cplusplus
  }
  #endif
diff --git a/src/gmxlib/gmx_cpuid.c b/src/gmxlib/gmx_cpuid.c

index 0eb36fa5849fc44fd723264aa716ad6c9b765ce8..7b9356451bbab06cf85281a3a0975281321d4bfc 100644 (file)
--- a/src/gmxlib/gmx_cpuid.c
+++ b/src/gmxlib/gmx_cpuid.c
@@ -1105,7 +1105,8 @@ gmx_cpuid_acceleration_suggest  (gmx_cpuid_t                 cpuid)
  
  int
  gmx_cpuid_acceleration_check(gmx_cpuid_t   cpuid,
-                             FILE *        log)
+                             FILE *        log,
+                             int           print_to_stderr)
  {
      int                           rc;
      char                          str[1024];
@@ -1140,9 +1141,12 @@ gmx_cpuid_acceleration_check(gmx_cpuid_t   cpuid,
                      gmx_cpuid_acceleration_string[acc],
                      gmx_cpuid_acceleration_string[compiled_acc]);
          }
-        printf("Compiled acceleration: %s (Gromacs could use %s on this machine, which is better)\n",
-               gmx_cpuid_acceleration_string[compiled_acc],
-               gmx_cpuid_acceleration_string[acc]);
+        if (print_to_stderr)
+        {
+            fprintf(stderr, "Compiled acceleration: %s (Gromacs could use %s on this machine, which is better)\n",
+                   gmx_cpuid_acceleration_string[compiled_acc],
+                   gmx_cpuid_acceleration_string[acc]);
+        }
      }
      return rc;
  }
diff --git a/src/gmxlib/gmx_detect_hardware.c b/src/gmxlib/gmx_detect_hardware.c

index 640d74e85e63b69323adbab447bf231554e7b399..c407f40b5d34fc421680f34571ac6aaf9ad67dab 100644 (file)
--- a/src/gmxlib/gmx_detect_hardware.c
+++ b/src/gmxlib/gmx_detect_hardware.c
@@ -58,12 +58,11 @@
  #include "windows.h"
  #endif
  
-/* Although we can't have more than 10 GPU different ID-s passed by the user as
- * the id-s are assumed to be represented by single digits, as multiple
- * processes can share a GPU, we can end up with more than 10 IDs.
- * To account for potential extreme cases we'll set the limit to a pretty
- * ridiculous number. */
-static unsigned int max_gpu_ids_user = 64;
+#ifdef GMX_GPU
+const gmx_bool bGPUBinary = TRUE;
+#else
+const gmx_bool bGPUBinary = FALSE;
+#endif
  
  static const char * invalid_gpuid_hint =
      "A delimiter-free sequence of valid numeric IDs of available GPUs is expected.";
@@ -77,7 +76,7 @@ static tMPI_Thread_mutex_t hw_info_lock = TMPI_THREAD_MUTEX_INITIALIZER;
  
  
  /* FW decl. */
-static void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count);
+static void limit_num_gpus_used(gmx_gpu_opt_t *gpu_opt, int count);
  
  static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info, gmx_bool bPrintAll)
  {
@@ -106,6 +105,12 @@ static void print_gpu_detection_stats(FILE                 *fplog,
      char onhost[266], stmp[STRLEN];
      int  ngpu;
  
+    if (!gpu_info->bDetectGPUs)
+    {
+        /* We skipped the detection, so don't print detection stats */
+        return;
+    }
+
      ngpu = gpu_info->ncuda_dev;
  
  #if defined GMX_MPI && !defined GMX_THREAD_MPI
@@ -131,31 +136,32 @@ static void print_gpu_detection_stats(FILE                 *fplog,
  
  static void print_gpu_use_stats(FILE                 *fplog,
                                  const gmx_gpu_info_t *gpu_info,
+                                const gmx_gpu_opt_t  *gpu_opt,
                                  const t_commrec      *cr)
  {
      char sbuf[STRLEN], stmp[STRLEN];
-    int  i, ngpu, ngpu_all;
+    int  i, ngpu_comp, ngpu_use;
  
-    ngpu     = gpu_info->ncuda_dev_use;
-    ngpu_all = gpu_info->ncuda_dev;
+    ngpu_comp = gpu_info->ncuda_dev_compatible;
+    ngpu_use  = gpu_opt->ncuda_dev_use;
  
-    /* Issue note if GPUs are available but not used */
-    if (ngpu_all > 0 && ngpu < 1)
+    /* Issue a note if GPUs are available but not used */
+    if (ngpu_comp > 0 && ngpu_use < 1)
      {
          sprintf(sbuf,
                  "%d compatible GPU%s detected in the system, but none will be used.\n"
                  "Consider trying GPU acceleration with the Verlet scheme!",
-                ngpu_all, (ngpu_all > 1) ? "s" : "");
+                ngpu_comp, (ngpu_comp > 1) ? "s" : "");
      }
      else
      {
          sprintf(sbuf, "%d GPU%s %sselected for this run: ",
-                ngpu, (ngpu > 1) ? "s" : "",
-                gpu_info->bUserSet ? "user-" : "auto-");
-        for (i = 0; i < ngpu; i++)
+                ngpu_use, (ngpu_use > 1) ? "s" : "",
+                gpu_opt->bUserSet ? "user-" : "auto-");
+        for (i = 0; i < ngpu_use; i++)
          {
-            sprintf(stmp, "#%d", get_gpu_device_id(gpu_info, i));
-            if (i < ngpu - 1)
+            sprintf(stmp, "#%d", get_gpu_device_id(gpu_info, gpu_opt, i));
+            if (i < ngpu_use - 1)
              {
                  strcat(stmp, ", ");
              }
@@ -167,20 +173,13 @@ static void print_gpu_use_stats(FILE                 *fplog,
  
  /* Parse a "plain" GPU ID string which contains a sequence of digits corresponding
   * to GPU IDs; the order will indicate the process/tMPI thread - GPU assignment. */
-static void parse_gpu_id_plain_string(const char *idstr, int *nid, int *idlist)
+static void parse_gpu_id_plain_string(const char *idstr, int *nid, int **idlist)
  {
-    int    i;
-    size_t len_idstr;
-
-    len_idstr = strlen(idstr);
+    int i;
  
-    if (len_idstr > max_gpu_ids_user)
-    {
-        gmx_fatal(FARGS, "%d GPU IDs provided, but only at most %d are supported",
-                  len_idstr, max_gpu_ids_user);
-    }
+    *nid = strlen(idstr);
  
-    *nid = len_idstr;
+    snew(*idlist, *nid);
  
      for (i = 0; i < *nid; i++)
      {
@@ -189,7 +188,7 @@ static void parse_gpu_id_plain_string(const char *idstr, int *nid, int *idlist)
              gmx_fatal(FARGS, "Invalid character in GPU ID string: '%c'\n%s\n",
                        idstr[i], invalid_gpuid_hint);
          }
-        idlist[i] = idstr[i] - '0';
+        (*idlist)[i] = idstr[i] - '0';
      }
  }
  
@@ -199,17 +198,15 @@ static void parse_gpu_id_csv_string(const char *idstr, int *nid, int *idlist)
      gmx_incons("Not implemented yet");
  }
  
-void gmx_check_hw_runconf_consistency(FILE *fplog, gmx_hw_info_t *hwinfo,
-                                      const t_commrec *cr, int ntmpi_requested,
+void gmx_check_hw_runconf_consistency(FILE *fplog,
+                                      const gmx_hw_info_t *hwinfo,
+                                      const t_commrec *cr,
+                                      const gmx_hw_opt_t *hw_opt,
                                        gmx_bool bUseGPU)
  {
-    int                        npppn, ntmpi_pp, ngpu;
-    char                       sbuf[STRLEN], th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
-    char                       gpu_plural[2];
-    gmx_bool                   bGPUBin, btMPI, bMPI, bMaxMpiThreadsSet, bNthreadsAuto, bEmulateGPU;
-    int                        ret;
-    static tMPI_Thread_mutex_t cons_lock = TMPI_THREAD_MUTEX_INITIALIZER;
-
+    int      npppn, ntmpi_pp;
+    char     sbuf[STRLEN], th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
+    gmx_bool btMPI, bMPI, bMaxMpiThreadsSet, bNthreadsAuto, bEmulateGPU;
  
      assert(hwinfo);
      assert(cr);
@@ -223,206 +220,176 @@ void gmx_check_hw_runconf_consistency(FILE *fplog, gmx_hw_info_t *hwinfo,
          return;
      }
  
-    /* We run this function only once, but must make sure that all threads
-       that are alive run this function, so they get consistent data. We
-       achieve this by mutual exclusion and returning if the structure is
-       already properly checked & set */
-    ret = tMPI_Thread_mutex_lock(&cons_lock);
-    if (ret != 0)
+    btMPI         = bMPI = FALSE;
+    bNthreadsAuto = FALSE;
+#if defined(GMX_THREAD_MPI)
+    btMPI         = TRUE;
+    bNthreadsAuto = (hw_opt->nthreads_tmpi < 1);
+#elif defined(GMX_LIB_MPI)
+    bMPI  = TRUE;
+#endif
+
+    /* GPU emulation detection is done later, but we need here as well
+     * -- uncool, but there's no elegant workaround */
+    bEmulateGPU       = (getenv("GMX_EMULATE_GPU") != NULL);
+    bMaxMpiThreadsSet = (getenv("GMX_MAX_MPI_THREADS") != NULL);
+
+    /* check the acceleration mdrun is compiled with against hardware
+       capabilities */
+    /* TODO: Here we assume homogeneous hardware which is not necessarily
+             the case! Might not hurt to add an extra check over MPI. */
+    gmx_cpuid_acceleration_check(hwinfo->cpuid_info, fplog, SIMMASTER(cr));
+
+    /* NOTE: this print is only for and on one physical node */
+    print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);
+
+    if (hwinfo->gpu_info.ncuda_dev_compatible > 0)
      {
-        gmx_fatal(FARGS, "Error locking cons mutex: %s", strerror(errno));
+        /* NOTE: this print is only for and on one physical node */
+        print_gpu_use_stats(fplog, &hwinfo->gpu_info, &hw_opt->gpu_opt, cr);
      }
  
-    if (!hwinfo->bConsistencyChecked)
+    /* Need to ensure that we have enough GPUs:
+     * - need one GPU per PP node
+     * - no GPU oversubscription with tMPI
+     * */
+    /* number of PP processes per node */
+    npppn = cr->nrank_pp_intranode;
+
+    pernode[0]           = '\0';
+    th_or_proc_plural[0] = '\0';
+    if (btMPI)
      {
-        btMPI         = bMPI = FALSE;
-        bNthreadsAuto = FALSE;
-#if defined(GMX_THREAD_MPI)
-        btMPI         = TRUE;
-        bNthreadsAuto = (ntmpi_requested < 1);
-#elif defined(GMX_LIB_MPI)
-        bMPI  = TRUE;
-#endif
+        sprintf(th_or_proc, "thread-MPI thread");
+        if (npppn > 1)
+        {
+            sprintf(th_or_proc_plural, "s");
+        }
+    }
+    else if (bMPI)
+    {
+        sprintf(th_or_proc, "MPI process");
+        if (npppn > 1)
+        {
+            sprintf(th_or_proc_plural, "es");
+        }
+        sprintf(pernode, " per node");
+    }
+    else
+    {
+        /* neither MPI nor tMPI */
+        sprintf(th_or_proc, "process");
+    }
  
-#ifdef GMX_GPU
-        bGPUBin      = TRUE;
-#else
-        bGPUBin      = FALSE;
-#endif
+    if (bUseGPU && hwinfo->gpu_info.ncuda_dev_compatible > 0 &&
+        !bEmulateGPU)
+    {
+        int  ngpu_comp, ngpu_use;
+        char gpu_comp_plural[2], gpu_use_plural[2];
+
+        ngpu_comp = hwinfo->gpu_info.ncuda_dev_compatible;
+        ngpu_use  = hw_opt->gpu_opt.ncuda_dev_use;
  
-        /* GPU emulation detection is done later, but we need here as well
-         * -- uncool, but there's no elegant workaround */
-        bEmulateGPU       = (getenv("GMX_EMULATE_GPU") != NULL);
-        bMaxMpiThreadsSet = (getenv("GMX_MAX_MPI_THREADS") != NULL);
-
-        /* check the acceleration mdrun is compiled with against hardware
-           capabilities */
-        /* TODO: Here we assume homogeneous hardware which is not necessarily
-                 the case! Might not hurt to add an extra check over MPI. */
-        gmx_cpuid_acceleration_check(hwinfo->cpuid_info, fplog);
-
-        /* Need to ensure that we have enough GPUs:
-         * - need one GPU per PP node
-         * - no GPU oversubscription with tMPI
-         * => keep on the GPU support, otherwise turn off (or bail if forced)
-         * */
-        /* number of PP processes per node */
-        npppn = cr->nrank_pp_intranode;
-
-        pernode[0]           = '\0';
-        th_or_proc_plural[0] = '\0';
-        if (btMPI)
+        sprintf(gpu_comp_plural, "%s", (ngpu_comp> 1) ? "s" : "");
+        sprintf(gpu_use_plural,  "%s", (ngpu_use > 1) ? "s" : "");
+
+        /* number of tMPI threads auto-adjusted */
+        if (btMPI && bNthreadsAuto)
          {
-            sprintf(th_or_proc, "thread-MPI thread");
-            if (npppn > 1)
+            if (hw_opt->gpu_opt.bUserSet && npppn < ngpu_use)
              {
-                sprintf(th_or_proc_plural, "s");
+                /* The user manually provided more GPUs than threads we
+                   could automatically start. */
+                gmx_fatal(FARGS,
+                          "%d GPU%s provided, but only %d PP thread-MPI thread%s coud be started.\n"
+                          "%s requires one PP tread-MPI thread per GPU; use fewer GPUs%s.",
+                          ngpu_use, gpu_use_plural,
+                          npppn, th_or_proc_plural,
+                          ShortProgram(), bMaxMpiThreadsSet ? "\nor allow more threads to be used" : "");
              }
-        }
-        else if (bMPI)
-        {
-            sprintf(th_or_proc, "MPI process");
-            if (npppn > 1)
+
+            if (!hw_opt->gpu_opt.bUserSet && npppn < ngpu_comp)
              {
-                sprintf(th_or_proc_plural, "es");
+                /* There are more GPUs than tMPI threads; we have
+                   limited the number GPUs used. */
+                md_print_warn(cr, fplog,
+                              "NOTE: %d GPU%s were detected, but only %d PP thread-MPI thread%s can be started.\n"
+                              "      %s can use one GPU per PP tread-MPI thread, so only %d GPU%s will be used.%s\n",
+                              ngpu_comp, gpu_comp_plural,
+                              npppn, th_or_proc_plural,
+                              ShortProgram(), npppn,
+                              npppn > 1 ? "s" : "",
+                              bMaxMpiThreadsSet ? "\n      Also, you can allow more threads to be used by increasing GMX_MAX_MPI_THREADS" : "");
              }
-            sprintf(pernode, " per node");
-        }
-        else
-        {
-            /* neither MPI nor tMPI */
-            sprintf(th_or_proc, "process");
          }
  
-        if (bGPUBin)
+        if (hw_opt->gpu_opt.bUserSet)
          {
-            print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);
+            if (ngpu_use != npppn)
+            {
+                gmx_fatal(FARGS,
+                          "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
+                          "%s was started with %d PP %s%s%s, but you provided %d GPU%s.",
+                          th_or_proc, btMPI ? "s" : "es", pernode,
+                          ShortProgram(), npppn, th_or_proc,
+                          th_or_proc_plural, pernode,
+                          ngpu_use, gpu_use_plural);
+            }
          }
-
-        if (bUseGPU && hwinfo->bCanUseGPU && !bEmulateGPU)
+        else
          {
-            ngpu = hwinfo->gpu_info.ncuda_dev_use;
-            sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
-
-            /* number of tMPI threads atuo-adjusted */
-            if (btMPI && bNthreadsAuto)
+            if (ngpu_comp > npppn)
              {
-                if (npppn < ngpu)
-                {
-                    if (hwinfo->gpu_info.bUserSet)
-                    {
-                        /* The user manually provided more GPUs than threads we
-                           could automatically start. */
-                        gmx_fatal(FARGS,
-                                  "%d GPU%s provided, but only %d PP thread-MPI thread%s coud be started.\n"
-                                  "%s requires one PP tread-MPI thread per GPU; use fewer GPUs%s.",
-                                  ngpu, gpu_plural, npppn, th_or_proc_plural,
-                                  ShortProgram(), bMaxMpiThreadsSet ? "\nor allow more threads to be used" : "");
-                    }
-                    else
-                    {
-                        /* There are more GPUs than tMPI threads; we have to
-                           limit the number GPUs used. */
-                        md_print_warn(cr, fplog,
-                                      "NOTE: %d GPU%s were detected, but only %d PP thread-MPI thread%s can be started.\n"
-                                      "      %s can use one GPU per PP tread-MPI thread, so only %d GPU%s will be used.%s\n",
-                                      ngpu, gpu_plural, npppn,
-                                      th_or_proc_plural,
-                                      ShortProgram(), npppn,
-                                      npppn > 1 ? "s" : "",
-                                      bMaxMpiThreadsSet ? "\n      Also, you can allow more threads to be used by increasing GMX_MAX_MPI_THREADS" : "");
-
-                        if (cr->rank_pp_intranode == 0)
-                        {
-                            limit_num_gpus_used(hwinfo, npppn);
-                            ngpu = hwinfo->gpu_info.ncuda_dev_use;
-                            sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
-                        }
-                    }
-                }
+                md_print_warn(cr, fplog,
+                              "NOTE: potentially sub-optimal launch configuration, %s started with less\n"
+                              "      PP %s%s%s than GPU%s available.\n"
+                              "      Each PP %s can use only one GPU, %d GPU%s%s will be used.\n",
+                              ShortProgram(), th_or_proc,
+                              th_or_proc_plural, pernode, gpu_comp_plural,
+                              th_or_proc, npppn, gpu_use_plural, pernode);
              }
-
-            if (ngpu != npppn)
+            
+            if (ngpu_use != npppn)
              {
-                if (hwinfo->gpu_info.bUserSet)
+                /* Avoid duplicate error messages.
+                 * Unfortunately we can only do this at the physical node
+                 * level, since the hardware setup and MPI process count
+                 * might differ between physical nodes.
+                 */
+                if (cr->rank_pp_intranode == 0)
                  {
                      gmx_fatal(FARGS,
                                "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
-                              "%s was started with %d PP %s%s%s, but you provided %d GPU%s.",
+                              "%s was started with %d PP %s%s%s, but only %d GPU%s were detected.",
                                th_or_proc, btMPI ? "s" : "es", pernode,
                                ShortProgram(), npppn, th_or_proc,
-                              th_or_proc_plural, pernode, ngpu, gpu_plural);
-                }
-                else
-                {
-                    if (ngpu > npppn)
-                    {
-                        md_print_warn(cr, fplog,
-                                      "NOTE: potentially sub-optimal launch configuration, %s started with less\n"
-                                      "      PP %s%s%s than GPU%s available.\n"
-                                      "      Each PP %s can use only one GPU, %d GPU%s%s will be used.\n",
-                                      ShortProgram(), th_or_proc,
-                                      th_or_proc_plural, pernode, gpu_plural,
-                                      th_or_proc, npppn, gpu_plural, pernode);
-
-                        if (bMPI || (btMPI && cr->rank_pp_intranode == 0))
-                        {
-                            limit_num_gpus_used(hwinfo, npppn);
-                            ngpu = hwinfo->gpu_info.ncuda_dev_use;
-                            sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
-                        }
-                    }
-                    else
-                    {
-                        /* Avoid duplicate error messages.
-                         * Unfortunately we can only do this at the physical node
-                         * level, since the hardware setup and MPI process count
-                         * might be differ over physical nodes.
-                         */
-                        if (cr->rank_pp_intranode == 0)
-                        {
-                            gmx_fatal(FARGS,
-                                      "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
-                                      "%s was started with %d PP %s%s%s, but only %d GPU%s were detected.",
-                                      th_or_proc, btMPI ? "s" : "es", pernode,
-                                      ShortProgram(), npppn, th_or_proc,
-                                      th_or_proc_plural, pernode, ngpu,
-                                      gpu_plural);
-                        }
-                    }
+                              th_or_proc_plural, pernode,
+                              ngpu_use, gpu_use_plural);
                  }
              }
+        }
  
-            {
-                int      same_count;
+        {
+            int      same_count;
  
-                same_count = gmx_count_gpu_dev_shared(&(hwinfo->gpu_info));
+            same_count = gmx_count_gpu_dev_shared(&hw_opt->gpu_opt);
  
-                if (btMPI && same_count > 0)
-                {
-                    gmx_fatal(FARGS,
-                              "Invalid GPU assignment: can't share a GPU among multiple thread-MPI threads.\n"
-                              "Use MPI if you are sure that you want to assign GPU to multiple threads.");
-                }
+            if (btMPI && same_count > 0)
+            {
+                gmx_fatal(FARGS,
+                          "Invalid GPU assignment: can't share a GPU among multiple thread-MPI threads.\n"
+                          "Use MPI if you are sure that you want to assign a GPU to multiple threads.");
+            }
  
-                if (same_count > 0)
-                {
-                    md_print_warn(cr, fplog,
-                                  "NOTE: Potentially sub-optimal launch configuration: you assigned %s to\n"
-                                  "      multiple %s%s; this should be avoided as it can cause\n"
-                                  "      performance loss.\n",
-                                  same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es");
-                }
+            if (same_count > 0)
+            {
+                md_print_warn(cr, fplog,
+                              "NOTE: Potentially sub-optimal launch configuration: you assigned %s to\n"
+                              "      multiple %s%s; this should be avoided as it can cause\n"
+                              "      performance loss.\n",
+                              same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es");
              }
-            print_gpu_use_stats(fplog, &hwinfo->gpu_info, cr);
          }
-        hwinfo->bConsistencyChecked = TRUE;
-    }
-
-    ret = tMPI_Thread_mutex_unlock(&cons_lock);
-    if (ret != 0)
-    {
-        gmx_fatal(FARGS, "Error unlocking cons mutex: %s", strerror(errno));
      }
  
  #ifdef GMX_MPI
@@ -436,12 +403,12 @@ void gmx_check_hw_runconf_consistency(FILE *fplog, gmx_hw_info_t *hwinfo,
  
  }
  
-int gmx_count_gpu_dev_shared(const gmx_gpu_info_t *gpu_info)
+int gmx_count_gpu_dev_shared(const gmx_gpu_opt_t *gpu_opt)
  {
      int      same_count    = 0;
-    int      ngpu          = gpu_info->ncuda_dev_use;
+    int      ngpu          = gpu_opt->ncuda_dev_use;
  
-    if (gpu_info->bUserSet)
+    if (gpu_opt->bUserSet)
      {
          int      i, j;
  
@@ -449,8 +416,8 @@ int gmx_count_gpu_dev_shared(const gmx_gpu_info_t *gpu_info)
          {
              for (j = i + 1; j < ngpu; j++)
              {
-                same_count      += (gpu_info->cuda_dev_use[i] ==
-                                    gpu_info->cuda_dev_use[j]);
+                same_count      += (gpu_opt->cuda_dev_use[i] ==
+                                    gpu_opt->cuda_dev_use[j]);
              }
          }
      }
@@ -511,15 +478,11 @@ static int get_nthreads_hw_avail(FILE *fplog, const t_commrec *cr)
  }
  
  gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
-                                   gmx_bool bForceUseGPU, gmx_bool bTryUseGPU,
-                                   const char *gpu_id)
+                                   gmx_bool bDetectGPUs)
  {
-    int              i;
-    const char      *env;
-    char             sbuf[STRLEN], stmp[STRLEN];
+    char             sbuf[STRLEN];
      gmx_hw_info_t   *hw;
      gmx_gpu_info_t   gpuinfo_auto, gpuinfo_user;
-    gmx_bool         bGPUBin;
      int              ret;
  
      /* make sure no one else is doing the same thing */
@@ -533,7 +496,6 @@ gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
      if (n_hwinfo == 0)
      {
          snew(hwinfo_g, 1);
-        hwinfo_g->bConsistencyChecked = FALSE;
  
          /* detect CPUID info; no fuss, we don't detect system-wide
           * -- sloppy, but that's it for now */
@@ -546,30 +508,17 @@ gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
          hwinfo_g->nthreads_hw_avail = get_nthreads_hw_avail(fplog, cr);
  
          /* detect GPUs */
-        hwinfo_g->gpu_info.ncuda_dev_use  = 0;
-        hwinfo_g->gpu_info.cuda_dev_use   = NULL;
-        hwinfo_g->gpu_info.ncuda_dev      = 0;
-        hwinfo_g->gpu_info.cuda_dev       = NULL;
-
-#ifdef GMX_GPU
-        bGPUBin      = TRUE;
-#else
-        bGPUBin      = FALSE;
-#endif
-
-        /* Bail if binary is not compiled with GPU acceleration, but this is either
-         * explicitly (-nb gpu) or implicitly (gpu ID passed) requested. */
-        if (bForceUseGPU && !bGPUBin)
-        {
-            gmx_fatal(FARGS, "GPU acceleration requested, but %s was compiled without GPU support!", ShortProgram());
-        }
-        if (gpu_id != NULL && !bGPUBin)
-        {
-            gmx_fatal(FARGS, "GPU ID string set, but %s was compiled without GPU support!", ShortProgram());
-        }
-
-        /* run the detection if the binary was compiled with GPU support */
-        if (bGPUBin && getenv("GMX_DISABLE_GPU_DETECTION") == NULL)
+        hwinfo_g->gpu_info.ncuda_dev            = 0;
+        hwinfo_g->gpu_info.cuda_dev             = NULL;
+        hwinfo_g->gpu_info.ncuda_dev_compatible = 0;
+
+        /* Run the detection if the binary was compiled with GPU support
+         * and we requested detection.
+         */
+        hwinfo_g->gpu_info.bDetectGPUs =
+            (bGPUBinary && bDetectGPUs &&
+             getenv("GMX_DISABLE_GPU_DETECTION") == NULL);
+        if (hwinfo_g->gpu_info.bDetectGPUs)
          {
              char detection_error[STRLEN];
  
@@ -589,94 +538,133 @@ gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
                                sbuf);
              }
          }
+    }
+    /* increase the reference counter */
+    n_hwinfo++;
+
+    ret = tMPI_Thread_mutex_unlock(&hw_info_lock);
+    if (ret != 0)
+    {
+        gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno));
+    }
+
+    return hwinfo_g;
+}
+
+void gmx_parse_gpu_ids(gmx_gpu_opt_t *gpu_opt)
+{
+    char *env;
  
-        if (bForceUseGPU || bTryUseGPU)
+    if (gpu_opt->gpu_id != NULL && !bGPUBinary)
+    {
+        gmx_fatal(FARGS, "GPU ID string set, but %s was compiled without GPU support!", ShortProgram());
+    }
+
+    env = getenv("GMX_GPU_ID");
+    if (env != NULL && gpu_opt->gpu_id != NULL)
+    {
+        gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time");
+    }
+    if (env == NULL)
+    {
+        env = gpu_opt->gpu_id;
+    }
+
+    /* parse GPU IDs if the user passed any */
+    if (env != NULL)
+    {
+        parse_gpu_id_plain_string(env,
+                                  &gpu_opt->ncuda_dev_use,
+                                  &gpu_opt->cuda_dev_use);
+
+        if (gpu_opt->ncuda_dev_use == 0)
          {
-            env = getenv("GMX_GPU_ID");
-            if (env != NULL && gpu_id != NULL)
-            {
-                gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time");
-            }
-            if (env == NULL)
-            {
-                env = gpu_id;
-            }
+            gmx_fatal(FARGS, "Empty GPU ID string encountered.\n%s\n",
+                      invalid_gpuid_hint);
+        }
  
-            /* parse GPU IDs if the user passed any */
-            if (env != NULL)
-            {
-                int *gpuid, *checkres;
-                int  nid, res;
+        gpu_opt->bUserSet = TRUE;
+    }
+}
  
-                snew(gpuid, max_gpu_ids_user);
-                snew(checkres, max_gpu_ids_user);
+void gmx_select_gpu_ids(FILE *fplog, const t_commrec *cr,
+                        const gmx_gpu_info_t *gpu_info,
+                        gmx_bool bForceUseGPU,
+                        gmx_gpu_opt_t *gpu_opt)
+{
+    int              i;
+    const char      *env;
+    char             sbuf[STRLEN], stmp[STRLEN];
  
-                parse_gpu_id_plain_string(env, &nid, gpuid);
+    /* Bail if binary is not compiled with GPU acceleration, but this is either
+     * explicitly (-nb gpu) or implicitly (gpu ID passed) requested. */
+    if (bForceUseGPU && !bGPUBinary)
+    {
+        gmx_fatal(FARGS, "GPU acceleration requested, but %s was compiled without GPU support!", ShortProgram());
+    }
  
-                if (nid == 0)
-                {
-                    gmx_fatal(FARGS, "Empty GPU ID string encountered.\n%s\n",
-                              invalid_gpuid_hint);
-                }
+    if (gpu_opt->bUserSet)
+    {
+        /* Check the GPU IDs passed by the user.
+         * (GPU IDs have been parsed by gmx_parse_gpu_ids before)
+         */
+        int *checkres;
+        int  res;
  
-                res = check_select_cuda_gpus(checkres, &hwinfo_g->gpu_info,
-                                             gpuid, nid);
+        snew(checkres, gpu_opt->ncuda_dev_use);
  
-                if (!res)
-                {
-                    print_gpu_detection_stats(fplog, &hwinfo_g->gpu_info, cr);
-
-                    sprintf(sbuf, "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n");
-                    for (i = 0; i < nid; i++)
-                    {
-                        if (checkres[i] != egpuCompatible)
-                        {
-                            sprintf(stmp, "    GPU #%d: %s\n",
-                                    gpuid[i], gpu_detect_res_str[checkres[i]]);
-                            strcat(sbuf, stmp);
-                        }
-                    }
-                    gmx_fatal(FARGS, "%s", sbuf);
-                }
+        res = check_selected_cuda_gpus(checkres, gpu_info, gpu_opt);
  
-                hwinfo_g->gpu_info.bUserSet = TRUE;
+        if (!res)
+        {
+            print_gpu_detection_stats(fplog, gpu_info, cr);
  
-                sfree(gpuid);
-                sfree(checkres);
-            }
-            else
+            sprintf(sbuf, "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n");
+            for (i = 0; i < gpu_opt->ncuda_dev_use; i++)
              {
-                pick_compatible_gpus(&hwinfo_g->gpu_info);
-                hwinfo_g->gpu_info.bUserSet = FALSE;
+                if (checkres[i] != egpuCompatible)
+                {
+                    sprintf(stmp, "    GPU #%d: %s\n",
+                            gpu_opt->cuda_dev_use[i],
+                            gpu_detect_res_str[checkres[i]]);
+                    strcat(sbuf, stmp);
+                }
              }
+            gmx_fatal(FARGS, "%s", sbuf);
+        }
  
-            /* decide whether we can use GPU */
-            hwinfo_g->bCanUseGPU = (hwinfo_g->gpu_info.ncuda_dev_use > 0);
-            if (!hwinfo_g->bCanUseGPU && bForceUseGPU)
-            {
-                gmx_fatal(FARGS, "GPU acceleration requested, but no compatible GPUs were detected.");
-            }
+        sfree(checkres);
+    }
+    else
+    {
+        pick_compatible_gpus(&hwinfo_g->gpu_info, gpu_opt);
+
+        if (gpu_opt->ncuda_dev_use > cr->nrank_pp_intranode)
+        {
+            /* We picked more GPUs than we can use: limit the number.
+             * We print detailed messages about this later in
+             * gmx_check_hw_runconf_consistency.
+             */
+            limit_num_gpus_used(gpu_opt, cr->nrank_pp_intranode);
          }
+
+        gpu_opt->bUserSet = FALSE;
      }
-    /* increase the reference counter */
-    n_hwinfo++;
  
-    ret = tMPI_Thread_mutex_unlock(&hw_info_lock);
-    if (ret != 0)
+    /* If the user asked for a GPU, check whether we have a GPU */
+    if (bForceUseGPU && gpu_info->ncuda_dev_compatible == 0)
      {
-        gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno));
+        gmx_fatal(FARGS, "GPU acceleration requested, but no compatible GPUs were detected.");
      }
-
-    return hwinfo_g;
  }
  
-static void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count)
+static void limit_num_gpus_used(gmx_gpu_opt_t *gpu_opt, int count)
  {
      int ndev_use;
  
-    assert(hwinfo);
+    assert(gpu_opt);
  
-    ndev_use = hwinfo->gpu_info.ncuda_dev_use;
+    ndev_use = gpu_opt->ncuda_dev_use;
  
      if (count > ndev_use)
      {
@@ -693,7 +681,7 @@ static void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count)
      }
  
      /* TODO: improve this implementation: either sort GPUs or remove the weakest here */
-    hwinfo->gpu_info.ncuda_dev_use = count;
+    gpu_opt->ncuda_dev_use = count;
  }
  
  void gmx_hardware_info_free(gmx_hw_info_t *hwinfo)
diff --git a/src/gmxlib/gmx_thread_affinity.c b/src/gmxlib/gmx_thread_affinity.c

index e3870412e9244207c031891f381fe5f278a16863..badb753428b3d7ddda7afd182bcb8211f5439620 100644 (file)
--- a/src/gmxlib/gmx_thread_affinity.c
+++ b/src/gmxlib/gmx_thread_affinity.c
@@ -50,7 +50,6 @@
  #include "gmx_cpuid.h"
  #include "gmx_omp.h"
  #include "gmx_omp_nthreads.h"
-#include "mdrun.h"
  #include "md_logging.h"
  #include "statutil.h"
  #include "gmx_thread_affinity.h"
diff --git a/src/gmxlib/gpu_utils/gpu_utils.cu b/src/gmxlib/gpu_utils/gpu_utils.cu

index e11367dc7e8451533c6a1aaf878ec9102c24b932..8640ed3bc80e74e69c1415bd3c4ba851abccafc5 100644 (file)
--- a/src/gmxlib/gpu_utils/gpu_utils.cu
+++ b/src/gmxlib/gpu_utils/gpu_utils.cu
@@ -492,9 +492,12 @@ int do_timed_memtest(int dev_id, int time_constr)
   * \param[out] result_str   the message related to the error that occurred
   *                          during the initialization (if there was any).
   * \param[in] gpu_info      GPU info of all detected devices in the system.
+ * \param[in] gpu_opt       options for using the GPUs in gpu_info
   * \returns                 true if no error occurs during initialization.
   */
-gmx_bool init_gpu(int mygpu, char *result_str, const gmx_gpu_info_t *gpu_info)
+gmx_bool init_gpu(int mygpu, char *result_str,
+                  const gmx_gpu_info_t *gpu_info,
+                  const gmx_gpu_opt_t *gpu_opt)
  {
      cudaError_t stat;
      char sbuf[STRLEN];
@@ -503,15 +506,15 @@ gmx_bool init_gpu(int mygpu, char *result_str, const gmx_gpu_info_t *gpu_info)
      assert(gpu_info);
      assert(result_str);
  
-    if (mygpu < 0 || mygpu >= gpu_info->ncuda_dev_use)
+    if (mygpu < 0 || mygpu >= gpu_opt->ncuda_dev_use)
      {
          sprintf(sbuf, "Trying to initialize an inexistent GPU: "
                  "there are %d %s-selected GPU(s), but #%d was requested.",
-                 gpu_info->ncuda_dev_use, gpu_info->bUserSet ? "user" : "auto", mygpu);
+                 gpu_opt->ncuda_dev_use, gpu_opt->bUserSet ? "user" : "auto", mygpu);
          gmx_incons(sbuf);
      }
  
-    gpuid = gpu_info->cuda_dev[gpu_info->cuda_dev_use[mygpu]].id;
+    gpuid = gpu_info->cuda_dev[gpu_opt->cuda_dev_use[mygpu]].id;
  
      stat = cudaSetDevice(gpuid);
      strncpy(result_str, cudaGetErrorString(stat), STRLEN);
@@ -651,6 +654,8 @@ int detect_cuda_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
      assert(gpu_info);
      assert(err_str);
  
+    gpu_info->ncuda_dev_compatible = 0;
+
      ndev    = 0;
      devs    = NULL;
  
@@ -677,6 +682,11 @@ int detect_cuda_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
              devs[i].id   = i;
              devs[i].prop = prop;
              devs[i].stat = checkres;
+
+            if (checkres == egpuCompatible)
+            {
+                gpu_info->ncuda_dev_compatible++;
+            }
          }
          retval = 0;
      }
@@ -692,15 +702,16 @@ int detect_cuda_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
   * This function selects the compatible gpus and initializes
   * gpu_info->cuda_dev_use and gpu_info->ncuda_dev_use.
   *
- * Given the list of GPUs available in the system the it checks each gpu in
- * gpu_info->cuda_dev and puts the the indices (into gpu_info->cuda_dev) of
- * the compatible ones into cuda_dev_use with this marking the respective
- * GPUs as "available for use."
+ * Given the list of GPUs available in the system check each device in
+ * gpu_info->cuda_dev and place the indices of the compatible GPUs into
+ * cuda_dev_use with this marking the respective GPUs as "available for use."
   * Note that \detect_cuda_gpus must have been called before.
   *
- * \param[in]    gpu_info    pointer to structure holding GPU information
+ * \param[in]     gpu_info    pointer to structure holding GPU information
+ * \param[in,out] gpu_opt     pointer to structure holding GPU options
   */
-void pick_compatible_gpus(gmx_gpu_info_t *gpu_info)
+void pick_compatible_gpus(const gmx_gpu_info_t *gpu_info,
+                          gmx_gpu_opt_t *gpu_opt)
  {
      int i, ncompat;
      int *compat;
@@ -720,53 +731,52 @@ void pick_compatible_gpus(gmx_gpu_info_t *gpu_info)
          }
      }
  
-    gpu_info->ncuda_dev_use = ncompat;
-    snew(gpu_info->cuda_dev_use, ncompat);
-    memcpy(gpu_info->cuda_dev_use, compat, ncompat*sizeof(*compat));
+    gpu_opt->ncuda_dev_use = ncompat;
+    snew(gpu_opt->cuda_dev_use, ncompat);
+    memcpy(gpu_opt->cuda_dev_use, compat, ncompat*sizeof(*compat));
      sfree(compat);
  }
  
  /*! \brief Check the existence/compatibility of a set of GPUs specified by their device IDs.
   *
- * Given the a list of GPU devide IDs in \requested_devs, check for the
- * existence and compatibility of the respective GPUs and fill in \gpu_info
- * with the collected information. Also provide the caller with an array with
+ * Given the a list of gpu->ncuda_dev_use GPU device IDs stored in
+ * gpu_opt->cuda_dev_use check the existence and compatibility
+ * of the respective GPUs. Also provide the caller with an array containing
   * the result of checks in \checkres.
   *
   * \param[out]  checkres    check result for each ID passed in \requested_devs
   * \param[in]   gpu_info    pointer to structure holding GPU information
- * \param[in]   requested_devs array of requested device IDs
- * \param[in]   count       number of IDs in \requested_devs
- * \returns                 TRUE if every requested GPU is compatible
+ * \param[out]  gpu_opt     pointer to structure holding GPU options
+ * \returns                 TRUE if every the requested GPUs are compatible
   */
-gmx_bool check_select_cuda_gpus(int *checkres, gmx_gpu_info_t *gpu_info,
-                                const int *requested_devs, int count)
+gmx_bool check_selected_cuda_gpus(int *checkres,
+                                  const gmx_gpu_info_t *gpu_info,
+                                  gmx_gpu_opt_t *gpu_opt)
  {
      int i, id;
      bool bAllOk;
  
      assert(checkres);
      assert(gpu_info);
-    assert(requested_devs);
-    assert(count >= 0);
+    assert(gpu_opt->ncuda_dev_use >= 0);
  
-    if (count == 0)
+    if (gpu_opt->ncuda_dev_use == 0)
      {
          return TRUE;
      }
  
+    assert(gpu_opt->cuda_dev_use);
+
      /* we will assume that all GPUs requested are valid IDs,
         otherwise we'll bail anyways */
-    gpu_info->ncuda_dev_use = count;
-    snew(gpu_info->cuda_dev_use, count);
  
      bAllOk = true;
-    for (i = 0; i < count; i++)
+    for (i = 0; i < gpu_opt->ncuda_dev_use; i++)
      {
-        id = requested_devs[i];
+        id = gpu_opt->cuda_dev_use[i];
  
          /* devices are stored in increasing order of IDs in cuda_dev */
-        gpu_info->cuda_dev_use[i] = id;
+        gpu_opt->cuda_dev_use[i] = id;
  
          checkres[i] = (id >= gpu_info->ncuda_dev) ?
              egpuNonexistent : gpu_info->cuda_dev[id].stat;
@@ -788,7 +798,6 @@ void free_gpu_info(const gmx_gpu_info_t *gpu_info)
          return;
      }
  
-    sfree(gpu_info->cuda_dev_use);
      sfree(gpu_info->cuda_dev);
  }
  
@@ -841,18 +850,22 @@ void get_gpu_device_info_string(char *s, const gmx_gpu_info_t *gpu_info, int ind
   * respective CUDA GPU.
   *
   * \param[in]    gpu_info   pointer to structure holding GPU information
+ * \param[in]    gpu_opt    pointer to structure holding GPU options
   * \param[in]    idx        index into the array of used GPUs
   * \returns                 device ID of the requested GPU
   */
-int get_gpu_device_id(const gmx_gpu_info_t *gpu_info, int idx)
+int get_gpu_device_id(const gmx_gpu_info_t *gpu_info,
+                      const gmx_gpu_opt_t *gpu_opt,
+                      int idx)
  {
      assert(gpu_info);
-    if (idx < 0 && idx >= gpu_info->ncuda_dev_use)
+    assert(gpu_opt);
+    if (idx < 0 && idx >= gpu_opt->ncuda_dev_use)
      {
          return -1;
      }
  
-    return gpu_info->cuda_dev[gpu_info->cuda_dev_use[idx]].id;
+    return gpu_info->cuda_dev[gpu_opt->cuda_dev_use[idx]].id;
  }
  
  /*! \brief Returns the device ID of the GPU currently in use.
diff --git a/src/kernel/mdrun.c b/src/kernel/mdrun.c

index a5c0e6a6256def6a24b6b0bb702c5eef114b58c5..29cb7e3e4326d9430744aaf3f9458687b59844ae 100644 (file)
--- a/src/kernel/mdrun.c
+++ b/src/kernel/mdrun.c
@@ -451,7 +451,12 @@ int cmain(int argc, char *argv[])
      output_env_t  oenv                  = NULL;
      const char   *deviceOptions         = "";
  
-    gmx_hw_opt_t  hw_opt = {0, 0, 0, 0, threadaffSEL, 0, 0, NULL};
+    /* Non transparent initialization of a complex gmx_hw_opt_t struct.
+     * But unfortunately we are not allowed to call a function here,
+     * since declarations follow below.
+     */
+    gmx_hw_opt_t  hw_opt = { 0, 0, 0, 0, threadaffSEL, 0, 0,
+                             { NULL, FALSE, 0, NULL } };
  
      t_pargs       pa[] = {
  
@@ -477,7 +482,7 @@ int cmain(int argc, char *argv[])
            "The starting logical core number for pinning to cores; used to avoid pinning threads from different mdrun instances to the same core" },
          { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride},
            "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" },
-        { "-gpu_id",  FALSE, etSTR, {&hw_opt.gpu_id},
+        { "-gpu_id",  FALSE, etSTR, {&hw_opt.gpu_opt.gpu_id},
            "List of GPU id's to use" },
          { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
            "Check for all bonded interactions with DD" },
diff --git a/src/kernel/runner.c b/src/kernel/runner.c

index 70f80d13d9658aa199af99f7c28ea3c233671591..3665c4740619a783fbd7936e0ed89e4fbb156acc 100644 (file)
--- a/src/kernel/runner.c
+++ b/src/kernel/runner.c
@@ -112,7 +112,7 @@ tMPI_Thread_mutex_t deform_init_box_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
  #ifdef GMX_THREAD_MPI
  struct mdrunner_arglist
  {
-    gmx_hw_opt_t   *hw_opt;
+    gmx_hw_opt_t    hw_opt;
      FILE           *fplog;
      t_commrec      *cr;
      int             nfile;
@@ -170,7 +170,7 @@ static void mdrunner_start_fn(void *arg)
          fplog = mc.fplog;
      }
  
-    mda->ret = mdrunner(mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv,
+    mda->ret = mdrunner(&mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv,
                          mc.bVerbose, mc.bCompact, mc.nstglobalcomm,
                          mc.ddxyz, mc.dd_node_order, mc.rdd,
                          mc.rconstr, mc.dddlb_opt, mc.dlb_scale,
@@ -215,7 +215,8 @@ static t_commrec *mdrunner_start_threads(gmx_hw_opt_t *hw_opt,
      fnmn = dup_tfn(nfile, fnm);
  
      /* fill the data structure to pass as void pointer to thread start fn */
-    mda->hw_opt         = hw_opt;
+    /* hw_opt contains pointers, which should all be NULL at this stage */
+    mda->hw_opt         = *hw_opt;
      mda->fplog          = fplog;
      mda->cr             = cr;
      mda->nfile          = nfile;
@@ -370,16 +371,23 @@ static int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
          nthreads_tot_max = nthreads_hw;
      }
  
-    bCanUseGPU = (inputrec->cutoff_scheme == ecutsVERLET && hwinfo->bCanUseGPU);
+    bCanUseGPU = (inputrec->cutoff_scheme == ecutsVERLET &&
+                  hwinfo->gpu_info.ncuda_dev_compatible > 0);
      if (bCanUseGPU)
      {
-        ngpu = hwinfo->gpu_info.ncuda_dev_use;
+        ngpu = hwinfo->gpu_info.ncuda_dev_compatible;
      }
      else
      {
          ngpu = 0;
      }
  
+    if (inputrec->cutoff_scheme == ecutsGROUP)
+    {
+        /* We checked this before, but it doesn't hurt to do it once more */
+        assert(hw_opt->nthreads_omp == 1);
+    }
+
      nthreads_tmpi =
          get_tmpi_omp_thread_division(hwinfo, hw_opt, nthreads_tot_max, ngpu);
  
@@ -641,20 +649,13 @@ static void increase_nstlist(FILE *fp, t_commrec *cr,
  }
  
  static void prepare_verlet_scheme(FILE                *fplog,
-                                  const gmx_hw_info_t *hwinfo,
                                    t_commrec           *cr,
                                    const char          *nbpu_opt,
                                    t_inputrec          *ir,
                                    const gmx_mtop_t    *mtop,
                                    matrix               box,
-                                  gmx_bool            *bUseGPU)
+                                  gmx_bool             bUseGPU)
  {
-    /* Here we only check for GPU usage on the MPI master process,
-     * as here we don't know how many GPUs we will use yet.
-     * We check for a GPU on all processes later.
-     */
-    *bUseGPU = hwinfo->bCanUseGPU || (getenv("GMX_EMULATE_GPU") != NULL);
-
      if (ir->verletbuf_drift > 0)
      {
          /* Update the Verlet buffer size for the current run setup */
@@ -665,7 +666,7 @@ static void prepare_verlet_scheme(FILE                *fplog,
           * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
           * and 4x2 gives a larger buffer than 4x4, this is ok.
           */
-        verletbuf_get_list_setup(*bUseGPU, &ls);
+        verletbuf_get_list_setup(bUseGPU, &ls);
  
          calc_verlet_buffer_size(mtop, det(box), ir,
                                  ir->verletbuf_drift, &ls,
@@ -685,7 +686,7 @@ static void prepare_verlet_scheme(FILE                *fplog,
  
      /* With GPU or emulation we should check nstlist for performance */
      if ((EI_DYNAMICS(ir->eI) &&
-         *bUseGPU &&
+         bUseGPU &&
           ir->nstlist < NSTLIST_GPU_ENOUGH) ||
          getenv(NSTLIST_ENVVAR) != NULL)
      {
@@ -771,9 +772,19 @@ static void convert_to_verlet_scheme(FILE *fplog,
      gmx_mtop_remove_chargegroups(mtop);
  }
  
-static void check_and_update_hw_opt(gmx_hw_opt_t *hw_opt,
-                                    int           cutoff_scheme,
-                                    gmx_bool      bIsSimMaster)
+static void print_hw_opt(FILE *fp, const gmx_hw_opt_t *hw_opt)
+{
+    fprintf(fp, "hw_opt: nt %d ntmpi %d ntomp %d ntomp_pme %d gpu_id '%s'\n",
+            hw_opt->nthreads_tot,
+            hw_opt->nthreads_tmpi,
+            hw_opt->nthreads_omp,
+            hw_opt->nthreads_omp_pme,
+            hw_opt->gpu_opt.gpu_id != NULL ? hw_opt->gpu_opt.gpu_id : "");
+}
+
+/* Checks we can do when we don't (yet) know the cut-off scheme */
+static void check_and_update_hw_opt_1(gmx_hw_opt_t *hw_opt,
+                                      gmx_bool      bIsSimMaster)
  {
      gmx_omp_nthreads_read_env(&hw_opt->nthreads_omp, bIsSimMaster);
  
@@ -837,18 +848,6 @@ static void check_and_update_hw_opt(gmx_hw_opt_t *hw_opt,
      }
  #endif
  
-    if (cutoff_scheme == ecutsGROUP)
-    {
-        /* We only have OpenMP support for PME only nodes */
-        if (hw_opt->nthreads_omp > 1)
-        {
-            gmx_fatal(FARGS, "OpenMP threads have been requested with cut-off scheme %s, but these are only supported with cut-off scheme %s",
-                      ecutscheme_names[cutoff_scheme],
-                      ecutscheme_names[ecutsVERLET]);
-        }
-        hw_opt->nthreads_omp = 1;
-    }
-
      if (hw_opt->nthreads_omp_pme > 0 && hw_opt->nthreads_omp <= 0)
      {
          gmx_fatal(FARGS, "You need to specify -ntomp in addition to -ntomp_pme");
@@ -871,15 +870,59 @@ static void check_and_update_hw_opt(gmx_hw_opt_t *hw_opt,
          hw_opt->nthreads_omp_pme = hw_opt->nthreads_omp;
      }
  
+    /* Parse GPU IDs, if provided.
+     * We check consistency with the tMPI thread count later.
+     */
+    gmx_parse_gpu_ids(&hw_opt->gpu_opt);
+
+#ifdef GMX_THREAD_MPI
+    if (hw_opt->gpu_opt.ncuda_dev_use > 0 && hw_opt->nthreads_tmpi == 0)
+    {
+        /* Set the number of MPI threads equal to the number of GPUs */
+        hw_opt->nthreads_tmpi = hw_opt->gpu_opt.ncuda_dev_use;
+
+        if (hw_opt->nthreads_tot > 0 &&
+            hw_opt->nthreads_tmpi > hw_opt->nthreads_tot)
+        {
+            /* We have more GPUs than total threads requested.
+             * We choose to (later) generate a mismatch error,
+             * instead of launching more threads than requested.
+             */
+            hw_opt->nthreads_tmpi = hw_opt->nthreads_tot;
+        }
+    }
+#endif
+
      if (debug)
      {
-        fprintf(debug, "hw_opt: nt %d ntmpi %d ntomp %d ntomp_pme %d gpu_id '%s'\n",
-                hw_opt->nthreads_tot,
-                hw_opt->nthreads_tmpi,
-                hw_opt->nthreads_omp,
-                hw_opt->nthreads_omp_pme,
-                hw_opt->gpu_id != NULL ? hw_opt->gpu_id : "");
+        print_hw_opt(debug, hw_opt);
+    }
+}
  
+/* Checks we can do when we know the cut-off scheme */
+static void check_and_update_hw_opt_2(gmx_hw_opt_t *hw_opt,
+                                      int           cutoff_scheme)
+{
+    if (cutoff_scheme == ecutsGROUP)
+    {
+        /* We only have OpenMP support for PME only nodes */
+        if (hw_opt->nthreads_omp > 1)
+        {
+            gmx_fatal(FARGS, "OpenMP threads have been requested with cut-off scheme %s, but these are only supported with cut-off scheme %s",
+                      ecutscheme_names[cutoff_scheme],
+                      ecutscheme_names[ecutsVERLET]);
+        }
+        hw_opt->nthreads_omp = 1;
+    }
+
+    if (hw_opt->nthreads_omp_pme <= 0 && hw_opt->nthreads_omp > 0)
+    {
+        hw_opt->nthreads_omp_pme = hw_opt->nthreads_omp;
+    }
+
+    if (debug)
+    {
+        print_hw_opt(debug, hw_opt);
      }
  }
  
@@ -917,14 +960,6 @@ static void override_nsteps_cmdline(FILE            *fplog,
      }
  }
  
-/* Data structure set by SIMMASTER which needs to be passed to all nodes
- * before the other nodes have read the tpx file and called gmx_detect_hardware.
- */
-typedef struct {
-    int      cutoff_scheme; /* The cutoff scheme from inputrec_t */
-    gmx_bool bUseGPU;       /* Use GPU or GPU emulation          */
-} master_inf_t;
-
  int mdrunner(gmx_hw_opt_t *hw_opt,
               FILE *fplog, t_commrec *cr, int nfile,
               const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
@@ -969,7 +1004,8 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
      int             nthreads_pp  = 1;
      gmx_membed_t    membed       = NULL;
      gmx_hw_info_t  *hwinfo       = NULL;
-    master_inf_t    minf         = {-1, FALSE};
+    /* The master rank decides early on bUseGPU and broadcasts this later */
+    gmx_bool        bUseGPU      = FALSE;
  
      /* CAUTION: threads may be started later on in this function, so
         cr doesn't reflect the final parallel state right now */
@@ -986,8 +1022,7 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
  
      /* Detect hardware, gather information. This is an operation that is
       * global for this process (MPI rank). */
-    hwinfo = gmx_detect_hardware(fplog, cr,
-                                 bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);
+    hwinfo = gmx_detect_hardware(fplog, cr, bTryUseGPU);
  
  
      snew(state, 1);
@@ -1002,17 +1037,17 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
              convert_to_verlet_scheme(fplog, inputrec, mtop, det(state->box));
          }
  
-
-        minf.cutoff_scheme = inputrec->cutoff_scheme;
-        minf.bUseGPU       = FALSE;
-
          if (inputrec->cutoff_scheme == ecutsVERLET)
          {
-            prepare_verlet_scheme(fplog, hwinfo, cr, nbpu_opt,
-                                  inputrec, mtop, state->box,
-                                  &minf.bUseGPU);
+            /* Here the master rank decides if all ranks will use GPUs */
+            bUseGPU = (hwinfo->gpu_info.ncuda_dev_compatible > 0 ||
+                       getenv("GMX_EMULATE_GPU") != NULL);
+
+            prepare_verlet_scheme(fplog, cr,
+                                  nbpu_opt, inputrec, mtop, state->box,
+                                  bUseGPU);
          }
-        else if (hwinfo->bCanUseGPU)
+        else if (hwinfo->gpu_info.ncuda_dev_compatible > 0)
          {
              md_print_warn(cr, fplog,
                            "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
@@ -1034,17 +1069,6 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
          }
  #endif
      }
-#ifndef GMX_THREAD_MPI
-    if (PAR(cr))
-    {
-        gmx_bcast_sim(sizeof(minf), &minf, cr);
-    }
-#endif
-    if (minf.bUseGPU && cr->npmenodes == -1)
-    {
-        /* Don't automatically use PME-only nodes with GPUs */
-        cr->npmenodes = 0;
-    }
  
      /* Check for externally set OpenMP affinity and turn off internal
       * pinning if any is found. We need to do this check early to tell
@@ -1053,18 +1077,18 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
       */
      gmx_omp_check_thread_affinity(fplog, cr, hw_opt);
  
-#ifdef GMX_THREAD_MPI
-    /* With thread-MPI inputrec is only set here on the master thread */
+    /* Check and update the hardware options for internal consistency */
+    check_and_update_hw_opt_1(hw_opt, SIMMASTER(cr));
+
      if (SIMMASTER(cr))
-#endif
      {
-        check_and_update_hw_opt(hw_opt, minf.cutoff_scheme, SIMMASTER(cr));
-
  #ifdef GMX_THREAD_MPI
-        /* Early check for externally set process affinity. Can't do over all
-         * MPI processes because hwinfo is not available everywhere, but with
-         * thread-MPI it's needed as pinning might get turned off which needs
-         * to be known before starting thread-MPI. */
+        /* Early check for externally set process affinity.
+         * With thread-MPI this is needed as pinning might get turned off,
+         * which needs to be known before starting thread-MPI.
+         * With thread-MPI hw_opt is processed here on the master rank
+         * and passed to the other ranks later, so we only do this on master.
+         */
          gmx_check_thread_affinity_set(fplog,
                                        NULL,
                                        hw_opt, hwinfo->nthreads_hw_avail, FALSE);
@@ -1087,6 +1111,12 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
  #ifdef GMX_THREAD_MPI
      if (SIMMASTER(cr))
      {
+        /* Since the master knows the cut-off scheme, update hw_opt for this.
+         * This is done later for normal MPI and also once more with tMPI
+         * for all tMPI ranks.
+         */
+        check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme);
+
          /* NOW the threads will be started: */
          hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo,
                                                   hw_opt,
@@ -1379,6 +1409,9 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
      fflush(stderr);
  #endif
  
+    /* Check and update hw_opt for the cut-off scheme */
+    check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme);
+
      gmx_omp_nthreads_init(fplog, cr,
                            hwinfo->nthreads_hw_avail,
                            hw_opt->nthreads_omp,
@@ -1386,9 +1419,29 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
                            (cr->duty & DUTY_PP) == 0,
                            inputrec->cutoff_scheme == ecutsVERLET);
  
-    /* check consistency and decide on the number of gpus to use. */
-    gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt->nthreads_tmpi,
-                                     minf.bUseGPU);
+    if (PAR(cr))
+    {
+        /* The master rank decided on the use of GPUs,
+         * broadcast this information to all ranks.
+         */
+        gmx_bcast_sim(sizeof(bUseGPU), &bUseGPU, cr);
+    }
+
+    if (bUseGPU)
+    {
+        if (cr->npmenodes == -1)
+        {
+            /* Don't automatically use PME-only nodes with GPUs */
+            cr->npmenodes = 0;
+        }
+
+        /* Select GPU id's to use */
+        gmx_select_gpu_ids(fplog, cr, &hwinfo->gpu_info, bForceUseGPU,
+                           &hw_opt->gpu_opt);
+    }
+
+    /* check consistency of CPU acceleration and number of GPUs selected */
+    gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt, bUseGPU);
  
      /* getting number of PP/PME threads
         PME: env variable should be read only on one node to make sure it is
@@ -1430,8 +1483,9 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
          }
  
          /* Initiate forcerecord */
-        fr         = mk_forcerec();
-        fr->hwinfo = hwinfo;
+        fr          = mk_forcerec();
+        fr->hwinfo  = hwinfo;
+        fr->gpu_opt = &hw_opt->gpu_opt;
          init_forcerec(fplog, oenv, fr, fcd, inputrec, mtop, cr, box, FALSE,
                        opt2fn("-table", nfile, fnm),
                        opt2fn("-tabletf", nfile, fnm),
diff --git a/src/mdlib/forcerec.c b/src/mdlib/forcerec.c

index 2b79f8e447060f7162e7069136b6d27c9fe50d4f..f7da120aca08c5c7fc1f85ae4d63ec03fa2f4177 100644 (file)
--- a/src/mdlib/forcerec.c
+++ b/src/mdlib/forcerec.c
@@ -1662,7 +1662,8 @@ static void pick_nbnxn_resources(FILE                *fp,
                                   const gmx_hw_info_t *hwinfo,
                                   gmx_bool             bDoNonbonded,
                                   gmx_bool            *bUseGPU,
-                                 gmx_bool            *bEmulateGPU)
+                                 gmx_bool            *bEmulateGPU,
+                                 const gmx_gpu_opt_t *gpu_opt)
  {
      gmx_bool bEmulateGPUEnvVarSet;
      char     gpu_err_str[STRLEN];
@@ -1683,21 +1684,24 @@ static void pick_nbnxn_resources(FILE                *fp,
       * Note that you should freezing the system as otherwise it will explode.
       */
      *bEmulateGPU = (bEmulateGPUEnvVarSet ||
-                    (!bDoNonbonded && hwinfo->bCanUseGPU));
+                    (!bDoNonbonded &&
+                     gpu_opt->ncuda_dev_use > 0));
  
      /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
       */
-    if (hwinfo->bCanUseGPU && !(*bEmulateGPU))
+    if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
      {
          /* Each PP node will use the intra-node id-th device from the
           * list of detected/selected GPUs. */
-        if (!init_gpu(cr->rank_pp_intranode, gpu_err_str, &hwinfo->gpu_info))
+        if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
+                      &hwinfo->gpu_info, gpu_opt))
          {
              /* At this point the init should never fail as we made sure that
               * we have all the GPUs we need. If it still does, we'll bail. */
              gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
                        cr->nodeid,
-                      get_gpu_device_id(&hwinfo->gpu_info, cr->rank_pp_intranode),
+                      get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
+                                        cr->rank_pp_intranode),
                        gpu_err_str);
          }
  
@@ -1898,7 +1902,8 @@ static void init_nb_verlet(FILE                *fp,
      pick_nbnxn_resources(fp, cr, fr->hwinfo,
                           fr->bNonbonded,
                           &nbv->bUseGPU,
-                         &bEmulateGPU);
+                         &bEmulateGPU,
+                         fr->gpu_opt);
  
      nbv->nbs = NULL;
  
@@ -1946,7 +1951,8 @@ static void init_nb_verlet(FILE                *fp,
          /* init the NxN GPU data; the last argument tells whether we'll have
           * both local and non-local NB calculation on GPU */
          nbnxn_cuda_init(fp, &nbv->cu_nbv,
-                        &fr->hwinfo->gpu_info, cr->rank_pp_intranode,
+                        &fr->hwinfo->gpu_info, fr->gpu_opt,
+                        cr->rank_pp_intranode,
                          (nbv->ngrp > 1) && !bHybridGPURun);
  
          if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
@@ -2059,7 +2065,7 @@ void init_forcerec(FILE              *fp,
           * In mdrun, hwinfo has already been set before calling init_forcerec.
           * Here we ignore GPUs, as tools will not use them anyhow.
           */
-        fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE, FALSE, NULL);
+        fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
      }
  
      /* By default we turn acceleration on, but it might be turned off further down... */
diff --git a/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu b/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu

index 4daa238bec0afb834d8882d69a5899e9a0c723e7..5b84773943dbaa76239a8851a5950bacfadba1a1 100644 (file)
--- a/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
+++ b/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
@@ -505,7 +505,9 @@ static int pick_nbnxn_kernel_version(FILE            *fplog,
  
  void nbnxn_cuda_init(FILE *fplog,
                       nbnxn_cuda_ptr_t *p_cu_nb,
-                     const gmx_gpu_info_t *gpu_info, int my_gpu_index,
+                     const gmx_gpu_info_t *gpu_info,
+                     const gmx_gpu_opt_t *gpu_opt,
+                     int my_gpu_index,
                       gmx_bool bLocalAndNonlocal)
  {
      cudaError_t stat;
@@ -540,7 +542,7 @@ void nbnxn_cuda_init(FILE *fplog,
      init_plist(nb->plist[eintLocal]);
  
      /* set device info, just point it to the right GPU among the detected ones */
-    nb->dev_info = &gpu_info->cuda_dev[get_gpu_device_id(gpu_info, my_gpu_index)];
+    nb->dev_info = &gpu_info->cuda_dev[get_gpu_device_id(gpu_info, gpu_opt, my_gpu_index)];
  
      /* local/non-local GPU streams */
      stat = cudaStreamCreate(&nb->stream[eintLocal]);
@@ -626,7 +628,7 @@ void nbnxn_cuda_init(FILE *fplog,
           *   - GPUs are not being shared.
           */
          bool bShouldUsePollSync = (bX86 && bTMPIAtomics &&
-                                   (gmx_count_gpu_dev_shared(gpu_info) < 1));
+                                   (gmx_count_gpu_dev_shared(gpu_opt) < 1));
  
          if (bStreamSync)
          {
author	Berk Hess <hess@kth.se>
	Thu, 10 Oct 2013 15:39:25 +0000 (17:39 +0200)
committer	Berk Hess <hess@kth.se>
	Thu, 31 Oct 2013 22:56:10 +0000 (23:56 +0100)
include/gmx_cpuid.h		patch \| blob \| history
include/gmx_detect_hardware.h		patch \| blob \| history
include/gpu_utils.h		patch \| blob \| history
include/mdrun.h		patch \| blob \| history
include/nbnxn_cuda_data_mgmt.h		patch \| blob \| history
include/types/forcerec.h		patch \| blob \| history
include/types/hw_info.h		patch \| blob \| history
src/gmxlib/gmx_cpuid.c		patch \| blob \| history
src/gmxlib/gmx_detect_hardware.c		patch \| blob \| history
src/gmxlib/gmx_thread_affinity.c		patch \| blob \| history
src/gmxlib/gpu_utils/gpu_utils.cu		patch \| blob \| history
src/kernel/mdrun.c		patch \| blob \| history
src/kernel/runner.c		patch \| blob \| history
src/mdlib/forcerec.c		patch \| blob \| history
src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu		patch \| blob \| history