for (i = 0; i < ndev; i++)
{
get_gpu_device_info_string(stmp, gpu_info, i);
- strcat(sbuf, " ");
+ strcat(sbuf, " ");
strcat(sbuf, stmp);
if (i < ndev - 1)
{
bEmulateGPU = (getenv("GMX_EMULATE_GPU") != NULL);
bMaxMpiThreadsSet = (getenv("GMX_MAX_MPI_THREADS") != NULL);
- /* check the SIMD level mdrun is compiled with against hardware
- capabilities */
- /* TODO: Here we assume homogeneous hardware which is not necessarily
- the case! Might not hurt to add an extra check over MPI. */
- gmx_cpuid_simd_check(hwinfo->cpuid_info, fplog, SIMMASTER(cr));
-
- check_use_of_rdtscp_on_this_cpu(fplog, cr, hwinfo);
-
- /* NOTE: this print is only for and on one physical node */
- print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);
-
if (hwinfo->gpu_info.n_dev_compatible > 0)
{
std::string gpuUseageReport;
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
/* NOTE: this print is only for and on one physical node */
- md_print_info(cr, fplog, gpuUseageReport.c_str());
+ md_print_info(cr, fplog, "%s\n", gpuUseageReport.c_str());
}
/* Need to ensure that we have enough GPUs:
return uniq_count;
}
+static int get_ncores(gmx_cpuid_t cpuid)
+{
+ int nprocessors, npackages, ncores_per_package, nhwthreads_per_core;
+ const int *package_id, *core_id, *hwthread_id, *locality_order;
+ int rc;
+
+ rc = gmx_cpuid_topology(cpuid,
+ &nprocessors, &npackages,
+ &ncores_per_package, &nhwthreads_per_core,
+ &package_id, &core_id,
+ &hwthread_id, &locality_order);
+
+ if (rc == 0)
+ {
+ return npackages*ncores_per_package;
+ }
+ else
+ {
+ /* We don't have cpuid topology info, return 0 core count */
+ return 0;
+ }
+}
/* Return the number of hardware threads supported by the current CPU.
* We assume that this is equal with the number of "processors"
#endif
}
/*! \brief Collect per-node hardware counts over all MPI ranks into hwinfo_g.
 *
 * Determines, for each physical node, the core / hardware-thread /
 * compatible-GPU counts and a hash of the GPU device strings, then reduces
 * totals, minima and maxima over all nodes into the file-global hwinfo_g.
 * Without a real MPI library there is only one physical node, so the
 * totals, minima and maxima all equal the local counts.
 */
static void gmx_collect_hardware_mpi()
{
#ifdef GMX_LIB_MPI
    int rank_id;
    int nrank, rank, ncore, nhwthread, ngpu, i;
    int gpu_hash;
    int *buf, *all;

    /* Ranks on the same physical node share the same hash value.
     * NOTE(review): assumes hash collisions between different nodes are
     * negligible — a collision would merge two nodes' statistics; confirm
     * gmx_physicalnode_id_hash() is collision-resistant enough. */
    rank_id = gmx_physicalnode_id_hash();
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nrank);
    ncore = hwinfo_g->ncore;
    nhwthread = hwinfo_g->nthreads_hw_avail;
    ngpu = hwinfo_g->gpu_info.n_dev_compatible;
    /* Create a unique hash of the GPU type(s) in this node */
    gpu_hash = 0;
    /* Here it might be better to only loop over the compatible GPU, but we
     * don't have that information available and it would also require
     * removing the device ID from the device info string.
     */
    for (i = 0; i < hwinfo_g->gpu_info.n_dev; i++)
    {
        char stmp[STRLEN];

        /* Since the device ID is incorporated in the hash, the order of
         * the GPUs affects the hash. Also two identical GPUs won't give
         * a gpu_hash of zero after XORing.
         */
        get_gpu_device_info_string(stmp, &hwinfo_g->gpu_info, i);
        gpu_hash ^= gmx_string_fullhash_func(stmp, gmx_string_hash_init);
    }

    /* Gather every rank's node hash: each rank contributes only its own
     * slot (snew zero-initializes), so the element-wise sum reproduces
     * the full rank_id array on every rank. */
    snew(buf, nrank);
    snew(all, nrank);
    buf[rank] = rank_id;

    MPI_Allreduce(buf, all, nrank, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    gmx_bool bFound;
    int nnode0, ncore0, nhwthread0, ngpu0, r;

    bFound = FALSE;
    ncore0 = 0;
    nnode0 = 0;
    nhwthread0 = 0;
    ngpu0 = 0;
    /* Only the lowest-numbered rank on each physical node reports that
     * node's counts, so the later sum-reduction counts each node once. */
    for (r = 0; r < nrank; r++)
    {
        if (all[r] == rank_id)
        {
            if (!bFound && r == rank)
            {
                /* We are the first rank in this physical node */
                nnode0 = 1;
                ncore0 = ncore;
                nhwthread0 = nhwthread;
                ngpu0 = ngpu;
            }
            bFound = TRUE;
        }
    }

    sfree(buf);
    sfree(all);

    int sum[4], maxmin[10];

    {
        /* This local buf[4] intentionally shadows the freed pointer above. */
        int buf[4];

        /* Sum values from only intra-rank 0 so we get the sum over all nodes */
        buf[0] = nnode0;
        buf[1] = ncore0;
        buf[2] = nhwthread0;
        buf[3] = ngpu0;

        MPI_Allreduce(buf, sum, 4, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    }

    {
        int buf[10];

        /* Store + and - values for all ranks,
         * so we can get max+min with one MPI call.
         * (max of the negated values is minus the minimum)
         */
        buf[0] = ncore;
        buf[1] = nhwthread;
        buf[2] = ngpu;
        buf[3] = gmx_cpuid_simd_suggest(hwinfo_g->cpuid_info);
        buf[4] = gpu_hash;
        buf[5] = -buf[0];
        buf[6] = -buf[1];
        buf[7] = -buf[2];
        buf[8] = -buf[3];
        buf[9] = -buf[4];

        MPI_Allreduce(buf, maxmin, 10, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
    }

    /* maxmin[0..4] are the maxima, -maxmin[5..9] the corresponding minima.
     * NOTE(review): hwinfo_g is written here without taking hw_info_lock —
     * presumably safe because with a real MPI library ranks are separate
     * processes; confirm no thread-MPI path reaches this branch. */
    hwinfo_g->nphysicalnode = sum[0];
    hwinfo_g->ncore_tot = sum[1];
    hwinfo_g->ncore_min = -maxmin[5];
    hwinfo_g->ncore_max = maxmin[0];
    hwinfo_g->nhwthread_tot = sum[2];
    hwinfo_g->nhwthread_min = -maxmin[6];
    hwinfo_g->nhwthread_max = maxmin[1];
    hwinfo_g->ngpu_compatible_tot = sum[3];
    hwinfo_g->ngpu_compatible_min = -maxmin[7];
    hwinfo_g->ngpu_compatible_max = maxmin[2];
    hwinfo_g->simd_suggest_min = static_cast<enum gmx_cpuid_simd>(-maxmin[8]);
    hwinfo_g->simd_suggest_max = static_cast<enum gmx_cpuid_simd>(maxmin[3]);
    /* GPUs are identical across nodes iff the hash's max equals its min */
    hwinfo_g->bIdenticalGPUs = (maxmin[4] == -maxmin[9]);
#else
    /* All ranks use the same pointer, protect it with a mutex */
    tMPI_Thread_mutex_lock(&hw_info_lock);
    hwinfo_g->nphysicalnode = 1;
    hwinfo_g->ncore_tot = hwinfo_g->ncore;
    hwinfo_g->ncore_min = hwinfo_g->ncore;
    hwinfo_g->ncore_max = hwinfo_g->ncore;
    hwinfo_g->nhwthread_tot = hwinfo_g->nthreads_hw_avail;
    hwinfo_g->nhwthread_min = hwinfo_g->nthreads_hw_avail;
    hwinfo_g->nhwthread_max = hwinfo_g->nthreads_hw_avail;
    hwinfo_g->ngpu_compatible_tot = hwinfo_g->gpu_info.n_dev_compatible;
    hwinfo_g->ngpu_compatible_min = hwinfo_g->gpu_info.n_dev_compatible;
    hwinfo_g->ngpu_compatible_max = hwinfo_g->gpu_info.n_dev_compatible;
    hwinfo_g->simd_suggest_min = gmx_cpuid_simd_suggest(hwinfo_g->cpuid_info);
    hwinfo_g->simd_suggest_max = gmx_cpuid_simd_suggest(hwinfo_g->cpuid_info);
    hwinfo_g->bIdenticalGPUs = TRUE;
    tMPI_Thread_mutex_unlock(&hw_info_lock);
#endif
}
+
gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
gmx_bool bDetectGPUs)
{
- int ret;
+ int ret;
/* make sure no one else is doing the same thing */
ret = tMPI_Thread_mutex_lock(&hw_info_lock);
gmx_fatal_collective(FARGS, cr, NULL, "CPUID detection failed!");
}
+ /* get the number of cores, will be 0 when not detected */
+ hwinfo_g->ncore = get_ncores(hwinfo_g->cpuid_info);
+
/* detect number of hardware threads */
hwinfo_g->nthreads_hw_avail = get_nthreads_hw_avail(fplog, cr);
gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno));
}
+ gmx_collect_hardware_mpi();
+
return hwinfo_g;
}
/*! \brief Format a multi-line, human-readable report of the detected hardware.
 *
 * \param[in] hwinfo        Hardware info with cross-node totals/min/max filled in.
 * \param[in] bFullCpuInfo  When true, include the full CPUID dump; otherwise
 *                          only the CPU vendor and brand strings.
 * \returns the formatted report.
 *
 * NOTE(review): the exact spacing, field widths and pluralization below are
 * user-visible output — keep them stable.
 */
static std::string detected_hardware_string(const gmx_hw_info_t *hwinfo,
                                            bool bFullCpuInfo)
{
    std::string s;

    /* Summary line: node count plus totals over all physical nodes */
    s = gmx::formatString("\n");
    s += gmx::formatString("Running on %d node%s with total",
                           hwinfo->nphysicalnode,
                           hwinfo->nphysicalnode == 1 ? "" : "s");
    /* ncore_tot is 0 when core-topology detection failed; omit it then */
    if (hwinfo->ncore_tot > 0)
    {
        s += gmx::formatString(" %d cores,", hwinfo->ncore_tot);
    }
    s += gmx::formatString(" %d hardware threads", hwinfo->nhwthread_tot);
    if (hwinfo->gpu_info.bDetectGPUs)
    {
        s += gmx::formatString(", %d compatible GPU%s",
                               hwinfo->ngpu_compatible_tot,
                               hwinfo->ngpu_compatible_tot == 1 ? "" : "s");
    }
    else if (bGPUBinary)
    {
        /* GPU-enabled binary but detection was turned off */
        s += gmx::formatString(" (GPU detection deactivated)");
    }
    s += gmx::formatString("\n");

    if (hwinfo->nphysicalnode > 1)
    {
        /* Print per node hardware feature counts, as "min" or "min - max"
         * ranges when nodes differ */
        if (hwinfo->ncore_max > 0)
        {
            s += gmx::formatString("Cores per node: %2d", hwinfo->ncore_min);
            if (hwinfo->ncore_max > hwinfo->ncore_min)
            {
                s += gmx::formatString(" - %2d", hwinfo->ncore_max);
            }
            s += gmx::formatString("\n");
        }
        s += gmx::formatString("Hardware threads per node: %2d", hwinfo->nhwthread_min);
        if (hwinfo->nhwthread_max > hwinfo->nhwthread_min)
        {
            s += gmx::formatString(" - %2d", hwinfo->nhwthread_max);
        }
        s += gmx::formatString("\n");
        if (bGPUBinary)
        {
            s += gmx::formatString("Compatible GPUs per node: %2d",
                                   hwinfo->ngpu_compatible_min);
            if (hwinfo->ngpu_compatible_max > hwinfo->ngpu_compatible_min)
            {
                s += gmx::formatString(" - %2d", hwinfo->ngpu_compatible_max);
            }
            s += gmx::formatString("\n");
            if (hwinfo->ngpu_compatible_tot > 0)
            {
                if (hwinfo->bIdenticalGPUs)
                {
                    s += gmx::formatString("All nodes have identical type(s) of GPUs\n");
                }
                else
                {
                    /* This message will also appear with identical GPU types
                     * when at least one node has no GPU.
                     */
                    s += gmx::formatString("Different nodes have different type(s) and/or order of GPUs\n");
                }
            }
        }
    }

#ifdef GMX_LIB_MPI
    /* With real MPI, identify which node's hardware this report describes */
    char host[255];
    int rank;

    gmx_gethostname(host, 255);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    s += gmx::formatString("Hardware detected on host %s (the node of MPI rank %d):\n",
                           host, rank);
#else
    s += gmx::formatString("Hardware detected:\n");
#endif
    s += gmx::formatString("  CPU info:\n");
    if (bFullCpuInfo)
    {
        char buf[1024];

        /* Defensive NUL-termination of the CPUID dump */
        gmx_cpuid_formatstring(hwinfo->cpuid_info, buf, 1023);
        buf[1023] = '\0';

        s += gmx::formatString("%s", buf);
    }
    else
    {
        s += gmx::formatString("    Vendor: %s\n",
                               gmx_cpuid_vendor_string[gmx_cpuid_vendor(hwinfo->cpuid_info)]);
        s += gmx::formatString("    Brand:  %s\n",
                               gmx_cpuid_brand(hwinfo->cpuid_info));
    }
    /* SIMD suggestion: a range when nodes suggest different levels */
    s += gmx::formatString("  SIMD instructions most likely to fit this hardware: %s",
                           gmx_cpuid_simd_string[hwinfo->simd_suggest_min]);
    if (hwinfo->simd_suggest_max > hwinfo->simd_suggest_min)
    {
        s += gmx::formatString(" - %s",
                               gmx_cpuid_simd_string[hwinfo->simd_suggest_max]);
    }
    s += gmx::formatString("\n");
    s += gmx::formatString("  SIMD instructions selected at GROMACS compile time: %s\n",
                           gmx_cpuid_simd_string[gmx_compiled_simd()]);
    /* GPU section: only for GPU binaries with something detected somewhere */
    if (bGPUBinary && (hwinfo->ngpu_compatible_tot > 0 ||
                       hwinfo->gpu_info.n_dev > 0))
    {
        s += gmx::formatString("  GPU info:\n");
        s += gmx::formatString("    Number of GPUs detected: %d\n",
                               hwinfo->gpu_info.n_dev);
        if (hwinfo->gpu_info.n_dev > 0)
        {
            char buf[STRLEN];

            sprint_gpus(buf, &hwinfo->gpu_info);
            s += gmx::formatString("%s\n", buf);
        }
    }

    return s;
}
+
+void gmx_print_detected_hardware(FILE *fplog, const t_commrec *cr,
+ const gmx_hw_info_t *hwinfo)
+{
+ if (fplog != NULL)
+ {
+ std::string detected;
+
+ detected = detected_hardware_string(hwinfo, TRUE);
+
+ fprintf(fplog, "%s\n", detected.c_str());
+ }
+
+ if (MULTIMASTER(cr))
+ {
+ std::string detected;
+
+ detected = detected_hardware_string(hwinfo, FALSE);
+
+ fprintf(stderr, "%s\n", detected.c_str());
+ }
+
+ /* Check the compiled SIMD instruction set against that of the node
+ * with the lowest SIMD level support.
+ */
+ gmx_cpuid_simd_check(hwinfo->simd_suggest_min, fplog, MULTIMASTER(cr));
+
+ /* For RDTSCP we only check on our local node and skip the MPI reduction */
+ check_use_of_rdtscp_on_this_cpu(fplog, cr, hwinfo);
+}
+
void gmx_parse_gpu_ids(gmx_gpu_opt_t *gpu_opt)
{
char *env;