Remove logging from hardware detection

[alexxy/gromacs.git] / src / gromacs / hardware / detecthardware.cpp
diff --git a/src/gromacs/hardware/detecthardware.cpp b/src/gromacs/hardware/detecthardware.cpp

index 3bcdac4835f2d52b5db9855e7aaab9109b213f40..884b98a4a2079cf063a215863cd8f91c7871cb5d 100644 (file)
--- a/src/gromacs/hardware/detecthardware.cpp
+++ b/src/gromacs/hardware/detecthardware.cpp
@@ -94,26 +94,38 @@ namespace gmx
  #    define _SC_NPROCESSORS_CONF _SC_NPROC_CONF
  #endif
  
-/*! \brief Information about the hardware of all nodes (common to all threads in this process).
+/*! \brief The result of device detection
   *
- * This information is constructed only when required, but thereafter
- * its lifetime is that of the whole process, potentially across
- * multiple successive simulation parts. It's wise to ensure that only
- * one thread can create the information, but thereafter they can all
- * read it without e.g. needing a std::shared_ptr to ensure its
- * lifetime exceeds that of the thread. */
-static std::unique_ptr<gmx_hw_info_t> g_hardwareInfo;
-//! A mutex to protect the hwinfo structure
-static Mutex g_hardwareInfoMutex;
-
-//! Detect GPUs, if that makes sense to attempt.
-static void gmx_detect_gpus(const gmx::MDLogger&             mdlog,
-                            const PhysicalNodeCommunicator&  physicalNodeComm,
-                            compat::not_null<gmx_hw_info_t*> hardwareInfo)
+ * Note that non-functional device detection still produces
+ * a detection result, ie. of no devices. This might not be
+ * what the user wanted, so it makes sense to log later when
+ * that is possible. */
+struct DeviceDetectionResult
  {
+    //! The device information detected
+    std::vector<std::unique_ptr<DeviceInformation>> deviceInfoList_;
+    //! Container of possible warnings to issue when that is possible
+    std::vector<std::string> deviceDetectionWarnings_;
+};
+
+/*! \brief Detect GPUs when that makes sense to attempt.
+ *
+ * \param[in]  physicalNodeComm  The communicator across this physical node
+ * \return The result of the detection, perhaps including diagnostic messages
+ *         to issue later.
+ *
+ * \todo Coordinating the efficient detection of devices across
+ * multiple ranks per node should be separated from the lower-level
+ * hardware detection. See
+ * https://gitlab.com/gromacs/gromacs/-/issues/3650.
+ */
+static DeviceDetectionResult detectAllDeviceInformation(const PhysicalNodeCommunicator& physicalNodeComm)
+{
+    DeviceDetectionResult deviceDetectionResult;
+
      if (!isDeviceDetectionEnabled())
      {
-        return;
+        return deviceDetectionResult;
      }
  
      std::string errorMessage;
@@ -122,15 +134,22 @@ static void gmx_detect_gpus(const gmx::MDLogger&             mdlog,
  #if GMX_LIB_MPI
      isMasterRankOfPhysicalNode = (physicalNodeComm.rank_ == 0);
  #else
-    // We choose to run the detection only once with thread-MPI and
-    // use a mutex to enforce it.
+    // Without an MPI library, this process is trivially the only one
+    // on the physical node. This code runs before e.g. thread-MPI
+    // ranks are spawned, so detection is race-free by construction.
+    // Read-only access is enforced with providing those ranks with a
+    // handle to a const object, so usage is also free of races.
      GMX_UNUSED_VALUE(physicalNodeComm);
      isMasterRankOfPhysicalNode = true;
  #endif
  
-    /* The OpenCL support requires us to run detection on all ranks.
+    /* The SYCL and OpenCL support requires us to run detection on all
+     * ranks.
+     *
       * With CUDA we don't need to, and prefer to detect on one rank
-     * and send the information to the other ranks over MPI. */
+     * and send the information to the other ranks over MPI. This
+     * avoids creating a start-up bottleneck with each MPI rank on a
+     * node making the same GPU API calls. */
      constexpr bool allRanksMustDetectGpus = (GMX_GPU_OPENCL != 0 || GMX_GPU_SYCL != 0);
      bool           gpusCanBeDetected      = false;
      if (isMasterRankOfPhysicalNode || allRanksMustDetectGpus)
@@ -139,19 +158,14 @@ static void gmx_detect_gpus(const gmx::MDLogger&             mdlog,
          gpusCanBeDetected = isDeviceDetectionFunctional(&errorMessage);
          if (!gpusCanBeDetected)
          {
-            GMX_LOG(mdlog.warning)
-                    .asParagraph()
-                    .appendTextFormatted(
-                            "NOTE: Detection of GPUs failed. The API reported:\n"
-                            "      %s\n"
-                            "      GROMACS cannot run tasks on a GPU.",
-                            errorMessage.c_str());
+            deviceDetectionResult.deviceDetectionWarnings_.emplace_back(
+                    "Detection of GPUs failed. The API reported:\n" + errorMessage);
          }
      }
  
      if (gpusCanBeDetected)
      {
-        hardwareInfo->deviceInfoList = findDevices();
+        deviceDetectionResult.deviceInfoList_ = findDevices();
          // No need to tell the user anything at this point, they get a
          // hardware report later.
      }
@@ -166,7 +180,7 @@ static void gmx_detect_gpus(const gmx::MDLogger&             mdlog,
          if (isMasterRankOfPhysicalNode)
          {
              gmx::InMemorySerializer writer;
-            serializeDeviceInformations(hardwareInfo->deviceInfoList, &writer);
+            serializeDeviceInformations(deviceDetectionResult.deviceInfoList_, &writer);
              buffer       = writer.finishAndGetBuffer();
              sizeOfBuffer = buffer.size();
          }
@@ -180,11 +194,12 @@ static void gmx_detect_gpus(const gmx::MDLogger&             mdlog,
              if (!isMasterRankOfPhysicalNode)
              {
                  gmx::InMemoryDeserializer reader(buffer, false);
-                hardwareInfo->deviceInfoList = deserializeDeviceInformations(&reader);
+                deviceDetectionResult.deviceInfoList_ = deserializeDeviceInformations(&reader);
              }
          }
      }
  #endif
+    return deviceDetectionResult;
  }
  
  //! Reduce the locally collected \p hardwareInfo over MPI ranks
@@ -384,13 +399,8 @@ static void hardwareTopologyPrepareDetection()
  #endif
  }
  
-/*! \brief Sanity check hardware topology and print some notes to log
- *
- *  \param mdlog            Logger.
- *  \param hardwareTopology Reference to hardwareTopology object.
- */
-static void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger gmx_unused& mdlog,
-                                                 const gmx::HardwareTopology gmx_unused& hardwareTopology)
+void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger gmx_unused& mdlog,
+                                          const gmx::HardwareTopology gmx_unused& hardwareTopology)
  {
  #if defined HAVE_SYSCONF && defined(_SC_NPROCESSORS_CONF)
      if (hardwareTopology.supportLevel() < gmx::HardwareTopology::SupportLevel::LogicalProcessorCount)
@@ -430,23 +440,14 @@ static void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger gmx_unused&
                              "performance.");
          }
      }
+#else
+    GMX_UNUSED_VALUE(mdlog);
+    GMX_UNUSED_VALUE(hardwareTopology);
  #endif
  }
  
-gmx_hw_info_t* gmx_detect_hardware(const gmx::MDLogger& mdlog, const PhysicalNodeCommunicator& physicalNodeComm)
+std::unique_ptr<gmx_hw_info_t> gmx_detect_hardware(const PhysicalNodeCommunicator& physicalNodeComm)
  {
-    // By construction, only one thread ever runs hardware detection,
-    // but we may as well prevent issues arising if that would change.
-    // Taking the lock early ensures that exactly one thread can
-    // attempt to construct g_hardwareInfo.
-    lock_guard<Mutex> lock(g_hardwareInfoMutex);
-
-    // If we already have the information, just return a handle to it.
-    if (g_hardwareInfo != nullptr)
-    {
-        return g_hardwareInfo.get();
-    }
-
      // Make the new hardwareInfo in a temporary.
      hardwareTopologyPrepareDetection();
  
@@ -456,24 +457,29 @@ gmx_hw_info_t* gmx_detect_hardware(const gmx::MDLogger& mdlog, const PhysicalNod
              std::make_unique<CpuInfo>(CpuInfo::detect()),
              std::make_unique<HardwareTopology>(HardwareTopology::detect()));
  
-    // If we detected the topology on this system, double-check that it makes sense
-    if (hardwareInfo->hardwareTopology->isThisSystem())
-    {
-        hardwareTopologyDoubleCheckDetection(mdlog, *hardwareInfo->hardwareTopology);
-    }
-
      // TODO: Get rid of this altogether.
      hardwareInfo->nthreads_hw_avail = hardwareInfo->hardwareTopology->machine().logicalProcessorCount;
  
      // Detect GPUs
-    gmx_detect_gpus(mdlog, physicalNodeComm, compat::make_not_null(hardwareInfo));
+    // Open a nested scope so no temporary variables can
+    // be mis-used later.
+    {
+        DeviceDetectionResult deviceDetectionResult = detectAllDeviceInformation(physicalNodeComm);
+        hardwareInfo->deviceInfoList.swap(deviceDetectionResult.deviceInfoList_);
+        std::swap(hardwareInfo->hardwareDetectionWarnings_, deviceDetectionResult.deviceDetectionWarnings_);
+    }
+
      gmx_collect_hardware_mpi(*hardwareInfo->cpuInfo, physicalNodeComm, compat::make_not_null(hardwareInfo));
  
-    // Now that the temporary is fully constructed, swap it to become
-    // the real thing.
-    g_hardwareInfo.swap(hardwareInfo);
+    return hardwareInfo;
+}
  
-    return g_hardwareInfo.get();
+void logHardwareDetectionWarnings(const gmx::MDLogger& mdlog, const gmx_hw_info_t& hardwareInformation)
+{
+    for (const std::string& warningString : hardwareInformation.hardwareDetectionWarnings_)
+    {
+        GMX_LOG(mdlog.warning).asParagraph().appendText(warningString);
+    }
  }
  
  } // namespace gmx