# define _SC_NPROCESSORS_CONF _SC_NPROC_CONF
#endif
-/*! \brief Information about the hardware of all nodes (common to all threads in this process).
+/*! \brief The result of device detection
*
- * This information is constructed only when required, but thereafter
- * its lifetime is that of the whole process, potentially across
- * multiple successive simulation parts. It's wise to ensure that only
- * one thread can create the information, but thereafter they can all
- * read it without e.g. needing a std::shared_ptr to ensure its
- * lifetime exceeds that of the thread. */
-static std::unique_ptr<gmx_hw_info_t> g_hardwareInfo;
-//! A mutex to protect the hwinfo structure
-static Mutex g_hardwareInfoMutex;
-
-//! Detect GPUs, if that makes sense to attempt.
-static void gmx_detect_gpus(const gmx::MDLogger& mdlog,
- const PhysicalNodeCommunicator& physicalNodeComm,
- compat::not_null<gmx_hw_info_t*> hardwareInfo)
+ * Note that non-functional device detection still produces
+ * a detection result, ie. of no devices. This might not be
+ * what the user wanted, so it makes sense to log later when
+ * that is possible. */
+struct DeviceDetectionResult
{
+ //! The device information detected
+ std::vector<std::unique_ptr<DeviceInformation>> deviceInfoList_;
+ //! Container of possible warnings to issue when that is possible
+ std::vector<std::string> deviceDetectionWarnings_;
+};
+
+/*! \brief Detect GPUs when that makes sense to attempt.
+ *
+ * \param[in] physicalNodeComm The communicator across this physical node
+ * \return The result of the detection, perhaps including diagnostic messages
+ * to issue later.
+ *
+ * \todo Coordinating the efficient detection of devices across
+ * multiple ranks per node should be separated from the lower-level
+ * hardware detection. See
+ * https://gitlab.com/gromacs/gromacs/-/issues/3650.
+ */
+static DeviceDetectionResult detectAllDeviceInformation(const PhysicalNodeCommunicator& physicalNodeComm)
+{
+ DeviceDetectionResult deviceDetectionResult;
+
if (!isDeviceDetectionEnabled())
{
- return;
+ return deviceDetectionResult;
}
std::string errorMessage;
#if GMX_LIB_MPI
isMasterRankOfPhysicalNode = (physicalNodeComm.rank_ == 0);
#else
- // We choose to run the detection only once with thread-MPI and
- // use a mutex to enforce it.
+ // Without an MPI library, this process is trivially the only one
+ // on the physical node. This code runs before e.g. thread-MPI
+ // ranks are spawned, so detection is race-free by construction.
+ // Read-only access is enforced with providing those ranks with a
+ // handle to a const object, so usage is also free of races.
GMX_UNUSED_VALUE(physicalNodeComm);
isMasterRankOfPhysicalNode = true;
#endif
- /* The OpenCL support requires us to run detection on all ranks.
+ /* The SYCL and OpenCL support requires us to run detection on all
+ * ranks.
+ *
* With CUDA we don't need to, and prefer to detect on one rank
- * and send the information to the other ranks over MPI. */
+ * and send the information to the other ranks over MPI. This
+ * avoids creating a start-up bottleneck with each MPI rank on a
+ * node making the same GPU API calls. */
constexpr bool allRanksMustDetectGpus = (GMX_GPU_OPENCL != 0 || GMX_GPU_SYCL != 0);
bool gpusCanBeDetected = false;
if (isMasterRankOfPhysicalNode || allRanksMustDetectGpus)
gpusCanBeDetected = isDeviceDetectionFunctional(&errorMessage);
if (!gpusCanBeDetected)
{
- GMX_LOG(mdlog.warning)
- .asParagraph()
- .appendTextFormatted(
- "NOTE: Detection of GPUs failed. The API reported:\n"
- " %s\n"
- " GROMACS cannot run tasks on a GPU.",
- errorMessage.c_str());
+ deviceDetectionResult.deviceDetectionWarnings_.emplace_back(
+ "Detection of GPUs failed. The API reported:\n" + errorMessage);
}
}
if (gpusCanBeDetected)
{
- hardwareInfo->deviceInfoList = findDevices();
+ deviceDetectionResult.deviceInfoList_ = findDevices();
// No need to tell the user anything at this point, they get a
// hardware report later.
}
if (isMasterRankOfPhysicalNode)
{
gmx::InMemorySerializer writer;
- serializeDeviceInformations(hardwareInfo->deviceInfoList, &writer);
+ serializeDeviceInformations(deviceDetectionResult.deviceInfoList_, &writer);
buffer = writer.finishAndGetBuffer();
sizeOfBuffer = buffer.size();
}
if (!isMasterRankOfPhysicalNode)
{
gmx::InMemoryDeserializer reader(buffer, false);
- hardwareInfo->deviceInfoList = deserializeDeviceInformations(&reader);
+ deviceDetectionResult.deviceInfoList_ = deserializeDeviceInformations(&reader);
}
}
}
#endif
+ return deviceDetectionResult;
}
//! Reduce the locally collected \p hardwareInfo over MPI ranks
#endif
}
-/*! \brief Sanity check hardware topology and print some notes to log
- *
- * \param mdlog Logger.
- * \param hardwareTopology Reference to hardwareTopology object.
- */
-static void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger gmx_unused& mdlog,
- const gmx::HardwareTopology gmx_unused& hardwareTopology)
+void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger gmx_unused& mdlog,
+ const gmx::HardwareTopology gmx_unused& hardwareTopology)
{
#if defined HAVE_SYSCONF && defined(_SC_NPROCESSORS_CONF)
if (hardwareTopology.supportLevel() < gmx::HardwareTopology::SupportLevel::LogicalProcessorCount)
"performance.");
}
}
+#else
+ GMX_UNUSED_VALUE(mdlog);
+ GMX_UNUSED_VALUE(hardwareTopology);
#endif
}
-gmx_hw_info_t* gmx_detect_hardware(const gmx::MDLogger& mdlog, const PhysicalNodeCommunicator& physicalNodeComm)
+std::unique_ptr<gmx_hw_info_t> gmx_detect_hardware(const PhysicalNodeCommunicator& physicalNodeComm)
{
- // By construction, only one thread ever runs hardware detection,
- // but we may as well prevent issues arising if that would change.
- // Taking the lock early ensures that exactly one thread can
- // attempt to construct g_hardwareInfo.
- lock_guard<Mutex> lock(g_hardwareInfoMutex);
-
- // If we already have the information, just return a handle to it.
- if (g_hardwareInfo != nullptr)
- {
- return g_hardwareInfo.get();
- }
-
// Make the new hardwareInfo in a temporary.
hardwareTopologyPrepareDetection();
std::make_unique<CpuInfo>(CpuInfo::detect()),
std::make_unique<HardwareTopology>(HardwareTopology::detect()));
- // If we detected the topology on this system, double-check that it makes sense
- if (hardwareInfo->hardwareTopology->isThisSystem())
- {
- hardwareTopologyDoubleCheckDetection(mdlog, *hardwareInfo->hardwareTopology);
- }
-
// TODO: Get rid of this altogether.
hardwareInfo->nthreads_hw_avail = hardwareInfo->hardwareTopology->machine().logicalProcessorCount;
// Detect GPUs
- gmx_detect_gpus(mdlog, physicalNodeComm, compat::make_not_null(hardwareInfo));
+ // Open a nested scope so no temporary variables can
+ // be mis-used later.
+ {
+ DeviceDetectionResult deviceDetectionResult = detectAllDeviceInformation(physicalNodeComm);
+ hardwareInfo->deviceInfoList.swap(deviceDetectionResult.deviceInfoList_);
+ std::swap(hardwareInfo->hardwareDetectionWarnings_, deviceDetectionResult.deviceDetectionWarnings_);
+ }
+
gmx_collect_hardware_mpi(*hardwareInfo->cpuInfo, physicalNodeComm, compat::make_not_null(hardwareInfo));
- // Now that the temporary is fully constructed, swap it to become
- // the real thing.
- g_hardwareInfo.swap(hardwareInfo);
+ return hardwareInfo;
+}
- return g_hardwareInfo.get();
+void logHardwareDetectionWarnings(const gmx::MDLogger& mdlog, const gmx_hw_info_t& hardwareInformation)
+{
+ for (const std::string& warningString : hardwareInformation.hardwareDetectionWarnings_)
+ {
+ GMX_LOG(mdlog.warning).asParagraph().appendText(warningString);
+ }
}
} // namespace gmx