#include "architecture.h"
#ifdef HAVE_UNISTD_H
-# include <unistd.h> // sysconf()
+# include <unistd.h> // sysconf()
#endif
//! Constructs the hardware info by taking ownership of the supplied \p cpuInfo and \p hardwareTopology objects.
gmx_hw_info_t::gmx_hw_info_t(std::unique_ptr<gmx::CpuInfo> cpuInfo,
- std::unique_ptr<gmx::HardwareTopology> hardwareTopology)
- : cpuInfo(std::move(cpuInfo)),
- hardwareTopology(std::move(hardwareTopology))
+ std::unique_ptr<gmx::HardwareTopology> hardwareTopology) :
+ cpuInfo(std::move(cpuInfo)),
+ hardwareTopology(std::move(hardwareTopology))
{
// Intentionally empty: both unique_ptr members are move-initialized in the initializer list above.
}
* lifetime exceeds that of the thread. */
static std::unique_ptr<gmx_hw_info_t> g_hardwareInfo;
//! A mutex to protect the hwinfo structure
-static Mutex g_hardwareInfoMutex;
+static Mutex g_hardwareInfoMutex;
//! Detect GPUs, if that makes sense to attempt.
-static void gmx_detect_gpus(const gmx::MDLogger &mdlog,
- const PhysicalNodeCommunicator &physicalNodeComm,
- compat::not_null<gmx_hw_info_t *> hardwareInfo)
+static void gmx_detect_gpus(const gmx::MDLogger& mdlog,
+ const PhysicalNodeCommunicator& physicalNodeComm,
+ compat::not_null<gmx_hw_info_t*> hardwareInfo)
{
hardwareInfo->gpu_info.bDetectGPUs = canPerformGpuDetection();
gpusCanBeDetected = isGpuDetectionFunctional(&errorMessage);
if (!gpusCanBeDetected)
{
- GMX_LOG(mdlog.info).asParagraph().appendTextFormatted(
- "NOTE: Detection of GPUs failed. The API reported:\n"
- " %s\n"
- " GROMACS cannot run tasks on a GPU.",
- errorMessage.c_str());
+ GMX_LOG(mdlog.info)
+ .asParagraph()
+ .appendTextFormatted(
+ "NOTE: Detection of GPUs failed. The API reported:\n"
+ " %s\n"
+ " GROMACS cannot run tasks on a GPU.",
+ errorMessage.c_str());
}
}
{
int dev_size;
- dev_size = hardwareInfo->gpu_info.n_dev*sizeof_gpu_dev_info();
+ dev_size = hardwareInfo->gpu_info.n_dev * sizeof_gpu_dev_info();
if (!isMasterRankOfPhysicalNode)
{
- hardwareInfo->gpu_info.gpu_dev =
- (struct gmx_device_info_t *)malloc(dev_size);
+ hardwareInfo->gpu_info.gpu_dev = (struct gmx_device_info_t*)malloc(dev_size);
}
- MPI_Bcast(hardwareInfo->gpu_info.gpu_dev, dev_size, MPI_BYTE,
- 0, physicalNodeComm.comm_);
- MPI_Bcast(&hardwareInfo->gpu_info.n_dev_compatible, 1, MPI_INT,
- 0, physicalNodeComm.comm_);
+ MPI_Bcast(hardwareInfo->gpu_info.gpu_dev, dev_size, MPI_BYTE, 0, physicalNodeComm.comm_);
+ MPI_Bcast(&hardwareInfo->gpu_info.n_dev_compatible, 1, MPI_INT, 0, physicalNodeComm.comm_);
}
}
#endif
}
//! Reduce the locally collected \p hardwareInfo over MPI ranks
-static void gmx_collect_hardware_mpi(const gmx::CpuInfo &cpuInfo,
- const PhysicalNodeCommunicator &physicalNodeComm,
- compat::not_null<gmx_hw_info_t *> hardwareInfo)
+static void gmx_collect_hardware_mpi(const gmx::CpuInfo& cpuInfo,
+ const PhysicalNodeCommunicator& physicalNodeComm,
+ compat::not_null<gmx_hw_info_t*> hardwareInfo)
{
- const int ncore = hardwareInfo->hardwareTopology->numberOfCores();
+ const int ncore = hardwareInfo->hardwareTopology->numberOfCores();
/* Zen1 is assumed for:
* - family=23 with the below listed models;
* - Hygon as vendor.
*/
- const bool cpuIsAmdZen1 = ((cpuInfo.vendor() == CpuInfo::Vendor::Amd &&
- cpuInfo.family() == 23 &&
- (cpuInfo.model() == 1 || cpuInfo.model() == 17 ||
- cpuInfo.model() == 8 || cpuInfo.model() == 24)) ||
- cpuInfo.vendor() == CpuInfo::Vendor::Hygon);
+ const bool cpuIsAmdZen1 = ((cpuInfo.vendor() == CpuInfo::Vendor::Amd && cpuInfo.family() == 23
+ && (cpuInfo.model() == 1 || cpuInfo.model() == 17
+ || cpuInfo.model() == 8 || cpuInfo.model() == 24))
+ || cpuInfo.vendor() == CpuInfo::Vendor::Hygon);
#if GMX_LIB_MPI
- int nhwthread, ngpu, i;
- int gpu_hash;
+ int nhwthread, ngpu, i;
+ int gpu_hash;
nhwthread = hardwareInfo->nthreads_hw_avail;
ngpu = hardwareInfo->gpu_info.n_dev_compatible;
/* Create a unique hash of the GPU type(s) in this node */
- gpu_hash = 0;
+ gpu_hash = 0;
/* Here it might be better to only loop over the compatible GPU, but we
* don't have that information available and it would also require
* removing the device ID from the device info string.
gpu_hash ^= gmx_string_fullhash_func(stmp, gmx_string_hash_init);
}
- constexpr int numElementsCounts = 4;
- std::array<int, numElementsCounts> countsReduced;
+ constexpr int numElementsCounts = 4;
+ std::array<int, numElementsCounts> countsReduced;
{
- std::array<int, numElementsCounts> countsLocal = {{0}};
+ std::array<int, numElementsCounts> countsLocal = { { 0 } };
// Organize to sum values from only one rank within each node,
// so we get the sum over all nodes.
bool isMasterRankOfPhysicalNode = (physicalNodeComm.rank_ == 0);
countsLocal[3] = ngpu;
}
- MPI_Allreduce(countsLocal.data(), countsReduced.data(), countsLocal.size(),
- MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+ MPI_Allreduce(countsLocal.data(), countsReduced.data(), countsLocal.size(), MPI_INT,
+ MPI_SUM, MPI_COMM_WORLD);
}
- constexpr int numElementsMax = 11;
- std::array<int, numElementsMax> maxMinReduced;
+ constexpr int numElementsMax = 11;
+ std::array<int, numElementsMax> maxMinReduced;
{
std::array<int, numElementsMax> maxMinLocal;
/* Store + and - values for all ranks,
maxMinLocal[9] = -maxMinLocal[4];
maxMinLocal[10] = (cpuIsAmdZen1 ? 1 : 0);
- MPI_Allreduce(maxMinLocal.data(), maxMinReduced.data(), maxMinLocal.size(),
- MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+ MPI_Allreduce(maxMinLocal.data(), maxMinReduced.data(), maxMinLocal.size(), MPI_INT,
+ MPI_MAX, MPI_COMM_WORLD);
}
hardwareInfo->nphysicalnode = countsReduced[0];
*
* This routine will not throw exceptions.
*/
-static void
-spinUpCore() noexcept
+static void spinUpCore() noexcept
{
#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) && defined(_SC_NPROCESSORS_ONLN)
float dummy = 0.1;
int countConfigured = sysconf(_SC_NPROCESSORS_CONF); // noexcept
auto start = std::chrono::steady_clock::now(); // noexcept
- while (sysconf(_SC_NPROCESSORS_ONLN) < countConfigured &&
- std::chrono::steady_clock::now() - start < std::chrono::seconds(2))
+ while (sysconf(_SC_NPROCESSORS_ONLN) < countConfigured
+ && std::chrono::steady_clock::now() - start < std::chrono::seconds(2))
{
for (int i = 1; i < 10000; i++)
{
* \note Cores will sleep relatively quickly again, so it's important to issue
* the real detection code directly after this routine.
*/
-static void
-hardwareTopologyPrepareDetection()
+static void hardwareTopologyPrepareDetection()
{
-#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) && \
- (defined(THREAD_PTHREADS) || defined(THREAD_WINDOWS))
+#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) \
+ && (defined(THREAD_PTHREADS) || defined(THREAD_WINDOWS))
// Modify this conditional when/if x86 or PowerPC starts to sleep some cores
- if (c_architecture != Architecture::X86 &&
- c_architecture != Architecture::PowerPC)
+ if (c_architecture != Architecture::X86 && c_architecture != Architecture::PowerPC)
{
- int countConfigured = sysconf(_SC_NPROCESSORS_CONF);
+ int countConfigured = sysconf(_SC_NPROCESSORS_CONF);
std::vector<std::thread> workThreads(countConfigured);
- for (auto &t : workThreads)
+ for (auto& t : workThreads)
{
t = std::thread(spinUpCore);
}
- for (auto &t : workThreads)
+ for (auto& t : workThreads)
{
t.join();
}
* \param mdlog Logger.
* \param hardwareTopology Reference to hardwareTopology object.
*/
-static void
-hardwareTopologyDoubleCheckDetection(const gmx::MDLogger gmx_unused &mdlog,
- const gmx::HardwareTopology gmx_unused &hardwareTopology)
+static void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger gmx_unused& mdlog,
+ const gmx::HardwareTopology gmx_unused& hardwareTopology)
{
#if defined HAVE_SYSCONF && defined(_SC_NPROCESSORS_CONF)
if (hardwareTopology.supportLevel() < gmx::HardwareTopology::SupportLevel::LogicalProcessorCount)
*/
if (countConfigured >= 0 && countConfigured != countFromDetection)
{
- GMX_LOG(mdlog.info).
- appendTextFormatted("Note: %d CPUs configured, but only %d were detected to be online.\n", countConfigured, countFromDetection);
+ GMX_LOG(mdlog.info)
+ .appendTextFormatted(
+ "Note: %d CPUs configured, but only %d were detected to be online.\n",
+ countConfigured, countFromDetection);
- if (c_architecture == Architecture::X86 &&
- countConfigured == 2*countFromDetection)
+ if (c_architecture == Architecture::X86 && countConfigured == 2 * countFromDetection)
{
- GMX_LOG(mdlog.info).
- appendText(" X86 Hyperthreading is likely disabled; enable it for better performance.");
+ GMX_LOG(mdlog.info)
+ .appendText(
+ " X86 Hyperthreading is likely disabled; enable it for better "
+ "performance.");
}
// For PowerPC (likely Power8) it is possible to set SMT to either 2,4, or 8-way hardware threads.
// We only warn if it is completely disabled since default performance drops with SMT8.
- if (c_architecture == Architecture::PowerPC &&
- countConfigured == 8*countFromDetection)
+ if (c_architecture == Architecture::PowerPC && countConfigured == 8 * countFromDetection)
{
- GMX_LOG(mdlog.info).
- appendText(" PowerPC SMT is likely disabled; enable SMT2/SMT4 for better performance.");
+ GMX_LOG(mdlog.info)
+ .appendText(
+ " PowerPC SMT is likely disabled; enable SMT2/SMT4 for better "
+ "performance.");
}
}
#endif
}
-gmx_hw_info_t *gmx_detect_hardware(const gmx::MDLogger &mdlog,
- const PhysicalNodeCommunicator &physicalNodeComm)
+gmx_hw_info_t* gmx_detect_hardware(const gmx::MDLogger& mdlog, const PhysicalNodeCommunicator& physicalNodeComm)
{
// By construction, only one thread ever runs hardware detection,
// but we may as well prevent issues arising if that would change.
// TODO: We should also do CPU hardware detection only once on each
// physical node and broadcast it, instead of doing it on every MPI rank.
- auto hardwareInfo = std::make_unique<gmx_hw_info_t>(std::make_unique<CpuInfo>(CpuInfo::detect()),
- std::make_unique<HardwareTopology>(HardwareTopology::detect()));
+ auto hardwareInfo = std::make_unique<gmx_hw_info_t>(
+ std::make_unique<CpuInfo>(CpuInfo::detect()),
+ std::make_unique<HardwareTopology>(HardwareTopology::detect()));
// If we detected the topology on this system, double-check that it makes sense
if (hardwareInfo->hardwareTopology->isThisSystem())
return g_hardwareInfo.get();
}
//! Returns true when \p gpu_info.n_dev_compatible is greater than zero, false otherwise.
-bool compatibleGpusFound(const gmx_gpu_info_t &gpu_info)
+bool compatibleGpusFound(const gmx_gpu_info_t& gpu_info)
{
// Only the compatible-device counter is inspected; no other field of gpu_info is read here.
return gpu_info.n_dev_compatible > 0;
}
-} // namespace gmx
+} // namespace gmx