2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 * Implements gmx::HardwareTopology.
40 * \author Erik Lindahl <erik.lindahl@gmail.com>
41 * \ingroup module_hardware
46 #include "hardwaretopology.h"
59 #include "gromacs/gmxlib/md_logging.h"
60 #include "gromacs/hardware/cpuinfo.h"
61 #include "gromacs/utility/gmxassert.h"
62 #include "gromacs/utility/gmxomp.h"
65 # include <unistd.h> // sysconf()
67 #if GMX_NATIVE_WINDOWS
68 # include <windows.h> // GetSystemInfo()
// Compile-time ARM detection: isArm is defined once for each outcome of the
// compiler-macro test on the next line.
71 #if defined(_M_ARM) || defined(__arm__) || defined(__ARM_ARCH) || defined (__aarch64__)
72 //! True when one of the ARM compiler macros above is defined; a plain constant, so code can test it without further preprocessor conditionals
73 static const bool isArm = true;
75 //! False when none of the ARM compiler macros above is defined; a plain constant, so code can test it without further preprocessor conditionals
76 static const bool isArm = false;
85 /*****************************************************************************
87 * Utility functions for extracting hardware topology from CpuInfo object *
89 *****************************************************************************/
91 /*! \brief Initialize machine data from basic information in cpuinfo
93 * \param machine Machine tree structure where information will be assigned
94 * if the cpuinfo object contains topology information.
95 * \param supportLevel If topology information is available in CpuInfo,
96 * this will be updated to reflect the amount of
97 * information written to the machine structure.
// Build the socket/core/hwthread tree in *machine from the logical processor
// list reported by CpuInfo::detect(). After the tree has been filled,
// *supportLevel is set to Basic; the assignment of None at the end is
// presumably the branch taken when CpuInfo reports no logical processors.
100 parseCpuInfo(HardwareTopology::Machine * machine,
101 HardwareTopology::SupportLevel * supportLevel)
103 CpuInfo cpuInfo(CpuInfo::detect());
// Only build a topology when CpuInfo actually reported logical processors.
105 if (!cpuInfo.logicalProcessors().empty())
111 // Copy the logical processor information from cpuinfo
112 for (auto &l : cpuInfo.logicalProcessors())
// The fourth initializer (-1) is presumably the numa node id, i.e. unknown.
114 machine->logicalProcessors.push_back( { l.socketRankInMachine, l.coreRankInSocket, l.hwThreadRankInCore, -1 } );
// Track the highest rank seen on each level; the tree below is sized to
// (highest rank + 1) entries per level.
115 nSockets = std::max(nSockets, l.socketRankInMachine);
116 nCores = std::max(nCores, l.coreRankInSocket);
117 nHwThreads = std::max(nHwThreads, l.hwThreadRankInCore);
120 // Fill info from sockets/cores/hwthreads
// Every socket gets the same number of cores and every core the same number
// of hardware threads, so the tree is rectangular. Slots that no logical
// processor maps to keep logicalProcessorId == -1.
125 machine->sockets.resize(nSockets + 1);
126 for (auto &s : machine->sockets)
129 s.cores.resize(nCores + 1);
130 for (auto &c : s.cores)
133 c.numaNodeId = -1; // No numa information
134 c.hwThreads.resize(nHwThreads + 1);
135 for (auto &t : c.hwThreads)
138 t.logicalProcessorId = -1; // set as unassigned for now
143 // Fill the logical processor id in the right place
// The index in machine->logicalProcessors is used as the logical processor id.
144 for (std::size_t i = 0; i < machine->logicalProcessors.size(); i++)
146 const HardwareTopology::LogicalProcessor &l = machine->logicalProcessors[i];
147 machine->sockets[l.socketRankInMachine].cores[l.coreRankInSocket].hwThreads[l.hwThreadRankInCore].logicalProcessorId = static_cast<int>(i);
// The processor count is simply the number of entries copied above.
149 machine->logicalProcessorCount = machine->logicalProcessors.size();
150 *supportLevel = HardwareTopology::SupportLevel::Basic;
// Presumably reached only when CpuInfo had no logical processor list.
154 *supportLevel = HardwareTopology::SupportLevel::None;
// Compatibility aliases: for hwloc API versions below 0x00010b00 the object
// type names PACKAGE and NUMANODE used in this file are mapped to the names
// HWLOC_OBJ_SOCKET and HWLOC_OBJ_NODE.
160 #if HWLOC_API_VERSION < 0x00010b00
161 # define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
162 # define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
165 /*****************************************************************************
167 * Utility functions for extracting hardware topology from hwloc library *
169 *****************************************************************************/
171 /*! \brief Return vector of all descendants of a given type in hwloc topology
173 * \param obj Non-null hwloc object.
174 * \param type hwloc object type to find. The routine will only search
175 * on levels below obj.
177 * \return vector containing all the objects of given type that are
178 * descendants of the provided object. If no objects of this type
179 * were found, the vector will be empty.
181 const std::vector<hwloc_obj_t>
182 getHwLocDescendantsByType(const hwloc_obj_t obj, const hwloc_obj_type_t type)
184 GMX_RELEASE_ASSERT(obj, "NULL hwloc object provided to getHwLocDescendantsByType()");
186 std::vector<hwloc_obj_t> v;
188 // Go through children; if this object has no children obj->arity is 0,
189 // and we'll return an empty vector.
190 for (std::size_t i = 0; i < obj->arity; i++)
192 // If the child is the type we're looking for, add it directly.
193 // Otherwise call this routine recursively for each child.
194 if (obj->children[i]->type == type)
196 v.push_back(obj->children[i]);
200 std::vector<hwloc_obj_t> v2 = getHwLocDescendantsByType(obj->children[i], type);
201 v.insert(v.end(), v2.begin(), v2.end());
207 /*! \brief Read information about sockets, cores and threads from hwloc topology
209 * \param topo hwloc topology handle that has been initialized and loaded
210 * \param machine Pointer to the machine structure in the HardwareTopology
211 * class, where the tree of sockets/cores/threads will be written.
213 * \return If all the data is found the return value is 0, otherwise non-zero.
// Fill machine->sockets (a tree of sockets, cores and hardware threads) and
// the flat machine->logicalProcessors table from the hwloc topology.
// topologyOk is part of every loop condition, so parsing stops as soon as a
// socket without cores or a core without hardware threads is found.
216 parseHwLocSocketsCoresThreads(const hwloc_topology_t topo,
217 HardwareTopology::Machine * machine)
219 const hwloc_obj_t root = hwloc_get_root_obj(topo);
220 std::vector<hwloc_obj_t> hwlocSockets = getHwLocDescendantsByType(root, HWLOC_OBJ_PACKAGE);
// Size the flat table by the number of PU objects hwloc knows about, and the
// socket list by the number of package objects found below the root.
222 machine->logicalProcessorCount = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
223 machine->logicalProcessors.resize(machine->logicalProcessorCount);
224 machine->sockets.resize(hwlocSockets.size());
226 bool topologyOk = !hwlocSockets.empty(); // Fail if we have no sockets in machine
228 for (std::size_t i = 0; i < hwlocSockets.size() && topologyOk; i++)
230 // Assign information about this socket
231 machine->sockets[i].id = hwlocSockets[i]->logical_index;
233 // Get children (cores)
234 std::vector<hwloc_obj_t> hwlocCores = getHwLocDescendantsByType(hwlocSockets[i], HWLOC_OBJ_CORE);
235 machine->sockets[i].cores.resize(hwlocCores.size());
237 topologyOk = topologyOk && !hwlocCores.empty(); // Fail if we have no cores in socket
239 // Loop over child cores
240 for (std::size_t j = 0; j < hwlocCores.size() && topologyOk; j++)
242 // Assign information about this core
243 machine->sockets[i].cores[j].id = hwlocCores[j]->logical_index;
// The numa node is not known at this point, so it is marked as -1.
244 machine->sockets[i].cores[j].numaNodeId = -1;
246 // Get children (hwthreads)
247 std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(hwlocCores[j], HWLOC_OBJ_PU);
248 machine->sockets[i].cores[j].hwThreads.resize(hwlocPUs.size());
250 topologyOk = topologyOk && !hwlocPUs.empty(); // Fail if we have no hwthreads in core
252 // Loop over child hwthreads
253 for (std::size_t k = 0; k < hwlocPUs.size() && topologyOk; k++)
255 // Assign information about this hwthread
// The os_index reported by hwloc is used as the logical processor id, i.e.
// as the index into machine->logicalProcessors.
256 std::size_t logicalProcessorId = hwlocPUs[k]->os_index;
257 machine->sockets[i].cores[j].hwThreads[k].id = hwlocPUs[k]->logical_index;
258 machine->sockets[i].cores[j].hwThreads[k].logicalProcessorId = logicalProcessorId;
// Guard against an os_index that does not fit in the table sized above.
260 if (logicalProcessorId < machine->logicalProcessors.size())
262 // Cross-assign data for this hwthread to the logicalProcessors vector
263 machine->logicalProcessors[logicalProcessorId].socketRankInMachine = static_cast<int>(i);
264 machine->logicalProcessors[logicalProcessorId].coreRankInSocket = static_cast<int>(j);
265 machine->logicalProcessors[logicalProcessorId].hwThreadRankInCore = static_cast<int>(k);
266 machine->logicalProcessors[logicalProcessorId].numaNodeId = -1;
// Discard everything written so far; presumably only on the failure path
// (topologyOk == false) - the enclosing condition is not visible here.
282 machine->logicalProcessors.clear();
283 machine->sockets.clear();
288 /*! \brief Read cache information from hwloc topology
290 * \param topo hwloc topology handle that has been initialized and loaded
291 * \param machine Pointer to the machine structure in the HardwareTopology
292 * class, where cache data will be filled.
294 * \return If any cache data is found the return value is 0, otherwise non-zero.
// Append one entry to machine->caches for each data cache level (1 to 5)
// that hwloc knows about. The return value is machine->caches.empty(), so it
// is zero when at least one cache entry is present.
297 parseHwLocCache(const hwloc_topology_t topo,
298 HardwareTopology::Machine * machine)
300 // Parse caches up to L5
301 for (int cachelevel : { 1, 2, 3, 4, 5})
// Ask hwloc at which depth of the topology the data cache of this level sits.
303 int depth = hwloc_get_cache_type_depth(topo, cachelevel, HWLOC_OBJ_CACHE_DATA);
// Passing NULL as the previous object yields the first object at that depth;
// as far as visible here only this first cache is inspected per level.
307 hwloc_obj_t cache = hwloc_get_next_obj_by_depth(topo, depth, NULL);
// The hardware threads below the cache are only needed for their count.
310 std::vector<hwloc_obj_t> hwThreads = getHwLocDescendantsByType(cache, HWLOC_OBJ_PU);
// Entry values: cache depth, size, line size, associativity, and the number
// of hardware threads found below the cache (never reported as less than 1).
312 machine->caches.push_back( {
313 static_cast<int>(cache->attr->cache.depth),
314 static_cast<std::size_t>(cache->attr->cache.size),
315 static_cast<int>(cache->attr->cache.linesize),
316 static_cast<int>(cache->attr->cache.associativity),
317 std::max(static_cast<int>(hwThreads.size()), 1)
// empty() is true (non-zero) when no cache entry exists, false (0) otherwise.
322 return machine->caches.empty();
326 /*! \brief Read numa information from hwloc topology
328 * \param topo hwloc topology handle that has been initialized and loaded
329 * \param machine Pointer to the machine structure in the HardwareTopology
330 * class, where numa information will be filled.
332 * Hwloc should virtually always be able to detect numa information, but if
333 * there is only a single numa node in the system it is not reported at all.
334 * In this case we create a single numa node covering all cores.
336 * This function uses the basic socket/core/thread information detected by
337 * parseHwLocSocketsCoresThreads(), which means that routine must have
338 * completed successfully before calling this one. If this is not the case,
339 * you will get an error return code.
341 * \return If the data found makes sense (either in the numa node or the
342 * entire machine) the return value is 0, otherwise non-zero.
// Fill machine->numa from the hwloc topology and tag logical processors and
// cores with the numa node they belong to. When hwloc reports no numa node
// objects, one node covering the whole machine is created instead.
// The logicalProcessors and sockets tables must already be filled, since they
// are indexed here by the OS index of each PU.
345 parseHwLocNuma(const hwloc_topology_t topo,
346 HardwareTopology::Machine * machine)
348 const hwloc_obj_t root = hwloc_get_root_obj(topo);
349 std::vector<hwloc_obj_t> hwlocNumaNodes = getHwLocDescendantsByType(root, HWLOC_OBJ_NUMANODE);
350 bool topologyOk = true;
// Case 1: hwloc reports explicit numa node objects.
352 if (!hwlocNumaNodes.empty())
354 machine->numa.nodes.resize(hwlocNumaNodes.size());
356 for (std::size_t i = 0; i < hwlocNumaNodes.size(); i++)
358 machine->numa.nodes[i].id = hwlocNumaNodes[i]->logical_index;
359 machine->numa.nodes[i].memory = hwlocNumaNodes[i]->memory.total_memory;
360 machine->numa.nodes[i].logicalProcessorId.clear();
362 // Get list of PUs in this numa node
363 std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(hwlocNumaNodes[i], HWLOC_OBJ_PU);
365 for (auto &p : hwlocPUs)
367 machine->numa.nodes[i].logicalProcessorId.push_back(p->os_index);
// The OS index is used as index into logicalProcessors, so it must be in range.
369 GMX_RELEASE_ASSERT(p->os_index < machine->logicalProcessors.size(), "OS index of PU in hwloc larger than processor count");
// Note that the position i in the node list is stored, not the hwloc logical_index.
371 machine->logicalProcessors[p->os_index].numaNodeId = static_cast<int>(i);
372 std::size_t s = machine->logicalProcessors[p->os_index].socketRankInMachine;
373 std::size_t c = machine->logicalProcessors[p->os_index].coreRankInSocket;
375 GMX_RELEASE_ASSERT(s < machine->sockets.size(), "Socket index in logicalProcessors larger than socket count");
376 GMX_RELEASE_ASSERT(c < machine->sockets[s].cores.size(), "Core index in logicalProcessors larger than core count");
377 // Set numaNodeId in core too
378 machine->sockets[s].cores[c].numaNodeId = i;
// Copy the latency matrix from hwloc, but only when hwloc provides one and it
// covers exactly as many objects as the numa nodes found above.
382 int depth = hwloc_get_type_depth(topo, HWLOC_OBJ_NUMANODE);
383 const struct hwloc_distances_s * dist = hwloc_get_whole_distance_matrix_by_depth(topo, depth);
384 if (dist != NULL && dist->nbobjs == hwlocNumaNodes.size())
386 machine->numa.baseLatency = dist->latency_base;
387 machine->numa.maxRelativeLatency = dist->latency_max;
388 machine->numa.relativeLatency.resize(dist->nbobjs);
389 for (std::size_t i = 0; i < dist->nbobjs; i++)
391 machine->numa.relativeLatency[i].resize(dist->nbobjs);
392 for (std::size_t j = 0; j < dist->nbobjs; j++)
// The flat hwloc array is read row by row: element (i,j) is at i*nbobjs+j.
394 machine->numa.relativeLatency[i][j] = dist->latency[i*dist->nbobjs+j];
// Case 2: no numa node objects were reported.
405 // No numa nodes found. Use the entire machine as a numa node.
406 const hwloc_obj_t hwlocMachine = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_MACHINE, NULL);
408 if (hwlocMachine != NULL)
410 machine->numa.nodes.resize(1);
411 machine->numa.nodes[0].id = 0;
412 machine->numa.nodes[0].memory = hwlocMachine->memory.total_memory;
// Hard-coded latency values for the single node; they are not read from hwloc.
413 machine->numa.baseLatency = 10;
414 machine->numa.maxRelativeLatency = 1;
415 machine->numa.relativeLatency = { { 1.0 } };
// All logical processor ids from 0 to logicalProcessorCount-1 belong to node 0.
417 for (int i = 0; i < machine->logicalProcessorCount; i++)
419 machine->numa.nodes[0].logicalProcessorId.push_back(i);
// NOTE(review): the bodies of the next loops are not visible here; presumably
// they set numaNodeId to 0 for each logical processor and core - confirm.
421 for (auto &l : machine->logicalProcessors)
425 for (auto &s : machine->sockets)
427 for (auto &c : s.cores)
// Remove all numa node entries; presumably only done on the failure path.
445 machine->numa.nodes.clear();
451 /*! \brief Read PCI device information from hwloc topology
453 * \param topo hwloc topology handle that has been initialized and loaded
454 * \param machine Pointer to the machine structure in the HardwareTopology
455 * class, where PCI device information will be filled.
457 * \return If any devices were found the return value is 0, otherwise non-zero.
// Append one entry to machine->devices for every PCI device object found
// below the root of the hwloc topology. The return value is pcidevs.empty(),
// so it is zero when at least one PCI device was found.
460 parseHwLocDevices(const hwloc_topology_t topo,
461 HardwareTopology::Machine * machine)
463 const hwloc_obj_t root = hwloc_get_root_obj(topo);
464 std::vector<hwloc_obj_t> pcidevs = getHwLocDescendantsByType(root, HWLOC_OBJ_PCI_DEVICE);
466 for (auto &p : pcidevs)
// Look for a numa node object among the ancestors of the device.
468 const hwloc_obj_t ancestor = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_NUMANODE, p);
470 if (ancestor != NULL)
472 numaId = ancestor->logical_index;
476 // If we only have a single numa node we belong to it, otherwise set it to -1 (unknown)
477 numaId = (machine->numa.nodes.size() == 1) ? 0 : -1;
// The PCI fields are read through p->attr below, so it must not be NULL.
480 GMX_RELEASE_ASSERT(p->attr, "Attributes should not be NULL for hwloc PCI object");
// Values copied from the hwloc PCI attributes of the device.
482 machine->devices.push_back( {
483 p->attr->pcidev.vendor_id,
484 p->attr->pcidev.device_id,
485 p->attr->pcidev.class_id,
486 p->attr->pcidev.domain,
489 p->attr->pcidev.func,
// empty() is true (non-zero) when no PCI device was found, false (0) otherwise.
493 return pcidevs.empty();
497 parseHwLoc(HardwareTopology::Machine * machine,
498 HardwareTopology::SupportLevel * supportLevel,
501 hwloc_topology_t topo;
503 // Initialize a hwloc object, set flags to request IO device information too,
504 // try to load the topology, and get the root object. If either step fails,
505 // return that we do not have any support at all from hwloc.
506 if (hwloc_topology_init(&topo) != 0)
508 hwloc_topology_destroy(topo);
509 return; // SupportLevel::None.
512 hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);
514 if (hwloc_topology_load(topo) != 0 || hwloc_get_root_obj(topo) == NULL)
516 hwloc_topology_destroy(topo);
517 return; // SupportLevel::None.
520 // If we get here, we can get a valid root object for the topology
521 *isThisSystem = hwloc_topology_is_thissystem(topo);
523 // Parse basic information about sockets, cores, and hardware threads
524 if (parseHwLocSocketsCoresThreads(topo, machine) == 0)
526 *supportLevel = HardwareTopology::SupportLevel::Basic;
530 hwloc_topology_destroy(topo);
531 return; // SupportLevel::None.
534 // Get information about cache and numa nodes
535 if (parseHwLocCache(topo, machine) == 0 && parseHwLocNuma(topo, machine) == 0)
537 *supportLevel = HardwareTopology::SupportLevel::Full;
541 hwloc_topology_destroy(topo);
542 return; // SupportLevel::Basic.
546 if (parseHwLocDevices(topo, machine) == 0)
548 *supportLevel = HardwareTopology::SupportLevel::FullWithDevices;
551 hwloc_topology_destroy(topo);
552 return; // SupportLevel::Full or SupportLevel::FullWithDevices.
557 /*! \brief Try to detect the number of logical processors.
559 * \return The number of hardware processing units, or 0 if it fails.
// Count the logical processors through the OS interface selected by the
// preprocessor conditions below: GetSystemInfo() on native Windows, sysconf()
// when HAVE_SYSCONF is defined, and otherwise the count is set to 0.
// On the sysconf path the configured and the online processor counts are
// compared, and a warning is printed with md_print_warn() when they differ.
// fplog and cr are only passed on to md_print_warn() or marked as unused.
562 detectLogicalProcessorCount(FILE *fplog, const t_commrec *cr)
567 #if GMX_NATIVE_WINDOWS
570 GetSystemInfo( &sysinfo );
571 count = sysinfo.dwNumberOfProcessors;
572 #elif defined HAVE_SYSCONF
573 // We are probably on Unix. Check if we have the argument to use before executing any calls
574 # if defined(_SC_NPROCESSORS_CONF)
575 count = sysconf(_SC_NPROCESSORS_CONF);
576 # if defined(_SC_NPROCESSORS_ONLN)
577 /* On e.g. Arm, the Linux kernel can use advanced power saving features where
578 * processors are brought online/offline dynamically. This will cause
579 * _SC_NPROCESSORS_ONLN to report 1 at the beginning of the run. For this
580 * reason we now warn if this mismatches with the detected core count. */
581 int countOnline = sysconf(_SC_NPROCESSORS_ONLN);
582 if (count != countOnline)
584 /* We assume that this scenario means that the kernel has
585 disabled threads or cores, and that the only safe course is
586 to assume that _SC_NPROCESSORS_ONLN should be used. Even
587 this may not be valid if running in a containerized
588 environment, such system calls may read from
589 /sys/devices/system/cpu and report what the OS sees, rather
590 than what the container cgroup is supposed to set up as
591 limits. But we're not sure right now whether there's any
592 (standard-ish) way to handle that.
594 On ARM, the kernel may have powered down the cores,
595 which we'll warn the user about now. On x86, this
596 means HT is disabled by the kernel, not in the
597 BIOS. We're not sure what it means on other
598 architectures, or even if it is possible, because
599 sysconf is rather non-standardized. */
// NOTE(review): two alternative warnings follow; presumably the first is used
// on ARM and the second elsewhere - the selecting condition is not visible here.
602 md_print_warn(cr, fplog,
603 "%d CPUs configured, but only %d of them are online.\n"
604 "This can happen on embedded platforms (e.g. ARM) where the OS shuts some cores\n"
605 "off to save power, and will turn them back on later when the load increases.\n"
606 "However, this will likely mean GROMACS cannot pin threads to those cores. You\n"
607 "will likely see much better performance by forcing all cores to be online, and\n"
608 "making sure they run at their full clock frequency.", count, countOnline);
612 md_print_warn(cr, fplog,
613 "Note: %d CPUs configured, but only %d of them are online, so GROMACS will use the latter.",
615 // We use the online count to avoid (potential) oversubscription.
// Fallbacks for systems where the sysconf() argument has a different name.
620 # elif defined(_SC_NPROC_CONF)
621 count = sysconf(_SC_NPROC_CONF);
622 # elif defined(_SC_NPROCESSORS_ONLN)
623 count = sysconf(_SC_NPROCESSORS_ONLN);
624 # elif defined(_SC_NPROC_ONLN)
625 count = sysconf(_SC_NPROC_ONLN);
627 # warning "No valid sysconf argument value found. Executables will not be able to determine the number of logical cores: mdrun will use 1 thread by default!"
628 # endif // End of check for sysconf argument values
631 count = 0; // Neither windows nor Unix.
// Cross-check the detected count against the processor count reported by
// gmx_omp_get_num_procs(), and warn if the two disagree.
635 int countFromOpenmp = gmx_omp_get_num_procs();
636 if (count != countFromOpenmp)
638 md_print_warn(cr, fplog,
639 "Number of logical cores detected (%d) does not match the number reported by OpenMP (%d).\n"
640 "Consider setting the launch configuration manually!",
641 count, countFromOpenmp);
// Mark the parameters as used; presumably needed for build configurations in
// which none of the md_print_warn() calls above is compiled.
645 GMX_UNUSED_VALUE(cr);
646 GMX_UNUSED_VALUE(fplog);
650 } // namespace anonymous
// Detect the hardware topology, using progressively less detailed sources:
// first hwloc, then CpuInfo if hwloc gave less than Basic support, and
// finally only the logical processor count if neither gave any information.
// fplog and cr are only passed on to detectLogicalProcessorCount().
653 HardwareTopology HardwareTopology::detect(FILE *fplog, const t_commrec *cr)
655 HardwareTopology result;
657 // Default values for machine and numa stuff
658 result.machine_.logicalProcessorCount = 0;
659 result.machine_.numa.baseLatency = 0.0;
660 result.machine_.numa.maxRelativeLatency = 0.0;
661 result.supportLevel_ = SupportLevel::None;
662 result.isThisSystem_ = true;
// First choice: hwloc. It raises supportLevel_ only as far as parsing succeeds.
665 parseHwLoc(&result.machine_, &result.supportLevel_, &result.isThisSystem_);
668 // If something went wrong in hwloc (or if it was not present) we might
669 // have more information in cpuInfo
670 if (result.supportLevel_ < SupportLevel::Basic)
672 // There might be topology information in cpuInfo
673 parseCpuInfo(&result.machine_, &result.supportLevel_);
675 // If we did not manage to get anything from either hwloc or cpuInfo, find the cpu count at least
676 if (result.supportLevel_ == SupportLevel::None)
678 // No topology information; try to detect the number of logical processors at least
679 result.machine_.logicalProcessorCount = detectLogicalProcessorCount(fplog, cr);
// A count of 0 means detection failed, so the support level stays at None.
680 if (result.machine_.logicalProcessorCount > 0)
682 result.supportLevel_ = SupportLevel::LogicalProcessorCount;
// Default constructor: the support level starts out as SupportLevel::None.
689 HardwareTopology::HardwareTopology()
690 : supportLevel_(SupportLevel::None)
694 int HardwareTopology::numberOfCores() const
696 if (supportLevel() >= SupportLevel::Basic)
698 // We assume all sockets have the same number of cores as socket 0.
699 // Since topology information is present, we can assume there is at least one socket.
700 return machine().sockets.size() * machine().sockets[0].cores.size();
702 else if (supportLevel() >= SupportLevel::LogicalProcessorCount)
704 return machine().logicalProcessorCount;