2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 * Implements gmx::HardwareTopology.
40 * \author Erik Lindahl <erik.lindahl@gmail.com>
41 * \ingroup module_hardware
46 #include "hardwaretopology.h"
59 #include "gromacs/gmxlib/md_logging.h"
60 #include "gromacs/hardware/cpuinfo.h"
61 #include "gromacs/utility/gmxassert.h"
62 #include "gromacs/utility/gmxomp.h"
65 # include <unistd.h> // sysconf()
67 #if GMX_NATIVE_WINDOWS
68 # include <windows.h> // GetSystemInfo()
77 /*****************************************************************************
79 * Utility functions for extracting hardware topology from CpuInfo object *
81 *****************************************************************************/
83 /*! \brief Initialize machine data from basic information in cpuinfo
85 * \param machine Machine tree structure where information will be assigned
86 * if the cpuinfo object contains topology information.
87 * \param supportLevel If topology information is available in CpuInfo,
88 * this will be updated to reflect the amount of
89 * information written to the machine structure.
// Builds the sockets/cores/hwThreads tree and the logicalProcessors list in
// *machine from CpuInfo::detect(), and reports through *supportLevel whether
// that worked (Basic) or CpuInfo had no logical processors (None).
// NOTE(review): this listing omits short lines (braces, short declarations such
// as those of nSockets/nCores/nHwThreads); confirm details against the full file.
92 parseCpuInfo(HardwareTopology::Machine * machine,
93 HardwareTopology::SupportLevel * supportLevel)
95 CpuInfo cpuInfo(CpuInfo::detect());
97 if (!cpuInfo.logicalProcessors().empty())
103 // Copy the logical processor information from cpuinfo
104 for (auto &l : cpuInfo.logicalProcessors())
// The trailing -1 is the fourth initializer of the pushed element.
// NOTE(review): presumably the numaNodeId member - confirm against the
// LogicalProcessor declaration.
106 machine->logicalProcessors.push_back( { l.socketRankInMachine, l.coreRankInSocket, l.hwThreadRankInCore, -1 } );
// Track the largest rank seen on each level; ranks are zero-based, which is
// why the resize() calls below use "+ 1".
107 nSockets = std::max(nSockets, l.socketRankInMachine);
108 nCores = std::max(nCores, l.coreRankInSocket);
109 nHwThreads = std::max(nHwThreads, l.hwThreadRankInCore);
112 // Fill info from sockets/cores/hwthreads
// Every socket gets nCores + 1 cores and every core nHwThreads + 1 hardware
// threads, i.e. the tree is sized by the per-level maxima over the whole machine.
117 machine->sockets.resize(nSockets + 1);
118 for (auto &s : machine->sockets)
121 s.cores.resize(nCores + 1);
122 for (auto &c : s.cores)
125 c.numaNodeId = -1; // No numa information
126 c.hwThreads.resize(nHwThreads + 1);
127 for (auto &t : c.hwThreads)
130 t.logicalProcessorId = -1; // set as unassigned for now
135 // Fill the logical processor id in the right place
// Tree slots that no logical processor maps to keep the -1 assigned above.
136 for (std::size_t i = 0; i < machine->logicalProcessors.size(); i++)
138 const HardwareTopology::LogicalProcessor &l = machine->logicalProcessors[i];
139 machine->sockets[l.socketRankInMachine].cores[l.coreRankInSocket].hwThreads[l.hwThreadRankInCore].logicalProcessorId = static_cast<int>(i);
141 machine->logicalProcessorCount = machine->logicalProcessors.size();
142 *supportLevel = HardwareTopology::SupportLevel::Basic;
// NOTE(review): presumably the else-branch of the emptiness check above (the
// "else" line itself is not visible in this listing).
146 *supportLevel = HardwareTopology::SupportLevel::None;
// Compatibility aliases: when the hwloc API version is below 0x00010b00 the
// PACKAGE/NUMANODE object type names used in this file are mapped onto the
// SOCKET/NODE names. NOTE(review): presumably the older names are the only ones
// those hwloc releases provide - confirm against the hwloc changelog.
152 #if HWLOC_API_VERSION < 0x00010b00
153 # define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
154 # define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
157 /*****************************************************************************
159 * Utility functions for extracting hardware topology from hwloc library *
161 *****************************************************************************/
163 /*! \brief Return vector of all descendants of a given type in hwloc topology
165 * \param obj Non-null hwloc object.
166 * \param type hwloc object type to find. The routine will only search
167 * on levels below obj.
169 * \return vector containing all the objects of given type that are
170 * descendants of the provided object. If no objects of this type
171 * were found, the vector will be empty.
173 const std::vector<hwloc_obj_t>
174 getHwLocDescendantsByType(const hwloc_obj_t obj, const hwloc_obj_type_t type)
176 GMX_RELEASE_ASSERT(obj, "NULL hwloc object provided to getHwLocDescendantsByType()");
178 std::vector<hwloc_obj_t> v;
180 // Go through children; if this object has no children obj->arity is 0,
181 // and we'll return an empty vector.
182 for (std::size_t i = 0; i < obj->arity; i++)
184 // If the child is the type we're looking for, add it directly.
185 // Otherwise call this routine recursively for each child.
186 if (obj->children[i]->type == type)
188 v.push_back(obj->children[i]);
192 std::vector<hwloc_obj_t> v2 = getHwLocDescendantsByType(obj->children[i], type);
193 v.insert(v.end(), v2.begin(), v2.end());
199 /*! \brief Read information about sockets, cores and threads from hwloc topology
201 * \param topo hwloc topology handle that has been initialized and loaded
202 * \param machine Pointer to the machine structure in the HardwareTopology
203 * class, where the tree of sockets/cores/threads will be written.
205 * \return If all the data is found the return value is 0, otherwise non-zero.
// Walks the hwloc tree package -> core -> PU and mirrors it into
// machine->sockets, while filling machine->logicalProcessors (indexed by the
// OS index of each PU) with the position of that PU in the tree.
// NOTE(review): this listing omits short lines (braces, else-branches, return
// statements); the failure handling is therefore only partly visible here.
208 parseHwLocSocketsCoresThreads(const hwloc_topology_t topo,
209 HardwareTopology::Machine * machine)
211 const hwloc_obj_t root = hwloc_get_root_obj(topo);
212 std::vector<hwloc_obj_t> hwlocSockets = getHwLocDescendantsByType(root, HWLOC_OBJ_PACKAGE);
// The PU count reported by hwloc sizes the logicalProcessors vector.
// NOTE(review): hwloc_get_nbobjs_by_type() returns an int; confirm it cannot be
// negative for HWLOC_OBJ_PU before it is used as a size.
214 machine->logicalProcessorCount = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
215 machine->logicalProcessors.resize(machine->logicalProcessorCount);
216 machine->sockets.resize(hwlocSockets.size());
// All three loops below also test topologyOk, so traversal stops as soon as
// one level turns out to be empty.
218 bool topologyOk = !hwlocSockets.empty(); // Fail if we have no sockets in machine
220 for (std::size_t i = 0; i < hwlocSockets.size() && topologyOk; i++)
222 // Assign information about this socket
223 machine->sockets[i].id = hwlocSockets[i]->logical_index;
225 // Get children (cores)
226 std::vector<hwloc_obj_t> hwlocCores = getHwLocDescendantsByType(hwlocSockets[i], HWLOC_OBJ_CORE);
227 machine->sockets[i].cores.resize(hwlocCores.size());
229 topologyOk = topologyOk && !hwlocCores.empty(); // Fail if we have no cores in socket
231 // Loop over child cores
232 for (std::size_t j = 0; j < hwlocCores.size() && topologyOk; j++)
234 // Assign information about this core
235 machine->sockets[i].cores[j].id = hwlocCores[j]->logical_index;
236 machine->sockets[i].cores[j].numaNodeId = -1;
238 // Get children (hwthreads)
239 std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(hwlocCores[j], HWLOC_OBJ_PU);
240 machine->sockets[i].cores[j].hwThreads.resize(hwlocPUs.size());
242 topologyOk = topologyOk && !hwlocPUs.empty(); // Fail if we have no hwthreads in core
244 // Loop over child hwthreads
245 for (std::size_t k = 0; k < hwlocPUs.size() && topologyOk; k++)
247 // Assign information about this hwthread
// The OS index of the PU doubles as the index into logicalProcessors, whereas
// the "id" members take the hwloc logical index.
248 std::size_t logicalProcessorId = hwlocPUs[k]->os_index;
249 machine->sockets[i].cores[j].hwThreads[k].id = hwlocPUs[k]->logical_index;
250 machine->sockets[i].cores[j].hwThreads[k].logicalProcessorId = logicalProcessorId;
// Only write through the OS index when it is inside the vector sized above.
252 if (logicalProcessorId < machine->logicalProcessors.size())
254 // Cross-assign data for this hwthread to the logicalProcessors vector
// The stored ranks are the loop positions i/j/k, not hwloc indices.
255 machine->logicalProcessors[logicalProcessorId].socketRankInMachine = static_cast<int>(i);
256 machine->logicalProcessors[logicalProcessorId].coreRankInSocket = static_cast<int>(j);
257 machine->logicalProcessors[logicalProcessorId].hwThreadRankInCore = static_cast<int>(k);
258 machine->logicalProcessors[logicalProcessorId].numaNodeId = -1;
// NOTE(review): presumably the failure path (topologyOk == false): the partly
// filled vectors are dropped. machine->logicalProcessorCount, assigned above,
// is not reset in the lines visible here - confirm that callers overwrite it.
274 machine->logicalProcessors.clear();
275 machine->sockets.clear();
280 /*! \brief Read cache information from hwloc topology
282 * \param topo hwloc topology handle that has been initialized and loaded
283 * \param machine Pointer to the machine structure in the HardwareTopology
284 * class, where cache data will be filled.
286 * \return If any cache data is found the return value is 0, otherwise non-zero.
// Appends one entry to machine->caches for each cache level 1-5 that hwloc can
// describe, using a cache object found at the depth hwloc reports for that level.
// NOTE(review): this listing omits short lines; the checks that presumably guard
// "depth" and "cache" before they are used are not visible here.
289 parseHwLocCache(const hwloc_topology_t topo,
290 HardwareTopology::Machine * machine)
292 // Parse caches up to L5
293 for (int cachelevel : { 1, 2, 3, 4, 5})
295 int depth = hwloc_get_cache_type_depth(topo, cachelevel, HWLOC_OBJ_CACHE_DATA);
// NULL is passed as the "previous" object. NOTE(review): presumably this yields
// the first object at that depth, so only one cache per level is inspected -
// confirm against the hwloc documentation.
299 hwloc_obj_t cache = hwloc_get_next_obj_by_depth(topo, depth, NULL);
// The PUs below the cache object are counted for the last initializer below.
302 std::vector<hwloc_obj_t> hwThreads = getHwLocDescendantsByType(cache, HWLOC_OBJ_PU);
304 machine->caches.push_back( {
305 static_cast<int>(cache->attr->cache.depth),
306 static_cast<std::size_t>(cache->attr->cache.size),
307 static_cast<int>(cache->attr->cache.linesize),
308 static_cast<int>(cache->attr->cache.associativity),
// Number of PUs found below this cache, but never less than 1.
309 std::max(static_cast<int>(hwThreads.size()), 1)
// An empty cache list yields a non-zero (true) result, a non-empty list yields
// zero. Entries are only appended here, so the result reflects the whole list.
314 return machine->caches.empty();
318 /*! \brief Read numa information from hwloc topology
320 * \param topo hwloc topology handle that has been initialized and loaded
321 * \param machine Pointer to the machine structure in the HardwareTopology
322 * class, where numa information will be filled.
324 * Hwloc should virtually always be able to detect numa information, but if
325 * there is only a single numa node in the system it is not reported at all.
326 * In this case we create a single numa node covering all cores.
328 * This function uses the basic socket/core/thread information detected by
329 * parseHwLocSocketsCoresThreads(), which means that routine must have
330 * completed successfully before calling this one. If this is not the case,
331 * you will get an error return code.
333 * \return If the data found makes sense (either in the numa node or the
334 * entire machine) the return value is 0, otherwise non-zero.
// Fills machine->numa from the hwloc NUMA node objects and tags each logical
// processor and core with the index of its node. When hwloc reports no NUMA
// node, the whole machine is described as one node instead.
// NOTE(review): this listing omits short lines (braces, else-branches, short
// assignments and the return statements); several bodies are not visible here.
337 parseHwLocNuma(const hwloc_topology_t topo,
338 HardwareTopology::Machine * machine)
340 const hwloc_obj_t root = hwloc_get_root_obj(topo);
341 std::vector<hwloc_obj_t> hwlocNumaNodes = getHwLocDescendantsByType(root, HWLOC_OBJ_NUMANODE);
342 bool topologyOk = true;
344 if (!hwlocNumaNodes.empty())
346 machine->numa.nodes.resize(hwlocNumaNodes.size());
348 for (std::size_t i = 0; i < hwlocNumaNodes.size(); i++)
// "id" takes the hwloc logical index, while the numaNodeId values written below
// are the position i in this vector.
350 machine->numa.nodes[i].id = hwlocNumaNodes[i]->logical_index;
351 machine->numa.nodes[i].memory = hwlocNumaNodes[i]->memory.total_memory;
352 machine->numa.nodes[i].logicalProcessorId.clear();
354 // Get list of PUs in this numa node
355 std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(hwlocNumaNodes[i], HWLOC_OBJ_PU);
357 for (auto &p : hwlocPUs)
359 machine->numa.nodes[i].logicalProcessorId.push_back(p->os_index);
// The OS index is used as an index into logicalProcessors right below, so it is
// checked first.
361 GMX_RELEASE_ASSERT(p->os_index < machine->logicalProcessors.size(), "OS index of PU in hwloc larger than processor count");
363 machine->logicalProcessors[p->os_index].numaNodeId = static_cast<int>(i);
// The ranks are read into unsigned variables and range-checked by the two
// assertions below before they are used as indices.
364 std::size_t s = machine->logicalProcessors[p->os_index].socketRankInMachine;
365 std::size_t c = machine->logicalProcessors[p->os_index].coreRankInSocket;
367 GMX_RELEASE_ASSERT(s < machine->sockets.size(), "Socket index in logicalProcessors larger than socket count");
368 GMX_RELEASE_ASSERT(c < machine->sockets[s].cores.size(), "Core index in logicalProcessors larger than core count");
369 // Set numaNodeId in core too
// Written once per PU, so a core ends up with the node of the PU visited last.
// NOTE(review): i is a std::size_t assigned without the static_cast<int> used
// for the logical processor a few lines up.
370 machine->sockets[s].cores[c].numaNodeId = i;
// Latency data is taken from the hwloc distance matrix at the NUMA node depth,
// but only if its object count equals the number of NUMA nodes found above.
374 int depth = hwloc_get_type_depth(topo, HWLOC_OBJ_NUMANODE);
375 const struct hwloc_distances_s * dist = hwloc_get_whole_distance_matrix_by_depth(topo, depth);
376 if (dist != NULL && dist->nbobjs == hwlocNumaNodes.size())
378 machine->numa.baseLatency = dist->latency_base;
379 machine->numa.maxRelativeLatency = dist->latency_max;
380 machine->numa.relativeLatency.resize(dist->nbobjs);
381 for (std::size_t i = 0; i < dist->nbobjs; i++)
383 machine->numa.relativeLatency[i].resize(dist->nbobjs);
384 for (std::size_t j = 0; j < dist->nbobjs; j++)
// dist->latency is read as a flat array with nbobjs entries per row.
386 machine->numa.relativeLatency[i][j] = dist->latency[i*dist->nbobjs+j];
397 // No numa nodes found. Use the entire machine as a numa node.
398 const hwloc_obj_t hwlocMachine = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_MACHINE, NULL);
400 if (hwlocMachine != NULL)
402 machine->numa.nodes.resize(1);
403 machine->numa.nodes[0].id = 0;
404 machine->numa.nodes[0].memory = hwlocMachine->memory.total_memory;
// Fixed values for the single-node case: a 1x1 relative latency matrix.
// NOTE(review): the base latency of 10 looks like a placeholder - confirm.
405 machine->numa.baseLatency = 10;
406 machine->numa.maxRelativeLatency = 1;
407 machine->numa.relativeLatency = { { 1.0 } };
// The node lists the ids 0 .. logicalProcessorCount-1.
// NOTE(review): this presumes the OS indices are contiguous - confirm.
409 for (int i = 0; i < machine->logicalProcessorCount; i++)
411 machine->numa.nodes[0].logicalProcessorId.push_back(i);
// NOTE(review): the bodies of the next three loops are not visible in this
// listing.
413 for (auto &l : machine->logicalProcessors)
417 for (auto &s : machine->sockets)
419 for (auto &c : s.cores)
// NOTE(review): presumably the failure path - the node list is dropped again.
437 machine->numa.nodes.clear();
443 /*! \brief Read PCI device information from hwloc topology
445 * \param topo hwloc topology handle that has been initialized and loaded
446 * \param machine Pointer to the machine structure in the HardwareTopology
447 * class, where PCI device information will be filled.
449 * \return If any devices were found the return value is 0, otherwise non-zero.
// Appends one entry to machine->devices for every PCI device object found below
// the hwloc root, together with the NUMA node the device is attached to.
// NOTE(review): this listing omits short lines (the numaId declaration, the
// else line and some initializer entries are not visible here).
452 parseHwLocDevices(const hwloc_topology_t topo,
453 HardwareTopology::Machine * machine)
455 const hwloc_obj_t root = hwloc_get_root_obj(topo);
456 std::vector<hwloc_obj_t> pcidevs = getHwLocDescendantsByType(root, HWLOC_OBJ_PCI_DEVICE);
458 for (auto &p : pcidevs)
// Look for a NUMA node among the ancestors of the device.
460 const hwloc_obj_t ancestor = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_NUMANODE, p);
462 if (ancestor != NULL)
// NOTE(review): this stores the hwloc logical index, whereas parseHwLocNuma()
// writes vector positions into the numaNodeId members - confirm that both
// numberings coincide.
464 numaId = ancestor->logical_index;
468 // If we only have a single numa node we belong to it, otherwise set it to -1 (unknown)
469 numaId = (machine->numa.nodes.size() == 1) ? 0 : -1;
472 GMX_RELEASE_ASSERT(p->attr, "Attributes should not be NULL for hwloc PCI object");
// The initializers follow the member order of the device entry; some of them
// lie on lines that are not visible in this listing.
474 machine->devices.push_back( {
475 p->attr->pcidev.vendor_id,
476 p->attr->pcidev.device_id,
477 p->attr->pcidev.class_id,
478 p->attr->pcidev.domain,
481 p->attr->pcidev.func,
// No PCI device objects yields a non-zero (true) result, otherwise zero.
485 return pcidevs.empty();
489 parseHwLoc(HardwareTopology::Machine * machine,
490 HardwareTopology::SupportLevel * supportLevel,
493 hwloc_topology_t topo;
495 // Initialize a hwloc object, set flags to request IO device information too,
496 // try to load the topology, and get the root object. If either step fails,
497 // return that we do not have any support at all from hwloc.
498 if (hwloc_topology_init(&topo) != 0)
500 hwloc_topology_destroy(topo);
501 return; // SupportLevel::None.
504 hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);
506 if (hwloc_topology_load(topo) != 0 || hwloc_get_root_obj(topo) == NULL)
508 hwloc_topology_destroy(topo);
509 return; // SupportLevel::None.
512 // If we get here, we can get a valid root object for the topology
513 *isThisSystem = hwloc_topology_is_thissystem(topo);
515 // Parse basic information about sockets, cores, and hardware threads
516 if (parseHwLocSocketsCoresThreads(topo, machine) == 0)
518 *supportLevel = HardwareTopology::SupportLevel::Basic;
522 hwloc_topology_destroy(topo);
523 return; // SupportLevel::None.
526 // Get information about cache and numa nodes
527 if (parseHwLocCache(topo, machine) == 0 && parseHwLocNuma(topo, machine) == 0)
529 *supportLevel = HardwareTopology::SupportLevel::Full;
533 hwloc_topology_destroy(topo);
534 return; // SupportLevel::Basic.
538 if (parseHwLocDevices(topo, machine) == 0)
540 *supportLevel = HardwareTopology::SupportLevel::FullWithDevices;
543 hwloc_topology_destroy(topo);
544 return; // SupportLevel::Full or SupportLevel::FullWithDevices.
549 /*! \brief Try to detect the number of logical processors.
551 * \return The number of hardware processing units, or 0 if it fails.
// Determines the logical processor count with the OS facility selected at
// compile time, warns through md_print_warn() about counts that look
// suspicious, and cross-checks the result against gmx_omp_get_num_procs().
// NOTE(review): this listing omits short lines (the declarations of count and
// sysinfo, several #else/#endif lines and the return statement).
554 detectLogicalProcessorCount(FILE *fplog, const t_commrec *cr)
559 #if GMX_NATIVE_WINDOWS
562 GetSystemInfo( &sysinfo );
563 count = sysinfo.dwNumberOfProcessors;
564 #elif defined HAVE_SYSCONF
565 // We are probably on Unix. Check if we have the argument to use before executing any calls
// The "configured" count is preferred; the "online" variants further down are
// only used when no "configured" argument is defined.
// NOTE(review): sysconf() returns -1 on error, while the function documentation
// promises 0 on failure - confirm that negative values are handled.
566 # if defined(_SC_NPROCESSORS_CONF)
567 count = sysconf(_SC_NPROCESSORS_CONF);
568 # if defined(_SC_NPROCESSORS_ONLN)
569 /* On e.g. Arm, the Linux kernel can use advanced power saving features where
570 * processors are brought online/offline dynamically. This will cause
571 * _SC_NPROCESSORS_ONLN to report 1 at the beginning of the run. For this
572 * reason we now warn if this mismatches with the detected core count. */
573 int countOnline = sysconf(_SC_NPROCESSORS_ONLN);
574 if (count != countOnline)
576 md_print_warn(cr, fplog,
577 "%d CPUs configured, but only %d of them are online.\n"
578 "This can happen on embedded platforms (e.g. ARM) where the OS shuts some cores\n"
579 "off to save power, and will turn them back on later when the load increases.\n"
580 "However, this will likely mean GROMACS cannot pin threads to those cores. You\n"
581 "will likely see much better performance by forcing all cores to be online, and\n"
582 "making sure they run at their full clock frequency.", count, countOnline);
585 # elif defined(_SC_NPROC_CONF)
586 count = sysconf(_SC_NPROC_CONF);
587 # elif defined(_SC_NPROCESSORS_ONLN)
588 count = sysconf(_SC_NPROCESSORS_ONLN);
589 # elif defined(_SC_NPROC_ONLN)
590 count = sysconf(_SC_NPROC_ONLN);
592 # warning "No valid sysconf argument value found. Executables will not be able to determine the number of logical cores: mdrun will use 1 thread by default!"
593 # endif // End of check for sysconf argument values
596 count = 0; // Neither windows nor Unix.
// A mismatch with the OpenMP runtime only produces a warning; count itself is
// not changed. NOTE(review): presumably this part is compiled only when OpenMP
// support is enabled - the guarding directive is not visible in this listing.
600 int countFromOpenmp = gmx_omp_get_num_procs();
601 if (count != countFromOpenmp)
603 md_print_warn(cr, fplog,
604 "Number of logical cores detected (%d) does not match the number reported by OpenMP (%d).\n"
605 "Consider setting the launch configuration manually!",
606 count, countFromOpenmp);
// NOTE(review): presumably these silence unused-parameter warnings in build
// configurations where none of the branches above uses cr and fplog.
610 GMX_UNUSED_VALUE(cr);
611 GMX_UNUSED_VALUE(fplog);
615 } // namespace anonymous
// Builds a HardwareTopology by trying the information sources from most to
// least detailed: hwloc, then CpuInfo, then a plain logical processor count.
// fplog and cr are only passed on to detectLogicalProcessorCount().
// NOTE(review): this listing omits short lines (braces, preprocessor lines and
// the return statement).
618 HardwareTopology HardwareTopology::detect(FILE *fplog, const t_commrec *cr)
620 HardwareTopology result;
622 // Default values for machine and numa stuff
623 result.machine_.logicalProcessorCount = 0;
624 result.machine_.numa.baseLatency = 0.0;
625 result.machine_.numa.maxRelativeLatency = 0.0;
626 result.supportLevel_ = SupportLevel::None;
627 result.isThisSystem_ = true;
// parseHwLoc() only raises the support level and only writes isThisSystem_
// after loading a topology, so the defaults above survive a hwloc failure.
// NOTE(review): presumably this call is compiled only when hwloc support is
// enabled - the guarding directive is not visible in this listing.
630 parseHwLoc(&result.machine_, &result.supportLevel_, &result.isThisSystem_);
633 // If something went wrong in hwloc (or if it was not present) we might
634 // have more information in cpuInfo
635 if (result.supportLevel_ < SupportLevel::Basic)
637 // There might be topology information in cpuInfo
638 parseCpuInfo(&result.machine_, &result.supportLevel_);
640 // If we did not manage to get anything from either hwloc or cpuInfo, find the cpu count at least
641 if (result.supportLevel_ == SupportLevel::None)
643 // No topology information; try to detect the number of logical processors at least
644 result.machine_.logicalProcessorCount = detectLogicalProcessorCount(fplog, cr);
// A count of zero or less leaves the support level at None.
645 if (result.machine_.logicalProcessorCount > 0)
647 result.supportLevel_ = SupportLevel::LogicalProcessorCount;
// Default constructor: the visible initializer list only sets the support level
// to None. detect() assigns the remaining defaults (processor count, numa
// latencies, isThisSystem_) explicitly on the object it creates.
// NOTE(review): the constructor body is not visible in this listing.
654 HardwareTopology::HardwareTopology()
655 : supportLevel_(SupportLevel::None)
659 int HardwareTopology::numberOfCores() const
661 if (supportLevel() >= SupportLevel::Basic)
663 // We assume all sockets have the same number of cores as socket 0.
664 // Since topology information is present, we can assume there is at least one socket.
665 return machine().sockets.size() * machine().sockets[0].cores.size();
667 else if (supportLevel() >= SupportLevel::LogicalProcessorCount)
669 return machine().logicalProcessorCount;