From 6f58aa984a84c651419f5de9b46b81ade136619c Mon Sep 17 00:00:00 2001 From: Kevin Boyd Date: Sat, 21 Jul 2018 14:56:38 -0400 Subject: [PATCH] Support hwloc 2.x.x Created compatibility layer to account for API changes moving from hwloc 1.x.x to 2.x.x while retaining support for v1.x.x. Changes supporting hwloc 2.x.x include: -reworked descendants lookup in topology tree to account for new division of object children into "normal", "memory", and "io" types -different memory access location for hwloc objects -accessing distances (latencies) between nodes has been reworked -different flags for accessing PCI devices -changed numa node ancestor search to account for numa nodes no longer being a normal part of topology tree Fixes #2539 Change-Id: I483dda3dd344d8f7c99aa828bcc118a3d2de9dfd --- src/gromacs/hardware/hardwaretopology.cpp | 177 ++++++++++++++++++---- 1 file changed, 147 insertions(+), 30 deletions(-) diff --git a/src/gromacs/hardware/hardwaretopology.cpp b/src/gromacs/hardware/hardwaretopology.cpp index 2051990888..014ae654ce 100644 --- a/src/gromacs/hardware/hardwaretopology.cpp +++ b/src/gromacs/hardware/hardwaretopology.cpp @@ -50,6 +50,9 @@ #include #include +#include +#include +#include #include #if GMX_USE_HWLOC @@ -157,14 +160,33 @@ parseCpuInfo(HardwareTopology::Machine * machine, # define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE #endif +// Preprocessor variable for if hwloc api is version 1.x.x or 2.x.x +#if HWLOC_API_VERSION >= 0x00020000 +# define GMX_HWLOC_API_VERSION_IS_2XX 1 +#else +# define GMX_HWLOC_API_VERSION_IS_2XX 0 +#endif + /***************************************************************************** * * * Utility functions for extracting hardware topology from hwloc library * * * *****************************************************************************/ +// Compatibility function for accessing hwloc_obj_t object memory with different API versions of hwloc +std::size_t +getHwLocObjectMemory(const hwloc_obj_t obj) +{ +#if 
GMX_HWLOC_API_VERSION_IS_2XX + return obj->total_memory; +#else + return obj->memory.total_memory; +#endif +} + /*! \brief Return vector of all descendants of a given type in hwloc topology * + * \param topo hwloc topology handle that has been initialized and loaded * \param obj Non-null hwloc object. * \param type hwloc object type to find. The routine will only search * on levels below obj. @@ -174,27 +196,23 @@ parseCpuInfo(HardwareTopology::Machine * machine, * were found, the vector will be empty. */ const std::vector -getHwLocDescendantsByType(const hwloc_obj* obj, const hwloc_obj_type_t type) +getHwLocDescendantsByType(const hwloc_topology_t topo, const hwloc_obj_t obj, const hwloc_obj_type_t type) { GMX_RELEASE_ASSERT(obj, "NULL hwloc object provided to getHwLocDescendantsByType()"); std::vector v; + if (obj->type == type) + { + v.push_back(obj); + } // Go through children; if this object has no children obj->arity is 0, // and we'll return an empty vector. - for (std::size_t i = 0; i < obj->arity; i++) + hwloc_obj_t tempNode = nullptr; + while ((tempNode = hwloc_get_next_child(topo, obj, tempNode)) != nullptr) { - // If the child is the type we're looking for, add it directly. - // Otherwise call this routine recursively for each child. 
- if (obj->children[i]->type == type) - { - v.push_back(obj->children[i]); - } - else - { - std::vector v2 = getHwLocDescendantsByType(obj->children[i], type); - v.insert(v.end(), v2.begin(), v2.end()); - } + std::vector v2 = getHwLocDescendantsByType(topo, tempNode, type); + v.insert(v.end(), v2.begin(), v2.end()); } return v; } @@ -211,8 +229,8 @@ int parseHwLocSocketsCoresThreads(hwloc_topology_t topo, HardwareTopology::Machine * machine) { - const hwloc_obj *const root = hwloc_get_root_obj(topo); - std::vector hwlocSockets = getHwLocDescendantsByType(root, HWLOC_OBJ_PACKAGE); + const hwloc_obj_t root = hwloc_get_root_obj(topo); + std::vector hwlocSockets = getHwLocDescendantsByType(topo, root, HWLOC_OBJ_PACKAGE); machine->logicalProcessorCount = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU); machine->logicalProcessors.resize(machine->logicalProcessorCount); @@ -226,7 +244,7 @@ parseHwLocSocketsCoresThreads(hwloc_topology_t topo, machine->sockets[i].id = hwlocSockets[i]->logical_index; // Get children (cores) - std::vector hwlocCores = getHwLocDescendantsByType(hwlocSockets[i], HWLOC_OBJ_CORE); + std::vector hwlocCores = getHwLocDescendantsByType(topo, hwlocSockets[i], HWLOC_OBJ_CORE); machine->sockets[i].cores.resize(hwlocCores.size()); topologyOk = topologyOk && !hwlocCores.empty(); // Fail if we have no cores in socket @@ -239,7 +257,7 @@ parseHwLocSocketsCoresThreads(hwloc_topology_t topo, machine->sockets[i].cores[j].numaNodeId = -1; // Get children (hwthreads) - std::vector hwlocPUs = getHwLocDescendantsByType(hwlocCores[j], HWLOC_OBJ_PU); + std::vector hwlocPUs = getHwLocDescendantsByType(topo, hwlocCores[j], HWLOC_OBJ_PU); machine->sockets[i].cores[j].hwThreads.resize(hwlocPUs.size()); topologyOk = topologyOk && !hwlocPUs.empty(); // Fail if we have no hwthreads in core @@ -302,7 +320,7 @@ parseHwLocCache(hwloc_topology_t topo, hwloc_obj_t cache = hwloc_get_next_obj_by_depth(topo, depth, nullptr); if (cache != nullptr) { - std::vector hwThreads = 
getHwLocDescendantsByType(cache, HWLOC_OBJ_PU); + std::vector hwThreads = getHwLocDescendantsByType(topo, cache, HWLOC_OBJ_PU); machine->caches.push_back( { static_cast(cache->attr->cache.depth), @@ -340,9 +358,9 @@ int parseHwLocNuma(hwloc_topology_t topo, HardwareTopology::Machine * machine) { - const hwloc_obj *const root = hwloc_get_root_obj(topo); - std::vector hwlocNumaNodes = getHwLocDescendantsByType(root, HWLOC_OBJ_NUMANODE); - bool topologyOk = true; + const hwloc_obj_t root = hwloc_get_root_obj(topo); + std::vector hwlocNumaNodes = getHwLocDescendantsByType(topo, root, HWLOC_OBJ_NUMANODE); + bool topologyOk = true; if (!hwlocNumaNodes.empty()) { @@ -351,12 +369,15 @@ parseHwLocNuma(hwloc_topology_t topo, for (std::size_t i = 0; i < hwlocNumaNodes.size(); i++) { machine->numa.nodes[i].id = hwlocNumaNodes[i]->logical_index; - machine->numa.nodes[i].memory = hwlocNumaNodes[i]->memory.total_memory; + machine->numa.nodes[i].memory = getHwLocObjectMemory(hwlocNumaNodes[i]); machine->numa.nodes[i].logicalProcessorId.clear(); - // Get list of PUs in this numa node - std::vector hwlocPUs = getHwLocDescendantsByType(hwlocNumaNodes[i], HWLOC_OBJ_PU); - + // Get list of PUs in this numa node. Get from numa node if v1.x.x, get from numa node's parent if 2.x.x +#if GMX_HWLOC_API_VERSION_IS_2XX + std::vector hwlocPUs = getHwLocDescendantsByType(topo, hwlocNumaNodes[i]->parent, HWLOC_OBJ_PU); +#else + std::vector hwlocPUs = getHwLocDescendantsByType(topo, hwlocNumaNodes[i], HWLOC_OBJ_PU); +#endif for (auto &p : hwlocPUs) { machine->numa.nodes[i].logicalProcessorId.push_back(p->os_index); @@ -373,7 +394,73 @@ parseHwLocNuma(hwloc_topology_t topo, machine->sockets[s].cores[c].numaNodeId = i; } } + // Getting the distance matrix +#if GMX_HWLOC_API_VERSION_IS_2XX + // with hwloc api v. 2.x.x, distances are no longer directly accessible. 
Need to retrieve and release hwloc_distances_s object + // In addition, there can now be multiple types of distances, ie latency, bandwidth. We look only for latency, but have to check + // if multiple distance matrices are returned. + + // If only 1 numa node exists, the v2.x.x hwloc api won't have a distances matrix, set manually + if (hwlocNumaNodes.size() == 1) + { + machine->numa.relativeLatency = { { 1.0 } }; + } + else + { + // Single output slot for hwloc_distances_get(); a plain local pointer needs no heap allocation that could leak + hwloc_distances_s* dist = nullptr; + // Set the number of distance matrices to return (1 in our case, but hwloc 2.x.x allows + // for multiple distances types and therefore multiple distance matrices) + unsigned nr = 1; + hwloc_distances_get(topo, &nr, &dist, HWLOC_DISTANCES_KIND_MEANS_LATENCY, 0); + // If no distances were found, nr will be 0 and dist is still null, otherwise dist points to 1 + // hwloc_distances_s object + if (nr > 0 && dist != nullptr && dist->nbobjs == hwlocNumaNodes.size()) + { + machine->numa.relativeLatency.resize(dist->nbobjs); + for (std::size_t i = 0; i < dist->nbobjs; i++) + { + machine->numa.relativeLatency[i].resize(dist->nbobjs); + for (std::size_t j = 0; j < dist->nbobjs; j++) + { + machine->numa.relativeLatency[i][j] = dist->values[i*dist->nbobjs+j]; + } + } + } + else + { + topologyOk = false; + } + // Release only a matrix that hwloc actually returned; when nothing matched there is nothing to release + if (dist != nullptr) + { + hwloc_distances_release(topo, dist); + } + } + + // hwloc-2.x provides latencies as integers, but to make things more similar to the case of a single + // numa node as well as hwloc-1.x, we rescale to relative floating-point values and also set the + // largest relative latency value. 
+ + // find smallest value in matrix + float minLatency = std::numeric_limits::max(); // large number + float maxLatency = std::numeric_limits::min(); // 0.0 + for (const auto &v : machine->numa.relativeLatency) + { + auto result = std::minmax_element(v.begin(), v.end()); + minLatency = std::min(minLatency, *result.first); + maxLatency = std::max(maxLatency, *result.second); + } + + // assign stuff + for (auto &v : machine->numa.relativeLatency) + { + std::transform(v.begin(), v.end(), v.begin(), std::bind(std::multiplies(), std::placeholders::_1, 1.0/minLatency)); + } + machine->numa.baseLatency = 1.0; // latencies still do not have any units in hwloc-2.x + machine->numa.maxRelativeLatency = maxLatency/minLatency; +#else // GMX_HWLOC_API_VERSION_IS_2XX == false, hwloc api is 1.x.x int depth = hwloc_get_type_depth(topo, HWLOC_OBJ_NUMANODE); const struct hwloc_distances_s * dist = hwloc_get_whole_distance_matrix_by_depth(topo, depth); if (dist != nullptr && dist->nbobjs == hwlocNumaNodes.size()) @@ -394,10 +481,20 @@ parseHwLocNuma(hwloc_topology_t topo, { topologyOk = false; } +#endif // end GMX_HWLOC_API_VERSION_IS_2XX == false } else + // Deals with the case of no numa nodes found. +#if GMX_HWLOC_API_VERSION_IS_2XX + // If the hwloc version is 2.x.x, and there is no numa node, something went wrong + { + topologyOk = false; + } +#else { // No numa nodes found. Use the entire machine as a numa node. 
+ // Note that this should only be the case with hwloc api v 1.x.x, + // a numa node is assigned to the machine by default in v 2.x.x const hwloc_obj*const hwlocMachine = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_MACHINE, nullptr); if (hwlocMachine != nullptr) @@ -430,7 +527,7 @@ parseHwLocNuma(hwloc_topology_t topo, topologyOk = false; } } - +#endif // end if not GMX_HWLOC_API_VERSION_IS_2XX if (topologyOk) { return 0; @@ -455,13 +552,28 @@ int parseHwLocDevices(hwloc_topology_t topo, HardwareTopology::Machine * machine) { - const hwloc_obj *const root = hwloc_get_root_obj(topo); - std::vector pcidevs = getHwLocDescendantsByType(root, HWLOC_OBJ_PCI_DEVICE); + const hwloc_obj_t root = hwloc_get_root_obj(topo); + std::vector pcidevs = getHwLocDescendantsByType(topo, root, HWLOC_OBJ_PCI_DEVICE); for (auto &p : pcidevs) { - const hwloc_obj *const ancestor = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_NUMANODE, p); - int numaId; +#if GMX_HWLOC_API_VERSION_IS_2XX + const hwloc_obj * ancestor = nullptr; + // Numa nodes not directly part of tree. Walk up the tree until we find an ancestor with a numa node + hwloc_obj_t parent = p->parent; + while (parent && !parent->memory_arity) + { + parent = parent->parent; + } + if (parent) + { + ancestor = parent->memory_first_child; + } +#else // GMX_HWLOC_API_VERSION_IS_2XX = false, api v 1.x.x + // numa nodes are normal part of tree, can use hwloc ancestor function + const hwloc_obj * const ancestor = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_NUMANODE, p); +#endif // end if GMX_HWLOC_API_VERSION_IS_2XX + int numaId; if (ancestor != nullptr) { numaId = ancestor->logical_index; @@ -504,7 +616,12 @@ parseHwLoc(HardwareTopology::Machine * machine, return; // SupportLevel::None. 
} + // Flags to look for io devices +#if GMX_HWLOC_API_VERSION_IS_2XX + hwloc_topology_set_io_types_filter(topo, HWLOC_TYPE_FILTER_KEEP_IMPORTANT); +#else hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IO_DEVICES); +#endif if (hwloc_topology_load(topo) != 0 || hwloc_get_root_obj(topo) == nullptr) { -- 2.22.0