From 6f58aa984a84c651419f5de9b46b81ade136619c Mon Sep 17 00:00:00 2001
From: Kevin Boyd <kevin.boyd@uconn.edu>
Date: Sat, 21 Jul 2018 14:56:38 -0400
Subject: [PATCH] Support hwloc 2.x.x

Created compatibility layer to account for API changes moving
from hwloc 1.x.x to 2.x.x while retaining support for v1.x.x.

Changes supporting hwloc 2.x.x include:
    -reworked descendants lookup in topology tree to account for new
     division of object children into "normal", "memory", and "io" types
    -different memory access location for hwloc objects
    -accessing distances (latencies) between nodes has been reworked
    -different flags for accessing PCI devices
    -changed numa node ancestor search to account for numa nodes no
     longer being a normal part of topology tree

Fixes #2539

Change-Id: I483dda3dd344d8f7c99aa828bcc118a3d2de9dfd
---
 src/gromacs/hardware/hardwaretopology.cpp | 173 ++++++++++++++++++----
 1 file changed, 143 insertions(+), 30 deletions(-)
diff --git a/src/gromacs/hardware/hardwaretopology.cpp b/src/gromacs/hardware/hardwaretopology.cpp
index 2051990888..014ae654ce 100644
--- a/src/gromacs/hardware/hardwaretopology.cpp
+++ b/src/gromacs/hardware/hardwaretopology.cpp
@@ -50,6 +50,9 @@
 #include <cstdio>
 
 #include <algorithm>
+#include <functional>
+#include <limits>
+#include <utility>
 #include <vector>
 
 #if GMX_USE_HWLOC
@@ -157,14 +160,33 @@ parseCpuInfo(HardwareTopology::Machine *        machine,
 #    define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
 #endif
 
+// Set to 1 when building against the hwloc 2.x.x API (HWLOC_API_VERSION >= 0x00020000), 0 for hwloc 1.x.x
+#if HWLOC_API_VERSION >= 0x00020000
+#    define GMX_HWLOC_API_VERSION_IS_2XX 1
+#else
+#    define GMX_HWLOC_API_VERSION_IS_2XX 0
+#endif
+
 /*****************************************************************************
  *                                                                           *
  *   Utility functions for extracting hardware topology from hwloc library   *
  *                                                                           *
  *****************************************************************************/
 
+// Return obj's total_memory value; hides that hwloc 2.x.x stores it on the object itself while 1.x.x keeps it in obj->memory
+std::size_t
+getHwLocObjectMemory(const hwloc_obj_t obj)
+{
+#if GMX_HWLOC_API_VERSION_IS_2XX
+    return obj->total_memory;
+#else // hwloc 1.x.x API
+    return obj->memory.total_memory;
+#endif
+}
+
 /*! \brief Return vector of all descendants of a given type in hwloc topology
  *
+ *  \param topo  hwloc topology handle that has been initialized and loaded
  *  \param obj   Non-null hwloc object.
  *  \param type  hwloc object type to find. The routine will only search
  *               on levels below obj.
@@ -174,27 +196,23 @@ parseCpuInfo(HardwareTopology::Machine *        machine,
  *          were found, the vector will be empty.
  */
 const std::vector<hwloc_obj_t>
-getHwLocDescendantsByType(const hwloc_obj* obj, const hwloc_obj_type_t type)
+getHwLocDescendantsByType(const hwloc_topology_t topo, const hwloc_obj_t obj, const hwloc_obj_type_t type)
 {
     GMX_RELEASE_ASSERT(obj, "NULL hwloc object provided to getHwLocDescendantsByType()");
 
     std::vector<hwloc_obj_t> v;
 
+    if (obj->type == type) // obj itself is part of the result when it matches, so a leaf can yield a non-empty vector
+    {
+        v.push_back(obj);
+    }
     // Go through children; if this object has no children obj->arity is 0,
     // and we'll return an empty vector.
-    for (std::size_t i = 0; i < obj->arity; i++)
+    hwloc_obj_t tempNode = nullptr; // iteration cursor: nullptr asks hwloc_get_next_child() for the first child
+    while ((tempNode = hwloc_get_next_child(topo, obj, tempNode)) != nullptr)
     {
-        // If the child is the type we're looking for, add it directly.
-        // Otherwise call this routine recursively for each child.
-        if (obj->children[i]->type == type)
-        {
-            v.push_back(obj->children[i]);
-        }
-        else
-        {
-            std::vector<hwloc_obj_t> v2 = getHwLocDescendantsByType(obj->children[i], type);
-            v.insert(v.end(), v2.begin(), v2.end());
-        }
+        std::vector<hwloc_obj_t> v2 = getHwLocDescendantsByType(topo, tempNode, type); // callee adds tempNode itself if it matches
+        v.insert(v.end(), v2.begin(), v2.end());
     }
     return v;
 }
@@ -211,8 +229,8 @@ int
 parseHwLocSocketsCoresThreads(hwloc_topology_t                   topo,
                               HardwareTopology::Machine *        machine)
 {
-    const hwloc_obj *const         root         = hwloc_get_root_obj(topo);
-    std::vector<hwloc_obj_t>       hwlocSockets = getHwLocDescendantsByType(root, HWLOC_OBJ_PACKAGE);
+    const hwloc_obj_t                      root         = hwloc_get_root_obj(topo);
+    std::vector<hwloc_obj_t>               hwlocSockets = getHwLocDescendantsByType(topo, root, HWLOC_OBJ_PACKAGE);
 
     machine->logicalProcessorCount = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
     machine->logicalProcessors.resize(machine->logicalProcessorCount);
@@ -226,7 +244,7 @@ parseHwLocSocketsCoresThreads(hwloc_topology_t                   topo,
         machine->sockets[i].id = hwlocSockets[i]->logical_index;
 
         // Get children (cores)
-        std::vector<hwloc_obj_t> hwlocCores = getHwLocDescendantsByType(hwlocSockets[i], HWLOC_OBJ_CORE);
+        std::vector<hwloc_obj_t> hwlocCores = getHwLocDescendantsByType(topo, hwlocSockets[i], HWLOC_OBJ_CORE);
         machine->sockets[i].cores.resize(hwlocCores.size());
 
         topologyOk = topologyOk && !hwlocCores.empty(); // Fail if we have no cores in socket
@@ -239,7 +257,7 @@ parseHwLocSocketsCoresThreads(hwloc_topology_t                   topo,
             machine->sockets[i].cores[j].numaNodeId = -1;
 
             // Get children (hwthreads)
-            std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(hwlocCores[j], HWLOC_OBJ_PU);
+            std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(topo, hwlocCores[j], HWLOC_OBJ_PU);
             machine->sockets[i].cores[j].hwThreads.resize(hwlocPUs.size());
 
             topologyOk = topologyOk && !hwlocPUs.empty(); // Fail if we have no hwthreads in core
@@ -302,7 +320,7 @@ parseHwLocCache(hwloc_topology_t                   topo,
             hwloc_obj_t cache = hwloc_get_next_obj_by_depth(topo, depth, nullptr);
             if (cache != nullptr)
             {
-                std::vector<hwloc_obj_t> hwThreads = getHwLocDescendantsByType(cache, HWLOC_OBJ_PU);
+                std::vector<hwloc_obj_t> hwThreads = getHwLocDescendantsByType(topo, cache, HWLOC_OBJ_PU);
 
                 machine->caches.push_back( {
                                                static_cast<int>(cache->attr->cache.depth),
@@ -340,9 +358,9 @@ int
 parseHwLocNuma(hwloc_topology_t                   topo,
                HardwareTopology::Machine *        machine)
 {
-    const hwloc_obj *const   root           = hwloc_get_root_obj(topo);
-    std::vector<hwloc_obj_t> hwlocNumaNodes = getHwLocDescendantsByType(root, HWLOC_OBJ_NUMANODE);
-    bool                     topologyOk     = true;
+    const hwloc_obj_t                  root           = hwloc_get_root_obj(topo);
+    std::vector<hwloc_obj_t>           hwlocNumaNodes = getHwLocDescendantsByType(topo, root, HWLOC_OBJ_NUMANODE);
+    bool                               topologyOk     = true;
 
     if (!hwlocNumaNodes.empty())
     {
@@ -351,12 +369,15 @@ parseHwLocNuma(hwloc_topology_t                   topo,
         for (std::size_t i = 0; i < hwlocNumaNodes.size(); i++)
         {
             machine->numa.nodes[i].id     = hwlocNumaNodes[i]->logical_index;
-            machine->numa.nodes[i].memory = hwlocNumaNodes[i]->memory.total_memory;
+            machine->numa.nodes[i].memory = getHwLocObjectMemory(hwlocNumaNodes[i]);
             machine->numa.nodes[i].logicalProcessorId.clear();
 
-            // Get list of PUs in this numa node
-            std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(hwlocNumaNodes[i], HWLOC_OBJ_PU);
-
+            // Get list of PUs in this numa node. Get from numa node if v1.x.x, get from numa node's parent if 2.x.x
+#if GMX_HWLOC_API_VERSION_IS_2XX
+            std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(topo, hwlocNumaNodes[i]->parent, HWLOC_OBJ_PU);
+#else
+            std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(topo, hwlocNumaNodes[i], HWLOC_OBJ_PU);
+#endif
             for (auto &p : hwlocPUs)
             {
                 machine->numa.nodes[i].logicalProcessorId.push_back(p->os_index);
@@ -373,7 +394,69 @@ parseHwLocNuma(hwloc_topology_t                   topo,
                 machine->sockets[s].cores[c].numaNodeId = i;
             }
         }
+        // Getting the distance matrix
+#if GMX_HWLOC_API_VERSION_IS_2XX
+        // with hwloc api v. 2.x.x, distances are no longer directly accessible. Need to retrieve and release hwloc_distances_s object
+        // In addition, there can now be multiple types of distances, ie latency, bandwidth. We look only for latency, but have to check
+        // if multiple distance matrices are returned.
+
+        // If only 1 numa node exists, the v2.x.x hwloc api won't have a distances matrix, set manually
+        if (hwlocNumaNodes.size() == 1)
+        {
+            machine->numa.relativeLatency       = { { 1.0 } };
+        }
+        else
+        {
+            hwloc_distances_s* dist = nullptr; // filled in by hwloc; no heap-allocated holder needed (the old one leaked)
+            // Request a single matrix; hwloc 2.x.x may hold several distance kinds, nr returns how many were found.
+            unsigned nr = 1;
+            const int  err      = hwloc_distances_get(topo, &nr, &dist, HWLOC_DISTANCES_KIND_MEANS_LATENCY, 0);
+            const bool haveDist = (err == 0 && nr > 0 && dist != nullptr); // otherwise dist was never handed out
+            if (haveDist && dist->nbobjs == hwlocNumaNodes.size())
+            {
+                machine->numa.relativeLatency.resize(dist->nbobjs);
+                for (std::size_t i = 0; i < dist->nbobjs; i++)
+                {
+                    machine->numa.relativeLatency[i].resize(dist->nbobjs);
+                    for (std::size_t j = 0; j < dist->nbobjs; j++)
+                    {
+                        machine->numa.relativeLatency[i][j] = dist->values[i*dist->nbobjs+j];
+                    }
+                }
+            }
+            else
+            {
+                topologyOk = false;
+            }
+            if (haveDist) // releasing an unset pointer (nr == 0 or failed call) would be undefined behaviour
+            {
+                hwloc_distances_release(topo, dist);
+            }
+        }
+
+        // hwloc-2.x provides latencies as integers, but to make things more similar to the case of a single
+        // numa node as well as hwloc-1.x, we rescale to relative floating-point values and also set the
+        // largest relative latency value.
+
+        // find smallest and largest value in matrix
+        float minLatency = std::numeric_limits<float>::max(); // seed: larger than any latency
+        float maxLatency = 0.0f; // seed: a true zero (numeric_limits<float>::min() is the smallest positive float, not 0.0)
+        for (const auto &v : machine->numa.relativeLatency)
+        {
+            auto result = std::minmax_element(v.begin(), v.end());
+            minLatency = std::min(minLatency, *result.first);
+            maxLatency = std::max(maxLatency, *result.second);
+        }
+
+        // assign stuff
+        for (auto &v : machine->numa.relativeLatency)
+        {
+            std::transform(v.begin(), v.end(), v.begin(), std::bind(std::multiplies<float>(), std::placeholders::_1, 1.0/minLatency));
+        }
+        machine->numa.baseLatency        = 1.0; // latencies still do not have any units in hwloc-2.x
+        machine->numa.maxRelativeLatency = maxLatency/minLatency;
 
+#else           // GMX_HWLOC_API_VERSION_IS_2XX == false, hwloc api is 1.x.x
         int depth = hwloc_get_type_depth(topo, HWLOC_OBJ_NUMANODE);
         const struct hwloc_distances_s * dist = hwloc_get_whole_distance_matrix_by_depth(topo, depth);
         if (dist != nullptr && dist->nbobjs == hwlocNumaNodes.size())
@@ -394,10 +477,20 @@ parseHwLocNuma(hwloc_topology_t                   topo,
         {
             topologyOk = false;
         }
+#endif          // end GMX_HWLOC_API_VERSION_IS_2XX == false
     }
     else
+    // Deals with the case of no numa nodes found.
+#if GMX_HWLOC_API_VERSION_IS_2XX
+    // If the hwloc version is 2.x.x, and there is no numa node, something went wrong
+    {
+        topologyOk = false;
+    }
+#else
     {
         // No numa nodes found. Use the entire machine as a numa node.
+        // Note that this should only be the case with hwloc api v 1.x.x,
+        // a numa node is assigned to the machine by default in v 2.x.x
         const hwloc_obj*const hwlocMachine = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_MACHINE, nullptr);
 
         if (hwlocMachine != nullptr)
@@ -430,7 +523,7 @@ parseHwLocNuma(hwloc_topology_t                   topo,
             topologyOk = false;
         }
     }
-
+#endif      // end if not GMX_HWLOC_API_VERSION_IS_2XX
     if (topologyOk)
     {
         return 0;
@@ -455,13 +548,28 @@ int
 parseHwLocDevices(hwloc_topology_t                   topo,
                   HardwareTopology::Machine *        machine)
 {
-    const hwloc_obj *const   root    = hwloc_get_root_obj(topo);
-    std::vector<hwloc_obj_t> pcidevs = getHwLocDescendantsByType(root, HWLOC_OBJ_PCI_DEVICE);
+    const hwloc_obj_t        root    = hwloc_get_root_obj(topo);
+    std::vector<hwloc_obj_t> pcidevs = getHwLocDescendantsByType(topo, root, HWLOC_OBJ_PCI_DEVICE);
 
     for (auto &p : pcidevs)
     {
-        const hwloc_obj *const ancestor = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_NUMANODE, p);
-        int                    numaId;
+#if GMX_HWLOC_API_VERSION_IS_2XX
+        const hwloc_obj * ancestor = nullptr;
+        // Numa nodes not directly part of tree. Walk up the tree until we find an ancestor with a numa node
+        hwloc_obj_t       parent = p->parent;
+        while (parent && !parent->memory_arity)
+        {
+            parent = parent->parent;
+        }
+        if (parent)
+        {
+            ancestor = parent->memory_first_child;
+        }
+#else           // GMX_HWLOC_API_VERSION_IS_2XX = false, api v 1.x.x
+        // numa nodes are normal part of tree, can use hwloc ancestor function
+        const hwloc_obj * const ancestor = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_NUMANODE, p);
+#endif          // end if GMX_HWLOC_API_VERSION_IS_2XX
+        int                     numaId;
         if (ancestor != nullptr)
         {
             numaId = ancestor->logical_index;
@@ -504,7 +612,12 @@ parseHwLoc(HardwareTopology::Machine *        machine,
         return; // SupportLevel::None.
     }
 
+    // Flags to look for io devices
+#if GMX_HWLOC_API_VERSION_IS_2XX
+    hwloc_topology_set_io_types_filter(topo, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
+#else
     hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);
+#endif
 
     if (hwloc_topology_load(topo) != 0 || hwloc_get_root_obj(topo) == nullptr)
     {
-- 
2.22.0