src/gromacs/hardware/hardwaretopology.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  * \brief
  38  * Implements gmx::HardwareTopology.
  39  *
  40  * \author Erik Lindahl <erik.lindahl@gmail.com>
  41  * \ingroup module_hardware
  42  */
  43
  44 #include "gmxpre.h"
  45
  46 #include "hardwaretopology.h"
  47
  48 #include "config.h"
  49
  50 #include <cstdio>
  51
  52 #include <algorithm>
  53 #include <functional>
  54 #include <limits>
  55 #include <utility>
  56 #include <vector>
  57
  58 #if GMX_USE_HWLOC
  59 #    include <hwloc.h>
  60 #endif
  61
  62 #include "gromacs/hardware/cpuinfo.h"
  63 #include "gromacs/utility/gmxassert.h"
  64
  65 #ifdef HAVE_UNISTD_H
  66 #    include <unistd.h> // sysconf()
  67 #endif
  68 #if GMX_NATIVE_WINDOWS
  69 #    include <windows.h> // GetSystemInfo()
  70 #endif
  71
  72 //! Convenience macro to help us avoid ifdefs each time we use sysconf
  73 #if !defined(_SC_NPROCESSORS_ONLN) && defined(_SC_NPROC_ONLN)
  74 #    define _SC_NPROCESSORS_ONLN _SC_NPROC_ONLN
  75 #endif
  76
  77 namespace gmx
  78 {
  79
  80 namespace
  81 {
  82
  83 /*****************************************************************************
  84  *                                                                           *
  85  *   Utility functions for extracting hardware topology from CpuInfo object  *
  86  *                                                                           *
  87  *****************************************************************************/
  88
  89 /*! \brief Initialize machine data from basic information in cpuinfo
  90  *
  91  *  \param  machine      Machine tree structure where information will be assigned
  92  *                       if the cpuinfo object contains topology information.
  93  *  \param  supportLevel If topology information is available in CpuInfo,
  94  *                       this will be updated to reflect the amount of
  95  *                       information written to the machine structure.
  96  */
  97 void parseCpuInfo(HardwareTopology::Machine* machine, HardwareTopology::SupportLevel* supportLevel)
  98 {
  99     CpuInfo cpuInfo(CpuInfo::detect());
 100
 101     if (!cpuInfo.logicalProcessors().empty())
 102     {
 103         int nSockets   = 0;
 104         int nCores     = 0;
 105         int nHwThreads = 0;
 106
 107         // Copy the logical processor information from cpuinfo
 108         for (auto& l : cpuInfo.logicalProcessors())
 109         {
 110             machine->logicalProcessors.push_back(
 111                     { l.socketRankInMachine, l.coreRankInSocket, l.hwThreadRankInCore, -1 });
 112             nSockets   = std::max(nSockets, l.socketRankInMachine);
 113             nCores     = std::max(nCores, l.coreRankInSocket);
 114             nHwThreads = std::max(nHwThreads, l.hwThreadRankInCore);
 115         }
 116
 117         // Fill info form sockets/cores/hwthreads
 118         int socketId   = 0;
 119         int coreId     = 0;
 120         int hwThreadId = 0;
 121
 122         machine->sockets.resize(nSockets + 1);
 123         for (auto& s : machine->sockets)
 124         {
 125             s.id = socketId++;
 126             s.cores.resize(nCores + 1);
 127             for (auto& c : s.cores)
 128             {
 129                 c.id         = coreId++;
 130                 c.numaNodeId = -1; // No numa information
 131                 c.hwThreads.resize(nHwThreads + 1);
 132                 for (auto& t : c.hwThreads)
 133                 {
 134                     t.id                 = hwThreadId++;
 135                     t.logicalProcessorId = -1; // set as unassigned for now
 136                 }
 137             }
 138         }
 139
 140         // Fill the logical processor id in the right place
 141         for (std::size_t i = 0; i < machine->logicalProcessors.size(); i++)
 142         {
 143             const HardwareTopology::LogicalProcessor& l = machine->logicalProcessors[i];
 144             machine->sockets[l.socketRankInMachine]
 145                     .cores[l.coreRankInSocket]
 146                     .hwThreads[l.hwThreadRankInCore]
 147                     .logicalProcessorId = static_cast<int>(i);
 148         }
 149         machine->logicalProcessorCount = machine->logicalProcessors.size();
 150         *supportLevel                  = HardwareTopology::SupportLevel::Basic;
 151     }
 152     else
 153     {
 154         *supportLevel = HardwareTopology::SupportLevel::None;
 155     }
 156 }
 157
 158 #if GMX_USE_HWLOC
 159
 160 #    if HWLOC_API_VERSION < 0x00010b00
 161 #        define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
 162 #        define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
 163 #    endif
 164
 165 // Preprocessor variable for if hwloc api is version 1.x.x or 2.x.x
 166 #    if HWLOC_API_VERSION >= 0x00020000
 167 #        define GMX_HWLOC_API_VERSION_IS_2XX 1
 168 #    else
 169 #        define GMX_HWLOC_API_VERSION_IS_2XX 0
 170 #    endif
 171
 172 /*****************************************************************************
 173  *                                                                           *
 174  *   Utility functions for extracting hardware topology from hwloc library   *
 175  *                                                                           *
 176  *****************************************************************************/
 177
 178 // Compatibility function for accessing hwloc_obj_t object memory with different API versions of hwloc
 179 std::size_t getHwLocObjectMemory(const hwloc_obj* obj)
 180 {
 181 #    if GMX_HWLOC_API_VERSION_IS_2XX
 182     return obj->total_memory;
 183 #    else
 184     return obj->memory.total_memory;
 185 #    endif
 186 }
 187
 188 /*! \brief Return vector of all descendants of a given type in hwloc topology
 189  *
 190  *  \param topo  hwloc topology handle that has been initialized and loaded
 191  *  \param obj   Non-null hwloc object.
 192  *  \param type  hwloc object type to find. The routine will only search
 193  *               on levels below obj.
 194  *
 195  *  \return vector containing all the objects of given type that are
 196  *          descendants of the provided object. If no objects of this type
 197  *          were found, the vector will be empty.
 198  */
 199 std::vector<const hwloc_obj*> getHwLocDescendantsByType(const hwloc_topology*  topo,
 200                                                         const hwloc_obj*       obj,
 201                                                         const hwloc_obj_type_t type)
 202 {
 203     GMX_RELEASE_ASSERT(obj, "NULL hwloc object provided to getHwLocDescendantsByType()");
 204
 205     std::vector<const hwloc_obj*> v;
 206
 207     if (obj->type == type)
 208     {
 209         v.push_back(obj);
 210     }
 211     // Go through children; if this object has no children obj->arity is 0,
 212     // and we'll return an empty vector.
 213     hwloc_obj_t tempNode = nullptr;
 214     while ((tempNode = hwloc_get_next_child(const_cast<hwloc_topology_t>(topo),
 215                                             const_cast<hwloc_obj_t>(obj), tempNode))
 216            != nullptr)
 217     {
 218         std::vector<const hwloc_obj*> v2 = getHwLocDescendantsByType(topo, tempNode, type);
 219         v.insert(v.end(), v2.begin(), v2.end());
 220     }
 221     return v;
 222 }
 223
 224 /*! \brief Read information about sockets, cores and threads from hwloc topology
 225  *
 226  *  \param topo    hwloc topology handle that has been initialized and loaded
 227  *  \param machine Pointer to the machine structure in the HardwareTopology
 228  *                 class, where the tree of sockets/cores/threads will be written.
 229  *
 230  *  \return If all the data is found
 231  */
 232 bool parseHwLocSocketsCoresThreads(hwloc_topology_t topo, HardwareTopology::Machine* machine)
 233 {
 234     const hwloc_obj*              root         = hwloc_get_root_obj(topo);
 235     std::vector<const hwloc_obj*> hwlocSockets = getHwLocDescendantsByType(topo, root, HWLOC_OBJ_PACKAGE);
 236
 237     machine->logicalProcessorCount = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
 238     machine->logicalProcessors.resize(machine->logicalProcessorCount);
 239     machine->sockets.resize(hwlocSockets.size());
 240
 241     bool topologyOk = !hwlocSockets.empty(); // Fail if we have no sockets in machine
 242
 243     for (std::size_t i = 0; i < hwlocSockets.size() && topologyOk; i++)
 244     {
 245         // Assign information about this socket
 246         machine->sockets[i].id = hwlocSockets[i]->logical_index;
 247
 248         // Get children (cores)
 249         std::vector<const hwloc_obj*> hwlocCores =
 250                 getHwLocDescendantsByType(topo, hwlocSockets[i], HWLOC_OBJ_CORE);
 251         machine->sockets[i].cores.resize(hwlocCores.size());
 252
 253         topologyOk = topologyOk && !hwlocCores.empty(); // Fail if we have no cores in socket
 254
 255         // Loop over child cores
 256         for (std::size_t j = 0; j < hwlocCores.size() && topologyOk; j++)
 257         {
 258             // Assign information about this core
 259             machine->sockets[i].cores[j].id         = hwlocCores[j]->logical_index;
 260             machine->sockets[i].cores[j].numaNodeId = -1;
 261
 262             // Get children (hwthreads)
 263             std::vector<const hwloc_obj*> hwlocPUs =
 264                     getHwLocDescendantsByType(topo, hwlocCores[j], HWLOC_OBJ_PU);
 265             machine->sockets[i].cores[j].hwThreads.resize(hwlocPUs.size());
 266
 267             topologyOk = topologyOk && !hwlocPUs.empty(); // Fail if we have no hwthreads in core
 268
 269             // Loop over child hwthreads
 270             for (std::size_t k = 0; k < hwlocPUs.size() && topologyOk; k++)
 271             {
 272                 // Assign information about this hwthread
 273                 std::size_t logicalProcessorId               = hwlocPUs[k]->os_index;
 274                 machine->sockets[i].cores[j].hwThreads[k].id = hwlocPUs[k]->logical_index;
 275                 machine->sockets[i].cores[j].hwThreads[k].logicalProcessorId = logicalProcessorId;
 276
 277                 if (logicalProcessorId < machine->logicalProcessors.size())
 278                 {
 279                     // Cross-assign data for this hwthread to the logicalprocess vector
 280                     machine->logicalProcessors[logicalProcessorId].socketRankInMachine =
 281                             static_cast<int>(i);
 282                     machine->logicalProcessors[logicalProcessorId].coreRankInSocket =
 283                             static_cast<int>(j);
 284                     machine->logicalProcessors[logicalProcessorId].hwThreadRankInCore =
 285                             static_cast<int>(k);
 286                     machine->logicalProcessors[logicalProcessorId].numaNodeId = -1;
 287                 }
 288                 else
 289                 {
 290                     topologyOk = false;
 291                 }
 292             }
 293         }
 294     }
 295
 296     if (!topologyOk)
 297     {
 298         machine->logicalProcessors.clear();
 299         machine->sockets.clear();
 300     }
 301     return topologyOk;
 302 }
 303
 304 /*! \brief Read cache information from hwloc topology
 305  *
 306  *  \param topo    hwloc topology handle that has been initialized and loaded
 307  *  \param machine Pointer to the machine structure in the HardwareTopology
 308  *                 class, where cache data will be filled.
 309  *
 310  *  \return If any cache data is found
 311  */
 312 bool parseHwLocCache(hwloc_topology_t topo, HardwareTopology::Machine* machine)
 313 {
 314     // Parse caches up to L5
 315     for (int cachelevel : { 1, 2, 3, 4, 5 })
 316     {
 317         int depth = hwloc_get_cache_type_depth(topo, cachelevel, HWLOC_OBJ_CACHE_DATA);
 318
 319         if (depth >= 0)
 320         {
 321             hwloc_obj_t cache = hwloc_get_next_obj_by_depth(topo, depth, nullptr);
 322             if (cache != nullptr)
 323             {
 324                 std::vector<const hwloc_obj*> hwThreads =
 325                         getHwLocDescendantsByType(topo, cache, HWLOC_OBJ_PU);
 326
 327                 machine->caches.push_back({ static_cast<int>(cache->attr->cache.depth),
 328                                             static_cast<std::size_t>(cache->attr->cache.size),
 329                                             static_cast<int>(cache->attr->cache.linesize),
 330                                             static_cast<int>(cache->attr->cache.associativity),
 331                                             std::max<int>(hwThreads.size(), 1) });
 332             }
 333         }
 334     }
 335     return !machine->caches.empty();
 336 }
 337
 338
 339 /*! \brief Read numa information from hwloc topology
 340  *
 341  *  \param topo    hwloc topology handle that has been initialized and loaded
 342  *  \param machine Pointer to the machine structure in the HardwareTopology
 343  *                 class, where numa information will be filled.
 344  *
 345  *  Hwloc should virtually always be able to detect numa information, but if
 346  *  there is only a single numa node in the system it is not reported at all.
 347  *  In this case we create a single numa node covering all cores.
 348  *
 349  *  This function uses the basic socket/core/thread information detected by
 350  *  parseHwLocSocketsCoresThreads(), which means that routine must have
 351  *  completed successfully before calling this one. If this is not the case,
 352  *  you will get an error return code.
 353  *
 354  *  \return If the data found makes sense (either in the numa node or the
 355  *          entire machine)
 356  */
 357 bool parseHwLocNuma(hwloc_topology_t topo, HardwareTopology::Machine* machine)
 358 {
 359     const hwloc_obj*              root = hwloc_get_root_obj(topo);
 360     std::vector<const hwloc_obj*> hwlocNumaNodes =
 361             getHwLocDescendantsByType(topo, root, HWLOC_OBJ_NUMANODE);
 362     bool topologyOk = true;
 363
 364     if (!hwlocNumaNodes.empty())
 365     {
 366         machine->numa.nodes.resize(hwlocNumaNodes.size());
 367
 368         for (std::size_t i = 0; i < hwlocNumaNodes.size(); i++)
 369         {
 370             machine->numa.nodes[i].id     = hwlocNumaNodes[i]->logical_index;
 371             machine->numa.nodes[i].memory = getHwLocObjectMemory(hwlocNumaNodes[i]);
 372
 373             machine->numa.nodes[i].logicalProcessorId.clear();
 374
 375             // Get list of PUs in this numa node. Get from numa node if v1.x.x, get from numa node's parent if 2.x.x
 376 #    if GMX_HWLOC_API_VERSION_IS_2XX
 377             std::vector<const hwloc_obj*> hwlocPUs =
 378                     getHwLocDescendantsByType(topo, hwlocNumaNodes[i]->parent, HWLOC_OBJ_PU);
 379 #    else
 380             std::vector<const hwloc_obj*> hwlocPUs =
 381                     getHwLocDescendantsByType(topo, hwlocNumaNodes[i], HWLOC_OBJ_PU);
 382 #    endif
 383             for (auto& p : hwlocPUs)
 384             {
 385                 machine->numa.nodes[i].logicalProcessorId.push_back(p->os_index);
 386
 387                 GMX_RELEASE_ASSERT(p->os_index < machine->logicalProcessors.size(),
 388                                    "OS index of PU in hwloc larger than processor count");
 389
 390                 machine->logicalProcessors[p->os_index].numaNodeId = static_cast<int>(i);
 391                 std::size_t s = machine->logicalProcessors[p->os_index].socketRankInMachine;
 392                 std::size_t c = machine->logicalProcessors[p->os_index].coreRankInSocket;
 393
 394                 GMX_RELEASE_ASSERT(s < machine->sockets.size(),
 395                                    "Socket index in logicalProcessors larger than socket count");
 396                 GMX_RELEASE_ASSERT(c < machine->sockets[s].cores.size(),
 397                                    "Core index in logicalProcessors larger than core count");
 398                 // Set numaNodeId in core too
 399                 machine->sockets[s].cores[c].numaNodeId = i;
 400             }
 401         }
 402         // Getting the distance matrix
 403 #    if GMX_HWLOC_API_VERSION_IS_2XX
 404         // with hwloc api v. 2.x.x, distances are no longer directly accessible. Need to retrieve and release hwloc_distances_s object
 405         // In addition, there can now be multiple types of distances, ie latency, bandwidth. We look only for latency, but have to check
 406         // if multiple distance matrices are returned.
 407
 408         // If only 1 numa node exists, the v2.x.x hwloc api won't have a distances matrix, set manually
 409         if (hwlocNumaNodes.size() == 1)
 410         {
 411             machine->numa.relativeLatency = { { 1.0 } };
 412         }
 413         else
 414         {
 415             hwloc_distances_s* dist;
 416             // Set the number of distance matrices to return (1 in our case, but hwloc 2.x.x allows
 417             // for multiple distances types and therefore multiple distance matrices)
 418             unsigned nr = 1;
 419             hwloc_distances_get(topo, &nr, &dist, HWLOC_DISTANCES_KIND_MEANS_LATENCY, 0);
 420             // If no distances were found, nr will be 0, otherwise distances will be populated with
 421             // 1 hwloc_distances_s object
 422             if (nr > 0 && dist->nbobjs == hwlocNumaNodes.size())
 423             {
 424
 425                 machine->numa.relativeLatency.resize(dist->nbobjs);
 426                 for (std::size_t i = 0; i < dist->nbobjs; i++)
 427                 {
 428                     machine->numa.relativeLatency[i].resize(dist->nbobjs);
 429                     for (std::size_t j = 0; j < dist->nbobjs; j++)
 430                     {
 431                         machine->numa.relativeLatency[i][j] = dist->values[i * dist->nbobjs + j];
 432                     }
 433                 }
 434             }
 435             else
 436             {
 437                 topologyOk = false;
 438             }
 439             hwloc_distances_release(topo, dist);
 440         }
 441
 442         // hwloc-2.x provides latencies as integers, but to make things more similar to the case of
 443         // a single numa node as well as hwloc-1.x, we rescale to relative floating-point values and
 444         // also set the largest relative latency value.
 445
 446         // find smallest value in matrix
 447         float minLatency = std::numeric_limits<float>::max(); // large number
 448         float maxLatency = std::numeric_limits<float>::min(); // 0.0
 449         for (const auto& v : machine->numa.relativeLatency)
 450         {
 451             auto result = std::minmax_element(v.begin(), v.end());
 452             minLatency  = std::min(minLatency, *result.first);
 453             maxLatency  = std::max(maxLatency, *result.second);
 454         }
 455
 456         // assign stuff
 457         for (auto& v : machine->numa.relativeLatency)
 458         {
 459             std::transform(v.begin(), v.end(), v.begin(),
 460                            std::bind(std::multiplies<float>(), std::placeholders::_1, 1.0 / minLatency));
 461         }
 462         machine->numa.baseLatency = 1.0; // latencies still do not have any units in hwloc-2.x
 463         machine->numa.maxRelativeLatency = maxLatency / minLatency;
 464
 465 #    else  // GMX_HWLOC_API_VERSION_IS_2XX == false, hwloc api is 1.x.x
 466         int                             depth = hwloc_get_type_depth(topo, HWLOC_OBJ_NUMANODE);
 467         const struct hwloc_distances_s* dist = hwloc_get_whole_distance_matrix_by_depth(topo, depth);
 468         if (dist != nullptr && dist->nbobjs == hwlocNumaNodes.size())
 469         {
 470             machine->numa.baseLatency        = dist->latency_base;
 471             machine->numa.maxRelativeLatency = dist->latency_max;
 472             machine->numa.relativeLatency.resize(dist->nbobjs);
 473             for (std::size_t i = 0; i < dist->nbobjs; i++)
 474             {
 475                 machine->numa.relativeLatency[i].resize(dist->nbobjs);
 476                 for (std::size_t j = 0; j < dist->nbobjs; j++)
 477                 {
 478                     machine->numa.relativeLatency[i][j] = dist->latency[i * dist->nbobjs + j];
 479                 }
 480             }
 481         }
 482         else
 483         {
 484             topologyOk = false;
 485         }
 486 #    endif // end GMX_HWLOC_API_VERSION_IS_2XX == false
 487     }
 488     else
 489     // Deals with the case of no numa nodes found.
 490 #    if GMX_HWLOC_API_VERSION_IS_2XX
 491     // If the hwloc version is 2.x.x, and there is no numa node, something went wrong
 492     {
 493         topologyOk = false;
 494     }
 495 #    else
 496     {
 497         // No numa nodes found. Use the entire machine as a numa node.
 498         // Note that this should only be the case with hwloc api v 1.x.x,
 499         // a numa node is assigned to the machine by default in v 2.x.x
 500         const hwloc_obj* const hwlocMachine = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_MACHINE, nullptr);
 501
 502         if (hwlocMachine != nullptr)
 503         {
 504             machine->numa.nodes.resize(1);
 505             machine->numa.nodes[0].id        = 0;
 506             machine->numa.nodes[0].memory    = hwlocMachine->memory.total_memory;
 507             machine->numa.baseLatency        = 10;
 508             machine->numa.maxRelativeLatency = 1;
 509             machine->numa.relativeLatency    = { { 1.0 } };
 510
 511             for (int i = 0; i < machine->logicalProcessorCount; i++)
 512             {
 513                 machine->numa.nodes[0].logicalProcessorId.push_back(i);
 514             }
 515             for (auto& l : machine->logicalProcessors)
 516             {
 517                 l.numaNodeId = 0;
 518             }
 519             for (auto& s : machine->sockets)
 520             {
 521                 for (auto& c : s.cores)
 522                 {
 523                     c.numaNodeId = 0;
 524                 }
 525             }
 526         }
 527         else
 528         {
 529             topologyOk = false;
 530         }
 531     }
 532 #    endif // end if not GMX_HWLOC_API_VERSION_IS_2XX
 533     if (!topologyOk)
 534     {
 535         machine->numa.nodes.clear();
 536     }
 537     return topologyOk;
 538 }
 539
 540 /*! \brief Read PCI device information from hwloc topology
 541  *
 542  *  \param topo    hwloc topology handle that has been initialized and loaded
 543  *  \param machine Pointer to the machine structure in the HardwareTopology
 544  *                 class, where PCI device information will be filled.
 545  * *
 546  *  \return If any devices were found
 547  */
 548 bool parseHwLocDevices(hwloc_topology_t topo, HardwareTopology::Machine* machine)
 549 {
 550     const hwloc_obj*              root = hwloc_get_root_obj(topo);
 551     std::vector<const hwloc_obj*> pcidevs = getHwLocDescendantsByType(topo, root, HWLOC_OBJ_PCI_DEVICE);
 552
 553     for (auto& p : pcidevs)
 554     {
 555 #    if GMX_HWLOC_API_VERSION_IS_2XX
 556         const hwloc_obj* ancestor = nullptr;
 557         // Numa nodes not directly part of tree. Walk up the tree until we find an ancestor with a numa node
 558         hwloc_obj_t parent = p->parent;
 559         while (parent && !parent->memory_arity)
 560         {
 561             parent = parent->parent;
 562         }
 563         if (parent)
 564         {
 565             ancestor = parent->memory_first_child;
 566         }
 567 #    else  // GMX_HWLOC_API_VERSION_IS_2XX = false, api v 1.x.x
 568         // numa nodes are normal part of tree, can use hwloc ancestor function
 569         const hwloc_obj* const ancestor =
 570                 hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_NUMANODE, const_cast<hwloc_obj_t>(p));
 571 #    endif // end if GMX_HWLOC_API_VERSION_IS_2XX
 572         int numaId;
 573         if (ancestor != nullptr)
 574         {
 575             numaId = ancestor->logical_index;
 576         }
 577         else
 578         {
 579             // If we only have a single numa node we belong to it, otherwise set it to -1 (unknown)
 580             numaId = (machine->numa.nodes.size() == 1) ? 0 : -1;
 581         }
 582
 583         GMX_RELEASE_ASSERT(p->attr, "Attributes should not be NULL for hwloc PCI object");
 584
 585         machine->devices.push_back({ p->attr->pcidev.vendor_id, p->attr->pcidev.device_id,
 586                                      p->attr->pcidev.class_id, p->attr->pcidev.domain,
 587                                      p->attr->pcidev.bus, p->attr->pcidev.dev, p->attr->pcidev.func,
 588                                      numaId });
 589     }
 590     return !pcidevs.empty();
 591 }
 592
 593 void parseHwLoc(HardwareTopology::Machine* machine, HardwareTopology::SupportLevel* supportLevel, bool* isThisSystem)
 594 {
 595     hwloc_topology_t topo;
 596
 597     // Initialize a hwloc object, set flags to request IO device information too,
 598     // try to load the topology, and get the root object. If either step fails,
 599     // return that we do not have any support at all from hwloc.
 600     if (hwloc_topology_init(&topo) != 0)
 601     {
 602         hwloc_topology_destroy(topo);
 603         return; // SupportLevel::None.
 604     }
 605
 606     // Flags to look for io devices
 607 #    if GMX_HWLOC_API_VERSION_IS_2XX
 608     hwloc_topology_set_io_types_filter(topo, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
 609 #    else
 610     hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);
 611 #    endif
 612
 613     if (hwloc_topology_load(topo) != 0 || hwloc_get_root_obj(topo) == nullptr)
 614     {
 615         hwloc_topology_destroy(topo);
 616         return; // SupportLevel::None.
 617     }
 618
 619     // If we get here, we can get a valid root object for the topology
 620     *isThisSystem = hwloc_topology_is_thissystem(topo) != 0;
 621
 622     // Parse basic information about sockets, cores, and hardware threads
 623     if (parseHwLocSocketsCoresThreads(topo, machine))
 624     {
 625         *supportLevel = HardwareTopology::SupportLevel::Basic;
 626     }
 627     else
 628     {
 629         hwloc_topology_destroy(topo);
 630         return; // SupportLevel::None.
 631     }
 632
 633     // Get information about cache and numa nodes
 634     if (parseHwLocCache(topo, machine) && parseHwLocNuma(topo, machine))
 635     {
 636         *supportLevel = HardwareTopology::SupportLevel::Full;
 637     }
 638     else
 639     {
 640         hwloc_topology_destroy(topo);
 641         return; // SupportLevel::Basic.
 642     }
 643
 644     // PCI devices
 645     if (parseHwLocDevices(topo, machine))
 646     {
 647         *supportLevel = HardwareTopology::SupportLevel::FullWithDevices;
 648     }
 649
 650     hwloc_topology_destroy(topo);
 651     // SupportLevel::Full or SupportLevel::FullWithDevices.
 652 }
 653
 654 #endif
 655
 656 /*! \brief Try to detect the number of logical processors.
 657  *
 658  *  \return The number of hardware processing units, or 0 if it fails.
 659  */
 660 int detectLogicalProcessorCount()
 661 {
 662     int count = 0;
 663
 664     {
 665 #if GMX_NATIVE_WINDOWS
 666         // Windows
 667         SYSTEM_INFO sysinfo;
 668         GetSystemInfo(&sysinfo);
 669         count = sysinfo.dwNumberOfProcessors;
 670 #elif defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
 671         // We are probably on Unix. Check if we have the argument to use before executing any calls
 672         count = sysconf(_SC_NPROCESSORS_ONLN);
 673 #else
 674         count = 0; // Neither windows nor Unix.
 675 #endif
 676     }
 677
 678     return count;
 679 }
 680
 681 } // namespace
 682
 683 // static
 684 HardwareTopology HardwareTopology::detect()
 685 {
 686     HardwareTopology result;
 687
 688 #if GMX_USE_HWLOC
 689     parseHwLoc(&result.machine_, &result.supportLevel_, &result.isThisSystem_);
 690 #endif
 691
 692     // If something went wrong in hwloc (or if it was not present) we might
 693     // have more information in cpuInfo
 694     if (result.supportLevel_ < SupportLevel::Basic)
 695     {
 696         // There might be topology information in cpuInfo
 697         parseCpuInfo(&result.machine_, &result.supportLevel_);
 698     }
 699     // If we did not manage to get anything from either hwloc or cpuInfo, find the cpu count at least
 700     if (result.supportLevel_ == SupportLevel::None)
 701     {
 702         // No topology information; try to detect the number of logical processors at least
 703         result.machine_.logicalProcessorCount = detectLogicalProcessorCount();
 704         if (result.machine_.logicalProcessorCount > 0)
 705         {
 706             result.supportLevel_ = SupportLevel::LogicalProcessorCount;
 707         }
 708     }
 709     return result;
 710 }
 711
 712 HardwareTopology::Machine::Machine()
 713 {
 714     logicalProcessorCount   = 0;
 715     numa.baseLatency        = 0.0;
 716     numa.maxRelativeLatency = 0.0;
 717 }
 718
 719
 720 HardwareTopology::HardwareTopology() :
 721     supportLevel_(SupportLevel::None),
 722     machine_(),
 723     isThisSystem_(true)
 724 {
 725 }
 726
 727 HardwareTopology::HardwareTopology(int logicalProcessorCount) :
 728     supportLevel_(SupportLevel::None),
 729     machine_(),
 730     isThisSystem_(true)
 731 {
 732     if (logicalProcessorCount > 0)
 733     {
 734         machine_.logicalProcessorCount = logicalProcessorCount;
 735         supportLevel_                  = SupportLevel::LogicalProcessorCount;
 736     }
 737 }
 738
 739 int HardwareTopology::numberOfCores() const
 740 {
 741     if (supportLevel() >= SupportLevel::Basic)
 742     {
 743         // We assume all sockets have the same number of cores as socket 0.
 744         // Since topology information is present, we can assume there is at least one socket.
 745         return machine().sockets.size() * machine().sockets[0].cores.size();
 746     }
 747     else if (supportLevel() >= SupportLevel::LogicalProcessorCount)
 748     {
 749         return machine().logicalProcessorCount;
 750     }
 751     else
 752     {
 753         return 0;
 754     }
 755 }
 756
 757 } // namespace gmx