src/gromacs/hardware/hardwaretopology.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  * \brief
  38  * Implements gmx::HardwareTopology.
  39  *
  40  * \author Erik Lindahl <erik.lindahl@gmail.com>
  41  * \ingroup module_hardware
  42  */
  43
  44 #include "gmxpre.h"
  45
  46 #include "hardwaretopology.h"
  47
  48 #include "config.h"
  49
  50 #include <cstdio>
  51
  52 #include <algorithm>
  53 #include <functional>
  54 #include <limits>
  55 #include <utility>
  56 #include <vector>
  57
  58 #if GMX_USE_HWLOC
  59 #    include <hwloc.h>
  60 #endif
  61
  62 #include "gromacs/hardware/cpuinfo.h"
  63 #include "gromacs/utility/gmxassert.h"
  64
  65 #ifdef HAVE_UNISTD_H
  66 #    include <unistd.h> // sysconf()
  67 #endif
  68 #if GMX_NATIVE_WINDOWS
  69 #    include <windows.h> // GetSystemInfo()
  70 #endif
  71
  72 //! Convenience macro to help us avoid ifdefs each time we use sysconf
  73 #if !defined(_SC_NPROCESSORS_ONLN) && defined(_SC_NPROC_ONLN)
  74 #    define _SC_NPROCESSORS_ONLN _SC_NPROC_ONLN
  75 #endif
  76
  77 namespace gmx
  78 {
  79
  80 namespace
  81 {
  82
  83 /*****************************************************************************
  84  *                                                                           *
  85  *   Utility functions for extracting hardware topology from CpuInfo object  *
  86  *                                                                           *
  87  *****************************************************************************/
  88
  89 /*! \brief Initialize machine data from basic information in cpuinfo
  90  *
  91  *  \param  machine      Machine tree structure where information will be assigned
  92  *                       if the cpuinfo object contains topology information.
  93  *  \param  supportLevel If topology information is available in CpuInfo,
  94  *                       this will be updated to reflect the amount of
  95  *                       information written to the machine structure.
  96  */
  97 void parseCpuInfo(HardwareTopology::Machine* machine, HardwareTopology::SupportLevel* supportLevel)
  98 {
  99     CpuInfo cpuInfo(CpuInfo::detect());
 100
 101     if (!cpuInfo.logicalProcessors().empty())
 102     {
 103         int nSockets   = 0;
 104         int nCores     = 0;
 105         int nHwThreads = 0;
 106
 107         // Copy the logical processor information from cpuinfo
 108         for (auto& l : cpuInfo.logicalProcessors())
 109         {
 110             machine->logicalProcessors.push_back(
 111                     { l.socketRankInMachine, l.coreRankInSocket, l.hwThreadRankInCore, -1 });
 112             nSockets   = std::max(nSockets, l.socketRankInMachine);
 113             nCores     = std::max(nCores, l.coreRankInSocket);
 114             nHwThreads = std::max(nHwThreads, l.hwThreadRankInCore);
 115         }
 116
 117         // Fill info form sockets/cores/hwthreads
 118         int socketId   = 0;
 119         int coreId     = 0;
 120         int hwThreadId = 0;
 121
 122         machine->sockets.resize(nSockets + 1);
 123         for (auto& s : machine->sockets)
 124         {
 125             s.id = socketId++;
 126             s.cores.resize(nCores + 1);
 127             for (auto& c : s.cores)
 128             {
 129                 c.id         = coreId++;
 130                 c.numaNodeId = -1; // No numa information
 131                 c.hwThreads.resize(nHwThreads + 1);
 132                 for (auto& t : c.hwThreads)
 133                 {
 134                     t.id                 = hwThreadId++;
 135                     t.logicalProcessorId = -1; // set as unassigned for now
 136                 }
 137             }
 138         }
 139
 140         // Fill the logical processor id in the right place
 141         for (std::size_t i = 0; i < machine->logicalProcessors.size(); i++)
 142         {
 143             const HardwareTopology::LogicalProcessor& l = machine->logicalProcessors[i];
 144             machine->sockets[l.socketRankInMachine]
 145                     .cores[l.coreRankInSocket]
 146                     .hwThreads[l.hwThreadRankInCore]
 147                     .logicalProcessorId = static_cast<int>(i);
 148         }
 149         machine->logicalProcessorCount = machine->logicalProcessors.size();
 150         *supportLevel                  = HardwareTopology::SupportLevel::Basic;
 151     }
 152     else
 153     {
 154         *supportLevel = HardwareTopology::SupportLevel::None;
 155     }
 156 }
 157
 158 #if GMX_USE_HWLOC
 159
 160 #    if HWLOC_API_VERSION < 0x00010b00
 161 #        define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
 162 #        define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
 163 #    endif
 164
 165 // Preprocessor variable for if hwloc api is version 1.x.x or 2.x.x
 166 #    if HWLOC_API_VERSION >= 0x00020000
 167 #        define GMX_HWLOC_API_VERSION_IS_2XX 1
 168 #        if GMX_HWLOC_API_VERSION < 0x00020000
 169 #            error "HWLOC library major version set during configuration is 1, but currently using version 2 headers"
 170 #        endif
 171 #    else
 172 #        define GMX_HWLOC_API_VERSION_IS_2XX 0
 173 #        if GMX_HWLOC_API_VERSION >= 0x00020000
 174 #            error "HWLOC library major version set during configuration is 2, but currently using version 1 headers"
 175 #        endif
 176 #    endif
 177
 178 /*****************************************************************************
 179  *                                                                           *
 180  *   Utility functions for extracting hardware topology from hwloc library   *
 181  *                                                                           *
 182  *****************************************************************************/
 183
 184 // Compatibility function for accessing hwloc_obj_t object memory with different API versions of hwloc
 185 std::size_t getHwLocObjectMemory(const hwloc_obj* obj)
 186 {
 187 #    if GMX_HWLOC_API_VERSION_IS_2XX
 188     return obj->total_memory;
 189 #    else
 190     return obj->memory.total_memory;
 191 #    endif
 192 }
 193
 194 /*! \brief Return vector of all descendants of a given type in hwloc topology
 195  *
 196  *  \param topo  hwloc topology handle that has been initialized and loaded
 197  *  \param obj   Non-null hwloc object.
 198  *  \param type  hwloc object type to find. The routine will only search
 199  *               on levels below obj.
 200  *
 201  *  \return vector containing all the objects of given type that are
 202  *          descendants of the provided object. If no objects of this type
 203  *          were found, the vector will be empty.
 204  */
 205 std::vector<const hwloc_obj*> getHwLocDescendantsByType(const hwloc_topology*  topo,
 206                                                         const hwloc_obj*       obj,
 207                                                         const hwloc_obj_type_t type)
 208 {
 209     GMX_RELEASE_ASSERT(obj, "NULL hwloc object provided to getHwLocDescendantsByType()");
 210
 211     std::vector<const hwloc_obj*> v;
 212
 213     if (obj->type == type)
 214     {
 215         v.push_back(obj);
 216     }
 217     // Go through children; if this object has no children obj->arity is 0,
 218     // and we'll return an empty vector.
 219     hwloc_obj_t tempNode = nullptr;
 220     while ((tempNode = hwloc_get_next_child(const_cast<hwloc_topology_t>(topo),
 221                                             const_cast<hwloc_obj_t>(obj), tempNode))
 222            != nullptr)
 223     {
 224         std::vector<const hwloc_obj*> v2 = getHwLocDescendantsByType(topo, tempNode, type);
 225         v.insert(v.end(), v2.begin(), v2.end());
 226     }
 227     return v;
 228 }
 229
 230 /*! \brief Read information about sockets, cores and threads from hwloc topology
 231  *
 232  *  \param topo    hwloc topology handle that has been initialized and loaded
 233  *  \param machine Pointer to the machine structure in the HardwareTopology
 234  *                 class, where the tree of sockets/cores/threads will be written.
 235  *
 236  *  \return If all the data is found
 237  */
 238 bool parseHwLocSocketsCoresThreads(hwloc_topology_t topo, HardwareTopology::Machine* machine)
 239 {
 240     const hwloc_obj*              root         = hwloc_get_root_obj(topo);
 241     std::vector<const hwloc_obj*> hwlocSockets = getHwLocDescendantsByType(topo, root, HWLOC_OBJ_PACKAGE);
 242
 243     machine->logicalProcessorCount = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
 244     machine->logicalProcessors.resize(machine->logicalProcessorCount);
 245     machine->sockets.resize(hwlocSockets.size());
 246
 247     bool topologyOk = !hwlocSockets.empty(); // Fail if we have no sockets in machine
 248
 249     for (std::size_t i = 0; i < hwlocSockets.size() && topologyOk; i++)
 250     {
 251         // Assign information about this socket
 252         machine->sockets[i].id = hwlocSockets[i]->logical_index;
 253
 254         // Get children (cores)
 255         std::vector<const hwloc_obj*> hwlocCores =
 256                 getHwLocDescendantsByType(topo, hwlocSockets[i], HWLOC_OBJ_CORE);
 257         machine->sockets[i].cores.resize(hwlocCores.size());
 258
 259         topologyOk = topologyOk && !hwlocCores.empty(); // Fail if we have no cores in socket
 260
 261         // Loop over child cores
 262         for (std::size_t j = 0; j < hwlocCores.size() && topologyOk; j++)
 263         {
 264             // Assign information about this core
 265             machine->sockets[i].cores[j].id         = hwlocCores[j]->logical_index;
 266             machine->sockets[i].cores[j].numaNodeId = -1;
 267
 268             // Get children (hwthreads)
 269             std::vector<const hwloc_obj*> hwlocPUs =
 270                     getHwLocDescendantsByType(topo, hwlocCores[j], HWLOC_OBJ_PU);
 271             machine->sockets[i].cores[j].hwThreads.resize(hwlocPUs.size());
 272
 273             topologyOk = topologyOk && !hwlocPUs.empty(); // Fail if we have no hwthreads in core
 274
 275             // Loop over child hwthreads
 276             for (std::size_t k = 0; k < hwlocPUs.size() && topologyOk; k++)
 277             {
 278                 // Assign information about this hwthread
 279                 std::size_t logicalProcessorId               = hwlocPUs[k]->os_index;
 280                 machine->sockets[i].cores[j].hwThreads[k].id = hwlocPUs[k]->logical_index;
 281                 machine->sockets[i].cores[j].hwThreads[k].logicalProcessorId = logicalProcessorId;
 282
 283                 if (logicalProcessorId < machine->logicalProcessors.size())
 284                 {
 285                     // Cross-assign data for this hwthread to the logicalprocess vector
 286                     machine->logicalProcessors[logicalProcessorId].socketRankInMachine =
 287                             static_cast<int>(i);
 288                     machine->logicalProcessors[logicalProcessorId].coreRankInSocket =
 289                             static_cast<int>(j);
 290                     machine->logicalProcessors[logicalProcessorId].hwThreadRankInCore =
 291                             static_cast<int>(k);
 292                     machine->logicalProcessors[logicalProcessorId].numaNodeId = -1;
 293                 }
 294                 else
 295                 {
 296                     topologyOk = false;
 297                 }
 298             }
 299         }
 300     }
 301
 302     if (!topologyOk)
 303     {
 304         machine->logicalProcessors.clear();
 305         machine->sockets.clear();
 306     }
 307     return topologyOk;
 308 }
 309
 310 /*! \brief Read cache information from hwloc topology
 311  *
 312  *  \param topo    hwloc topology handle that has been initialized and loaded
 313  *  \param machine Pointer to the machine structure in the HardwareTopology
 314  *                 class, where cache data will be filled.
 315  *
 316  *  \return If any cache data is found
 317  */
 318 bool parseHwLocCache(hwloc_topology_t topo, HardwareTopology::Machine* machine)
 319 {
 320     // Parse caches up to L5
 321     for (int cachelevel : { 1, 2, 3, 4, 5 })
 322     {
 323         int depth = hwloc_get_cache_type_depth(topo, cachelevel, HWLOC_OBJ_CACHE_DATA);
 324
 325         if (depth >= 0)
 326         {
 327             hwloc_obj_t cache = hwloc_get_next_obj_by_depth(topo, depth, nullptr);
 328             if (cache != nullptr)
 329             {
 330                 std::vector<const hwloc_obj*> hwThreads =
 331                         getHwLocDescendantsByType(topo, cache, HWLOC_OBJ_PU);
 332
 333                 machine->caches.push_back({ static_cast<int>(cache->attr->cache.depth),
 334                                             static_cast<std::size_t>(cache->attr->cache.size),
 335                                             static_cast<int>(cache->attr->cache.linesize),
 336                                             static_cast<int>(cache->attr->cache.associativity),
 337                                             std::max<int>(hwThreads.size(), 1) });
 338             }
 339         }
 340     }
 341     return !machine->caches.empty();
 342 }
 343
 344
 345 /*! \brief Read numa information from hwloc topology
 346  *
 347  *  \param topo    hwloc topology handle that has been initialized and loaded
 348  *  \param machine Pointer to the machine structure in the HardwareTopology
 349  *                 class, where numa information will be filled.
 350  *
 351  *  Hwloc should virtually always be able to detect numa information, but if
 352  *  there is only a single numa node in the system it is not reported at all.
 353  *  In this case we create a single numa node covering all cores.
 354  *
 355  *  This function uses the basic socket/core/thread information detected by
 356  *  parseHwLocSocketsCoresThreads(), which means that routine must have
 357  *  completed successfully before calling this one. If this is not the case,
 358  *  you will get an error return code.
 359  *
 360  *  \return If the data found makes sense (either in the numa node or the
 361  *          entire machine)
 362  */
 363 bool parseHwLocNuma(hwloc_topology_t topo, HardwareTopology::Machine* machine)
 364 {
 365     const hwloc_obj*              root = hwloc_get_root_obj(topo);
 366     std::vector<const hwloc_obj*> hwlocNumaNodes =
 367             getHwLocDescendantsByType(topo, root, HWLOC_OBJ_NUMANODE);
 368     bool topologyOk = true;
 369
 370     if (!hwlocNumaNodes.empty())
 371     {
 372         machine->numa.nodes.resize(hwlocNumaNodes.size());
 373
 374         for (std::size_t i = 0; i < hwlocNumaNodes.size(); i++)
 375         {
 376             machine->numa.nodes[i].id     = hwlocNumaNodes[i]->logical_index;
 377             machine->numa.nodes[i].memory = getHwLocObjectMemory(hwlocNumaNodes[i]);
 378
 379             machine->numa.nodes[i].logicalProcessorId.clear();
 380
 381             // Get list of PUs in this numa node. Get from numa node if v1.x.x, get from numa node's parent if 2.x.x
 382 #    if GMX_HWLOC_API_VERSION_IS_2XX
 383             std::vector<const hwloc_obj*> hwlocPUs =
 384                     getHwLocDescendantsByType(topo, hwlocNumaNodes[i]->parent, HWLOC_OBJ_PU);
 385 #    else
 386             std::vector<const hwloc_obj*> hwlocPUs =
 387                     getHwLocDescendantsByType(topo, hwlocNumaNodes[i], HWLOC_OBJ_PU);
 388 #    endif
 389             for (auto& p : hwlocPUs)
 390             {
 391                 machine->numa.nodes[i].logicalProcessorId.push_back(p->os_index);
 392
 393                 GMX_RELEASE_ASSERT(p->os_index < machine->logicalProcessors.size(),
 394                                    "OS index of PU in hwloc larger than processor count");
 395
 396                 machine->logicalProcessors[p->os_index].numaNodeId = static_cast<int>(i);
 397                 std::size_t s = machine->logicalProcessors[p->os_index].socketRankInMachine;
 398                 std::size_t c = machine->logicalProcessors[p->os_index].coreRankInSocket;
 399
 400                 GMX_RELEASE_ASSERT(s < machine->sockets.size(),
 401                                    "Socket index in logicalProcessors larger than socket count");
 402                 GMX_RELEASE_ASSERT(c < machine->sockets[s].cores.size(),
 403                                    "Core index in logicalProcessors larger than core count");
 404                 // Set numaNodeId in core too
 405                 machine->sockets[s].cores[c].numaNodeId = i;
 406             }
 407         }
 408         // Getting the distance matrix
 409 #    if GMX_HWLOC_API_VERSION_IS_2XX
 410         // with hwloc api v. 2.x.x, distances are no longer directly accessible. Need to retrieve and release hwloc_distances_s object
 411         // In addition, there can now be multiple types of distances, ie latency, bandwidth. We look only for latency, but have to check
 412         // if multiple distance matrices are returned.
 413
 414         // If only 1 numa node exists, the v2.x.x hwloc api won't have a distances matrix, set manually
 415         if (hwlocNumaNodes.size() == 1)
 416         {
 417             machine->numa.relativeLatency = { { 1.0 } };
 418         }
 419         else
 420         {
 421             hwloc_distances_s* dist;
 422             // Set the number of distance matrices to return (1 in our case, but hwloc 2.x.x allows
 423             // for multiple distances types and therefore multiple distance matrices)
 424             unsigned nr = 1;
 425             hwloc_distances_get(topo, &nr, &dist, HWLOC_DISTANCES_KIND_MEANS_LATENCY, 0);
 426             // If no distances were found, nr will be 0, otherwise distances will be populated with
 427             // 1 hwloc_distances_s object
 428             if (nr > 0 && dist->nbobjs == hwlocNumaNodes.size())
 429             {
 430
 431                 machine->numa.relativeLatency.resize(dist->nbobjs);
 432                 for (std::size_t i = 0; i < dist->nbobjs; i++)
 433                 {
 434                     machine->numa.relativeLatency[i].resize(dist->nbobjs);
 435                     for (std::size_t j = 0; j < dist->nbobjs; j++)
 436                     {
 437                         machine->numa.relativeLatency[i][j] = dist->values[i * dist->nbobjs + j];
 438                     }
 439                 }
 440             }
 441             else
 442             {
 443                 topologyOk = false;
 444             }
 445             hwloc_distances_release(topo, dist);
 446         }
 447
 448         // hwloc-2.x provides latencies as integers, but to make things more similar to the case of
 449         // a single numa node as well as hwloc-1.x, we rescale to relative floating-point values and
 450         // also set the largest relative latency value.
 451
 452         // find smallest value in matrix
 453         float minLatency = std::numeric_limits<float>::max(); // large number
 454         float maxLatency = std::numeric_limits<float>::min(); // 0.0
 455         for (const auto& v : machine->numa.relativeLatency)
 456         {
 457             auto result = std::minmax_element(v.begin(), v.end());
 458             minLatency  = std::min(minLatency, *result.first);
 459             maxLatency  = std::max(maxLatency, *result.second);
 460         }
 461
 462         // assign stuff
 463         for (auto& v : machine->numa.relativeLatency)
 464         {
 465             std::transform(v.begin(), v.end(), v.begin(),
 466                            std::bind(std::multiplies<float>(), std::placeholders::_1, 1.0 / minLatency));
 467         }
 468         machine->numa.baseLatency = 1.0; // latencies still do not have any units in hwloc-2.x
 469         machine->numa.maxRelativeLatency = maxLatency / minLatency;
 470
 471 #    else  // GMX_HWLOC_API_VERSION_IS_2XX == false, hwloc api is 1.x.x
 472         int                             depth = hwloc_get_type_depth(topo, HWLOC_OBJ_NUMANODE);
 473         const struct hwloc_distances_s* dist = hwloc_get_whole_distance_matrix_by_depth(topo, depth);
 474         if (dist != nullptr && dist->nbobjs == hwlocNumaNodes.size())
 475         {
 476             machine->numa.baseLatency        = dist->latency_base;
 477             machine->numa.maxRelativeLatency = dist->latency_max;
 478             machine->numa.relativeLatency.resize(dist->nbobjs);
 479             for (std::size_t i = 0; i < dist->nbobjs; i++)
 480             {
 481                 machine->numa.relativeLatency[i].resize(dist->nbobjs);
 482                 for (std::size_t j = 0; j < dist->nbobjs; j++)
 483                 {
 484                     machine->numa.relativeLatency[i][j] = dist->latency[i * dist->nbobjs + j];
 485                 }
 486             }
 487         }
 488         else
 489         {
 490             topologyOk = false;
 491         }
 492 #    endif // end GMX_HWLOC_API_VERSION_IS_2XX == false
 493     }
 494     else
 495     // Deals with the case of no numa nodes found.
 496 #    if GMX_HWLOC_API_VERSION_IS_2XX
 497     // If the hwloc version is 2.x.x, and there is no numa node, something went wrong
 498     {
 499         topologyOk = false;
 500     }
 501 #    else
 502     {
 503         // No numa nodes found. Use the entire machine as a numa node.
 504         // Note that this should only be the case with hwloc api v 1.x.x,
 505         // a numa node is assigned to the machine by default in v 2.x.x
 506         const hwloc_obj* const hwlocMachine = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_MACHINE, nullptr);
 507
 508         if (hwlocMachine != nullptr)
 509         {
 510             machine->numa.nodes.resize(1);
 511             machine->numa.nodes[0].id        = 0;
 512             machine->numa.nodes[0].memory    = hwlocMachine->memory.total_memory;
 513             machine->numa.baseLatency        = 10;
 514             machine->numa.maxRelativeLatency = 1;
 515             machine->numa.relativeLatency    = { { 1.0 } };
 516
 517             for (int i = 0; i < machine->logicalProcessorCount; i++)
 518             {
 519                 machine->numa.nodes[0].logicalProcessorId.push_back(i);
 520             }
 521             for (auto& l : machine->logicalProcessors)
 522             {
 523                 l.numaNodeId = 0;
 524             }
 525             for (auto& s : machine->sockets)
 526             {
 527                 for (auto& c : s.cores)
 528                 {
 529                     c.numaNodeId = 0;
 530                 }
 531             }
 532         }
 533         else
 534         {
 535             topologyOk = false;
 536         }
 537     }
 538 #    endif // end if not GMX_HWLOC_API_VERSION_IS_2XX
 539     if (!topologyOk)
 540     {
 541         machine->numa.nodes.clear();
 542     }
 543     return topologyOk;
 544 }
 545
 546 /*! \brief Read PCI device information from hwloc topology
 547  *
 548  *  \param topo    hwloc topology handle that has been initialized and loaded
 549  *  \param machine Pointer to the machine structure in the HardwareTopology
 550  *                 class, where PCI device information will be filled.
 551  * *
 552  *  \return If any devices were found
 553  */
 554 bool parseHwLocDevices(hwloc_topology_t topo, HardwareTopology::Machine* machine)
 555 {
 556     const hwloc_obj*              root = hwloc_get_root_obj(topo);
 557     std::vector<const hwloc_obj*> pcidevs = getHwLocDescendantsByType(topo, root, HWLOC_OBJ_PCI_DEVICE);
 558
 559     for (auto& p : pcidevs)
 560     {
 561 #    if GMX_HWLOC_API_VERSION_IS_2XX
 562         const hwloc_obj* ancestor = nullptr;
 563         // Numa nodes not directly part of tree. Walk up the tree until we find an ancestor with a numa node
 564         hwloc_obj_t parent = p->parent;
 565         while (parent && !parent->memory_arity)
 566         {
 567             parent = parent->parent;
 568         }
 569         if (parent)
 570         {
 571             ancestor = parent->memory_first_child;
 572         }
 573 #    else  // GMX_HWLOC_API_VERSION_IS_2XX = false, api v 1.x.x
 574         // numa nodes are normal part of tree, can use hwloc ancestor function
 575         const hwloc_obj* const ancestor =
 576                 hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_NUMANODE, const_cast<hwloc_obj_t>(p));
 577 #    endif // end if GMX_HWLOC_API_VERSION_IS_2XX
 578         int numaId;
 579         if (ancestor != nullptr)
 580         {
 581             numaId = ancestor->logical_index;
 582         }
 583         else
 584         {
 585             // If we only have a single numa node we belong to it, otherwise set it to -1 (unknown)
 586             numaId = (machine->numa.nodes.size() == 1) ? 0 : -1;
 587         }
 588
 589         GMX_RELEASE_ASSERT(p->attr, "Attributes should not be NULL for hwloc PCI object");
 590
 591         machine->devices.push_back({ p->attr->pcidev.vendor_id, p->attr->pcidev.device_id,
 592                                      p->attr->pcidev.class_id, p->attr->pcidev.domain,
 593                                      p->attr->pcidev.bus, p->attr->pcidev.dev, p->attr->pcidev.func,
 594                                      numaId });
 595     }
 596     return !pcidevs.empty();
 597 }
 598
 599 void parseHwLoc(HardwareTopology::Machine* machine, HardwareTopology::SupportLevel* supportLevel, bool* isThisSystem)
 600 {
 601     hwloc_topology_t topo;
 602
 603     // Initialize a hwloc object, set flags to request IO device information too,
 604     // try to load the topology, and get the root object. If either step fails,
 605     // return that we do not have any support at all from hwloc.
 606     if (hwloc_topology_init(&topo) != 0)
 607     {
 608         hwloc_topology_destroy(topo);
 609         return; // SupportLevel::None.
 610     }
 611
 612     // Flags to look for io devices
 613 #    if GMX_HWLOC_API_VERSION_IS_2XX
 614     GMX_RELEASE_ASSERT(
 615             (hwloc_get_api_version() >= 0x20000),
 616             "Mismatch between hwloc headers and library, using v2 headers with v1 library");
 617     hwloc_topology_set_io_types_filter(topo, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
 618 #    else
 619     GMX_RELEASE_ASSERT(
 620             (hwloc_get_api_version() < 0x20000),
 621             "Mismatch between hwloc headers and library, using v1 headers with v2 library");
 622     hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);
 623 #    endif
 624
 625     if (hwloc_topology_load(topo) != 0 || hwloc_get_root_obj(topo) == nullptr)
 626     {
 627         hwloc_topology_destroy(topo);
 628         return; // SupportLevel::None.
 629     }
 630
 631     // If we get here, we can get a valid root object for the topology
 632     *isThisSystem = hwloc_topology_is_thissystem(topo) != 0;
 633
 634     // Parse basic information about sockets, cores, and hardware threads
 635     if (parseHwLocSocketsCoresThreads(topo, machine))
 636     {
 637         *supportLevel = HardwareTopology::SupportLevel::Basic;
 638     }
 639     else
 640     {
 641         hwloc_topology_destroy(topo);
 642         return; // SupportLevel::None.
 643     }
 644
 645     // Get information about cache and numa nodes
 646     if (parseHwLocCache(topo, machine) && parseHwLocNuma(topo, machine))
 647     {
 648         *supportLevel = HardwareTopology::SupportLevel::Full;
 649     }
 650     else
 651     {
 652         hwloc_topology_destroy(topo);
 653         return; // SupportLevel::Basic.
 654     }
 655
 656     // PCI devices
 657     if (parseHwLocDevices(topo, machine))
 658     {
 659         *supportLevel = HardwareTopology::SupportLevel::FullWithDevices;
 660     }
 661
 662     hwloc_topology_destroy(topo);
 663     // SupportLevel::Full or SupportLevel::FullWithDevices.
 664 }
 665
 666 #endif
 667
 668 /*! \brief Try to detect the number of logical processors.
 669  *
 670  *  \return The number of hardware processing units, or 0 if it fails.
 671  */
 672 int detectLogicalProcessorCount()
 673 {
 674     int count = 0;
 675
 676     {
 677 #if GMX_NATIVE_WINDOWS
 678         // Windows
 679         SYSTEM_INFO sysinfo;
 680         GetSystemInfo(&sysinfo);
 681         count = sysinfo.dwNumberOfProcessors;
 682 #elif defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
 683         // We are probably on Unix. Check if we have the argument to use before executing any calls
 684         count = sysconf(_SC_NPROCESSORS_ONLN);
 685 #else
 686         count = 0; // Neither windows nor Unix.
 687 #endif
 688     }
 689
 690     return count;
 691 }
 692
 693 } // namespace
 694
 695 // static
 696 HardwareTopology HardwareTopology::detect()
 697 {
 698     HardwareTopology result;
 699
 700 #if GMX_USE_HWLOC
 701     parseHwLoc(&result.machine_, &result.supportLevel_, &result.isThisSystem_);
 702 #endif
 703
 704     // If something went wrong in hwloc (or if it was not present) we might
 705     // have more information in cpuInfo
 706     if (result.supportLevel_ < SupportLevel::Basic)
 707     {
 708         // There might be topology information in cpuInfo
 709         parseCpuInfo(&result.machine_, &result.supportLevel_);
 710     }
 711     // If we did not manage to get anything from either hwloc or cpuInfo, find the cpu count at least
 712     if (result.supportLevel_ == SupportLevel::None)
 713     {
 714         // No topology information; try to detect the number of logical processors at least
 715         result.machine_.logicalProcessorCount = detectLogicalProcessorCount();
 716         if (result.machine_.logicalProcessorCount > 0)
 717         {
 718             result.supportLevel_ = SupportLevel::LogicalProcessorCount;
 719         }
 720     }
 721     return result;
 722 }
 723
 724 HardwareTopology::Machine::Machine()
 725 {
 726     logicalProcessorCount   = 0;
 727     numa.baseLatency        = 0.0;
 728     numa.maxRelativeLatency = 0.0;
 729 }
 730
 731
 732 HardwareTopology::HardwareTopology() :
 733     supportLevel_(SupportLevel::None),
 734     machine_(),
 735     isThisSystem_(true)
 736 {
 737 }
 738
 739 HardwareTopology::HardwareTopology(int logicalProcessorCount) :
 740     supportLevel_(SupportLevel::None),
 741     machine_(),
 742     isThisSystem_(true)
 743 {
 744     if (logicalProcessorCount > 0)
 745     {
 746         machine_.logicalProcessorCount = logicalProcessorCount;
 747         supportLevel_                  = SupportLevel::LogicalProcessorCount;
 748     }
 749 }
 750
 751 int HardwareTopology::numberOfCores() const
 752 {
 753     if (supportLevel() >= SupportLevel::Basic)
 754     {
 755         // We assume all sockets have the same number of cores as socket 0.
 756         // Since topology information is present, we can assume there is at least one socket.
 757         return machine().sockets.size() * machine().sockets[0].cores.size();
 758     }
 759     else if (supportLevel() >= SupportLevel::LogicalProcessorCount)
 760     {
 761         return machine().logicalProcessorCount;
 762     }
 763     else
 764     {
 765         return 0;
 766     }
 767 }
 768
 769 } // namespace gmx