src/gromacs/hardware/hardwaretopology.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  * \brief
  38  * Implements gmx::HardwareTopology.
  39  *
  40  * \author Erik Lindahl <erik.lindahl@gmail.com>
  41  * \ingroup module_hardware
  42  */
  43
  44 #include "gmxpre.h"
  45
  46 #include "hardwaretopology.h"
  47
  48 #include "config.h"
  49
  50 #include <cstdio>
  51
  52 #include <algorithm>
  53 #include <vector>
  54
  55 #if GMX_HWLOC
  56 #    include <hwloc.h>
  57 #endif
  58
  59 #include "gromacs/gmxlib/md_logging.h"
  60 #include "gromacs/hardware/cpuinfo.h"
  61 #include "gromacs/utility/gmxassert.h"
  62 #include "gromacs/utility/gmxomp.h"
  63
  64 #ifdef HAVE_UNISTD_H
  65 #    include <unistd.h>       // sysconf()
  66 #endif
  67 #if GMX_NATIVE_WINDOWS
  68 #    include <windows.h>      // GetSystemInfo()
  69 #endif
  70
  71 namespace gmx
  72 {
  73
  74 namespace
  75 {
  76
  77 /*****************************************************************************
  78  *                                                                           *
  79  *   Utility functions for extracting hardware topology from CpuInfo object  *
  80  *                                                                           *
  81  *****************************************************************************/
  82
  83 /*! \brief Initialize machine data from basic information in cpuinfo
  84  *
  85  *  \param  machine      Machine tree structure where information will be assigned
  86  *                       if the cpuinfo object contains topology information.
  87  *  \param  supportLevel If topology information is available in CpuInfo,
  88  *                       this will be updated to reflect the amount of
  89  *                       information written to the machine structure.
  90  */
  91 void
  92 parseCpuInfo(HardwareTopology::Machine *        machine,
  93              HardwareTopology::SupportLevel *   supportLevel)
  94 {
  95     CpuInfo cpuInfo(CpuInfo::detect());
  96
  97     if (!cpuInfo.logicalProcessors().empty())
  98     {
  99         int nSockets   = 0;
 100         int nCores     = 0;
 101         int nHwThreads = 0;
 102
 103         // Copy the logical processor information from cpuinfo
 104         for (auto &l : cpuInfo.logicalProcessors())
 105         {
 106             machine->logicalProcessors.push_back( { l.socketRankInMachine, l.coreRankInSocket, l.hwThreadRankInCore, -1 } );
 107             nSockets   = std::max(nSockets, l.socketRankInMachine);
 108             nCores     = std::max(nCores, l.coreRankInSocket);
 109             nHwThreads = std::max(nHwThreads, l.hwThreadRankInCore);
 110         }
 111
 112         // Fill info form sockets/cores/hwthreads
 113         int socketId   = 0;
 114         int coreId     = 0;
 115         int hwThreadId = 0;
 116
 117         machine->sockets.resize(nSockets + 1);
 118         for (auto &s : machine->sockets)
 119         {
 120             s.id = socketId++;
 121             s.cores.resize(nCores + 1);
 122             for (auto &c : s.cores)
 123             {
 124                 c.id         = coreId++;
 125                 c.numaNodeId = -1; // No numa information
 126                 c.hwThreads.resize(nHwThreads + 1);
 127                 for (auto &t : c.hwThreads)
 128                 {
 129                     t.id                 = hwThreadId++;
 130                     t.logicalProcessorId = -1; // set as unassigned for now
 131                 }
 132             }
 133         }
 134
 135         // Fill the logical processor id in the right place
 136         for (std::size_t i = 0; i < machine->logicalProcessors.size(); i++)
 137         {
 138             const HardwareTopology::LogicalProcessor &l = machine->logicalProcessors[i];
 139             machine->sockets[l.socketRankInMachine].cores[l.coreRankInSocket].hwThreads[l.hwThreadRankInCore].logicalProcessorId = static_cast<int>(i);
 140         }
 141         machine->logicalProcessorCount = machine->logicalProcessors.size();
 142         *supportLevel                  = HardwareTopology::SupportLevel::Basic;
 143     }
 144     else
 145     {
 146         *supportLevel = HardwareTopology::SupportLevel::None;
 147     }
 148 }
 149
 150 #if GMX_HWLOC
 151
 152 #if HWLOC_API_VERSION < 0x00010b00
 153 #    define HWLOC_OBJ_PACKAGE  HWLOC_OBJ_SOCKET
 154 #    define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
 155 #endif
 156
 157 /*****************************************************************************
 158  *                                                                           *
 159  *   Utility functions for extracting hardware topology from hwloc library   *
 160  *                                                                           *
 161  *****************************************************************************/
 162
 163 /*! \brief Return vector of all descendants of a given type in hwloc topology
 164  *
 165  *  \param obj   Non-null hwloc object.
 166  *  \param type  hwloc object type to find. The routine will only search
 167  *               on levels below obj.
 168  *
 169  *  \return vector containing all the objects of given type that are
 170  *          descendants of the provided object. If no objects of this type
 171  *          were found, the vector will be empty.
 172  */
 173 const std::vector<hwloc_obj_t>
 174 getHwLocDescendantsByType(const hwloc_obj_t obj, const hwloc_obj_type_t type)
 175 {
 176     GMX_RELEASE_ASSERT(obj, "NULL hwloc object provided to getHwLocDescendantsByType()");
 177
 178     std::vector<hwloc_obj_t> v;
 179
 180     // Go through children; if this object has no children obj->arity is 0,
 181     // and we'll return an empty vector.
 182     for (std::size_t i = 0; i < obj->arity; i++)
 183     {
 184         // If the child is the type we're looking for, add it directly.
 185         // Otherwise call this routine recursively for each child.
 186         if (obj->children[i]->type == type)
 187         {
 188             v.push_back(obj->children[i]);
 189         }
 190         else
 191         {
 192             std::vector<hwloc_obj_t> v2 = getHwLocDescendantsByType(obj->children[i], type);
 193             v.insert(v.end(), v2.begin(), v2.end());
 194         }
 195     }
 196     return v;
 197 }
 198
 199 /*! \brief Read information about sockets, cores and threads from hwloc topology
 200  *
 201  *  \param topo    hwloc topology handle that has been initialized and loaded
 202  *  \param machine Pointer to the machine structure in the HardwareTopology
 203  *                 class, where the tree of sockets/cores/threads will be written.
 204  *
 205  *  \return If all the data is found the return value is 0, otherwise non-zero.
 206  */
 207 int
 208 parseHwLocSocketsCoresThreads(const hwloc_topology_t             topo,
 209                               HardwareTopology::Machine *        machine)
 210 {
 211     const hwloc_obj_t              root         = hwloc_get_root_obj(topo);
 212     std::vector<hwloc_obj_t>       hwlocSockets = getHwLocDescendantsByType(root, HWLOC_OBJ_PACKAGE);
 213
 214     machine->logicalProcessorCount = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
 215     machine->logicalProcessors.resize(machine->logicalProcessorCount);
 216     machine->sockets.resize(hwlocSockets.size());
 217
 218     bool topologyOk = !hwlocSockets.empty(); // Fail if we have no sockets in machine
 219
 220     for (std::size_t i = 0; i < hwlocSockets.size() && topologyOk; i++)
 221     {
 222         // Assign information about this socket
 223         machine->sockets[i].id = hwlocSockets[i]->logical_index;
 224
 225         // Get children (cores)
 226         std::vector<hwloc_obj_t> hwlocCores = getHwLocDescendantsByType(hwlocSockets[i], HWLOC_OBJ_CORE);
 227         machine->sockets[i].cores.resize(hwlocCores.size());
 228
 229         topologyOk = topologyOk && !hwlocCores.empty(); // Fail if we have no cores in socket
 230
 231         // Loop over child cores
 232         for (std::size_t j = 0; j < hwlocCores.size() && topologyOk; j++)
 233         {
 234             // Assign information about this core
 235             machine->sockets[i].cores[j].id         = hwlocCores[j]->logical_index;
 236             machine->sockets[i].cores[j].numaNodeId = -1;
 237
 238             // Get children (hwthreads)
 239             std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(hwlocCores[j], HWLOC_OBJ_PU);
 240             machine->sockets[i].cores[j].hwThreads.resize(hwlocPUs.size());
 241
 242             topologyOk = topologyOk && !hwlocPUs.empty(); // Fail if we have no hwthreads in core
 243
 244             // Loop over child hwthreads
 245             for (std::size_t k = 0; k < hwlocPUs.size() && topologyOk; k++)
 246             {
 247                 // Assign information about this hwthread
 248                 std::size_t logicalProcessorId                               = hwlocPUs[k]->os_index;
 249                 machine->sockets[i].cores[j].hwThreads[k].id                 = hwlocPUs[k]->logical_index;
 250                 machine->sockets[i].cores[j].hwThreads[k].logicalProcessorId = logicalProcessorId;
 251
 252                 if (logicalProcessorId < machine->logicalProcessors.size())
 253                 {
 254                     // Cross-assign data for this hwthread to the logicalprocess vector
 255                     machine->logicalProcessors[logicalProcessorId].socketRankInMachine = static_cast<int>(i);
 256                     machine->logicalProcessors[logicalProcessorId].coreRankInSocket    = static_cast<int>(j);
 257                     machine->logicalProcessors[logicalProcessorId].hwThreadRankInCore  = static_cast<int>(k);
 258                     machine->logicalProcessors[logicalProcessorId].numaNodeId          = -1;
 259                 }
 260                 else
 261                 {
 262                     topologyOk = false;
 263                 }
 264             }
 265         }
 266     }
 267
 268     if (topologyOk)
 269     {
 270         return 0;
 271     }
 272     else
 273     {
 274         machine->logicalProcessors.clear();
 275         machine->sockets.clear();
 276         return -1;
 277     }
 278 }
 279
 280 /*! \brief Read cache information from hwloc topology
 281  *
 282  *  \param topo    hwloc topology handle that has been initialized and loaded
 283  *  \param machine Pointer to the machine structure in the HardwareTopology
 284  *                 class, where cache data will be filled.
 285  *
 286  *  \return If any cache data is found the return value is 0, otherwise non-zero.
 287  */
 288 int
 289 parseHwLocCache(const hwloc_topology_t             topo,
 290                 HardwareTopology::Machine *        machine)
 291 {
 292     // Parse caches up to L5
 293     for (int cachelevel : { 1, 2, 3, 4, 5})
 294     {
 295         int depth = hwloc_get_cache_type_depth(topo, cachelevel, HWLOC_OBJ_CACHE_DATA);
 296
 297         if (depth >= 0)
 298         {
 299             hwloc_obj_t cache = hwloc_get_next_obj_by_depth(topo, depth, NULL);
 300             if (cache != NULL)
 301             {
 302                 std::vector<hwloc_obj_t> hwThreads = getHwLocDescendantsByType(cache, HWLOC_OBJ_PU);
 303
 304                 machine->caches.push_back( {
 305                                                static_cast<int>(cache->attr->cache.depth),
 306                                                static_cast<std::size_t>(cache->attr->cache.size),
 307                                                static_cast<int>(cache->attr->cache.linesize),
 308                                                static_cast<int>(cache->attr->cache.associativity),
 309                                                std::max(static_cast<int>(hwThreads.size()), 1)
 310                                            } );
 311             }
 312         }
 313     }
 314     return machine->caches.empty();
 315 }
 316
 317
 318 /*! \brief Read numa information from hwloc topology
 319  *
 320  *  \param topo    hwloc topology handle that has been initialized and loaded
 321  *  \param machine Pointer to the machine structure in the HardwareTopology
 322  *                 class, where numa information will be filled.
 323  *
 324  *  Hwloc should virtually always be able to detect numa information, but if
 325  *  there is only a single numa node in the system it is not reported at all.
 326  *  In this case we create a single numa node covering all cores.
 327  *
 328  *  This function uses the basic socket/core/thread information detected by
 329  *  parseHwLocSocketsCoresThreads(), which means that routine must have
 330  *  completed successfully before calling this one. If this is not the case,
 331  *  you will get an error return code.
 332  *
 333  *  \return If the data found makes sense (either in the numa node or the
 334  *          entire machine) the return value is 0, otherwise non-zero.
 335  */
 336 int
 337 parseHwLocNuma(const hwloc_topology_t             topo,
 338                HardwareTopology::Machine *        machine)
 339 {
 340     const hwloc_obj_t        root           = hwloc_get_root_obj(topo);
 341     std::vector<hwloc_obj_t> hwlocNumaNodes = getHwLocDescendantsByType(root, HWLOC_OBJ_NUMANODE);
 342     bool                     topologyOk     = true;
 343
 344     if (!hwlocNumaNodes.empty())
 345     {
 346         machine->numa.nodes.resize(hwlocNumaNodes.size());
 347
 348         for (std::size_t i = 0; i < hwlocNumaNodes.size(); i++)
 349         {
 350             machine->numa.nodes[i].id     = hwlocNumaNodes[i]->logical_index;
 351             machine->numa.nodes[i].memory = hwlocNumaNodes[i]->memory.total_memory;
 352             machine->numa.nodes[i].logicalProcessorId.clear();
 353
 354             // Get list of PUs in this numa node
 355             std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(hwlocNumaNodes[i], HWLOC_OBJ_PU);
 356
 357             for (auto &p : hwlocPUs)
 358             {
 359                 machine->numa.nodes[i].logicalProcessorId.push_back(p->os_index);
 360
 361                 GMX_RELEASE_ASSERT(p->os_index < machine->logicalProcessors.size(), "OS index of PU in hwloc larger than processor count");
 362
 363                 machine->logicalProcessors[p->os_index].numaNodeId = static_cast<int>(i);
 364                 std::size_t s = machine->logicalProcessors[p->os_index].socketRankInMachine;
 365                 std::size_t c = machine->logicalProcessors[p->os_index].coreRankInSocket;
 366
 367                 GMX_RELEASE_ASSERT(s < machine->sockets.size(), "Socket index in logicalProcessors larger than socket count");
 368                 GMX_RELEASE_ASSERT(c < machine->sockets[s].cores.size(), "Core index in logicalProcessors larger than core count");
 369                 // Set numaNodeId in core too
 370                 machine->sockets[s].cores[c].numaNodeId = i;
 371             }
 372         }
 373
 374         int depth = hwloc_get_type_depth(topo, HWLOC_OBJ_NUMANODE);
 375         const struct hwloc_distances_s * dist = hwloc_get_whole_distance_matrix_by_depth(topo, depth);
 376         if (dist != NULL && dist->nbobjs == hwlocNumaNodes.size())
 377         {
 378             machine->numa.baseLatency        = dist->latency_base;
 379             machine->numa.maxRelativeLatency = dist->latency_max;
 380             machine->numa.relativeLatency.resize(dist->nbobjs);
 381             for (std::size_t i = 0; i < dist->nbobjs; i++)
 382             {
 383                 machine->numa.relativeLatency[i].resize(dist->nbobjs);
 384                 for (std::size_t j = 0; j < dist->nbobjs; j++)
 385                 {
 386                     machine->numa.relativeLatency[i][j] = dist->latency[i*dist->nbobjs+j];
 387                 }
 388             }
 389         }
 390         else
 391         {
 392             topologyOk = false;
 393         }
 394     }
 395     else
 396     {
 397         // No numa nodes found. Use the entire machine as a numa node.
 398         const hwloc_obj_t hwlocMachine = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_MACHINE, NULL);
 399
 400         if (hwlocMachine != NULL)
 401         {
 402             machine->numa.nodes.resize(1);
 403             machine->numa.nodes[0].id           = 0;
 404             machine->numa.nodes[0].memory       = hwlocMachine->memory.total_memory;
 405             machine->numa.baseLatency           = 10;
 406             machine->numa.maxRelativeLatency    = 1;
 407             machine->numa.relativeLatency       = { { 1.0 } };
 408
 409             for (int i = 0; i < machine->logicalProcessorCount; i++)
 410             {
 411                 machine->numa.nodes[0].logicalProcessorId.push_back(i);
 412             }
 413             for (auto &l : machine->logicalProcessors)
 414             {
 415                 l.numaNodeId = 0;
 416             }
 417             for (auto &s : machine->sockets)
 418             {
 419                 for (auto &c : s.cores)
 420                 {
 421                     c.numaNodeId = 0;
 422                 }
 423             }
 424         }
 425         else
 426         {
 427             topologyOk = false;
 428         }
 429     }
 430
 431     if (topologyOk)
 432     {
 433         return 0;
 434     }
 435     else
 436     {
 437         machine->numa.nodes.clear();
 438         return -1;
 439     }
 440
 441 }
 442
 443 /*! \brief Read PCI device information from hwloc topology
 444  *
 445  *  \param topo    hwloc topology handle that has been initialized and loaded
 446  *  \param machine Pointer to the machine structure in the HardwareTopology
 447  *                 class, where PCI device information will be filled.
 448  * *
 449  *  \return If any devices were found the return value is 0, otherwise non-zero.
 450  */
 451 int
 452 parseHwLocDevices(const hwloc_topology_t             topo,
 453                   HardwareTopology::Machine *        machine)
 454 {
 455     const hwloc_obj_t        root    = hwloc_get_root_obj(topo);
 456     std::vector<hwloc_obj_t> pcidevs = getHwLocDescendantsByType(root, HWLOC_OBJ_PCI_DEVICE);
 457
 458     for (auto &p : pcidevs)
 459     {
 460         const hwloc_obj_t ancestor = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_NUMANODE, p);
 461         int               numaId;
 462         if (ancestor != NULL)
 463         {
 464             numaId = ancestor->logical_index;
 465         }
 466         else
 467         {
 468             // If we only have a single numa node we belong to it, otherwise set it to -1 (unknown)
 469             numaId = (machine->numa.nodes.size() == 1) ?  0 : -1;
 470         }
 471
 472         GMX_RELEASE_ASSERT(p->attr, "Attributes should not be NULL for hwloc PCI object");
 473
 474         machine->devices.push_back( {
 475                                         p->attr->pcidev.vendor_id,
 476                                         p->attr->pcidev.device_id,
 477                                         p->attr->pcidev.class_id,
 478                                         p->attr->pcidev.domain,
 479                                         p->attr->pcidev.bus,
 480                                         p->attr->pcidev.dev,
 481                                         p->attr->pcidev.func,
 482                                         numaId
 483                                     } );
 484     }
 485     return pcidevs.empty();
 486 }
 487
 488 void
 489 parseHwLoc(HardwareTopology::Machine *        machine,
 490            HardwareTopology::SupportLevel *   supportLevel,
 491            bool *                             isThisSystem)
 492 {
 493     hwloc_topology_t    topo;
 494
 495     // Initialize a hwloc object, set flags to request IO device information too,
 496     // try to load the topology, and get the root object. If either step fails,
 497     // return that we do not have any support at all from hwloc.
 498     if (hwloc_topology_init(&topo) != 0)
 499     {
 500         hwloc_topology_destroy(topo);
 501         return; // SupportLevel::None.
 502     }
 503
 504     hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);
 505
 506     if (hwloc_topology_load(topo) != 0 || hwloc_get_root_obj(topo) == NULL)
 507     {
 508         hwloc_topology_destroy(topo);
 509         return; // SupportLevel::None.
 510     }
 511
 512     // If we get here, we can get a valid root object for the topology
 513     *isThisSystem = hwloc_topology_is_thissystem(topo);
 514
 515     // Parse basic information about sockets, cores, and hardware threads
 516     if (parseHwLocSocketsCoresThreads(topo, machine) == 0)
 517     {
 518         *supportLevel = HardwareTopology::SupportLevel::Basic;
 519     }
 520     else
 521     {
 522         hwloc_topology_destroy(topo);
 523         return; // SupportLevel::None.
 524     }
 525
 526     // Get information about cache and numa nodes
 527     if (parseHwLocCache(topo, machine) == 0 && parseHwLocNuma(topo, machine) == 0)
 528     {
 529         *supportLevel = HardwareTopology::SupportLevel::Full;
 530     }
 531     else
 532     {
 533         hwloc_topology_destroy(topo);
 534         return; // SupportLevel::Basic.
 535     }
 536
 537     // PCI devices
 538     if (parseHwLocDevices(topo, machine) == 0)
 539     {
 540         *supportLevel = HardwareTopology::SupportLevel::FullWithDevices;
 541     }
 542
 543     hwloc_topology_destroy(topo);
 544     return; // SupportLevel::Full or SupportLevel::FullWithDevices.
 545 }
 546
 547 #endif
 548
 549 /*! \brief Try to detect the number of logical processors.
 550  *
 551  *  \return The number of hardware processing units, or 0 if it fails.
 552  */
 553 int
 554 detectLogicalProcessorCount(FILE *fplog, const t_commrec *cr)
 555 {
 556     int count = 0;
 557
 558     {
 559 #if GMX_NATIVE_WINDOWS
 560         // Windows
 561         SYSTEM_INFO sysinfo;
 562         GetSystemInfo( &sysinfo );
 563         count = sysinfo.dwNumberOfProcessors;
 564 #elif defined HAVE_SYSCONF
 565         // We are probably on Unix. Check if we have the argument to use before executing any calls
 566 #    if defined(_SC_NPROCESSORS_CONF)
 567         count = sysconf(_SC_NPROCESSORS_CONF);
 568 #        if defined(_SC_NPROCESSORS_ONLN)
 569         /* On e.g. Arm, the Linux kernel can use advanced power saving features where
 570          * processors are brought online/offline dynamically. This will cause
 571          * _SC_NPROCESSORS_ONLN to report 1 at the beginning of the run. For this
 572          * reason we now warn if this mismatches with the detected core count. */
 573         int countOnline = sysconf(_SC_NPROCESSORS_ONLN);
 574         if (count != countOnline)
 575         {
 576             md_print_warn(cr, fplog,
 577                           "%d CPUs configured, but only %d of them are online.\n"
 578                           "This can happen on embedded platforms (e.g. ARM) where the OS shuts some cores\n"
 579                           "off to save power, and will turn them back on later when the load increases.\n"
 580                           "However, this will likely mean GROMACS cannot pin threads to those cores. You\n"
 581                           "will likely see much better performance by forcing all cores to be online, and\n"
 582                           "making sure they run at their full clock frequency.", count, countOnline);
 583         }
 584 #        endif
 585 #    elif defined(_SC_NPROC_CONF)
 586         count = sysconf(_SC_NPROC_CONF);
 587 #    elif defined(_SC_NPROCESSORS_ONLN)
 588         count = sysconf(_SC_NPROCESSORS_ONLN);
 589 #    elif defined(_SC_NPROC_ONLN)
 590         count = sysconf(_SC_NPROC_ONLN);
 591 #    else
 592 #       warning "No valid sysconf argument value found. Executables will not be able to determine the number of logical cores: mdrun will use 1 thread by default!"
 593 #    endif      // End of check for sysconf argument values
 594
 595 #else
 596         count = 0; // Neither windows nor Unix.
 597 #endif
 598     }
 599 #if GMX_OPENMP
 600     int countFromOpenmp = gmx_omp_get_num_procs();
 601     if (count != countFromOpenmp)
 602     {
 603         md_print_warn(cr, fplog,
 604                       "Number of logical cores detected (%d) does not match the number reported by OpenMP (%d).\n"
 605                       "Consider setting the launch configuration manually!",
 606                       count, countFromOpenmp);
 607     }
 608 #endif
 609
 610     GMX_UNUSED_VALUE(cr);
 611     GMX_UNUSED_VALUE(fplog);
 612     return count;
 613 }
 614
 615 }   // namespace anonymous
 616
 617 // static
 618 HardwareTopology HardwareTopology::detect(FILE *fplog, const t_commrec *cr)
 619 {
 620     HardwareTopology result;
 621
 622     // Default values for machine and numa stuff
 623     result.machine_.logicalProcessorCount   = 0;
 624     result.machine_.numa.baseLatency        = 0.0;
 625     result.machine_.numa.maxRelativeLatency = 0.0;
 626     result.supportLevel_                    = SupportLevel::None;
 627     result.isThisSystem_                    = true;
 628
 629 #if GMX_HWLOC
 630     parseHwLoc(&result.machine_, &result.supportLevel_, &result.isThisSystem_);
 631 #endif
 632
 633     // If something went wrong in hwloc (or if it was not present) we might
 634     // have more information in cpuInfo
 635     if (result.supportLevel_ < SupportLevel::Basic)
 636     {
 637         // There might be topology information in cpuInfo
 638         parseCpuInfo(&result.machine_, &result.supportLevel_);
 639     }
 640     // If we did not manage to get anything from either hwloc or cpuInfo, find the cpu count at least
 641     if (result.supportLevel_ == SupportLevel::None)
 642     {
 643         // No topology information; try to detect the number of logical processors at least
 644         result.machine_.logicalProcessorCount = detectLogicalProcessorCount(fplog, cr);
 645         if (result.machine_.logicalProcessorCount > 0)
 646         {
 647             result.supportLevel_ = SupportLevel::LogicalProcessorCount;
 648         }
 649     }
 650     return result;
 651 }
 652
 653
 654 HardwareTopology::HardwareTopology()
 655     : supportLevel_(SupportLevel::None)
 656 {
 657 }
 658
 659 int HardwareTopology::numberOfCores() const
 660 {
 661     if (supportLevel() >= SupportLevel::Basic)
 662     {
 663         // We assume all sockets have the same number of cores as socket 0.
 664         // Since topology information is present, we can assume there is at least one socket.
 665         return machine().sockets.size() * machine().sockets[0].cores.size();
 666     }
 667     else if (supportLevel() >= SupportLevel::LogicalProcessorCount)
 668     {
 669         return machine().logicalProcessorCount;
 670     }
 671     else
 672     {
 673         return 0;
 674     }
 675 }
 676
 677 } // namespace gmx