src/gromacs/hardware/hardwaretopology.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  * \brief
  38  * Implements gmx::HardwareTopology.
  39  *
  40  * \author Erik Lindahl <erik.lindahl@gmail.com>
  41  * \ingroup module_hardware
  42  */
  43
  44 #include "gmxpre.h"
  45
  46 #include "hardwaretopology.h"
  47
  48 #include "config.h"
  49
  50 #include <cstdio>
  51
  52 #include <algorithm>
  53 #include <vector>
  54
  55 #if GMX_HWLOC
  56 #    include <hwloc.h>
  57 #endif
  58
  59 #include "gromacs/gmxlib/md_logging.h"
  60 #include "gromacs/hardware/cpuinfo.h"
  61 #include "gromacs/utility/gmxassert.h"
  62 #include "gromacs/utility/gmxomp.h"
  63
  64 #ifdef HAVE_UNISTD_H
  65 #    include <unistd.h>       // sysconf()
  66 #endif
  67 #if GMX_NATIVE_WINDOWS
  68 #    include <windows.h>      // GetSystemInfo()
  69 #endif
  70
  71 #if defined(_M_ARM) || defined(__arm__) || defined(__ARM_ARCH) || defined (__aarch64__)
  72 //! Constant used to help minimize preprocessed code
  73 static const bool isArm = true;
  74 #else
  75 //! Constant used to help minimize preprocessed code
  76 static const bool isArm = false;
  77 #endif
  78
  79 namespace gmx
  80 {
  81
  82 namespace
  83 {
  84
  85 /*****************************************************************************
  86  *                                                                           *
  87  *   Utility functions for extracting hardware topology from CpuInfo object  *
  88  *                                                                           *
  89  *****************************************************************************/
  90
  91 /*! \brief Initialize machine data from basic information in cpuinfo
  92  *
  93  *  \param  machine      Machine tree structure where information will be assigned
  94  *                       if the cpuinfo object contains topology information.
  95  *  \param  supportLevel If topology information is available in CpuInfo,
  96  *                       this will be updated to reflect the amount of
  97  *                       information written to the machine structure.
  98  */
  99 void
 100 parseCpuInfo(HardwareTopology::Machine *        machine,
 101              HardwareTopology::SupportLevel *   supportLevel)
 102 {
 103     CpuInfo cpuInfo(CpuInfo::detect());
 104
 105     if (!cpuInfo.logicalProcessors().empty())
 106     {
 107         int nSockets   = 0;
 108         int nCores     = 0;
 109         int nHwThreads = 0;
 110
 111         // Copy the logical processor information from cpuinfo
 112         for (auto &l : cpuInfo.logicalProcessors())
 113         {
 114             machine->logicalProcessors.push_back( { l.socketRankInMachine, l.coreRankInSocket, l.hwThreadRankInCore, -1 } );
 115             nSockets   = std::max(nSockets, l.socketRankInMachine);
 116             nCores     = std::max(nCores, l.coreRankInSocket);
 117             nHwThreads = std::max(nHwThreads, l.hwThreadRankInCore);
 118         }
 119
 120         // Fill info form sockets/cores/hwthreads
 121         int socketId   = 0;
 122         int coreId     = 0;
 123         int hwThreadId = 0;
 124
 125         machine->sockets.resize(nSockets + 1);
 126         for (auto &s : machine->sockets)
 127         {
 128             s.id = socketId++;
 129             s.cores.resize(nCores + 1);
 130             for (auto &c : s.cores)
 131             {
 132                 c.id         = coreId++;
 133                 c.numaNodeId = -1; // No numa information
 134                 c.hwThreads.resize(nHwThreads + 1);
 135                 for (auto &t : c.hwThreads)
 136                 {
 137                     t.id                 = hwThreadId++;
 138                     t.logicalProcessorId = -1; // set as unassigned for now
 139                 }
 140             }
 141         }
 142
 143         // Fill the logical processor id in the right place
 144         for (std::size_t i = 0; i < machine->logicalProcessors.size(); i++)
 145         {
 146             const HardwareTopology::LogicalProcessor &l = machine->logicalProcessors[i];
 147             machine->sockets[l.socketRankInMachine].cores[l.coreRankInSocket].hwThreads[l.hwThreadRankInCore].logicalProcessorId = static_cast<int>(i);
 148         }
 149         machine->logicalProcessorCount = machine->logicalProcessors.size();
 150         *supportLevel                  = HardwareTopology::SupportLevel::Basic;
 151     }
 152     else
 153     {
 154         *supportLevel = HardwareTopology::SupportLevel::None;
 155     }
 156 }
 157
 158 #if GMX_HWLOC
 159
 160 #if HWLOC_API_VERSION < 0x00010b00
 161 #    define HWLOC_OBJ_PACKAGE  HWLOC_OBJ_SOCKET
 162 #    define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
 163 #endif
 164
 165 /*****************************************************************************
 166  *                                                                           *
 167  *   Utility functions for extracting hardware topology from hwloc library   *
 168  *                                                                           *
 169  *****************************************************************************/
 170
 171 /*! \brief Return vector of all descendants of a given type in hwloc topology
 172  *
 173  *  \param obj   Non-null hwloc object.
 174  *  \param type  hwloc object type to find. The routine will only search
 175  *               on levels below obj.
 176  *
 177  *  \return vector containing all the objects of given type that are
 178  *          descendants of the provided object. If no objects of this type
 179  *          were found, the vector will be empty.
 180  */
 181 const std::vector<hwloc_obj_t>
 182 getHwLocDescendantsByType(const hwloc_obj_t obj, const hwloc_obj_type_t type)
 183 {
 184     GMX_RELEASE_ASSERT(obj, "NULL hwloc object provided to getHwLocDescendantsByType()");
 185
 186     std::vector<hwloc_obj_t> v;
 187
 188     // Go through children; if this object has no children obj->arity is 0,
 189     // and we'll return an empty vector.
 190     for (std::size_t i = 0; i < obj->arity; i++)
 191     {
 192         // If the child is the type we're looking for, add it directly.
 193         // Otherwise call this routine recursively for each child.
 194         if (obj->children[i]->type == type)
 195         {
 196             v.push_back(obj->children[i]);
 197         }
 198         else
 199         {
 200             std::vector<hwloc_obj_t> v2 = getHwLocDescendantsByType(obj->children[i], type);
 201             v.insert(v.end(), v2.begin(), v2.end());
 202         }
 203     }
 204     return v;
 205 }
 206
 207 /*! \brief Read information about sockets, cores and threads from hwloc topology
 208  *
 209  *  \param topo    hwloc topology handle that has been initialized and loaded
 210  *  \param machine Pointer to the machine structure in the HardwareTopology
 211  *                 class, where the tree of sockets/cores/threads will be written.
 212  *
 213  *  \return If all the data is found the return value is 0, otherwise non-zero.
 214  */
 215 int
 216 parseHwLocSocketsCoresThreads(const hwloc_topology_t             topo,
 217                               HardwareTopology::Machine *        machine)
 218 {
 219     const hwloc_obj_t              root         = hwloc_get_root_obj(topo);
 220     std::vector<hwloc_obj_t>       hwlocSockets = getHwLocDescendantsByType(root, HWLOC_OBJ_PACKAGE);
 221
 222     machine->logicalProcessorCount = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
 223     machine->logicalProcessors.resize(machine->logicalProcessorCount);
 224     machine->sockets.resize(hwlocSockets.size());
 225
 226     bool topologyOk = !hwlocSockets.empty(); // Fail if we have no sockets in machine
 227
 228     for (std::size_t i = 0; i < hwlocSockets.size() && topologyOk; i++)
 229     {
 230         // Assign information about this socket
 231         machine->sockets[i].id = hwlocSockets[i]->logical_index;
 232
 233         // Get children (cores)
 234         std::vector<hwloc_obj_t> hwlocCores = getHwLocDescendantsByType(hwlocSockets[i], HWLOC_OBJ_CORE);
 235         machine->sockets[i].cores.resize(hwlocCores.size());
 236
 237         topologyOk = topologyOk && !hwlocCores.empty(); // Fail if we have no cores in socket
 238
 239         // Loop over child cores
 240         for (std::size_t j = 0; j < hwlocCores.size() && topologyOk; j++)
 241         {
 242             // Assign information about this core
 243             machine->sockets[i].cores[j].id         = hwlocCores[j]->logical_index;
 244             machine->sockets[i].cores[j].numaNodeId = -1;
 245
 246             // Get children (hwthreads)
 247             std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(hwlocCores[j], HWLOC_OBJ_PU);
 248             machine->sockets[i].cores[j].hwThreads.resize(hwlocPUs.size());
 249
 250             topologyOk = topologyOk && !hwlocPUs.empty(); // Fail if we have no hwthreads in core
 251
 252             // Loop over child hwthreads
 253             for (std::size_t k = 0; k < hwlocPUs.size() && topologyOk; k++)
 254             {
 255                 // Assign information about this hwthread
 256                 std::size_t logicalProcessorId                               = hwlocPUs[k]->os_index;
 257                 machine->sockets[i].cores[j].hwThreads[k].id                 = hwlocPUs[k]->logical_index;
 258                 machine->sockets[i].cores[j].hwThreads[k].logicalProcessorId = logicalProcessorId;
 259
 260                 if (logicalProcessorId < machine->logicalProcessors.size())
 261                 {
 262                     // Cross-assign data for this hwthread to the logicalprocess vector
 263                     machine->logicalProcessors[logicalProcessorId].socketRankInMachine = static_cast<int>(i);
 264                     machine->logicalProcessors[logicalProcessorId].coreRankInSocket    = static_cast<int>(j);
 265                     machine->logicalProcessors[logicalProcessorId].hwThreadRankInCore  = static_cast<int>(k);
 266                     machine->logicalProcessors[logicalProcessorId].numaNodeId          = -1;
 267                 }
 268                 else
 269                 {
 270                     topologyOk = false;
 271                 }
 272             }
 273         }
 274     }
 275
 276     if (topologyOk)
 277     {
 278         return 0;
 279     }
 280     else
 281     {
 282         machine->logicalProcessors.clear();
 283         machine->sockets.clear();
 284         return -1;
 285     }
 286 }
 287
 288 /*! \brief Read cache information from hwloc topology
 289  *
 290  *  \param topo    hwloc topology handle that has been initialized and loaded
 291  *  \param machine Pointer to the machine structure in the HardwareTopology
 292  *                 class, where cache data will be filled.
 293  *
 294  *  \return If any cache data is found the return value is 0, otherwise non-zero.
 295  */
 296 int
 297 parseHwLocCache(const hwloc_topology_t             topo,
 298                 HardwareTopology::Machine *        machine)
 299 {
 300     // Parse caches up to L5
 301     for (int cachelevel : { 1, 2, 3, 4, 5})
 302     {
 303         int depth = hwloc_get_cache_type_depth(topo, cachelevel, HWLOC_OBJ_CACHE_DATA);
 304
 305         if (depth >= 0)
 306         {
 307             hwloc_obj_t cache = hwloc_get_next_obj_by_depth(topo, depth, NULL);
 308             if (cache != NULL)
 309             {
 310                 std::vector<hwloc_obj_t> hwThreads = getHwLocDescendantsByType(cache, HWLOC_OBJ_PU);
 311
 312                 machine->caches.push_back( {
 313                                                static_cast<int>(cache->attr->cache.depth),
 314                                                static_cast<std::size_t>(cache->attr->cache.size),
 315                                                static_cast<int>(cache->attr->cache.linesize),
 316                                                static_cast<int>(cache->attr->cache.associativity),
 317                                                std::max(static_cast<int>(hwThreads.size()), 1)
 318                                            } );
 319             }
 320         }
 321     }
 322     return machine->caches.empty();
 323 }
 324
 325
 326 /*! \brief Read numa information from hwloc topology
 327  *
 328  *  \param topo    hwloc topology handle that has been initialized and loaded
 329  *  \param machine Pointer to the machine structure in the HardwareTopology
 330  *                 class, where numa information will be filled.
 331  *
 332  *  Hwloc should virtually always be able to detect numa information, but if
 333  *  there is only a single numa node in the system it is not reported at all.
 334  *  In this case we create a single numa node covering all cores.
 335  *
 336  *  This function uses the basic socket/core/thread information detected by
 337  *  parseHwLocSocketsCoresThreads(), which means that routine must have
 338  *  completed successfully before calling this one. If this is not the case,
 339  *  you will get an error return code.
 340  *
 341  *  \return If the data found makes sense (either in the numa node or the
 342  *          entire machine) the return value is 0, otherwise non-zero.
 343  */
 344 int
 345 parseHwLocNuma(const hwloc_topology_t             topo,
 346                HardwareTopology::Machine *        machine)
 347 {
 348     const hwloc_obj_t        root           = hwloc_get_root_obj(topo);
 349     std::vector<hwloc_obj_t> hwlocNumaNodes = getHwLocDescendantsByType(root, HWLOC_OBJ_NUMANODE);
 350     bool                     topologyOk     = true;
 351
 352     if (!hwlocNumaNodes.empty())
 353     {
 354         machine->numa.nodes.resize(hwlocNumaNodes.size());
 355
 356         for (std::size_t i = 0; i < hwlocNumaNodes.size(); i++)
 357         {
 358             machine->numa.nodes[i].id     = hwlocNumaNodes[i]->logical_index;
 359             machine->numa.nodes[i].memory = hwlocNumaNodes[i]->memory.total_memory;
 360             machine->numa.nodes[i].logicalProcessorId.clear();
 361
 362             // Get list of PUs in this numa node
 363             std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(hwlocNumaNodes[i], HWLOC_OBJ_PU);
 364
 365             for (auto &p : hwlocPUs)
 366             {
 367                 machine->numa.nodes[i].logicalProcessorId.push_back(p->os_index);
 368
 369                 GMX_RELEASE_ASSERT(p->os_index < machine->logicalProcessors.size(), "OS index of PU in hwloc larger than processor count");
 370
 371                 machine->logicalProcessors[p->os_index].numaNodeId = static_cast<int>(i);
 372                 std::size_t s = machine->logicalProcessors[p->os_index].socketRankInMachine;
 373                 std::size_t c = machine->logicalProcessors[p->os_index].coreRankInSocket;
 374
 375                 GMX_RELEASE_ASSERT(s < machine->sockets.size(), "Socket index in logicalProcessors larger than socket count");
 376                 GMX_RELEASE_ASSERT(c < machine->sockets[s].cores.size(), "Core index in logicalProcessors larger than core count");
 377                 // Set numaNodeId in core too
 378                 machine->sockets[s].cores[c].numaNodeId = i;
 379             }
 380         }
 381
 382         int depth = hwloc_get_type_depth(topo, HWLOC_OBJ_NUMANODE);
 383         const struct hwloc_distances_s * dist = hwloc_get_whole_distance_matrix_by_depth(topo, depth);
 384         if (dist != NULL && dist->nbobjs == hwlocNumaNodes.size())
 385         {
 386             machine->numa.baseLatency        = dist->latency_base;
 387             machine->numa.maxRelativeLatency = dist->latency_max;
 388             machine->numa.relativeLatency.resize(dist->nbobjs);
 389             for (std::size_t i = 0; i < dist->nbobjs; i++)
 390             {
 391                 machine->numa.relativeLatency[i].resize(dist->nbobjs);
 392                 for (std::size_t j = 0; j < dist->nbobjs; j++)
 393                 {
 394                     machine->numa.relativeLatency[i][j] = dist->latency[i*dist->nbobjs+j];
 395                 }
 396             }
 397         }
 398         else
 399         {
 400             topologyOk = false;
 401         }
 402     }
 403     else
 404     {
 405         // No numa nodes found. Use the entire machine as a numa node.
 406         const hwloc_obj_t hwlocMachine = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_MACHINE, NULL);
 407
 408         if (hwlocMachine != NULL)
 409         {
 410             machine->numa.nodes.resize(1);
 411             machine->numa.nodes[0].id           = 0;
 412             machine->numa.nodes[0].memory       = hwlocMachine->memory.total_memory;
 413             machine->numa.baseLatency           = 10;
 414             machine->numa.maxRelativeLatency    = 1;
 415             machine->numa.relativeLatency       = { { 1.0 } };
 416
 417             for (int i = 0; i < machine->logicalProcessorCount; i++)
 418             {
 419                 machine->numa.nodes[0].logicalProcessorId.push_back(i);
 420             }
 421             for (auto &l : machine->logicalProcessors)
 422             {
 423                 l.numaNodeId = 0;
 424             }
 425             for (auto &s : machine->sockets)
 426             {
 427                 for (auto &c : s.cores)
 428                 {
 429                     c.numaNodeId = 0;
 430                 }
 431             }
 432         }
 433         else
 434         {
 435             topologyOk = false;
 436         }
 437     }
 438
 439     if (topologyOk)
 440     {
 441         return 0;
 442     }
 443     else
 444     {
 445         machine->numa.nodes.clear();
 446         return -1;
 447     }
 448
 449 }
 450
 451 /*! \brief Read PCI device information from hwloc topology
 452  *
 453  *  \param topo    hwloc topology handle that has been initialized and loaded
 454  *  \param machine Pointer to the machine structure in the HardwareTopology
 455  *                 class, where PCI device information will be filled.
 456  * *
 457  *  \return If any devices were found the return value is 0, otherwise non-zero.
 458  */
 459 int
 460 parseHwLocDevices(const hwloc_topology_t             topo,
 461                   HardwareTopology::Machine *        machine)
 462 {
 463     const hwloc_obj_t        root    = hwloc_get_root_obj(topo);
 464     std::vector<hwloc_obj_t> pcidevs = getHwLocDescendantsByType(root, HWLOC_OBJ_PCI_DEVICE);
 465
 466     for (auto &p : pcidevs)
 467     {
 468         const hwloc_obj_t ancestor = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_NUMANODE, p);
 469         int               numaId;
 470         if (ancestor != NULL)
 471         {
 472             numaId = ancestor->logical_index;
 473         }
 474         else
 475         {
 476             // If we only have a single numa node we belong to it, otherwise set it to -1 (unknown)
 477             numaId = (machine->numa.nodes.size() == 1) ?  0 : -1;
 478         }
 479
 480         GMX_RELEASE_ASSERT(p->attr, "Attributes should not be NULL for hwloc PCI object");
 481
 482         machine->devices.push_back( {
 483                                         p->attr->pcidev.vendor_id,
 484                                         p->attr->pcidev.device_id,
 485                                         p->attr->pcidev.class_id,
 486                                         p->attr->pcidev.domain,
 487                                         p->attr->pcidev.bus,
 488                                         p->attr->pcidev.dev,
 489                                         p->attr->pcidev.func,
 490                                         numaId
 491                                     } );
 492     }
 493     return pcidevs.empty();
 494 }
 495
 496 void
 497 parseHwLoc(HardwareTopology::Machine *        machine,
 498            HardwareTopology::SupportLevel *   supportLevel,
 499            bool *                             isThisSystem)
 500 {
 501     hwloc_topology_t    topo;
 502
 503     // Initialize a hwloc object, set flags to request IO device information too,
 504     // try to load the topology, and get the root object. If either step fails,
 505     // return that we do not have any support at all from hwloc.
 506     if (hwloc_topology_init(&topo) != 0)
 507     {
 508         hwloc_topology_destroy(topo);
 509         return; // SupportLevel::None.
 510     }
 511
 512     hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);
 513
 514     if (hwloc_topology_load(topo) != 0 || hwloc_get_root_obj(topo) == NULL)
 515     {
 516         hwloc_topology_destroy(topo);
 517         return; // SupportLevel::None.
 518     }
 519
 520     // If we get here, we can get a valid root object for the topology
 521     *isThisSystem = hwloc_topology_is_thissystem(topo);
 522
 523     // Parse basic information about sockets, cores, and hardware threads
 524     if (parseHwLocSocketsCoresThreads(topo, machine) == 0)
 525     {
 526         *supportLevel = HardwareTopology::SupportLevel::Basic;
 527     }
 528     else
 529     {
 530         hwloc_topology_destroy(topo);
 531         return; // SupportLevel::None.
 532     }
 533
 534     // Get information about cache and numa nodes
 535     if (parseHwLocCache(topo, machine) == 0 && parseHwLocNuma(topo, machine) == 0)
 536     {
 537         *supportLevel = HardwareTopology::SupportLevel::Full;
 538     }
 539     else
 540     {
 541         hwloc_topology_destroy(topo);
 542         return; // SupportLevel::Basic.
 543     }
 544
 545     // PCI devices
 546     if (parseHwLocDevices(topo, machine) == 0)
 547     {
 548         *supportLevel = HardwareTopology::SupportLevel::FullWithDevices;
 549     }
 550
 551     hwloc_topology_destroy(topo);
 552     return; // SupportLevel::Full or SupportLevel::FullWithDevices.
 553 }
 554
 555 #endif
 556
 557 /*! \brief Try to detect the number of logical processors.
 558  *
 559  *  \return The number of hardware processing units, or 0 if it fails.
 560  */
 561 int
 562 detectLogicalProcessorCount(FILE *fplog, const t_commrec *cr)
 563 {
 564     int count = 0;
 565
 566     {
 567 #if GMX_NATIVE_WINDOWS
 568         // Windows
 569         SYSTEM_INFO sysinfo;
 570         GetSystemInfo( &sysinfo );
 571         count = sysinfo.dwNumberOfProcessors;
 572 #elif defined HAVE_SYSCONF
 573         // We are probably on Unix. Check if we have the argument to use before executing any calls
 574 #    if defined(_SC_NPROCESSORS_CONF)
 575         count = sysconf(_SC_NPROCESSORS_CONF);
 576 #        if defined(_SC_NPROCESSORS_ONLN)
 577         /* On e.g. Arm, the Linux kernel can use advanced power saving features where
 578          * processors are brought online/offline dynamically. This will cause
 579          * _SC_NPROCESSORS_ONLN to report 1 at the beginning of the run. For this
 580          * reason we now warn if this mismatches with the detected core count. */
 581         int countOnline = sysconf(_SC_NPROCESSORS_ONLN);
 582         if (count != countOnline)
 583         {
 584             /* We assume that this scenario means that the kernel has
 585                disabled threads or cores, and that the only safe course is
 586                to assume that _SC_NPROCESSORS_ONLN should be used. Even
 587                this may not be valid if running in a containerized
 588                environment, such system calls may read from
 589                /sys/devices/system/cpu and report what the OS sees, rather
 590                than what the container cgroup is supposed to set up as
 591                limits. But we're not sure right now whether there's any
 592                (standard-ish) way to handle that.
 593
 594                On ARM, the kernel may have powered down the cores,
 595                which we'll warn the user about now. On x86, this
 596                means HT is disabled by the kernel, not in the
 597                BIOS. We're not sure what it means on other
 598                architectures, or even if it is possible, because
 599                sysconf is rather non-standardized. */
 600             if (isArm)
 601             {
 602                 md_print_warn(cr, fplog,
 603                               "%d CPUs configured, but only %d of them are online.\n"
 604                               "This can happen on embedded platforms (e.g. ARM) where the OS shuts some cores\n"
 605                               "off to save power, and will turn them back on later when the load increases.\n"
 606                               "However, this will likely mean GROMACS cannot pin threads to those cores. You\n"
 607                               "will likely see much better performance by forcing all cores to be online, and\n"
 608                               "making sure they run at their full clock frequency.", count, countOnline);
 609             }
 610             else
 611             {
 612                 md_print_warn(cr, fplog,
 613                               "Note: %d CPUs configured, but only %d of them are online, so GROMACS will use the latter.",
 614                               count, countOnline);
 615                 // We use the online count to avoid (potential) oversubscription.
 616                 count = countOnline;
 617             }
 618         }
 619 #        endif
 620 #    elif defined(_SC_NPROC_CONF)
 621         count = sysconf(_SC_NPROC_CONF);
 622 #    elif defined(_SC_NPROCESSORS_ONLN)
 623         count = sysconf(_SC_NPROCESSORS_ONLN);
 624 #    elif defined(_SC_NPROC_ONLN)
 625         count = sysconf(_SC_NPROC_ONLN);
 626 #    else
 627 #       warning "No valid sysconf argument value found. Executables will not be able to determine the number of logical cores: mdrun will use 1 thread by default!"
 628 #    endif      // End of check for sysconf argument values
 629
 630 #else
 631         count = 0; // Neither windows nor Unix.
 632 #endif
 633     }
 634 #if GMX_OPENMP
 635     int countFromOpenmp = gmx_omp_get_num_procs();
 636     if (count != countFromOpenmp)
 637     {
 638         md_print_warn(cr, fplog,
 639                       "Number of logical cores detected (%d) does not match the number reported by OpenMP (%d).\n"
 640                       "Consider setting the launch configuration manually!",
 641                       count, countFromOpenmp);
 642     }
 643 #endif
 644
 645     GMX_UNUSED_VALUE(cr);
 646     GMX_UNUSED_VALUE(fplog);
 647     return count;
 648 }
 649
 650 }   // namespace anonymous
 651
 652 // static
 653 HardwareTopology HardwareTopology::detect(FILE *fplog, const t_commrec *cr)
 654 {
 655     HardwareTopology result;
 656
 657     // Default values for machine and numa stuff
 658     result.machine_.logicalProcessorCount   = 0;
 659     result.machine_.numa.baseLatency        = 0.0;
 660     result.machine_.numa.maxRelativeLatency = 0.0;
 661     result.supportLevel_                    = SupportLevel::None;
 662     result.isThisSystem_                    = true;
 663
 664 #if GMX_HWLOC
 665     parseHwLoc(&result.machine_, &result.supportLevel_, &result.isThisSystem_);
 666 #endif
 667
 668     // If something went wrong in hwloc (or if it was not present) we might
 669     // have more information in cpuInfo
 670     if (result.supportLevel_ < SupportLevel::Basic)
 671     {
 672         // There might be topology information in cpuInfo
 673         parseCpuInfo(&result.machine_, &result.supportLevel_);
 674     }
 675     // If we did not manage to get anything from either hwloc or cpuInfo, find the cpu count at least
 676     if (result.supportLevel_ == SupportLevel::None)
 677     {
 678         // No topology information; try to detect the number of logical processors at least
 679         result.machine_.logicalProcessorCount = detectLogicalProcessorCount(fplog, cr);
 680         if (result.machine_.logicalProcessorCount > 0)
 681         {
 682             result.supportLevel_ = SupportLevel::LogicalProcessorCount;
 683         }
 684     }
 685     return result;
 686 }
 687
 688
 689 HardwareTopology::HardwareTopology()
 690     : supportLevel_(SupportLevel::None)
 691 {
 692 }
 693
 694 int HardwareTopology::numberOfCores() const
 695 {
 696     if (supportLevel() >= SupportLevel::Basic)
 697     {
 698         // We assume all sockets have the same number of cores as socket 0.
 699         // Since topology information is present, we can assume there is at least one socket.
 700         return machine().sockets.size() * machine().sockets[0].cores.size();
 701     }
 702     else if (supportLevel() >= SupportLevel::LogicalProcessorCount)
 703     {
 704         return machine().logicalProcessorCount;
 705     }
 706     else
 707     {
 708         return 0;
 709     }
 710 }
 711
 712 } // namespace gmx