src/gromacs/hardware/hardwaretopology.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2016,2018,2019,2021, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \libinternal \file
  36  * \brief
  37  * Declares gmx::HardwareTopology
  38  *
  39  * \author Erik Lindahl <erik.lindahl@gmail.com>
  40  * \inlibraryapi
  41  * \ingroup module_hardware
  42  */
  43 #ifndef GMX_HARDWARE_HARDWARETOPOLOGY_H
  44 #define GMX_HARDWARE_HARDWARETOPOLOGY_H
  45
#include <cstddef>
#include <cstdint>

#include <vector>
  49
  50 namespace gmx
  51 {
  52
  53 /*! \libinternal \brief Information about sockets, cores, threads, numa, caches
  54  *
  55  * This class is the main GROMACS interface to provide information about the
  56  * hardware of the system we are running on. Internally, it uses either
  57  * hwloc for full or almost-full information, or a fallback implementation
  58  * that relies on CpuInfo on x86.
  59  *
  60  * You should always use this class (rather than CpuInfo directly) to query
  61  * the hardware layout in user code. Note that you cannot rely on any
  62  * information being present, but you must check with the supportLevel()
  63  * method before trying to access any information.
  64  */
  65 class HardwareTopology
  66 {
  67 public:
  68     /*! \brief Amount of topology information present (incremental) */
  69     enum class SupportLevel
  70     {
  71         None,                  //!< No hardware information whatsoever. Sorry.
  72         LogicalProcessorCount, //!< Only machine().logicalProcessorCount is valid
  73         Basic,                 //!< Socket, core and hardware thread info
  74         Full,                  //!< Cache, memory and numa node info
  75         FullWithDevices        //!< Information about devices on the PCI bus
  76     };
  77
  78     /*! \libinternal \brief Information about a single cache level */
  79     struct Cache
  80     {
  81         int         level;         //!< Level relative to core (starts at 1)
  82         std::size_t size;          //!< size in bytes, 0 if unknown
  83         int         linesize;      //!< size of each cache line in bytes, 0 if unknown
  84         int         associativity; //!< associativity, -1 means fully associative
  85         int         shared;        //!< Number of logical processors sharing this cache
  86     };
  87
  88     /*! \libinternal \brief Information about a single hardware thread in a core
  89      *
  90      * The id of the thread typically increases continuously as you walk
  91      * through sockets and cores in order of their ids. In general, this can
  92      * be different from the logical processor id provided by the operating
  93      * system. To achieve better load balancing when using SMT, Linux
  94      * typically assigns logical processors in a round-robin fashion
  95      * over all cores.
  96      */
  97     struct HWThread
  98     {
  99         int id;                 //!< Absolute id of this thread in hardware topology
 100         int logicalProcessorId; //!< Id of the operating system logical processor
 101     };
 102
 103     /*! \libinternal \brief Information about a single core in a socket */
 104     struct Core
 105     {
 106         int                   id;         //!< Absolute id of this core in hardware topology
 107         int                   numaNodeId; //!< id of the numa node of this core
 108         std::vector<HWThread> hwThreads;  //!< All the hardware threads in this core
 109     };
 110
 111     /*! \libinternal \brief Information about a single socket in the system */
 112     struct Socket
 113     {
 114         int               id;    //!< Absolute id of this socket in hardware topology
 115         std::vector<Core> cores; //!< All the cores in this socket
 116     };
 117
 118     /*! \libinternal \brief Information about each numa node in system */
 119     struct NumaNode
 120     {
 121         int              id;                 //!< Absolute id of numa node in hardware topology
 122         std::size_t      memory;             //!< Total detected memory in bytes
 123         std::vector<int> logicalProcessorId; //!< Vector of all the logical processors in this node
 124     };
 125
 126     /*! \libinternal \brief Information about a single numa node */
 127     struct Numa
 128     {
 129         std::vector<NumaNode>           nodes;       //!< Information about each numa node
 130         float                           baseLatency; //!< Scale factor for relative latencies
 131         std::vector<std::vector<float>> relativeLatency; //!< 2D matrix of relative latencies between nodes
 132         float                           maxRelativeLatency; //!< Largest relative latency
 133     };
 134
 135     /*! \libinternal \brief Information about a single PCI device.
 136      *
 137      *  \note On many systems the PCI bus is not directly connected to any numa node.
 138      *        For these systems the numaNodeId will be -1, so you cannot rely on this
 139      *        number reflecting a specific numa node.
 140      */
 141     struct Device
 142     {
 143         std::uint16_t vendorId;   //!< Vendor identification
 144         std::uint16_t deviceId;   //!< Vendor-specific device identification
 145         std::uint16_t classId;    //!< class (high 8 bits) and subclass (low 8 bits)
 146         std::uint16_t domain;     //!< Domain, usually 0 for PCI bus
 147         std::uint8_t  bus;        //!< Bus number in domain
 148         std::uint8_t  dev;        //!< Device on bus
 149         std::uint8_t  func;       //!< Function id for multi-function devices
 150         int           numaNodeId; //!< Numa node, -1 if the bus is not located inside a node
 151     };
 152
 153     /*! \libinternal \brief Information about socket, core and hwthread for a logical processor */
 154     struct LogicalProcessor
 155     {
 156         int socketRankInMachine; //!< Index of socket in machine
 157         int coreRankInSocket;    //!< Index of core in socket
 158         int hwThreadRankInCore;  //!< Index of hardware thread in core
 159         int numaNodeId;          //!< Index of numa node
 160     };
 161
 162     /*! \libinternal \brief Hardware topology information about the entire machine
 163      *
 164      * The machine structure is a tree with top-down information about all
 165      * sockets, cores, and hardware threads in the system. For example, an
 166      * operating system logical processor index can be found as
 167      * machine.socket[0].core[1].hwthread[2].logicalProcessorId.
 168      * In some cases you might need the opposite lookup, i.e. the physical
 169      * hardware data for a specific logical processor. This is present in the
 170      * logicalProcessor vector for convenience.
 171      *
 172      * \note The logicalProcessor vector will only have non-zero length if the
 173      *       support level is SupportLevel::Basic or higher. You cannot use the
 174      *       size of this vector to query the number of logical processors on
 175      *       lower support levels.
 176      */
 177     struct Machine
 178     {
 179         Machine();
 180
 181         int logicalProcessorCount;                       //!< Number of logical processors in system
 182         std::vector<LogicalProcessor> logicalProcessors; //!< Map logical processors to socket/core
 183         std::vector<Socket>           sockets;           //!< All the sockets in the system
 184         std::vector<Cache>            caches;            //!< Caches in increasing level order
 185         Numa                          numa;              //!< Structure with all numa information
 186         std::vector<Device>           devices;           //!< Devices on PCI bus
 187     };
 188
 189     /*! \brief Detects the hardware topology. */
 190     static HardwareTopology detect();
 191
 192     /*! \brief Creates a topology with given number of logical cores.
 193      *
 194      * The support level will be either None or LogicalProcessorCount.
 195      *
 196      * Intended for testing of code that uses the hardware topology.
 197      */
 198     explicit HardwareTopology(int logicalProcessorCount);
 199
 200     /*! \brief Check what topology information that is available and valid
 201      *
 202      *  The amount of hardware topology information that can be detected depends
 203      *  on both the hardware and whether GROMACS was linked with the external
 204      *  hwloc library. You cannot assume that any information is present,
 205      *  although we can almost always provide the number of logical processors.
 206      *  On x86 we can usually get basic information about how sockets, cores
 207      *  and hardware threads are ordered even without hwloc.
 208      *  With the hwloc library we can usually also get information about cache,
 209      *  memory and concepts such as core groups and ccNUMA nodes.
 210      *  Finally, if hwloc was built with support for libpci we can also
 211      *  detect how the PCI devices are connected.
 212      */
 213     SupportLevel supportLevel() const { return supportLevel_; }
 214
 215     /*! \brief Return true if we actually detected hardware.
 216      *
 217      *  \return This method will normally return true, when we actually ran
 218      *          the hardware detection as part of this process to construct
 219      *          the object. It will be false when the object was constructed
 220      *          by reading a cached XML file, or possibly generated from
 221      *          synthetic data.
 222      */
 223     bool isThisSystem() const { return isThisSystem_; }
 224
 225     /*! \brief Return the machine topology tree
 226      *
 227      *  You can always call this routine, but be aware that some or all contents
 228      *  will not be valid unless supportLevel() returns a sufficient level.
 229      *
 230      *  - With SupportLevel::LogicalProcessorCount, only the field
 231      *    machine.logicalProcessorCount is valid.
 232      *  - With SupportLevel::Basic, you can access the vectors of sockets,
 233      *    cores, and hardware threads, and query what logical processorId
 234      *    each hardware thread corresponds to.
 235      *  - SupportLevel::Full adds cache, memory and ccNUMA information.
 236      *  - SupportLevel::FullWithDevices also adds the PCI express bus.
 237      *
 238      *  While data that is not valid has been initialized to special values,
 239      *  you should not rely on those but query the supportLevel() method before
 240      *  accessing it.
 241      */
 242     const Machine& machine() const { return machine_; }
 243
 244     /*! \brief Returns the number of cores.
 245      *
 246      * You can always call this routine, but if sufficient support is not
 247      * available, it may return the logical processor count or zero instead
 248      * of the physical core count.
 249      */
 250     int numberOfCores() const;
 251
 252 private:
 253     HardwareTopology();
 254
 255     SupportLevel supportLevel_; //!< Available topology information
 256     Machine      machine_;      //!< The machine map
 257     bool         isThisSystem_; //!< Machine map is real (vs. cached/synthetic)
 258 };
 259
 260 } // namespace gmx
 261
 262 #endif // GMX_HARDWARE_HARDWARETOPOLOGY_H