src/gromacs/hardware/cpuinfo.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \libinternal \file
  36  * \brief
  37  * Declares gmx::CpuInfo
  38  *
  39  * \author Erik Lindahl <erik.lindahl@gmail.com>
  40  * \inlibraryapi
  41  * \ingroup module_hardware
  42  */
  43 #ifndef GMX_HARDWARE_CPUINFO_H
  44 #define GMX_HARDWARE_CPUINFO_H
  45
  46 #include <map>
  47 #include <set>
  48 #include <string>
  49 #include <vector>
  50
  51 namespace gmx
  52 {
  53
  54 /*! \libinternal \brief Detect CPU capabilities and basic logical processor info
  55  *
  56  *  This class provides a lot of information about x86 CPUs, and some very
  57  *  limited information about other hardware. The logical processor information
  58  *  is only available on x86, and is used as a fallback implementation in
  59  *  the HardwareTopology class.
  60  *  If you actually need information about the hardware topology, use the much
  61  *  more general implementation in the HardwareTopology class instead, since
  62  *  that will both be more portable and contain more information.
  63  *
  64  * \ingroup module_hardware
  65  */
  66 class CpuInfo
  67 {
  68
  69 public:
  70     /*! \brief Amount of cpu information present (incremental, each level adds to the previous) */
  71     enum class SupportLevel
  72     {
  73         None,                //!< No cpu information whatsoever. Sorry.
  74         Name,                //!< Only vendor and/or brand is set
  75         Features,            //!< Some features are set
  76         LogicalProcessorInfo //!< Everything including logical processor information
  77     };
  78
  79     /*! \brief Processor/system vendors */
  80     enum class Vendor
  81     {
  82         Unknown, //!< Unidentified
  83         Intel,   //!< GenuineIntel
  84         Amd,     //!< AuthenticAMD
  85         Fujitsu, //!< Only works on Linux (parsed from /proc/cpuinfo)
  86         Ibm,     //!< Only works on Linux (parsed from /proc/cpuinfo)
  87         Arm,     //!< Only works on Linux (parsed from /proc/cpuinfo)
  88         Oracle,  //!< Cannot detect anything else yet (no /proc/cpuinfo available)
  89         Hygon,   //!< HygonGenuine
  90     };
  91
  92     /*! \brief List of CPU features
  93      *
  94      *  These values can be used as arguments to the feature() method
  95      *  to check whether a specific feature was found on the CPU we are
  96      *  running on.
  97      */
  98     enum class Feature
  99     {
 100         X86_Aes,             //!< x86 advanced encryption standard accel.
 101         X86_Amd,             //!< This is an AMD x86 processor
 102         X86_Apic,            //!< APIC support
 103         X86_Avx,             //!< Advanced vector extensions
 104         X86_Avx2,            //!< AVX2 including gather support (not used yet)
 105         X86_Avx512F,         //!< Foundation AVX-512 instructions
 106         X86_Avx512PF,        //!< Extended gather/scatter for AVX-512
 107         X86_Avx512ER,        //!< AVX-512 exponential and reciprocal extensions
 108         X86_Avx512CD,        //!< Memory conflict-detection for AVX-512
 109         X86_Avx512BW,        //!< AVX-512 byte and word instructions
 110         X86_Avx512VL,        //!< AVX-512 vector length extensions
 111         X86_Avx512BF16,      //!< AVX-512 BFloat16 instructions
 112         X86_Avx512secondFMA, //!< AVX-512 second FMA unit
 113         X86_Clfsh,           //!< Supports CLFLUSH instruction
 114         X86_Cmov,            //!< Conditional move insn support
 115         X86_Cx8,             //!< Supports CMPXCHG8B (8-byte compare-exchange)
 116         X86_Cx16,            //!< Supports CMPXCHG16B (16-byte compare-exchg)
 117         X86_F16C,            //!< Supports 16-bit FP conversion instructions
 118         X86_Fma,             //!< Fused-multiply add support (mainly for AVX)
 119         X86_Fma4,            //!< 4-operand FMA, only on AMD for now
 120         X86_Hle,             //!< Hardware lock elision
 121         X86_Htt,   //!< Hyper-Threading enabled (NOTE: might not match the CPUID HTT support flag)
 122         X86_Intel, //!< This is an Intel x86 processor
 123         X86_Lahf,  //!< LAHF/SAHF support in 64 bits
 124         X86_MisalignSse, //!< Support for misaligned SSE data instructions
 125         X86_Mmx,         //!< MMX registers and instructions
 126         X86_Msr,         //!< Supports Intel model-specific-registers
 127         X86_NonstopTsc,  //!< Invariant TSC (constant rate in ACPI states)
 128         X86_Pcid,        //!< Process context identifier support
 129         X86_Pclmuldq,    //!< Carry-less 64-bit multiplication supported
 130         X86_Pdcm,        //!< Perfmon and Debug Capability
 131         X86_PDPE1GB,     //!< Support for 1GB pages
 132         X86_Popcnt,      //!< Supports the POPCNT (population count) insn
 133         X86_Pse,         //!< Supports 4MB-pages (page size extension)
 134         X86_Rdrnd,       //!< RDRAND high-quality hardware random numbers
 135         X86_Rdtscp,      //!< Serializing rdtscp instruction available
 136         X86_Rtm,         //!< Restricted transactional memory
 137         X86_Sha,         //!< Intel SHA extensions
 138         X86_Sse2,        //!< SSE 2
 139         X86_Sse3,        //!< SSE 3
 140         X86_Sse4A,       //!< SSE 4A
 141         X86_Sse4_1,      //!< SSE 4.1
 142         X86_Sse4_2,      //!< SSE 4.2
 143         X86_Ssse3,       //!< Supplemental SSE3
 144         X86_Tdt,         //!< TSC deadline timer
 145         X86_X2Apic,      //!< Extended xAPIC Support
 146         X86_Xop,         //!< AMD extended instructions, only AMD for now
 147         Arm_Neon,        //!< 32-bit ARM NEON
 148         Arm_NeonAsimd,   //!< 64-bit ARM AArch64 Advanced SIMD
 149         Arm_Sve,         //!< ARM Scalable Vector Extensions
 150         Ibm_Qpx,         //!< IBM QPX SIMD (BlueGene/Q)
 151         Ibm_Vmx,         //!< IBM VMX SIMD (Altivec on Power6 and later)
 152         Ibm_Vsx,         //!< IBM VSX SIMD (Power7 and later)
 153         Fujitsu_HpcAce,  //!< Fujitsu Sparc64 HPC-ACE
 154         X86_Hygon        //!< This is a Hygon x86 processor
 155     };
 156
 157     /*! \libinternal \brief Entry with basic information for a single logical processor */
 158     struct LogicalProcessor
 159     {
 160         int socketRankInMachine; //!< Relative rank of the current socket in the system
 161         int coreRankInSocket;    //!< Relative rank of the current core in its socket
 162         int hwThreadRankInCore;  //!< Relative rank of logical processor in its core
 163     };
 164
 165     /*! \brief Perform detection and construct a CpuInfo class from the results.
 166      *
 167      *  \note The detection should generally be performed again in different
 168      *        contexts.  This might seem like overkill, but there
 169      *        are systems (e.g. Arm) where processors can go completely offline
 170      *        during deep sleep, so at least in theory it is good to have a
 171      *        possibility of forcing re-detection if necessary.
 172      */
 173     static CpuInfo detect();
 174
 175     /*! \brief Check what cpu information is available
 176      *
 177      *  The amount of cpu information that can be detected depends on the
 178      *  OS, compiler, and CPU, and on non-x86 platforms it can be fragile.
 179      *  Before basing decisions on the output or warning the user about
 180      *  optimizations, you want to check whether it was possible to detect
 181      *  the information you need.
 182      */
 183     SupportLevel supportLevel() const { return supportLevel_; }
 184
 185     /*! \brief Enumerated vendor value for the current cpu (see Vendor) */
 186     Vendor vendor() const { return vendor_; }
 187
 188     /*! \brief String description of vendor
 189      *
 190      *  \throws std::out_of_range if the vendor is not present in the internal
 191      *          map of vendor names. This can only happen if we extend the enum
 192      *          type but forget to add the string with the vendor name.
 193      */
 194     const std::string& vendorString() const;
 195
 196     /*! \brief String description of processor */
 197     const std::string& brandString() const { return brandString_; }
 198
 199     /*! \brief Major version/generation of the processor */
 200     int family() const { return family_; }
 201
 202     /*! \brief Middle version of the processor */
 203     int model() const { return model_; }
 204
 205     /*! \brief Minor version of the processor */
 206     int stepping() const { return stepping_; }
 207
 208     /*! \brief Check for availability of specific feature
 209      *
 210      *  \param f  feature to query support for
 211      *
 212      *  \return True if the feature is available, otherwise false.
 213      */
 214     bool feature(Feature f) const
 215     {
 216         // A feature is supported exactly when it is present in the features_ set
 217         return (features_.count(f) != 0);
 218     }
 219
 220     /*! \brief String description of a specific feature
 221      *
 222      *  \throws std::out_of_range if the feature is not present in the internal
 223      *          map of feature names. This can only happen if we extend the enum
 224      *          type but forget to add the string with the feature name.
 225      */
 226     static const std::string& featureString(Feature f);
 227
 228     /*! \brief Set of all supported features on this processor
 229      *
 230      *  This is only intended for logfiles, debugging or similar output when we
 231      *  need a full list of all the features available on the CPU.
 232      */
 233     const std::set<Feature>& featureSet() const { return features_; }
 234
 235     /*! \brief Reference to processing unit topology
 236      *
 237      *  Only a few systems (x86) provide logical processor information in cpuinfo.
 238      *  This method returns a reference to a vector, whose length will either be
 239      *  zero (if topology information is not available) or the number of enabled
 240      *  processing units, as defined by the operating system. In the latter
 241      *  case, each entry will contain information about the relative rank in the
 242      *  core and socket of this hardware thread.
 243      *
 244      *  This is only meant to be used as a fallback implementation for our
 245      *  HardwareTopology class; any user code that needs access to hardware
 246      *  topology information should use that class instead.
 247      *
 248      *  \note For clarity, it is likely better to use the supportLevel()
 249      *        method to check if this information is available rather than
 250      *        relying on the length of the vector.
 251      */
 252     const std::vector<LogicalProcessor>& logicalProcessors() const { return logicalProcessors_; }
 253
 254 private:
 255     CpuInfo(); //!< Default construction is private; use the static detect() to create an object
 256
 257     SupportLevel                  supportLevel_;      //!< Available cpuinfo information
 258     Vendor                        vendor_;            //!< Value of vendor for current cpu
 259     std::string                   brandString_;       //!< Text description of cpu
 260     int                           family_;            //!< Major version of current cpu
 261     int                           model_;             //!< Middle version of current cpu
 262     int                           stepping_;          //!< Minor version of current cpu
 263     std::set<Feature>             features_;          //!< Set of features supported on this cpu
 264     std::vector<LogicalProcessor> logicalProcessors_; //!< Simple logical processor topology
 265 };                                                    // class CpuInfo
 266
 267 /*! \brief Return true if the CPU is an Intel x86 Nehalem
 268  *
 269  * \param cpuInfo  Object with cpu information, e.g. as returned by CpuInfo::detect()
 270  *
 271  * \returns  True if running on a Nehalem CPU, otherwise false
 272  */
 273 bool cpuIsX86Nehalem(const CpuInfo& cpuInfo);
 274
 275 /*! \brief Return true if the CPU is a first generation AMD Zen (produced by AMD or Hygon)
 276  *
 277  * \param cpuInfo  Object with cpu information, e.g. as returned by CpuInfo::detect()
 278  *
 279  * \returns  True if running on a first generation AMD Zen CPU, otherwise false
 280  */
 281 bool cpuIsAmdZen1(const CpuInfo& cpuInfo);
 282
 283 } // namespace gmx
 284
 285 #endif // GMX_HARDWARE_CPUINFO_H