src/gromacs/simd/support.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  *
  38  * \brief Implements SIMD architecture support query routines
  39  *
  40  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  41  *
  42  * \ingroup module_simd
  43  */
  44
  45 #include "gmxpre.h"
  46
  47 #include "support.h"
  48
  49 #include "config.h"
  50
  51 #if GMX_SIMD_ARM_SVE
  52 #    include <arm_sve.h>
  53 #endif
  54
  55 #include <cstdio>
  56 #include <cstdlib>
  57
  58 #include <map>
  59 #include <string>
  60
  61 #include "gromacs/hardware/cpuinfo.h"
  62 #include "gromacs/hardware/identifyavx512fmaunits.h"
  63 #include "gromacs/utility/fatalerror.h"
  64 #include "gromacs/utility/stringutil.h"
  65
  66 namespace gmx
  67 {
  68
  69 /*! \cond libapi */
  70
  71 const std::string& simdString(SimdType s)
  72 {
  73     static const std::map<SimdType, std::string> name = {
  74         { SimdType::None, "None" },
  75         { SimdType::Reference, "Reference" },
  76         { SimdType::Generic, "Generic" },
  77         { SimdType::X86_Sse2, "SSE2" },
  78         { SimdType::X86_Sse4_1, "SSE4.1" },
  79         { SimdType::X86_Avx128Fma, "AVX_128_FMA" },
  80         { SimdType::X86_Avx, "AVX_256" },
  81         { SimdType::X86_Avx2, "AVX2_256" },
  82         { SimdType::X86_Avx2_128, "AVX2_128" },
  83         { SimdType::X86_Avx512, "AVX_512" },
  84         { SimdType::X86_Avx512Knl, "AVX_512_KNL" },
  85         { SimdType::X86_Mic, "X86_MIC" },
  86         { SimdType::Arm_Neon, "ARM_NEON" },
  87         { SimdType::Arm_NeonAsimd, "ARM_NEON_ASIMD" },
  88         { SimdType::Arm_Sve, "ARM_SVE" },
  89         { SimdType::Ibm_Vmx, "IBM_VMX" },
  90         { SimdType::Ibm_Vsx, "IBM_VSX" },
  91         { SimdType::Fujitsu_HpcAce, "Fujitsu HPC-ACE" }
  92     };
  93
  94     return name.at(s);
  95 }
  96
  97 SimdType simdSuggested(const CpuInfo& c)
  98 {
  99     SimdType suggested = SimdType::None;
 100
 101     if (c.supportLevel() >= CpuInfo::SupportLevel::Features)
 102     {
 103         switch (c.vendor())
 104         {
 105             case CpuInfo::Vendor::Intel:
 106                 if (c.feature(CpuInfo::Feature::X86_Avx512ER))
 107                 {
 108                     suggested = SimdType::X86_Avx512Knl;
 109                 }
 110                 else if (c.feature(CpuInfo::Feature::X86_Avx512F))
 111                 {
 112                     // If we could not identify the number of AVX512 FMA units we assume 2
 113                     suggested = (identifyAvx512FmaUnits() == 1) ? SimdType::X86_Avx2 : SimdType::X86_Avx512;
 114                 }
 115                 else if (c.feature(CpuInfo::Feature::X86_Avx2))
 116                 {
 117                     suggested = SimdType::X86_Avx2;
 118                 }
 119                 else if (c.feature(CpuInfo::Feature::X86_Avx))
 120                 {
 121                     suggested = SimdType::X86_Avx;
 122                 }
 123                 else if (c.feature(CpuInfo::Feature::X86_Sse4_1))
 124                 {
 125                     suggested = SimdType::X86_Sse4_1;
 126                 }
 127                 else if (c.feature(CpuInfo::Feature::X86_Sse2))
 128                 {
 129                     suggested = SimdType::X86_Sse2;
 130                 }
 131                 break;
 132             case CpuInfo::Vendor::Amd:
 133             case CpuInfo::Vendor::Hygon:
 134                 if (c.feature(CpuInfo::Feature::X86_Avx2))
 135                 {
 136                     // AMD Zen supports 256-bit AVX2, but Zen1 performs better with 128-bit
 137                     // since it can execute two independent such instructions per cycle,
 138                     // and wider SIMD has slightly lower efficiency in GROMACS.
 139                     // However... Zen2 supports full-width execution of 256-bit AVX2,
 140                     // so we only want to apply this hack to Zen/Zen+.
 141                     suggested = cpuIsAmdZen1(c) ? SimdType::X86_Avx2_128 : SimdType::X86_Avx2;
 142                 }
 143                 else if (c.feature(CpuInfo::Feature::X86_Avx))
 144                 {
 145                     // Use 128-bit FMA SIMD if Fma4 flag is set, otherwise plain 256-bit AVX
 146                     if (c.feature(CpuInfo::Feature::X86_Fma4))
 147                     {
 148                         suggested = SimdType::X86_Avx128Fma;
 149                     }
 150                     else
 151                     {
 152                         suggested = SimdType::X86_Avx;
 153                     }
 154                 }
 155                 else if (c.feature(CpuInfo::Feature::X86_Sse4_1))
 156                 {
 157                     suggested = SimdType::X86_Sse4_1;
 158                 }
 159                 else if (c.feature(CpuInfo::Feature::X86_Sse2))
 160                 {
 161                     suggested = SimdType::X86_Sse2;
 162                 }
 163
 164                 break;
 165             case CpuInfo::Vendor::Arm:
 166                 if (c.feature(CpuInfo::Feature::Arm_Sve))
 167                 {
 168                     suggested = SimdType::Arm_Sve;
 169                 }
 170                 else if (c.feature(CpuInfo::Feature::Arm_NeonAsimd))
 171                 {
 172                     suggested = SimdType::Arm_NeonAsimd;
 173                 }
 174                 else if (c.feature(CpuInfo::Feature::Arm_Neon))
 175                 {
 176                     suggested = SimdType::Arm_Neon;
 177                 }
 178                 break;
 179             case CpuInfo::Vendor::Ibm:
 180                 if (c.feature(CpuInfo::Feature::Ibm_Vsx))
 181                 {
 182                     suggested = SimdType::Ibm_Vsx;
 183                 }
 184                 else if (c.feature(CpuInfo::Feature::Ibm_Vmx))
 185                 {
 186                     suggested = SimdType::Ibm_Vmx;
 187                 }
 188                 break;
 189             case CpuInfo::Vendor::Fujitsu:
 190                 if (c.feature(CpuInfo::Feature::Fujitsu_HpcAce))
 191                 {
 192                     suggested = SimdType::Fujitsu_HpcAce;
 193                 }
 194                 break;
 195             default: break;
 196         }
 197     }
 198     return suggested;
 199 }
 200
 201 SimdType simdCompiled()
 202 {
 203 #if GMX_SIMD_X86_AVX_512_KNL
 204     return SimdType::X86_Avx512Knl;
 205 #elif GMX_SIMD_X86_AVX_512
 206     return SimdType::X86_Avx512;
 207 #elif GMX_SIMD_X86_MIC
 208     return SimdType::X86_Mic;
 209 #elif GMX_SIMD_X86_AVX2_256
 210     return SimdType::X86_Avx2;
 211 #elif GMX_SIMD_X86_AVX2_128
 212     return SimdType::X86_Avx2_128;
 213 #elif GMX_SIMD_X86_AVX_256
 214     return SimdType::X86_Avx;
 215 #elif GMX_SIMD_X86_AVX_128_FMA
 216     return SimdType::X86_Avx128Fma;
 217 #elif GMX_SIMD_X86_SSE4_1
 218     return SimdType::X86_Sse4_1;
 219 #elif GMX_SIMD_X86_SSE2
 220     return SimdType::X86_Sse2;
 221 #elif GMX_SIMD_ARM_NEON
 222     return SimdType::Arm_Neon;
 223 #elif GMX_SIMD_ARM_NEON_ASIMD
 224     return SimdType::Arm_NeonAsimd;
 225 #elif GMX_SIMD_ARM_SVE
 226     return SimdType::Arm_Sve;
 227 #elif GMX_SIMD_IBM_VMX
 228     return SimdType::Ibm_Vmx;
 229 #elif GMX_SIMD_IBM_VSX
 230     return SimdType::Ibm_Vsx;
 231 #elif GMX_SIMD_SPARC64_HPC_ACE
 232     return SimdType::Fujitsu_HpcAce;
 233 #elif GMX_SIMD_REFERENCE
 234     return SimdType::Reference;
 235 #else
 236     return SimdType::None;
 237 #endif
 238 }
 239
 240 bool simdCheck(gmx::SimdType wanted, FILE* log, bool warnToStdErr)
 241 {
 242     SimdType compiled = simdCompiled();
 243
 244     gmx::TextLineWrapper wrapper;
 245     std::string          logMsg;
 246     std::string          warnMsg;
 247
 248     wrapper.settings().setLineLength(78);
 249
 250     if (compiled == SimdType::X86_Avx2 && wanted == SimdType::X86_Avx512)
 251     {
 252         logMsg  = wrapper.wrapToString(formatString(
 253                 "Highest SIMD level requested by all nodes in run: %s\n"
 254                 "SIMD instructions selected at compile time:       %s\n"
 255                 "This program was compiled for different hardware than you are running on, "
 256                 "which could influence performance. This build might have been configured on "
 257                 "a login node with only a single AVX-512 FMA unit (in which case AVX2 is faster), "
 258                 "while the node you are running on has dual AVX-512 FMA units.",
 259                 simdString(wanted).c_str(), simdString(compiled).c_str()));
 260         warnMsg = wrapper.wrapToString(formatString(
 261                 "Compiled SIMD: %s, but for this host/run %s might be better (see log).",
 262                 simdString(compiled).c_str(), simdString(wanted).c_str()));
 263     }
 264     else if (compiled == SimdType::X86_Avx512 && wanted == SimdType::X86_Avx2
 265              && identifyAvx512FmaUnits() == 1)
 266     {
 267         // The reason for explicitly checking the number of FMA units above is to avoid triggering
 268         // this conditional if the AVX2 SIMD was requested by some other node in a heterogeneous MPI run.
 269         logMsg  = wrapper.wrapToString(formatString(
 270                 "Highest SIMD level requested by all nodes in run: %s\n"
 271                 "SIMD instructions selected at compile time:       %s\n"
 272                 "This program was compiled for different hardware than you are running on, "
 273                 "which could influence performance."
 274                 "This host supports AVX-512, but since it only has 1 AVX-512"
 275                 "FMA unit, it would be faster to use AVX2 instead.",
 276                 simdString(wanted).c_str(), simdString(compiled).c_str()));
 277         warnMsg = wrapper.wrapToString(formatString(
 278                 "Compiled SIMD: %s, but for this host/run %s might be better (see log).",
 279                 simdString(compiled).c_str(), simdString(wanted).c_str()));
 280     }
 281     else if (compiled == SimdType::X86_Avx2 && wanted == SimdType::X86_Avx2_128)
 282     {
 283         // Wanted SimdType::X86_Avx2_128 can only be the AMD Zen architecture.
 284         // AVX2_256 is only up to a few percent slower than AVX2_128
 285         // in both single and double precision. AVX2_256 is slightly
 286         // faster with nonbondeds and PME on a GPU. Don't warn the user.
 287     }
 288     else if (compiled > wanted && !(compiled == SimdType::X86_Avx && wanted == SimdType::X86_Avx128Fma))
 289     {
 290         // Normally it is close to catastrophic if the compiled SIMD type is larger than
 291         // the supported one, but AVX128Fma is an exception: AMD CPUs will (strongly) prefer
 292         // AVX128Fma, but they will work fine with AVX too. Thus, make an exception for this.
 293         logMsg = wrapper.wrapToString(
 294                 formatString("Highest SIMD level requested by all nodes in run: %s\n"
 295                              "SIMD instructions selected at compile time:       %s\n"
 296                              "Compiled SIMD newer than requested; program might crash.",
 297                              simdString(wanted).c_str(), simdString(compiled).c_str()));
 298         warnMsg = logMsg;
 299     }
 300     else if (wanted != compiled)
 301     {
 302         // This warning will also occur if compiled is X86_Avx and wanted is X86_Avx128Fma
 303         logMsg  = wrapper.wrapToString(formatString(
 304                 "Highest SIMD level requested by all nodes in run: %s\n"
 305                 "SIMD instructions selected at compile time:       %s\n"
 306                 "This program was compiled for different hardware than you are running on, "
 307                 "which could influence performance.",
 308                 simdString(wanted).c_str(), simdString(compiled).c_str()));
 309         warnMsg = wrapper.wrapToString(formatString(
 310                 "Compiled SIMD: %s, but for this host/run %s might be better (see log).",
 311                 simdString(compiled).c_str(), simdString(wanted).c_str()));
 312 #if GMX_SIMD_ARM_SVE
 313     }
 314     else if ((compiled == SimdType::Arm_Sve) && (svcntb() != GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8))
 315     {
 316         logMsg  = wrapper.wrapToString(formatString(
 317                 "Longest SVE length requested by all nodes in run: %d\n"
 318                 "SVE length selected at compile time:               %ld\n"
 319                 "This program was compiled for different hardware than you are running on, "
 320                 "which will lead to incorrect behavior.\n"
 321                 "Aborting",
 322                 GMX_SIMD_ARM_SVE_LENGTH_VALUE, svcntb() * 8));
 323         warnMsg = wrapper.wrapToString(formatString(
 324                 "Compiled SVE Length: %d, but for this process requires %ld (see log).",
 325                 GMX_SIMD_ARM_SVE_LENGTH_VALUE, svcntb() * 8));
 326 #endif
 327     }
 328
 329     if (!logMsg.empty() && log != nullptr)
 330     {
 331         fprintf(log, "%s\n", logMsg.c_str());
 332     }
 333     if (!warnMsg.empty() && warnToStdErr)
 334     {
 335         fprintf(stderr, "%s\n", warnMsg.c_str());
 336     }
 337 #if GMX_SIMD_ARM_SVE
 338     if ((compiled == SimdType::Arm_Sve) && (svcntb() != GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8))
 339     {
 340         gmx_exit_on_fatal_error(ExitType_Abort, 1);
 341     }
 342 #endif
 343
 344     return (wanted == compiled);
 345 }
 346
 347 /*! \endcond */
 348
 349 } // namespace gmx