2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 * \brief Implements SIMD architecture support query routines
40 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
42 * \ingroup module_simd
57 #include "gromacs/hardware/cpuinfo.h"
58 #include "gromacs/hardware/identifyavx512fmaunits.h"
59 #include "gromacs/utility/stringutil.h"
67 simdString(SimdType s)
69 static const std::map<SimdType, std::string> name =
71 { SimdType::None, "None" },
72 { SimdType::Reference, "Reference" },
73 { SimdType::Generic, "Generic" },
74 { SimdType::X86_Sse2, "SSE2" },
75 { SimdType::X86_Sse4_1, "SSE4.1" },
76 { SimdType::X86_Avx128Fma, "AVX_128_FMA" },
77 { SimdType::X86_Avx, "AVX_256" },
78 { SimdType::X86_Avx2, "AVX2_256" },
79 { SimdType::X86_Avx2_128, "AVX2_128" },
80 { SimdType::X86_Avx512, "AVX_512" },
81 { SimdType::X86_Avx512Knl, "AVX_512_KNL" },
82 { SimdType::X86_Mic, "X86_MIC" },
83 { SimdType::Arm_Neon, "ARM_NEON" },
84 { SimdType::Arm_NeonAsimd, "ARM_NEON_ASIMD" },
85 { SimdType::Ibm_Vmx, "IBM_VMX" },
86 { SimdType::Ibm_Vsx, "IBM_VSX" },
87 { SimdType::Fujitsu_HpcAce, "Fujitsu HPC-ACE" }
94 simdSuggested(const CpuInfo &c)
96 SimdType suggested = SimdType::None;
98 if (c.supportLevel() >= CpuInfo::SupportLevel::Features)
102 case CpuInfo::Vendor::Intel:
103 if (c.feature(CpuInfo::Feature::X86_Avx512ER))
105 suggested = SimdType::X86_Avx512Knl;
107 else if (c.feature(CpuInfo::Feature::X86_Avx512F))
109 // If we could not identify the number of AVX512 FMA units we assume 2
110 suggested = ( identifyAvx512FmaUnits() == 1 ) ? SimdType::X86_Avx2 : SimdType::X86_Avx512;
112 else if (c.feature(CpuInfo::Feature::X86_Avx2))
114 suggested = SimdType::X86_Avx2;
116 else if (c.feature(CpuInfo::Feature::X86_Avx))
118 suggested = SimdType::X86_Avx;
120 else if (c.feature(CpuInfo::Feature::X86_Sse4_1))
122 suggested = SimdType::X86_Sse4_1;
124 else if (c.feature(CpuInfo::Feature::X86_Sse2))
126 suggested = SimdType::X86_Sse2;
129 case CpuInfo::Vendor::Amd:
130 case CpuInfo::Vendor::Hygon:
131 if (c.feature(CpuInfo::Feature::X86_Avx2))
133 // AMD Ryzen supports 256-bit AVX2, but performs better with 128-bit
134 // since it can execute two independent such instructions per cycle,
135 // and wider SIMD has slightly lower efficiency in GROMACS.
136 suggested = SimdType::X86_Avx2_128;
138 else if (c.feature(CpuInfo::Feature::X86_Avx))
140 // Use 128-bit FMA SIMD if Fma4 flag is set, otherwise plain 256-bit AVX
141 if (c.feature(CpuInfo::Feature::X86_Fma4))
143 suggested = SimdType::X86_Avx128Fma;
147 suggested = SimdType::X86_Avx;
150 else if (c.feature(CpuInfo::Feature::X86_Sse4_1))
152 suggested = SimdType::X86_Sse4_1;
154 else if (c.feature(CpuInfo::Feature::X86_Sse2))
156 suggested = SimdType::X86_Sse2;
160 case CpuInfo::Vendor::Arm:
161 if (c.feature(CpuInfo::Feature::Arm_NeonAsimd))
163 suggested = SimdType::Arm_NeonAsimd;
165 else if (c.feature(CpuInfo::Feature::Arm_Neon))
167 suggested = SimdType::Arm_Neon;
170 case CpuInfo::Vendor::Ibm:
171 if (c.feature(CpuInfo::Feature::Ibm_Vsx))
173 suggested = SimdType::Ibm_Vsx;
175 else if (c.feature(CpuInfo::Feature::Ibm_Vmx))
177 suggested = SimdType::Ibm_Vmx;
180 case CpuInfo::Vendor::Fujitsu:
181 if (c.feature(CpuInfo::Feature::Fujitsu_HpcAce))
183 suggested = SimdType::Fujitsu_HpcAce;
196 #if GMX_SIMD_X86_AVX_512_KNL
197 return SimdType::X86_Avx512Knl;
198 #elif GMX_SIMD_X86_AVX_512
199 return SimdType::X86_Avx512;
200 #elif GMX_SIMD_X86_MIC
201 return SimdType::X86_Mic;
202 #elif GMX_SIMD_X86_AVX2_256
203 return SimdType::X86_Avx2;
204 #elif GMX_SIMD_X86_AVX2_128
205 return SimdType::X86_Avx2_128;
206 #elif GMX_SIMD_X86_AVX_256
207 return SimdType::X86_Avx;
208 #elif GMX_SIMD_X86_AVX_128_FMA
209 return SimdType::X86_Avx128Fma;
210 #elif GMX_SIMD_X86_SSE4_1
211 return SimdType::X86_Sse4_1;
212 #elif GMX_SIMD_X86_SSE2
213 return SimdType::X86_Sse2;
214 #elif GMX_SIMD_ARM_NEON
215 return SimdType::Arm_Neon;
216 #elif GMX_SIMD_ARM_NEON_ASIMD
217 return SimdType::Arm_NeonAsimd;
218 #elif GMX_SIMD_IBM_VMX
219 return SimdType::Ibm_Vmx;
220 #elif GMX_SIMD_IBM_VSX
221 return SimdType::Ibm_Vsx;
222 #elif GMX_SIMD_SPARC64_HPC_ACE
223 return SimdType::Fujitsu_HpcAce;
224 #elif GMX_SIMD_REFERENCE
225 return SimdType::Reference;
227 return SimdType::None;
232 simdCheck(gmx::SimdType wanted,
236 SimdType compiled = simdCompiled();
238 gmx::TextLineWrapper wrapper;
242 wrapper.settings().setLineLength(78);
244 if (compiled == SimdType::X86_Avx2 && wanted == SimdType::X86_Avx512)
246 logMsg = wrapper.wrapToString(formatString("Highest SIMD level requested by all nodes in run: %s\n"
247 "SIMD instructions selected at compile time: %s\n"
248 "This program was compiled for different hardware than you are running on, "
249 "which could influence performance. This build might have been configured on "
250 "a login node with only a single AVX-512 FMA unit (in which case AVX2 is faster), "
251 "while the node you are running on has dual AVX-512 FMA units.",
252 simdString(wanted).c_str(), simdString(compiled).c_str()));
253 warnMsg = wrapper.wrapToString(formatString("Compiled SIMD: %s, but for this host/run %s might be better (see log).",
254 simdString(compiled).c_str(), simdString(wanted).c_str()));
256 else if (compiled == SimdType::X86_Avx512 && wanted == SimdType::X86_Avx2 && identifyAvx512FmaUnits() == 1)
258 // The reason for explicitly checking the number of FMA units above is to avoid triggering
259 // this conditional if the AVX2 SIMD was requested by some other node in a heterogeneous MPI run.
260 logMsg = wrapper.wrapToString(formatString("Highest SIMD level requested by all nodes in run: %s\n"
261 "SIMD instructions selected at compile time: %s\n"
262 "This program was compiled for different hardware than you are running on, "
263 "which could influence performance."
264 "This host supports AVX-512, but since it only has 1 AVX-512"
265 "FMA unit, it would be faster to use AVX2 instead.",
266 simdString(wanted).c_str(), simdString(compiled).c_str()));
267 warnMsg = wrapper.wrapToString(formatString("Compiled SIMD: %s, but for this host/run %s might be better (see log).",
268 simdString(compiled).c_str(), simdString(wanted).c_str()));
270 else if (compiled == SimdType::X86_Avx2 && wanted == SimdType::X86_Avx2_128)
272 // Wanted SimdType::X86_Avx2_128 can only be the AMD Zen architecture.
273 // AVX2_256 is only up to a few percent slower than AVX2_128
274 // in both single and double precision. AVX2_256 is slightly
275 // faster with nonbondeds and PME on a GPU. Don't warn the user.
277 else if (compiled > wanted && !(compiled == SimdType::X86_Avx && wanted == SimdType::X86_Avx128Fma))
279 // Normally it is close to catastrophic if the compiled SIMD type is larger than
280 // the supported one, but AVX128Fma is an exception: AMD CPUs will (strongly) prefer
281 // AVX128Fma, but they will work fine with AVX too. Thus, make an exception for this.
282 logMsg = wrapper.wrapToString(formatString("Highest SIMD level requested by all nodes in run: %s\n"
283 "SIMD instructions selected at compile time: %s\n"
284 "Compiled SIMD newer than requested; program might crash.",
285 simdString(wanted).c_str(), simdString(compiled).c_str()));
288 else if (wanted != compiled)
290 // This warning will also occur if compiled is X86_Avx and wanted is X86_Avx128Fma
291 logMsg = wrapper.wrapToString(formatString("Highest SIMD level requested by all nodes in run: %s\n"
292 "SIMD instructions selected at compile time: %s\n"
293 "This program was compiled for different hardware than you are running on, "
294 "which could influence performance.",
295 simdString(wanted).c_str(), simdString(compiled).c_str()));
296 warnMsg = wrapper.wrapToString(formatString("Compiled SIMD: %s, but for this host/run %s might be better (see log).",
297 simdString(compiled).c_str(), simdString(wanted).c_str()));
300 if (!logMsg.empty() && log != nullptr)
302 fprintf(log, "%s\n", logMsg.c_str());
304 if (!warnMsg.empty() && warnToStdErr)
306 fprintf(stderr, "%s\n", warnMsg.c_str());
309 return (wanted == compiled);