2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 * \brief Implements SIMD architecture support query routines
40 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
42 * \ingroup module_simd
57 #include "gromacs/hardware/cpuinfo.h"
58 #include "gromacs/hardware/identifyavx512fmaunits.h"
59 #include "gromacs/utility/stringutil.h"
67 simdString(SimdType s)
69 static const std::map<SimdType, std::string> name =
71 { SimdType::None, "None" },
72 { SimdType::Reference, "Reference" },
73 { SimdType::Generic, "Generic" },
74 { SimdType::X86_Sse2, "SSE2" },
75 { SimdType::X86_Sse4_1, "SSE4.1" },
76 { SimdType::X86_Avx128Fma, "AVX_128_FMA" },
77 { SimdType::X86_Avx, "AVX_256" },
78 { SimdType::X86_Avx2, "AVX2_256" },
79 { SimdType::X86_Avx2_128, "AVX2_128" },
80 { SimdType::X86_Avx512, "AVX_512" },
81 { SimdType::X86_Avx512Knl, "AVX_512_KNL" },
82 { SimdType::X86_Mic, "X86_MIC" },
83 { SimdType::Arm_Neon, "ARM_NEON" },
84 { SimdType::Arm_NeonAsimd, "ARM_NEON_ASIMD" },
85 { SimdType::Ibm_Vmx, "IBM_VMX" },
86 { SimdType::Ibm_Vsx, "IBM_VSX" },
87 { SimdType::Fujitsu_HpcAce, "Fujitsu HPC-ACE" }
94 simdSuggested(const CpuInfo &c)
96 SimdType suggested = SimdType::None;
98 if (c.supportLevel() >= CpuInfo::SupportLevel::Features)
102 case CpuInfo::Vendor::Intel:
103 if (c.feature(CpuInfo::Feature::X86_Avx512ER))
105 suggested = SimdType::X86_Avx512Knl;
107 else if (c.feature(CpuInfo::Feature::X86_Avx512F))
109 // If we could not identify the number of AVX512 FMA units we assume 2
110 suggested = ( identifyAvx512FmaUnits() == 1 ) ? SimdType::X86_Avx2 : SimdType::X86_Avx512;
112 else if (c.feature(CpuInfo::Feature::X86_Avx2))
114 suggested = SimdType::X86_Avx2;
116 else if (c.feature(CpuInfo::Feature::X86_Avx))
118 suggested = SimdType::X86_Avx;
120 else if (c.feature(CpuInfo::Feature::X86_Sse4_1))
122 suggested = SimdType::X86_Sse4_1;
124 else if (c.feature(CpuInfo::Feature::X86_Sse2))
126 suggested = SimdType::X86_Sse2;
129 case CpuInfo::Vendor::Amd:
130 if (c.feature(CpuInfo::Feature::X86_Avx2))
132 // AMD Ryzen supports 256-bit AVX2, but performs better with 128-bit
133 // since it can execute two independent such instructions per cycle,
134 // and wider SIMD has slightly lower efficiency in GROMACS.
135 suggested = SimdType::X86_Avx2_128;
137 else if (c.feature(CpuInfo::Feature::X86_Avx))
139 // Use 128-bit FMA SIMD if Fma4 flag is set, otherwise plain 256-bit AVX
140 if (c.feature(CpuInfo::Feature::X86_Fma4))
142 suggested = SimdType::X86_Avx128Fma;
146 suggested = SimdType::X86_Avx;
149 else if (c.feature(CpuInfo::Feature::X86_Sse4_1))
151 suggested = SimdType::X86_Sse4_1;
153 else if (c.feature(CpuInfo::Feature::X86_Sse2))
155 suggested = SimdType::X86_Sse2;
159 case CpuInfo::Vendor::Arm:
160 if (c.feature(CpuInfo::Feature::Arm_NeonAsimd))
162 suggested = SimdType::Arm_NeonAsimd;
164 else if (c.feature(CpuInfo::Feature::Arm_Neon))
166 suggested = SimdType::Arm_Neon;
169 case CpuInfo::Vendor::Ibm:
170 if (c.feature(CpuInfo::Feature::Ibm_Vsx))
172 suggested = SimdType::Ibm_Vsx;
174 else if (c.feature(CpuInfo::Feature::Ibm_Vmx))
176 suggested = SimdType::Ibm_Vmx;
179 case CpuInfo::Vendor::Fujitsu:
180 if (c.feature(CpuInfo::Feature::Fujitsu_HpcAce))
182 suggested = SimdType::Fujitsu_HpcAce;
195 #if GMX_SIMD_X86_AVX_512_KNL
196 return SimdType::X86_Avx512Knl;
197 #elif GMX_SIMD_X86_AVX_512
198 return SimdType::X86_Avx512;
199 #elif GMX_SIMD_X86_MIC
200 return SimdType::X86_Mic;
201 #elif GMX_SIMD_X86_AVX2_256
202 return SimdType::X86_Avx2;
203 #elif GMX_SIMD_X86_AVX2_128
204 return SimdType::X86_Avx2_128;
205 #elif GMX_SIMD_X86_AVX_256
206 return SimdType::X86_Avx;
207 #elif GMX_SIMD_X86_AVX_128_FMA
208 return SimdType::X86_Avx128Fma;
209 #elif GMX_SIMD_X86_SSE4_1
210 return SimdType::X86_Sse4_1;
211 #elif GMX_SIMD_X86_SSE2
212 return SimdType::X86_Sse2;
213 #elif GMX_SIMD_ARM_NEON
214 return SimdType::Arm_Neon;
215 #elif GMX_SIMD_ARM_NEON_ASIMD
216 return SimdType::Arm_NeonAsimd;
217 #elif GMX_SIMD_IBM_VMX
218 return SimdType::Ibm_Vmx;
219 #elif GMX_SIMD_IBM_VSX
220 return SimdType::Ibm_Vsx;
221 #elif GMX_SIMD_SPARC64_HPC_ACE
222 return SimdType::Fujitsu_HpcAce;
223 #elif GMX_SIMD_REFERENCE
224 return SimdType::Reference;
226 return SimdType::None;
231 simdCheck(gmx::SimdType wanted,
235 SimdType compiled = simdCompiled();
237 gmx::TextLineWrapper wrapper;
241 wrapper.settings().setLineLength(78);
243 if (compiled == SimdType::X86_Avx2 && wanted == SimdType::X86_Avx512)
245 logMsg = wrapper.wrapToString(formatString("Highest SIMD level requested by all nodes in run: %s\n"
246 "SIMD instructions selected at compile time: %s\n"
247 "This program was compiled for different hardware than you are running on, "
248 "which could influence performance. This build might have been configured on "
249 "a login node with only a single AVX-512 FMA unit (in which case AVX2 is faster), "
250 "while the node you are running on has dual AVX-512 FMA units.",
251 simdString(wanted).c_str(), simdString(compiled).c_str()));
252 warnMsg = wrapper.wrapToString(formatString("Compiled SIMD: %s, but for this host/run %s might be better (see log).",
253 simdString(compiled).c_str(), simdString(wanted).c_str()));
255 else if (compiled == SimdType::X86_Avx512 && wanted == SimdType::X86_Avx2 && identifyAvx512FmaUnits() == 1)
257 // The reason for explicitly checking the number of FMA units above is to avoid triggering
258 // this conditional if the AVX2 SIMD was requested by some other node in a heterogeneous MPI run.
259 logMsg = wrapper.wrapToString(formatString("Highest SIMD level requested by all nodes in run: %s\n"
260 "SIMD instructions selected at compile time: %s\n"
261 "This program was compiled for different hardware than you are running on, "
262 "which could influence performance."
263 "This host supports AVX-512, but since it only has 1 AVX-512"
264 "FMA unit, it would be faster to use AVX2 instead.",
265 simdString(wanted).c_str(), simdString(compiled).c_str()));
266 warnMsg = wrapper.wrapToString(formatString("Compiled SIMD: %s, but for this host/run %s might be better (see log).",
267 simdString(compiled).c_str(), simdString(wanted).c_str()));
269 else if (compiled == SimdType::X86_Avx2 && wanted == SimdType::X86_Avx2_128)
271 // Wanted SimdType::X86_Avx2_128 can only be the AMD Zen architecture.
272 // AVX2_256 is only up to a few percent slower than AVX2_128
273 // in both single and double precision. AVX2_256 is slightly
274 // faster with nonbondeds and PME on a GPU. Don't warn the user.
276 else if (compiled > wanted && !(compiled == SimdType::X86_Avx && wanted == SimdType::X86_Avx128Fma))
278 // Normally it is close to catastrophic if the compiled SIMD type is larger than
279 // the supported one, but AVX128Fma is an exception: AMD CPUs will (strongly) prefer
280 // AVX128Fma, but they will work fine with AVX too. Thus, make an exception for this.
281 logMsg = wrapper.wrapToString(formatString("Highest SIMD level requested by all nodes in run: %s\n"
282 "SIMD instructions selected at compile time: %s\n"
283 "Compiled SIMD newer than requested; program might crash.",
284 simdString(wanted).c_str(), simdString(compiled).c_str()));
287 else if (wanted != compiled)
289 // This warning will also occur if compiled is X86_Avx and wanted is X86_Avx128Fma
290 logMsg = wrapper.wrapToString(formatString("Highest SIMD level requested by all nodes in run: %s\n"
291 "SIMD instructions selected at compile time: %s\n"
292 "This program was compiled for different hardware than you are running on, "
293 "which could influence performance.",
294 simdString(wanted).c_str(), simdString(compiled).c_str()));
295 warnMsg = wrapper.wrapToString(formatString("Compiled SIMD: %s, but for this host/run %s might be better (see log).",
296 simdString(compiled).c_str(), simdString(wanted).c_str()));
299 if (!logMsg.empty() && log != nullptr)
301 fprintf(log, "%s\n", logMsg.c_str());
303 if (!warnMsg.empty() && warnToStdErr)
305 fprintf(stderr, "%s\n", warnMsg.c_str());
308 return (wanted == compiled);