clang-tidy: google tests applicable
[alexxy/gromacs.git] / src / gromacs / simd / support.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2015,2016,2017,2018, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 /*! \internal \file
37  *
38  * \brief Implements SIMD architecture support query routines
39  *
40  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
41  *
42  * \ingroup module_simd
43  */
44
45 #include "gmxpre.h"
46
47 #include "support.h"
48
49 #include "config.h"
50
51 #include <cstdio>
52 #include <cstdlib>
53
54 #include <map>
55 #include <string>
56
57 #include "gromacs/hardware/cpuinfo.h"
58 #include "gromacs/hardware/identifyavx512fmaunits.h"
59 #include "gromacs/utility/stringutil.h"
60
61 namespace gmx
62 {
63
64 /*! \cond libapi */
65
66 const std::string &
67 simdString(SimdType s)
68 {
69     static const std::map<SimdType, std::string> name =
70     {
71         { SimdType::None,           "None"            },
72         { SimdType::Reference,      "Reference"       },
73         { SimdType::Generic,        "Generic"         },
74         { SimdType::X86_Sse2,       "SSE2"            },
75         { SimdType::X86_Sse4_1,     "SSE4.1"          },
76         { SimdType::X86_Avx128Fma,  "AVX_128_FMA"     },
77         { SimdType::X86_Avx,        "AVX_256"         },
78         { SimdType::X86_Avx2,       "AVX2_256"        },
79         { SimdType::X86_Avx2_128,   "AVX2_128"        },
80         { SimdType::X86_Avx512,     "AVX_512"         },
81         { SimdType::X86_Avx512Knl,  "AVX_512_KNL"     },
82         { SimdType::X86_Mic,        "X86_MIC"         },
83         { SimdType::Arm_Neon,       "ARM_NEON"        },
84         { SimdType::Arm_NeonAsimd,  "ARM_NEON_ASIMD"  },
85         { SimdType::Ibm_Vmx,        "IBM_VMX"         },
86         { SimdType::Ibm_Vsx,        "IBM_VSX"         },
87         { SimdType::Fujitsu_HpcAce, "Fujitsu HPC-ACE" }
88     };
89
90     return name.at(s);
91 }
92
93 SimdType
94 simdSuggested(const CpuInfo &c)
95 {
96     SimdType suggested = SimdType::None;
97
98     if (c.supportLevel() >= CpuInfo::SupportLevel::Features)
99     {
100         switch (c.vendor())
101         {
102             case CpuInfo::Vendor::Intel:
103                 if (c.feature(CpuInfo::Feature::X86_Avx512ER))
104                 {
105                     suggested = SimdType::X86_Avx512Knl;
106                 }
107                 else if (c.feature(CpuInfo::Feature::X86_Avx512F))
108                 {
109                     // If we could not identify the number of AVX512 FMA units we assume 2
110                     suggested = ( identifyAvx512FmaUnits() == 1 ) ? SimdType::X86_Avx2 : SimdType::X86_Avx512;
111                 }
112                 else if (c.feature(CpuInfo::Feature::X86_Avx2))
113                 {
114                     suggested = SimdType::X86_Avx2;
115                 }
116                 else if (c.feature(CpuInfo::Feature::X86_Avx))
117                 {
118                     suggested = SimdType::X86_Avx;
119                 }
120                 else if (c.feature(CpuInfo::Feature::X86_Sse4_1))
121                 {
122                     suggested = SimdType::X86_Sse4_1;
123                 }
124                 else if (c.feature(CpuInfo::Feature::X86_Sse2))
125                 {
126                     suggested = SimdType::X86_Sse2;
127                 }
128                 break;
129             case CpuInfo::Vendor::Amd:
130                 if (c.feature(CpuInfo::Feature::X86_Avx2))
131                 {
132                     // AMD Ryzen supports 256-bit AVX2, but performs better with 128-bit
133                     // since it can execute two independent such instructions per cycle,
134                     // and wider SIMD has slightly lower efficiency in GROMACS.
135                     suggested = SimdType::X86_Avx2_128;
136                 }
137                 else if (c.feature(CpuInfo::Feature::X86_Avx))
138                 {
139                     // Use 128-bit FMA SIMD if Fma4 flag is set, otherwise plain 256-bit AVX
140                     if (c.feature(CpuInfo::Feature::X86_Fma4))
141                     {
142                         suggested = SimdType::X86_Avx128Fma;
143                     }
144                     else
145                     {
146                         suggested = SimdType::X86_Avx;
147                     }
148                 }
149                 else if (c.feature(CpuInfo::Feature::X86_Sse4_1))
150                 {
151                     suggested = SimdType::X86_Sse4_1;
152                 }
153                 else if (c.feature(CpuInfo::Feature::X86_Sse2))
154                 {
155                     suggested = SimdType::X86_Sse2;
156                 }
157
158                 break;
159             case CpuInfo::Vendor::Arm:
160                 if (c.feature(CpuInfo::Feature::Arm_NeonAsimd))
161                 {
162                     suggested = SimdType::Arm_NeonAsimd;
163                 }
164                 else if (c.feature(CpuInfo::Feature::Arm_Neon))
165                 {
166                     suggested = SimdType::Arm_Neon;
167                 }
168                 break;
169             case CpuInfo::Vendor::Ibm:
170                 if (c.feature(CpuInfo::Feature::Ibm_Vsx))
171                 {
172                     suggested = SimdType::Ibm_Vsx;
173                 }
174                 else if (c.feature(CpuInfo::Feature::Ibm_Vmx))
175                 {
176                     suggested = SimdType::Ibm_Vmx;
177                 }
178                 break;
179             case CpuInfo::Vendor::Fujitsu:
180                 if (c.feature(CpuInfo::Feature::Fujitsu_HpcAce))
181                 {
182                     suggested = SimdType::Fujitsu_HpcAce;
183                 }
184                 break;
185             default:
186                 break;
187         }
188     }
189     return suggested;
190 }
191
192 SimdType
193 simdCompiled()
194 {
195 #if GMX_SIMD_X86_AVX_512_KNL
196     return SimdType::X86_Avx512Knl;
197 #elif GMX_SIMD_X86_AVX_512
198     return SimdType::X86_Avx512;
199 #elif GMX_SIMD_X86_MIC
200     return SimdType::X86_Mic;
201 #elif GMX_SIMD_X86_AVX2_256
202     return SimdType::X86_Avx2;
203 #elif GMX_SIMD_X86_AVX2_128
204     return SimdType::X86_Avx2_128;
205 #elif GMX_SIMD_X86_AVX_256
206     return SimdType::X86_Avx;
207 #elif GMX_SIMD_X86_AVX_128_FMA
208     return SimdType::X86_Avx128Fma;
209 #elif GMX_SIMD_X86_SSE4_1
210     return SimdType::X86_Sse4_1;
211 #elif GMX_SIMD_X86_SSE2
212     return SimdType::X86_Sse2;
213 #elif GMX_SIMD_ARM_NEON
214     return SimdType::Arm_Neon;
215 #elif GMX_SIMD_ARM_NEON_ASIMD
216     return SimdType::Arm_NeonAsimd;
217 #elif GMX_SIMD_IBM_VMX
218     return SimdType::Ibm_Vmx;
219 #elif GMX_SIMD_IBM_VSX
220     return SimdType::Ibm_Vsx;
221 #elif GMX_SIMD_SPARC64_HPC_ACE
222     return SimdType::Fujitsu_HpcAce;
223 #elif GMX_SIMD_REFERENCE
224     return SimdType::Reference;
225 #else
226     return SimdType::None;
227 #endif
228 }
229
230 bool
231 simdCheck(gmx::SimdType    wanted,
232           FILE *           log,
233           bool             warnToStdErr)
234 {
235     SimdType             compiled  = simdCompiled();
236
237     gmx::TextLineWrapper wrapper;
238     std::string          logMsg;
239     std::string          warnMsg;
240
241     wrapper.settings().setLineLength(78);
242
243     if (compiled == SimdType::X86_Avx2 && wanted == SimdType::X86_Avx512)
244     {
245         logMsg = wrapper.wrapToString(formatString("Highest SIMD level requested by all nodes in run: %s\n"
246                                                    "SIMD instructions selected at compile time:       %s\n"
247                                                    "This program was compiled for different hardware than you are running on, "
248                                                    "which could influence performance. This build might have been configured on "
249                                                    "a login node with only a single AVX-512 FMA unit (in which case AVX2 is faster), "
250                                                    "while the node you are running on has dual AVX-512 FMA units.",
251                                                    simdString(wanted).c_str(), simdString(compiled).c_str()));
252         warnMsg = wrapper.wrapToString(formatString("Compiled SIMD: %s, but for this host/run %s might be better (see log).",
253                                                     simdString(compiled).c_str(), simdString(wanted).c_str()));
254     }
255     else if (compiled == SimdType::X86_Avx512 && wanted == SimdType::X86_Avx2 && identifyAvx512FmaUnits() == 1)
256     {
257         // The reason for explicitly checking the number of FMA units above is to avoid triggering
258         // this conditional if the AVX2 SIMD was requested by some other node in a heterogeneous MPI run.
259         logMsg = wrapper.wrapToString(formatString("Highest SIMD level requested by all nodes in run: %s\n"
260                                                    "SIMD instructions selected at compile time:       %s\n"
261                                                    "This program was compiled for different hardware than you are running on, "
262                                                    "which could influence performance."
263                                                    "This host supports AVX-512, but since it only has 1 AVX-512"
264                                                    "FMA unit, it would be faster to use AVX2 instead.",
265                                                    simdString(wanted).c_str(), simdString(compiled).c_str()));
266         warnMsg = wrapper.wrapToString(formatString("Compiled SIMD: %s, but for this host/run %s might be better (see log).",
267                                                     simdString(compiled).c_str(), simdString(wanted).c_str()));
268     }
269     else if (compiled == SimdType::X86_Avx2 && wanted == SimdType::X86_Avx2_128)
270     {
271         // Wanted SimdType::X86_Avx2_128 can only be the AMD Zen architecture.
272         // AVX2_256 is only up to a few percent slower than AVX2_128
273         // in both single and double precision. AVX2_256 is slightly
274         // faster with nonbondeds and PME on a GPU. Don't warn the user.
275     }
276     else if (compiled > wanted && !(compiled == SimdType::X86_Avx && wanted == SimdType::X86_Avx128Fma))
277     {
278         // Normally it is close to catastrophic if the compiled SIMD type is larger than
279         // the supported one, but AVX128Fma is an exception: AMD CPUs will (strongly) prefer
280         // AVX128Fma, but they will work fine with AVX too. Thus, make an exception for this.
281         logMsg = wrapper.wrapToString(formatString("Highest SIMD level requested by all nodes in run: %s\n"
282                                                    "SIMD instructions selected at compile time:       %s\n"
283                                                    "Compiled SIMD newer than requested; program might crash.",
284                                                    simdString(wanted).c_str(), simdString(compiled).c_str()));
285         warnMsg = logMsg;
286     }
287     else if (wanted != compiled)
288     {
289         // This warning will also occur if compiled is X86_Avx and wanted is X86_Avx128Fma
290         logMsg = wrapper.wrapToString(formatString("Highest SIMD level requested by all nodes in run: %s\n"
291                                                    "SIMD instructions selected at compile time:       %s\n"
292                                                    "This program was compiled for different hardware than you are running on, "
293                                                    "which could influence performance.",
294                                                    simdString(wanted).c_str(), simdString(compiled).c_str()));
295         warnMsg = wrapper.wrapToString(formatString("Compiled SIMD: %s, but for this host/run %s might be better (see log).",
296                                                     simdString(compiled).c_str(), simdString(wanted).c_str()));
297     }
298
299     if (!logMsg.empty() && log != nullptr)
300     {
301         fprintf(log, "%s\n", logMsg.c_str());
302     }
303     if (!warnMsg.empty() && warnToStdErr)
304     {
305         fprintf(stderr, "%s\n", warnMsg.c_str());
306     }
307
308     return (wanted == compiled);
309 }
310
311 /*! \endcond */
312
313 }  // namespace gmx