Merge "Simple patch for openmm_wrapper.cpp" into release-4-6
[alexxy/gromacs.git] / src / gmxlib / gmx_detectcpu.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  * 
4  * This file is part of GROMACS.
5  * Copyright (c) 2012-  
6  *
7  * Written by the Gromacs development team under coordination of
8  * David van der Spoel, Berk Hess, and Erik Lindahl.
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public License
12  * as published by the Free Software Foundation; either version 2
13  * of the License, or (at your option) any later version.
14  *
15  * To help us fund GROMACS development, we humbly ask that you cite
16  * the research papers on the package. Check out http://www.gromacs.org
17  * 
18  * And Hey:
19  * Gnomes, ROck Monsters And Chili Sauce
20  */
21 #ifdef HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <ctype.h>
29 #ifdef _MSC_VER
30 /* MSVC definition for __cpuid() */
31 #include <intrin.h>
32 #endif
33
34
35 #include "gmx_detectcpu.h"
36
37
38 const char *
39 gmx_detectcpu_vendorid_string[GMX_DETECTCPU_NVENDORS] =
40 {
41     "Unknown",
42     "GenuineIntel",
43     "AuthenticAMD"
44 };
45
46 const char *
47 gmx_detectcpu_feature_string[GMX_DETECTCPU_NFEATURES] =
48 {
49     "CannotDetect",
50     "htt",
51     "sse2",
52     "sse4.1",
53     "rdrand",
54     "aes",
55     "avx",
56     "fma",
57     "fma4",
58     "xop",
59     "avx2",
60     "rdtscp"
61 };
62
63 const char *
64 gmx_detectcpu_acceleration_string[GMX_DETECTCPU_NACCELERATIONS] =
65 {
66     "None",
67     "SSE2",
68     "SSE4.1",
69     "AVX_128_FMA",
70     "AVX_256"
71 };
72
73
74
75
76
77 /* What type of acceleration was compiled in, if any?
78  * This is set from Cmake. Note that the SSE2 and SSE4_1 macros are set for
79  * AVX too, so it is important that they appear last in the list.
80  */
81 #ifdef GMX_X86_AVX_256
82 static const
83 gmx_detectcpu_acceleration_t 
84 compiled_acc = GMX_DETECTCPU_ACCELERATION_X86_AVX_256;
85 #elif defined GMX_X86_AVX_128_FMA
86 static const
87 gmx_detectcpu_acceleration_t 
88 compiled_acc = GMX_DETECTCPU_ACCELERATION_X86_AVX_128_FMA;
89 #elif defined GMX_X86_SSE4_1
90 static const
91 gmx_detectcpu_acceleration_t 
92 compiled_acc = GMX_DETECTCPU_ACCELERATION_X86_SSE4_1;
93 #elif defined GMX_X86_SSE2
94 static const
95 gmx_detectcpu_acceleration_t 
96 compiled_acc = GMX_DETECTCPU_ACCELERATION_X86_SSE2;
97 #else
98 static const
99 gmx_detectcpu_acceleration_t 
100 compiled_acc = GMX_DETECTCPU_ACCELERATION_NONE;
101 #endif
102
/* Execute CPUID on x86 class CPUs.
 *
 * level            CPUID function (leaf) to execute, loaded into EAX.
 * eax,ebx,ecx,edx  Output: contents of the four registers after CPUID.
 *
 * The sub-leaf selector (ECX) is always 0 when the instruction executes, so
 * leaves that take a sub-leaf (e.g. leaf 7) return their first sub-leaf.
 * Returns 0. See Intel/AMD docs for the meaning of the register contents.
 */
#if defined (__i386__) || defined (__x86_64__) || defined (_M_IX86) || defined (_M_X64)
/* Currently CPUID is only supported (1) if we can use an instruction on MSVC, or (2)
 * if the compiler handles GNU-style inline assembly.
 */
#if (defined GMX_X86_GCC_INLINE_ASM || defined _MSC_VER)
#define GMX_X86_HAVE_CPUID
static int
execute_cpuid_x86(unsigned int level,
                  unsigned int * eax,
                  unsigned int * ebx,
                  unsigned int * ecx,
                  unsigned int * edx)
{
    unsigned int reg_a,reg_b,reg_c,reg_d;
    int rc;

#ifdef _MSC_VER
    int CPUInfo[4];

    /* MSVC. According to the Microsoft documentation the __cpuid intrinsic
     * clears ECX before issuing the instruction, so sub-leaf 0 is selected.
     */
    __cpuid(CPUInfo,level);

    reg_a=CPUInfo[0];
    reg_b=CPUInfo[1];
    reg_c=CPUInfo[2];
    reg_d=CPUInfo[3];

    rc = 0;

#else
    /* for now this means GMX_X86_GCC_INLINE_ASM should be defined,
     * but there might be more options added in the future.
     */
#if (defined (__i386__) || defined (_M_IX86)) && defined (__PIC__)
    /* 32-bit position-independent code: ebx may hold the GOT pointer, so it
     * is swapped with a scratch register around the instruction instead of
     * being named as an operand. The scratch operand is early-clobber since
     * it is written before the inputs have been consumed.
     */
    __asm__ __volatile__("xchgl %%ebx, %1  \n\t"
                         "cpuid            \n\t"
                         "xchgl %%ebx, %1  \n\t"
                         : "=a"(reg_a), "=&r"(reg_b), "=c"(reg_c), "=d"(reg_d)
                         : "0"(level), "2"(0U));
#else
    /* x86-64, or 32-bit without PIC: ebx can be used as a normal output.
     * No push/pop is done here; stack manipulation inside inline assembly
     * is invisible to the compiler and can overwrite data it keeps below
     * the stack pointer (the x86-64 red zone).
     */
    __asm__ __volatile__("cpuid            \n\t"
                         : "=a"(reg_a), "=b"(reg_b), "=c"(reg_c), "=d"(reg_d)
                         : "0"(level), "2"(0U));
#endif

    rc = 0;
#endif
    /* If you end up having a compiler that really doesn't understand this and
     * you can't fix it, create a separate ifdef and set the results to:
     *
     * reg_a=reg_b=reg_c=reg_d=0;
     * rc = -1;
     *
     * However, this will lose you ALL Gromacs x86 acceleration, so you want to
     * try really hard before giving up!
     */

    *eax = reg_a;
    *ebx = reg_b;
    *ecx = reg_c;
    *edx = reg_d;

    return rc;
}
#endif /* GMX_X86_GCC_INLINE_ASM or _MSC_VER */
#endif /* architecture is x86 */
175
176
177 /* Identify CPU features common to Intel & AMD - mainly brand string,
178  * version and some features. Vendor has already been detected outside this.
179  */
180 static int
181 detectcpu_common_x86(gmx_detectcpu_t *              data)
182 {
183     int                       fn,max_stdfn,max_extfn;
184     unsigned int              eax,ebx,ecx,edx;
185     char                      str[GMX_DETECTCPU_STRLEN];
186     char *                    p;
187
188 #ifdef GMX_X86_HAVE_CPUID
189     /* Find largest standard/extended function input value */
190     execute_cpuid_x86(0x0,&eax,&ebx,&ecx,&edx);
191     max_stdfn = eax;
192     execute_cpuid_x86(0x80000000,&eax,&ebx,&ecx,&edx);
193     max_extfn = eax;
194
195     p = str;
196     if(max_extfn>=0x80000005)
197     {
198         /* Get CPU brand string */
199         for(fn=0x80000002;fn<0x80000005;fn++)
200         {
201             execute_cpuid_x86(fn,&eax,&ebx,&ecx,&edx);
202             memcpy(p,&eax,4);
203             memcpy(p+4,&ebx,4);
204             memcpy(p+8,&ecx,4);
205             memcpy(p+12,&edx,4);
206             p+=16;
207         }
208         *p='\0';
209
210         /* Remove empty initial space */
211         p = str;
212         while(isspace(*(p)))
213         {
214             p++;
215         }
216     }
217     else
218     {
219         *p='\0';
220     }
221     strncpy(data->brand,p,GMX_DETECTCPU_STRLEN);
222
223     /* Find basic CPU properties */
224     if(max_stdfn>=1)
225     {
226         execute_cpuid_x86(1,&eax,&ebx,&ecx,&edx);
227
228         data->family   = ((eax & 0x0FF00000) >> 20) + ((eax & 0x00000F00) >> 8);
229         /* Note that extended model should be shifted left 4, so only shift right 12 iso 16. */
230         data->model    = ((eax & 0x000F0000) >> 12) + ((eax & 0x000000F0) >> 4);
231         data->stepping = (eax & 0x0000000F);
232
233         /* Feature flags common to AMD and intel */
234         data->feature[GMX_DETECTCPU_FEATURE_X86_FMA]     = (ecx & (1 << 12)) != 0;
235         data->feature[GMX_DETECTCPU_FEATURE_X86_SSE4_1]  = (ecx & (1 << 19)) != 0;
236         data->feature[GMX_DETECTCPU_FEATURE_X86_AES]     = (ecx & (1 << 25)) != 0;
237         data->feature[GMX_DETECTCPU_FEATURE_X86_AVX]     = (ecx & (1 << 28)) != 0;
238         data->feature[GMX_DETECTCPU_FEATURE_X86_RDRAND]  = (ecx & (1 << 30)) != 0;
239
240         data->feature[GMX_DETECTCPU_FEATURE_X86_SSE2]    = (edx & (1 << 26)) != 0;
241         data->feature[GMX_DETECTCPU_FEATURE_X86_HTT]     = (edx & (1 << 28)) != 0;
242     }
243
244     if(max_extfn>=0x80000001)
245     {
246         execute_cpuid_x86(0x80000001,&eax,&ebx,&ecx,&edx);
247         data->feature[GMX_DETECTCPU_FEATURE_X86_RDTSCP]  = (edx & (1 << 27)) != 0;
248     }
249
250 #else
251     /* No CPUID present */
252     strncpy(data->brand,"Unknown CPU brand",GMX_DETECTCPU_STRLEN);
253     data->family   = 0;
254     data->model    = 0;
255     data->stepping = 0;
256 #endif
257
258     return 0;
259 }
260
261 /* Detection of AMD-specific CPU features */
262 static int
263 detectcpu_amd(gmx_detectcpu_t *              data)
264 {
265     int                       max_stdfn,max_extfn;
266     unsigned int              eax,ebx,ecx,edx;
267
268     detectcpu_common_x86(data);
269
270 #ifdef GMX_X86_HAVE_CPUID
271     execute_cpuid_x86(0x0,&eax,&ebx,&ecx,&edx);
272     max_stdfn = eax;
273
274     execute_cpuid_x86(0x80000000,&eax,&ebx,&ecx,&edx);
275     max_extfn = eax;
276
277     if(max_extfn>=0x80000001)
278     {
279         execute_cpuid_x86(0x80000001,&eax,&ebx,&ecx,&edx);
280
281         data->feature[GMX_DETECTCPU_FEATURE_X86_XOP]     = (ecx & (1 << 11)) != 0;
282         data->feature[GMX_DETECTCPU_FEATURE_X86_FMA4]    = (ecx & (1 << 16)) != 0;
283     }
284 #endif
285
286     return 0;
287 }
288
289 /* Detection of Intel-specific CPU features */
290 static int
291 detectcpu_intel(gmx_detectcpu_t *              data)
292 {
293     int                       max_stdfn;
294     unsigned int              eax,ebx,ecx,edx;
295
296     detectcpu_common_x86(data);
297
298 #ifdef GMX_X86_HAVE_CPUID
299     execute_cpuid_x86(0x0,&eax,&ebx,&ecx,&edx);
300     max_stdfn = eax;
301
302     if(max_stdfn>=7)
303     {
304         execute_cpuid_x86(0x7,&eax,&ebx,&ecx,&edx);
305         data->feature[GMX_DETECTCPU_FEATURE_X86_AVX2]    = (ebx & (1 << 5)) != 0;
306     }
307
308 #endif
309
310     return 0;
311 }
312
313 /* Try to find the vendor of the current CPU, so we know what specific
314  * detection routine to call.
315  */
316 static gmx_detectcpu_vendorid_t
317 detectcpu_vendor(void)
318 {
319     gmx_detectcpu_vendorid_t   i,vendor;
320     /* Register data used on x86 */
321     unsigned int               eax,ebx,ecx,edx;
322     char                       vendorstring[13];
323
324     /* Set default first */
325     vendor = GMX_DETECTCPU_VENDOR_UNKNOWN;
326
327 #ifdef GMX_X86_HAVE_CPUID
328     execute_cpuid_x86(0,&eax,&ebx,&ecx,&edx);
329
330     memcpy(vendorstring,&ebx,4);
331     memcpy(vendorstring+4,&edx,4);
332     memcpy(vendorstring+8,&ecx,4);
333
334     vendorstring[12]='\0';
335
336     for(i=GMX_DETECTCPU_VENDOR_UNKNOWN;i<GMX_DETECTCPU_NVENDORS;i++)
337     {
338         if(!strncmp(vendorstring,gmx_detectcpu_vendorid_string[i],12))
339         {
340             vendor = i;
341         }
342     }
343 #endif
344
345     return vendor;
346 }
347
348 int
349 gmx_detectcpu                   (gmx_detectcpu_t *              data)
350 {
351     int i;
352
353     for(i=0;i<GMX_DETECTCPU_NFEATURES;i++)
354     {
355         data->feature[i]=0;
356     }
357     
358     data->vendorid = detectcpu_vendor();
359
360     switch(data->vendorid)
361     {
362         case GMX_DETECTCPU_VENDOR_INTEL:
363             detectcpu_intel(data);
364             break;
365         case GMX_DETECTCPU_VENDOR_AMD:
366             detectcpu_amd(data);
367             break;
368         default:
369             /* Could not find vendor */
370             strncpy(data->brand,"Unknown CPU brand",GMX_DETECTCPU_STRLEN);
371             data->family         = 0;
372             data->model          = 0;
373             data->stepping       = 0;
374
375             for(i=0;i<GMX_DETECTCPU_NFEATURES;i++)
376             {
377                 data->feature[i]=0;
378             }
379             data->feature[GMX_DETECTCPU_FEATURE_CANNOTDETECT] = 1;
380             break;
381     }
382
383     return 0;
384 }
385
386
387
388
389 int
390 gmx_detectcpu_formatstring       (gmx_detectcpu_t              data,
391                                   char *                        str,
392                                   int                           n)
393 {
394     int c;
395     int i;
396
397 #ifdef _MSC_VER
398     _snprintf(str,n,
399               "Vendor: %s\n"
400               "Brand:  %s\n"
401               "Family: %2d  Model: %2d  Stepping: %2d\n"
402               "Features:",
403               gmx_detectcpu_vendorid_string[data.vendorid],
404               data.brand,
405               data.family,data.model,data.stepping);
406 #else
407     snprintf(str,n,
408              "Vendor: %s\n"
409              "Brand:  %s\n"
410              "Family: %2d  Model: %2d  Stepping: %2d\n"
411              "Features:",
412              gmx_detectcpu_vendorid_string[data.vendorid],
413              data.brand,
414              data.family,data.model,data.stepping);
415 #endif
416
417     str[n-1] = '\0';
418     c = strlen(str);
419     n   -= c;
420     str += c;
421
422     for(i=0;i<GMX_DETECTCPU_NFEATURES;i++)
423     {
424         if(data.feature[i]==1)
425         {
426 #ifdef _MSC_VER
427             _snprintf(str,n," %s",gmx_detectcpu_feature_string[i]);
428 #else
429             snprintf(str,n," %s",gmx_detectcpu_feature_string[i]);
430 #endif
431             str[n-1] = '\0';
432             c = strlen(str);
433             n   -= c;
434             str += c;
435         }
436     }
437 #ifdef _MSC_VER
438     _snprintf(str,n,"\n");
439 #else
440     snprintf(str,n,"\n");
441 #endif
442     str[n-1] = '\0';
443
444     return 0;
445 }
446
447
448
449 int
450 gmx_detectcpu_suggest_acceleration  (gmx_detectcpu_t                 data,
451                                      gmx_detectcpu_acceleration_t *  acc)
452 {
453     gmx_detectcpu_acceleration_t tmpacc;
454
455     tmpacc = GMX_DETECTCPU_ACCELERATION_NONE;
456
457     if(data.vendorid==GMX_DETECTCPU_VENDOR_INTEL)
458     {
459         if(data.feature[GMX_DETECTCPU_FEATURE_X86_AVX]==1)
460         {
461             tmpacc = GMX_DETECTCPU_ACCELERATION_X86_AVX_256;
462         }
463         else if(data.feature[GMX_DETECTCPU_FEATURE_X86_SSE4_1]==1)
464         {
465             tmpacc = GMX_DETECTCPU_ACCELERATION_X86_SSE4_1;
466         }
467         else if(data.feature[GMX_DETECTCPU_FEATURE_X86_SSE2]==1)
468         {
469             tmpacc = GMX_DETECTCPU_ACCELERATION_X86_SSE2;
470         }
471     }
472     else if(data.vendorid==GMX_DETECTCPU_VENDOR_AMD)
473     {
474         if(data.feature[GMX_DETECTCPU_FEATURE_X86_AVX]==1)
475         {
476             tmpacc = GMX_DETECTCPU_ACCELERATION_X86_AVX_128_FMA;
477         }
478         else if(data.feature[GMX_DETECTCPU_FEATURE_X86_SSE4_1]==1)
479         {
480             tmpacc = GMX_DETECTCPU_ACCELERATION_X86_SSE4_1;
481         }
482         else if(data.feature[GMX_DETECTCPU_FEATURE_X86_SSE2]==1)
483         {
484             tmpacc = GMX_DETECTCPU_ACCELERATION_X86_SSE2;
485         }
486     }
487
488     *acc = tmpacc;
489
490     return 0;
491 }
492
493
494
495 int
496 gmx_detectcpu_check_acceleration(gmx_detectcpu_t   data,
497                                  FILE *           log)
498 {
499     int                           rc;
500     char                          str[1024];
501     gmx_detectcpu_acceleration_t  acc;
502
503     gmx_detectcpu_suggest_acceleration(data,&acc);
504     rc = (acc != compiled_acc);
505
506     gmx_detectcpu_formatstring(data,str,1023);
507     str[1023] = '\0';
508
509     if(log!=NULL)
510     {
511         fprintf(log,
512                 "Detecting CPU-specific acceleration. Present hardware specification:\n"
513                 "%s"
514                 "Acceleration most likely to fit this hardware: %s\n"
515                 "Acceleration selected at Gromacs compile time: %s\n\n",
516                 str,
517                 gmx_detectcpu_acceleration_string[acc],
518                 gmx_detectcpu_acceleration_string[compiled_acc]);
519     }
520
521     if(rc!=0)
522     {
523         if(log!=NULL)
524         {
525             fprintf(log,"WARNING! Binary not matching hardware - you are likely losing performance.\n\n");
526         }
527         printf("\nWARNING! Binary not matching hardware - you are likely losing performance.\n"
528                "Acceleration most likely to fit this hardware: %s\n"
529                "Acceleration selected at Gromacs compile time: %s\n\n",
530                gmx_detectcpu_acceleration_string[acc],
531                gmx_detectcpu_acceleration_string[compiled_acc]);
532     }
533
534     return rc;
535 }
536
537
538
539
#ifdef GMX_DETECTCPU_STANDALONE
/* Stand-alone program to enable queries of CPU features from Cmake.
 * Note that you need to check inline ASM capabilities before compiling and
 * set -DGMX_X86_GCC_INLINE_ASM for the cpuid instruction to work...
 *
 * Only the first command-line argument is looked at, and only its first
 * three characters are compared against the known flags. An unrecognized
 * flag prints nothing. The exit code is always 0.
 */
int
main(int argc, char **argv)
{
    gmx_detectcpu_t               cpu;
    gmx_detectcpu_acceleration_t  suggested;
    const char *                  flag;
    int                           f,nprinted;

    if(argc<2)
    {
        printf("Usage:\n\n%s [flags]\n\n"
               "Available flags:\n"
               "-vendor        Print CPU vendor.\n"
               "-brand         Print CPU brand string.\n"
               "-family        Print CPU family version.\n"
               "-model         Print CPU model version.\n"
               "-stepping      Print CPU stepping version.\n"
               "-features      Print CPU feature flags.\n"
               "-acceleration  Print suggested Gromacs acceleration.\n"
               ,argv[0]);
        return 0;
    }

    gmx_detectcpu(&cpu);
    flag = argv[1];

    if(strncmp(flag,"-vendor",3)==0)
    {
        printf("%s\n",gmx_detectcpu_vendorid_string[cpu.vendorid]);
    }
    else if(strncmp(flag,"-brand",3)==0)
    {
        printf("%s\n",cpu.brand);
    }
    else if(strncmp(flag,"-family",3)==0)
    {
        printf("%d\n",cpu.family);
    }
    else if(strncmp(flag,"-model",3)==0)
    {
        printf("%d\n",cpu.model);
    }
    else if(strncmp(flag,"-stepping",3)==0)
    {
        printf("%d\n",cpu.stepping);
    }
    else if(strncmp(flag,"-features",3)==0)
    {
        /* Space-separated list of the features that are set */
        nprinted = 0;
        for(f=0;f<GMX_DETECTCPU_NFEATURES;f++)
        {
            if(cpu.feature[f]!=1)
            {
                continue;
            }
            if(nprinted>0)
            {
                printf(" ");
            }
            printf("%s",gmx_detectcpu_feature_string[f]);
            nprinted++;
        }
        printf("\n");
    }
    else if(strncmp(flag,"-acceleration",3)==0)
    {
        gmx_detectcpu_suggest_acceleration(cpu,&suggested);
        printf("%s\n",gmx_detectcpu_acceleration_string[suggested]);
    }

    return 0;
}

#endif