2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012, by the GROMACS development team, led by
5 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
6 * others, as listed in the AUTHORS file in the top-level source
7 * directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
49 /* MSVC definition for __cpuid() */
53 /* sysconf() definition */
60 #include "gmx_cpuid.h"
63 /* Global constant character strings corresponding to our enumerated types */
65 gmx_cpuid_vendor_string[GMX_CPUID_NVENDORS] =
74 gmx_cpuid_feature_string[GMX_CPUID_NFEATURES] =
114 gmx_cpuid_acceleration_string[GMX_CPUID_NACCELERATIONS] =
124 /* Max length of brand string */
125 #define GMX_CPUID_BRAND_MAXLEN 256
128 /* Contents of the abstract datatype */
131 enum gmx_cpuid_vendor vendor;
132 char brand[GMX_CPUID_BRAND_MAXLEN];
136 /* Not using gmx_bool here, since it must be possible to compile this file without simple.h */
137 char feature[GMX_CPUID_NFEATURES];
141 /* Simple routines to access the data structure. The initialization routine is
142 * further down since that needs to call other static routines in this file.
144 enum gmx_cpuid_vendor
145 gmx_cpuid_vendor (gmx_cpuid_t cpuid)
147 return cpuid->vendor;
152 gmx_cpuid_brand (gmx_cpuid_t cpuid)
158 gmx_cpuid_family (gmx_cpuid_t cpuid)
160 return cpuid->family;
164 gmx_cpuid_model (gmx_cpuid_t cpuid)
170 gmx_cpuid_stepping (gmx_cpuid_t cpuid)
172 return cpuid->stepping;
176 gmx_cpuid_feature (gmx_cpuid_t cpuid,
177 enum gmx_cpuid_feature feature)
179 return (cpuid->feature[feature]!=0);
185 /* What type of acceleration was compiled in, if any?
186 * This is set from Cmake. Note that the SSE2 and SSE4_1 macros are set for
187 * AVX too, so it is important that they appear last in the list.
189 #ifdef GMX_X86_AVX_256
191 enum gmx_cpuid_acceleration
192 compiled_acc = GMX_CPUID_ACCELERATION_X86_AVX_256;
193 #elif defined GMX_X86_AVX_128_FMA
195 enum gmx_cpuid_acceleration
196 compiled_acc = GMX_CPUID_ACCELERATION_X86_AVX_128_FMA;
197 #elif defined GMX_X86_SSE4_1
199 enum gmx_cpuid_acceleration
200 compiled_acc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
201 #elif defined GMX_X86_SSE2
203 enum gmx_cpuid_acceleration
204 compiled_acc = GMX_CPUID_ACCELERATION_X86_SSE2;
207 enum gmx_cpuid_acceleration
208 compiled_acc = GMX_CPUID_ACCELERATION_NONE;
212 /* Currently CPUID is only supported (1) if we can use an instruction on MSVC, or (2)
213 * if the compiler handles GNU-style inline assembly.
215 #if defined (__i386__) || defined (__x86_64__) || defined (_M_IX86) || defined (_M_X64)
217 /* Execute CPUID on x86 class CPUs. level sets function to exec, and the
218 * contents of register output is returned. See Intel/AMD docs for details.
220 * This version supports extended information where we can also have an input
221 * value in the ecx register. This is ignored for most levels, but some of them
222 * (e.g. level 0xB on Intel) use it.
225 execute_x86cpuid(unsigned int level,
234 #if (defined _MSC_VER)
237 #if (_MSC_VER > 1500) || (_MSC_VER==1500 & _MSC_FULL_VER >= 150030729)
238 /* MSVC 9.0 SP1 or later */
239 __cpuidex(CPUInfo,level,ecxval);
242 __cpuid(CPUInfo,level);
243 /* Set an error code if the user wanted a non-zero ecxval, since we did not have cpuidex */
244 rc = (ecxval>0) ? -1 : 0;
251 #elif (defined GMX_X86_GCC_INLINE_ASM)
252 /* for now this means GMX_X86_GCC_INLINE_ASM should be defined,
253 * but there might be more options added in the future.
259 #if defined(__i386__) && defined(__PIC__)
260 /* Avoid clobbering the global offset table in 32-bit pic code (ebx register) */
261 __asm__ __volatile__ ("xchgl %%ebx, %1 \n\t"
263 "xchgl %%ebx, %1 \n\t"
264 : "+a"(*eax), "+r"(*ebx), "+c"(*ecx), "+d"(*edx));
266 /* i386 without PIC, or x86-64. Things are easy and we can clobber any reg we want :-) */
267 __asm__ __volatile__ ("cpuid \n\t"
268 : "+a"(*eax), "+b"(*ebx), "+c"(*ecx), "+d"(*edx));
273 * Apparently this is an x86 platform where we don't know how to call cpuid.
275 * This is REALLY bad, since we will lose all Gromacs acceleration.
286 #endif /* architecture is x86 */
289 /* Identify CPU features common to Intel & AMD - mainly brand string,
290 * version and some features. Vendor has already been detected outside this.
293 cpuid_check_common_x86(gmx_cpuid_t cpuid)
295 int fn,max_stdfn,max_extfn;
296 unsigned int eax,ebx,ecx,edx;
297 char str[GMX_CPUID_BRAND_MAXLEN];
300 /* Find largest standard/extended function input value */
301 execute_x86cpuid(0x0,0,&eax,&ebx,&ecx,&edx);
303 execute_x86cpuid(0x80000000,0,&eax,&ebx,&ecx,&edx);
307 if(max_extfn>=0x80000005)
309 /* Get CPU brand string */
310 for(fn=0x80000002;fn<0x80000005;fn++)
312 execute_x86cpuid(fn,0,&eax,&ebx,&ecx,&edx);
321 /* Remove empty initial space */
327 strncpy(cpuid->brand,p,GMX_CPUID_BRAND_MAXLEN);
331 strncpy(cpuid->brand,"Unknown CPU brand",GMX_CPUID_BRAND_MAXLEN);
334 /* Find basic CPU properties */
337 execute_x86cpuid(0x1,0,&eax,&ebx,&ecx,&edx);
339 cpuid->family = ((eax & 0x0FF00000) >> 20) + ((eax & 0x00000F00) >> 8);
340 /* Note that the extended model should be shifted left by 4, so we only shift right by 12 instead of 16. */
341 cpuid->model = ((eax & 0x000F0000) >> 12) + ((eax & 0x000000F0) >> 4);
342 cpuid->stepping = (eax & 0x0000000F);
344 /* Feature flags common to AMD and intel */
345 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE3] = (ecx & (1 << 0)) != 0;
346 cpuid->feature[GMX_CPUID_FEATURE_X86_PCLMULDQ] = (ecx & (1 << 1)) != 0;
347 cpuid->feature[GMX_CPUID_FEATURE_X86_SSSE3] = (ecx & (1 << 9)) != 0;
348 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA] = (ecx & (1 << 12)) != 0;
349 cpuid->feature[GMX_CPUID_FEATURE_X86_CX16] = (ecx & (1 << 13)) != 0;
350 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_1] = (ecx & (1 << 19)) != 0;
351 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_2] = (ecx & (1 << 20)) != 0;
352 cpuid->feature[GMX_CPUID_FEATURE_X86_POPCNT] = (ecx & (1 << 23)) != 0;
353 cpuid->feature[GMX_CPUID_FEATURE_X86_AES] = (ecx & (1 << 25)) != 0;
354 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX] = (ecx & (1 << 28)) != 0;
355 cpuid->feature[GMX_CPUID_FEATURE_X86_F16C] = (ecx & (1 << 29)) != 0;
356 cpuid->feature[GMX_CPUID_FEATURE_X86_RDRND] = (ecx & (1 << 30)) != 0;
358 cpuid->feature[GMX_CPUID_FEATURE_X86_PSE] = (edx & (1 << 3)) != 0;
359 cpuid->feature[GMX_CPUID_FEATURE_X86_MSR] = (edx & (1 << 5)) != 0;
360 cpuid->feature[GMX_CPUID_FEATURE_X86_CX8] = (edx & (1 << 8)) != 0;
361 cpuid->feature[GMX_CPUID_FEATURE_X86_APIC] = (edx & (1 << 9)) != 0;
362 cpuid->feature[GMX_CPUID_FEATURE_X86_CMOV] = (edx & (1 << 15)) != 0;
363 cpuid->feature[GMX_CPUID_FEATURE_X86_CLFSH] = (edx & (1 << 19)) != 0;
364 cpuid->feature[GMX_CPUID_FEATURE_X86_MMX] = (edx & (1 << 23)) != 0;
365 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE2] = (edx & (1 << 26)) != 0;
366 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = (edx & (1 << 28)) != 0;
372 cpuid->stepping = -1;
375 if(max_extfn>=0x80000001)
377 execute_x86cpuid(0x80000001,0,&eax,&ebx,&ecx,&edx);
378 cpuid->feature[GMX_CPUID_FEATURE_X86_LAHF_LM] = (ecx & (1 << 0)) != 0;
379 cpuid->feature[GMX_CPUID_FEATURE_X86_PDPE1GB] = (edx & (1 << 26)) != 0;
380 cpuid->feature[GMX_CPUID_FEATURE_X86_RDTSCP] = (edx & (1 << 27)) != 0;
383 if(max_extfn>=0x80000007)
385 execute_x86cpuid(0x80000007,0,&eax,&ebx,&ecx,&edx);
386 cpuid->feature[GMX_CPUID_FEATURE_X86_NONSTOP_TSC] = (edx & (1 << 8)) != 0;
392 /* Detection of AMD-specific CPU features */
394 cpuid_check_amd_x86(gmx_cpuid_t cpuid)
396 int max_stdfn,max_extfn;
397 unsigned int eax,ebx,ecx,edx;
399 cpuid_check_common_x86(cpuid);
401 execute_x86cpuid(0x0,0,&eax,&ebx,&ecx,&edx);
404 execute_x86cpuid(0x80000000,0,&eax,&ebx,&ecx,&edx);
407 if(max_extfn>=0x80000001)
409 execute_x86cpuid(0x80000001,0,&eax,&ebx,&ecx,&edx);
411 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4A] = (ecx & (1 << 6)) != 0;
412 cpuid->feature[GMX_CPUID_FEATURE_X86_MISALIGNSSE] = (ecx & (1 << 7)) != 0;
413 cpuid->feature[GMX_CPUID_FEATURE_X86_XOP] = (ecx & (1 << 11)) != 0;
414 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA4] = (ecx & (1 << 16)) != 0;
420 /* Detection of Intel-specific CPU features */
422 cpuid_check_intel_x86(gmx_cpuid_t cpuid)
424 unsigned int max_stdfn,max_extfn;
425 unsigned int eax,ebx,ecx,edx;
427 unsigned int max_logical_cores,max_physical_cores;
429 cpuid_check_common_x86(cpuid);
431 execute_x86cpuid(0x0,0,&eax,&ebx,&ecx,&edx);
434 execute_x86cpuid(0x80000000,0,&eax,&ebx,&ecx,&edx);
439 execute_x86cpuid(0x1,0,&eax,&ebx,&ecx,&edx);
440 cpuid->feature[GMX_CPUID_FEATURE_X86_PDCM] = (ecx & (1 << 15)) != 0;
441 cpuid->feature[GMX_CPUID_FEATURE_X86_PCID] = (ecx & (1 << 17)) != 0;
442 cpuid->feature[GMX_CPUID_FEATURE_X86_X2APIC] = (ecx & (1 << 21)) != 0;
443 cpuid->feature[GMX_CPUID_FEATURE_X86_TDT] = (ecx & (1 << 24)) != 0;
448 execute_x86cpuid(0x7,0,&eax,&ebx,&ecx,&edx);
449 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX2] = (ebx & (1 << 5)) != 0;
452 /* Check whether Hyper-Threading is enabled, not only supported */
453 if(cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] && max_stdfn>=4)
455 execute_x86cpuid(0x1,0,&eax,&ebx,&ecx,&edx);
456 max_logical_cores = (ebx >> 16) & 0x0FF;
457 execute_x86cpuid(0x4,0,&eax,&ebx,&ecx,&edx);
458 max_physical_cores = ((eax >> 26) & 0x3F) + 1;
460 /* Clear HTT flag if we only have 1 logical core per physical */
461 if(max_logical_cores/max_physical_cores < 2)
463 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = 0;
469 /* Try to find the vendor of the current CPU, so we know what specific
470 * detection routine to call.
472 static enum gmx_cpuid_vendor
473 cpuid_check_vendor(void)
475 enum gmx_cpuid_vendor i,vendor;
476 /* Register data used on x86 */
477 unsigned int eax,ebx,ecx,edx;
478 char vendorstring[13];
480 /* Set default first */
481 vendor = GMX_CPUID_VENDOR_UNKNOWN;
483 execute_x86cpuid(0x0,0,&eax,&ebx,&ecx,&edx);
485 memcpy(vendorstring,&ebx,4);
486 memcpy(vendorstring+4,&edx,4);
487 memcpy(vendorstring+8,&ecx,4);
489 vendorstring[12]='\0';
491 for(i=GMX_CPUID_VENDOR_UNKNOWN;i<GMX_CPUID_NVENDORS;i++)
493 if(!strncmp(vendorstring,gmx_cpuid_vendor_string[i],12))
506 gmx_cpuid_init (gmx_cpuid_t * pcpuid)
511 cpuid = malloc(sizeof(*cpuid));
515 for(i=0;i<GMX_CPUID_NFEATURES;i++)
520 cpuid->vendor = cpuid_check_vendor();
522 switch(cpuid->vendor)
524 case GMX_CPUID_VENDOR_INTEL:
525 cpuid_check_intel_x86(cpuid);
527 case GMX_CPUID_VENDOR_AMD:
528 cpuid_check_amd_x86(cpuid);
531 /* Could not find vendor */
532 strncpy(cpuid->brand,"Unknown CPU brand",GMX_CPUID_BRAND_MAXLEN);
537 for(i=0;i<GMX_CPUID_NFEATURES;i++)
541 cpuid->feature[GMX_CPUID_FEATURE_CANNOTDETECT] = 1;
551 gmx_cpuid_done (gmx_cpuid_t cpuid)
558 gmx_cpuid_formatstring (gmx_cpuid_t cpuid,
564 enum gmx_cpuid_feature feature;
570 "Family: %2d Model: %2d Stepping: %2d\n"
572 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
573 gmx_cpuid_brand(cpuid),
574 gmx_cpuid_family(cpuid),gmx_cpuid_model(cpuid),gmx_cpuid_stepping(cpuid));
579 "Family: %2d Model: %2d Stepping: %2d\n"
581 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
582 gmx_cpuid_brand(cpuid),
583 gmx_cpuid_family(cpuid),gmx_cpuid_model(cpuid),gmx_cpuid_stepping(cpuid));
591 for(feature=GMX_CPUID_FEATURE_CANNOTDETECT;feature<GMX_CPUID_NFEATURES;feature++)
593 if(gmx_cpuid_feature(cpuid,feature)==1)
596 _snprintf(str,n," %s",gmx_cpuid_feature_string[feature]);
598 snprintf(str,n," %s",gmx_cpuid_feature_string[feature]);
607 _snprintf(str,n,"\n");
609 snprintf(str,n,"\n");
618 enum gmx_cpuid_acceleration
619 gmx_cpuid_acceleration_suggest (gmx_cpuid_t cpuid)
621 enum gmx_cpuid_acceleration tmpacc;
623 tmpacc = GMX_CPUID_ACCELERATION_NONE;
625 if(gmx_cpuid_vendor(cpuid)==GMX_CPUID_VENDOR_INTEL)
627 if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_AVX))
629 tmpacc = GMX_CPUID_ACCELERATION_X86_AVX_256;
631 else if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_SSE4_1))
633 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
635 else if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_SSE2))
637 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE2;
640 else if(gmx_cpuid_vendor(cpuid)==GMX_CPUID_VENDOR_AMD)
642 if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_AVX))
644 tmpacc = GMX_CPUID_ACCELERATION_X86_AVX_128_FMA;
646 else if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_SSE4_1))
648 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
650 else if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_SSE2))
652 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE2;
662 gmx_cpuid_acceleration_check(gmx_cpuid_t cpuid,
667 enum gmx_cpuid_acceleration acc;
669 acc = gmx_cpuid_acceleration_suggest(cpuid);
671 rc = (acc != compiled_acc);
673 gmx_cpuid_formatstring(cpuid,str,1023);
679 "\nDetecting CPU-specific acceleration.\nPresent hardware specification:\n"
681 "Acceleration most likely to fit this hardware: %s\n"
682 "Acceleration selected at GROMACS compile time: %s\n\n",
684 gmx_cpuid_acceleration_string[acc],
685 gmx_cpuid_acceleration_string[compiled_acc]);
692 fprintf(log,"\nBinary not matching hardware - you might be losing performance.\n"
693 "Acceleration most likely to fit this hardware: %s\n"
694 "Acceleration selected at GROMACS compile time: %s\n\n",
695 gmx_cpuid_acceleration_string[acc],
696 gmx_cpuid_acceleration_string[compiled_acc]);
698 printf("Compiled acceleration: %s (Gromacs could use %s on this machine, which is better)\n",
699 gmx_cpuid_acceleration_string[compiled_acc],
700 gmx_cpuid_acceleration_string[acc]);
706 enum gmx_cpuid_x86_smt
707 gmx_cpuid_x86_smt(gmx_cpuid_t cpuid)
710 #if (defined HAVE_SCHED_H && defined HAVE_SCHED_SETAFFINITY && defined HAVE_SYSCONF && defined __linux__)
713 cpu_set_t cpuset,save_cpuset;
715 unsigned int eax,ebx,ecx,edx;
719 if( gmx_cpuid_vendor(cpuid)!=GMX_CPUID_VENDOR_INTEL ||
720 gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_HTT)==0)
722 return GMX_CPUID_X86_SMT_DISABLED;
725 /* Check cpuid max standard function */
726 execute_x86cpuid(0x0,0,&eax,&ebx,&ecx,&edx);
728 /* Early CPUs that do not support function 11 do not support SMT either */
731 return GMX_CPUID_X86_SMT_DISABLED;
734 /* If we got here, it is a modern Intel CPU that supports detection, as does our OS */
736 /* How many processors? */
737 nproc = sysconf(_SC_NPROCESSORS_ONLN);
739 apic_id = malloc(sizeof(int)*nproc);
741 sched_getaffinity(0,sizeof(cpu_set_t),&save_cpuset);
743 /* Get x2APIC ID from each hardware thread */
748 sched_setaffinity(0,sizeof(cpu_set_t),&cpuset);
749 execute_x86cpuid(0xB,0,&eax,&ebx,&ecx,&edx);
753 /* Reset affinity to the value it had when calling this routine */
754 sched_setaffinity(0,sizeof(cpu_set_t),&save_cpuset);
756 core_shift_bits = eax & 0x1F;
758 /* Check if there is any other APIC id that is identical to [0], apart from
759 * the hardware thread bit.
762 for(i=1;i<nproc && smt_found==0;i++)
764 smt_found = (apic_id[i]>>core_shift_bits == apic_id[0] >> core_shift_bits);
771 return GMX_CPUID_X86_SMT_ENABLED;
775 return GMX_CPUID_X86_SMT_DISABLED;
778 /* Do the trivial stuff first. If Hyper-Threading isn't even supported it
779 * cannot be enabled, no matter what OS detection we use!
781 if(0==gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_HTT))
783 return GMX_CPUID_X86_SMT_DISABLED;
787 return GMX_CPUID_X86_SMT_CANNOTDETECT;
795 #ifdef GMX_CPUID_STANDALONE
796 /* Stand-alone program to enable queries of CPU features from Cmake.
797 * Note that you need to check inline ASM capabilities before compiling and set
798 * -DGMX_X86_GCC_INLINE_ASM for the cpuid instruction to work...
801 main(int argc, char **argv)
804 enum gmx_cpuid_acceleration acc;
810 "Usage:\n\n%s [flags]\n\n"
812 "-vendor Print CPU vendor.\n"
813 "-brand Print CPU brand string.\n"
814 "-family Print CPU family version.\n"
815 "-model Print CPU model version.\n"
816 "-stepping Print CPU stepping version.\n"
817 "-features Print CPU feature flags.\n"
818 "-acceleration Print suggested GROMACS acceleration.\n"
823 gmx_cpuid_init(&cpuid);
825 if(!strncmp(argv[1],"-vendor",3))
827 printf("%s\n",gmx_cpuid_vendor_string[cpuid->vendor]);
829 else if(!strncmp(argv[1],"-brand",3))
831 printf("%s\n",cpuid->brand);
833 else if(!strncmp(argv[1],"-family",3))
835 printf("%d\n",cpuid->family);
837 else if(!strncmp(argv[1],"-model",3))
839 printf("%d\n",cpuid->model);
841 else if(!strncmp(argv[1],"-stepping",3))
843 printf("%d\n",cpuid->stepping);
845 else if(!strncmp(argv[1],"-features",3))
848 for(i=0;i<GMX_CPUID_NFEATURES;i++)
850 if(cpuid->feature[i]==1)
856 printf("%s",gmx_cpuid_feature_string[i]);
861 else if(!strncmp(argv[1],"-acceleration",3))
863 acc = gmx_cpuid_acceleration_suggest(cpuid);
864 fprintf(stdout,"%s\n",gmx_cpuid_acceleration_string[acc]);
867 gmx_cpuid_done(cpuid);