2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012, by the GROMACS development team, led by
5 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
6 * others, as listed in the AUTHORS file in the top-level source
7 * directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
49 /* MSVC definition for __cpuid() */
53 /* sysconf() definition */
57 #include "gmx_cpuid.h"
61 /* For convenience, and to enable configure-time invocation, we keep all architectures
62 * in a single file, but to avoid repeated ifdefs we set the overall architecture here.
64 #if defined (__i386__) || defined (__x86_64__) || defined (_M_IX86) || defined (_M_X64)
65 # define GMX_CPUID_X86
68 /* Global constant character strings corresponding to our enumerated types */
70 gmx_cpuid_vendor_string[GMX_CPUID_NVENDORS] =
79 gmx_cpuid_feature_string[GMX_CPUID_NFEATURES] =
119 gmx_cpuid_acceleration_string[GMX_CPUID_NACCELERATIONS] =
129 /* Max length of brand string */
130 #define GMX_CPUID_BRAND_MAXLEN 256
133 /* Contents of the abstract datatype */
136 enum gmx_cpuid_vendor vendor;
137 char brand[GMX_CPUID_BRAND_MAXLEN];
141 /* Not using gmx_bool here, since it must be possible to compile this file without simple.h */
142 char feature[GMX_CPUID_NFEATURES];
144 /* Basic CPU topology information. For x86 this is a bit complicated since the topology differs between
145 * operating systems and sometimes even settings. For most other architectures you can likely just check
146 * the documentation and then write static information to these arrays rather than detecting on-the-fly.
148 int have_cpu_topology;
149 int nproc; /* total number of logical processors from OS */
151 int ncores_per_package;
152 int nhwthreads_per_core;
154 int * core_id; /* Local core id in each package */
155 int * hwthread_id; /* Local hwthread id in each core */
156 int * locality_order; /* Processor indices sorted in locality order */
160 /* Simple routines to access the data structure. The initialization routine is
161 * further down since that needs to call other static routines in this file.
163 enum gmx_cpuid_vendor
164 gmx_cpuid_vendor (gmx_cpuid_t cpuid)
166 return cpuid->vendor;
171 gmx_cpuid_brand (gmx_cpuid_t cpuid)
177 gmx_cpuid_family (gmx_cpuid_t cpuid)
179 return cpuid->family;
183 gmx_cpuid_model (gmx_cpuid_t cpuid)
189 gmx_cpuid_stepping (gmx_cpuid_t cpuid)
191 return cpuid->stepping;
195 gmx_cpuid_feature (gmx_cpuid_t cpuid,
196 enum gmx_cpuid_feature feature)
198 return (cpuid->feature[feature]!=0);
204 /* What type of acceleration was compiled in, if any?
205 * This is set from Cmake. Note that the SSE2 and SSE4_1 macros are set for
206 * AVX too, so it is important that they appear last in the list.
208 #ifdef GMX_X86_AVX_256
210 enum gmx_cpuid_acceleration
211 compiled_acc = GMX_CPUID_ACCELERATION_X86_AVX_256;
212 #elif defined GMX_X86_AVX_128_FMA
214 enum gmx_cpuid_acceleration
215 compiled_acc = GMX_CPUID_ACCELERATION_X86_AVX_128_FMA;
216 #elif defined GMX_X86_SSE4_1
218 enum gmx_cpuid_acceleration
219 compiled_acc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
220 #elif defined GMX_X86_SSE2
222 enum gmx_cpuid_acceleration
223 compiled_acc = GMX_CPUID_ACCELERATION_X86_SSE2;
226 enum gmx_cpuid_acceleration
227 compiled_acc = GMX_CPUID_ACCELERATION_NONE;
233 /* Execute CPUID on x86 class CPUs. level sets the function to execute, and the
234 * contents of the output registers are returned. See Intel/AMD docs for details.
236 * This version supports extended information where we can also have an input
237 * value in the ecx register. This is ignored for most levels, but some of them
238 * (e.g. level 0xB on Intel) use it.
241 execute_x86cpuid(unsigned int level,
250 /* Currently CPUID is only supported (1) if we can use an instruction on MSVC, or (2)
251 * if the compiler handles GNU-style inline assembly.
254 #if (defined _MSC_VER)
257 #if (_MSC_VER > 1500) || (_MSC_VER==1500 && _MSC_FULL_VER >= 150030729)
258 /* MSVC 9.0 SP1 or later */
259 __cpuidex(CPUInfo,level,ecxval);
262 __cpuid(CPUInfo,level);
263 /* Set an error code if the user wanted a non-zero ecxval, since we did not have cpuidex */
264 rc = (ecxval>0) ? -1 : 0;
271 #elif (defined GMX_X86_GCC_INLINE_ASM)
272 /* for now this means GMX_X86_GCC_INLINE_ASM should be defined,
273 * but there might be more options added in the future.
279 #if defined(__i386__) && defined(__PIC__)
280 /* Avoid clobbering the global offset table in 32-bit pic code (ebx register) */
281 __asm__ __volatile__ ("xchgl %%ebx, %1 \n\t"
283 "xchgl %%ebx, %1 \n\t"
284 : "+a"(*eax), "+r"(*ebx), "+c"(*ecx), "+d"(*edx));
286 /* i386 without PIC, or x86-64. Things are easy and we can clobber any reg we want :-) */
287 __asm__ __volatile__ ("cpuid \n\t"
288 : "+a"(*eax), "+b"(*ebx), "+c"(*ecx), "+d"(*edx));
293 * Apparently this is an x86 platform where we don't know how to call cpuid.
295 * This is REALLY bad, since we will lose all Gromacs acceleration.
308 /* Identify CPU features common to Intel & AMD - mainly brand string,
309 * version and some features. Vendor has already been detected outside this.
312 cpuid_check_common_x86(gmx_cpuid_t cpuid)
314 int fn,max_stdfn,max_extfn;
315 unsigned int eax,ebx,ecx,edx;
316 char str[GMX_CPUID_BRAND_MAXLEN];
319 /* Find largest standard/extended function input value */
320 execute_x86cpuid(0x0,0,&eax,&ebx,&ecx,&edx);
322 execute_x86cpuid(0x80000000,0,&eax,&ebx,&ecx,&edx);
326 if(max_extfn>=0x80000004) /* the brand string loop below only reads leaves 0x80000002..0x80000004 */
328 /* Get CPU brand string */
329 for(fn=0x80000002;fn<0x80000005;fn++)
331 execute_x86cpuid(fn,0,&eax,&ebx,&ecx,&edx);
340 /* Remove empty initial space */
346 strncpy(cpuid->brand,p,GMX_CPUID_BRAND_MAXLEN);
350 strncpy(cpuid->brand,"Unknown CPU brand",GMX_CPUID_BRAND_MAXLEN);
353 /* Find basic CPU properties */
356 execute_x86cpuid(0x1,0,&eax,&ebx,&ecx,&edx);
358 cpuid->family = ((eax & 0x0FF00000) >> 20) + ((eax & 0x00000F00) >> 8);
359 /* Note that the extended model should be shifted left by 4, so we only shift right by 12 instead of 16. */
360 cpuid->model = ((eax & 0x000F0000) >> 12) + ((eax & 0x000000F0) >> 4);
361 cpuid->stepping = (eax & 0x0000000F);
363 /* Feature flags common to AMD and intel */
364 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE3] = (ecx & (1 << 0)) != 0;
365 cpuid->feature[GMX_CPUID_FEATURE_X86_PCLMULDQ] = (ecx & (1 << 1)) != 0;
366 cpuid->feature[GMX_CPUID_FEATURE_X86_SSSE3] = (ecx & (1 << 9)) != 0;
367 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA] = (ecx & (1 << 12)) != 0;
368 cpuid->feature[GMX_CPUID_FEATURE_X86_CX16] = (ecx & (1 << 13)) != 0;
369 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_1] = (ecx & (1 << 19)) != 0;
370 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_2] = (ecx & (1 << 20)) != 0;
371 cpuid->feature[GMX_CPUID_FEATURE_X86_POPCNT] = (ecx & (1 << 23)) != 0;
372 cpuid->feature[GMX_CPUID_FEATURE_X86_AES] = (ecx & (1 << 25)) != 0;
373 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX] = (ecx & (1 << 28)) != 0;
374 cpuid->feature[GMX_CPUID_FEATURE_X86_F16C] = (ecx & (1 << 29)) != 0;
375 cpuid->feature[GMX_CPUID_FEATURE_X86_RDRND] = (ecx & (1 << 30)) != 0;
377 cpuid->feature[GMX_CPUID_FEATURE_X86_PSE] = (edx & (1 << 3)) != 0;
378 cpuid->feature[GMX_CPUID_FEATURE_X86_MSR] = (edx & (1 << 5)) != 0;
379 cpuid->feature[GMX_CPUID_FEATURE_X86_CX8] = (edx & (1 << 8)) != 0;
380 cpuid->feature[GMX_CPUID_FEATURE_X86_APIC] = (edx & (1 << 9)) != 0;
381 cpuid->feature[GMX_CPUID_FEATURE_X86_CMOV] = (edx & (1 << 15)) != 0;
382 cpuid->feature[GMX_CPUID_FEATURE_X86_CLFSH] = (edx & (1 << 19)) != 0;
383 cpuid->feature[GMX_CPUID_FEATURE_X86_MMX] = (edx & (1 << 23)) != 0;
384 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE2] = (edx & (1 << 26)) != 0;
385 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = (edx & (1 << 28)) != 0;
391 cpuid->stepping = -1;
394 if(max_extfn>=0x80000001)
396 execute_x86cpuid(0x80000001,0,&eax,&ebx,&ecx,&edx);
397 cpuid->feature[GMX_CPUID_FEATURE_X86_LAHF_LM] = (ecx & (1 << 0)) != 0;
398 cpuid->feature[GMX_CPUID_FEATURE_X86_PDPE1GB] = (edx & (1 << 26)) != 0;
399 cpuid->feature[GMX_CPUID_FEATURE_X86_RDTSCP] = (edx & (1 << 27)) != 0;
402 if(max_extfn>=0x80000007)
404 execute_x86cpuid(0x80000007,0,&eax,&ebx,&ecx,&edx);
405 cpuid->feature[GMX_CPUID_FEATURE_X86_NONSTOP_TSC] = (edx & (1 << 8)) != 0;
410 /* This routine returns the number of unique elements found in the array,
411 * and renumbers these starting from 0. For example, the array {0,1,2,8,9,10,8,9,10,0,1,2}
412 * will be rewritten to {0,1,2,3,4,5,3,4,5,0,1,2}, and it returns 6 for the
413 * number of unique elements.
416 cpuid_renumber_elements(int *data, int n)
419 int i,j,nunique,found;
421 unique = malloc(sizeof(int)*n);
426 for(j=0,found=0;j<nunique && !found;j++)
428 found = (data[i]==unique[j]);
432 /* Insert in sorted order! */
433 for(j=nunique++;j>0 && unique[j-1]>data[i];j--)
435 unique[j]=unique[j-1];
443 for(j=0;j<nunique;j++)
445 if(data[i]==unique[j])
454 /* APIC IDs, or everything you wanted to know about your x86 cores but were afraid to ask...
456 * Raw APIC IDs are unfortunately somewhat dirty. For technical reasons they are assigned
457 * in power-of-2 chunks, and even then there are no guarantees about specific numbers - all
458 * we know is that the part for each thread/core/package is unique, and how many bits are
459 * reserved for that part.
460 * This routine does internal renumbering so we get continuous indices, and also
461 * decodes the actual number of packages,cores-per-package and hwthreads-per-core.
464 cpuid_x86_decode_apic_id(gmx_cpuid_t cpuid,int *apic_id,int core_bits,int hwthread_bits)
467 int hwthread_mask,core_mask_after_shift;
469 cpuid->hwthread_id = malloc(sizeof(int)*cpuid->nproc);
470 cpuid->core_id = malloc(sizeof(int)*cpuid->nproc);
471 cpuid->package_id = malloc(sizeof(int)*cpuid->nproc);
472 cpuid->locality_order = malloc(sizeof(int)*cpuid->nproc);
474 hwthread_mask = (1 << hwthread_bits) - 1;
475 core_mask_after_shift = (1 << core_bits) - 1;
477 for(i=0;i<cpuid->nproc;i++)
479 cpuid->hwthread_id[i] = apic_id[i] & hwthread_mask;
480 cpuid->core_id[i] = (apic_id[i] >> hwthread_bits) & core_mask_after_shift;
481 cpuid->package_id[i] = apic_id[i] >> (core_bits + hwthread_bits);
484 cpuid->npackages = cpuid_renumber_elements(cpuid->package_id,cpuid->nproc);
485 cpuid->ncores_per_package = cpuid_renumber_elements(cpuid->core_id,cpuid->nproc);
486 cpuid->nhwthreads_per_core = cpuid_renumber_elements(cpuid->hwthread_id,cpuid->nproc);
488 /* Create a locality order array, i.e. first all resources in package0, which in turn
489 * are sorted so we first have all resources in core0, where threads are sorted in order, etc.
491 for(i=0;i<cpuid->nproc;i++)
493 idx = (cpuid->package_id[i]*cpuid->ncores_per_package + cpuid->core_id[i])*cpuid->nhwthreads_per_core + cpuid->hwthread_id[i];
494 cpuid->locality_order[idx]=i;
499 /* Detection of AMD-specific CPU features */
501 cpuid_check_amd_x86(gmx_cpuid_t cpuid)
503 int max_stdfn,max_extfn;
504 unsigned int eax,ebx,ecx,edx;
506 int hwthread_bits,core_bits;
509 cpuid_check_common_x86(cpuid);
511 execute_x86cpuid(0x0,0,&eax,&ebx,&ecx,&edx);
514 execute_x86cpuid(0x80000000,0,&eax,&ebx,&ecx,&edx);
517 if(max_extfn>=0x80000001)
519 execute_x86cpuid(0x80000001,0,&eax,&ebx,&ecx,&edx);
521 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4A] = (ecx & (1 << 6)) != 0;
522 cpuid->feature[GMX_CPUID_FEATURE_X86_MISALIGNSSE] = (ecx & (1 << 7)) != 0;
523 cpuid->feature[GMX_CPUID_FEATURE_X86_XOP] = (ecx & (1 << 11)) != 0;
524 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA4] = (ecx & (1 << 16)) != 0;
527 /* Query APIC information on AMD */
528 if(max_extfn>=0x80000008)
530 #if (defined HAVE_SCHED_H && defined HAVE_SCHED_SETAFFINITY && defined HAVE_SYSCONF && defined __linux__)
532 cpu_set_t cpuset,save_cpuset;
533 cpuid->nproc = sysconf(_SC_NPROCESSORS_ONLN);
534 apic_id = malloc(sizeof(int)*cpuid->nproc);
535 sched_getaffinity(0,sizeof(cpu_set_t),&save_cpuset);
536 /* Get APIC id from each core */
538 for(i=0;i<cpuid->nproc;i++)
541 sched_setaffinity(0,sizeof(cpu_set_t),&cpuset);
542 execute_x86cpuid(0x1,0,&eax,&ebx,&ecx,&edx);
543 apic_id[i]=ebx >> 24;
546 /* Reset affinity to the value it had when calling this routine */
547 sched_setaffinity(0,sizeof(cpu_set_t),&save_cpuset);
548 #define CPUID_HAVE_APIC
549 #elif defined GMX_NATIVE_WINDOWS
552 DWORD_PTR save_affinity,affinity; /* SetThreadAffinityMask() takes and returns a pointer-sized mask; unsigned int would truncate it on Win64 */
553 GetSystemInfo( &sysinfo );
554 cpuid->nproc = sysinfo.dwNumberOfProcessors;
555 apic_id = malloc(sizeof(int)*cpuid->nproc);
556 /* Get previous affinity mask */
557 save_affinity = SetThreadAffinityMask(GetCurrentThread(),1);
558 for(i=0;i<cpuid->nproc;i++)
560 SetThreadAffinityMask(GetCurrentThread(),((DWORD_PTR)1<<i)); /* shift in the mask's own width: a plain int 1<<i overflows for i>=31 */
562 execute_x86cpuid(0x1,0,&eax,&ebx,&ecx,&edx);
563 apic_id[i]=ebx >> 24;
565 SetThreadAffinityMask(GetCurrentThread(),save_affinity);
566 #define CPUID_HAVE_APIC
568 #ifdef CPUID_HAVE_APIC
569 /* AMD does not support SMT yet - there are no hwthread bits in apic ID */
571 /* Get number of core bits in apic ID - try modern extended method first */
572 execute_x86cpuid(0x80000008,0,&eax,&ebx,&ecx,&edx);
573 core_bits = (ecx >> 12) & 0xf;
576 /* Legacy method for old single/dual core AMD CPUs */
578 for(core_bits=0;(i>>core_bits)>0;core_bits++) ;
580 cpuid_x86_decode_apic_id(cpuid,apic_id,core_bits,hwthread_bits);
581 cpuid->have_cpu_topology = 1;
587 /* Detection of Intel-specific CPU features */
589 cpuid_check_intel_x86(gmx_cpuid_t cpuid)
591 unsigned int max_stdfn,max_extfn;
592 unsigned int eax,ebx,ecx,edx;
594 unsigned int max_logical_cores,max_physical_cores;
595 int hwthread_bits,core_bits;
598 cpuid_check_common_x86(cpuid);
600 execute_x86cpuid(0x0,0,&eax,&ebx,&ecx,&edx);
603 execute_x86cpuid(0x80000000,0,&eax,&ebx,&ecx,&edx);
608 execute_x86cpuid(0x1,0,&eax,&ebx,&ecx,&edx);
609 cpuid->feature[GMX_CPUID_FEATURE_X86_PDCM] = (ecx & (1 << 15)) != 0;
610 cpuid->feature[GMX_CPUID_FEATURE_X86_PCID] = (ecx & (1 << 17)) != 0;
611 cpuid->feature[GMX_CPUID_FEATURE_X86_X2APIC] = (ecx & (1 << 21)) != 0;
612 cpuid->feature[GMX_CPUID_FEATURE_X86_TDT] = (ecx & (1 << 24)) != 0;
617 execute_x86cpuid(0x7,0,&eax,&ebx,&ecx,&edx);
618 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX2] = (ebx & (1 << 5)) != 0;
621 /* Check whether Hyper-Threading is enabled, not only supported */
622 if(cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] && max_stdfn>=4)
624 execute_x86cpuid(0x1,0,&eax,&ebx,&ecx,&edx);
625 max_logical_cores = (ebx >> 16) & 0x0FF;
626 execute_x86cpuid(0x4,0,&eax,&ebx,&ecx,&edx);
627 max_physical_cores = ((eax >> 26) & 0x3F) + 1;
629 /* Clear HTT flag if we only have 1 logical core per physical */
630 if(max_logical_cores/max_physical_cores < 2)
632 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = 0;
638 /* Query x2 APIC information from cores */
639 #if (defined HAVE_SCHED_H && defined HAVE_SCHED_SETAFFINITY && defined HAVE_SYSCONF && defined __linux__)
641 cpu_set_t cpuset,save_cpuset;
642 cpuid->nproc = sysconf(_SC_NPROCESSORS_ONLN);
643 apic_id = malloc(sizeof(int)*cpuid->nproc);
644 sched_getaffinity(0,sizeof(cpu_set_t),&save_cpuset);
645 /* Get x2APIC ID from each hardware thread */
647 for(i=0;i<cpuid->nproc;i++)
650 sched_setaffinity(0,sizeof(cpu_set_t),&cpuset);
651 execute_x86cpuid(0xB,0,&eax,&ebx,&ecx,&edx);
655 /* Reset affinity to the value it had when calling this routine */
656 sched_setaffinity(0,sizeof(cpu_set_t),&save_cpuset);
657 #define CPUID_HAVE_APIC
658 #elif defined GMX_NATIVE_WINDOWS
661 DWORD_PTR save_affinity,affinity; /* SetThreadAffinityMask() takes and returns a pointer-sized mask; unsigned int would truncate it on Win64 */
662 GetSystemInfo( &sysinfo );
663 cpuid->nproc = sysinfo.dwNumberOfProcessors;
664 apic_id = malloc(sizeof(int)*cpuid->nproc);
665 /* Get previous affinity mask */
666 save_affinity = SetThreadAffinityMask(GetCurrentThread(),1);
667 for(i=0;i<cpuid->nproc;i++)
669 SetThreadAffinityMask(GetCurrentThread(),((DWORD_PTR)1<<i)); /* shift in the mask's own width: a plain int 1<<i overflows for i>=31 */
671 execute_x86cpuid(0xB,0,&eax,&ebx,&ecx,&edx);
674 SetThreadAffinityMask(GetCurrentThread(),save_affinity);
675 #define CPUID_HAVE_APIC
677 #ifdef CPUID_HAVE_APIC
678 execute_x86cpuid(0xB,0,&eax,&ebx,&ecx,&edx);
679 hwthread_bits = eax & 0x1F;
680 execute_x86cpuid(0xB,1,&eax,&ebx,&ecx,&edx);
681 core_bits = (eax & 0x1F) - hwthread_bits;
682 cpuid_x86_decode_apic_id(cpuid,apic_id,core_bits,hwthread_bits);
683 cpuid->have_cpu_topology = 1;
688 #endif /* GMX_CPUID_X86 */
692 /* Try to find the vendor of the current CPU, so we know what specific
693 * detection routine to call.
695 static enum gmx_cpuid_vendor
696 cpuid_check_vendor(void)
698 enum gmx_cpuid_vendor i,vendor;
699 /* Register data used on x86 */
700 unsigned int eax,ebx,ecx,edx;
701 char vendorstring[13];
703 /* Set default first */
704 vendor = GMX_CPUID_VENDOR_UNKNOWN;
707 execute_x86cpuid(0x0,0,&eax,&ebx,&ecx,&edx);
709 memcpy(vendorstring,&ebx,4);
710 memcpy(vendorstring+4,&edx,4);
711 memcpy(vendorstring+8,&ecx,4);
713 vendorstring[12]='\0';
715 for(i=GMX_CPUID_VENDOR_UNKNOWN;i<GMX_CPUID_NVENDORS;i++)
717 if(!strncmp(vendorstring,gmx_cpuid_vendor_string[i],12))
723 vendor = GMX_CPUID_VENDOR_UNKNOWN;
732 gmx_cpuid_topology(gmx_cpuid_t cpuid,
735 int * ncores_per_package,
736 int * nhwthreads_per_core,
737 const int ** package_id,
738 const int ** core_id,
739 const int ** hwthread_id,
740 const int ** locality_order)
744 if(cpuid->have_cpu_topology)
746 *nprocessors = cpuid->nproc;
747 *npackages = cpuid->npackages;
748 *ncores_per_package = cpuid->ncores_per_package;
749 *nhwthreads_per_core = cpuid->nhwthreads_per_core;
750 *package_id = cpuid->package_id;
751 *core_id = cpuid->core_id;
752 *hwthread_id = cpuid->hwthread_id;
753 *locality_order = cpuid->locality_order;
764 enum gmx_cpuid_x86_smt
765 gmx_cpuid_x86_smt(gmx_cpuid_t cpuid)
767 enum gmx_cpuid_x86_smt rc;
769 if(cpuid->have_cpu_topology)
771 rc = (cpuid->nhwthreads_per_core>1) ? GMX_CPUID_X86_SMT_ENABLED : GMX_CPUID_X86_SMT_DISABLED;
773 else if(cpuid->vendor==GMX_CPUID_VENDOR_AMD || gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_HTT)==0)
775 rc = GMX_CPUID_X86_SMT_DISABLED;
779 rc = GMX_CPUID_X86_SMT_CANNOTDETECT;
786 gmx_cpuid_init (gmx_cpuid_t * pcpuid)
791 cpuid = malloc(sizeof(*cpuid));
795 for(i=0;i<GMX_CPUID_NFEATURES;i++)
799 cpuid->have_cpu_topology = 0;
801 cpuid->npackages = 0;
802 cpuid->ncores_per_package = 0;
803 cpuid->nhwthreads_per_core = 0;
804 cpuid->package_id = NULL;
805 cpuid->core_id = NULL;
806 cpuid->hwthread_id = NULL;
807 cpuid->locality_order = NULL;
809 cpuid->vendor = cpuid_check_vendor();
811 switch(cpuid->vendor)
814 case GMX_CPUID_VENDOR_INTEL:
815 cpuid_check_intel_x86(cpuid);
817 case GMX_CPUID_VENDOR_AMD:
818 cpuid_check_amd_x86(cpuid);
822 /* Could not find vendor */
823 strncpy(cpuid->brand,"Unknown CPU brand",GMX_CPUID_BRAND_MAXLEN);
828 for(i=0;i<GMX_CPUID_NFEATURES;i++)
832 cpuid->feature[GMX_CPUID_FEATURE_CANNOTDETECT] = 1;
842 gmx_cpuid_done (gmx_cpuid_t cpuid)
849 gmx_cpuid_formatstring (gmx_cpuid_t cpuid,
855 enum gmx_cpuid_feature feature;
861 "Family: %2d Model: %2d Stepping: %2d\n"
863 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
864 gmx_cpuid_brand(cpuid),
865 gmx_cpuid_family(cpuid),gmx_cpuid_model(cpuid),gmx_cpuid_stepping(cpuid));
870 "Family: %2d Model: %2d Stepping: %2d\n"
872 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
873 gmx_cpuid_brand(cpuid),
874 gmx_cpuid_family(cpuid),gmx_cpuid_model(cpuid),gmx_cpuid_stepping(cpuid));
882 for(feature=GMX_CPUID_FEATURE_CANNOTDETECT;feature<GMX_CPUID_NFEATURES;feature++)
884 if(gmx_cpuid_feature(cpuid,feature)==1)
887 _snprintf(str,n," %s",gmx_cpuid_feature_string[feature]);
889 snprintf(str,n," %s",gmx_cpuid_feature_string[feature]);
898 _snprintf(str,n,"\n");
900 snprintf(str,n,"\n");
909 enum gmx_cpuid_acceleration
910 gmx_cpuid_acceleration_suggest (gmx_cpuid_t cpuid)
912 enum gmx_cpuid_acceleration tmpacc;
914 tmpacc = GMX_CPUID_ACCELERATION_NONE;
916 if(gmx_cpuid_vendor(cpuid)==GMX_CPUID_VENDOR_INTEL)
918 if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_AVX))
920 tmpacc = GMX_CPUID_ACCELERATION_X86_AVX_256;
922 else if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_SSE4_1))
924 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
926 else if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_SSE2))
928 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE2;
931 else if(gmx_cpuid_vendor(cpuid)==GMX_CPUID_VENDOR_AMD)
933 if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_AVX) && gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_FMA4)) /* NOTE(review): AVX_128_FMA presumably relies on FMA4 instructions - only suggest it when the CPU reports FMA4, otherwise fall through to SSE4.1/SSE2 */
935 tmpacc = GMX_CPUID_ACCELERATION_X86_AVX_128_FMA;
937 else if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_SSE4_1))
939 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
941 else if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_SSE2))
943 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE2;
953 gmx_cpuid_acceleration_check(gmx_cpuid_t cpuid,
958 enum gmx_cpuid_acceleration acc;
960 acc = gmx_cpuid_acceleration_suggest(cpuid);
962 rc = (acc != compiled_acc);
964 gmx_cpuid_formatstring(cpuid,str,1023);
970 "\nDetecting CPU-specific acceleration.\nPresent hardware specification:\n"
972 "Acceleration most likely to fit this hardware: %s\n"
973 "Acceleration selected at GROMACS compile time: %s\n\n",
975 gmx_cpuid_acceleration_string[acc],
976 gmx_cpuid_acceleration_string[compiled_acc]);
983 fprintf(log,"\nBinary not matching hardware - you might be losing performance.\n"
984 "Acceleration most likely to fit this hardware: %s\n"
985 "Acceleration selected at GROMACS compile time: %s\n\n",
986 gmx_cpuid_acceleration_string[acc],
987 gmx_cpuid_acceleration_string[compiled_acc]);
989 printf("Compiled acceleration: %s (Gromacs could use %s on this machine, which is better)\n",
990 gmx_cpuid_acceleration_string[compiled_acc],
991 gmx_cpuid_acceleration_string[acc]);
998 #ifdef GMX_CPUID_STANDALONE
999 /* Stand-alone program to enable queries of CPU features from Cmake.
1000 * Note that you need to check inline ASM capabilities before compiling and set
1001 * -DGMX_X86_GCC_INLINE_ASM for the cpuid instruction to work...
1004 main(int argc, char **argv)
1007 enum gmx_cpuid_acceleration acc;
1013 "Usage:\n\n%s [flags]\n\n"
1014 "Available flags:\n"
1015 "-vendor Print CPU vendor.\n"
1016 "-brand Print CPU brand string.\n"
1017 "-family Print CPU family version.\n"
1018 "-model Print CPU model version.\n"
1019 "-stepping Print CPU stepping version.\n"
1020 "-features Print CPU feature flags.\n"
1021 "-acceleration Print suggested GROMACS acceleration.\n"
1026 gmx_cpuid_init(&cpuid);
1028 if(!strncmp(argv[1],"-vendor",3))
1030 printf("%s\n",gmx_cpuid_vendor_string[cpuid->vendor]);
1032 else if(!strncmp(argv[1],"-brand",3))
1034 printf("%s\n",cpuid->brand);
1036 else if(!strncmp(argv[1],"-family",3))
1038 printf("%d\n",cpuid->family);
1040 else if(!strncmp(argv[1],"-model",3))
1042 printf("%d\n",cpuid->model);
1044 else if(!strncmp(argv[1],"-stepping",3))
1046 printf("%d\n",cpuid->stepping);
1048 else if(!strncmp(argv[1],"-features",3))
1051 for(i=0;i<GMX_CPUID_NFEATURES;i++)
1053 if(cpuid->feature[i]==1)
1059 printf("%s",gmx_cpuid_feature_string[i]);
1064 else if(!strncmp(argv[1],"-acceleration",3))
1066 acc = gmx_cpuid_acceleration_suggest(cpuid);
1067 fprintf(stdout,"%s\n",gmx_cpuid_acceleration_string[acc]);
1070 gmx_cpuid_done(cpuid);