1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of GROMACS.
7 * Written by the Gromacs development team under coordination of
8 * David van der Spoel, Berk Hess, and Erik Lindahl.
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * To help us fund GROMACS development, we humbly ask that you cite
16 * the research papers on the package. Check out http://www.gromacs.org
19 * Gnomes, ROck Monsters And Chili Sauce
35 /* MSVC definition for __cpuid() */
37 /* sysinfo functions */
41 /* sysconf() definition */
45 #include "gmx_cpuid.h"
/* For convenience, and to enable configure-time invocation, we keep all architectures
 * in a single file, but to avoid repeated ifdefs we set the overall architecture here.
 *
 * GMX_CPUID_X86 is defined when any of the compiler-predefined x86 target
 * macros tested below is set; it is left undefined otherwise.
 */
#if defined (__i386__) || defined (__x86_64__) || defined (_M_IX86) || defined (_M_X64)
#    define GMX_CPUID_X86
#endif
56 /* Global constant character strings corresponding to our enumerated types */
58 gmx_cpuid_vendor_string[GMX_CPUID_NVENDORS] =
67 gmx_cpuid_feature_string[GMX_CPUID_NFEATURES] =
107 gmx_cpuid_acceleration_string[GMX_CPUID_NACCELERATIONS] =
117 /* Max length of brand string */
118 #define GMX_CPUID_BRAND_MAXLEN 256
121 /* Contents of the abstract datatype */
124 enum gmx_cpuid_vendor vendor;
125 char brand[GMX_CPUID_BRAND_MAXLEN];
129 /* Not using gmx_bool here, since this file must be possible to compile without simple.h */
130 char feature[GMX_CPUID_NFEATURES];
132 /* Basic CPU topology information. For x86 this is a bit complicated since the topology differs between
133 * operating systems and sometimes even settings. For most other architectures you can likely just check
134 * the documentation and then write static information to these arrays rather than detecting on-the-fly.
136 int have_cpu_topology;
137 int nproc; /* total number of logical processors from OS */
139 int ncores_per_package;
140 int nhwthreads_per_core;
142 int * core_id; /* Local core id in each package */
143 int * hwthread_id; /* Local hwthread id in each core */
144 int * locality_order; /* Processor indices sorted in locality order */
148 /* Simple routines to access the data structure. The initialization routine is
149 * further down since that needs to call other static routines in this file.
151 enum gmx_cpuid_vendor
152 gmx_cpuid_vendor (gmx_cpuid_t cpuid)
154 return cpuid->vendor;
159 gmx_cpuid_brand (gmx_cpuid_t cpuid)
165 gmx_cpuid_family (gmx_cpuid_t cpuid)
167 return cpuid->family;
171 gmx_cpuid_model (gmx_cpuid_t cpuid)
177 gmx_cpuid_stepping (gmx_cpuid_t cpuid)
179 return cpuid->stepping;
183 gmx_cpuid_feature (gmx_cpuid_t cpuid,
184 enum gmx_cpuid_feature feature)
186 return (cpuid->feature[feature] != 0);
192 /* What type of acceleration was compiled in, if any?
193 * This is set from Cmake. Note that the SSE2 and SSE4_1 macros are set for
194 * AVX too, so it is important that they appear last in the list.
196 #ifdef GMX_X86_AVX_256
198 enum gmx_cpuid_acceleration
199 compiled_acc = GMX_CPUID_ACCELERATION_X86_AVX_256;
200 #elif defined GMX_X86_AVX_128_FMA
202 enum gmx_cpuid_acceleration
203 compiled_acc = GMX_CPUID_ACCELERATION_X86_AVX_128_FMA;
204 #elif defined GMX_X86_SSE4_1
206 enum gmx_cpuid_acceleration
207 compiled_acc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
208 #elif defined GMX_X86_SSE2
210 enum gmx_cpuid_acceleration
211 compiled_acc = GMX_CPUID_ACCELERATION_X86_SSE2;
214 enum gmx_cpuid_acceleration
215 compiled_acc = GMX_CPUID_ACCELERATION_NONE;
/* Execute CPUID on x86 class CPUs. level sets function to exec, and the
 * contents of register output is returned. See Intel/AMD docs for details.
 *
 * This version supports extended information where we can also have an input
 * value in the ecx register. This is ignored for most levels, but some of them
 * (e.g. level 0xB on Intel) use it.
 *
 * level           CPUID function number (placed in eax).
 * ecxval          Sub-function number (placed in ecx).
 * eax,ebx,ecx,edx Receive the register contents after the instruction.
 *
 * Returns 0 on success. Returns -1 when the requested query could not be
 * executed: either a nonzero ecxval was requested on an MSVC version without
 * __cpuidex(), or no method to issue CPUID is available at all (in which case
 * all four outputs are set to zero).
 */
static int
execute_x86cpuid(unsigned int   level,
                 unsigned int   ecxval,
                 unsigned int * eax,
                 unsigned int * ebx,
                 unsigned int * ecx,
                 unsigned int * edx)
{
    int rc = 0;

    /* Currently CPUID is only supported (1) if we can use an instruction on MSVC, or (2)
     * if the compiler handles GNU-style inline assembly.
     */

#if (defined _MSC_VER)
    int CPUInfo[4];

    /* Logical AND is intended here; the previous bitwise '&' only gave the
     * right answer because both comparisons evaluate to 0 or 1.
     */
#if (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729)
    /* MSVC 9.0 SP1 or later */
    __cpuidex(CPUInfo, level, ecxval);
    rc = 0;
#else
    __cpuid(CPUInfo, level);
    /* Set an error code if the user wanted a non-zero ecxval, since we did not have cpuidex */
    rc = (ecxval > 0) ? -1 : 0;
#endif
    *eax = CPUInfo[0];
    *ebx = CPUInfo[1];
    *ecx = CPUInfo[2];
    *edx = CPUInfo[3];

#elif (defined GMX_X86_GCC_INLINE_ASM)
    /* for now this means GMX_X86_GCC_INLINE_ASM should be defined,
     * but there might be more options added in the future.
     */
    *eax = level;
    *ecx = ecxval;
    *ebx = 0;
    *edx = 0;
#if defined(__i386__) && defined(__PIC__)
    /* Avoid clobbering the global offset table in 32-bit pic code (ebx register) */
    __asm__ __volatile__ ("xchgl %%ebx, %1 \n\t"
                          "cpuid \n\t"
                          "xchgl %%ebx, %1 \n\t"
                          : "+a" (*eax), "+r" (*ebx), "+c" (*ecx), "+d" (*edx));
#else
    /* i386 without PIC, or x86-64. Things are easy and we can clobber any reg we want :-) */
    __asm__ __volatile__ ("cpuid \n\t"
                          : "+a" (*eax), "+b" (*ebx), "+c" (*ecx), "+d" (*edx));
#endif
    rc = 0;
#else
    /*
     * Apparently this is an x86 platform where we don't know how to call cpuid.
     *
     * This is REALLY bad, since we will lose all Gromacs acceleration.
     */
    *eax = 0;
    *ebx = 0;
    *ecx = 0;
    *edx = 0;

    rc = -1;
#endif
    return rc;
}
296 /* Identify CPU features common to Intel & AMD - mainly brand string,
297 * version and some features. Vendor has already been detected outside this.
300 cpuid_check_common_x86(gmx_cpuid_t cpuid)
302 int fn, max_stdfn, max_extfn;
303 unsigned int eax, ebx, ecx, edx;
304 char str[GMX_CPUID_BRAND_MAXLEN];
307 /* Find largest standard/extended function input value */
308 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
310 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
314 if (max_extfn >= 0x80000005)
316 /* Get CPU brand string */
317 for (fn = 0x80000002; fn < 0x80000005; fn++)
319 execute_x86cpuid(fn, 0, &eax, &ebx, &ecx, &edx);
321 memcpy(p+4, &ebx, 4);
322 memcpy(p+8, &ecx, 4);
323 memcpy(p+12, &edx, 4);
328 /* Remove empty initial space */
330 while (isspace(*(p)))
334 strncpy(cpuid->brand, p, GMX_CPUID_BRAND_MAXLEN);
338 strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_BRAND_MAXLEN);
341 /* Find basic CPU properties */
344 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
346 cpuid->family = ((eax & 0x0FF00000) >> 20) + ((eax & 0x00000F00) >> 8);
347 /* Note that extended model should be shifted left 4, so only shift right 12 iso 16. */
348 cpuid->model = ((eax & 0x000F0000) >> 12) + ((eax & 0x000000F0) >> 4);
349 cpuid->stepping = (eax & 0x0000000F);
351 /* Feature flags common to AMD and intel */
352 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE3] = (ecx & (1 << 0)) != 0;
353 cpuid->feature[GMX_CPUID_FEATURE_X86_PCLMULDQ] = (ecx & (1 << 1)) != 0;
354 cpuid->feature[GMX_CPUID_FEATURE_X86_SSSE3] = (ecx & (1 << 9)) != 0;
355 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA] = (ecx & (1 << 12)) != 0;
356 cpuid->feature[GMX_CPUID_FEATURE_X86_CX16] = (ecx & (1 << 13)) != 0;
357 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_1] = (ecx & (1 << 19)) != 0;
358 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_2] = (ecx & (1 << 20)) != 0;
359 cpuid->feature[GMX_CPUID_FEATURE_X86_POPCNT] = (ecx & (1 << 23)) != 0;
360 cpuid->feature[GMX_CPUID_FEATURE_X86_AES] = (ecx & (1 << 25)) != 0;
361 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX] = (ecx & (1 << 28)) != 0;
362 cpuid->feature[GMX_CPUID_FEATURE_X86_F16C] = (ecx & (1 << 29)) != 0;
363 cpuid->feature[GMX_CPUID_FEATURE_X86_RDRND] = (ecx & (1 << 30)) != 0;
365 cpuid->feature[GMX_CPUID_FEATURE_X86_PSE] = (edx & (1 << 3)) != 0;
366 cpuid->feature[GMX_CPUID_FEATURE_X86_MSR] = (edx & (1 << 5)) != 0;
367 cpuid->feature[GMX_CPUID_FEATURE_X86_CX8] = (edx & (1 << 8)) != 0;
368 cpuid->feature[GMX_CPUID_FEATURE_X86_APIC] = (edx & (1 << 9)) != 0;
369 cpuid->feature[GMX_CPUID_FEATURE_X86_CMOV] = (edx & (1 << 15)) != 0;
370 cpuid->feature[GMX_CPUID_FEATURE_X86_CLFSH] = (edx & (1 << 19)) != 0;
371 cpuid->feature[GMX_CPUID_FEATURE_X86_MMX] = (edx & (1 << 23)) != 0;
372 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE2] = (edx & (1 << 26)) != 0;
373 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = (edx & (1 << 28)) != 0;
379 cpuid->stepping = -1;
382 if (max_extfn >= 0x80000001)
384 execute_x86cpuid(0x80000001, 0, &eax, &ebx, &ecx, &edx);
385 cpuid->feature[GMX_CPUID_FEATURE_X86_LAHF_LM] = (ecx & (1 << 0)) != 0;
386 cpuid->feature[GMX_CPUID_FEATURE_X86_PDPE1GB] = (edx & (1 << 26)) != 0;
387 cpuid->feature[GMX_CPUID_FEATURE_X86_RDTSCP] = (edx & (1 << 27)) != 0;
390 if (max_extfn >= 0x80000007)
392 execute_x86cpuid(0x80000007, 0, &eax, &ebx, &ecx, &edx);
393 cpuid->feature[GMX_CPUID_FEATURE_X86_NONSTOP_TSC] = (edx & (1 << 8)) != 0;
/* This routine returns the number of unique different elements found in the array,
 * and renumbers these starting from 0. For example, the array {0,1,2,8,9,10,8,9,10,0,1,2}
 * will be rewritten to {0,1,2,3,4,5,3,4,5,0,1,2}, and it returns 6 for the
 * number of unique elements.
 *
 * data   Array to renumber in place; each value is replaced by its rank
 *        among the distinct values (smallest value becomes 0).
 * n      Number of entries in data.
 *
 * Returns the number of distinct values. Returns 0, leaving data untouched,
 * when data is NULL, n is not positive, or the scratch buffer cannot be
 * allocated.
 */
static int
cpuid_renumber_elements(int *data, int n)
{
    int *unique;
    int  i, j, nunique, found;

    if (data == NULL || n <= 0)
    {
        return 0;
    }

    unique = malloc(sizeof(*unique) * (size_t) n);
    if (unique == NULL)
    {
        return 0;
    }

    nunique = 0;
    for (i = 0; i < n; i++)
    {
        for (j = 0, found = 0; j < nunique && !found; j++)
        {
            found = (data[i] == unique[j]);
        }
        if (!found)
        {
            /* Insert in sorted order! */
            for (j = nunique++; j > 0 && unique[j-1] > data[i]; j--)
            {
                unique[j] = unique[j-1];
            }
            unique[j] = data[i];
        }
    }

    /* Replace every value by its position in the sorted list of distinct values */
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < nunique; j++)
        {
            if (data[i] == unique[j])
            {
                data[i] = j;
                /* Stop here: the freshly written rank must not be matched
                 * against a later entry of unique[] (possible with negative
                 * input values).
                 */
                break;
            }
        }
    }

    free(unique);
    return nunique;
}
442 /* APIC IDs, or everything you wanted to know about your x86 cores but were afraid to ask...
444 * Raw APIC IDs are unfortunately somewhat dirty. For technical reasons they are assigned
445 * in power-of-2 chunks, and even then there are no guarantees about specific numbers - all
446 * we know is that the part for each thread/core/package is unique, and how many bits are
447 * reserved for that part.
448 * This routine does internal renumbering so we get continuous indices, and also
449 * decodes the actual number of packages,cores-per-package and hwthreads-per-core.
452 cpuid_x86_decode_apic_id(gmx_cpuid_t cpuid, int *apic_id, int core_bits, int hwthread_bits)
455 int hwthread_mask, core_mask_after_shift;
457 cpuid->hwthread_id = malloc(sizeof(int)*cpuid->nproc);
458 cpuid->core_id = malloc(sizeof(int)*cpuid->nproc);
459 cpuid->package_id = malloc(sizeof(int)*cpuid->nproc);
460 cpuid->locality_order = malloc(sizeof(int)*cpuid->nproc);
462 hwthread_mask = (1 << hwthread_bits) - 1;
463 core_mask_after_shift = (1 << core_bits) - 1;
465 for (i = 0; i < cpuid->nproc; i++)
467 cpuid->hwthread_id[i] = apic_id[i] & hwthread_mask;
468 cpuid->core_id[i] = (apic_id[i] >> hwthread_bits) & core_mask_after_shift;
469 cpuid->package_id[i] = apic_id[i] >> (core_bits + hwthread_bits);
472 cpuid->npackages = cpuid_renumber_elements(cpuid->package_id, cpuid->nproc);
473 cpuid->ncores_per_package = cpuid_renumber_elements(cpuid->core_id, cpuid->nproc);
474 cpuid->nhwthreads_per_core = cpuid_renumber_elements(cpuid->hwthread_id, cpuid->nproc);
476 /* Create a locality order array, i.e. first all resources in package0, which in turn
477 * are sorted so we first have all resources in core0, where threads are sorted in order, etc.
479 for (i = 0; i < cpuid->nproc; i++)
481 idx = (cpuid->package_id[i]*cpuid->ncores_per_package + cpuid->core_id[i])*cpuid->nhwthreads_per_core + cpuid->hwthread_id[i];
482 cpuid->locality_order[idx] = i;
487 /* Detection of AMD-specific CPU features */
489 cpuid_check_amd_x86(gmx_cpuid_t cpuid)
491 int max_stdfn, max_extfn;
492 unsigned int eax, ebx, ecx, edx;
493 int hwthread_bits, core_bits;
496 cpuid_check_common_x86(cpuid);
498 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
501 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
504 if (max_extfn >= 0x80000001)
506 execute_x86cpuid(0x80000001, 0, &eax, &ebx, &ecx, &edx);
508 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4A] = (ecx & (1 << 6)) != 0;
509 cpuid->feature[GMX_CPUID_FEATURE_X86_MISALIGNSSE] = (ecx & (1 << 7)) != 0;
510 cpuid->feature[GMX_CPUID_FEATURE_X86_XOP] = (ecx & (1 << 11)) != 0;
511 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA4] = (ecx & (1 << 16)) != 0;
514 /* Query APIC information on AMD */
515 if (max_extfn >= 0x80000008)
517 #if (defined HAVE_SCHED_H && defined HAVE_SCHED_SETAFFINITY && defined HAVE_SYSCONF && defined __linux__)
520 cpu_set_t cpuset, save_cpuset;
521 cpuid->nproc = sysconf(_SC_NPROCESSORS_ONLN);
522 apic_id = malloc(sizeof(int)*cpuid->nproc);
523 sched_getaffinity(0, sizeof(cpu_set_t), &save_cpuset);
524 /* Get APIC id from each core */
526 for (i = 0; i < cpuid->nproc; i++)
529 sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
530 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
531 apic_id[i] = ebx >> 24;
534 /* Reset affinity to the value it had when calling this routine */
535 sched_setaffinity(0, sizeof(cpu_set_t), &save_cpuset);
536 #define CPUID_HAVE_APIC
537 #elif defined GMX_NATIVE_WINDOWS
541 unsigned int save_affinity, affinity;
542 GetSystemInfo( &sysinfo );
543 cpuid->nproc = sysinfo.dwNumberOfProcessors;
544 apic_id = malloc(sizeof(int)*cpuid->nproc);
545 /* Get previous affinity mask */
546 save_affinity = SetThreadAffinityMask(GetCurrentThread(), 1);
547 for (i = 0; i < cpuid->nproc; i++)
549 SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR)1)<<i));
551 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
552 apic_id[i] = ebx >> 24;
554 SetThreadAffinityMask(GetCurrentThread(), save_affinity);
555 #define CPUID_HAVE_APIC
557 #ifdef CPUID_HAVE_APIC
558 /* AMD does not support SMT yet - there are no hwthread bits in apic ID */
560 /* Get number of core bits in apic ID - try modern extended method first */
561 execute_x86cpuid(0x80000008, 0, &eax, &ebx, &ecx, &edx);
562 core_bits = (ecx >> 12) & 0xf;
565 /* Legacy method for old single/dual core AMD CPUs */
567 for (core_bits = 0; (i>>core_bits) > 0; core_bits++)
572 cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits, hwthread_bits);
573 cpuid->have_cpu_topology = 1;
579 /* Detection of Intel-specific CPU features */
581 cpuid_check_intel_x86(gmx_cpuid_t cpuid)
583 unsigned int max_stdfn, max_extfn;
584 unsigned int eax, ebx, ecx, edx;
585 unsigned int max_logical_cores, max_physical_cores;
586 int hwthread_bits, core_bits;
589 cpuid_check_common_x86(cpuid);
591 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
594 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
599 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
600 cpuid->feature[GMX_CPUID_FEATURE_X86_PDCM] = (ecx & (1 << 15)) != 0;
601 cpuid->feature[GMX_CPUID_FEATURE_X86_PCID] = (ecx & (1 << 17)) != 0;
602 cpuid->feature[GMX_CPUID_FEATURE_X86_X2APIC] = (ecx & (1 << 21)) != 0;
603 cpuid->feature[GMX_CPUID_FEATURE_X86_TDT] = (ecx & (1 << 24)) != 0;
608 execute_x86cpuid(0x7, 0, &eax, &ebx, &ecx, &edx);
609 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX2] = (ebx & (1 << 5)) != 0;
612 /* Check whether Hyper-Threading is enabled, not only supported */
613 if (cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] && max_stdfn >= 4)
615 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
616 max_logical_cores = (ebx >> 16) & 0x0FF;
617 execute_x86cpuid(0x4, 0, &eax, &ebx, &ecx, &edx);
618 max_physical_cores = ((eax >> 26) & 0x3F) + 1;
620 /* Clear HTT flag if we only have 1 logical core per physical */
621 if (max_logical_cores/max_physical_cores < 2)
623 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = 0;
627 if (max_stdfn >= 0xB)
629 /* Query x2 APIC information from cores */
630 #if (defined HAVE_SCHED_H && defined HAVE_SCHED_SETAFFINITY && defined HAVE_SYSCONF && defined __linux__)
633 cpu_set_t cpuset, save_cpuset;
634 cpuid->nproc = sysconf(_SC_NPROCESSORS_ONLN);
635 apic_id = malloc(sizeof(int)*cpuid->nproc);
636 sched_getaffinity(0, sizeof(cpu_set_t), &save_cpuset);
637 /* Get x2APIC ID from each hardware thread */
639 for (i = 0; i < cpuid->nproc; i++)
642 sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
643 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
647 /* Reset affinity to the value it had when calling this routine */
648 sched_setaffinity(0, sizeof(cpu_set_t), &save_cpuset);
649 #define CPUID_HAVE_APIC
650 #elif defined GMX_NATIVE_WINDOWS
654 unsigned int save_affinity, affinity;
655 GetSystemInfo( &sysinfo );
656 cpuid->nproc = sysinfo.dwNumberOfProcessors;
657 apic_id = malloc(sizeof(int)*cpuid->nproc);
658 /* Get previous affinity mask */
659 save_affinity = SetThreadAffinityMask(GetCurrentThread(), 1);
660 for (i = 0; i < cpuid->nproc; i++)
662 SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR)1)<<i));
664 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
667 SetThreadAffinityMask(GetCurrentThread(), save_affinity);
668 #define CPUID_HAVE_APIC
670 #ifdef CPUID_HAVE_APIC
671 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
672 hwthread_bits = eax & 0x1F;
673 execute_x86cpuid(0xB, 1, &eax, &ebx, &ecx, &edx);
674 core_bits = (eax & 0x1F) - hwthread_bits;
675 cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits, hwthread_bits);
676 cpuid->have_cpu_topology = 1;
681 #endif /* GMX_CPUID_X86 */
685 /* Try to find the vendor of the current CPU, so we know what specific
686 * detection routine to call.
688 static enum gmx_cpuid_vendor
689 cpuid_check_vendor(void)
691 enum gmx_cpuid_vendor i, vendor;
692 /* Register data used on x86 */
693 unsigned int eax, ebx, ecx, edx;
694 char vendorstring[13];
696 /* Set default first */
697 vendor = GMX_CPUID_VENDOR_UNKNOWN;
700 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
702 memcpy(vendorstring, &ebx, 4);
703 memcpy(vendorstring+4, &edx, 4);
704 memcpy(vendorstring+8, &ecx, 4);
706 vendorstring[12] = '\0';
708 for (i = GMX_CPUID_VENDOR_UNKNOWN; i < GMX_CPUID_NVENDORS; i++)
710 if (!strncmp(vendorstring, gmx_cpuid_vendor_string[i], 12))
716 vendor = GMX_CPUID_VENDOR_UNKNOWN;
725 gmx_cpuid_topology(gmx_cpuid_t cpuid,
728 int * ncores_per_package,
729 int * nhwthreads_per_core,
730 const int ** package_id,
731 const int ** core_id,
732 const int ** hwthread_id,
733 const int ** locality_order)
737 if (cpuid->have_cpu_topology)
739 *nprocessors = cpuid->nproc;
740 *npackages = cpuid->npackages;
741 *ncores_per_package = cpuid->ncores_per_package;
742 *nhwthreads_per_core = cpuid->nhwthreads_per_core;
743 *package_id = cpuid->package_id;
744 *core_id = cpuid->core_id;
745 *hwthread_id = cpuid->hwthread_id;
746 *locality_order = cpuid->locality_order;
757 enum gmx_cpuid_x86_smt
758 gmx_cpuid_x86_smt(gmx_cpuid_t cpuid)
760 enum gmx_cpuid_x86_smt rc;
762 if (cpuid->have_cpu_topology)
764 rc = (cpuid->nhwthreads_per_core > 1) ? GMX_CPUID_X86_SMT_ENABLED : GMX_CPUID_X86_SMT_DISABLED;
766 else if (cpuid->vendor == GMX_CPUID_VENDOR_AMD || gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_HTT) == 0)
768 rc = GMX_CPUID_X86_SMT_DISABLED;
772 rc = GMX_CPUID_X86_SMT_CANNOTDETECT;
779 gmx_cpuid_init (gmx_cpuid_t * pcpuid)
784 cpuid = malloc(sizeof(*cpuid));
788 for (i = 0; i < GMX_CPUID_NFEATURES; i++)
790 cpuid->feature[i] = 0;
792 cpuid->have_cpu_topology = 0;
794 cpuid->npackages = 0;
795 cpuid->ncores_per_package = 0;
796 cpuid->nhwthreads_per_core = 0;
797 cpuid->package_id = NULL;
798 cpuid->core_id = NULL;
799 cpuid->hwthread_id = NULL;
800 cpuid->locality_order = NULL;
802 cpuid->vendor = cpuid_check_vendor();
804 switch (cpuid->vendor)
807 case GMX_CPUID_VENDOR_INTEL:
808 cpuid_check_intel_x86(cpuid);
810 case GMX_CPUID_VENDOR_AMD:
811 cpuid_check_amd_x86(cpuid);
815 /* Could not find vendor */
816 strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_BRAND_MAXLEN);
821 for (i = 0; i < GMX_CPUID_NFEATURES; i++)
823 cpuid->feature[i] = 0;
825 cpuid->feature[GMX_CPUID_FEATURE_CANNOTDETECT] = 1;
835 gmx_cpuid_done (gmx_cpuid_t cpuid)
842 gmx_cpuid_formatstring (gmx_cpuid_t cpuid,
848 enum gmx_cpuid_feature feature;
854 "Family: %2d Model: %2d Stepping: %2d\n"
856 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
857 gmx_cpuid_brand(cpuid),
858 gmx_cpuid_family(cpuid), gmx_cpuid_model(cpuid), gmx_cpuid_stepping(cpuid));
863 "Family: %2d Model: %2d Stepping: %2d\n"
865 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
866 gmx_cpuid_brand(cpuid),
867 gmx_cpuid_family(cpuid), gmx_cpuid_model(cpuid), gmx_cpuid_stepping(cpuid));
875 for (feature = GMX_CPUID_FEATURE_CANNOTDETECT; feature < GMX_CPUID_NFEATURES; feature++)
877 if (gmx_cpuid_feature(cpuid, feature) == 1)
880 _snprintf(str, n, " %s", gmx_cpuid_feature_string[feature]);
882 snprintf(str, n, " %s", gmx_cpuid_feature_string[feature]);
891 _snprintf(str, n, "\n");
893 snprintf(str, n, "\n");
902 enum gmx_cpuid_acceleration
903 gmx_cpuid_acceleration_suggest (gmx_cpuid_t cpuid)
905 enum gmx_cpuid_acceleration tmpacc;
907 tmpacc = GMX_CPUID_ACCELERATION_NONE;
909 if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_INTEL)
911 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
913 tmpacc = GMX_CPUID_ACCELERATION_X86_AVX_256;
915 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE4_1))
917 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
919 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE2))
921 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE2;
924 else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_AMD)
926 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
928 tmpacc = GMX_CPUID_ACCELERATION_X86_AVX_128_FMA;
930 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE4_1))
932 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
934 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE2))
936 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE2;
946 gmx_cpuid_acceleration_check(gmx_cpuid_t cpuid,
951 enum gmx_cpuid_acceleration acc;
953 acc = gmx_cpuid_acceleration_suggest(cpuid);
955 rc = (acc != compiled_acc);
957 gmx_cpuid_formatstring(cpuid, str, 1023);
963 "\nDetecting CPU-specific acceleration.\nPresent hardware specification:\n"
965 "Acceleration most likely to fit this hardware: %s\n"
966 "Acceleration selected at GROMACS compile time: %s\n\n",
968 gmx_cpuid_acceleration_string[acc],
969 gmx_cpuid_acceleration_string[compiled_acc]);
976 fprintf(log, "\nBinary not matching hardware - you might be losing performance.\n"
977 "Acceleration most likely to fit this hardware: %s\n"
978 "Acceleration selected at GROMACS compile time: %s\n\n",
979 gmx_cpuid_acceleration_string[acc],
980 gmx_cpuid_acceleration_string[compiled_acc]);
982 printf("Compiled acceleration: %s (Gromacs could use %s on this machine, which is better)\n",
983 gmx_cpuid_acceleration_string[compiled_acc],
984 gmx_cpuid_acceleration_string[acc]);
#ifdef GMX_CPUID_STANDALONE
/* Stand-alone program to enable queries of CPU features from Cmake.
 * Note that you need to check inline ASM capabilities before compiling and set
 * -DGMX_X86_GCC_INLINE_ASM for the cpuid instruction to work...
 *
 * Exactly one flag (argv[1]) is looked at, and only its first three
 * characters are compared. Without any flag the usage text is printed.
 * An unrecognised flag prints nothing. The exit status is always 0.
 */
int
main(int argc, char **argv)
{
    gmx_cpuid_t                   cpuid;
    enum gmx_cpuid_acceleration   acc;
    const char *                  flag;
    const char *                  separator;
    int                           i;

    if (argc < 2)
    {
        fprintf(stdout,
                "Usage:\n\n%s [flags]\n\n"
                "Available flags:\n"
                "-vendor Print CPU vendor.\n"
                "-brand Print CPU brand string.\n"
                "-family Print CPU family version.\n"
                "-model Print CPU model version.\n"
                "-stepping Print CPU stepping version.\n"
                "-features Print CPU feature flags.\n"
                "-acceleration Print suggested GROMACS acceleration.\n",
                argv[0]);
        return 0;
    }

    flag = argv[1];

    gmx_cpuid_init(&cpuid);

    if (strncmp(flag, "-vendor", 3) == 0)
    {
        printf("%s\n", gmx_cpuid_vendor_string[cpuid->vendor]);
    }
    else if (strncmp(flag, "-brand", 3) == 0)
    {
        printf("%s\n", cpuid->brand);
    }
    else if (strncmp(flag, "-family", 3) == 0)
    {
        printf("%d\n", cpuid->family);
    }
    else if (strncmp(flag, "-model", 3) == 0)
    {
        printf("%d\n", cpuid->model);
    }
    else if (strncmp(flag, "-stepping", 3) == 0)
    {
        printf("%d\n", cpuid->stepping);
    }
    else if (strncmp(flag, "-features", 3) == 0)
    {
        /* Space-separated list: the separator is empty before the first name only */
        separator = "";
        for (i = 0; i < GMX_CPUID_NFEATURES; i++)
        {
            if (cpuid->feature[i] == 1)
            {
                printf("%s", separator);
                printf("%s", gmx_cpuid_feature_string[i]);
                separator = " ";
            }
        }
        printf("\n");
    }
    else if (strncmp(flag, "-acceleration", 3) == 0)
    {
        acc = gmx_cpuid_acceleration_suggest(cpuid);
        fprintf(stdout, "%s\n", gmx_cpuid_acceleration_string[acc]);
    }

    gmx_cpuid_done(cpuid);

    return 0;
}

#endif