2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
/* Public cpuid interface header, followed by the platform-specific setup:
 * Windows-only pieces and the test that decides whether the cpuid
 * instruction can be executed from this build (GMX_CPUID_X86).
 */
38 #include "gromacs/legacyheaders/gmx_cpuid.h"
49 #ifdef GMX_NATIVE_WINDOWS
50 /* MSVC definition for __cpuid() */
54 /* sysinfo functions */
61 /* sysconf() definition */
66 /* For convenience, and to enable configure-time invocation, we keep all architectures
67 * in a single file, but to avoid repeated ifdefs we set the overall architecture here.
70 /* OK, it is x86, but can we execute cpuid? */
/* GMX_CPUID_X86 is defined when GMX_X86_GCC_INLINE_ASM is defined, or when
 * _MSC_VER is defined and is either above 1500 or equal to 1500 with
 * _MSC_FULL_VER >= 150030729.
 * NOTE(review): the single '&' is a bitwise AND of two comparison results
 * (each 0 or 1), so it evaluates like '&&' here; '&&' would state the
 * intent directly.
 */
71 #if defined(GMX_X86_GCC_INLINE_ASM) || ( defined(_MSC_VER) && ( (_MSC_VER > 1500) || (_MSC_VER == 1500 & _MSC_FULL_VER >= 150030729)))
72 # define GMX_CPUID_X86
76 /* Global constant character strings corresponding to our enumerated types */
/* Vendor names, indexed by enum gmx_cpuid_vendor. cpuid_check_vendor()
 * compares these with the string reported by the CPU or /proc/cpuinfo.
 */
78 gmx_cpuid_vendor_string[GMX_CPUID_NVENDORS] =
85 "IBM", /* Used on Power and BlueGene/Q */
/* Second spelling per vendor; cpuid_check_vendor() accepts either spelling
 * when searching the /proc/cpuinfo text.
 */
90 gmx_cpuid_vendor_string_alternative[GMX_CPUID_NVENDORS] =
97 "ibm", /* Used on Power and BlueGene/Q */
/* Printable feature names, indexed by enum gmx_cpuid_feature */
102 gmx_cpuid_feature_string[GMX_CPUID_NFEATURES] =
/* Printable SIMD instruction-set names, indexed by enum gmx_cpuid_simd */
151 gmx_cpuid_simd_string[GMX_CPUID_NSIMD] =
/* Size of the brand buffer and of the scratch line buffers used when
 * parsing /proc/cpuinfo in this file.
 */
171 /* Max length of brand string */
172 #define GMX_CPUID_STRLEN 256
175 /* Contents of the abstract datatype */
/* Vendor as returned by cpuid_check_vendor() in gmx_cpuid_init() */
178 enum gmx_cpuid_vendor vendor;
/* Human-readable CPU name; "Unknown CPU brand" when it cannot be read */
179 char brand[GMX_CPUID_STRLEN];
/* One entry per enum gmx_cpuid_feature value; non-zero means the feature
 * was detected (see gmx_cpuid_feature()).
 */
183 /* Not using gmx_bool here, since this file must be possible to compile without simple.h */
184 char feature[GMX_CPUID_NFEATURES];
186 /* Basic CPU topology information. For x86 this is a bit complicated since the topology differs between
187 * operating systems and sometimes even settings. For most other architectures you can likely just check
188 * the documentation and then write static information to these arrays rather than detecting on-the-fly.
190 int have_cpu_topology;
191 int nproc; /* total number of logical processors from OS */
/* The counts and arrays below are filled in by cpuid_x86_decode_apic_id();
 * gmx_cpuid_init() starts them at 0 / NULL.
 */
193 int ncores_per_package;
194 int nhwthreads_per_core;
196 int * core_id; /* Local core id in each package */
197 int * hwthread_id; /* Local hwthread id in each core */
198 int * locality_order; /* Processor indices sorted in locality order */
202 /* Simple routines to access the data structure. The initialization routine is
203 * further down, since it needs to call other static routines in this file. */
/* Return the vendor stored in the cpuid data structure. */
205 enum gmx_cpuid_vendor
206 gmx_cpuid_vendor (gmx_cpuid_t cpuid)
208 return cpuid->vendor;
/* Accessor for the CPU brand. Only the signature is present in this listing;
 * other code in this file passes the result to strstr(), i.e. it is used
 * as a C string.
 */
213 gmx_cpuid_brand (gmx_cpuid_t cpuid)
/* Return the CPU family number stored in the cpuid data structure. */
219 gmx_cpuid_family (gmx_cpuid_t cpuid)
221 return cpuid->family;
/* Accessor for the CPU model number. Only the signature is present in this
 * listing; gmx_cpuid_formatstring() prints the result with %d.
 */
225 gmx_cpuid_model (gmx_cpuid_t cpuid)
/* Return the CPU stepping stored in the cpuid data structure. */
231 gmx_cpuid_stepping (gmx_cpuid_t cpuid)
233 return cpuid->stepping;
/* Return 1 when the given feature flag is set in the cpuid data structure
 * and 0 otherwise. The feature value is used directly as an array index.
 */
237 gmx_cpuid_feature (gmx_cpuid_t cpuid,
238 enum gmx_cpuid_feature feature)
240 return (cpuid->feature[feature] != 0);
/* Return non-zero when the detected CPU is an Intel part of family 6 whose
 * model number is one of 0x2E, 0x1A, 0x1E, 0x2F, 0x2C or 0x25, which is the
 * set this file treats as Nehalem.
 */
245 gmx_cpuid_is_intel_nehalem (const gmx_cpuid_t cpuid)
247 return (cpuid->vendor == GMX_CPUID_VENDOR_INTEL &&
248 cpuid->family == 6 &&
249 (cpuid->model == 0x2E ||
250 cpuid->model == 0x1A ||
251 cpuid->model == 0x1E ||
252 cpuid->model == 0x2F ||
253 cpuid->model == 0x2C ||
254 cpuid->model == 0x25));
/* compiled_simd records which GMX_SIMD_* macro was defined when this file
 * was built. The macros are tested in the order written below, so the first
 * one that is defined wins; GMX_CPUID_SIMD_NONE is used when none matches.
 * gmx_cpuid_simd_check() compares this value with the suggested SIMD level.
 */
258 /* What type of SIMD was compiled in, if any? */
259 #ifdef GMX_SIMD_X86_AVX_512ER
260 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_512ER;
261 #elif defined GMX_SIMD_X86_AVX_512F
262 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_512F;
263 #elif defined GMX_SIMD_X86_AVX2_256
264 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX2_256;
265 #elif defined GMX_SIMD_X86_AVX_256
266 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_256;
267 #elif defined GMX_SIMD_X86_AVX_128_FMA
268 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_128_FMA;
269 #elif defined GMX_SIMD_X86_SSE4_1
270 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE4_1;
271 #elif defined GMX_SIMD_X86_SSE2
272 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE2;
273 #elif defined GMX_SIMD_ARM_NEON
274 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_ARM_NEON;
275 #elif defined GMX_SIMD_ARM_NEON_ASIMD
276 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_ARM_NEON_ASIMD;
277 #elif defined GMX_SIMD_SPARC64_HPC_ACE
278 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_SPARC64_HPC_ACE;
279 #elif defined GMX_SIMD_IBM_QPX
280 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_QPX;
281 #elif defined GMX_SIMD_IBM_VMX
282 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_VMX;
283 #elif defined GMX_SIMD_IBM_VSX
284 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_VSX;
285 #elif defined GMX_SIMD_REFERENCE
286 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_REFERENCE;
/* Fallback definition used when none of the macros above is defined */
288 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_NONE;
/* Body of the accessor that hands compiled_simd to callers; its signature
 * is not part of this listing.
 */
295 return compiled_simd;
301 /* Execute CPUID on x86 class CPUs. level sets the function to execute, and the
302 * contents of the output registers are returned. See Intel/AMD docs for details.
304 * This version supports extended information where we can also have an input
305 * value in the ecx register. This is ignored for most levels, but some of them
306 * (e.g. level 0xB on Intel) use it. */
/* Run the cpuid instruction for the given level. Callers in this file pass
 * the level, an ecx input value and the addresses of four unsigned ints that
 * receive eax, ebx, ecx and edx.
 *
 * Three build variants are visible below:
 *  - MSVC: __cpuidex() when the compiler is new enough, otherwise __cpuid(),
 *    in which case rc becomes -1 if a non-zero ecx input was requested.
 *  - GNU-style inline assembly: cpuid is issued directly; for 32-bit PIC
 *    code ebx is exchanged around the instruction.
 *  - neither available: see the trailing comment about losing SIMD support.
 */
309 execute_x86cpuid(unsigned int level,
318 /* Currently CPUID is only supported (1) if we can use an instruction on MSVC, or (2)
319 * if the compiler handles GNU-style inline assembly.
322 #if (defined _MSC_VER)
325 #if (_MSC_VER > 1500) || (_MSC_VER == 1500 & _MSC_FULL_VER >= 150030729)
326 /* MSVC 9.0 SP1 or later */
327 __cpuidex(CPUInfo, level, ecxval);
330 __cpuid(CPUInfo, level);
331 /* Set an error code if the user wanted a non-zero ecxval, since we did not have cpuidex */
332 rc = (ecxval > 0) ? -1 : 0;
339 #elif (defined GMX_X86_GCC_INLINE_ASM)
340 /* for now this means GMX_X86_GCC_INLINE_ASM should be defined,
341 * but there might be more options added in the future.
347 #if defined(__i386__) && defined(__PIC__)
348 /* Avoid clobbering the global offset table in 32-bit pic code (ebx register) */
349 __asm__ __volatile__ ("xchgl %%ebx, %1 \n\t"
351 "xchgl %%ebx, %1 \n\t"
352 : "+a" (*eax), "+r" (*ebx), "+c" (*ecx), "+d" (*edx));
354 /* i386 without PIC, or x86-64. Things are easy and we can clobber any reg we want :-) */
355 __asm__ __volatile__ ("cpuid \n\t"
356 : "+a" (*eax), "+b" (*ebx), "+c" (*ecx), "+d" (*edx));
361 * Apparently this is an x86 platform where we don't know how to call cpuid.
363 * This is REALLY bad, since we will lose all Gromacs SIMD support.
376 /* Identify CPU features common to Intel & AMD - mainly brand string,
377 * version and some features. The vendor has already been detected outside this routine. */
/* Fill in the brand string, family/model/stepping and the feature flags that
 * are decoded the same way for Intel and AMD. All values come from
 * execute_x86cpuid(); the vendor field is not written here.
 */
380 cpuid_check_common_x86(gmx_cpuid_t cpuid)
382 int fn, max_stdfn, max_extfn;
383 unsigned int eax, ebx, ecx, edx;
384 char str[GMX_CPUID_STRLEN];
387 /* Find largest standard/extended function input value */
388 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
390 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
/* The brand string is only read when extended level 0x80000005 is reported */
394 if (max_extfn >= 0x80000005)
396 /* Get CPU brand string */
/* Levels 0x80000002 through 0x80000004 are queried. The three copies shown
 * place ebx, ecx and edx at byte offsets 4, 8 and 12 from p; the eax copy
 * and the advance of p are not part of this listing.
 */
397 for (fn = 0x80000002; fn < 0x80000005; fn++)
399 execute_x86cpuid(fn, 0, &eax, &ebx, &ecx, &edx);
401 memcpy(p+4, &ebx, 4);
402 memcpy(p+8, &ecx, 4);
403 memcpy(p+12, &edx, 4);
408 /* Remove empty initial space */
410 while (isspace(*(p)))
/* NOTE(review): strncpy() does not write a terminating NUL when the source
 * holds GMX_CPUID_STRLEN or more characters - confirm the source is always
 * shorter than the destination here.
 */
414 strncpy(cpuid->brand, p, GMX_CPUID_STRLEN);
/* Fixed fallback text, shorter than the destination buffer */
418 strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
421 /* Find basic CPU properties */
424 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
/* family = eax bits 20-27 plus eax bits 8-11 */
426 cpuid->family = ((eax & 0x0FF00000) >> 20) + ((eax & 0x00000F00) >> 8);
427 /* Note that extended model should be shifted left 4, so only shift right 12 iso 16. */
428 cpuid->model = ((eax & 0x000F0000) >> 12) + ((eax & 0x000000F0) >> 4);
/* stepping = eax bits 0-3 */
429 cpuid->stepping = (eax & 0x0000000F);
/* Each flag below is a single bit of the ecx or edx value returned for
 * level 0x1, stored as 0 or 1.
 */
431 /* Feature flags common to AMD and intel */
432 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE3] = (ecx & (1 << 0)) != 0;
433 cpuid->feature[GMX_CPUID_FEATURE_X86_PCLMULDQ] = (ecx & (1 << 1)) != 0;
434 cpuid->feature[GMX_CPUID_FEATURE_X86_SSSE3] = (ecx & (1 << 9)) != 0;
435 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA] = (ecx & (1 << 12)) != 0;
436 cpuid->feature[GMX_CPUID_FEATURE_X86_CX16] = (ecx & (1 << 13)) != 0;
437 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_1] = (ecx & (1 << 19)) != 0;
438 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_2] = (ecx & (1 << 20)) != 0;
439 cpuid->feature[GMX_CPUID_FEATURE_X86_POPCNT] = (ecx & (1 << 23)) != 0;
440 cpuid->feature[GMX_CPUID_FEATURE_X86_AES] = (ecx & (1 << 25)) != 0;
441 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX] = (ecx & (1 << 28)) != 0;
442 cpuid->feature[GMX_CPUID_FEATURE_X86_F16C] = (ecx & (1 << 29)) != 0;
443 cpuid->feature[GMX_CPUID_FEATURE_X86_RDRND] = (ecx & (1 << 30)) != 0;
445 cpuid->feature[GMX_CPUID_FEATURE_X86_PSE] = (edx & (1 << 3)) != 0;
446 cpuid->feature[GMX_CPUID_FEATURE_X86_MSR] = (edx & (1 << 5)) != 0;
447 cpuid->feature[GMX_CPUID_FEATURE_X86_CX8] = (edx & (1 << 8)) != 0;
448 cpuid->feature[GMX_CPUID_FEATURE_X86_APIC] = (edx & (1 << 9)) != 0;
449 cpuid->feature[GMX_CPUID_FEATURE_X86_CMOV] = (edx & (1 << 15)) != 0;
450 cpuid->feature[GMX_CPUID_FEATURE_X86_CLFSH] = (edx & (1 << 19)) != 0;
451 cpuid->feature[GMX_CPUID_FEATURE_X86_MMX] = (edx & (1 << 23)) != 0;
452 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE2] = (edx & (1 << 26)) != 0;
453 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = (edx & (1 << 28)) != 0;
/* Fallback value; the condition that selects this branch is not part of
 * this listing.
 */
459 cpuid->stepping = -1;
/* Flags taken from extended level 0x80000001, when that level is reported */
462 if (max_extfn >= 0x80000001)
464 execute_x86cpuid(0x80000001, 0, &eax, &ebx, &ecx, &edx);
465 cpuid->feature[GMX_CPUID_FEATURE_X86_LAHF_LM] = (ecx & (1 << 0)) != 0;
466 cpuid->feature[GMX_CPUID_FEATURE_X86_PDPE1GB] = (edx & (1 << 26)) != 0;
467 cpuid->feature[GMX_CPUID_FEATURE_X86_RDTSCP] = (edx & (1 << 27)) != 0;
/* Non-stop TSC flag from extended level 0x80000007, edx bit 8 */
470 if (max_extfn >= 0x80000007)
472 execute_x86cpuid(0x80000007, 0, &eax, &ebx, &ecx, &edx);
473 cpuid->feature[GMX_CPUID_FEATURE_X86_NONSTOP_TSC] = (edx & (1 << 8)) != 0;
/* This routine returns the number of unique different elements found in the array,
 * and renumbers these starting from 0. For example, the array {0,1,2,8,9,10,8,9,10,0,1,2}
 * will be rewritten to {0,1,2,3,4,5,3,4,5,0,1,2}, and it returns 6 for the
 * number of unique elements.
 *
 * data  Array to renumber in place; each value is replaced by its rank among
 *       the distinct values (smallest distinct value becomes 0).
 * n     Number of entries in data.
 *
 * Returns the number of distinct values. Returns 0 and leaves data untouched
 * when data is NULL, n is not positive, or the scratch array cannot be
 * allocated; the caller's consistency check on the returned counts then
 * reports the topology as unusable instead of dereferencing a NULL pointer.
 *
 * NOTE(review): this listing does not show the storage class or return type
 * line; "static int" is assumed from the call sites in this file.
 */
static int
cpuid_renumber_elements(int *data, int n)
{
    int *unique;
    int  i, j, nunique, found;

    if (data == NULL || n <= 0)
    {
        return 0;
    }

    unique = malloc(sizeof(*unique) * (size_t)n);
    if (unique == NULL)
    {
        return 0;
    }

    /* Collect the distinct values, kept in ascending order */
    nunique = 0;
    for (i = 0; i < n; i++)
    {
        for (j = 0, found = 0; j < nunique && !found; j++)
        {
            found = (data[i] == unique[j]);
        }
        if (!found)
        {
            /* Insert in sorted order! */
            for (j = nunique++; j > 0 && unique[j-1] > data[i]; j--)
            {
                unique[j] = unique[j-1];
            }
            unique[j] = data[i];
        }
    }

    /* Replace every value by its position in the sorted distinct list.
     * Stop at the first match so the freshly written index is never
     * compared against the remaining distinct values.
     */
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < nunique; j++)
        {
            if (data[i] == unique[j])
            {
                data[i] = j;
                break;
            }
        }
    }

    free(unique);
    return nunique;
}
523 /* APIC IDs, or everything you wanted to know about your x86 cores but were afraid to ask...
525 * Raw APIC IDs are unfortunately somewhat dirty. For technical reasons they are assigned
526 * in power-of-2 chunks, and even then there are no guarantees about specific numbers - all
527 * we know is that the part for each thread/core/package is unique, and how many bits are
528 * reserved for that part.
529 * This routine does internal renumbering so we get contiguous indices, and also
530 * decodes the actual number of packages, cores-per-package and hwthreads-per-core.
531 * Returns: 0 on success, non-zero on failure. */
/* Split the raw APIC id of every logical processor into hardware-thread,
 * core and package fields, renumber each field to contiguous indices, and
 * build the locality-ordered processor list.
 *
 * cpuid          Data structure to fill; cpuid->nproc must already be set.
 * apic_id        One raw APIC id per logical processor (nproc entries).
 * core_bits      Number of APIC id bits that hold the core field.
 * hwthread_bits  Number of low APIC id bits that hold the hw-thread field.
 */
534 cpuid_x86_decode_apic_id(gmx_cpuid_t cpuid, int *apic_id, int core_bits, int hwthread_bits)
537 int hwthread_mask, core_mask_after_shift;
/* One entry per logical processor in each array.
 * NOTE(review): none of these four allocation results is checked in the
 * lines visible here.
 */
539 cpuid->hwthread_id = malloc(sizeof(int)*cpuid->nproc);
540 cpuid->core_id = malloc(sizeof(int)*cpuid->nproc);
541 cpuid->package_id = malloc(sizeof(int)*cpuid->nproc);
542 cpuid->locality_order = malloc(sizeof(int)*cpuid->nproc);
/* Masks for the low hwthread_bits bits and, once those are shifted away,
 * for the next core_bits bits.
 */
544 hwthread_mask = (1 << hwthread_bits) - 1;
545 core_mask_after_shift = (1 << core_bits) - 1;
/* hw-thread = lowest bits, core = middle bits, package = everything above */
547 for (i = 0; i < cpuid->nproc; i++)
549 cpuid->hwthread_id[i] = apic_id[i] & hwthread_mask;
550 cpuid->core_id[i] = (apic_id[i] >> hwthread_bits) & core_mask_after_shift;
551 cpuid->package_id[i] = apic_id[i] >> (core_bits + hwthread_bits);
/* Renumber each field in place; every call returns the number of distinct
 * values it found, which becomes the corresponding count.
 */
554 cpuid->npackages = cpuid_renumber_elements(cpuid->package_id, cpuid->nproc);
555 cpuid->ncores_per_package = cpuid_renumber_elements(cpuid->core_id, cpuid->nproc);
556 cpuid->nhwthreads_per_core = cpuid_renumber_elements(cpuid->hwthread_id, cpuid->nproc);
558 /* now check for consistency */
559 if ( (cpuid->npackages * cpuid->ncores_per_package *
560 cpuid->nhwthreads_per_core) != cpuid->nproc)
562 /* the packages/cores-per-package/hwthreads-per-core counts are
567 /* Create a locality order array, i.e. first all resources in package0, which in turn
568 * are sorted so we first have all resources in core0, where threads are sorted in order, etc.
571 for (i = 0; i < cpuid->nproc; i++)
573 idx = (cpuid->package_id[i]*cpuid->ncores_per_package + cpuid->core_id[i])*cpuid->nhwthreads_per_core + cpuid->hwthread_id[i];
574 cpuid->locality_order[idx] = i;
/* AMD detection: runs the common x86 checks, adds the AMD-only feature
 * flags from extended level 0x80000001 and, where the build supports
 * pinning the calling thread to each processor in turn, collects the APIC
 * ids and decodes the CPU topology from them.
 */
580 /* Detection of AMD-specific CPU features */
582 cpuid_check_amd_x86(gmx_cpuid_t cpuid)
584 int max_stdfn, max_extfn, ret;
585 unsigned int eax, ebx, ecx, edx;
586 int hwthread_bits, core_bits;
589 cpuid_check_common_x86(cpuid);
591 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
594 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
/* AMD-only flags: ecx bits 6, 7, 11 and 16 of extended level 0x80000001 */
597 if (max_extfn >= 0x80000001)
599 execute_x86cpuid(0x80000001, 0, &eax, &ebx, &ecx, &edx);
601 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4A] = (ecx & (1 << 6)) != 0;
602 cpuid->feature[GMX_CPUID_FEATURE_X86_MISALIGNSSE] = (ecx & (1 << 7)) != 0;
603 cpuid->feature[GMX_CPUID_FEATURE_X86_XOP] = (ecx & (1 << 11)) != 0;
604 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA4] = (ecx & (1 << 16)) != 0;
607 /* Query APIC information on AMD */
608 if (max_extfn >= 0x80000008)
/* Linux variant: the thread affinity is changed for every processor index
 * so that cpuid runs on that processor, then restored afterwards.
 */
610 #if (defined HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF && defined __linux__)
613 cpu_set_t cpuset, save_cpuset;
/* NOTE(review): the sysconf() result is used as allocation size and loop
 * bound, and the malloc() result is written through, without a check in
 * the lines visible here.
 */
614 cpuid->nproc = sysconf(_SC_NPROCESSORS_ONLN);
615 apic_id = malloc(sizeof(int)*cpuid->nproc);
616 sched_getaffinity(0, sizeof(cpu_set_t), &save_cpuset);
617 /* Get APIC id from each core */
619 for (i = 0; i < cpuid->nproc; i++)
622 sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
623 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
/* The APIC id is taken from the top eight bits of ebx */
624 apic_id[i] = ebx >> 24;
627 /* Reset affinity to the value it had when calling this routine */
628 sched_setaffinity(0, sizeof(cpu_set_t), &save_cpuset);
629 #define CPUID_HAVE_APIC
/* Windows variant of the same per-processor loop */
630 #elif defined GMX_NATIVE_WINDOWS
/* NOTE(review): the mask returned by SetThreadAffinityMask() is kept in an
 * unsigned int while the mask passed in below is built as DWORD_PTR -
 * confirm this cannot truncate the saved mask on 64-bit builds.
 */
634 unsigned int save_affinity, affinity;
635 GetSystemInfo( &sysinfo );
636 cpuid->nproc = sysinfo.dwNumberOfProcessors;
637 apic_id = malloc(sizeof(int)*cpuid->nproc);
638 /* Get previous affinity mask */
639 save_affinity = SetThreadAffinityMask(GetCurrentThread(), 1);
640 for (i = 0; i < cpuid->nproc; i++)
642 SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR)1)<<i));
644 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
645 apic_id[i] = ebx >> 24;
647 SetThreadAffinityMask(GetCurrentThread(), save_affinity);
648 #define CPUID_HAVE_APIC
/* Only reached when one of the two variants above collected the APIC ids */
650 #ifdef CPUID_HAVE_APIC
651 /* AMD does not support SMT yet - there are no hwthread bits in apic ID */
653 /* Get number of core bits in apic ID - try modern extended method first */
654 execute_x86cpuid(0x80000008, 0, &eax, &ebx, &ecx, &edx);
655 core_bits = (ecx >> 12) & 0xf;
658 /* Legacy method for old single/dual core AMD CPUs */
/* Counts how many right shifts are needed until i becomes zero */
660 for (core_bits = 0; (i>>core_bits) > 0; core_bits++)
/* The topology is only marked usable when decoding reports success (0) */
665 ret = cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits,
667 cpuid->have_cpu_topology = (ret == 0);
/* Intel detection: runs the common x86 checks, adds Intel-only feature
 * flags from levels 0x1 and 0x7, clears the HTT flag when only one logical
 * core per physical core is reported and, where the build supports pinning
 * the calling thread to each processor in turn, queries level 0xB on every
 * processor to decode the CPU topology.
 */
673 /* Detection of Intel-specific CPU features */
675 cpuid_check_intel_x86(gmx_cpuid_t cpuid)
/* ret receives the int result of cpuid_x86_decode_apic_id() and is only
 * compared with 0 below.
 */
677 unsigned int max_stdfn, max_extfn, ret;
678 unsigned int eax, ebx, ecx, edx;
679 unsigned int max_logical_cores, max_physical_cores;
680 int hwthread_bits, core_bits;
683 cpuid_check_common_x86(cpuid);
685 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
688 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
/* Intel-only flags: ecx bits 15, 17, 21 and 24 of level 0x1 */
693 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
694 cpuid->feature[GMX_CPUID_FEATURE_X86_PDCM] = (ecx & (1 << 15)) != 0;
695 cpuid->feature[GMX_CPUID_FEATURE_X86_PCID] = (ecx & (1 << 17)) != 0;
696 cpuid->feature[GMX_CPUID_FEATURE_X86_X2APIC] = (ecx & (1 << 21)) != 0;
697 cpuid->feature[GMX_CPUID_FEATURE_X86_TDT] = (ecx & (1 << 24)) != 0;
/* AVX2 and AVX-512 flags: ebx bits 5, 16, 26, 27 and 28 of level 0x7 */
702 execute_x86cpuid(0x7, 0, &eax, &ebx, &ecx, &edx);
703 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX2] = (ebx & (1 << 5)) != 0;
704 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512F] = (ebx & (1 << 16)) != 0;
705 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512PF] = (ebx & (1 << 26)) != 0;
706 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512ER] = (ebx & (1 << 27)) != 0;
707 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512CD] = (ebx & (1 << 28)) != 0;
710 /* Check whether Hyper-Threading is enabled, not only supported */
711 if (cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] && max_stdfn >= 4)
/* Logical count: ebx bits 16-23 of level 0x1.
 * Physical count: eax bits 26-31 of level 0x4, plus one.
 */
713 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
714 max_logical_cores = (ebx >> 16) & 0x0FF;
715 execute_x86cpuid(0x4, 0, &eax, &ebx, &ecx, &edx);
716 max_physical_cores = ((eax >> 26) & 0x3F) + 1;
718 /* Clear HTT flag if we only have 1 logical core per physical */
/* Integer division: the flag stays set only for a ratio of at least two */
719 if (max_logical_cores/max_physical_cores < 2)
721 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = 0;
725 if (max_stdfn >= 0xB)
727 /* Query x2 APIC information from cores */
/* Linux variant: the thread affinity is changed for every processor index
 * so that cpuid runs on that processor, then restored afterwards.
 */
728 #if (defined HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF && defined __linux__)
731 cpu_set_t cpuset, save_cpuset;
/* NOTE(review): the sysconf() result is used as allocation size and loop
 * bound, and the malloc() result is written through, without a check in
 * the lines visible here.
 */
732 cpuid->nproc = sysconf(_SC_NPROCESSORS_ONLN);
733 apic_id = malloc(sizeof(int)*cpuid->nproc);
734 sched_getaffinity(0, sizeof(cpu_set_t), &save_cpuset);
735 /* Get x2APIC ID from each hardware thread */
737 for (i = 0; i < cpuid->nproc; i++)
740 sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
/* The line that stores the id in apic_id[i] is not part of this listing */
741 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
745 /* Reset affinity to the value it had when calling this routine */
746 sched_setaffinity(0, sizeof(cpu_set_t), &save_cpuset);
747 #define CPUID_HAVE_APIC
/* Windows variant of the same per-processor loop */
748 #elif defined GMX_NATIVE_WINDOWS
/* NOTE(review): the mask returned by SetThreadAffinityMask() is kept in an
 * unsigned int while the mask passed in below is built as DWORD_PTR -
 * confirm this cannot truncate the saved mask on 64-bit builds.
 */
752 unsigned int save_affinity, affinity;
753 GetSystemInfo( &sysinfo );
754 cpuid->nproc = sysinfo.dwNumberOfProcessors;
755 apic_id = malloc(sizeof(int)*cpuid->nproc);
756 /* Get previous affinity mask */
757 save_affinity = SetThreadAffinityMask(GetCurrentThread(), 1);
758 for (i = 0; i < cpuid->nproc; i++)
760 SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR)1)<<i));
762 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
765 SetThreadAffinityMask(GetCurrentThread(), save_affinity);
766 #define CPUID_HAVE_APIC
/* Only reached when one of the two variants above collected the APIC ids */
768 #ifdef CPUID_HAVE_APIC
/* Level 0xB with ecx input 0 gives the hw-thread field width in eax bits
 * 0-4; ecx input 1 gives the combined width, so the core field width is
 * the difference.
 */
769 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
770 hwthread_bits = eax & 0x1F;
771 execute_x86cpuid(0xB, 1, &eax, &ebx, &ecx, &edx);
772 core_bits = (eax & 0x1F) - hwthread_bits;
/* The topology is only marked usable when decoding reports success (0) */
773 ret = cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits,
775 cpuid->have_cpu_topology = (ret == 0);
780 #endif /* GMX_CPUID_X86 */
/* Copy the part of a "key : value" line that precedes the first colon into
 * s, with trailing whitespace removed, so the result can be compared
 * directly against key names such as "vendor_id" or "Processor".
 *
 * in         NUL-terminated input line.
 * s          Output buffer; always NUL-terminated on return.
 * maxlength  Size of s in bytes; at most maxlength-1 characters are stored.
 *
 * Differences from the previous loop "while (isspace(*(--p)) && (p >= s))":
 *  - that loop dereferenced the decremented pointer before checking it
 *    against the buffer start, so a line with nothing but whitespace in
 *    front of the colon read one byte before s; the bound is now checked
 *    before any access;
 *  - the result is always terminated, also when the key is truncated;
 *  - isspace() receives an unsigned char value, as <ctype.h> requires.
 *
 * NOTE(review): the handling of a line without any colon is not visible in
 * this listing; an empty result is assumed, which can never match a key.
 * The "static void" line is likewise assumed.
 */
static void
chomp_substring_before_colon(const char *in, char *s, int maxlength)
{
    const char *colon;
    size_t      len;

    if (s == NULL || maxlength <= 0)
    {
        return;
    }
    *s = '\0';

    if (in == NULL || (colon = strchr(in, ':')) == NULL)
    {
        return;
    }

    len = (size_t)(colon - in);
    if (len > (size_t)maxlength - 1)
    {
        len = (size_t)maxlength - 1;
    }
    /* Drop trailing whitespace; len is tested before in[len-1] is read */
    while (len > 0 && isspace((unsigned char)in[len-1]))
    {
        len--;
    }
    memcpy(s, in, len);
    s[len] = '\0';
}
/* Copy the part of a "key : value" line that follows the first colon into
 * s, with surrounding whitespace (including the newline kept by fgets())
 * removed, so the result can be compared directly against values such as
 * "AArch64" or handed to strtol().
 *
 * in         NUL-terminated input line.
 * s          Output buffer; always NUL-terminated on return.
 * maxlength  Size of s in bytes; at most maxlength-1 characters are stored.
 *
 * Differences from the previous loop "while (isspace(*(--p)) && (p >= s))":
 *  - that loop dereferenced the decremented pointer before checking it
 *    against the buffer start, so an empty value read one byte before s;
 *    the bound is now checked before any access;
 *  - the result is always terminated, also when the value is truncated;
 *  - isspace() receives an unsigned char value, as <ctype.h> requires.
 *
 * NOTE(review): the handling of a line without any colon is not visible in
 * this listing; an empty result is assumed. The "static void" line is
 * likewise assumed.
 */
static void
chomp_substring_after_colon(const char *in, char *s, int maxlength)
{
    const char *p;
    size_t      len;

    if (s == NULL || maxlength <= 0)
    {
        return;
    }
    *s = '\0';

    if (in == NULL || (p = strchr(in, ':')) == NULL)
    {
        return;
    }

    /* Step over the colon and any whitespace that follows it */
    p++;
    while (isspace((unsigned char)*p))
    {
        p++;
    }

    len = strlen(p);
    if (len > (size_t)maxlength - 1)
    {
        len = (size_t)maxlength - 1;
    }
    /* Drop trailing whitespace; len is tested before p[len-1] is read */
    while (len > 0 && isspace((unsigned char)p[len-1]))
    {
        len--;
    }
    memcpy(s, p, len);
    s[len] = '\0';
}
/* ARM detection. On Linux the brand, family, model, stepping and the
 * NEON / NEON-ASIMD feature flags are read from /proc/cpuinfo, one
 * "key : value" line at a time. On other systems the flags are set from
 * compile-time knowledge only (see the comments near the end).
 */
829 cpuid_check_arm(gmx_cpuid_t cpuid)
831 #if defined(__linux__) || defined(__linux)
833 char buffer[GMX_CPUID_STRLEN], buffer2[GMX_CPUID_STRLEN], buffer3[GMX_CPUID_STRLEN];
835 if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
837 while ( (fgets(buffer, sizeof(buffer), fp) != NULL))
/* buffer2 = key in front of the colon, buffer3 = value after it */
839 chomp_substring_before_colon(buffer, buffer2, GMX_CPUID_STRLEN);
840 chomp_substring_after_colon(buffer, buffer3, GMX_CPUID_STRLEN);
842 if (!strcmp(buffer2, "Processor"))
844 strncpy(cpuid->brand, buffer3, GMX_CPUID_STRLEN);
846 else if (!strcmp(buffer2, "CPU architecture"))
/* Decimal parse first; the literal value "AArch64" is handled separately
 * (the statement inside that branch is not part of this listing).
 */
848 cpuid->family = strtol(buffer3, NULL, 10);
849 if (!strcmp(buffer3, "AArch64"))
/* The model is parsed as a hexadecimal number, the stepping as decimal */
854 else if (!strcmp(buffer2, "CPU part"))
856 cpuid->model = strtol(buffer3, NULL, 16);
858 else if (!strcmp(buffer2, "CPU revision"))
860 cpuid->stepping = strtol(buffer3, NULL, 10);
/* NOTE(review): the two "Features" tests are alternatives of one else-if
 * chain, so a line that contains both "neon" and "asimd" only sets the
 * NEON flag - confirm that this is intended.
 */
862 else if (!strcmp(buffer2, "Features") && strstr(buffer3, "neon"))
864 cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON] = 1;
866 else if (!strcmp(buffer2, "Features") && strstr(buffer3, "asimd"))
868 cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON_ASIMD] = 1;
875 /* Strange 64-bit non-linux platform. However, since NEON ASIMD is present on all
876 * implementations of AArch64 this far, we assume it is present for now.
878 cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON_ASIMD] = 1;
880 /* Strange 32-bit non-linux platform. We cannot assume that neon is present. */
881 cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON] = 0;
/* IBM (Power / BlueGene) detection. On Linux the brand string and the
 * VMX / VSX / QPX feature flags are derived from the text of
 * /proc/cpuinfo. The last four statements set the brand to
 * "Unknown CPU brand" and clear all three flags; the condition under which
 * they run is not part of this listing.
 */
889 cpuid_check_ibm(gmx_cpuid_t cpuid)
891 #if defined(__linux__) || defined(__linux)
893 char buffer[GMX_CPUID_STRLEN], before_colon[GMX_CPUID_STRLEN], after_colon[GMX_CPUID_STRLEN];
895 if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
897 while ( (fgets(buffer, sizeof(buffer), fp) != NULL))
/* before_colon = key, after_colon = value of the current line */
899 chomp_substring_before_colon(buffer, before_colon, GMX_CPUID_STRLEN);
900 chomp_substring_after_colon(buffer, after_colon, GMX_CPUID_STRLEN);
/* The value of a "cpu" or "Processor" line becomes the brand string */
902 if (!strcmp(before_colon, "cpu") || !strcmp(before_colon, "Processor"))
904 strncpy(cpuid->brand, after_colon, GMX_CPUID_STRLEN);
/* Feature keywords are searched in the value of any of these four keys */
906 if (!strcmp(before_colon, "model name") ||
907 !strcmp(before_colon, "model") ||
908 !strcmp(before_colon, "Processor") ||
909 !strcmp(before_colon, "cpu"))
/* "altivec" in the value sets the VMX flag */
911 if (strstr(after_colon, "altivec"))
913 cpuid->feature[GMX_CPUID_FEATURE_IBM_VMX] = 1;
/* VSX is set unless the value mentions POWER6 in one of three spellings.
 * NOTE(review): whether this test is nested inside the altivec test cannot
 * be seen here because the braces are not part of this listing.
 */
915 if (!strstr(after_colon, "POWER6") && !strstr(after_colon, "Power6") &&
916 !strstr(after_colon, "power6"))
918 cpuid->feature[GMX_CPUID_FEATURE_IBM_VSX] = 1;
/* A brand string containing "A2" sets the QPX flag */
926 if (strstr(cpuid->brand, "A2"))
929 cpuid->feature[GMX_CPUID_FEATURE_IBM_QPX] = 1;
932 strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
933 cpuid->feature[GMX_CPUID_FEATURE_IBM_QPX] = 0;
934 cpuid->feature[GMX_CPUID_FEATURE_IBM_VMX] = 0;
935 cpuid->feature[GMX_CPUID_FEATURE_IBM_VSX] = 0;
941 /* Try to find the vendor of the current CPU, so we know which specific
942 * detection routine to call. */
/* Determine the CPU vendor. Three strategies are visible below:
 *  - with cpuid available, the 12-character vendor string from level 0x0
 *    is compared with the entries of gmx_cpuid_vendor_string;
 *  - otherwise on Linux, selected /proc/cpuinfo lines are searched for any
 *    known vendor name, with an extra fallback that maps "POWER", "Power"
 *    or "power" to IBM;
 *  - otherwise on ARM builds, the vendor is set to ARM unconditionally.
 * The starting value is GMX_CPUID_VENDOR_UNKNOWN.
 */
944 static enum gmx_cpuid_vendor
945 cpuid_check_vendor(void)
947 enum gmx_cpuid_vendor i, vendor;
948 /* Register data used on x86 */
949 unsigned int eax, ebx, ecx, edx;
950 char vendorstring[13];
952 char buffer[GMX_CPUID_STRLEN];
953 char before_colon[GMX_CPUID_STRLEN];
954 char after_colon[GMX_CPUID_STRLEN];
956 /* Set default first */
957 vendor = GMX_CPUID_VENDOR_UNKNOWN;
960 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
/* The vendor text is assembled in the register order ebx, edx, ecx */
962 memcpy(vendorstring, &ebx, 4);
963 memcpy(vendorstring+4, &edx, 4);
964 memcpy(vendorstring+8, &ecx, 4);
966 vendorstring[12] = '\0';
/* Compare against every known vendor name, 12 characters at most */
968 for (i = GMX_CPUID_VENDOR_UNKNOWN; i < GMX_CPUID_NVENDORS; i++)
970 if (!strncmp(vendorstring, gmx_cpuid_vendor_string[i], 12))
975 #elif defined(__linux__) || defined(__linux)
976 /* General Linux. Try to get CPU vendor from /proc/cpuinfo */
977 if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
/* Reading stops at the first line that yields a vendor */
979 while ( (vendor == GMX_CPUID_VENDOR_UNKNOWN) && (fgets(buffer, sizeof(buffer), fp) != NULL))
981 chomp_substring_before_colon(buffer, before_colon, sizeof(before_colon));
982 /* Intel/AMD use "vendor_id", IBM "vendor", "model", or "cpu". Fujitsu "manufacture".
983 * On ARM there does not seem to be a vendor, but ARM or AArch64 is listed in the Processor string.
984 * Add others if you have them!
986 if (!strcmp(before_colon, "vendor_id")
987 || !strcmp(before_colon, "vendor")
988 || !strcmp(before_colon, "manufacture")
989 || !strcmp(before_colon, "model")
990 || !strcmp(before_colon, "Processor")
991 || !strcmp(before_colon, "cpu"))
993 chomp_substring_after_colon(buffer, after_colon, sizeof(after_colon));
994 for (i = GMX_CPUID_VENDOR_UNKNOWN; i < GMX_CPUID_NVENDORS; i++)
996 /* Be liberal and accept if we find the vendor
997 * string (or alternative string) anywhere. Using
998 * strcasestr() would be non-portable. */
999 if (strstr(after_colon, gmx_cpuid_vendor_string[i])
1000 || strstr(after_colon, gmx_cpuid_vendor_string_alternative[i]))
1005 /* If we did not find vendor yet, check if it is IBM:
1006 * On some Power/PowerPC systems it only says power, not IBM.
1008 if (vendor == GMX_CPUID_VENDOR_UNKNOWN &&
1009 ((strstr(after_colon, "POWER") || strstr(after_colon, "Power") ||
1010 strstr(after_colon, "power"))))
1012 vendor = GMX_CPUID_VENDOR_IBM;
1018 #elif defined(__arm__) || defined (__arm) || defined(__aarch64__)
1019 /* If we are using ARM on something that is not linux we have to trust the compiler,
1020 * and we cannot get the extra info that might be present in /proc/cpuinfo.
1022 vendor = GMX_CPUID_VENDOR_ARM;
/* Hand the detected CPU topology to the caller through output pointers.
 * When cpuid->have_cpu_topology is set, the four counts are copied and the
 * four array pointers are set to the arrays stored in cpuid itself, so no
 * data is duplicated. What happens when no topology is available, and the
 * return value, are not part of this listing.
 */
1030 gmx_cpuid_topology(gmx_cpuid_t cpuid,
1033 int * ncores_per_package,
1034 int * nhwthreads_per_core,
1035 const int ** package_id,
1036 const int ** core_id,
1037 const int ** hwthread_id,
1038 const int ** locality_order)
1042 if (cpuid->have_cpu_topology)
1044 *nprocessors = cpuid->nproc;
1045 *npackages = cpuid->npackages;
1046 *ncores_per_package = cpuid->ncores_per_package;
1047 *nhwthreads_per_core = cpuid->nhwthreads_per_core;
1048 *package_id = cpuid->package_id;
1049 *core_id = cpuid->core_id;
1050 *hwthread_id = cpuid->hwthread_id;
1051 *locality_order = cpuid->locality_order;
/* Report whether simultaneous multithreading is in use on an x86 CPU.
 *  - With topology information: enabled when more than one hardware thread
 *    per core was found, disabled otherwise.
 *  - Without topology: disabled for AMD or when the HTT feature flag is
 *    clear; in every other case the state cannot be detected.
 */
1062 enum gmx_cpuid_x86_smt
1063 gmx_cpuid_x86_smt(gmx_cpuid_t cpuid)
1065 enum gmx_cpuid_x86_smt rc;
1067 if (cpuid->have_cpu_topology)
1069 rc = (cpuid->nhwthreads_per_core > 1) ? GMX_CPUID_X86_SMT_ENABLED : GMX_CPUID_X86_SMT_DISABLED;
1071 else if (cpuid->vendor == GMX_CPUID_VENDOR_AMD || gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_HTT) == 0)
1073 rc = GMX_CPUID_X86_SMT_DISABLED;
1077 rc = GMX_CPUID_X86_SMT_CANNOTDETECT;
/* Allocate and fill a cpuid data structure: all feature flags and topology
 * fields are cleared, the vendor is detected, and the vendor-specific
 * detection routine is run. For any other vendor the brand is looked up in
 * /proc/cpuinfo on Linux, all feature flags are cleared again and
 * GMX_CPUID_FEATURE_CANNOTDETECT is set.
 * How the result reaches *pcpuid, and the return value, are not part of
 * this listing.
 */
1084 gmx_cpuid_init (gmx_cpuid_t * pcpuid)
1089 char buffer[GMX_CPUID_STRLEN], buffer2[GMX_CPUID_STRLEN];
/* NOTE(review): no check of this allocation is visible before the first
 * write through cpuid - confirm one exists in the omitted lines.
 */
1092 cpuid = malloc(sizeof(*cpuid));
1096 for (i = 0; i < GMX_CPUID_NFEATURES; i++)
1098 cpuid->feature[i] = 0;
/* Topology starts out as "not available" with empty arrays */
1101 cpuid->have_cpu_topology = 0;
1103 cpuid->npackages = 0;
1104 cpuid->ncores_per_package = 0;
1105 cpuid->nhwthreads_per_core = 0;
1106 cpuid->package_id = NULL;
1107 cpuid->core_id = NULL;
1108 cpuid->hwthread_id = NULL;
1109 cpuid->locality_order = NULL;
1111 cpuid->vendor = cpuid_check_vendor();
/* Dispatch to the detection routine for the vendor just found; the two x86
 * routines are only compiled in when cpuid can be executed.
 */
1113 switch (cpuid->vendor)
1115 #ifdef GMX_CPUID_X86
1116 case GMX_CPUID_VENDOR_INTEL:
1117 cpuid_check_intel_x86(cpuid);
1119 case GMX_CPUID_VENDOR_AMD:
1120 cpuid_check_amd_x86(cpuid);
1123 case GMX_CPUID_VENDOR_ARM:
1124 cpuid_check_arm(cpuid);
1126 case GMX_CPUID_VENDOR_IBM:
1127 cpuid_check_ibm(cpuid);
/* Remaining vendors: start from the fixed fallback brand text */
1131 strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
1132 #if defined(__linux__) || defined(__linux)
1133 /* General Linux. Try to get CPU type from /proc/cpuinfo */
1134 if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
/* Reading stops once found_brand becomes non-zero */
1137 while ( (found_brand == 0) && (fgets(buffer, sizeof(buffer), fp) != NULL))
1139 chomp_substring_before_colon(buffer, buffer2, sizeof(buffer2));
1140 /* Intel uses "model name", Fujitsu and IBM "cpu". */
1141 if (!strcmp(buffer2, "model name") || !strcmp(buffer2, "cpu"))
/* The value after the colon is written straight into the brand buffer */
1143 chomp_substring_after_colon(buffer, cpuid->brand, GMX_CPUID_STRLEN);
1152 cpuid->stepping = 0;
/* No feature information is available on this path: clear every flag and
 * raise only the "cannot detect" marker.
 */
1154 for (i = 0; i < GMX_CPUID_NFEATURES; i++)
1156 cpuid->feature[i] = 0;
1158 cpuid->feature[GMX_CPUID_FEATURE_CANNOTDETECT] = 1;
/* Counterpart of gmx_cpuid_init(); the stand-alone main() in this file
 * calls it as its last step. Only the signature is present in this listing.
 * NOTE(review): presumably releases the structure and its topology arrays -
 * confirm against the full source.
 */
1167 gmx_cpuid_done (gmx_cpuid_t cpuid)
/* Write a human-readable description of the CPU into a caller-supplied
 * buffer: vendor name, brand, family/model/stepping, then the name of every
 * detected feature, then a newline. Each output step exists twice below,
 * once with _snprintf() and once with snprintf(); the preprocessor lines
 * that choose between them are not part of this listing.
 */
1174 gmx_cpuid_formatstring (gmx_cpuid_t cpuid,
1180 enum gmx_cpuid_feature feature;
1186 " Family: %2d model: %2d stepping: %2d\n"
1188 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
1189 gmx_cpuid_brand(cpuid),
1190 gmx_cpuid_family(cpuid), gmx_cpuid_model(cpuid), gmx_cpuid_stepping(cpuid));
1195 " Family: %2d model: %2d stepping: %2d\n"
1197 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
1198 gmx_cpuid_brand(cpuid),
1199 gmx_cpuid_family(cpuid), gmx_cpuid_model(cpuid), gmx_cpuid_stepping(cpuid));
/* Walk over every feature value and print the names of those that are set */
1207 for (feature = GMX_CPUID_FEATURE_CANNOTDETECT; feature < GMX_CPUID_NFEATURES; feature++)
1209 if (gmx_cpuid_feature(cpuid, feature) == 1)
1212 _snprintf(str, n, " %s", gmx_cpuid_feature_string[feature]);
1214 snprintf(str, n, " %s", gmx_cpuid_feature_string[feature]);
/* Final newline after the feature list */
1223 _snprintf(str, n, "\n");
1225 snprintf(str, n, "\n");
/* Pick the SIMD instruction set that best fits the detected CPU. The
 * choice starts as GMX_CPUID_SIMD_NONE and is refined per vendor, taking
 * the first feature that is present:
 *  - Intel:   AVX2 -> AVX2_256, AVX -> AVX_256, SSE4.1, SSE2
 *  - AMD:     AVX -> AVX_128_FMA, SSE4.1, SSE2
 *  - Fujitsu: brand contains "SPARC64" -> SPARC64_HPC_ACE
 *  - IBM:     QPX, then VSX, then VMX
 *  - ARM:     NEON_ASIMD, then NEON
 * The statement that returns the choice is not part of this listing.
 *
 * NOTE(review): the AMD branch selects AVX_128_FMA from the AVX flag alone;
 * the FMA4 and XOP flags recorded by cpuid_check_amd_x86() are not
 * consulted here - confirm that this is still appropriate.
 */
1235 gmx_cpuid_simd_suggest (gmx_cpuid_t cpuid)
1237 enum gmx_cpuid_simd tmpsimd;
1239 tmpsimd = GMX_CPUID_SIMD_NONE;
1241 if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_INTEL)
1243 /* TODO: Add check for AVX-512F & AVX-512ER here as soon as we
1244 * have implemented verlet kernels for them. Until then,
1245 * we should pick AVX2 instead for the automatic detection.
1247 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX2))
1249 tmpsimd = GMX_CPUID_SIMD_X86_AVX2_256;
1251 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
1253 tmpsimd = GMX_CPUID_SIMD_X86_AVX_256;
1255 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE4_1))
1257 tmpsimd = GMX_CPUID_SIMD_X86_SSE4_1;
1259 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE2))
1261 tmpsimd = GMX_CPUID_SIMD_X86_SSE2;
1264 else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_AMD)
1266 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
1268 tmpsimd = GMX_CPUID_SIMD_X86_AVX_128_FMA;
1270 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE4_1))
1272 tmpsimd = GMX_CPUID_SIMD_X86_SSE4_1;
1274 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE2))
1276 tmpsimd = GMX_CPUID_SIMD_X86_SSE2;
1279 else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_FUJITSU)
1281 if (strstr(gmx_cpuid_brand(cpuid), "SPARC64"))
1283 tmpsimd = GMX_CPUID_SIMD_SPARC64_HPC_ACE;
1286 else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_IBM)
1288 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_QPX))
1290 tmpsimd = GMX_CPUID_SIMD_IBM_QPX;
1292 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_VSX))
1294 /* VSX is better than VMX, so we check it first */
1295 tmpsimd = GMX_CPUID_SIMD_IBM_VSX;
1297 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_VMX))
1299 tmpsimd = GMX_CPUID_SIMD_IBM_VMX;
1302 else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_ARM)
1304 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_ARM_NEON_ASIMD))
1306 tmpsimd = GMX_CPUID_SIMD_ARM_NEON_ASIMD;
1308 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_ARM_NEON))
1310 tmpsimd = GMX_CPUID_SIMD_ARM_NEON;
/* Compare the suggested SIMD instruction set with the one this binary was
 * compiled for. rc is non-zero when they differ. A two-line notice naming
 * both instruction sets is written to the log stream, and a one-line
 * variant to stderr when print_to_stderr is set. The conditions guarding
 * the log output and the return statement are not part of this listing.
 */
1318 gmx_cpuid_simd_check(enum gmx_cpuid_simd simd_suggest,
1320 int print_to_stderr)
/* Any difference counts as a mismatch, in either direction */
1324 rc = (simd_suggest != compiled_simd);
1330 fprintf(log, "\nBinary not matching hardware - you might be losing performance.\n"
1331 "SIMD instructions most likely to fit this hardware: %s\n"
1332 "SIMD instructions selected at GROMACS compile time: %s\n\n",
1333 gmx_cpuid_simd_string[simd_suggest],
1334 gmx_cpuid_simd_string[compiled_simd]);
1336 if (print_to_stderr)
1338 fprintf(stderr, "Compiled SIMD instructions: %s, GROMACS could use %s on this machine, which is better\n\n",
1339 gmx_cpuid_simd_string[compiled_simd],
1340 gmx_cpuid_simd_string[simd_suggest]);
1347 #ifdef GMX_CPUID_STANDALONE
1348 /* Stand-alone program to enable queries of CPU features from CMake.
1349 * Note that you need to check inline ASM capabilities before compiling and set
1350 * -DGMX_X86_GCC_INLINE_ASM for the cpuid instruction to work. */
/* Entry point of the stand-alone query tool. It prints a usage text that
 * lists the supported flags, initialises the cpuid data, prints the one
 * item selected by argv[1] and releases the data again.
 *
 * Only the first three characters of argv[1] are compared with each flag
 * name, so any argument that starts with e.g. "-ve" selects -vendor.
 */
1353 main(int argc, char **argv)
1356 enum gmx_cpuid_simd simd;
1362 "Usage:\n\n%s [flags]\n\n"
1363 "Available flags:\n"
1364 "-vendor Print CPU vendor.\n"
1365 "-brand Print CPU brand string.\n"
1366 "-family Print CPU family version.\n"
1367 "-model Print CPU model version.\n"
1368 "-stepping Print CPU stepping version.\n"
1369 "-features Print CPU feature flags.\n"
1370 "-simd Print suggested GROMACS SIMD instructions.\n",
1375 gmx_cpuid_init(&cpuid);
1377 if (!strncmp(argv[1], "-vendor", 3))
1379 printf("%s\n", gmx_cpuid_vendor_string[cpuid->vendor]);
1381 else if (!strncmp(argv[1], "-brand", 3))
1383 printf("%s\n", cpuid->brand);
1385 else if (!strncmp(argv[1], "-family", 3))
1387 printf("%d\n", cpuid->family);
1389 else if (!strncmp(argv[1], "-model", 3))
1391 printf("%d\n", cpuid->model);
1393 else if (!strncmp(argv[1], "-stepping", 3))
1395 printf("%d\n", cpuid->stepping);
1397 else if (!strncmp(argv[1], "-features", 3))
/* Print the name of every feature whose flag equals 1 */
1400 for (i = 0; i < GMX_CPUID_NFEATURES; i++)
1402 if (cpuid->feature[i] == 1)
1408 printf("%s", gmx_cpuid_feature_string[i]);
1413 else if (!strncmp(argv[1], "-simd", 3))
1415 simd = gmx_cpuid_simd_suggest(cpuid);
1416 fprintf(stdout, "%s\n", gmx_cpuid_simd_string[simd]);
1419 gmx_cpuid_done(cpuid);