2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
43 # define _GNU_SOURCE 1
52 #ifdef GMX_NATIVE_WINDOWS
53 /* MSVC definition for __cpuid() */
57 /* sysinfo functions */
61 /* sysconf() definition */
65 #include "gromacs/legacyheaders/gmx_cpuid.h"
69 /* For convenience, and to enable configure-time invocation, we keep all architectures
70 * in a single file, but to avoid repeated ifdefs we set the overall architecture here.
73 /* OK, it is x86, but can we execute cpuid? */
74 #if defined(GMX_X86_GCC_INLINE_ASM) || ( defined(_MSC_VER) && ( (_MSC_VER > 1500) || (_MSC_VER == 1500 & _MSC_FULL_VER >= 150030729)))
75 # define GMX_CPUID_X86
79 /* Global constant character strings corresponding to our enumerated types */
81 gmx_cpuid_vendor_string[GMX_CPUID_NVENDORS] =
92 gmx_cpuid_vendor_string_alternative[GMX_CPUID_NVENDORS] =
99 "ibm" /* Used on BlueGene/Q */
103 gmx_cpuid_feature_string[GMX_CPUID_NFEATURES] =
143 gmx_cpuid_simd_string[GMX_CPUID_NSIMD] =
157 /* Max length of brand string */
158 #define GMX_CPUID_BRAND_MAXLEN 256
161 /* Contents of the abstract datatype */
164 enum gmx_cpuid_vendor vendor;
165 char brand[GMX_CPUID_BRAND_MAXLEN];
169 /* Not using gmx_bool here, since it must be possible to compile this file without simple.h */
170 char feature[GMX_CPUID_NFEATURES];
172 /* Basic CPU topology information. For x86 this is a bit complicated since the topology differs between
173 * operating systems and sometimes even settings. For most other architectures you can likely just check
174 * the documentation and then write static information to these arrays rather than detecting on-the-fly.
176 int have_cpu_topology;
177 int nproc; /* total number of logical processors from OS */
179 int ncores_per_package;
180 int nhwthreads_per_core;
182 int * core_id; /* Local core id in each package */
183 int * hwthread_id; /* Local hwthread id in each core */
184 int * locality_order; /* Processor indices sorted in locality order */
188 /* Simple routines to access the data structure. The initialization routine is
189 * further down since that needs to call other static routines in this file.
191 enum gmx_cpuid_vendor
192 gmx_cpuid_vendor (gmx_cpuid_t cpuid)
194 return cpuid->vendor;
199 gmx_cpuid_brand (gmx_cpuid_t cpuid)
205 gmx_cpuid_family (gmx_cpuid_t cpuid)
207 return cpuid->family;
211 gmx_cpuid_model (gmx_cpuid_t cpuid)
217 gmx_cpuid_stepping (gmx_cpuid_t cpuid)
219 return cpuid->stepping;
223 gmx_cpuid_feature (gmx_cpuid_t cpuid,
224 enum gmx_cpuid_feature feature)
226 return (cpuid->feature[feature] != 0);
232 /* What type of SIMD was compiled in, if any? */
233 #ifdef GMX_SIMD_X86_AVX2_256
234 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX2_256;
235 #elif defined GMX_SIMD_X86_AVX_256
236 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_256;
237 #elif defined GMX_SIMD_X86_AVX_128_FMA
238 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_128_FMA;
239 #elif defined GMX_SIMD_X86_SSE4_1
240 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE4_1;
241 #elif defined GMX_SIMD_X86_SSE2
242 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE2;
243 #elif defined GMX_SIMD_SPARC64_HPC_ACE
244 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_SPARC64_HPC_ACE;
245 #elif defined GMX_SIMD_IBM_QPX
246 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_QPX;
247 #elif defined GMX_SIMD_REFERENCE
248 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_REFERENCE;
250 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_NONE;
256 /* Execute CPUID on x86-class CPUs. The level selects the function to execute, and the
257 * contents of the output registers are returned. See Intel/AMD docs for details.
259 * This version supports extended information where we can also have an input
260 * value in the ecx register. This is ignored for most levels, but some of them
261 * (e.g. level 0xB on Intel) use it.
264 execute_x86cpuid(unsigned int level,
273 /* Currently CPUID is only supported (1) if we can use an instruction on MSVC, or (2)
274 * if the compiler handles GNU-style inline assembly.
277 #if (defined _MSC_VER)
280 #if (_MSC_VER > 1500) || (_MSC_VER == 1500 & _MSC_FULL_VER >= 150030729)
281 /* MSVC 9.0 SP1 or later */
282 __cpuidex(CPUInfo, level, ecxval);
285 __cpuid(CPUInfo, level);
286 /* Set an error code if the user wanted a non-zero ecxval, since we did not have cpuidex */
287 rc = (ecxval > 0) ? -1 : 0;
294 #elif (defined GMX_X86_GCC_INLINE_ASM)
295 /* for now this means GMX_X86_GCC_INLINE_ASM should be defined,
296 * but there might be more options added in the future.
302 #if defined(__i386__) && defined(__PIC__)
303 /* Avoid clobbering the global offset table in 32-bit pic code (ebx register) */
304 __asm__ __volatile__ ("xchgl %%ebx, %1 \n\t"
306 "xchgl %%ebx, %1 \n\t"
307 : "+a" (*eax), "+r" (*ebx), "+c" (*ecx), "+d" (*edx));
309 /* i386 without PIC, or x86-64. Things are easy and we can clobber any reg we want :-) */
310 __asm__ __volatile__ ("cpuid \n\t"
311 : "+a" (*eax), "+b" (*ebx), "+c" (*ecx), "+d" (*edx));
316 * Apparently this is an x86 platform where we don't know how to call cpuid.
318 * This is REALLY bad, since we will lose all Gromacs SIMD support.
331 /* Identify CPU features common to Intel & AMD - mainly brand string,
332 * version and some features. Vendor has already been detected outside this.
335 cpuid_check_common_x86(gmx_cpuid_t cpuid)
337 int fn, max_stdfn, max_extfn;
338 unsigned int eax, ebx, ecx, edx;
339 char str[GMX_CPUID_BRAND_MAXLEN];
342 /* Find largest standard/extended function input value */
343 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
345 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
349 if (max_extfn >= 0x80000005)
351 /* Get CPU brand string */
352 for (fn = 0x80000002; fn < 0x80000005; fn++)
354 execute_x86cpuid(fn, 0, &eax, &ebx, &ecx, &edx);
356 memcpy(p+4, &ebx, 4);
357 memcpy(p+8, &ecx, 4);
358 memcpy(p+12, &edx, 4);
363 /* Remove empty initial space */
365 while (isspace(*(p)))
369 strncpy(cpuid->brand, p, GMX_CPUID_BRAND_MAXLEN);
373 strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_BRAND_MAXLEN);
376 /* Find basic CPU properties */
379 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
381 cpuid->family = ((eax & 0x0FF00000) >> 20) + ((eax & 0x00000F00) >> 8);
383 /* Note that extended model should be shifted left 4, so only shift right by 12 instead of 16. */
383 cpuid->model = ((eax & 0x000F0000) >> 12) + ((eax & 0x000000F0) >> 4);
384 cpuid->stepping = (eax & 0x0000000F);
386 /* Feature flags common to AMD and intel */
387 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE3] = (ecx & (1 << 0)) != 0;
388 cpuid->feature[GMX_CPUID_FEATURE_X86_PCLMULDQ] = (ecx & (1 << 1)) != 0;
389 cpuid->feature[GMX_CPUID_FEATURE_X86_SSSE3] = (ecx & (1 << 9)) != 0;
390 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA] = (ecx & (1 << 12)) != 0;
391 cpuid->feature[GMX_CPUID_FEATURE_X86_CX16] = (ecx & (1 << 13)) != 0;
392 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_1] = (ecx & (1 << 19)) != 0;
393 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_2] = (ecx & (1 << 20)) != 0;
394 cpuid->feature[GMX_CPUID_FEATURE_X86_POPCNT] = (ecx & (1 << 23)) != 0;
395 cpuid->feature[GMX_CPUID_FEATURE_X86_AES] = (ecx & (1 << 25)) != 0;
396 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX] = (ecx & (1 << 28)) != 0;
397 cpuid->feature[GMX_CPUID_FEATURE_X86_F16C] = (ecx & (1 << 29)) != 0;
398 cpuid->feature[GMX_CPUID_FEATURE_X86_RDRND] = (ecx & (1 << 30)) != 0;
400 cpuid->feature[GMX_CPUID_FEATURE_X86_PSE] = (edx & (1 << 3)) != 0;
401 cpuid->feature[GMX_CPUID_FEATURE_X86_MSR] = (edx & (1 << 5)) != 0;
402 cpuid->feature[GMX_CPUID_FEATURE_X86_CX8] = (edx & (1 << 8)) != 0;
403 cpuid->feature[GMX_CPUID_FEATURE_X86_APIC] = (edx & (1 << 9)) != 0;
404 cpuid->feature[GMX_CPUID_FEATURE_X86_CMOV] = (edx & (1 << 15)) != 0;
405 cpuid->feature[GMX_CPUID_FEATURE_X86_CLFSH] = (edx & (1 << 19)) != 0;
406 cpuid->feature[GMX_CPUID_FEATURE_X86_MMX] = (edx & (1 << 23)) != 0;
407 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE2] = (edx & (1 << 26)) != 0;
408 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = (edx & (1 << 28)) != 0;
414 cpuid->stepping = -1;
417 if (max_extfn >= 0x80000001)
419 execute_x86cpuid(0x80000001, 0, &eax, &ebx, &ecx, &edx);
420 cpuid->feature[GMX_CPUID_FEATURE_X86_LAHF_LM] = (ecx & (1 << 0)) != 0;
421 cpuid->feature[GMX_CPUID_FEATURE_X86_PDPE1GB] = (edx & (1 << 26)) != 0;
422 cpuid->feature[GMX_CPUID_FEATURE_X86_RDTSCP] = (edx & (1 << 27)) != 0;
425 if (max_extfn >= 0x80000007)
427 execute_x86cpuid(0x80000007, 0, &eax, &ebx, &ecx, &edx);
428 cpuid->feature[GMX_CPUID_FEATURE_X86_NONSTOP_TSC] = (edx & (1 << 8)) != 0;
433 /* This routine returns the number of unique elements found in the array,
434 * and renumbers these starting from 0. For example, the array {0,1,2,8,9,10,8,9,10,0,1,2}
435 * will be rewritten to {0,1,2,3,4,5,3,4,5,0,1,2}, and it returns 6 for the
436 * number of unique elements.
439 cpuid_renumber_elements(int *data, int n)
442 int i, j, nunique, found;
444 unique = malloc(sizeof(int)*n);
447 for (i = 0; i < n; i++)
449 for (j = 0, found = 0; j < nunique && !found; j++)
451 found = (data[i] == unique[j]);
455 /* Insert in sorted order! */
456 for (j = nunique++; j > 0 && unique[j-1] > data[i]; j--)
458 unique[j] = unique[j-1];
464 for (i = 0; i < n; i++)
466 for (j = 0; j < nunique; j++)
468 if (data[i] == unique[j])
478 /* APIC IDs, or everything you wanted to know about your x86 cores but were afraid to ask...
480 * Raw APIC IDs are unfortunately somewhat dirty. For technical reasons they are assigned
481 * in power-of-2 chunks, and even then there are no guarantees about specific numbers - all
482 * we know is that the part for each thread/core/package is unique, and how many bits are
483 * reserved for that part.
484 * This routine does internal renumbering so we get contiguous indices, and also
485 * decodes the actual number of packages, cores-per-package and hwthreads-per-core.
486 * Returns: 0 on success, non-zero on failure.
489 cpuid_x86_decode_apic_id(gmx_cpuid_t cpuid, int *apic_id, int core_bits, int hwthread_bits)
492 int hwthread_mask, core_mask_after_shift;
494 cpuid->hwthread_id = malloc(sizeof(int)*cpuid->nproc);
495 cpuid->core_id = malloc(sizeof(int)*cpuid->nproc);
496 cpuid->package_id = malloc(sizeof(int)*cpuid->nproc);
497 cpuid->locality_order = malloc(sizeof(int)*cpuid->nproc);
499 hwthread_mask = (1 << hwthread_bits) - 1;
500 core_mask_after_shift = (1 << core_bits) - 1;
502 for (i = 0; i < cpuid->nproc; i++)
504 cpuid->hwthread_id[i] = apic_id[i] & hwthread_mask;
505 cpuid->core_id[i] = (apic_id[i] >> hwthread_bits) & core_mask_after_shift;
506 cpuid->package_id[i] = apic_id[i] >> (core_bits + hwthread_bits);
509 cpuid->npackages = cpuid_renumber_elements(cpuid->package_id, cpuid->nproc);
510 cpuid->ncores_per_package = cpuid_renumber_elements(cpuid->core_id, cpuid->nproc);
511 cpuid->nhwthreads_per_core = cpuid_renumber_elements(cpuid->hwthread_id, cpuid->nproc);
513 /* now check for consistency */
514 if ( (cpuid->npackages * cpuid->ncores_per_package *
515 cpuid->nhwthreads_per_core) != cpuid->nproc)
517 /* the packages/cores-per-package/hwthreads-per-core counts are
522 /* Create a locality order array, i.e. first all resources in package0, which in turn
523 * are sorted so we first have all resources in core0, where threads are sorted in order, etc.
526 for (i = 0; i < cpuid->nproc; i++)
528 idx = (cpuid->package_id[i]*cpuid->ncores_per_package + cpuid->core_id[i])*cpuid->nhwthreads_per_core + cpuid->hwthread_id[i];
529 cpuid->locality_order[idx] = i;
535 /* Detection of AMD-specific CPU features */
537 cpuid_check_amd_x86(gmx_cpuid_t cpuid)
539 int max_stdfn, max_extfn, ret;
540 unsigned int eax, ebx, ecx, edx;
541 int hwthread_bits, core_bits;
544 cpuid_check_common_x86(cpuid);
546 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
549 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
552 if (max_extfn >= 0x80000001)
554 execute_x86cpuid(0x80000001, 0, &eax, &ebx, &ecx, &edx);
556 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4A] = (ecx & (1 << 6)) != 0;
557 cpuid->feature[GMX_CPUID_FEATURE_X86_MISALIGNSSE] = (ecx & (1 << 7)) != 0;
558 cpuid->feature[GMX_CPUID_FEATURE_X86_XOP] = (ecx & (1 << 11)) != 0;
559 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA4] = (ecx & (1 << 16)) != 0;
562 /* Query APIC information on AMD */
563 if (max_extfn >= 0x80000008)
565 #if (defined HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF && defined __linux__)
568 cpu_set_t cpuset, save_cpuset;
569 cpuid->nproc = sysconf(_SC_NPROCESSORS_ONLN);
570 apic_id = malloc(sizeof(int)*cpuid->nproc);
571 sched_getaffinity(0, sizeof(cpu_set_t), &save_cpuset);
572 /* Get APIC id from each core */
574 for (i = 0; i < cpuid->nproc; i++)
577 sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
578 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
579 apic_id[i] = ebx >> 24;
582 /* Reset affinity to the value it had when calling this routine */
583 sched_setaffinity(0, sizeof(cpu_set_t), &save_cpuset);
584 #define CPUID_HAVE_APIC
585 #elif defined GMX_NATIVE_WINDOWS
589 unsigned int save_affinity, affinity;
590 GetSystemInfo( &sysinfo );
591 cpuid->nproc = sysinfo.dwNumberOfProcessors;
592 apic_id = malloc(sizeof(int)*cpuid->nproc);
593 /* Get previous affinity mask */
594 save_affinity = SetThreadAffinityMask(GetCurrentThread(), 1);
595 for (i = 0; i < cpuid->nproc; i++)
597 SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR)1)<<i));
599 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
600 apic_id[i] = ebx >> 24;
602 SetThreadAffinityMask(GetCurrentThread(), save_affinity);
603 #define CPUID_HAVE_APIC
605 #ifdef CPUID_HAVE_APIC
606 /* AMD does not support SMT yet - there are no hwthread bits in apic ID */
608 /* Get number of core bits in apic ID - try modern extended method first */
609 execute_x86cpuid(0x80000008, 0, &eax, &ebx, &ecx, &edx);
610 core_bits = (ecx >> 12) & 0xf;
613 /* Legacy method for old single/dual core AMD CPUs */
615 for (core_bits = 0; (i>>core_bits) > 0; core_bits++)
620 ret = cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits,
622 cpuid->have_cpu_topology = (ret == 0);
628 /* Detection of Intel-specific CPU features */
630 cpuid_check_intel_x86(gmx_cpuid_t cpuid)
632 unsigned int max_stdfn, max_extfn, ret;
633 unsigned int eax, ebx, ecx, edx;
634 unsigned int max_logical_cores, max_physical_cores;
635 int hwthread_bits, core_bits;
638 cpuid_check_common_x86(cpuid);
640 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
643 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
648 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
649 cpuid->feature[GMX_CPUID_FEATURE_X86_PDCM] = (ecx & (1 << 15)) != 0;
650 cpuid->feature[GMX_CPUID_FEATURE_X86_PCID] = (ecx & (1 << 17)) != 0;
651 cpuid->feature[GMX_CPUID_FEATURE_X86_X2APIC] = (ecx & (1 << 21)) != 0;
652 cpuid->feature[GMX_CPUID_FEATURE_X86_TDT] = (ecx & (1 << 24)) != 0;
657 execute_x86cpuid(0x7, 0, &eax, &ebx, &ecx, &edx);
658 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX2] = (ebx & (1 << 5)) != 0;
661 /* Check whether Hyper-Threading is enabled, not only supported */
662 if (cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] && max_stdfn >= 4)
664 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
665 max_logical_cores = (ebx >> 16) & 0x0FF;
666 execute_x86cpuid(0x4, 0, &eax, &ebx, &ecx, &edx);
667 max_physical_cores = ((eax >> 26) & 0x3F) + 1;
669 /* Clear HTT flag if we only have 1 logical core per physical */
670 if (max_logical_cores/max_physical_cores < 2)
672 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = 0;
676 if (max_stdfn >= 0xB)
678 /* Query x2 APIC information from cores */
679 #if (defined HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF && defined __linux__)
682 cpu_set_t cpuset, save_cpuset;
683 cpuid->nproc = sysconf(_SC_NPROCESSORS_ONLN);
684 apic_id = malloc(sizeof(int)*cpuid->nproc);
685 sched_getaffinity(0, sizeof(cpu_set_t), &save_cpuset);
686 /* Get x2APIC ID from each hardware thread */
688 for (i = 0; i < cpuid->nproc; i++)
691 sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
692 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
696 /* Reset affinity to the value it had when calling this routine */
697 sched_setaffinity(0, sizeof(cpu_set_t), &save_cpuset);
698 #define CPUID_HAVE_APIC
699 #elif defined GMX_NATIVE_WINDOWS
703 unsigned int save_affinity, affinity;
704 GetSystemInfo( &sysinfo );
705 cpuid->nproc = sysinfo.dwNumberOfProcessors;
706 apic_id = malloc(sizeof(int)*cpuid->nproc);
707 /* Get previous affinity mask */
708 save_affinity = SetThreadAffinityMask(GetCurrentThread(), 1);
709 for (i = 0; i < cpuid->nproc; i++)
711 SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR)1)<<i));
713 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
716 SetThreadAffinityMask(GetCurrentThread(), save_affinity);
717 #define CPUID_HAVE_APIC
719 #ifdef CPUID_HAVE_APIC
720 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
721 hwthread_bits = eax & 0x1F;
722 execute_x86cpuid(0xB, 1, &eax, &ebx, &ecx, &edx);
723 core_bits = (eax & 0x1F) - hwthread_bits;
724 ret = cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits,
726 cpuid->have_cpu_topology = (ret == 0);
731 #endif /* GMX_CPUID_X86 */
737 chomp_substring_before_colon(const char *in, char *s, int maxlength)
740 strncpy(s, in, maxlength);
745 while (isspace(*(--p)) && (p >= s))
757 chomp_substring_after_colon(const char *in, char *s, int maxlength)
760 if ( (p = strchr(in, ':')) != NULL)
767 strncpy(s, p, maxlength);
769 while (isspace(*(--p)) && (p >= s))
780 /* Try to find the vendor of the current CPU, so we know what specific
781 * detection routine to call.
783 static enum gmx_cpuid_vendor
784 cpuid_check_vendor(void)
786 enum gmx_cpuid_vendor i, vendor;
787 /* Register data used on x86 */
788 unsigned int eax, ebx, ecx, edx;
789 char vendorstring[13];
791 char buffer[255], before_colon[255], after_colon[255];
793 /* Set default first */
794 vendor = GMX_CPUID_VENDOR_UNKNOWN;
797 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
799 memcpy(vendorstring, &ebx, 4);
800 memcpy(vendorstring+4, &edx, 4);
801 memcpy(vendorstring+8, &ecx, 4);
803 vendorstring[12] = '\0';
805 for (i = GMX_CPUID_VENDOR_UNKNOWN; i < GMX_CPUID_NVENDORS; i++)
807 if (!strncmp(vendorstring, gmx_cpuid_vendor_string[i], 12))
812 #elif defined(__linux__) || defined(__linux)
813 /* General Linux. Try to get CPU vendor from /proc/cpuinfo */
814 if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
816 while ( (vendor == GMX_CPUID_VENDOR_UNKNOWN) && (fgets(buffer, sizeof(buffer), fp) != NULL))
818 chomp_substring_before_colon(buffer, before_colon, sizeof(before_colon));
819 /* Intel/AMD use "vendor_id", IBM "vendor"(?) or "model". Fujitsu "manufacture". Add others if you have them! */
820 if (!strcmp(before_colon, "vendor_id")
821 || !strcmp(before_colon, "vendor")
822 || !strcmp(before_colon, "manufacture")
823 || !strcmp(before_colon, "model"))
825 chomp_substring_after_colon(buffer, after_colon, sizeof(after_colon));
826 for (i = GMX_CPUID_VENDOR_UNKNOWN; i < GMX_CPUID_NVENDORS; i++)
828 /* Be liberal and accept if we find the vendor
829 * string (or alternative string) anywhere. Using
830 * strcasestr() would be non-portable. */
831 if (strstr(after_colon, gmx_cpuid_vendor_string[i])
832 || strstr(after_colon, gmx_cpuid_vendor_string_alternative[i]))
849 gmx_cpuid_topology(gmx_cpuid_t cpuid,
852 int * ncores_per_package,
853 int * nhwthreads_per_core,
854 const int ** package_id,
855 const int ** core_id,
856 const int ** hwthread_id,
857 const int ** locality_order)
861 if (cpuid->have_cpu_topology)
863 *nprocessors = cpuid->nproc;
864 *npackages = cpuid->npackages;
865 *ncores_per_package = cpuid->ncores_per_package;
866 *nhwthreads_per_core = cpuid->nhwthreads_per_core;
867 *package_id = cpuid->package_id;
868 *core_id = cpuid->core_id;
869 *hwthread_id = cpuid->hwthread_id;
870 *locality_order = cpuid->locality_order;
881 enum gmx_cpuid_x86_smt
882 gmx_cpuid_x86_smt(gmx_cpuid_t cpuid)
884 enum gmx_cpuid_x86_smt rc;
886 if (cpuid->have_cpu_topology)
888 rc = (cpuid->nhwthreads_per_core > 1) ? GMX_CPUID_X86_SMT_ENABLED : GMX_CPUID_X86_SMT_DISABLED;
890 else if (cpuid->vendor == GMX_CPUID_VENDOR_AMD || gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_HTT) == 0)
892 rc = GMX_CPUID_X86_SMT_DISABLED;
896 rc = GMX_CPUID_X86_SMT_CANNOTDETECT;
903 gmx_cpuid_init (gmx_cpuid_t * pcpuid)
908 char buffer[255], buffer2[255];
911 cpuid = malloc(sizeof(*cpuid));
915 for (i = 0; i < GMX_CPUID_NFEATURES; i++)
917 cpuid->feature[i] = 0;
920 cpuid->have_cpu_topology = 0;
922 cpuid->npackages = 0;
923 cpuid->ncores_per_package = 0;
924 cpuid->nhwthreads_per_core = 0;
925 cpuid->package_id = NULL;
926 cpuid->core_id = NULL;
927 cpuid->hwthread_id = NULL;
928 cpuid->locality_order = NULL;
930 cpuid->vendor = cpuid_check_vendor();
932 switch (cpuid->vendor)
935 case GMX_CPUID_VENDOR_INTEL:
936 cpuid_check_intel_x86(cpuid);
938 case GMX_CPUID_VENDOR_AMD:
939 cpuid_check_amd_x86(cpuid);
944 strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_BRAND_MAXLEN);
945 #if defined(__linux__) || defined(__linux)
946 /* General Linux. Try to get CPU type from /proc/cpuinfo */
947 if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
950 while ( (found_brand == 0) && (fgets(buffer, sizeof(buffer), fp) != NULL))
952 chomp_substring_before_colon(buffer, buffer2, sizeof(buffer2));
953 /* Intel uses "model name", Fujitsu and IBM "cpu". */
954 if (!strcmp(buffer2, "model name") || !strcmp(buffer2, "cpu"))
956 chomp_substring_after_colon(buffer, cpuid->brand, GMX_CPUID_BRAND_MAXLEN);
967 for (i = 0; i < GMX_CPUID_NFEATURES; i++)
969 cpuid->feature[i] = 0;
971 cpuid->feature[GMX_CPUID_FEATURE_CANNOTDETECT] = 1;
980 gmx_cpuid_done (gmx_cpuid_t cpuid)
987 gmx_cpuid_formatstring (gmx_cpuid_t cpuid,
993 enum gmx_cpuid_feature feature;
999 "Family: %2d Model: %2d Stepping: %2d\n"
1001 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
1002 gmx_cpuid_brand(cpuid),
1003 gmx_cpuid_family(cpuid), gmx_cpuid_model(cpuid), gmx_cpuid_stepping(cpuid));
1008 "Family: %2d Model: %2d Stepping: %2d\n"
1010 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
1011 gmx_cpuid_brand(cpuid),
1012 gmx_cpuid_family(cpuid), gmx_cpuid_model(cpuid), gmx_cpuid_stepping(cpuid));
1020 for (feature = GMX_CPUID_FEATURE_CANNOTDETECT; feature < GMX_CPUID_NFEATURES; feature++)
1022 if (gmx_cpuid_feature(cpuid, feature) == 1)
1025 _snprintf(str, n, " %s", gmx_cpuid_feature_string[feature]);
1027 snprintf(str, n, " %s", gmx_cpuid_feature_string[feature]);
1036 _snprintf(str, n, "\n");
1038 snprintf(str, n, "\n");
1048 gmx_cpuid_simd_suggest (gmx_cpuid_t cpuid)
1050 enum gmx_cpuid_simd tmpsimd;
1052 tmpsimd = GMX_CPUID_SIMD_NONE;
1054 if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_INTEL)
1056 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX2))
1058 tmpsimd = GMX_CPUID_SIMD_X86_AVX2_256;
1060 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
1062 tmpsimd = GMX_CPUID_SIMD_X86_AVX_256;
1064 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE4_1))
1066 tmpsimd = GMX_CPUID_SIMD_X86_SSE4_1;
1068 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE2))
1070 tmpsimd = GMX_CPUID_SIMD_X86_SSE2;
1073 else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_AMD)
1075 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
1077 tmpsimd = GMX_CPUID_SIMD_X86_AVX_128_FMA;
1079 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE4_1))
1081 tmpsimd = GMX_CPUID_SIMD_X86_SSE4_1;
1083 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE2))
1085 tmpsimd = GMX_CPUID_SIMD_X86_SSE2;
1088 else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_FUJITSU)
1090 if (strstr(gmx_cpuid_brand(cpuid), "SPARC64"))
1092 tmpsimd = GMX_CPUID_SIMD_SPARC64_HPC_ACE;
1095 else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_IBM)
1097 if (strstr(gmx_cpuid_brand(cpuid), "A2"))
1099 tmpsimd = GMX_CPUID_SIMD_IBM_QPX;
1108 gmx_cpuid_simd_check(gmx_cpuid_t cpuid,
1110 int print_to_stderr)
1114 enum gmx_cpuid_simd simd;
1116 simd = gmx_cpuid_simd_suggest(cpuid);
1118 rc = (simd != compiled_simd);
1120 gmx_cpuid_formatstring(cpuid, str, 1023);
1126 "\nDetecting CPU SIMD instructions.\nPresent hardware specification:\n"
1128 "SIMD instructions most likely to fit this hardware: %s\n"
1129 "SIMD instructions selected at GROMACS compile time: %s\n\n",
1131 gmx_cpuid_simd_string[simd],
1132 gmx_cpuid_simd_string[compiled_simd]);
1139 fprintf(log, "\nBinary not matching hardware - you might be losing performance.\n"
1140 "SIMD instructions most likely to fit this hardware: %s\n"
1141 "SIMD instructions selected at GROMACS compile time: %s\n\n",
1142 gmx_cpuid_simd_string[simd],
1143 gmx_cpuid_simd_string[compiled_simd]);
1145 if (print_to_stderr)
1147 fprintf(stderr, "Compiled SIMD instructions: %s (Gromacs could use %s on this machine, which is better)\n",
1148 gmx_cpuid_simd_string[compiled_simd],
1149 gmx_cpuid_simd_string[simd]);
1156 #ifdef GMX_CPUID_STANDALONE
1157 /* Stand-alone program to enable queries of CPU features from CMake.
1158 * Note that you need to check inline ASM capabilities before compiling and set
1159 * -DGMX_X86_GCC_INLINE_ASM for the cpuid instruction to work...
1162 main(int argc, char **argv)
1165 enum gmx_cpuid_simd simd;
1171 "Usage:\n\n%s [flags]\n\n"
1172 "Available flags:\n"
1173 "-vendor Print CPU vendor.\n"
1174 "-brand Print CPU brand string.\n"
1175 "-family Print CPU family version.\n"
1176 "-model Print CPU model version.\n"
1177 "-stepping Print CPU stepping version.\n"
1178 "-features Print CPU feature flags.\n"
1179 "-simd Print suggested GROMACS SIMD instructions.\n",
1184 gmx_cpuid_init(&cpuid);
1186 if (!strncmp(argv[1], "-vendor", 3))
1188 printf("%s\n", gmx_cpuid_vendor_string[cpuid->vendor]);
1190 else if (!strncmp(argv[1], "-brand", 3))
1192 printf("%s\n", cpuid->brand);
1194 else if (!strncmp(argv[1], "-family", 3))
1196 printf("%d\n", cpuid->family);
1198 else if (!strncmp(argv[1], "-model", 3))
1200 printf("%d\n", cpuid->model);
1202 else if (!strncmp(argv[1], "-stepping", 3))
1204 printf("%d\n", cpuid->stepping);
1206 else if (!strncmp(argv[1], "-features", 3))
1209 for (i = 0; i < GMX_CPUID_NFEATURES; i++)
1211 if (cpuid->feature[i] == 1)
1217 printf("%s", gmx_cpuid_feature_string[i]);
1222 else if (!strncmp(argv[1], "-simd", 3))
1224 simd = gmx_cpuid_simd_suggest(cpuid);
1225 fprintf(stdout, "%s\n", gmx_cpuid_simd_string[simd]);
1228 gmx_cpuid_done(cpuid);