1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of GROMACS.
7 * Written by the Gromacs development team under coordination of
8 * David van der Spoel, Berk Hess, and Erik Lindahl.
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * To help us fund GROMACS development, we humbly ask that you cite
16 * the research papers on the package. Check out http://www.gromacs.org
19 * Gnomes, ROck Monsters And Chili Sauce
35 /* MSVC definition for __cpuid() */
37 /* sysinfo functions */
41 /* sysconf() definition */
45 #include "gmx_cpuid.h"
49 /* For convenience, and to enable configure-time invocation, we keep all architectures
50 * in a single file, but to avoid repeated ifdefs we set the overall architecture here.
53 /* OK, it is x86, but can we execute cpuid? */
/* CPUID can be executed when the compiler handles GNU-style inline assembly, or on
 * MSVC 9.0 SP1 (_MSC_FULL_VER 150030729) and later. The two MSVC version tests are
 * combined with a logical &&; the previous bitwise & gave the same value only
 * because both operands are 0 or 1. */
#if defined(GMX_X86_GCC_INLINE_ASM) || ( defined(_MSC_VER) && ( (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729)))
#    define GMX_CPUID_X86
#endif
59 /* Global constant character strings corresponding to our enumerated types */
/* Vendor names, indexed by enum gmx_cpuid_vendor. cpuid_check_vendor() matches them
 * against the CPUID level-0 vendor string and against /proc/cpuinfo entries. */
61 gmx_cpuid_vendor_string[GMX_CPUID_NVENDORS] =
/* Alternative spelling per vendor; cpuid_check_vendor() also searches /proc/cpuinfo
 * entries for these. */
72 gmx_cpuid_vendor_string_alternative[GMX_CPUID_NVENDORS] =
79 "ibm" /* Used on BlueGene/Q */
/* Printable name of each feature flag, indexed by enum gmx_cpuid_feature. */
83 gmx_cpuid_feature_string[GMX_CPUID_NFEATURES] =
/* Printable name of each acceleration level, indexed by enum gmx_cpuid_acceleration. */
123 gmx_cpuid_acceleration_string[GMX_CPUID_NACCELERATIONS] =
135 /* Max length of brand string: size in bytes of the brand buffers declared in this file */
136 #define GMX_CPUID_BRAND_MAXLEN 256
139 /* Contents of the abstract datatype */
/* Members of the cpuid data structure. The vendor and brand are filled in by
 * gmx_cpuid_init(); brand is a fixed-size buffer of GMX_CPUID_BRAND_MAXLEN bytes. */
142 enum gmx_cpuid_vendor vendor;
143 char brand[GMX_CPUID_BRAND_MAXLEN];
147 /* Not using gmx_bool here, since this file must be possible to compile without simple.h */
/* One entry per enum gmx_cpuid_feature value; non-zero means the feature was detected. */
148 char feature[GMX_CPUID_NFEATURES];
150 /* Basic CPU topology information. For x86 this is a bit complicated since the topology differs between
151 * operating systems and sometimes even settings. For most other architectures you can likely just check
152 * the documentation and then write static information to these arrays rather than detecting on-the-fly.
154 int have_cpu_topology;
155 int nproc; /* total number of logical processors from OS */
157 int ncores_per_package;
158 int nhwthreads_per_core;
160 int * core_id; /* Local core id in each package */
161 int * hwthread_id; /* Local hwthread id in each core */
162 int * locality_order; /* Processor indices sorted in locality order */
166 /* Simple routines to access the data structure. The initialization routine is
167 * further down since that needs to call other static routines in this file.
/* Return the vendor stored in the cpuid structure.
 *
 * cpuid  handle to read; it is dereferenced without a NULL check
 */
169 enum gmx_cpuid_vendor
170 gmx_cpuid_vendor (gmx_cpuid_t cpuid)
172 return cpuid->vendor;
/* Accessor for the CPU brand string of the cpuid structure. Callers in this file
 * treat the result as a NUL-terminated string (it is printed with a string
 * conversion and searched with strstr()). */
177 gmx_cpuid_brand (gmx_cpuid_t cpuid)
/* Return the CPU family stored in the cpuid structure. On x86 this value is
 * computed by cpuid_check_common_x86() from CPUID level 1. */
183 gmx_cpuid_family (gmx_cpuid_t cpuid)
185 return cpuid->family;
/* Accessor for the CPU model of the cpuid structure. On x86 the model is
 * computed by cpuid_check_common_x86() from CPUID level 1. */
189 gmx_cpuid_model (gmx_cpuid_t cpuid)
/* Return the CPU stepping stored in the cpuid structure. cpuid_check_common_x86()
 * can also store -1 here. */
195 gmx_cpuid_stepping (gmx_cpuid_t cpuid)
197 return cpuid->stepping;
/* Report whether a feature flag is set.
 *
 * cpuid    handle to read
 * feature  index into the feature array; it is not range-checked here
 *
 * Returns 1 when the stored flag is non-zero, 0 otherwise.
 */
201 gmx_cpuid_feature (gmx_cpuid_t cpuid,
202 enum gmx_cpuid_feature feature)
204 return (cpuid->feature[feature] != 0);
210 /* What type of acceleration was compiled in, if any?
211 * This is set from Cmake. Note that the SSE2 and SSE4_1 macros are set for
212 * AVX too, so it is important that they appear last in the list.
/* Define compiled_acc, the acceleration this binary was built with. The build
 * macros are tested in the order AVX_256, AVX_128_FMA, SSE4_1, SSE2,
 * SPARC64_HPC_ACE, IBM_QPX and the first one defined selects the enumerator.
 * The last definition (GMX_CPUID_ACCELERATION_NONE) is presumably the fallback
 * branch when none of the macros is defined. */
214 #ifdef GMX_X86_AVX_256
216 enum gmx_cpuid_acceleration
217 compiled_acc = GMX_CPUID_ACCELERATION_X86_AVX_256;
218 #elif defined GMX_X86_AVX_128_FMA
220 enum gmx_cpuid_acceleration
221 compiled_acc = GMX_CPUID_ACCELERATION_X86_AVX_128_FMA;
222 #elif defined GMX_X86_SSE4_1
224 enum gmx_cpuid_acceleration
225 compiled_acc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
226 #elif defined GMX_X86_SSE2
228 enum gmx_cpuid_acceleration
229 compiled_acc = GMX_CPUID_ACCELERATION_X86_SSE2;
230 #elif defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE
232 enum gmx_cpuid_acceleration
233 compiled_acc = GMX_CPUID_ACCELERATION_SPARC64_HPC_ACE;
234 #elif defined GMX_CPU_ACCELERATION_IBM_QPX
236 enum gmx_cpuid_acceleration
237 compiled_acc = GMX_CPUID_ACCELERATION_IBM_QPX;
240 enum gmx_cpuid_acceleration
241 compiled_acc = GMX_CPUID_ACCELERATION_NONE;
247 /* Execute CPUID on x86 class CPUs. level sets function to exec, and the
248 * contents of register output is returned. See Intel/AMD docs for details.
250 * This version supports extended information where we can also have an input
251 * value in the ecx register. This is ignored for most levels, but some of them
252 * (e.g. level 0xB on Intel) use it.
/* Run the CPUID instruction.
 *
 * level   CPUID function number
 * ecxval  input value for the ecx register (sub-function selector)
 * eax, ebx, ecx, edx (pointers, as used by the inline assembly and by every
 *         caller in this file) receive the register contents
 *
 * MSVC builds call the __cpuidex() intrinsic when the compiler is 9.0 SP1 or
 * later, otherwise __cpuid(); in the latter case rc is set to -1 when a
 * non-zero ecxval was requested, since it cannot be honoured. GCC-style builds
 * use inline assembly; in 32-bit PIC code ebx is swapped out and back with
 * xchgl around the instruction so the register is preserved.
 *
 * NOTE(review): the MSVC version test combines its two comparisons with a
 * bitwise & rather than &&. Both operands are 0 or 1 so the result is the
 * same, but && states the intent and avoids precedence warnings.
 * NOTE(review): rc appears to be the status handed back to the caller; no
 * caller in this file inspects it.
 */
255 execute_x86cpuid(unsigned int level,
264 /* Currently CPUID is only supported (1) if we can use an instruction on MSVC, or (2)
265 * if the compiler handles GNU-style inline assembly.
268 #if (defined _MSC_VER)
271 #if (_MSC_VER > 1500) || (_MSC_VER == 1500 & _MSC_FULL_VER >= 150030729)
272 /* MSVC 9.0 SP1 or later */
273 __cpuidex(CPUInfo, level, ecxval);
276 __cpuid(CPUInfo, level);
277 /* Set an error code if the user wanted a non-zero ecxval, since we did not have cpuidex */
278 rc = (ecxval > 0) ? -1 : 0;
285 #elif (defined GMX_X86_GCC_INLINE_ASM)
286 /* for now this means GMX_X86_GCC_INLINE_ASM should be defined,
287 * but there might be more options added in the future.
293 #if defined(__i386__) && defined(__PIC__)
294 /* Avoid clobbering the global offset table in 32-bit pic code (ebx register) */
295 __asm__ __volatile__ ("xchgl %%ebx, %1 \n\t"
297 "xchgl %%ebx, %1 \n\t"
298 : "+a" (*eax), "+r" (*ebx), "+c" (*ecx), "+d" (*edx));
300 /* i386 without PIC, or x86-64. Things are easy and we can clobber any reg we want :-) */
301 __asm__ __volatile__ ("cpuid \n\t"
302 : "+a" (*eax), "+b" (*ebx), "+c" (*ecx), "+d" (*edx));
307 * Apparently this is an x86 platform where we don't know how to call cpuid.
309 * This is REALLY bad, since we will lose all Gromacs acceleration.
322 /* Identify CPU features common to Intel & AMD - mainly brand string,
323 * version and some features. Vendor has already been detected outside this.
/* Fill in the parts of the cpuid structure that are read the same way on Intel
 * and AMD: the brand string (extended levels 0x80000002-0x80000004), family,
 * model and stepping (level 1), and the feature flags taken from level 1,
 * 0x80000001 and 0x80000007.
 *
 * cpuid  structure to fill; brand, family, model, stepping and feature[] are written
 */
326 cpuid_check_common_x86(gmx_cpuid_t cpuid)
/* NOTE(review): fn and max_extfn are plain int, but are assigned and compared
 * with constants such as 0x80000002 that do not fit in a 32-bit int. The
 * comparisons are carried out as unsigned; unsigned variables would avoid
 * relying on implementation-defined conversion. */
328 int fn, max_stdfn, max_extfn;
329 unsigned int eax, ebx, ecx, edx;
330 char str[GMX_CPUID_BRAND_MAXLEN];
333 /* Find largest standard/extended function input value */
334 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
336 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
340 if (max_extfn >= 0x80000005)
342 /* Get CPU brand string */
/* Three levels, 16 bytes each: the four registers are copied back to back */
343 for (fn = 0x80000002; fn < 0x80000005; fn++)
345 execute_x86cpuid(fn, 0, &eax, &ebx, &ecx, &edx);
347 memcpy(p+4, &ebx, 4);
348 memcpy(p+8, &ecx, 4);
349 memcpy(p+12, &edx, 4);
354 /* Remove empty initial space */
/* NOTE(review): isspace() receives a plain char; cast to unsigned char so that
 * bytes above 0x7f are not passed as negative values. */
356 while (isspace(*(p)))
/* NOTE(review): strncpy() does not NUL-terminate when the source fills the
 * whole destination. Only 48 bytes are gathered above, so this looks safe
 * provided str is terminated after the loop - confirm. */
360 strncpy(cpuid->brand, p, GMX_CPUID_BRAND_MAXLEN);
364 strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_BRAND_MAXLEN);
367 /* Find basic CPU properties */
370 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
/* family = extended family (eax bits 20-27) + base family (eax bits 8-11) */
372 cpuid->family = ((eax & 0x0FF00000) >> 20) + ((eax & 0x00000F00) >> 8);
373 /* Note that extended model should be shifted left 4, so only shift right 12 instead of 16. */
374 cpuid->model = ((eax & 0x000F0000) >> 12) + ((eax & 0x000000F0) >> 4);
375 cpuid->stepping = (eax & 0x0000000F);
377 /* Feature flags common to AMD and intel */
378 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE3] = (ecx & (1 << 0)) != 0;
379 cpuid->feature[GMX_CPUID_FEATURE_X86_PCLMULDQ] = (ecx & (1 << 1)) != 0;
380 cpuid->feature[GMX_CPUID_FEATURE_X86_SSSE3] = (ecx & (1 << 9)) != 0;
381 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA] = (ecx & (1 << 12)) != 0;
382 cpuid->feature[GMX_CPUID_FEATURE_X86_CX16] = (ecx & (1 << 13)) != 0;
383 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_1] = (ecx & (1 << 19)) != 0;
384 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_2] = (ecx & (1 << 20)) != 0;
385 cpuid->feature[GMX_CPUID_FEATURE_X86_POPCNT] = (ecx & (1 << 23)) != 0;
386 cpuid->feature[GMX_CPUID_FEATURE_X86_AES] = (ecx & (1 << 25)) != 0;
387 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX] = (ecx & (1 << 28)) != 0;
388 cpuid->feature[GMX_CPUID_FEATURE_X86_F16C] = (ecx & (1 << 29)) != 0;
389 cpuid->feature[GMX_CPUID_FEATURE_X86_RDRND] = (ecx & (1 << 30)) != 0;
391 cpuid->feature[GMX_CPUID_FEATURE_X86_PSE] = (edx & (1 << 3)) != 0;
392 cpuid->feature[GMX_CPUID_FEATURE_X86_MSR] = (edx & (1 << 5)) != 0;
393 cpuid->feature[GMX_CPUID_FEATURE_X86_CX8] = (edx & (1 << 8)) != 0;
394 cpuid->feature[GMX_CPUID_FEATURE_X86_APIC] = (edx & (1 << 9)) != 0;
395 cpuid->feature[GMX_CPUID_FEATURE_X86_CMOV] = (edx & (1 << 15)) != 0;
396 cpuid->feature[GMX_CPUID_FEATURE_X86_CLFSH] = (edx & (1 << 19)) != 0;
397 cpuid->feature[GMX_CPUID_FEATURE_X86_MMX] = (edx & (1 << 23)) != 0;
398 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE2] = (edx & (1 << 26)) != 0;
399 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = (edx & (1 << 28)) != 0;
/* Presumably the branch taken when level 1 is unavailable: stepping is marked unknown */
405 cpuid->stepping = -1;
408 if (max_extfn >= 0x80000001)
410 execute_x86cpuid(0x80000001, 0, &eax, &ebx, &ecx, &edx);
411 cpuid->feature[GMX_CPUID_FEATURE_X86_LAHF_LM] = (ecx & (1 << 0)) != 0;
412 cpuid->feature[GMX_CPUID_FEATURE_X86_PDPE1GB] = (edx & (1 << 26)) != 0;
413 cpuid->feature[GMX_CPUID_FEATURE_X86_RDTSCP] = (edx & (1 << 27)) != 0;
416 if (max_extfn >= 0x80000007)
418 execute_x86cpuid(0x80000007, 0, &eax, &ebx, &ecx, &edx);
419 cpuid->feature[GMX_CPUID_FEATURE_X86_NONSTOP_TSC] = (edx & (1 << 8)) != 0;
/* Compact the values in data[0..n-1] into consecutive ranks starting from 0.
 *
 * Every element is replaced by the rank of its value among the distinct values
 * present, taken in ascending order. For example, the array
 * {0,1,2,8,9,10,8,9,10,0,1,2} is rewritten to {0,1,2,3,4,5,3,4,5,0,1,2}, and
 * 6 is returned for the number of unique elements.
 *
 * data  array to renumber in place
 * n     number of elements in data
 *
 * Returns the number of distinct values. Returns 0 and leaves data untouched
 * when data is NULL, n <= 0, or the scratch buffer cannot be allocated; a
 * caller that cross-checks the counts against the processor count will then
 * see an inconsistency instead of a crash.
 */
static int
cpuid_renumber_elements(int *data, int n)
{
    int *unique;
    int  i, j, nunique, found;

    if (data == NULL || n <= 0)
    {
        return 0;
    }

    unique = malloc(sizeof(*unique) * (size_t)n);
    if (unique == NULL)
    {
        return 0;
    }

    nunique = 0;
    for (i = 0; i < n; i++)
    {
        for (j = 0, found = 0; j < nunique && !found; j++)
        {
            found = (data[i] == unique[j]);
        }
        if (!found)
        {
            /* Insert in sorted order! */
            for (j = nunique++; j > 0 && unique[j-1] > data[i]; j--)
            {
                unique[j] = unique[j-1];
            }
            unique[j] = data[i];
        }
    }

    /* Replace each value by its rank. Stop at the first hit: once data[i] holds
     * a rank it must not be compared against the remaining original values. */
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < nunique; j++)
        {
            if (data[i] == unique[j])
            {
                data[i] = j;
                break;
            }
        }
    }

    free(unique);

    return nunique;
}
468 /* APIC IDs, or everything you wanted to know about your x86 cores but were afraid to ask...
470 * Raw APIC IDs are unfortunately somewhat dirty. For technical reasons they are assigned
471 * in power-of-2 chunks, and even then there are no guarantees about specific numbers - all
472 * we know is that the part for each thread/core/package is unique, and how many bits are
473 * reserved for that part.
474 * This routine does internal renumbering so we get continuous indices, and also
475 * decodes the actual number of packages,cores-per-package and hwthreads-per-core.
476 * Returns: 0 on success, non-zero on failure.
/* Split one APIC id per processor into hardware-thread, core and package
 * indices, renumber each to a dense range, and build the locality order.
 *
 * cpuid          structure to fill; nproc must already be set. Four arrays of
 *                nproc ints are allocated here and stored in it.
 * apic_id        raw APIC id of each of the nproc processors
 * core_bits      number of APIC id bits that encode the core
 * hwthread_bits  number of low APIC id bits that encode the hardware thread
 *
 * NOTE(review): none of the four malloc() results is checked before the loop
 * below writes through them.
 */
479 cpuid_x86_decode_apic_id(gmx_cpuid_t cpuid, int *apic_id, int core_bits, int hwthread_bits)
482 int hwthread_mask, core_mask_after_shift;
484 cpuid->hwthread_id = malloc(sizeof(int)*cpuid->nproc);
485 cpuid->core_id = malloc(sizeof(int)*cpuid->nproc);
486 cpuid->package_id = malloc(sizeof(int)*cpuid->nproc);
487 cpuid->locality_order = malloc(sizeof(int)*cpuid->nproc);
/* Masks with the lowest hwthread_bits / core_bits bits set */
489 hwthread_mask = (1 << hwthread_bits) - 1;
490 core_mask_after_shift = (1 << core_bits) - 1;
/* APIC id layout, low to high: hwthread bits, core bits, package bits */
492 for (i = 0; i < cpuid->nproc; i++)
494 cpuid->hwthread_id[i] = apic_id[i] & hwthread_mask;
495 cpuid->core_id[i] = (apic_id[i] >> hwthread_bits) & core_mask_after_shift;
496 cpuid->package_id[i] = apic_id[i] >> (core_bits + hwthread_bits);
/* Renumbering also yields the number of distinct ids at each level */
499 cpuid->npackages = cpuid_renumber_elements(cpuid->package_id, cpuid->nproc);
500 cpuid->ncores_per_package = cpuid_renumber_elements(cpuid->core_id, cpuid->nproc);
501 cpuid->nhwthreads_per_core = cpuid_renumber_elements(cpuid->hwthread_id, cpuid->nproc);
503 /* now check for consistency */
504 if ( (cpuid->npackages * cpuid->ncores_per_package *
505 cpuid->nhwthreads_per_core) != cpuid->nproc )
507 /* the packages/cores-per-package/hwthreads-per-core counts are
512 /* Create a locality order array, i.e. first all resources in package0, which in turn
513 * are sorted so we first have all resources in core0, where threads are sorted in order, etc.
516 for (i = 0; i < cpuid->nproc; i++)
518 idx = (cpuid->package_id[i]*cpuid->ncores_per_package + cpuid->core_id[i])*cpuid->nhwthreads_per_core + cpuid->hwthread_id[i];
519 cpuid->locality_order[idx] = i;
525 /* Detection of AMD-specific CPU features */
/* AMD detection: run the common x86 checks, add the AMD-only feature flags from
 * extended level 0x80000001 and, where thread affinity can be controlled (Linux
 * with sched_setaffinity, or native Windows), read the APIC id of every
 * processor and decode the CPU topology from it.
 *
 * cpuid  structure to fill; feature[], nproc and have_cpu_topology are written here
 */
527 cpuid_check_amd_x86(gmx_cpuid_t cpuid)
529 int max_stdfn, max_extfn, ret;
530 unsigned int eax, ebx, ecx, edx;
531 int hwthread_bits, core_bits;
534 cpuid_check_common_x86(cpuid);
536 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
539 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
542 if (max_extfn >= 0x80000001)
544 execute_x86cpuid(0x80000001, 0, &eax, &ebx, &ecx, &edx);
546 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4A] = (ecx & (1 << 6)) != 0;
547 cpuid->feature[GMX_CPUID_FEATURE_X86_MISALIGNSSE] = (ecx & (1 << 7)) != 0;
548 cpuid->feature[GMX_CPUID_FEATURE_X86_XOP] = (ecx & (1 << 11)) != 0;
549 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA4] = (ecx & (1 << 16)) != 0;
552 /* Query APIC information on AMD */
553 if (max_extfn >= 0x80000008)
555 #if (defined HAVE_SCHED_H && defined HAVE_SCHED_SETAFFINITY && defined HAVE_SYSCONF && defined __linux__)
558 cpu_set_t cpuset, save_cpuset;
/* NOTE(review): sysconf() can return -1 and malloc() can return NULL; neither
 * result is checked before being used as an array size / array. The return
 * values of the sched_*affinity() calls are ignored as well. */
559 cpuid->nproc = sysconf(_SC_NPROCESSORS_ONLN);
560 apic_id = malloc(sizeof(int)*cpuid->nproc);
561 sched_getaffinity(0, sizeof(cpu_set_t), &save_cpuset);
562 /* Get APIC id from each core */
564 for (i = 0; i < cpuid->nproc; i++)
567 sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
568 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
/* The APIC id is taken from the top byte of ebx */
569 apic_id[i] = ebx >> 24;
572 /* Reset affinity to the value it had when calling this routine */
573 sched_setaffinity(0, sizeof(cpu_set_t), &save_cpuset);
574 #define CPUID_HAVE_APIC
575 #elif defined GMX_NATIVE_WINDOWS
/* NOTE(review): the saved affinity mask is held in an unsigned int, while the
 * masks passed to SetThreadAffinityMask() below are built as DWORD_PTR. On
 * 64-bit Windows the upper half of the saved mask would be lost - confirm and
 * widen the variable. */
579 unsigned int save_affinity, affinity;
580 GetSystemInfo( &sysinfo );
581 cpuid->nproc = sysinfo.dwNumberOfProcessors;
582 apic_id = malloc(sizeof(int)*cpuid->nproc);
583 /* Get previous affinity mask */
584 save_affinity = SetThreadAffinityMask(GetCurrentThread(), 1);
585 for (i = 0; i < cpuid->nproc; i++)
587 SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR)1)<<i));
589 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
590 apic_id[i] = ebx >> 24;
592 SetThreadAffinityMask(GetCurrentThread(), save_affinity);
593 #define CPUID_HAVE_APIC
595 #ifdef CPUID_HAVE_APIC
596 /* AMD does not support SMT yet - there are no hwthread bits in apic ID */
598 /* Get number of core bits in apic ID - try modern extended method first */
599 execute_x86cpuid(0x80000008, 0, &eax, &ebx, &ecx, &edx);
600 core_bits = (ecx >> 12) & 0xf;
603 /* Legacy method for old single/dual core AMD CPUs */
/* Counts how many right shifts it takes to bring i down to zero */
605 for (core_bits = 0; (i>>core_bits) > 0; core_bits++)
610 ret = cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits,
/* Topology is only marked valid when decoding reported success (0) */
612 cpuid->have_cpu_topology = (ret == 0);
618 /* Detection of Intel-specific CPU features */
/* Intel detection: run the common x86 checks, add the Intel-only feature flags
 * from levels 1 and 7, clear the HTT flag when there is only one logical core
 * per physical core, and, where thread affinity can be controlled, query level
 * 0xB on every processor and decode the CPU topology.
 *
 * cpuid  structure to fill; feature[], nproc and have_cpu_topology are written here
 */
620 cpuid_check_intel_x86(gmx_cpuid_t cpuid)
/* NOTE(review): ret is unsigned here but receives the int result of
 * cpuid_x86_decode_apic_id(); only equality with 0 is tested, so this works,
 * but an int would match the AMD routine. */
622 unsigned int max_stdfn, max_extfn, ret;
623 unsigned int eax, ebx, ecx, edx;
624 unsigned int max_logical_cores, max_physical_cores;
625 int hwthread_bits, core_bits;
628 cpuid_check_common_x86(cpuid);
630 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
633 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
638 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
639 cpuid->feature[GMX_CPUID_FEATURE_X86_PDCM] = (ecx & (1 << 15)) != 0;
640 cpuid->feature[GMX_CPUID_FEATURE_X86_PCID] = (ecx & (1 << 17)) != 0;
641 cpuid->feature[GMX_CPUID_FEATURE_X86_X2APIC] = (ecx & (1 << 21)) != 0;
642 cpuid->feature[GMX_CPUID_FEATURE_X86_TDT] = (ecx & (1 << 24)) != 0;
647 execute_x86cpuid(0x7, 0, &eax, &ebx, &ecx, &edx);
648 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX2] = (ebx & (1 << 5)) != 0;
651 /* Check whether Hyper-Threading is enabled, not only supported */
652 if (cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] && max_stdfn >= 4)
654 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
655 max_logical_cores = (ebx >> 16) & 0x0FF;
656 execute_x86cpuid(0x4, 0, &eax, &ebx, &ecx, &edx);
/* The +1 keeps the divisor below at least 1 */
657 max_physical_cores = ((eax >> 26) & 0x3F) + 1;
659 /* Clear HTT flag if we only have 1 logical core per physical */
660 if (max_logical_cores/max_physical_cores < 2)
662 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = 0;
666 if (max_stdfn >= 0xB)
668 /* Query x2 APIC information from cores */
669 #if (defined HAVE_SCHED_H && defined HAVE_SCHED_SETAFFINITY && defined HAVE_SYSCONF && defined __linux__)
672 cpu_set_t cpuset, save_cpuset;
/* NOTE(review): sysconf() can return -1 and malloc() can return NULL; neither
 * result is checked, and the sched_*affinity() return values are ignored. */
673 cpuid->nproc = sysconf(_SC_NPROCESSORS_ONLN);
674 apic_id = malloc(sizeof(int)*cpuid->nproc);
675 sched_getaffinity(0, sizeof(cpu_set_t), &save_cpuset);
676 /* Get x2APIC ID from each hardware thread */
678 for (i = 0; i < cpuid->nproc; i++)
681 sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
682 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
686 /* Reset affinity to the value it had when calling this routine */
687 sched_setaffinity(0, sizeof(cpu_set_t), &save_cpuset);
688 #define CPUID_HAVE_APIC
689 #elif defined GMX_NATIVE_WINDOWS
/* NOTE(review): the saved affinity mask is held in an unsigned int, while the
 * masks passed to SetThreadAffinityMask() are built as DWORD_PTR; on 64-bit
 * Windows the upper half of the saved mask would be lost - confirm. */
693 unsigned int save_affinity, affinity;
694 GetSystemInfo( &sysinfo );
695 cpuid->nproc = sysinfo.dwNumberOfProcessors;
696 apic_id = malloc(sizeof(int)*cpuid->nproc);
697 /* Get previous affinity mask */
698 save_affinity = SetThreadAffinityMask(GetCurrentThread(), 1);
699 for (i = 0; i < cpuid->nproc; i++)
701 SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR)1)<<i));
703 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
706 SetThreadAffinityMask(GetCurrentThread(), save_affinity);
707 #define CPUID_HAVE_APIC
709 #ifdef CPUID_HAVE_APIC
/* Sub-level 0 gives the hwthread bit count in eax[4:0]; sub-level 1 gives the
 * combined hwthread+core bit count, so the core bits are the difference. */
710 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
711 hwthread_bits = eax & 0x1F;
712 execute_x86cpuid(0xB, 1, &eax, &ebx, &ecx, &edx);
713 core_bits = (eax & 0x1F) - hwthread_bits;
714 ret = cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits,
716 cpuid->have_cpu_topology = (ret == 0);
721 #endif /* GMX_CPUID_X86 */
/* Copy the part of 'in' that precedes the first ':' into 's', with trailing
 * whitespace removed.
 *
 * in         NUL-terminated input, e.g. one line read from /proc/cpuinfo
 * s          output buffer of at least maxlength bytes
 * maxlength  size of s in bytes; nothing is written when it is <= 0
 *
 * 's' is always NUL-terminated on return. It becomes the empty string when no
 * colon is found within the first maxlength-1 characters of 'in'.
 */
static void
chomp_substring_before_colon(const char *in, char *s, int maxlength)
{
    char *p;

    if (maxlength <= 0)
    {
        return;
    }

    /* strncpy() leaves the destination unterminated when the source is too
     * long, so reserve the last byte and terminate explicitly. */
    strncpy(s, in, maxlength-1);
    s[maxlength-1] = '\0';

    p = strchr(s, ':');
    if (p == NULL)
    {
        *s = '\0';
        return;
    }

    *p = '\0';
    /* Check the bound before looking at the previous character, so that s[-1]
     * is never read when everything before the colon is whitespace. */
    while (p > s && isspace((unsigned char)p[-1]))
    {
        *(--p) = '\0';
    }
}
/* Copy the part of 'in' that follows the first ':' into 's', with leading and
 * trailing whitespace removed.
 *
 * in         NUL-terminated input, e.g. one line read from /proc/cpuinfo
 * s          output buffer of at least maxlength bytes
 * maxlength  size of s in bytes; nothing is written when it is <= 0
 *
 * 's' is always NUL-terminated on return and holds at most maxlength-1
 * characters. It becomes the empty string when 'in' contains no colon.
 */
static void
chomp_substring_after_colon(const char *in, char *s, int maxlength)
{
    const char *start;
    char       *end;

    if (maxlength <= 0)
    {
        return;
    }

    start = strchr(in, ':');
    if (start == NULL)
    {
        *s = '\0';
        return;
    }

    /* Step over the colon and the whitespace that follows it */
    start++;
    while (isspace((unsigned char)*start))
    {
        start++;
    }

    /* strncpy() leaves the destination unterminated when the source is too
     * long, so reserve the last byte and terminate explicitly. */
    strncpy(s, start, maxlength-1);
    s[maxlength-1] = '\0';

    /* Check the bound before looking at the previous character, so that s[-1]
     * is never read when the copied text is empty or all whitespace. */
    end = s + strlen(s);
    while (end > s && isspace((unsigned char)end[-1]))
    {
        *(--end) = '\0';
    }
}
767 /* Try to find the vendor of the current CPU, so we know what specific
768 * detection routine to call.
/* Determine the CPU vendor.
 *
 * One path executes CPUID level 0 and compares the 12-character vendor string
 * with gmx_cpuid_vendor_string[]. The Linux path scans /proc/cpuinfo for a
 * "vendor_id", "vendor", "manufacture" or "model" entry and looks for any known
 * vendor name (or its alternative spelling) inside the value.
 *
 * The result starts out as GMX_CPUID_VENDOR_UNKNOWN.
 */
770 static enum gmx_cpuid_vendor
771 cpuid_check_vendor(void)
773 enum gmx_cpuid_vendor i, vendor;
774 /* Register data used on x86 */
775 unsigned int eax, ebx, ecx, edx;
776 char vendorstring[13];
778 char buffer[255],before_colon[255], after_colon[255];
780 /* Set default first */
781 vendor = GMX_CPUID_VENDOR_UNKNOWN;
784 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
/* The vendor string is assembled in the register order ebx, edx, ecx */
786 memcpy(vendorstring, &ebx, 4);
787 memcpy(vendorstring+4, &edx, 4);
788 memcpy(vendorstring+8, &ecx, 4);
790 vendorstring[12] = '\0';
792 for (i = GMX_CPUID_VENDOR_UNKNOWN; i < GMX_CPUID_NVENDORS; i++)
794 if (!strncmp(vendorstring, gmx_cpuid_vendor_string[i], 12))
799 #elif defined(__linux__) || defined(__linux)
800 /* General Linux. Try to get CPU vendor from /proc/cpuinfo */
/* NOTE(review): confirm that fp is closed on every path once the scan ends */
801 if( (fp = fopen("/proc/cpuinfo","r")) != NULL)
/* Reading stops at the first line that yields a known vendor */
803 while( (vendor == GMX_CPUID_VENDOR_UNKNOWN) && (fgets(buffer,sizeof(buffer),fp) != NULL))
805 chomp_substring_before_colon(buffer,before_colon,sizeof(before_colon));
806 /* Intel/AMD use "vendor_id", IBM "vendor"(?) or "model". Fujitsu "manufacture". Add others if you have them! */
807 if( !strcmp(before_colon,"vendor_id")
808 || !strcmp(before_colon,"vendor")
809 || !strcmp(before_colon,"manufacture")
810 || !strcmp(before_colon,"model"))
812 chomp_substring_after_colon(buffer,after_colon,sizeof(after_colon));
813 for(i=GMX_CPUID_VENDOR_UNKNOWN; i<GMX_CPUID_NVENDORS; i++)
815 /* Be liberal and accept if we find the vendor
816 * string (or alternative string) anywhere. Using
817 * strcasestr() would be non-portable. */
818 if(strstr(after_colon,gmx_cpuid_vendor_string[i])
819 || strstr(after_colon,gmx_cpuid_vendor_string_alternative[i]))
/* Hand out the detected CPU topology.
 *
 * When cpuid->have_cpu_topology is non-zero, the counts are copied to the
 * integer output arguments and the four array output arguments are set to
 * point at the arrays held inside cpuid. No copy of the arrays is made, so
 * the caller receives read-only views (const int *) of the structure's own
 * data.
 */
836 gmx_cpuid_topology(gmx_cpuid_t cpuid,
839 int * ncores_per_package,
840 int * nhwthreads_per_core,
841 const int ** package_id,
842 const int ** core_id,
843 const int ** hwthread_id,
844 const int ** locality_order)
848 if (cpuid->have_cpu_topology)
850 *nprocessors = cpuid->nproc;
851 *npackages = cpuid->npackages;
852 *ncores_per_package = cpuid->ncores_per_package;
853 *nhwthreads_per_core = cpuid->nhwthreads_per_core;
854 *package_id = cpuid->package_id;
855 *core_id = cpuid->core_id;
856 *hwthread_id = cpuid->hwthread_id;
857 *locality_order = cpuid->locality_order;
/* Classify the SMT (hyper-threading) state of the machine.
 *
 * - With topology information: ENABLED when there is more than one hardware
 *   thread per core, otherwise DISABLED.
 * - Without topology: DISABLED when the vendor is AMD or the HTT feature flag
 *   is clear.
 * - Otherwise GMX_CPUID_X86_SMT_CANNOTDETECT.
 */
868 enum gmx_cpuid_x86_smt
869 gmx_cpuid_x86_smt(gmx_cpuid_t cpuid)
871 enum gmx_cpuid_x86_smt rc;
873 if (cpuid->have_cpu_topology)
875 rc = (cpuid->nhwthreads_per_core > 1) ? GMX_CPUID_X86_SMT_ENABLED : GMX_CPUID_X86_SMT_DISABLED;
877 else if (cpuid->vendor == GMX_CPUID_VENDOR_AMD || gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_HTT) == 0)
879 rc = GMX_CPUID_X86_SMT_DISABLED;
883 rc = GMX_CPUID_X86_SMT_CANNOTDETECT;
/* Allocate a cpuid structure, reset it, and run the detection that matches the
 * CPU vendor.
 *
 * pcpuid  presumably receives the new handle (the stand-alone driver passes the
 *         address of its handle and uses it afterwards)
 *
 * All feature flags and topology fields are cleared first. Intel and AMD are
 * dispatched to their x86 routines. For other vendors the brand defaults to
 * "Unknown CPU brand" and, on Linux, is replaced by the first "model name" or
 * "cpu" entry of /proc/cpuinfo; GMX_CPUID_FEATURE_CANNOTDETECT is set in that
 * path.
 *
 * NOTE(review): confirm the malloc() result is checked before the structure is
 * first written.
 */
890 gmx_cpuid_init (gmx_cpuid_t * pcpuid)
895 char buffer[255],buffer2[255];
898 cpuid = malloc(sizeof(*cpuid));
902 for (i = 0; i < GMX_CPUID_NFEATURES; i++)
904 cpuid->feature[i] = 0;
907 cpuid->have_cpu_topology = 0;
909 cpuid->npackages = 0;
910 cpuid->ncores_per_package = 0;
911 cpuid->nhwthreads_per_core = 0;
912 cpuid->package_id = NULL;
913 cpuid->core_id = NULL;
914 cpuid->hwthread_id = NULL;
915 cpuid->locality_order = NULL;
917 cpuid->vendor = cpuid_check_vendor();
919 switch (cpuid->vendor)
922 case GMX_CPUID_VENDOR_INTEL:
923 cpuid_check_intel_x86(cpuid);
925 case GMX_CPUID_VENDOR_AMD:
926 cpuid_check_amd_x86(cpuid);
/* The 17-character literal is far shorter than the buffer, so strncpy() pads
 * the rest of brand with NUL bytes here. */
931 strncpy(cpuid->brand,"Unknown CPU brand",GMX_CPUID_BRAND_MAXLEN);
932 #if defined(__linux__) || defined(__linux)
933 /* General Linux. Try to get CPU type from /proc/cpuinfo */
934 if( (fp = fopen("/proc/cpuinfo","r")) != NULL)
937 while( (found_brand==0) && (fgets(buffer,sizeof(buffer),fp) !=NULL))
939 chomp_substring_before_colon(buffer,buffer2,sizeof(buffer2));
940 /* Intel uses "model name", Fujitsu and IBM "cpu". */
941 if( !strcmp(buffer2,"model name") || !strcmp(buffer2,"cpu"))
943 chomp_substring_after_colon(buffer,cpuid->brand,GMX_CPUID_BRAND_MAXLEN);
954 for(i=0; i<GMX_CPUID_NFEATURES; i++)
958 cpuid->feature[GMX_CPUID_FEATURE_CANNOTDETECT] = 1;
/* Counterpart of gmx_cpuid_init(): the stand-alone driver calls it as its last
 * operation on the handle.
 * NOTE(review): presumably releases the structure allocated by
 * gmx_cpuid_init(); confirm that the topology arrays allocated in
 * cpuid_x86_decode_apic_id() are released as well. */
967 gmx_cpuid_done (gmx_cpuid_t cpuid)
/* Write a human-readable description of the CPU into a caller-supplied buffer:
 * vendor, brand, family/model/stepping, then the name of every feature whose
 * flag reads as 1, each preceded by a space, and a final newline.
 *
 * cpuid  handle to describe
 * str    destination buffer (as passed to snprintf)
 * n      size limit passed to snprintf for str
 *
 * Two variants of each formatting call are present: _snprintf and snprintf.
 * NOTE(review): _snprintf does not NUL-terminate on truncation; confirm the
 * buffer is terminated explicitly after each call.
 */
974 gmx_cpuid_formatstring (gmx_cpuid_t cpuid,
980 enum gmx_cpuid_feature feature;
986 "Family: %2d Model: %2d Stepping: %2d\n"
988 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
989 gmx_cpuid_brand(cpuid),
990 gmx_cpuid_family(cpuid), gmx_cpuid_model(cpuid), gmx_cpuid_stepping(cpuid));
995 "Family: %2d Model: %2d Stepping: %2d\n"
997 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
998 gmx_cpuid_brand(cpuid),
999 gmx_cpuid_family(cpuid), gmx_cpuid_model(cpuid), gmx_cpuid_stepping(cpuid));
/* Walk every feature index, starting with the CANNOTDETECT marker */
1007 for (feature = GMX_CPUID_FEATURE_CANNOTDETECT; feature < GMX_CPUID_NFEATURES; feature++)
1009 if (gmx_cpuid_feature(cpuid, feature) == 1)
1012 _snprintf(str, n, " %s", gmx_cpuid_feature_string[feature]);
1014 snprintf(str, n, " %s", gmx_cpuid_feature_string[feature]);
1023 _snprintf(str, n, "\n");
1025 snprintf(str, n, "\n");
/* Suggest the acceleration level that fits the detected CPU.
 *
 * Intel:   AVX -> AVX_256, else SSE4.1 -> SSE4_1, else SSE2 -> SSE2.
 * AMD:     AVX -> AVX_128_FMA, else SSE4.1 -> SSE4_1, else SSE2 -> SSE2.
 * Fujitsu: brand string containing "SPARC64" -> SPARC64_HPC_ACE.
 * IBM:     brand string containing "A2" -> IBM_QPX.
 *
 * The suggestion starts as GMX_CPUID_ACCELERATION_NONE and stays that way when
 * no rule above applies.
 */
1034 enum gmx_cpuid_acceleration
1035 gmx_cpuid_acceleration_suggest (gmx_cpuid_t cpuid)
1037 enum gmx_cpuid_acceleration tmpacc;
1039 tmpacc = GMX_CPUID_ACCELERATION_NONE;
1041 if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_INTEL)
1043 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
1045 tmpacc = GMX_CPUID_ACCELERATION_X86_AVX_256;
1047 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE4_1))
1049 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
1051 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE2))
1053 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE2;
1056 else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_AMD)
1058 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
1060 tmpacc = GMX_CPUID_ACCELERATION_X86_AVX_128_FMA;
1062 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE4_1))
1064 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
1066 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE2))
1068 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE2;
1071 else if(gmx_cpuid_vendor(cpuid)==GMX_CPUID_VENDOR_FUJITSU)
1073 if(strstr(gmx_cpuid_brand(cpuid),"SPARC64"))
1075 tmpacc = GMX_CPUID_ACCELERATION_SPARC64_HPC_ACE;
1078 else if(gmx_cpuid_vendor(cpuid)==GMX_CPUID_VENDOR_IBM)
1080 if(strstr(gmx_cpuid_brand(cpuid),"A2"))
1082 tmpacc = GMX_CPUID_ACCELERATION_IBM_QPX;
/* Compare the acceleration suggested for this CPU with the one compiled in.
 *
 * cpuid            detected CPU information
 * log              stream that receives the hardware description and, on a
 *                  mismatch, the "Binary not matching hardware" message
 * print_to_stderr  when non-zero, a one-line note about the mismatch is also
 *                  written to stderr
 *
 * rc is non-zero when the suggested and compiled accelerations differ.
 *
 * NOTE(review): the size 1023 passed to gmx_cpuid_formatstring() is presumably
 * one less than the size of str - confirm against its declaration.
 */
1091 gmx_cpuid_acceleration_check(gmx_cpuid_t cpuid,
1093 int print_to_stderr)
1097 enum gmx_cpuid_acceleration acc;
1099 acc = gmx_cpuid_acceleration_suggest(cpuid);
1101 rc = (acc != compiled_acc);
1103 gmx_cpuid_formatstring(cpuid, str, 1023);
1109 "\nDetecting CPU-specific acceleration.\nPresent hardware specification:\n"
1111 "Acceleration most likely to fit this hardware: %s\n"
1112 "Acceleration selected at GROMACS compile time: %s\n\n",
1114 gmx_cpuid_acceleration_string[acc],
1115 gmx_cpuid_acceleration_string[compiled_acc]);
1122 fprintf(log, "\nBinary not matching hardware - you might be losing performance.\n"
1123 "Acceleration most likely to fit this hardware: %s\n"
1124 "Acceleration selected at GROMACS compile time: %s\n\n",
1125 gmx_cpuid_acceleration_string[acc],
1126 gmx_cpuid_acceleration_string[compiled_acc]);
1128 if (print_to_stderr)
1130 fprintf(stderr, "Compiled acceleration: %s (Gromacs could use %s on this machine, which is better)\n",
1131 gmx_cpuid_acceleration_string[compiled_acc],
1132 gmx_cpuid_acceleration_string[acc]);
1139 #ifdef GMX_CPUID_STANDALONE
1140 /* Stand-alone program to enable queries of CPU features from Cmake.
1141 * Note that you need to check inline ASM capabilities before compiling and set
1142 * -DGMX_X86_GCC_INLINE_ASM for the cpuid instruction to work...
/* Entry point of the stand-alone query tool (built with GMX_CPUID_STANDALONE).
 *
 * Detects the CPU with gmx_cpuid_init(), prints the single item selected by
 * argv[1] to stdout, and finishes with gmx_cpuid_done().
 *
 * Flags are matched with strncmp() on their first three characters only, so an
 * abbreviation such as "-ve" selects -vendor, and any argument starting with
 * those three characters is accepted.
 *
 * NOTE(review): argv[1] is read unconditionally in the comparisons below;
 * confirm argc is validated before they are reached.
 */
1145 main(int argc, char **argv)
1148 enum gmx_cpuid_acceleration acc;
1154 "Usage:\n\n%s [flags]\n\n"
1155 "Available flags:\n"
1156 "-vendor Print CPU vendor.\n"
1157 "-brand Print CPU brand string.\n"
1158 "-family Print CPU family version.\n"
1159 "-model Print CPU model version.\n"
1160 "-stepping Print CPU stepping version.\n"
1161 "-features Print CPU feature flags.\n"
1162 "-acceleration Print suggested GROMACS acceleration.\n",
1167 gmx_cpuid_init(&cpuid);
1169 if (!strncmp(argv[1], "-vendor", 3))
1171 printf("%s\n", gmx_cpuid_vendor_string[cpuid->vendor]);
1173 else if (!strncmp(argv[1], "-brand", 3))
1175 printf("%s\n", cpuid->brand);
1177 else if (!strncmp(argv[1], "-family", 3))
1179 printf("%d\n", cpuid->family);
1181 else if (!strncmp(argv[1], "-model", 3))
1183 printf("%d\n", cpuid->model);
1185 else if (!strncmp(argv[1], "-stepping", 3))
1187 printf("%d\n", cpuid->stepping);
1189 else if (!strncmp(argv[1], "-features", 3))
1192 for (i = 0; i < GMX_CPUID_NFEATURES; i++)
1194 if (cpuid->feature[i] == 1)
1200 printf("%s", gmx_cpuid_feature_string[i]);
1205 else if (!strncmp(argv[1], "-acceleration", 3))
1207 acc = gmx_cpuid_acceleration_suggest(cpuid);
1208 fprintf(stdout, "%s\n", gmx_cpuid_acceleration_string[acc]);
1211 gmx_cpuid_done(cpuid);