Sort all includes in src/gromacs
[alexxy/gromacs.git] / src / gromacs / gmxlib / gmx_cpuid.c
index c97a48a95cd49afcccc31aa513e403912ce57d33..18109192d9cd1410f7b4119f83aa4e6c1bd2089a 100644 (file)
@@ -1,57 +1,77 @@
-/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+/*
+ * This file is part of the GROMACS molecular simulation package.
  *
+ * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
  *
- * This file is part of GROMACS.
- * Copyright (c) 2012-
- *
- * Written by the Gromacs development team under coordination of
- * David van der Spoel, Berk Hess, and Erik Lindahl.
- *
- * This library is free software; you can redistribute it and/or
+ * GROMACS is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2
+ * as published by the Free Software Foundation; either version 2.1
  * of the License, or (at your option) any later version.
  *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  *
- * And Hey:
- * Gnomes, ROck Monsters And Chili Sauce
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
  */
+#include "gmxpre.h"
+
 #ifdef HAVE_CONFIG_H
-#include <config.h>
+#include "config.h"
 #endif
 
 #ifdef HAVE_SCHED_H
-#define _GNU_SOURCE
-#include <sched.h>
+#  ifndef _GNU_SOURCE
+#    define _GNU_SOURCE 1
+#  endif
+#  include <sched.h>
 #endif
 
+#include <ctype.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <ctype.h>
-#ifdef _MSC_VER
+#ifdef GMX_NATIVE_WINDOWS
 /* MSVC definition for __cpuid() */
-#include <intrin.h>
+    #ifdef _MSC_VER
+        #include <intrin.h>
+    #endif
 /* sysinfo functions */
-#include <windows.h>
+    #include <windows.h>
 #endif
 #ifdef HAVE_UNISTD_H
 /* sysconf() definition */
-#include <unistd.h>
+    #include <unistd.h>
 #endif
 
-#include "gmx_cpuid.h"
+#include "gromacs/legacyheaders/gmx_cpuid.h"
 
 
 
 /* For convenience, and to enable configure-time invocation, we keep all architectures
  * in a single file, but to avoid repeated ifdefs we set the overall architecture here.
  */
-#if defined (__i386__) || defined (__x86_64__) || defined (_M_IX86) || defined (_M_X64)
+#ifdef GMX_TARGET_X86
 /* OK, it is x86, but can we execute cpuid? */
-#if defined(GMX_X86_GCC_INLINE_ASM) || ( defined(_MSC_VER) && ( (_MSC_VER > 1500) || (_MSC_VER==1500 & _MSC_FULL_VER >= 150030729)))
+#if defined(GMX_X86_GCC_INLINE_ASM) || ( defined(_MSC_VER) && ( (_MSC_VER > 1500) || (_MSC_VER == 1500 & _MSC_FULL_VER >= 150030729)))
 #    define GMX_CPUID_X86
 #endif
 #endif
@@ -65,7 +85,8 @@ gmx_cpuid_vendor_string[GMX_CPUID_NVENDORS] =
     "GenuineIntel",
     "AuthenticAMD",
     "Fujitsu",
-    "IBM"
+    "IBM",
+    "ARM"
 };
 
 const char *
@@ -76,7 +97,8 @@ gmx_cpuid_vendor_string_alternative[GMX_CPUID_NVENDORS] =
     "GenuineIntel",
     "AuthenticAMD",
     "Fujitsu",
-    "ibm" /* Used on BlueGene/Q */
+    "ibm", /* Used on BlueGene/Q */
+    "AArch64"
 };
 
 const char *
@@ -116,31 +138,37 @@ gmx_cpuid_feature_string[GMX_CPUID_NFEATURES] =
     "ssse3",
     "tdt",
     "x2apic",
-    "xop"
+    "xop",
+    "arm_neon",
+    "arm_neon_asimd"
 };
 
 const char *
-gmx_cpuid_acceleration_string[GMX_CPUID_NACCELERATIONS] =
+gmx_cpuid_simd_string[GMX_CPUID_NSIMD] =
 {
     "CannotDetect",
     "None",
+    "Reference",
     "SSE2",
     "SSE4.1",
     "AVX_128_FMA",
     "AVX_256",
+    "AVX2_256",
     "Sparc64 HPC-ACE",
-    "IBM_QPX"
+    "IBM_QPX",
+    "ARM_NEON",
+    "ARM_NEON_ASIMD"
 };
 
 /* Max length of brand string */
-#define GMX_CPUID_BRAND_MAXLEN 256
+#define GMX_CPUID_STRLEN 256
 
 
 /* Contents of the abstract datatype */
 struct gmx_cpuid
 {
     enum gmx_cpuid_vendor      vendor;
-    char                       brand[GMX_CPUID_BRAND_MAXLEN];
+    char                       brand[GMX_CPUID_STRLEN];
     int                        family;
     int                        model;
     int                        stepping;
@@ -207,38 +235,29 @@ gmx_cpuid_feature           (gmx_cpuid_t                cpuid,
 
 
 
-/* What type of acceleration was compiled in, if any?
- * This is set from Cmake. Note that the SSE2 and SSE4_1 macros are set for
- * AVX too, so it is important that they appear last in the list.
- */
-#ifdef GMX_X86_AVX_256
-static const
-enum gmx_cpuid_acceleration
-    compiled_acc = GMX_CPUID_ACCELERATION_X86_AVX_256;
-#elif defined GMX_X86_AVX_128_FMA
-static const
-enum gmx_cpuid_acceleration
-    compiled_acc = GMX_CPUID_ACCELERATION_X86_AVX_128_FMA;
-#elif defined GMX_X86_SSE4_1
-static const
-enum gmx_cpuid_acceleration
-    compiled_acc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
-#elif defined GMX_X86_SSE2
-static const
-enum gmx_cpuid_acceleration
-    compiled_acc = GMX_CPUID_ACCELERATION_X86_SSE2;
-#elif defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE
-static const
-enum gmx_cpuid_acceleration
-    compiled_acc = GMX_CPUID_ACCELERATION_SPARC64_HPC_ACE;
-#elif defined GMX_CPU_ACCELERATION_IBM_QPX
-static const
-enum gmx_cpuid_acceleration
-    compiled_acc = GMX_CPUID_ACCELERATION_IBM_QPX;
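+/* What type of SIMD was compiled in, if any? The first macro in the chain below that is defined selects compiled_simd; if none is, it is GMX_CPUID_SIMD_NONE. */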
+#ifdef GMX_SIMD_X86_AVX2_256
+static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX2_256;
+#elif defined GMX_SIMD_X86_AVX_256
+static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_256;
+#elif defined GMX_SIMD_X86_AVX_128_FMA
+static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_128_FMA;
+#elif defined GMX_SIMD_X86_SSE4_1
+static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE4_1;
+#elif defined GMX_SIMD_X86_SSE2
+static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE2;
+#elif defined GMX_SIMD_ARM_NEON
+static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_ARM_NEON;
+#elif defined GMX_SIMD_ARM_NEON_ASIMD
+static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_ARM_NEON_ASIMD;
+#elif defined GMX_SIMD_SPARC64_HPC_ACE
+static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_SPARC64_HPC_ACE;
+#elif defined GMX_SIMD_IBM_QPX
+static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_QPX;
+#elif defined GMX_SIMD_REFERENCE
+static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_REFERENCE;
 #else
-static const
-enum gmx_cpuid_acceleration
-    compiled_acc = GMX_CPUID_ACCELERATION_NONE;
+static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_NONE;
 #endif
 
 
@@ -306,7 +325,7 @@ execute_x86cpuid(unsigned int   level,
     /* Death and horror!
      * Apparently this is an x86 platform where we don't know how to call cpuid.
      *
-     * This is REALLY bad, since we will lose all Gromacs acceleration.
+     * This is REALLY bad, since we will lose all Gromacs SIMD support.
      */
     *eax = 0;
     *ebx = 0;
@@ -327,7 +346,7 @@ cpuid_check_common_x86(gmx_cpuid_t                cpuid)
 {
     int                       fn, max_stdfn, max_extfn;
     unsigned int              eax, ebx, ecx, edx;
-    char                      str[GMX_CPUID_BRAND_MAXLEN];
+    char                      str[GMX_CPUID_STRLEN];
     char *                    p;
 
     /* Find largest standard/extended function input value */
@@ -357,11 +376,11 @@ cpuid_check_common_x86(gmx_cpuid_t                cpuid)
         {
             p++;
         }
-        strncpy(cpuid->brand, p, GMX_CPUID_BRAND_MAXLEN);
+        strncpy(cpuid->brand, p, GMX_CPUID_STRLEN);
     }
     else
     {
-        strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_BRAND_MAXLEN);
+        strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
     }
 
     /* Find basic CPU properties */
@@ -462,6 +481,7 @@ cpuid_renumber_elements(int *data, int n)
             }
         }
     }
+    free(unique);
     return nunique;
 }
 
@@ -502,7 +522,7 @@ cpuid_x86_decode_apic_id(gmx_cpuid_t cpuid, int *apic_id, int core_bits, int hwt
 
     /* now check for consistency */
     if ( (cpuid->npackages * cpuid->ncores_per_package *
-          cpuid->nhwthreads_per_core) != cpuid->nproc )
+          cpuid->nhwthreads_per_core) != cpuid->nproc)
     {
         /* the packages/cores-per-package/hwthreads-per-core counts are
            inconsistent. */
@@ -552,7 +572,7 @@ cpuid_check_amd_x86(gmx_cpuid_t                cpuid)
     /* Query APIC information on AMD */
     if (max_extfn >= 0x80000008)
     {
-#if (defined HAVE_SCHED_H && defined HAVE_SCHED_SETAFFINITY && defined HAVE_SYSCONF && defined __linux__)
+#if (defined HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF && defined __linux__)
         /* Linux */
         unsigned int   i;
         cpu_set_t      cpuset, save_cpuset;
@@ -607,7 +627,7 @@ cpuid_check_amd_x86(gmx_cpuid_t                cpuid)
                 ;
             }
         }
-        ret = cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits, 
+        ret = cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits,
                                        hwthread_bits);
         cpuid->have_cpu_topology = (ret == 0);
 #endif
@@ -666,7 +686,7 @@ cpuid_check_intel_x86(gmx_cpuid_t                cpuid)
     if (max_stdfn >= 0xB)
     {
         /* Query x2 APIC information from cores */
-#if (defined HAVE_SCHED_H && defined HAVE_SCHED_SETAFFINITY && defined HAVE_SYSCONF && defined __linux__)
+#if (defined HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF && defined __linux__)
         /* Linux */
         unsigned int   i;
         cpu_set_t      cpuset, save_cpuset;
@@ -711,8 +731,8 @@ cpuid_check_intel_x86(gmx_cpuid_t                cpuid)
         hwthread_bits    = eax & 0x1F;
         execute_x86cpuid(0xB, 1, &eax, &ebx, &ecx, &edx);
         core_bits        = (eax & 0x1F) - hwthread_bits;
-        ret = cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits, 
-                                       hwthread_bits);
+        ret              = cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits,
+                                                    hwthread_bits);
         cpuid->have_cpu_topology = (ret == 0);
 #endif
     }
@@ -722,24 +742,23 @@ cpuid_check_intel_x86(gmx_cpuid_t                cpuid)
 
 
 
-
 static void
 chomp_substring_before_colon(const char *in, char *s, int maxlength)
 {
     char *p;
-    strncpy(s,in,maxlength);
-    p = strchr(s,':');
-    if(p!=NULL)
+    strncpy(s, in, maxlength);
+    p = strchr(s, ':');
+    if (p != NULL)
     {
-        *p='\0';
-        while(isspace(*(--p)) && (p>=s))
+        *p = '\0';
+        while (isspace(*(--p)) && (p >= s))
         {
-            *p='\0';
+            *p = '\0';
         }
     }
     else
     {
-        *s='\0';
+        *s = '\0';
     }
 }
 
@@ -747,23 +766,86 @@ static void
 chomp_substring_after_colon(const char *in, char *s, int maxlength)
 {
     char *p;
-    if( (p = strchr(in,':'))!=NULL)
+    if ( (p = strchr(in, ':')) != NULL)
     {
         p++;
-        while(isspace(*p)) p++;
-        strncpy(s,p,maxlength);
+        while (isspace(*p))
+        {
+            p++;
+        }
+        strncpy(s, p, maxlength);
         p = s+strlen(s);
-        while(isspace(*(--p)) && (p>=s))
+        while (isspace(*(--p)) && (p >= s))
         {
-            *p='\0';
+            *p = '\0';
         }
     }
     else
     {
-        *s='\0';
+        *s = '\0';
     }
 }
 
+/* Fill in brand, family, model, stepping and the NEON/ASIMD feature flags for ARM.
+ * On Linux they are parsed from /proc/cpuinfo (nothing is set if it cannot be opened);
+ * elsewhere the feature flags follow from the compile target alone. Always returns 0.
+ */
+static int
+cpuid_check_arm(gmx_cpuid_t                cpuid)
+{
+#if defined(__linux__) || defined(__linux)
+    FILE *fp;
+    char  buffer[GMX_CPUID_STRLEN], buffer2[GMX_CPUID_STRLEN], buffer3[GMX_CPUID_STRLEN];
+
+    if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
+    {
+        while ( (fgets(buffer, sizeof(buffer), fp) != NULL))
+        {
+            chomp_substring_before_colon(buffer, buffer2, GMX_CPUID_STRLEN);
+            chomp_substring_after_colon(buffer, buffer3, GMX_CPUID_STRLEN);
+
+            if (!strcmp(buffer2, "Processor"))
+            {
+                strncpy(cpuid->brand, buffer3, GMX_CPUID_STRLEN);
+            }
+            else if (!strcmp(buffer2, "CPU architecture"))
+            {
+                cpuid->family = !strcmp(buffer3, "AArch64") ? 8 : strtol(buffer3, NULL, 10); /* non-numeric "AArch64" maps to 8 */
+            }
+            else if (!strcmp(buffer2, "CPU part"))
+            {
+                cpuid->model = strtol(buffer3, NULL, 16);
+            }
+            else if (!strcmp(buffer2, "CPU revision"))
+            {
+                cpuid->stepping = strtol(buffer3, NULL, 10);
+            }
+            else if (!strcmp(buffer2, "Features") && strstr(buffer3, "neon"))
+            {
+                cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON] = 1;
+            }
+            else if (!strcmp(buffer2, "Features") && strstr(buffer3, "asimd"))
+            {
+                cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON_ASIMD] = 1;
+            }
+        }
+        fclose(fp); /* kept inside the if: fclose(NULL) is undefined behaviour */
+    }
+#else
+#    ifdef __aarch64__
+    /* Strange 64-bit non-linux platform. However, since NEON ASIMD is present on all
+     * implementations of AArch64 so far, we assume it is present for now.
+     */
+    cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON_ASIMD] = 1;
+#    else
+    /* Strange 32-bit non-linux platform. We cannot assume that neon is present. */
+    cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON] = 0;
+#    endif
+#endif
+    return 0;
+}
+
+
 /* Try to find the vendor of the current CPU, so we know what specific
  * detection routine to call.
  */
@@ -775,7 +857,9 @@ cpuid_check_vendor(void)
     unsigned int               eax, ebx, ecx, edx;
     char                       vendorstring[13];
     FILE *                     fp;
-    char                       buffer[255],before_colon[255], after_colon[255];
+    char                       buffer[GMX_CPUID_STRLEN];
+    char                       before_colon[GMX_CPUID_STRLEN];
+    char                       after_colon[GMX_CPUID_STRLEN];
 
     /* Set default first */
     vendor = GMX_CPUID_VENDOR_UNKNOWN;
@@ -798,25 +882,29 @@ cpuid_check_vendor(void)
     }
 #elif defined(__linux__) || defined(__linux)
     /* General Linux. Try to get CPU vendor from /proc/cpuinfo */
-    if( (fp = fopen("/proc/cpuinfo","r")) != NULL)
+    if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
     {
-        while( (vendor == GMX_CPUID_VENDOR_UNKNOWN) && (fgets(buffer,sizeof(buffer),fp) != NULL))
+        while ( (vendor == GMX_CPUID_VENDOR_UNKNOWN) && (fgets(buffer, sizeof(buffer), fp) != NULL))
         {
-            chomp_substring_before_colon(buffer,before_colon,sizeof(before_colon));
-            /* Intel/AMD use "vendor_id", IBM "vendor"(?) or "model". Fujitsu "manufacture". Add others if you have them! */
-            if( !strcmp(before_colon,"vendor_id")
-                || !strcmp(before_colon,"vendor")
-                || !strcmp(before_colon,"manufacture")
-                || !strcmp(before_colon,"model"))
+            chomp_substring_before_colon(buffer, before_colon, sizeof(before_colon));
+            /* Intel/AMD use "vendor_id", IBM "vendor"(?) or "model". Fujitsu "manufacture".
+             * On ARM there does not seem to be a vendor, but ARM or AArch64 is listed in the Processor string.
+             * Add others if you have them!
+             */
+            if (!strcmp(before_colon, "vendor_id")
+                || !strcmp(before_colon, "vendor")
+                || !strcmp(before_colon, "manufacture")
+                || !strcmp(before_colon, "model")
+                || !strcmp(before_colon, "Processor"))
             {
-                chomp_substring_after_colon(buffer,after_colon,sizeof(after_colon));
-                for(i=GMX_CPUID_VENDOR_UNKNOWN; i<GMX_CPUID_NVENDORS; i++)
+                chomp_substring_after_colon(buffer, after_colon, sizeof(after_colon));
+                for (i = GMX_CPUID_VENDOR_UNKNOWN; i < GMX_CPUID_NVENDORS; i++)
                 {
                     /* Be liberal and accept if we find the vendor
                      * string (or alternative string) anywhere. Using
                      * strcasestr() would be non-portable. */
-                    if(strstr(after_colon,gmx_cpuid_vendor_string[i])
-                       || strstr(after_colon,gmx_cpuid_vendor_string_alternative[i]))
+                    if (strstr(after_colon, gmx_cpuid_vendor_string[i])
+                        || strstr(after_colon, gmx_cpuid_vendor_string_alternative[i]))
                     {
                         vendor = i;
                     }
@@ -825,8 +913,12 @@ cpuid_check_vendor(void)
         }
     }
     fclose(fp);
+#elif defined(__arm__) || defined (__arm) || defined(__aarch64__)
+    /* If we are using ARM on something that is not linux we have to trust the compiler,
+     * and we cannot get the extra info that might be present in /proc/cpuinfo.
+     */
+    vendor = GMX_CPUID_VENDOR_ARM;
 #endif
-
     return vendor;
 }
 
@@ -892,7 +984,7 @@ gmx_cpuid_init               (gmx_cpuid_t *              pcpuid)
     gmx_cpuid_t cpuid;
     int         i;
     FILE *      fp;
-    char        buffer[255],buffer2[255];
+    char        buffer[GMX_CPUID_STRLEN], buffer2[GMX_CPUID_STRLEN];
     int         found_brand;
 
     cpuid = malloc(sizeof(*cpuid));
@@ -926,21 +1018,24 @@ gmx_cpuid_init               (gmx_cpuid_t *              pcpuid)
             cpuid_check_amd_x86(cpuid);
             break;
 #endif
+        case GMX_CPUID_VENDOR_ARM:
+            cpuid_check_arm(cpuid);
+            break;
         default:
             /* Default value */
-            strncpy(cpuid->brand,"Unknown CPU brand",GMX_CPUID_BRAND_MAXLEN);
+            strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
 #if defined(__linux__) || defined(__linux)
             /* General Linux. Try to get CPU type from /proc/cpuinfo */
-            if( (fp = fopen("/proc/cpuinfo","r")) != NULL)
+            if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
             {
                 found_brand = 0;
-                while( (found_brand==0) && (fgets(buffer,sizeof(buffer),fp) !=NULL))
+                while ( (found_brand == 0) && (fgets(buffer, sizeof(buffer), fp) != NULL))
                 {
-                    chomp_substring_before_colon(buffer,buffer2,sizeof(buffer2));
+                    chomp_substring_before_colon(buffer, buffer2, sizeof(buffer2));
                     /* Intel uses "model name", Fujitsu and IBM "cpu". */
-                    if( !strcmp(buffer2,"model name") || !strcmp(buffer2,"cpu"))
+                    if (!strcmp(buffer2, "model name") || !strcmp(buffer2, "cpu"))
                     {
-                        chomp_substring_after_colon(buffer,cpuid->brand,GMX_CPUID_BRAND_MAXLEN);
+                        chomp_substring_after_colon(buffer, cpuid->brand, GMX_CPUID_STRLEN);
                         found_brand = 1;
                     }
                 }
@@ -950,10 +1045,10 @@ gmx_cpuid_init               (gmx_cpuid_t *              pcpuid)
             cpuid->family         = 0;
             cpuid->model          = 0;
             cpuid->stepping       = 0;
-            
-            for(i=0; i<GMX_CPUID_NFEATURES; i++)
+
+            for (i = 0; i < GMX_CPUID_NFEATURES; i++)
             {
-                cpuid->feature[i]=0;
+                cpuid->feature[i] = 0;
             }
             cpuid->feature[GMX_CPUID_FEATURE_CANNOTDETECT] = 1;
             break;
@@ -1031,73 +1126,89 @@ gmx_cpuid_formatstring       (gmx_cpuid_t              cpuid,
 
 
 
-enum gmx_cpuid_acceleration
-gmx_cpuid_acceleration_suggest  (gmx_cpuid_t                 cpuid)
+enum gmx_cpuid_simd
+gmx_cpuid_simd_suggest  (gmx_cpuid_t                 cpuid)
 {
-    enum gmx_cpuid_acceleration  tmpacc;
+    enum gmx_cpuid_simd  tmpsimd; /* SIMD level suggested for the detected vendor and features */
 
-    tmpacc = GMX_CPUID_ACCELERATION_NONE;
+    tmpsimd = GMX_CPUID_SIMD_NONE; /* returned as-is when no vendor/feature test below matches */
 
     if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_INTEL)
     {
-        if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
+        if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX2)) /* Intel: tested in the order AVX2, AVX, SSE4.1, SSE2 */
+        {
+            tmpsimd = GMX_CPUID_SIMD_X86_AVX2_256;
+        }
+        else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
         {
-            tmpacc = GMX_CPUID_ACCELERATION_X86_AVX_256;
+            tmpsimd = GMX_CPUID_SIMD_X86_AVX_256;
         }
         else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE4_1))
         {
-            tmpacc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
+            tmpsimd = GMX_CPUID_SIMD_X86_SSE4_1;
         }
         else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE2))
         {
-            tmpacc = GMX_CPUID_ACCELERATION_X86_SSE2;
+            tmpsimd = GMX_CPUID_SIMD_X86_SSE2;
         }
     }
     else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_AMD)
     {
         if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
         {
-            tmpacc = GMX_CPUID_ACCELERATION_X86_AVX_128_FMA;
+            tmpsimd = GMX_CPUID_SIMD_X86_AVX_128_FMA; /* AMD with the AVX flag is mapped to AVX_128_FMA, not AVX_256 */
         }
         else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE4_1))
         {
-            tmpacc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
+            tmpsimd = GMX_CPUID_SIMD_X86_SSE4_1;
         }
         else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE2))
         {
-            tmpacc = GMX_CPUID_ACCELERATION_X86_SSE2;
+            tmpsimd = GMX_CPUID_SIMD_X86_SSE2;
         }
     }
-    else if(gmx_cpuid_vendor(cpuid)==GMX_CPUID_VENDOR_FUJITSU)
+    else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_FUJITSU)
     {
-        if(strstr(gmx_cpuid_brand(cpuid),"SPARC64"))
+        if (strstr(gmx_cpuid_brand(cpuid), "SPARC64")) /* decided from the brand string, not a feature flag */
         {
-            tmpacc = GMX_CPUID_ACCELERATION_SPARC64_HPC_ACE;
+            tmpsimd = GMX_CPUID_SIMD_SPARC64_HPC_ACE;
         }
     }
-    else if(gmx_cpuid_vendor(cpuid)==GMX_CPUID_VENDOR_IBM)
+    else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_IBM)
     {
-        if(strstr(gmx_cpuid_brand(cpuid),"A2"))
+        if (strstr(gmx_cpuid_brand(cpuid), "A2")) /* decided from the brand string, not a feature flag */
         {
-            tmpacc = GMX_CPUID_ACCELERATION_IBM_QPX;
+            tmpsimd = GMX_CPUID_SIMD_IBM_QPX;
         }
     }
-    return tmpacc;
+    else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_ARM)
+    {
+        if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_ARM_NEON_ASIMD)) /* ASIMD is tested before plain NEON */
+        {
+            tmpsimd = GMX_CPUID_SIMD_ARM_NEON_ASIMD;
+        }
+        else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_ARM_NEON))
+        {
+            tmpsimd = GMX_CPUID_SIMD_ARM_NEON;
+        }
+    }
+    return tmpsimd;
 }
 
 
 
 int
-gmx_cpuid_acceleration_check(gmx_cpuid_t   cpuid,
-                             FILE *        log)
+gmx_cpuid_simd_check(gmx_cpuid_t   cpuid,
+                     FILE *        log,
+                     int           print_to_stderr)
 {
     int                           rc;
     char                          str[1024];
-    enum gmx_cpuid_acceleration   acc;
+    enum gmx_cpuid_simd           simd;
 
-    acc = gmx_cpuid_acceleration_suggest(cpuid);
+    simd = gmx_cpuid_simd_suggest(cpuid);
 
-    rc = (acc != compiled_acc);
+    rc = (simd != compiled_simd);
 
     gmx_cpuid_formatstring(cpuid, str, 1023);
     str[1023] = '\0';
@@ -1105,13 +1216,13 @@ gmx_cpuid_acceleration_check(gmx_cpuid_t   cpuid,
     if (log != NULL)
     {
         fprintf(log,
-                "\nDetecting CPU-specific acceleration.\nPresent hardware specification:\n"
+                "\nDetecting CPU SIMD instructions.\nPresent hardware specification:\n"
                 "%s"
-                "Acceleration most likely to fit this hardware: %s\n"
-                "Acceleration selected at GROMACS compile time: %s\n\n",
+                "SIMD instructions most likely to fit this hardware: %s\n"
+                "SIMD instructions selected at GROMACS compile time: %s\n\n",
                 str,
-                gmx_cpuid_acceleration_string[acc],
-                gmx_cpuid_acceleration_string[compiled_acc]);
+                gmx_cpuid_simd_string[simd],
+                gmx_cpuid_simd_string[compiled_simd]);
     }
 
     if (rc != 0)
@@ -1119,14 +1230,17 @@ gmx_cpuid_acceleration_check(gmx_cpuid_t   cpuid,
         if (log != NULL)
         {
             fprintf(log, "\nBinary not matching hardware - you might be losing performance.\n"
-                    "Acceleration most likely to fit this hardware: %s\n"
-                    "Acceleration selected at GROMACS compile time: %s\n\n",
-                    gmx_cpuid_acceleration_string[acc],
-                    gmx_cpuid_acceleration_string[compiled_acc]);
+                    "SIMD instructions most likely to fit this hardware: %s\n"
+                    "SIMD instructions selected at GROMACS compile time: %s\n\n",
+                    gmx_cpuid_simd_string[simd],
+                    gmx_cpuid_simd_string[compiled_simd]);
+        }
+        if (print_to_stderr)
+        {
+            fprintf(stderr, "Compiled SIMD instructions: %s (Gromacs could use %s on this machine, which is better)\n",
+                    gmx_cpuid_simd_string[compiled_simd],
+                    gmx_cpuid_simd_string[simd]);
         }
-        printf("Compiled acceleration: %s (Gromacs could use %s on this machine, which is better)\n",
-               gmx_cpuid_acceleration_string[compiled_acc],
-               gmx_cpuid_acceleration_string[acc]);
     }
     return rc;
 }
@@ -1141,7 +1255,7 @@ int
 main(int argc, char **argv)
 {
     gmx_cpuid_t                   cpuid;
-    enum gmx_cpuid_acceleration   acc;
+    enum gmx_cpuid_simd           simd;
     int                           i, cnt;
 
     if (argc < 2)
@@ -1155,7 +1269,7 @@ main(int argc, char **argv)
                 "-model         Print CPU model version.\n"
                 "-stepping      Print CPU stepping version.\n"
                 "-features      Print CPU feature flags.\n"
-                "-acceleration  Print suggested GROMACS acceleration.\n",
+                "-simd          Print suggested GROMACS SIMD instructions.\n",
                 argv[0]);
         exit(0);
     }
@@ -1198,10 +1312,10 @@ main(int argc, char **argv)
         }
         printf("\n");
     }
-    else if (!strncmp(argv[1], "-acceleration", 3))
+    else if (!strncmp(argv[1], "-simd", 3))
     {
-        acc = gmx_cpuid_acceleration_suggest(cpuid);
-        fprintf(stdout, "%s\n", gmx_cpuid_acceleration_string[acc]);
+        simd = gmx_cpuid_simd_suggest(cpuid);
+        fprintf(stdout, "%s\n", gmx_cpuid_simd_string[simd]);
     }
 
     gmx_cpuid_done(cpuid);