Merge branch 'release-5-0'

author Berk Hess <hess@kth.se>

Wed, 22 Apr 2015 08:57:49 +0000 (10:57 +0200)

committer Berk Hess <hess@kth.se>

Wed, 22 Apr 2015 08:57:49 +0000 (10:57 +0200)
author Berk Hess <hess@kth.se>
Wed, 22 Apr 2015 08:57:49 +0000 (10:57 +0200)
committer Berk Hess <hess@kth.se>
Wed, 22 Apr 2015 08:57:49 +0000 (10:57 +0200)
diff --combined src/gromacs/gmxlib/gmx_cpuid.c

index f91e2b42c45f726b07b855f44b71f7b892d94fb2,53923a1c9ce65ec1890e2bcaf642414de2789ac6..44091a79e292e98818f25de455f0cfb5dd20f22c
--- 1/src/gromacs/gmxlib/gmx_cpuid.c
--- 2/src/gromacs/gmxlib/gmx_cpuid.c
+++ b/src/gromacs/gmxlib/gmx_cpuid.c
@@@ -32,20 -32,21 +32,20 @@@
    * To help us fund GROMACS development, we humbly ask that you cite
    * the research papers on the package. Check out http://www.gromacs.org.
    */
- -#ifdef HAVE_CONFIG_H
- -#include <config.h>
- -#endif
+ +#include "gmxpre.h"
   
- -#ifdef HAVE_SCHED_H
- -#  ifndef _GNU_SOURCE
- -#    define _GNU_SOURCE 1
- -#  endif
- -#  include <sched.h>
+ +/*! \cond */
+ +#include "gromacs/legacyheaders/gmx_cpuid.h"
+ +
+ +#ifdef HAVE_CONFIG_H
+ +#include "config.h"
   #endif
   
+ +#include <ctype.h>
   #include <stdio.h>
   #include <stdlib.h>
   #include <string.h>
- -#include <ctype.h>
+ +
   #ifdef GMX_NATIVE_WINDOWS
   /* MSVC definition for __cpuid() */
       #ifdef _MSC_VER
@@@ -54,14 -55,14 +54,14 @@@
   /* sysinfo functions */
       #include <windows.h>
   #endif
+ +#ifdef HAVE_SCHED_H
+ +    #include <sched.h>
+ +#endif
   #ifdef HAVE_UNISTD_H
   /* sysconf() definition */
       #include <unistd.h>
   #endif
   
- -#include "gmx_cpuid.h"
- -
- -
   
   /* For convenience, and to enable configure-time invocation, we keep all architectures
    * in a single file, but to avoid repeated ifdefs we set the overall architecture here.
@@@ -82,8 -83,7 +82,8 @@@ gmx_cpuid_vendor_string[GMX_CPUID_NVEND
       "GenuineIntel",
       "AuthenticAMD",
       "Fujitsu",
- -    "IBM"
+ +    "IBM", /* Used on Power and BlueGene/Q */
+ +    "ARM"
   };
   
   const char *
@@@ -94,8 -94,7 +94,8 @@@ gmx_cpuid_vendor_string_alternative[GMX
       "GenuineIntel",
       "AuthenticAMD",
       "Fujitsu",
- -    "ibm" /* Used on BlueGene/Q */
+ +    "ibm", /* Used on Power and BlueGene/Q */
+ +    "AArch64"
   };
   
   const char *
@@@ -106,10 -105,6 +106,10 @@@ gmx_cpuid_feature_string[GMX_CPUID_NFEA
       "apic",
       "avx",
       "avx2",
+ +    "avx512f",
+ +    "avx512pf",
+ +    "avx512er",
+ +    "avx512cd",
       "clfsh",
       "cmov",
       "cx8",
@@@ -139,12 -134,7 +139,12 @@@
       "ssse3",
       "tdt",
       "x2apic",
- -    "xop"
+ +    "xop",
+ +    "arm_neon",
+ +    "arm_neon_asimd",
+ +    "QPX",
+ +    "VMX",
+ +    "VSX"
   };
   
   const char *
@@@ -158,25 -148,19 +158,25 @@@ gmx_cpuid_simd_string[GMX_CPUID_NSIMD] 
       "AVX_128_FMA",
       "AVX_256",
       "AVX2_256",
+ +    "AVX_512F",
+ +    "AVX_512ER",
       "Sparc64 HPC-ACE",
- -    "IBM_QPX"
+ +    "IBM_QPX",
+ +    "IBM_VMX",
+ +    "IBM_VSX",
+ +    "ARM_NEON",
+ +    "ARM_NEON_ASIMD"
   };
   
   /* Max length of brand string */
- -#define GMX_CPUID_BRAND_MAXLEN 256
+ +#define GMX_CPUID_STRLEN 256
   
   
   /* Contents of the abstract datatype */
   struct gmx_cpuid
   {
       enum gmx_cpuid_vendor      vendor;
- -    char                       brand[GMX_CPUID_BRAND_MAXLEN];
+ +    char                       brand[GMX_CPUID_STRLEN];
       int                        family;
       int                        model;
       int                        stepping;
@@@ -241,14 -225,22 +241,26 @@@ gmx_cpuid_feature           (gmx_cpuid_
   }
   
   
+ int
+ gmx_cpuid_is_intel_nehalem  (const gmx_cpuid_t          cpuid)
+ {
+     return (cpuid->vendor == GMX_CPUID_VENDOR_INTEL &&
+             cpuid->family == 6 &&
+             (cpuid->model == 0x2E ||
+              cpuid->model == 0x1A ||
+              cpuid->model == 0x1E ||
+              cpuid->model == 0x2F ||
+              cpuid->model == 0x2C ||
+              cpuid->model == 0x25));
+ }
   
   
   /* What type of SIMD was compiled in, if any? */
- -#ifdef GMX_SIMD_X86_AVX2_256
+ +#ifdef GMX_SIMD_X86_AVX_512ER
+ +static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_512ER;
+ +#elif defined GMX_SIMD_X86_AVX_512F
+ +static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_512F;
+ +#elif defined GMX_SIMD_X86_AVX2_256
   static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX2_256;
   #elif defined GMX_SIMD_X86_AVX_256
   static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_256;
@@@ -258,18 -250,10 +270,18 @@@ static const enum gmx_cpuid_simd compil
   static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE4_1;
   #elif defined GMX_SIMD_X86_SSE2
   static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE2;
+ +#elif defined GMX_SIMD_ARM_NEON
+ +static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_ARM_NEON;
+ +#elif defined GMX_SIMD_ARM_NEON_ASIMD
+ +static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_ARM_NEON_ASIMD;
   #elif defined GMX_SIMD_SPARC64_HPC_ACE
   static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_SPARC64_HPC_ACE;
   #elif defined GMX_SIMD_IBM_QPX
   static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_QPX;
+ +#elif defined GMX_SIMD_IBM_VMX
+ +static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_VMX;
+ +#elif defined GMX_SIMD_IBM_VSX
+ +static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_VSX;
   #elif defined GMX_SIMD_REFERENCE
   static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_REFERENCE;
   #else
@@@ -362,7 -346,7 +374,7 @@@ cpuid_check_common_x86(gmx_cpuid_
   {
       int                       fn, max_stdfn, max_extfn;
       unsigned int              eax, ebx, ecx, edx;
- -    char                      str[GMX_CPUID_BRAND_MAXLEN];
+ +    char                      str[GMX_CPUID_STRLEN];
       char *                    p;
   
       /* Find largest standard/extended function input value */
@@@ -392,11 -376,11 +404,11 @@@
           {
               p++;
           }
- -        strncpy(cpuid->brand, p, GMX_CPUID_BRAND_MAXLEN);
+ +        strncpy(cpuid->brand, p, GMX_CPUID_STRLEN);
       }
       else
       {
- -        strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_BRAND_MAXLEN);
+ +        strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
       }
   
       /* Find basic CPU properties */
@@@ -497,7 -481,6 +509,7 @@@ cpuid_renumber_elements(int *data, int 
               }
           }
       }
+ +    free(unique);
       return nunique;
   }
   
@@@ -681,11 -664,7 +693,11 @@@ cpuid_check_intel_x86(gmx_cpuid_
       if (max_stdfn >= 7)
       {
           execute_x86cpuid(0x7, 0, &eax, &ebx, &ecx, &edx);
- -        cpuid->feature[GMX_CPUID_FEATURE_X86_AVX2]    = (ebx & (1 << 5))  != 0;
+ +        cpuid->feature[GMX_CPUID_FEATURE_X86_AVX2]      = (ebx & (1 << 5))  != 0;
+ +        cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512F]  = (ebx & (1 << 16)) != 0;
+ +        cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512PF] = (ebx & (1 << 26)) != 0;
+ +        cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512ER] = (ebx & (1 << 27)) != 0;
+ +        cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512CD] = (ebx & (1 << 28)) != 0;
       }
   
       /* Check whether Hyper-Threading is enabled, not only supported */
@@@ -762,6 -741,7 +774,6 @@@
   
   
   
- -
   static void
   chomp_substring_before_colon(const char *in, char *s, int maxlength)
   {
@@@ -806,119 -786,6 +818,119 @@@ chomp_substring_after_colon(const char 
       }
   }
   
+ +static int
+ +cpuid_check_arm(gmx_cpuid_t                cpuid)
+ +{
+ +#if defined(__linux__) || defined(__linux)
+ +    FILE *fp;
+ +    char  buffer[GMX_CPUID_STRLEN], buffer2[GMX_CPUID_STRLEN], buffer3[GMX_CPUID_STRLEN];
+ +
+ +    if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
+ +    {
+ +        while ( (fgets(buffer, sizeof(buffer), fp) != NULL))
+ +        {
+ +            chomp_substring_before_colon(buffer, buffer2, GMX_CPUID_STRLEN);
+ +            chomp_substring_after_colon(buffer, buffer3, GMX_CPUID_STRLEN);
+ +
+ +            if (!strcmp(buffer2, "Processor"))
+ +            {
+ +                strncpy(cpuid->brand, buffer3, GMX_CPUID_STRLEN);
+ +            }
+ +            else if (!strcmp(buffer2, "CPU architecture"))
+ +            {
+ +                cpuid->family = strtol(buffer3, NULL, 10);
+ +                if (!strcmp(buffer3, "AArch64"))
+ +                {
+ +                    cpuid->family = 8;
+ +                }
+ +            }
+ +            else if (!strcmp(buffer2, "CPU part"))
+ +            {
+ +                cpuid->model = strtol(buffer3, NULL, 16);
+ +            }
+ +            else if (!strcmp(buffer2, "CPU revision"))
+ +            {
+ +                cpuid->stepping = strtol(buffer3, NULL, 10);
+ +            }
+ +            else if (!strcmp(buffer2, "Features") && strstr(buffer3, "neon"))
+ +            {
+ +                cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON] = 1;
+ +            }
+ +            else if (!strcmp(buffer2, "Features") && strstr(buffer3, "asimd"))
+ +            {
+ +                cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON_ASIMD] = 1;
+ +            }
+ +        }
+ +    }
+ +    fclose(fp);
+ +#else
+ +#    ifdef __aarch64__
+ +    /* Strange 64-bit non-linux platform. However, since NEON ASIMD is present on all
+ +     * implementations of AArch64 this far, we assume it is present for now.
+ +     */
+ +    cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON_ASIMD] = 1;
+ +#    else
+ +    /* Strange 32-bit non-linux platform. We cannot assume that neon is present. */
+ +    cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON] = 0;
+ +#    endif
+ +#endif
+ +    return 0;
+ +}
+ +
+ +
+ +static int
+ +cpuid_check_ibm(gmx_cpuid_t                cpuid)
+ +{
+ +#if defined(__linux__) || defined(__linux)
+ +    FILE *fp;
+ +    char  buffer[GMX_CPUID_STRLEN], before_colon[GMX_CPUID_STRLEN], after_colon[GMX_CPUID_STRLEN];
+ +
+ +    if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
+ +    {
+ +        while ( (fgets(buffer, sizeof(buffer), fp) != NULL))
+ +        {
+ +            chomp_substring_before_colon(buffer, before_colon, GMX_CPUID_STRLEN);
+ +            chomp_substring_after_colon(buffer, after_colon, GMX_CPUID_STRLEN);
+ +
+ +            if (!strcmp(before_colon, "cpu") || !strcmp(before_colon, "Processor"))
+ +            {
+ +                strncpy(cpuid->brand, after_colon, GMX_CPUID_STRLEN);
+ +            }
+ +            if (!strcmp(before_colon, "model name") ||
+ +                !strcmp(before_colon, "model") ||
+ +                !strcmp(before_colon, "Processor") ||
+ +                !strcmp(before_colon, "cpu"))
+ +            {
+ +                if (strstr(after_colon, "altivec"))
+ +                {
+ +                    cpuid->feature[GMX_CPUID_FEATURE_IBM_VMX] = 1;
+ +
+ +                    if (!strstr(after_colon, "POWER6") && !strstr(after_colon, "Power6") &&
+ +                        !strstr(after_colon, "power6"))
+ +                    {
+ +                        cpuid->feature[GMX_CPUID_FEATURE_IBM_VSX] = 1;
+ +                    }
+ +                }
+ +            }
+ +        }
+ +    }
+ +    fclose(fp);
+ +
+ +    if (strstr(cpuid->brand, "A2"))
+ +    {
+ +        /* BlueGene/Q */
+ +        cpuid->feature[GMX_CPUID_FEATURE_IBM_QPX] = 1;
+ +    }
+ +#else
+ +    strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
+ +    cpuid->feature[GMX_CPUID_FEATURE_IBM_QPX] = 0;
+ +    cpuid->feature[GMX_CPUID_FEATURE_IBM_VMX] = 0;
+ +    cpuid->feature[GMX_CPUID_FEATURE_IBM_VSX] = 0;
+ +#endif
+ +    return 0;
+ +}
+ +
+ +
   /* Try to find the vendor of the current CPU, so we know what specific
    * detection routine to call.
    */
@@@ -930,9 -797,7 +942,9 @@@ cpuid_check_vendor(void
       unsigned int               eax, ebx, ecx, edx;
       char                       vendorstring[13];
       FILE *                     fp;
- -    char                       buffer[255], before_colon[255], after_colon[255];
+ +    char                       buffer[GMX_CPUID_STRLEN];
+ +    char                       before_colon[GMX_CPUID_STRLEN];
+ +    char                       after_colon[GMX_CPUID_STRLEN];
   
       /* Set default first */
       vendor = GMX_CPUID_VENDOR_UNKNOWN;
@@@ -960,16 -825,11 +972,16 @@@
           while ( (vendor == GMX_CPUID_VENDOR_UNKNOWN) && (fgets(buffer, sizeof(buffer), fp) != NULL))
           {
               chomp_substring_before_colon(buffer, before_colon, sizeof(before_colon));
- -            /* Intel/AMD use "vendor_id", IBM "vendor"(?) or "model". Fujitsu "manufacture". Add others if you have them! */
+ +            /* Intel/AMD use "vendor_id", IBM "vendor", "model", or "cpu". Fujitsu "manufacture".
+ +             * On ARM there does not seem to be a vendor, but ARM or AArch64 is listed in the Processor string.
+ +             * Add others if you have them!
+ +             */
               if (!strcmp(before_colon, "vendor_id")
                   || !strcmp(before_colon, "vendor")
                   || !strcmp(before_colon, "manufacture")
- -                || !strcmp(before_colon, "model"))
+ +                || !strcmp(before_colon, "model")
+ +                || !strcmp(before_colon, "Processor")
+ +                || !strcmp(before_colon, "cpu"))
               {
                   chomp_substring_after_colon(buffer, after_colon, sizeof(after_colon));
                   for (i = GMX_CPUID_VENDOR_UNKNOWN; i < GMX_CPUID_NVENDORS; i++)
@@@ -983,25 -843,12 +995,25 @@@
                           vendor = i;
                       }
                   }
+ +                /* If we did not find vendor yet, check if it is IBM:
+ +                 * On some Power/PowerPC systems it only says power, not IBM.
+ +                 */
+ +                if (vendor == GMX_CPUID_VENDOR_UNKNOWN &&
+ +                    ((strstr(after_colon, "POWER") || strstr(after_colon, "Power") ||
+ +                      strstr(after_colon, "power"))))
+ +                {
+ +                    vendor = GMX_CPUID_VENDOR_IBM;
+ +                }
               }
           }
       }
       fclose(fp);
+ +#elif defined(__arm__) || defined (__arm) || defined(__aarch64__)
+ +    /* If we are using ARM on something that is not linux we have to trust the compiler,
+ +     * and we cannot get the extra info that might be present in /proc/cpuinfo.
+ +     */
+ +    vendor = GMX_CPUID_VENDOR_ARM;
   #endif
- -
       return vendor;
   }
   
@@@ -1067,7 -914,7 +1079,7 @@@ gmx_cpuid_init               (gmx_cpuid
       gmx_cpuid_t cpuid;
       int         i;
       FILE *      fp;
- -    char        buffer[255], buffer2[255];
+ +    char        buffer[GMX_CPUID_STRLEN], buffer2[GMX_CPUID_STRLEN];
       int         found_brand;
   
       cpuid = malloc(sizeof(*cpuid));
@@@ -1101,15 -948,9 +1113,15 @@@
               cpuid_check_amd_x86(cpuid);
               break;
   #endif
+ +        case GMX_CPUID_VENDOR_ARM:
+ +            cpuid_check_arm(cpuid);
+ +            break;
+ +        case GMX_CPUID_VENDOR_IBM:
+ +            cpuid_check_ibm(cpuid);
+ +            break;
           default:
               /* Default value */
- -            strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_BRAND_MAXLEN);
+ +            strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
   #if defined(__linux__) || defined(__linux)
               /* General Linux. Try to get CPU type from /proc/cpuinfo */
               if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
@@@ -1121,7 -962,7 +1133,7 @@@
                       /* Intel uses "model name", Fujitsu and IBM "cpu". */
                       if (!strcmp(buffer2, "model name") || !strcmp(buffer2, "cpu"))
                       {
- -                        chomp_substring_after_colon(buffer, cpuid->brand, GMX_CPUID_BRAND_MAXLEN);
+ +                        chomp_substring_after_colon(buffer, cpuid->brand, GMX_CPUID_STRLEN);
                           found_brand = 1;
                       }
                   }
@@@ -1221,10 -1062,6 +1233,10 @@@ gmx_cpuid_simd_suggest  (gmx_cpuid_
   
       if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_INTEL)
       {
+ +        /* TODO: Add check for AVX-512F & AVX-512ER here as soon as we
+ +         * have implemented verlet kernels for them. Until then,
+ +         * we should pick AVX2 instead for the automatic detection.
+ +         */
           if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX2))
           {
               tmpsimd = GMX_CPUID_SIMD_X86_AVX2_256;
@@@ -1266,30 -1103,10 +1278,30 @@@
       }
       else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_IBM)
       {
- -        if (strstr(gmx_cpuid_brand(cpuid), "A2"))
+ +        if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_QPX))
           {
               tmpsimd = GMX_CPUID_SIMD_IBM_QPX;
           }
+ +        else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_VSX))
+ +        {
+ +            /* VSX is better than VMX, so we check it first */
+ +            tmpsimd = GMX_CPUID_SIMD_IBM_VSX;
+ +        }
+ +        else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_VMX))
+ +        {
+ +            tmpsimd = GMX_CPUID_SIMD_IBM_VMX;
+ +        }
+ +    }
+ +    else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_ARM)
+ +    {
+ +        if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_ARM_NEON_ASIMD))
+ +        {
+ +            tmpsimd = GMX_CPUID_SIMD_ARM_NEON_ASIMD;
+ +        }
+ +        else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_ARM_NEON))
+ +        {
+ +            tmpsimd = GMX_CPUID_SIMD_ARM_NEON;
+ +        }
       }
       return tmpsimd;
   }
@@@ -1424,5 -1241,3 +1436,5 @@@ main(int argc, char **argv
   }
   
   #endif
+ +
+ +/*! \endcond */
diff --combined src/gromacs/gmxpreprocess/readir.c

index 4e69abd8b38dc91287c196c27101e7fbf7bbe468,4b53e5b1e1d47e1e2add3b095c1aacbc4aa394b5..2d86536b178e5ddbd4077a3c550f8c50bca99fb9
--- 1/src/gromacs/gmxpreprocess/readir.c
--- 2/src/gromacs/gmxpreprocess/readir.c
+++ b/src/gromacs/gmxpreprocess/readir.c
@@@ -34,34 -34,35 +34,34 @@@
    * To help us fund GROMACS development, we humbly ask that you cite
    * the research papers on the package. Check out http://www.gromacs.org.
    */
- -#ifdef HAVE_CONFIG_H
- -#include <config.h>
- -#endif
+ +#include "gmxpre.h"
+ +
+ +#include "readir.h"
   
   #include <ctype.h>
- -#include <stdlib.h>
   #include <limits.h>
- -#include "sysstuff.h"
- -#include "gromacs/utility/smalloc.h"
- -#include "typedefs.h"
- -#include "physics.h"
- -#include "names.h"
- -#include "gmx_fatal.h"
- -#include "macros.h"
- -#include "index.h"
- -#include "symtab.h"
+ +#include <stdlib.h>
+ +
+ +#include "gromacs/gmxpreprocess/toputil.h"
+ +#include "gromacs/legacyheaders/chargegroup.h"
+ +#include "gromacs/legacyheaders/inputrec.h"
+ +#include "gromacs/legacyheaders/macros.h"
+ +#include "gromacs/legacyheaders/names.h"
+ +#include "gromacs/legacyheaders/network.h"
+ +#include "gromacs/legacyheaders/readinp.h"
+ +#include "gromacs/legacyheaders/typedefs.h"
+ +#include "gromacs/legacyheaders/warninp.h"
+ +#include "gromacs/math/units.h"
+ +#include "gromacs/math/vec.h"
+ +#include "gromacs/mdlib/calc_verletbuf.h"
+ +#include "gromacs/pbcutil/pbc.h"
+ +#include "gromacs/topology/block.h"
+ +#include "gromacs/topology/index.h"
+ +#include "gromacs/topology/mtop_util.h"
+ +#include "gromacs/topology/symtab.h"
   #include "gromacs/utility/cstringutil.h"
- -#include "readinp.h"
- -#include "warninp.h"
- -#include "readir.h"
- -#include "toputil.h"
- -#include "index.h"
- -#include "network.h"
- -#include "vec.h"
- -#include "pbc.h"
- -#include "mtop_util.h"
- -#include "chargegroup.h"
- -#include "inputrec.h"
- -#include "calc_verletbuf.h"
+ +#include "gromacs/utility/fatalerror.h"
+ +#include "gromacs/utility/smalloc.h"
   
   #define MAXPTR 254
   #define NOGID  255
@@@ -159,7 -160,7 +159,7 @@@ static void GetSimTemps(int ntemps, t_s
           }
           else if (simtemp->eSimTempScale == esimtempEXPONENTIAL)
           {
- -            simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*((exp(temperature_lambdas[i])-1)/(exp(1.0)-1));
+ +            simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*(gmx_expm1(temperature_lambdas[i])/gmx_expm1(1.0));
           }
           else
           {
@@@ -269,8 -270,6 +269,8 @@@ void check_ir(const char *mdparin, t_in
       {
           warning_error(wi, "rlist should be >= 0");
       }
+ +    sprintf(err_buf, "nstlist can not be smaller than 0. (If you were trying to use the heuristic neighbour-list update scheme for efficient buffering for improved energy conservation, please use the Verlet cut-off scheme instead.)");
+ +    CHECK(ir->nstlist < 0);
   
       process_interaction_modifier(ir, &ir->coulomb_modifier);
       process_interaction_modifier(ir, &ir->vdw_modifier);
@@@ -307,9 -306,9 +307,9 @@@
           {
               warning_error(wi, "rlistlong can not be shorter than rlist");
           }
- -        if (IR_TWINRANGE(*ir) && ir->nstlist <= 0)
+ +        if (IR_TWINRANGE(*ir) && ir->nstlist == 0)
           {
- -            warning_error(wi, "Can not have nstlist<=0 with twin-range interactions");
+ +            warning_error(wi, "Can not have nstlist == 0 with twin-range interactions");
           }
       }
   
@@@ -881,6 -880,10 +881,6 @@@
                 (ir->ePBC     != epbcNONE) ||
                 (ir->rcoulomb != 0.0)      || (ir->rvdw != 0.0));
   
- -        if (ir->nstlist < 0)
- -        {
- -            warning_error(wi, "Can not have heuristic neighborlist updates without cut-off");
- -        }
           if (ir->nstlist > 0)
           {
               warning_note(wi, "Simulating without cut-offs can be (slightly) faster with nstlist=0, nstype=simple and only one MPI rank");
@@@ -1282,7 -1285,8 +1282,7 @@@ nd %s"
       if (ir->cutoff_scheme == ecutsGROUP)
       {
           if (((ir->coulomb_modifier != eintmodNONE && ir->rcoulomb == ir->rlist) ||
- -             (ir->vdw_modifier != eintmodNONE && ir->rvdw == ir->rlist)) &&
- -            ir->nstlist != 1)
+ +             (ir->vdw_modifier != eintmodNONE && ir->rvdw == ir->rlist)))
           {
               warning_note(wi, "With exact cut-offs, rlist should be "
                            "larger than rcoulomb and rvdw, so that there "
@@@ -1309,6 -1313,14 +1309,6 @@@
           warning_note(wi, "You have selected user tables with dispersion correction, the dispersion will be corrected to -C6/r^6 beyond rvdw_switch (the tabulated interaction between rvdw_switch and rvdw will not be double counted). Make sure that you really want dispersion correction to -C6/r^6.");
       }
   
- -    if (ir->nstlist == -1)
- -    {
- -        sprintf(err_buf, "With nstlist=-1 rvdw and rcoulomb should be smaller than rlist to account for diffusion and possibly charge-group radii");
- -        CHECK(ir->rvdw >= ir->rlist || ir->rcoulomb >= ir->rlist);
- -    }
- -    sprintf(err_buf, "nstlist can not be smaller than -1");
- -    CHECK(ir->nstlist < -1);
- -
       if (ir->eI == eiLBFGS && (ir->coulombtype == eelCUT || ir->vdwtype == evdwCUT)
           && ir->rvdw != 0)
       {
@@@ -1436,7 -1448,7 +1436,7 @@@ int str_nelem(const char *str, int maxp
       int   np = 0;
       char *copy0, *copy;
   
- -    copy0 = strdup(str);
+ +    copy0 = gmx_strdup(str);
       copy  = copy0;
       ltrim(copy);
       while (*copy != '\0')
@@@ -1691,7 -1703,7 +1691,7 @@@ static void do_wall_params(t_inputrec *
           }
           for (i = 0; i < ir->nwall; i++)
           {
- -            opts->wall_atomtype[i] = strdup(names[i]);
+ +            opts->wall_atomtype[i] = gmx_strdup(names[i]);
           }
   
           if (ir->wall_type == ewt93 || ir->wall_type == ewt104)
@@@ -2082,11 -2094,12 +2082,11 @@@ void get_ir(const char *mdparin, const 
   
       /* COM pulling */
       CCTYPE("COM PULLING");
- -    CTYPE("Pull type: no, umbrella, constraint or constant-force");
- -    EETYPE("pull",          ir->ePull, epull_names);
- -    if (ir->ePull != epullNO)
+ +    EETYPE("pull",          ir->bPull, yesno_names);
+ +    if (ir->bPull)
       {
           snew(ir->pull, 1);
- -        is->pull_grp = read_pullparams(&ninp, &inp, ir->pull, &opts->pull_start, wi);
+ +        is->pull_grp = read_pullparams(&ninp, &inp, ir->pull, wi);
       }
   
       /* Enforced rotation */
@@@ -2194,6 -2207,9 +2194,9 @@@
       CTYPE ("Format is number of terms (int) and for all terms an amplitude (real)");
       CTYPE ("and a phase angle (real)");
       STYPE ("E-x",     is->efield_x,   NULL);
+     CTYPE ("Time dependent (pulsed) electric field. Format is omega, time for pulse");
+     CTYPE ("peak, and sigma (width) for pulse. Sigma = 0 removes pulse, leaving");
+     CTYPE ("the field to be a cosine function.");
       STYPE ("E-xt",    is->efield_xt,  NULL);
       STYPE ("E-y",     is->efield_y,   NULL);
       STYPE ("E-yt",    is->efield_yt,  NULL);
@@@ -2353,7 -2369,7 +2356,7 @@@
       {
           if (ir->efep != efepNO)
           {
- -            opts->couple_moltype = strdup(is->couple_moltype);
+ +            opts->couple_moltype = gmx_strdup(is->couple_moltype);
               if (opts->couple_lam0 == opts->couple_lam1)
               {
                   warning(wi, "The lambda=0 and lambda=1 states for coupling are identical");
@@@ -2531,9 -2547,6 +2534,9 @@@ static int search_QMstring(const char *
   } /* search_QMstring */
   
   /* We would like gn to be const as well, but C doesn't allow this */
+ +/* TODO this is utility functionality (search for the index of a
+ +   string in a collection), so should be refactored and located more
+ +   centrally. */
   int search_string(const char *s, int ng, char *gn[])
   {
       int i;
@@@ -2839,7 -2852,7 +2842,7 @@@ static void calc_nrdf(gmx_mtop_t *mtop
           }
       }
   
- -    if (ir->ePull == epullCONSTRAINT)
+ +    if (ir->bPull)
       {
           /* Correct nrdf for the COM constraints.
            * We correct using the TC and VCM group of the first atom
@@@ -2851,11 -2864,6 +2854,11 @@@
   
           for (i = 0; i < pull->ncoord; i++)
           {
+ +            if (pull->coord[i].eType != epullCONSTRAINT)
+ +            {
+ +                continue;
+ +            }
+ +
               imin = 1;
   
               for (j = 0; j < 2; j++)
@@@ -2969,7 -2977,7 +2972,7 @@@ static void decode_cos(char *s, t_cosin
       double  a, phi;
       int     i;
   
- -    t = strdup(s);
+ +    t = gmx_strdup(s);
       trim(t);
   
       cosine->n   = 0;
@@@ -3150,7 -3158,7 +3153,7 @@@ static void make_swap_groups
   
   void make_IMD_group(t_IMD *IMDgroup, char *IMDgname, t_blocka *grps, char **gnames)
   {
- -    int      ig = -1, i;
+ +    int      ig, i;
   
   
       ig            = search_string(IMDgname, grps->nr, gnames);
@@@ -3481,7 -3489,7 +3484,7 @@@ void do_index(const char* mdparin, cons
           }
       }
   
- -    if (ir->ePull != epullNO)
+ +    if (ir->bPull)
       {
           make_pull_groups(ir->pull, is->pull_grp, grps, gnames);
   
@@@ -3857,15 -3865,7 +3860,15 @@@ static gmx_bool absolute_reference(t_in
                           case efbposresSPHERE:
                               AbsRef[XX] = AbsRef[YY] = AbsRef[ZZ] = 1;
                               break;
+ +                        case efbposresCYLINDERX:
+ +                            AbsRef[YY] = AbsRef[ZZ] = 1;
+ +                            break;
+ +                        case efbposresCYLINDERY:
+ +                            AbsRef[XX] = AbsRef[ZZ] = 1;
+ +                            break;
                           case efbposresCYLINDER:
+ +                        /* efbposres is a synonym for efbposresCYLINDERZ for backwards compatibility */
+ +                        case efbposresCYLINDERZ:
                               AbsRef[XX] = AbsRef[YY] = 1;
                               break;
                           case efbposresX: /* d=XX */
@@@ -4239,42 -4239,45 +4242,42 @@@ void triple_check(const char *mdparin, 
           gmx_fatal(FARGS, "Soft-core interactions are only supported with VdW repulsion power 12");
       }
   
- -    if (ir->ePull != epullNO)
+ +    if (ir->bPull)
       {
- -        gmx_bool bPullAbsoluteRef;
+ +        gmx_bool bWarned;
   
- -        bPullAbsoluteRef = FALSE;
- -        for (i = 0; i < ir->pull->ncoord; i++)
+ +        bWarned = FALSE;
+ +        for (i = 0; i < ir->pull->ncoord && !bWarned; i++)
           {
- -            bPullAbsoluteRef = bPullAbsoluteRef ||
- -                ir->pull->coord[i].group[0] == 0 ||
- -                ir->pull->coord[i].group[1] == 0;
- -        }
- -        if (bPullAbsoluteRef)
- -        {
- -            absolute_reference(ir, sys, FALSE, AbsRef);
- -            for (m = 0; m < DIM; m++)
+ +            if (ir->pull->coord[i].group[0] == 0 ||
+ +                ir->pull->coord[i].group[1] == 0)
               {
- -                if (ir->pull->dim[m] && !AbsRef[m])
+ +                absolute_reference(ir, sys, FALSE, AbsRef);
+ +                for (m = 0; m < DIM; m++)
                   {
- -                    warning(wi, "You are using an absolute reference for pulling, but the rest of the system does not have an absolute reference. This will lead to artifacts.");
- -                    break;
+ +                    if (ir->pull->coord[i].dim[m] && !AbsRef[m])
+ +                    {
+ +                        warning(wi, "You are using an absolute reference for pulling, but the rest of the system does not have an absolute reference. This will lead to artifacts.");
+ +                        bWarned = TRUE;
+ +                        break;
+ +                    }
                   }
               }
           }
   
- -        if (ir->pull->eGeom == epullgDIRPBC)
+ +        for (i = 0; i < 3; i++)
           {
- -            for (i = 0; i < 3; i++)
+ +            for (m = 0; m <= i; m++)
               {
- -                for (m = 0; m <= i; m++)
+ +                if ((ir->epc != epcNO && ir->compress[i][m] != 0) ||
+ +                    ir->deform[i][m] != 0)
                   {
- -                    if ((ir->epc != epcNO && ir->compress[i][m] != 0) ||
- -                        ir->deform[i][m] != 0)
+ +                    for (c = 0; c < ir->pull->ncoord; c++)
                       {
- -                        for (c = 0; c < ir->pull->ncoord; c++)
+ +                        if (ir->pull->coord[c].eGeom == epullgDIRPBC &&
+ +                            ir->pull->coord[c].vec[m] != 0)
                           {
- -                            if (ir->pull->coord[c].vec[m] != 0)
- -                            {
- -                                gmx_fatal(FARGS, "Can not have dynamic box while using pull geometry '%s' (dim %c)", EPULLGEOM(ir->pull->eGeom), 'x'+m);
- -                            }
+ +                            gmx_fatal(FARGS, "Can not have dynamic box while using pull geometry '%s' (dim %c)", EPULLGEOM(ir->pull->coord[c].eGeom), 'x'+m);
                           }
                       }
                   }
@@@ -4285,10 -4288,7 +4288,10 @@@
       check_disre(sys);
   }
   
- -void double_check(t_inputrec *ir, matrix box, gmx_bool bConstr, warninp_t wi)
+ +void double_check(t_inputrec *ir, matrix box,
+ +                  gmx_bool bHasNormalConstraints,
+ +                  gmx_bool bHasAnyConstraints,
+ +                  warninp_t wi)
   {
       real        min_size;
       gmx_bool    bTWIN;
@@@ -4301,7 -4301,7 +4304,7 @@@
           warning_error(wi, ptr);
       }
   
- -    if (bConstr && ir->eConstrAlg == econtSHAKE)
+ +    if (bHasNormalConstraints && ir->eConstrAlg == econtSHAKE)
       {
           if (ir->shake_tol <= 0.0)
           {
@@@ -4324,7 -4324,7 +4327,7 @@@
           }
       }
   
- -    if ( (ir->eConstrAlg == econtLINCS) && bConstr)
+ +    if ( (ir->eConstrAlg == econtLINCS) && bHasNormalConstraints)
       {
           /* If we have Lincs constraints: */
           if (ir->eI == eiMD && ir->etc == etcNO &&
@@@ -4345,9 -4345,9 +4348,9 @@@
           }
       }
   
- -    if (bConstr && ir->epc == epcMTTK)
+ +    if (bHasAnyConstraints && ir->epc == epcMTTK)
       {
- -        warning_note(wi, "MTTK with constraints is deprecated, and will be removed in GROMACS 5.1");
+ +        warning_error(wi, "Constraints are not implemented with MTTK pressure control.");
       }
   
       if (ir->LincsWarnAngle > 90.0)
diff --combined src/gromacs/legacyheaders/gmx_cpuid.h

index 5e296941c5f22070538c8e0057a06ef7c4eb4123,9eb261cb83c62258ec4b8488b761cd3f1c76f487..517bf2e7a296537e1d292b9fd7daa7b9e504ddeb
--- 1/src/gromacs/legacyheaders/gmx_cpuid.h
--- 2/src/gromacs/legacyheaders/gmx_cpuid.h
+++ b/src/gromacs/legacyheaders/gmx_cpuid.h
@@@ -55,7 -55,6 +55,7 @@@ enum gmx_cpuid_vendo
       GMX_CPUID_VENDOR_AMD,
       GMX_CPUID_VENDOR_FUJITSU,
       GMX_CPUID_VENDOR_IBM,
+ +    GMX_CPUID_VENDOR_ARM,
       GMX_CPUID_NVENDORS
   };
   
@@@ -82,10 -81,6 +82,10 @@@ enum gmx_cpuid_featur
       GMX_CPUID_FEATURE_X86_APIC,          /* APIC support                                 */
       GMX_CPUID_FEATURE_X86_AVX,           /* Advanced vector extensions                   */
       GMX_CPUID_FEATURE_X86_AVX2,          /* AVX2 including gather support (not used yet) */
+ +    GMX_CPUID_FEATURE_X86_AVX_512F,      /* Foundation AVX-512 instructions              */
+ +    GMX_CPUID_FEATURE_X86_AVX_512PF,     /* Extended gather/scatter for AVX-512          */
+ +    GMX_CPUID_FEATURE_X86_AVX_512ER,     /* Extended-range 1/x and /1sqrt(x) for AVX-512 */
+ +    GMX_CPUID_FEATURE_X86_AVX_512CD,     /* Memory conflict-detection for AVX-512        */
       GMX_CPUID_FEATURE_X86_CLFSH,         /* Supports CLFLUSH instruction                 */
       GMX_CPUID_FEATURE_X86_CMOV,          /* Conditional move insn support                */
       GMX_CPUID_FEATURE_X86_CX8,           /* Supports CMPXCHG8B (8-byte compare-exchange) */
@@@ -116,11 -111,6 +116,11 @@@
       GMX_CPUID_FEATURE_X86_TDT,           /* TSC deadline timer                           */
       GMX_CPUID_FEATURE_X86_X2APIC,        /* Extended xAPIC Support                       */
       GMX_CPUID_FEATURE_X86_XOP,           /* AMD extended instructions, only AMD for now  */
+ +    GMX_CPUID_FEATURE_ARM_NEON,          /* 32-bit ARM NEON                              */
+ +    GMX_CPUID_FEATURE_ARM_NEON_ASIMD,    /* 64-bit ARM AArch64 Advanced SIMD             */
+ +    GMX_CPUID_FEATURE_IBM_QPX,           /* IBM QPX SIMD (BlueGene/Q and later)          */
+ +    GMX_CPUID_FEATURE_IBM_VMX,           /* IBM VMX SIMD (Altivec on Power6 and later)   */
+ +    GMX_CPUID_FEATURE_IBM_VSX,           /* IBM VSX SIMD (Power7 and later)              */
       GMX_CPUID_NFEATURES
   };
   
@@@ -140,14 -130,8 +140,14 @@@ enum gmx_cpuid_sim
       GMX_CPUID_SIMD_X86_AVX_128_FMA,
       GMX_CPUID_SIMD_X86_AVX_256,
       GMX_CPUID_SIMD_X86_AVX2_256,
+ +    GMX_CPUID_SIMD_X86_AVX_512F,
+ +    GMX_CPUID_SIMD_X86_AVX_512ER,
       GMX_CPUID_SIMD_SPARC64_HPC_ACE,
       GMX_CPUID_SIMD_IBM_QPX,
+ +    GMX_CPUID_SIMD_IBM_VMX,
+ +    GMX_CPUID_SIMD_IBM_VSX,
+ +    GMX_CPUID_SIMD_ARM_NEON,
+ +    GMX_CPUID_SIMD_ARM_NEON_ASIMD,
       GMX_CPUID_NSIMD
   };
   
@@@ -210,6 -194,13 +210,13 @@@ gmx_cpuid_feature           (gmx_cpuid_
                                enum gmx_cpuid_feature     feature);
   
   
+ /* Check whether the CPU is an Intel with Nehalem microarchitecture.
+  * Return 0 if not Intel Nehalem, 1 if Intel Nehalem.
+  */
+ int
+ gmx_cpuid_is_intel_nehalem  (const gmx_cpuid_t          cpuid);
+ 
+ 
   /* Return pointers to cpu topology information.
    *
    * Important: CPU topology requires more OS support than most other
diff --combined src/programs/mdrun/runner.cpp

index 10c6cc47fa1712367db2e76beb33c0c930a9a5c2,b1e52db1d7c80d78e89869392126cfaefe6b0125..2e122a897d016f9b7e1c57cb15bd98414944fb7f
--- 1/src/programs/mdrun/runner.cpp
--- 2/src/programs/mdrun/runner.c
+++ b/src/programs/mdrun/runner.cpp
@@@ -34,67 -34,68 +34,67 @@@
    * To help us fund GROMACS development, we humbly ask that you cite
    * the research papers on the package. Check out http://www.gromacs.org.
    */
- -#ifdef HAVE_CONFIG_H
- -#include <config.h>
- -#endif
+ +
+ +#include "gmxpre.h"
+ +
+ +#include "config.h"
+ +
+ +#include <assert.h>
   #include <signal.h>
   #include <stdlib.h>
- -#ifdef HAVE_UNISTD_H
- -#include <unistd.h>
- -#endif
   #include <string.h>
- -#include <assert.h>
   
- -#include "typedefs.h"
- -#include "gromacs/utility/smalloc.h"
- -#include "sysstuff.h"
- -#include "copyrite.h"
- -#include "force.h"
- -#include "mdrun.h"
- -#include "md_logging.h"
- -#include "md_support.h"
- -#include "network.h"
- -#include "names.h"
- -#include "disre.h"
- -#include "orires.h"
- -#include "pme.h"
- -#include "mdatoms.h"
- -#include "repl_ex.h"
- -#include "deform.h"
- -#include "qmmm.h"
- -#include "domdec.h"
- -#include "coulomb.h"
- -#include "constr.h"
- -#include "mvdata.h"
- -#include "checkpoint.h"
- -#include "mtop_util.h"
- -#include "sighandler.h"
- -#include "txtdump.h"
- -#include "gmx_detect_hardware.h"
- -#include "gmx_omp_nthreads.h"
- -#include "gromacs/gmxpreprocess/calc_verletbuf.h"
- -#include "gmx_fatal_collective.h"
- -#include "membed.h"
- -#include "macros.h"
- -#include "gmx_thread_affinity.h"
- -#include "inputrec.h"
+ +#include <algorithm>
   
+ +#include "gromacs/domdec/domdec.h"
+ +#include "gromacs/essentialdynamics/edsam.h"
+ +#include "gromacs/ewald/pme.h"
   #include "gromacs/fileio/tpxio.h"
- -#include "gromacs/mdlib/nbnxn_search.h"
+ +#include "gromacs/legacyheaders/checkpoint.h"
+ +#include "gromacs/legacyheaders/constr.h"
+ +#include "gromacs/legacyheaders/disre.h"
+ +#include "gromacs/legacyheaders/force.h"
+ +#include "gromacs/legacyheaders/gmx_detect_hardware.h"
+ +#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+ +#include "gromacs/legacyheaders/gmx_thread_affinity.h"
+ +#include "gromacs/legacyheaders/inputrec.h"
+ +#include "gromacs/legacyheaders/main.h"
+ +#include "gromacs/legacyheaders/md_logging.h"
+ +#include "gromacs/legacyheaders/md_support.h"
+ +#include "gromacs/legacyheaders/mdatoms.h"
+ +#include "gromacs/legacyheaders/mdrun.h"
+ +#include "gromacs/legacyheaders/names.h"
+ +#include "gromacs/legacyheaders/network.h"
+ +#include "gromacs/legacyheaders/oenv.h"
+ +#include "gromacs/legacyheaders/orires.h"
+ +#include "gromacs/legacyheaders/qmmm.h"
+ +#include "gromacs/legacyheaders/sighandler.h"
+ +#include "gromacs/legacyheaders/txtdump.h"
+ +#include "gromacs/legacyheaders/typedefs.h"
+ +#include "gromacs/math/calculate-ewald-splitting-coefficient.h"
+ +#include "gromacs/math/vec.h"
+ +#include "gromacs/mdlib/calc_verletbuf.h"
   #include "gromacs/mdlib/nbnxn_consts.h"
- -#include "gromacs/timing/wallcycle.h"
- -#include "gromacs/utility/gmxmpi.h"
- -#include "gromacs/utility/gmxomp.h"
- -#include "gromacs/swap/swapcoords.h"
- -#include "gromacs/essentialdynamics/edsam.h"
+ +#include "gromacs/mdlib/nbnxn_search.h"
+ +#include "gromacs/pbcutil/pbc.h"
   #include "gromacs/pulling/pull.h"
   #include "gromacs/pulling/pull_rotation.h"
+ +#include "gromacs/swap/swapcoords.h"
+ +#include "gromacs/timing/wallcycle.h"
+ +#include "gromacs/topology/mtop_util.h"
+ +#include "gromacs/utility/gmxassert.h"
+ +#include "gromacs/utility/gmxmpi.h"
+ +#include "gromacs/utility/smalloc.h"
+ +
+ +#include "deform.h"
+ +#include "membed.h"
+ +#include "repl_ex.h"
   
   #ifdef GMX_FAHCORE
   #include "corewrap.h"
   #endif
   
- -#include "gpu_utils.h"
- -#include "nbnxn_cuda_data_mgmt.h"
+ +#include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
   
   typedef struct {
       gmx_integrator_t *func;
@@@ -294,7 -295,7 +294,7 @@@ static int get_tmpi_omp_thread_division
       else if (hw_opt->nthreads_omp > 0)
       {
           /* Here we could oversubscribe, when we do, we issue a warning later */
- -        nthreads_tmpi = max(1, nthreads_tot/hw_opt->nthreads_omp);
+ +        nthreads_tmpi = std::max(1, nthreads_tot/hw_opt->nthreads_omp);
       }
       else
       {
@@@ -309,20 -310,22 +309,22 @@@
            * two CPUs with HT, so we need a limit<16; thus we use 12.
            * A reasonable limit for Intel Sandy and Ivy bridge,
            * not knowing the topology, is 16 threads.
+          * Below we check for Intel and AVX, which for now includes
+          * Sandy/Ivy Bridge, Has/Broadwell. By checking for AVX instead of
+          * model numbers we ensure also future Intel CPUs are covered.
            */
           const int nthreads_omp_always_faster             =  4;
           const int nthreads_omp_always_faster_Nehalem     = 12;
-         const int nthreads_omp_always_faster_SandyBridge = 16;
-         gmx_bool  bIntel_Family6;
+         const int nthreads_omp_always_faster_Intel_AVX   = 16;
+         gmx_bool  bIntelAVX;
   
-         bIntel_Family6 =
+         bIntelAVX =
               (gmx_cpuid_vendor(hwinfo->cpuid_info) == GMX_CPUID_VENDOR_INTEL &&
-              gmx_cpuid_family(hwinfo->cpuid_info) == 6);
+              gmx_cpuid_feature(hwinfo->cpuid_info, GMX_CPUID_FEATURE_X86_AVX));
   
           if (nthreads_tot <= nthreads_omp_always_faster ||
-             (bIntel_Family6 &&
-              ((gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_Nehalem && nthreads_tot <= nthreads_omp_always_faster_Nehalem) ||
-               (gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_SandyBridge && nthreads_tot <= nthreads_omp_always_faster_SandyBridge))))
+             ((gmx_cpuid_is_intel_nehalem(hwinfo->cpuid_info) && nthreads_tot <= nthreads_omp_always_faster_Nehalem) ||
+              (bIntelAVX && nthreads_tot <= nthreads_omp_always_faster_Intel_AVX)))
           {
               /* Use pure OpenMP parallelization */
               nthreads_tmpi = 1;
@@@ -353,6 -356,8 +355,6 @@@ static int get_nthreads_mpi(const gmx_h
   {
       int      nthreads_hw, nthreads_tot_max, nthreads_tmpi, nthreads_new, ngpu;
       int      min_atoms_per_mpi_thread;
- -    char    *env;
- -    char     sbuf[STRLEN];
       gmx_bool bCanUseGPU;
   
       if (hw_opt->nthreads_tmpi > 0)
@@@ -427,7 -432,7 +429,7 @@@
       {
           /* the thread number was chosen automatically, but there are too many
              threads (too few atoms per thread) */
- -        nthreads_new = max(1, mtop->natoms/min_atoms_per_mpi_thread);
+ +        nthreads_new = std::max(1, mtop->natoms/min_atoms_per_mpi_thread);
   
           /* Avoid partial use of Hyper-Threading */
           if (gmx_cpuid_x86_smt(hwinfo->cpuid_info) == GMX_CPUID_X86_SMT_ENABLED &&
@@@ -475,7 -480,7 +477,7 @@@
   /* We determine the extra cost of the non-bonded kernels compared to
    * a reference nstlist value of 10 (which is the default in grompp).
    */
- -static const int    nbnxn_reference_nstlist = 10;
+ +static const int    nbnxnReferenceNstlist = 10;
   /* The values to try when switching  */
   const int           nstlist_try[] = { 20, 25, 40 };
   #define NNSTL  sizeof(nstlist_try)/sizeof(nstlist_try[0])
@@@ -504,9 -509,9 +506,9 @@@ static void increase_nstlist(FILE *fp, 
       float                  listfac_ok, listfac_max;
       int                    nstlist_orig, nstlist_prev;
       verletbuf_list_setup_t ls;
- -    real                   rlist_nstlist10, rlist_inc, rlist_ok, rlist_max;
+ +    real                   rlistWithReferenceNstlist, rlist_inc, rlist_ok, rlist_max;
       real                   rlist_new, rlist_prev;
- -    int                    nstlist_ind = 0;
+ +    size_t                 nstlist_ind = 0;
       t_state                state_tmp;
       gmx_bool               bBox, bDD, bCont;
       const char            *nstl_gpu = "\nFor optimal performance with a GPU nstlist (now %d) should be larger.\nThe optimum depends on your CPU and GPU resources.\nYou might want to try several nstlist values.\n";
@@@ -515,7 -520,6 +517,7 @@@
       const char            *box_err  = "Can not increase nstlist because the box is too small";
       const char            *dd_err   = "Can not increase nstlist because of domain decomposition limitations";
       char                   buf[STRLEN];
+ +    const float            oneThird = 1.0f / 3.0f;
   
       if (nstlist_cmdline <= 0)
       {
@@@ -601,19 -605,19 +603,19 @@@
       verletbuf_get_list_setup(bGPU, &ls);
   
       /* Allow rlist to make the list a given factor larger than the list
- -     * would be with nstlist=10.
+ +     * would be with the reference value for nstlist (10).
        */
       nstlist_prev = ir->nstlist;
- -    ir->nstlist  = 10;
+ +    ir->nstlist  = nbnxnReferenceNstlist;
       calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL,
- -                            &rlist_nstlist10);
+ +                            &rlistWithReferenceNstlist);
       ir->nstlist  = nstlist_prev;
   
       /* Determine the pair list size increase due to zero interactions */
       rlist_inc = nbnxn_get_rlist_effective_inc(ls.cluster_size_j,
                                                 mtop->natoms/det(box));
- -    rlist_ok  = (rlist_nstlist10 + rlist_inc)*pow(listfac_ok, 1.0/3.0) - rlist_inc;
- -    rlist_max = (rlist_nstlist10 + rlist_inc)*pow(listfac_max, 1.0/3.0) - rlist_inc;
+ +    rlist_ok  = (rlistWithReferenceNstlist + rlist_inc)*pow(listfac_ok, oneThird) - rlist_inc;
+ +    rlist_max = (rlistWithReferenceNstlist + rlist_inc)*pow(listfac_max, oneThird) - rlist_inc;
       if (debug)
       {
           fprintf(debug, "nstlist tuning: rlist_inc %.3f rlist_ok %.3f rlist_max %.3f\n",
@@@ -753,6 -757,99 +755,6 @@@ static void prepare_verlet_scheme(FIL
       }
   }
   
- -static void convert_to_verlet_scheme(FILE *fplog,
- -                                     t_inputrec *ir,
- -                                     gmx_mtop_t *mtop, real box_vol)
- -{
- -    char *conv_mesg = "Converting input file with group cut-off scheme to the Verlet cut-off scheme";
- -
- -    md_print_warn(NULL, fplog, "%s\n", conv_mesg);
- -
- -    ir->cutoff_scheme = ecutsVERLET;
- -    ir->verletbuf_tol = 0.005;
- -
- -    if (ir->rcoulomb != ir->rvdw)
- -    {
- -        gmx_fatal(FARGS, "The VdW and Coulomb cut-offs are different, whereas the Verlet scheme only supports equal cut-offs");
- -    }
- -
- -    if (ir->vdwtype == evdwUSER || EEL_USER(ir->coulombtype))
- -    {
- -        gmx_fatal(FARGS, "User non-bonded potentials are not (yet) supported with the Verlet scheme");
- -    }
- -    else if (ir_vdw_switched(ir) || ir_coulomb_switched(ir))
- -    {
- -        if (ir_vdw_switched(ir) && ir->vdw_modifier == eintmodNONE)
- -        {
- -            ir->vdwtype = evdwCUT;
- -
- -            switch (ir->vdwtype)
- -            {
- -                case evdwSHIFT:  ir->vdw_modifier = eintmodFORCESWITCH; break;
- -                case evdwSWITCH: ir->vdw_modifier = eintmodPOTSWITCH; break;
- -                default: gmx_fatal(FARGS, "The Verlet scheme does not support Van der Waals interactions of type '%s'", evdw_names[ir->vdwtype]);
- -            }
- -        }
- -        if (ir_coulomb_switched(ir) && ir->coulomb_modifier == eintmodNONE)
- -        {
- -            if (EEL_FULL(ir->coulombtype))
- -            {
- -                /* With full electrostatic only PME can be switched */
- -                ir->coulombtype      = eelPME;
- -                ir->coulomb_modifier = eintmodPOTSHIFT;
- -            }
- -            else
- -            {
- -                md_print_warn(NULL, fplog, "NOTE: Replacing %s electrostatics with reaction-field with epsilon-rf=inf\n", eel_names[ir->coulombtype]);
- -                ir->coulombtype      = eelRF;
- -                ir->epsilon_rf       = 0.0;
- -                ir->coulomb_modifier = eintmodPOTSHIFT;
- -            }
- -        }
- -
- -        /* We set the pair energy error tolerance to a small number.
- -         * Note that this is only for testing. For production the user
- -         * should think about this and set the mdp options.
- -         */
- -        ir->verletbuf_tol = 1e-4;
- -    }
- -
- -    if (inputrec2nboundeddim(ir) != 3)
- -    {
- -        gmx_fatal(FARGS, "Can only convert old tpr files to the Verlet cut-off scheme with 3D pbc");
- -    }
- -
- -    if (ir->efep != efepNO || ir->implicit_solvent != eisNO)
- -    {
- -        gmx_fatal(FARGS, "Will not convert old tpr files to the Verlet cut-off scheme with free-energy calculations or implicit solvent");
- -    }
- -
- -    if (EI_DYNAMICS(ir->eI) && !(EI_MD(ir->eI) && ir->etc == etcNO))
- -    {
- -        verletbuf_list_setup_t ls;
- -
- -        verletbuf_get_list_setup(FALSE, &ls);
- -        calc_verlet_buffer_size(mtop, box_vol, ir, -1, &ls, NULL, &ir->rlist);
- -    }
- -    else
- -    {
- -        real rlist_fac;
- -
- -        if (EI_MD(ir->eI))
- -        {
- -            rlist_fac       = 1 + verlet_buffer_ratio_NVE_T0;
- -        }
- -        else
- -        {
- -            rlist_fac       = 1 + verlet_buffer_ratio_nodynamics;
- -        }
- -        ir->verletbuf_tol   = -1;
- -        ir->rlist           = rlist_fac*max(ir->rvdw, ir->rcoulomb);
- -    }
- -
- -    gmx_mtop_remove_chargegroups(mtop);
- -}
- -
   static void print_hw_opt(FILE *fp, const gmx_hw_opt_t *hw_opt)
   {
       fprintf(fp, "hw_opt: nt %d ntmpi %d ntomp %d ntomp_pme %d gpu_id '%s'\n",
@@@ -914,36 -1011,72 +916,36 @@@ static void override_nsteps_cmdline(FIL
                                       t_inputrec      *ir,
                                       const t_commrec *cr)
   {
- -    char sbuf[STEPSTRSIZE];
- -
       assert(ir);
       assert(cr);
   
       /* override with anything else than the default -2 */
       if (nsteps_cmdline > -2)
       {
- -        char stmp[STRLEN];
+ +        char sbuf_steps[STEPSTRSIZE];
+ +        char sbuf_msg[STRLEN];
   
           ir->nsteps = nsteps_cmdline;
           if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
           {
- -            sprintf(stmp, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
- -                    gmx_step_str(nsteps_cmdline, sbuf),
+ +            sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
+ +                    gmx_step_str(nsteps_cmdline, sbuf_steps),
                       fabs(nsteps_cmdline*ir->delta_t));
           }
           else
           {
- -            sprintf(stmp, "Overriding nsteps with value passed on the command line: %s steps",
- -                    gmx_step_str(nsteps_cmdline, sbuf));
+ +            sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
+ +                    gmx_step_str(nsteps_cmdline, sbuf_steps));
           }
   
- -        md_print_warn(cr, fplog, "%s\n", stmp);
+ +        md_print_warn(cr, fplog, "%s\n", sbuf_msg);
       }
- -}
- -
- -/* Frees GPU memory and destroys the CUDA context.
- - *
- - * Note that this function needs to be called even if GPUs are not used
- - * in this run because the PME ranks have no knowledge of whether GPUs
- - * are used or not, but all ranks need to enter the barrier below.
- - */
- -static void free_gpu_resources(const t_forcerec *fr,
- -                               const t_commrec  *cr)
- -{
- -    gmx_bool bIsPPrankUsingGPU;
- -    char     gpu_err_str[STRLEN];
- -
- -    bIsPPrankUsingGPU = (cr->duty & DUTY_PP) && fr->nbv != NULL && fr->nbv->bUseGPU;
- -
- -    if (bIsPPrankUsingGPU)
+ +    else if (nsteps_cmdline < -2)
       {
- -        /* free nbnxn data in GPU memory */
- -        nbnxn_cuda_free(fr->nbv->cu_nbv);
- -
- -        /* With tMPI we need to wait for all ranks to finish deallocation before
- -         * destroying the context in free_gpu() as some ranks may be sharing
- -         * GPU and context.
- -         * Note: as only PP ranks need to free GPU resources, so it is safe to
- -         * not call the barrier on PME ranks.
- -         */
- -#ifdef GMX_THREAD_MPI
- -        if (PAR(cr))
- -        {
- -            gmx_barrier(cr);
- -        }
- -#endif  /* GMX_THREAD_MPI */
- -
- -        /* uninitialize GPU (by destroying the context) */
- -        if (!free_gpu(gpu_err_str))
- -        {
- -            gmx_warning("On rank %d failed to free GPU #%d: %s",
- -                        cr->nodeid, get_current_gpu_device_id(), gpu_err_str);
- -        }
+ +        gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %d",
+ +                  nsteps_cmdline);
       }
+ +    /* Do nothing if nsteps_cmdline == -2 */
   }
   
   int mdrunner(gmx_hw_opt_t *hw_opt,
@@@ -960,11 -1093,13 +962,11 @@@
                const char *deviceOptions, int imdport, unsigned long Flags)
   {
       gmx_bool                  bForceUseGPU, bTryUseGPU, bRerunMD, bCantUseGPU;
- -    double                    nodetime = 0, realtime;
       t_inputrec               *inputrec;
       t_state                  *state = NULL;
       matrix                    box;
       gmx_ddbox_t               ddbox = {0};
       int                       npme_major, npme_minor;
- -    real                      tmpr1, tmpr2;
       t_nrnb                   *nrnb;
       gmx_mtop_t               *mtop          = NULL;
       t_mdatoms                *mdatoms       = NULL;
@@@ -972,16 -1107,19 +974,16 @@@
       t_fcdata                 *fcd           = NULL;
       real                      ewaldcoeff_q  = 0;
       real                      ewaldcoeff_lj = 0;
- -    gmx_pme_t                *pmedata       = NULL;
+ +    struct gmx_pme_t        **pmedata       = NULL;
       gmx_vsite_t              *vsite         = NULL;
       gmx_constr_t              constr;
- -    int                       i, m, nChargePerturbed = -1, nTypePerturbed = 0, status, nalloc;
- -    char                     *gro;
+ +    int                       nChargePerturbed = -1, nTypePerturbed = 0, status;
       gmx_wallcycle_t           wcycle;
       gmx_bool                  bReadEkin;
- -    int                       list;
       gmx_walltime_accounting_t walltime_accounting = NULL;
       int                       rc;
       gmx_int64_t               reset_counters;
       gmx_edsam_t               ed           = NULL;
- -    t_commrec                *cr_old       = cr;
       int                       nthreads_pme = 1;
       int                       nthreads_pp  = 1;
       gmx_membed_t              membed       = NULL;
@@@ -1023,7 -1161,13 +1025,7 @@@
       if (SIMMASTER(cr))
       {
           /* Read (nearly) all data required for the simulation */
- -        read_tpx_state(ftp2fn(efTPX, nfile, fnm), inputrec, state, NULL, mtop);
- -
- -        if (inputrec->cutoff_scheme != ecutsVERLET &&
- -            ((Flags & MD_TESTVERLET) || getenv("GMX_VERLET_SCHEME") != NULL))
- -        {
- -            convert_to_verlet_scheme(fplog, inputrec, mtop, det(state->box));
- -        }
+ +        read_tpx_state(ftp2fn(efTPR, nfile, fnm), inputrec, state, NULL, mtop);
   
           if (inputrec->cutoff_scheme == ecutsVERLET)
           {
@@@ -1062,7 -1206,8 +1064,7 @@@
               {
                   md_print_warn(cr, fplog,
                                 "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
- -                              "      To use a GPU, set the mdp option: cutoff-scheme = Verlet\n"
- -                              "      (for quick performance testing you can use the -testverlet option)\n");
+ +                              "      To use a GPU, set the mdp option: cutoff-scheme = Verlet\n");
               }
   
               if (bForceUseGPU)
@@@ -1087,14 -1232,29 +1089,14 @@@
           }
       }
   
- -    /* Check for externally set OpenMP affinity and turn off internal
- -     * pinning if any is found. We need to do this check early to tell
- -     * thread-MPI whether it should do pinning when spawning threads.
- -     * TODO: the above no longer holds, we should move these checks down
- -     */
- -    gmx_omp_check_thread_affinity(fplog, cr, hw_opt);
- -
       /* Check and update the hardware options for internal consistency */
       check_and_update_hw_opt_1(hw_opt, SIMMASTER(cr));
   
+ +    /* Early check for externally set process affinity. */
+ +    gmx_check_thread_affinity_set(fplog, cr,
+ +                                  hw_opt, hwinfo->nthreads_hw_avail, FALSE);
       if (SIMMASTER(cr))
       {
- -#ifdef GMX_THREAD_MPI
- -        /* Early check for externally set process affinity.
- -         * With thread-MPI this is needed as pinning might get turned off,
- -         * which needs to be known before starting thread-MPI.
- -         * With thread-MPI hw_opt is processed here on the master rank
- -         * and passed to the other ranks later, so we only do this on master.
- -         */
- -        gmx_check_thread_affinity_set(fplog,
- -                                      NULL,
- -                                      hw_opt, hwinfo->nthreads_hw_avail, FALSE);
- -#endif
   
   #ifdef GMX_THREAD_MPI
           if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0)
@@@ -1131,7 -1291,6 +1133,7 @@@
   
           if (hw_opt->nthreads_tmpi > 1)
           {
+ +            t_commrec *cr_old       = cr;
               /* now start the threads. */
               cr = mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm,
                                           oenv, bVerbose, bCompact, nstglobalcomm,
@@@ -1194,7 -1353,7 +1196,7 @@@
                     "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec"
   #endif
   #endif
- -                  , ShortProgram()
+ +                  , output_env_get_program_display_name(oenv)
                     );
       }
   
@@@ -1285,9 -1444,16 +1287,9 @@@
           }
       }
   
- -    if (((MASTER(cr) || (Flags & MD_SEPPOT)) && (Flags & MD_APPENDFILES))
- -#ifdef GMX_THREAD_MPI
- -        /* With thread MPI only the master node/thread exists in mdrun.c,
- -         * therefore non-master nodes need to open the "seppot" log file here.
- -         */
- -        || (!MASTER(cr) && (Flags & MD_SEPPOT))
- -#endif
- -        )
+ +    if (MASTER(cr) && (Flags & MD_APPENDFILES))
       {
- -        gmx_log_open(ftp2fn(efLOG, nfile, fnm), cr, !(Flags & MD_SEPPOT),
+ +        gmx_log_open(ftp2fn(efLOG, nfile, fnm), cr,
                        Flags, &fplog);
       }
   
@@@ -1381,13 -1547,6 +1383,13 @@@
                             (cr->duty & DUTY_PP) == 0,
                             inputrec->cutoff_scheme == ecutsVERLET);
   
+ +#ifndef NDEBUG
+ +    if (integrator[inputrec->eI].func != do_tpi &&
+ +        inputrec->cutoff_scheme == ecutsVERLET)
+ +    {
+ +        gmx_feenableexcept();
+ +    }
+ +#endif
       if (PAR(cr))
       {
           /* The master rank decided on the use of GPUs,
@@@ -1467,6 -1626,7 +1469,6 @@@
           /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
              "nofile","nofile","nofile","nofile",FALSE,pforce);
            */
- -        fr->bSepDVDL = ((Flags & MD_SEPPOT) == MD_SEPPOT);
   
           /* Initialize QM-MM */
           if (fr->bQMMM)
@@@ -1589,7 -1749,7 +1591,7 @@@
           /* Assumes uniform use of the number of OpenMP threads */
           walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
   
- -        if (inputrec->ePull != epullNO)
+ +        if (inputrec->bPull)
           {
               /* Initialize pull code */
               init_pull(fplog, inputrec, nfile, fnm, mtop, cr, oenv, inputrec->fepvals->init_lambda,
@@@ -1614,7 -1774,6 +1616,7 @@@
   
           if (DOMAINDECOMP(cr))
           {
+ +            GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
               dd_init_bondeds(fplog, cr->dd, mtop, vsite, inputrec,
                               Flags & MD_DDBONDCHECK, fr->cginfo_mb);
   
@@@ -1639,7 -1798,7 +1641,7 @@@
                                         Flags,
                                         walltime_accounting);
   
- -        if (inputrec->ePull != epullNO)
+ +        if (inputrec->bPull)
           {
               finish_pull(inputrec->pull);
           }
@@@ -1652,7 -1811,6 +1654,7 @@@
       }
       else
       {
+ +        GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
           /* do PME only */
           walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
           gmx_pmeonly(*pmedata, cr, nrnb, wcycle, walltime_accounting, ewaldcoeff_q, ewaldcoeff_lj, inputrec);
@@@ -1665,12 -1823,13 +1667,12 @@@
        */
       finish_run(fplog, cr,
                  inputrec, nrnb, wcycle, walltime_accounting,
- -               fr != NULL && fr->nbv != NULL && fr->nbv->bUseGPU ?
- -               nbnxn_cuda_get_timings(fr->nbv->cu_nbv) : NULL,
+ +               fr ? fr->nbv : NULL,
                  EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
   
   
       /* Free GPU memory and context */
- -    free_gpu_resources(fr, cr);
+ +    free_gpu_resources(fr, cr, &hwinfo->gpu_info, fr ? fr->gpu_opt : NULL);
   
       if (opt2bSet("-membed", nfile, fnm))
       {
@@@ -1691,8 -1850,6 +1693,8 @@@
   
       rc = (int)gmx_get_stop_condition();
   
+ +    done_ed(&ed);
+ +
   #ifdef GMX_THREAD_MPI
       /* we need to join all threads. The sub-threads join when they
          exit this function, but the master thread needs to be told to
author	Berk Hess <hess@kth.se>
	Wed, 22 Apr 2015 08:57:49 +0000 (10:57 +0200)
committer	Berk Hess <hess@kth.se>
	Wed, 22 Apr 2015 08:57:49 +0000 (10:57 +0200)
		1	2
src/gromacs/gmxlib/gmx_cpuid.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxpreprocess/readir.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/gmx_cpuid.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/programs/mdrun/runner.cpp	patch \|	diff1 \|	diff2 \|	blob \| history