Trivial conflict resolved in runner.cpp.
Change-Id: I5bf8d6672246938f498fff609a7b0943505d2afb
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
-#ifdef HAVE_SCHED_H
-# ifndef _GNU_SOURCE
-# define _GNU_SOURCE 1
-# endif
-# include <sched.h>
+/*! \cond */
+#include "gromacs/legacyheaders/gmx_cpuid.h"
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
#endif
+#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <ctype.h>
+
#ifdef GMX_NATIVE_WINDOWS
/* MSVC definition for __cpuid() */
#ifdef _MSC_VER
/* sysinfo functions */
#include <windows.h>
#endif
+#ifdef HAVE_SCHED_H
+ #include <sched.h>
+#endif
#ifdef HAVE_UNISTD_H
/* sysconf() definition */
#include <unistd.h>
#endif
-#include "gmx_cpuid.h"
-
-
/* For convenience, and to enable configure-time invocation, we keep all architectures
* in a single file, but to avoid repeated ifdefs we set the overall architecture here.
"GenuineIntel",
"AuthenticAMD",
"Fujitsu",
- "IBM"
+ "IBM", /* Used on Power and BlueGene/Q */
+ "ARM"
};
const char *
"GenuineIntel",
"AuthenticAMD",
"Fujitsu",
- "ibm" /* Used on BlueGene/Q */
+ "ibm", /* Used on Power and BlueGene/Q */
+ "AArch64"
};
const char *
"apic",
"avx",
"avx2",
+ "avx512f",
+ "avx512pf",
+ "avx512er",
+ "avx512cd",
"clfsh",
"cmov",
"cx8",
"ssse3",
"tdt",
"x2apic",
- "xop"
+ "xop",
+ "arm_neon",
+ "arm_neon_asimd",
+ "QPX",
+ "VMX",
+ "VSX"
};
const char *
"AVX_128_FMA",
"AVX_256",
"AVX2_256",
+ "AVX_512F",
+ "AVX_512ER",
"Sparc64 HPC-ACE",
- "IBM_QPX"
+ "IBM_QPX",
+ "IBM_VMX",
+ "IBM_VSX",
+ "ARM_NEON",
+ "ARM_NEON_ASIMD"
};
/* Max length of brand string */
-#define GMX_CPUID_BRAND_MAXLEN 256
+#define GMX_CPUID_STRLEN 256
/* Contents of the abstract datatype */
struct gmx_cpuid
{
enum gmx_cpuid_vendor vendor;
- char brand[GMX_CPUID_BRAND_MAXLEN];
+ char brand[GMX_CPUID_STRLEN];
int family;
int model;
int stepping;
}
+ /* Return 1 when the CPU described by cpuid is an Intel processor with
+  * the Nehalem/Westmere microarchitecture (family 6 with one of the
+  * model numbers listed below), and 0 otherwise.
+  */
+ int
+ gmx_cpuid_is_intel_nehalem (const gmx_cpuid_t cpuid)
+ {
+ return (cpuid->vendor == GMX_CPUID_VENDOR_INTEL &&
+ cpuid->family == 6 &&
+ (cpuid->model == 0x2E ||
+ cpuid->model == 0x1A ||
+ cpuid->model == 0x1E ||
+ cpuid->model == 0x2F ||
+ cpuid->model == 0x2C ||
+ cpuid->model == 0x25));
+ }
/* What type of SIMD was compiled in, if any? */
-#ifdef GMX_SIMD_X86_AVX2_256
+#ifdef GMX_SIMD_X86_AVX_512ER
+static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_512ER;
+#elif defined GMX_SIMD_X86_AVX_512F
+static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_512F;
+#elif defined GMX_SIMD_X86_AVX2_256
static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX2_256;
#elif defined GMX_SIMD_X86_AVX_256
static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_256;
static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE4_1;
#elif defined GMX_SIMD_X86_SSE2
static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE2;
+#elif defined GMX_SIMD_ARM_NEON
+static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_ARM_NEON;
+#elif defined GMX_SIMD_ARM_NEON_ASIMD
+static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_ARM_NEON_ASIMD;
#elif defined GMX_SIMD_SPARC64_HPC_ACE
static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_SPARC64_HPC_ACE;
#elif defined GMX_SIMD_IBM_QPX
static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_QPX;
+#elif defined GMX_SIMD_IBM_VMX
+static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_VMX;
+#elif defined GMX_SIMD_IBM_VSX
+static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_VSX;
#elif defined GMX_SIMD_REFERENCE
static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_REFERENCE;
#else
{
int fn, max_stdfn, max_extfn;
unsigned int eax, ebx, ecx, edx;
- char str[GMX_CPUID_BRAND_MAXLEN];
+ char str[GMX_CPUID_STRLEN];
char * p;
/* Find largest standard/extended function input value */
{
p++;
}
- strncpy(cpuid->brand, p, GMX_CPUID_BRAND_MAXLEN);
+ strncpy(cpuid->brand, p, GMX_CPUID_STRLEN);
}
else
{
- strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_BRAND_MAXLEN);
+ strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
}
/* Find basic CPU properties */
}
}
}
+ free(unique);
return nunique;
}
if (max_stdfn >= 7)
{
execute_x86cpuid(0x7, 0, &eax, &ebx, &ecx, &edx);
- cpuid->feature[GMX_CPUID_FEATURE_X86_AVX2] = (ebx & (1 << 5)) != 0;
+ cpuid->feature[GMX_CPUID_FEATURE_X86_AVX2] = (ebx & (1 << 5)) != 0;
+ cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512F] = (ebx & (1 << 16)) != 0;
+ cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512PF] = (ebx & (1 << 26)) != 0;
+ cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512ER] = (ebx & (1 << 27)) != 0;
+ cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512CD] = (ebx & (1 << 28)) != 0;
}
/* Check whether Hyper-Threading is enabled, not only supported */
-
static void
chomp_substring_before_colon(const char *in, char *s, int maxlength)
{
}
}
+/* Detect CPU characteristics for ARM processors.
+ *
+ * On Linux this parses /proc/cpuinfo to fill in the brand string, family,
+ * model, stepping, and the NEON / NEON-ASIMD SIMD feature flags of cpuid.
+ * On non-Linux platforms only compile-time macros are available: AArch64
+ * always provides NEON ASIMD, while for 32-bit ARM we conservatively
+ * assume NEON is absent.
+ *
+ * Always returns 0; fields for which no data is found keep their
+ * prior contents.
+ */
+static int
+cpuid_check_arm(gmx_cpuid_t cpuid)
+{
+#if defined(__linux__) || defined(__linux)
+    FILE *fp;
+    char buffer[GMX_CPUID_STRLEN], buffer2[GMX_CPUID_STRLEN], buffer3[GMX_CPUID_STRLEN];
+
+    if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
+    {
+        while ( (fgets(buffer, sizeof(buffer), fp) != NULL))
+        {
+            chomp_substring_before_colon(buffer, buffer2, GMX_CPUID_STRLEN);
+            chomp_substring_after_colon(buffer, buffer3, GMX_CPUID_STRLEN);
+
+            if (!strcmp(buffer2, "Processor"))
+            {
+                strncpy(cpuid->brand, buffer3, GMX_CPUID_STRLEN);
+            }
+            else if (!strcmp(buffer2, "CPU architecture"))
+            {
+                cpuid->family = strtol(buffer3, NULL, 10);
+                /* Some AArch64 kernels print the string instead of a number */
+                if (!strcmp(buffer3, "AArch64"))
+                {
+                    cpuid->family = 8;
+                }
+            }
+            else if (!strcmp(buffer2, "CPU part"))
+            {
+                cpuid->model = strtol(buffer3, NULL, 16);
+            }
+            else if (!strcmp(buffer2, "CPU revision"))
+            {
+                cpuid->stepping = strtol(buffer3, NULL, 10);
+            }
+            else if (!strcmp(buffer2, "Features") && strstr(buffer3, "neon"))
+            {
+                cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON] = 1;
+            }
+            else if (!strcmp(buffer2, "Features") && strstr(buffer3, "asimd"))
+            {
+                cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON_ASIMD] = 1;
+            }
+        }
+        /* Only close the stream when fopen() succeeded; calling
+         * fclose(NULL) is undefined behavior.
+         */
+        fclose(fp);
+    }
+#else
+# ifdef __aarch64__
+    /* Strange 64-bit non-linux platform. However, since NEON ASIMD is present on all
+     * implementations of AArch64 so far, we assume it is present for now.
+     */
+    cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON_ASIMD] = 1;
+# else
+    /* Strange 32-bit non-linux platform. We cannot assume that neon is present. */
+    cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON] = 0;
+# endif
+#endif
+    return 0;
+}
+
+
+/* Detect CPU characteristics for IBM Power/PowerPC/BlueGene processors.
+ *
+ * On Linux this parses /proc/cpuinfo for the brand string and for the
+ * VMX (Altivec) and VSX feature flags; a brand string containing "A2"
+ * identifies BlueGene/Q, which supports QPX.
+ * On non-Linux platforms no detection is possible, so the brand is set
+ * to a placeholder and all IBM SIMD feature flags are cleared.
+ *
+ * Always returns 0.
+ */
+static int
+cpuid_check_ibm(gmx_cpuid_t cpuid)
+{
+#if defined(__linux__) || defined(__linux)
+    FILE *fp;
+    char buffer[GMX_CPUID_STRLEN], before_colon[GMX_CPUID_STRLEN], after_colon[GMX_CPUID_STRLEN];
+
+    if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
+    {
+        while ( (fgets(buffer, sizeof(buffer), fp) != NULL))
+        {
+            chomp_substring_before_colon(buffer, before_colon, GMX_CPUID_STRLEN);
+            chomp_substring_after_colon(buffer, after_colon, GMX_CPUID_STRLEN);
+
+            if (!strcmp(before_colon, "cpu") || !strcmp(before_colon, "Processor"))
+            {
+                strncpy(cpuid->brand, after_colon, GMX_CPUID_STRLEN);
+            }
+            if (!strcmp(before_colon, "model name") ||
+                !strcmp(before_colon, "model") ||
+                !strcmp(before_colon, "Processor") ||
+                !strcmp(before_colon, "cpu"))
+            {
+                if (strstr(after_colon, "altivec"))
+                {
+                    cpuid->feature[GMX_CPUID_FEATURE_IBM_VMX] = 1;
+
+                    /* Power6 has Altivec (VMX) but not VSX */
+                    if (!strstr(after_colon, "POWER6") && !strstr(after_colon, "Power6") &&
+                        !strstr(after_colon, "power6"))
+                    {
+                        cpuid->feature[GMX_CPUID_FEATURE_IBM_VSX] = 1;
+                    }
+                }
+            }
+        }
+        /* fclose() must only run when fopen() succeeded - fclose(NULL) is
+         * undefined behavior - and the brand string should only be examined
+         * after /proc/cpuinfo was actually read, since it is otherwise
+         * uninitialized here.
+         */
+        fclose(fp);
+
+        if (strstr(cpuid->brand, "A2"))
+        {
+            /* BlueGene/Q */
+            cpuid->feature[GMX_CPUID_FEATURE_IBM_QPX] = 1;
+        }
+    }
+#else
+    strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
+    cpuid->feature[GMX_CPUID_FEATURE_IBM_QPX] = 0;
+    cpuid->feature[GMX_CPUID_FEATURE_IBM_VMX] = 0;
+    cpuid->feature[GMX_CPUID_FEATURE_IBM_VSX] = 0;
+#endif
+    return 0;
+}
+
+
/* Try to find the vendor of the current CPU, so we know what specific
* detection routine to call.
*/
unsigned int eax, ebx, ecx, edx;
char vendorstring[13];
FILE * fp;
- char buffer[255], before_colon[255], after_colon[255];
+ char buffer[GMX_CPUID_STRLEN];
+ char before_colon[GMX_CPUID_STRLEN];
+ char after_colon[GMX_CPUID_STRLEN];
/* Set default first */
vendor = GMX_CPUID_VENDOR_UNKNOWN;
while ( (vendor == GMX_CPUID_VENDOR_UNKNOWN) && (fgets(buffer, sizeof(buffer), fp) != NULL))
{
chomp_substring_before_colon(buffer, before_colon, sizeof(before_colon));
- /* Intel/AMD use "vendor_id", IBM "vendor"(?) or "model". Fujitsu "manufacture". Add others if you have them! */
+ /* Intel/AMD use "vendor_id", IBM "vendor", "model", or "cpu". Fujitsu "manufacture".
+ * On ARM there does not seem to be a vendor, but ARM or AArch64 is listed in the Processor string.
+ * Add others if you have them!
+ */
if (!strcmp(before_colon, "vendor_id")
|| !strcmp(before_colon, "vendor")
|| !strcmp(before_colon, "manufacture")
- || !strcmp(before_colon, "model"))
+ || !strcmp(before_colon, "model")
+ || !strcmp(before_colon, "Processor")
+ || !strcmp(before_colon, "cpu"))
{
chomp_substring_after_colon(buffer, after_colon, sizeof(after_colon));
for (i = GMX_CPUID_VENDOR_UNKNOWN; i < GMX_CPUID_NVENDORS; i++)
vendor = i;
}
}
+ /* If we did not find vendor yet, check if it is IBM:
+ * On some Power/PowerPC systems it only says power, not IBM.
+ */
+ if (vendor == GMX_CPUID_VENDOR_UNKNOWN &&
+ ((strstr(after_colon, "POWER") || strstr(after_colon, "Power") ||
+ strstr(after_colon, "power"))))
+ {
+ vendor = GMX_CPUID_VENDOR_IBM;
+ }
}
}
}
fclose(fp);
+#elif defined(__arm__) || defined (__arm) || defined(__aarch64__)
+ /* If we are using ARM on something that is not linux we have to trust the compiler,
+ * and we cannot get the extra info that might be present in /proc/cpuinfo.
+ */
+ vendor = GMX_CPUID_VENDOR_ARM;
#endif
-
return vendor;
}
gmx_cpuid_t cpuid;
int i;
FILE * fp;
- char buffer[255], buffer2[255];
+ char buffer[GMX_CPUID_STRLEN], buffer2[GMX_CPUID_STRLEN];
int found_brand;
cpuid = malloc(sizeof(*cpuid));
cpuid_check_amd_x86(cpuid);
break;
#endif
+ case GMX_CPUID_VENDOR_ARM:
+ cpuid_check_arm(cpuid);
+ break;
+ case GMX_CPUID_VENDOR_IBM:
+ cpuid_check_ibm(cpuid);
+ break;
default:
/* Default value */
- strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_BRAND_MAXLEN);
+ strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
#if defined(__linux__) || defined(__linux)
/* General Linux. Try to get CPU type from /proc/cpuinfo */
if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
/* Intel uses "model name", Fujitsu and IBM "cpu". */
if (!strcmp(buffer2, "model name") || !strcmp(buffer2, "cpu"))
{
- chomp_substring_after_colon(buffer, cpuid->brand, GMX_CPUID_BRAND_MAXLEN);
+ chomp_substring_after_colon(buffer, cpuid->brand, GMX_CPUID_STRLEN);
found_brand = 1;
}
}
if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_INTEL)
{
+ /* TODO: Add check for AVX-512F & AVX-512ER here as soon as we
+ * have implemented verlet kernels for them. Until then,
+ * we should pick AVX2 instead for the automatic detection.
+ */
if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX2))
{
tmpsimd = GMX_CPUID_SIMD_X86_AVX2_256;
}
else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_IBM)
{
- if (strstr(gmx_cpuid_brand(cpuid), "A2"))
+ if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_QPX))
{
tmpsimd = GMX_CPUID_SIMD_IBM_QPX;
}
+ else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_VSX))
+ {
+ /* VSX is better than VMX, so we check it first */
+ tmpsimd = GMX_CPUID_SIMD_IBM_VSX;
+ }
+ else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_VMX))
+ {
+ tmpsimd = GMX_CPUID_SIMD_IBM_VMX;
+ }
+ }
+ else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_ARM)
+ {
+ if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_ARM_NEON_ASIMD))
+ {
+ tmpsimd = GMX_CPUID_SIMD_ARM_NEON_ASIMD;
+ }
+ else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_ARM_NEON))
+ {
+ tmpsimd = GMX_CPUID_SIMD_ARM_NEON;
+ }
}
return tmpsimd;
}
}
#endif
+
+/*! \endcond */
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "readir.h"
#include <ctype.h>
-#include <stdlib.h>
#include <limits.h>
-#include "sysstuff.h"
-#include "gromacs/utility/smalloc.h"
-#include "typedefs.h"
-#include "physics.h"
-#include "names.h"
-#include "gmx_fatal.h"
-#include "macros.h"
-#include "index.h"
-#include "symtab.h"
+#include <stdlib.h>
+
+#include "gromacs/gmxpreprocess/toputil.h"
+#include "gromacs/legacyheaders/chargegroup.h"
+#include "gromacs/legacyheaders/inputrec.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/legacyheaders/names.h"
+#include "gromacs/legacyheaders/network.h"
+#include "gromacs/legacyheaders/readinp.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/legacyheaders/warninp.h"
+#include "gromacs/math/units.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/calc_verletbuf.h"
+#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/topology/block.h"
+#include "gromacs/topology/index.h"
+#include "gromacs/topology/mtop_util.h"
+#include "gromacs/topology/symtab.h"
#include "gromacs/utility/cstringutil.h"
-#include "readinp.h"
-#include "warninp.h"
-#include "readir.h"
-#include "toputil.h"
-#include "index.h"
-#include "network.h"
-#include "vec.h"
-#include "pbc.h"
-#include "mtop_util.h"
-#include "chargegroup.h"
-#include "inputrec.h"
-#include "calc_verletbuf.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/smalloc.h"
#define MAXPTR 254
#define NOGID 255
}
else if (simtemp->eSimTempScale == esimtempEXPONENTIAL)
{
- simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*((exp(temperature_lambdas[i])-1)/(exp(1.0)-1));
+ simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*(gmx_expm1(temperature_lambdas[i])/gmx_expm1(1.0));
}
else
{
{
warning_error(wi, "rlist should be >= 0");
}
+ sprintf(err_buf, "nstlist can not be smaller than 0. (If you were trying to use the heuristic neighbour-list update scheme for efficient buffering for improved energy conservation, please use the Verlet cut-off scheme instead.)");
+ CHECK(ir->nstlist < 0);
process_interaction_modifier(ir, &ir->coulomb_modifier);
process_interaction_modifier(ir, &ir->vdw_modifier);
{
warning_error(wi, "rlistlong can not be shorter than rlist");
}
- if (IR_TWINRANGE(*ir) && ir->nstlist <= 0)
+ if (IR_TWINRANGE(*ir) && ir->nstlist == 0)
{
- warning_error(wi, "Can not have nstlist<=0 with twin-range interactions");
+ warning_error(wi, "Can not have nstlist == 0 with twin-range interactions");
}
}
(ir->ePBC != epbcNONE) ||
(ir->rcoulomb != 0.0) || (ir->rvdw != 0.0));
- if (ir->nstlist < 0)
- {
- warning_error(wi, "Can not have heuristic neighborlist updates without cut-off");
- }
if (ir->nstlist > 0)
{
warning_note(wi, "Simulating without cut-offs can be (slightly) faster with nstlist=0, nstype=simple and only one MPI rank");
if (ir->cutoff_scheme == ecutsGROUP)
{
if (((ir->coulomb_modifier != eintmodNONE && ir->rcoulomb == ir->rlist) ||
- (ir->vdw_modifier != eintmodNONE && ir->rvdw == ir->rlist)) &&
- ir->nstlist != 1)
+ (ir->vdw_modifier != eintmodNONE && ir->rvdw == ir->rlist)))
{
warning_note(wi, "With exact cut-offs, rlist should be "
"larger than rcoulomb and rvdw, so that there "
warning_note(wi, "You have selected user tables with dispersion correction, the dispersion will be corrected to -C6/r^6 beyond rvdw_switch (the tabulated interaction between rvdw_switch and rvdw will not be double counted). Make sure that you really want dispersion correction to -C6/r^6.");
}
- if (ir->nstlist == -1)
- {
- sprintf(err_buf, "With nstlist=-1 rvdw and rcoulomb should be smaller than rlist to account for diffusion and possibly charge-group radii");
- CHECK(ir->rvdw >= ir->rlist || ir->rcoulomb >= ir->rlist);
- }
- sprintf(err_buf, "nstlist can not be smaller than -1");
- CHECK(ir->nstlist < -1);
-
if (ir->eI == eiLBFGS && (ir->coulombtype == eelCUT || ir->vdwtype == evdwCUT)
&& ir->rvdw != 0)
{
int np = 0;
char *copy0, *copy;
- copy0 = strdup(str);
+ copy0 = gmx_strdup(str);
copy = copy0;
ltrim(copy);
while (*copy != '\0')
}
for (i = 0; i < ir->nwall; i++)
{
- opts->wall_atomtype[i] = strdup(names[i]);
+ opts->wall_atomtype[i] = gmx_strdup(names[i]);
}
if (ir->wall_type == ewt93 || ir->wall_type == ewt104)
/* COM pulling */
CCTYPE("COM PULLING");
- CTYPE("Pull type: no, umbrella, constraint or constant-force");
- EETYPE("pull", ir->ePull, epull_names);
- if (ir->ePull != epullNO)
+ EETYPE("pull", ir->bPull, yesno_names);
+ if (ir->bPull)
{
snew(ir->pull, 1);
- is->pull_grp = read_pullparams(&ninp, &inp, ir->pull, &opts->pull_start, wi);
+ is->pull_grp = read_pullparams(&ninp, &inp, ir->pull, wi);
}
/* Enforced rotation */
CTYPE ("Format is number of terms (int) and for all terms an amplitude (real)");
CTYPE ("and a phase angle (real)");
STYPE ("E-x", is->efield_x, NULL);
+ CTYPE ("Time dependent (pulsed) electric field. Format is omega, time for pulse");
+ CTYPE ("peak, and sigma (width) for pulse. Sigma = 0 removes pulse, leaving");
+ CTYPE ("the field to be a cosine function.");
STYPE ("E-xt", is->efield_xt, NULL);
STYPE ("E-y", is->efield_y, NULL);
STYPE ("E-yt", is->efield_yt, NULL);
{
if (ir->efep != efepNO)
{
- opts->couple_moltype = strdup(is->couple_moltype);
+ opts->couple_moltype = gmx_strdup(is->couple_moltype);
if (opts->couple_lam0 == opts->couple_lam1)
{
warning(wi, "The lambda=0 and lambda=1 states for coupling are identical");
} /* search_QMstring */
/* We would like gn to be const as well, but C doesn't allow this */
+/* TODO this is utility functionality (search for the index of a
+ string in a collection), so should be refactored and located more
+ centrally. */
int search_string(const char *s, int ng, char *gn[])
{
int i;
}
}
- if (ir->ePull == epullCONSTRAINT)
+ if (ir->bPull)
{
/* Correct nrdf for the COM constraints.
* We correct using the TC and VCM group of the first atom
for (i = 0; i < pull->ncoord; i++)
{
+ if (pull->coord[i].eType != epullCONSTRAINT)
+ {
+ continue;
+ }
+
imin = 1;
for (j = 0; j < 2; j++)
double a, phi;
int i;
- t = strdup(s);
+ t = gmx_strdup(s);
trim(t);
cosine->n = 0;
void make_IMD_group(t_IMD *IMDgroup, char *IMDgname, t_blocka *grps, char **gnames)
{
- int ig = -1, i;
+ int ig, i;
ig = search_string(IMDgname, grps->nr, gnames);
}
}
- if (ir->ePull != epullNO)
+ if (ir->bPull)
{
make_pull_groups(ir->pull, is->pull_grp, grps, gnames);
case efbposresSPHERE:
AbsRef[XX] = AbsRef[YY] = AbsRef[ZZ] = 1;
break;
+ case efbposresCYLINDERX:
+ AbsRef[YY] = AbsRef[ZZ] = 1;
+ break;
+ case efbposresCYLINDERY:
+ AbsRef[XX] = AbsRef[ZZ] = 1;
+ break;
case efbposresCYLINDER:
+ /* efbposres is a synonym for efbposresCYLINDERZ for backwards compatibility */
+ case efbposresCYLINDERZ:
AbsRef[XX] = AbsRef[YY] = 1;
break;
case efbposresX: /* d=XX */
gmx_fatal(FARGS, "Soft-core interactions are only supported with VdW repulsion power 12");
}
- if (ir->ePull != epullNO)
+ if (ir->bPull)
{
- gmx_bool bPullAbsoluteRef;
+ gmx_bool bWarned;
- bPullAbsoluteRef = FALSE;
- for (i = 0; i < ir->pull->ncoord; i++)
+ bWarned = FALSE;
+ for (i = 0; i < ir->pull->ncoord && !bWarned; i++)
{
- bPullAbsoluteRef = bPullAbsoluteRef ||
- ir->pull->coord[i].group[0] == 0 ||
- ir->pull->coord[i].group[1] == 0;
- }
- if (bPullAbsoluteRef)
- {
- absolute_reference(ir, sys, FALSE, AbsRef);
- for (m = 0; m < DIM; m++)
+ if (ir->pull->coord[i].group[0] == 0 ||
+ ir->pull->coord[i].group[1] == 0)
{
- if (ir->pull->dim[m] && !AbsRef[m])
+ absolute_reference(ir, sys, FALSE, AbsRef);
+ for (m = 0; m < DIM; m++)
{
- warning(wi, "You are using an absolute reference for pulling, but the rest of the system does not have an absolute reference. This will lead to artifacts.");
- break;
+ if (ir->pull->coord[i].dim[m] && !AbsRef[m])
+ {
+ warning(wi, "You are using an absolute reference for pulling, but the rest of the system does not have an absolute reference. This will lead to artifacts.");
+ bWarned = TRUE;
+ break;
+ }
}
}
}
- if (ir->pull->eGeom == epullgDIRPBC)
+ for (i = 0; i < 3; i++)
{
- for (i = 0; i < 3; i++)
+ for (m = 0; m <= i; m++)
{
- for (m = 0; m <= i; m++)
+ if ((ir->epc != epcNO && ir->compress[i][m] != 0) ||
+ ir->deform[i][m] != 0)
{
- if ((ir->epc != epcNO && ir->compress[i][m] != 0) ||
- ir->deform[i][m] != 0)
+ for (c = 0; c < ir->pull->ncoord; c++)
{
- for (c = 0; c < ir->pull->ncoord; c++)
+ if (ir->pull->coord[c].eGeom == epullgDIRPBC &&
+ ir->pull->coord[c].vec[m] != 0)
{
- if (ir->pull->coord[c].vec[m] != 0)
- {
- gmx_fatal(FARGS, "Can not have dynamic box while using pull geometry '%s' (dim %c)", EPULLGEOM(ir->pull->eGeom), 'x'+m);
- }
+ gmx_fatal(FARGS, "Can not have dynamic box while using pull geometry '%s' (dim %c)", EPULLGEOM(ir->pull->coord[c].eGeom), 'x'+m);
}
}
}
check_disre(sys);
}
-void double_check(t_inputrec *ir, matrix box, gmx_bool bConstr, warninp_t wi)
+void double_check(t_inputrec *ir, matrix box,
+ gmx_bool bHasNormalConstraints,
+ gmx_bool bHasAnyConstraints,
+ warninp_t wi)
{
real min_size;
gmx_bool bTWIN;
warning_error(wi, ptr);
}
- if (bConstr && ir->eConstrAlg == econtSHAKE)
+ if (bHasNormalConstraints && ir->eConstrAlg == econtSHAKE)
{
if (ir->shake_tol <= 0.0)
{
}
}
- if ( (ir->eConstrAlg == econtLINCS) && bConstr)
+ if ( (ir->eConstrAlg == econtLINCS) && bHasNormalConstraints)
{
/* If we have Lincs constraints: */
if (ir->eI == eiMD && ir->etc == etcNO &&
}
}
- if (bConstr && ir->epc == epcMTTK)
+ if (bHasAnyConstraints && ir->epc == epcMTTK)
{
- warning_note(wi, "MTTK with constraints is deprecated, and will be removed in GROMACS 5.1");
+ warning_error(wi, "Constraints are not implemented with MTTK pressure control.");
}
if (ir->LincsWarnAngle > 90.0)
GMX_CPUID_VENDOR_AMD,
GMX_CPUID_VENDOR_FUJITSU,
GMX_CPUID_VENDOR_IBM,
+ GMX_CPUID_VENDOR_ARM,
GMX_CPUID_NVENDORS
};
GMX_CPUID_FEATURE_X86_APIC, /* APIC support */
GMX_CPUID_FEATURE_X86_AVX, /* Advanced vector extensions */
GMX_CPUID_FEATURE_X86_AVX2, /* AVX2 including gather support (not used yet) */
+ GMX_CPUID_FEATURE_X86_AVX_512F, /* Foundation AVX-512 instructions */
+ GMX_CPUID_FEATURE_X86_AVX_512PF, /* Extended gather/scatter for AVX-512 */
+ GMX_CPUID_FEATURE_X86_AVX_512ER, /* Extended-range 1/x and 1/sqrt(x) for AVX-512 */
+ GMX_CPUID_FEATURE_X86_AVX_512CD, /* Memory conflict-detection for AVX-512 */
GMX_CPUID_FEATURE_X86_CLFSH, /* Supports CLFLUSH instruction */
GMX_CPUID_FEATURE_X86_CMOV, /* Conditional move insn support */
GMX_CPUID_FEATURE_X86_CX8, /* Supports CMPXCHG8B (8-byte compare-exchange) */
GMX_CPUID_FEATURE_X86_TDT, /* TSC deadline timer */
GMX_CPUID_FEATURE_X86_X2APIC, /* Extended xAPIC Support */
GMX_CPUID_FEATURE_X86_XOP, /* AMD extended instructions, only AMD for now */
+ GMX_CPUID_FEATURE_ARM_NEON, /* 32-bit ARM NEON */
+ GMX_CPUID_FEATURE_ARM_NEON_ASIMD, /* 64-bit ARM AArch64 Advanced SIMD */
+ GMX_CPUID_FEATURE_IBM_QPX, /* IBM QPX SIMD (BlueGene/Q and later) */
+ GMX_CPUID_FEATURE_IBM_VMX, /* IBM VMX SIMD (Altivec on Power6 and later) */
+ GMX_CPUID_FEATURE_IBM_VSX, /* IBM VSX SIMD (Power7 and later) */
GMX_CPUID_NFEATURES
};
GMX_CPUID_SIMD_X86_AVX_128_FMA,
GMX_CPUID_SIMD_X86_AVX_256,
GMX_CPUID_SIMD_X86_AVX2_256,
+ GMX_CPUID_SIMD_X86_AVX_512F,
+ GMX_CPUID_SIMD_X86_AVX_512ER,
GMX_CPUID_SIMD_SPARC64_HPC_ACE,
GMX_CPUID_SIMD_IBM_QPX,
+ GMX_CPUID_SIMD_IBM_VMX,
+ GMX_CPUID_SIMD_IBM_VSX,
+ GMX_CPUID_SIMD_ARM_NEON,
+ GMX_CPUID_SIMD_ARM_NEON_ASIMD,
GMX_CPUID_NSIMD
};
enum gmx_cpuid_feature feature);
+ /* Check whether the CPU is an Intel with Nehalem microarchitecture.
+ * Return 0 if not Intel Nehalem, 1 if Intel Nehalem.
+ */
+ int
+ gmx_cpuid_is_intel_nehalem (const gmx_cpuid_t cpuid);
+
+
/* Return pointers to cpu topology information.
*
* Important: CPU topology requires more OS support than most other
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+
+#include "gmxpre.h"
+
+#include "config.h"
+
+#include <assert.h>
#include <signal.h>
#include <stdlib.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
#include <string.h>
-#include <assert.h>
-#include "typedefs.h"
-#include "gromacs/utility/smalloc.h"
-#include "sysstuff.h"
-#include "copyrite.h"
-#include "force.h"
-#include "mdrun.h"
-#include "md_logging.h"
-#include "md_support.h"
-#include "network.h"
-#include "names.h"
-#include "disre.h"
-#include "orires.h"
-#include "pme.h"
-#include "mdatoms.h"
-#include "repl_ex.h"
-#include "deform.h"
-#include "qmmm.h"
-#include "domdec.h"
-#include "coulomb.h"
-#include "constr.h"
-#include "mvdata.h"
-#include "checkpoint.h"
-#include "mtop_util.h"
-#include "sighandler.h"
-#include "txtdump.h"
-#include "gmx_detect_hardware.h"
-#include "gmx_omp_nthreads.h"
-#include "gromacs/gmxpreprocess/calc_verletbuf.h"
-#include "gmx_fatal_collective.h"
-#include "membed.h"
-#include "macros.h"
-#include "gmx_thread_affinity.h"
-#include "inputrec.h"
+#include <algorithm>
+#include "gromacs/domdec/domdec.h"
+#include "gromacs/essentialdynamics/edsam.h"
+#include "gromacs/ewald/pme.h"
#include "gromacs/fileio/tpxio.h"
-#include "gromacs/mdlib/nbnxn_search.h"
+#include "gromacs/legacyheaders/checkpoint.h"
+#include "gromacs/legacyheaders/constr.h"
+#include "gromacs/legacyheaders/disre.h"
+#include "gromacs/legacyheaders/force.h"
+#include "gromacs/legacyheaders/gmx_detect_hardware.h"
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/gmx_thread_affinity.h"
+#include "gromacs/legacyheaders/inputrec.h"
+#include "gromacs/legacyheaders/main.h"
+#include "gromacs/legacyheaders/md_logging.h"
+#include "gromacs/legacyheaders/md_support.h"
+#include "gromacs/legacyheaders/mdatoms.h"
+#include "gromacs/legacyheaders/mdrun.h"
+#include "gromacs/legacyheaders/names.h"
+#include "gromacs/legacyheaders/network.h"
+#include "gromacs/legacyheaders/oenv.h"
+#include "gromacs/legacyheaders/orires.h"
+#include "gromacs/legacyheaders/qmmm.h"
+#include "gromacs/legacyheaders/sighandler.h"
+#include "gromacs/legacyheaders/txtdump.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/math/calculate-ewald-splitting-coefficient.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/calc_verletbuf.h"
#include "gromacs/mdlib/nbnxn_consts.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/gmxomp.h"
-#include "gromacs/swap/swapcoords.h"
-#include "gromacs/essentialdynamics/edsam.h"
+#include "gromacs/mdlib/nbnxn_search.h"
+#include "gromacs/pbcutil/pbc.h"
#include "gromacs/pulling/pull.h"
#include "gromacs/pulling/pull_rotation.h"
+#include "gromacs/swap/swapcoords.h"
+#include "gromacs/timing/wallcycle.h"
+#include "gromacs/topology/mtop_util.h"
+#include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/smalloc.h"
+
+#include "deform.h"
+#include "membed.h"
+#include "repl_ex.h"
#ifdef GMX_FAHCORE
#include "corewrap.h"
#endif
-#include "gpu_utils.h"
-#include "nbnxn_cuda_data_mgmt.h"
+#include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
typedef struct {
gmx_integrator_t *func;
else if (hw_opt->nthreads_omp > 0)
{
/* Here we could oversubscribe, when we do, we issue a warning later */
- nthreads_tmpi = max(1, nthreads_tot/hw_opt->nthreads_omp);
+ nthreads_tmpi = std::max(1, nthreads_tot/hw_opt->nthreads_omp);
}
else
{
* two CPUs with HT, so we need a limit<16; thus we use 12.
* A reasonable limit for Intel Sandy and Ivy bridge,
* not knowing the topology, is 16 threads.
+ * Below we check for Intel and AVX, which for now includes
+ * Sandy/Ivy Bridge, Haswell/Broadwell. By checking for AVX instead of
+ * model numbers we ensure also future Intel CPUs are covered.
*/
const int nthreads_omp_always_faster = 4;
const int nthreads_omp_always_faster_Nehalem = 12;
- const int nthreads_omp_always_faster_SandyBridge = 16;
- gmx_bool bIntel_Family6;
+ const int nthreads_omp_always_faster_Intel_AVX = 16;
+ gmx_bool bIntelAVX;
- bIntel_Family6 =
+ bIntelAVX =
(gmx_cpuid_vendor(hwinfo->cpuid_info) == GMX_CPUID_VENDOR_INTEL &&
- gmx_cpuid_family(hwinfo->cpuid_info) == 6);
+ gmx_cpuid_feature(hwinfo->cpuid_info, GMX_CPUID_FEATURE_X86_AVX));
if (nthreads_tot <= nthreads_omp_always_faster ||
- (bIntel_Family6 &&
- ((gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_Nehalem && nthreads_tot <= nthreads_omp_always_faster_Nehalem) ||
- (gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_SandyBridge && nthreads_tot <= nthreads_omp_always_faster_SandyBridge))))
+ ((gmx_cpuid_is_intel_nehalem(hwinfo->cpuid_info) && nthreads_tot <= nthreads_omp_always_faster_Nehalem) ||
+ (bIntelAVX && nthreads_tot <= nthreads_omp_always_faster_Intel_AVX)))
{
/* Use pure OpenMP parallelization */
nthreads_tmpi = 1;
{
int nthreads_hw, nthreads_tot_max, nthreads_tmpi, nthreads_new, ngpu;
int min_atoms_per_mpi_thread;
- char *env;
- char sbuf[STRLEN];
gmx_bool bCanUseGPU;
if (hw_opt->nthreads_tmpi > 0)
{
/* the thread number was chosen automatically, but there are too many
threads (too few atoms per thread) */
- nthreads_new = max(1, mtop->natoms/min_atoms_per_mpi_thread);
+ nthreads_new = std::max(1, mtop->natoms/min_atoms_per_mpi_thread);
/* Avoid partial use of Hyper-Threading */
if (gmx_cpuid_x86_smt(hwinfo->cpuid_info) == GMX_CPUID_X86_SMT_ENABLED &&
/* We determine the extra cost of the non-bonded kernels compared to
* a reference nstlist value of 10 (which is the default in grompp).
*/
-static const int nbnxn_reference_nstlist = 10;
+static const int nbnxnReferenceNstlist = 10;
/* The values to try when switching */
const int nstlist_try[] = { 20, 25, 40 };
#define NNSTL sizeof(nstlist_try)/sizeof(nstlist_try[0])
float listfac_ok, listfac_max;
int nstlist_orig, nstlist_prev;
verletbuf_list_setup_t ls;
- real rlist_nstlist10, rlist_inc, rlist_ok, rlist_max;
+ real rlistWithReferenceNstlist, rlist_inc, rlist_ok, rlist_max;
real rlist_new, rlist_prev;
- int nstlist_ind = 0;
+ size_t nstlist_ind = 0;
t_state state_tmp;
gmx_bool bBox, bDD, bCont;
const char *nstl_gpu = "\nFor optimal performance with a GPU nstlist (now %d) should be larger.\nThe optimum depends on your CPU and GPU resources.\nYou might want to try several nstlist values.\n";
const char *box_err = "Can not increase nstlist because the box is too small";
const char *dd_err = "Can not increase nstlist because of domain decomposition limitations";
char buf[STRLEN];
+ const float oneThird = 1.0f / 3.0f;
if (nstlist_cmdline <= 0)
{
verletbuf_get_list_setup(bGPU, &ls);
/* Allow rlist to make the list a given factor larger than the list
- * would be with nstlist=10.
+ * would be with the reference value for nstlist (10).
*/
nstlist_prev = ir->nstlist;
- ir->nstlist = 10;
+ ir->nstlist = nbnxnReferenceNstlist;
calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL,
- &rlist_nstlist10);
+ &rlistWithReferenceNstlist);
ir->nstlist = nstlist_prev;
/* Determine the pair list size increase due to zero interactions */
rlist_inc = nbnxn_get_rlist_effective_inc(ls.cluster_size_j,
mtop->natoms/det(box));
- rlist_ok = (rlist_nstlist10 + rlist_inc)*pow(listfac_ok, 1.0/3.0) - rlist_inc;
- rlist_max = (rlist_nstlist10 + rlist_inc)*pow(listfac_max, 1.0/3.0) - rlist_inc;
+ rlist_ok = (rlistWithReferenceNstlist + rlist_inc)*pow(listfac_ok, oneThird) - rlist_inc;
+ rlist_max = (rlistWithReferenceNstlist + rlist_inc)*pow(listfac_max, oneThird) - rlist_inc;
if (debug)
{
fprintf(debug, "nstlist tuning: rlist_inc %.3f rlist_ok %.3f rlist_max %.3f\n",
}
}
-static void convert_to_verlet_scheme(FILE *fplog,
- t_inputrec *ir,
- gmx_mtop_t *mtop, real box_vol)
-{
- char *conv_mesg = "Converting input file with group cut-off scheme to the Verlet cut-off scheme";
-
- md_print_warn(NULL, fplog, "%s\n", conv_mesg);
-
- ir->cutoff_scheme = ecutsVERLET;
- ir->verletbuf_tol = 0.005;
-
- if (ir->rcoulomb != ir->rvdw)
- {
- gmx_fatal(FARGS, "The VdW and Coulomb cut-offs are different, whereas the Verlet scheme only supports equal cut-offs");
- }
-
- if (ir->vdwtype == evdwUSER || EEL_USER(ir->coulombtype))
- {
- gmx_fatal(FARGS, "User non-bonded potentials are not (yet) supported with the Verlet scheme");
- }
- else if (ir_vdw_switched(ir) || ir_coulomb_switched(ir))
- {
- if (ir_vdw_switched(ir) && ir->vdw_modifier == eintmodNONE)
- {
- ir->vdwtype = evdwCUT;
-
- switch (ir->vdwtype)
- {
- case evdwSHIFT: ir->vdw_modifier = eintmodFORCESWITCH; break;
- case evdwSWITCH: ir->vdw_modifier = eintmodPOTSWITCH; break;
- default: gmx_fatal(FARGS, "The Verlet scheme does not support Van der Waals interactions of type '%s'", evdw_names[ir->vdwtype]);
- }
- }
- if (ir_coulomb_switched(ir) && ir->coulomb_modifier == eintmodNONE)
- {
- if (EEL_FULL(ir->coulombtype))
- {
- /* With full electrostatic only PME can be switched */
- ir->coulombtype = eelPME;
- ir->coulomb_modifier = eintmodPOTSHIFT;
- }
- else
- {
- md_print_warn(NULL, fplog, "NOTE: Replacing %s electrostatics with reaction-field with epsilon-rf=inf\n", eel_names[ir->coulombtype]);
- ir->coulombtype = eelRF;
- ir->epsilon_rf = 0.0;
- ir->coulomb_modifier = eintmodPOTSHIFT;
- }
- }
-
- /* We set the pair energy error tolerance to a small number.
- * Note that this is only for testing. For production the user
- * should think about this and set the mdp options.
- */
- ir->verletbuf_tol = 1e-4;
- }
-
- if (inputrec2nboundeddim(ir) != 3)
- {
- gmx_fatal(FARGS, "Can only convert old tpr files to the Verlet cut-off scheme with 3D pbc");
- }
-
- if (ir->efep != efepNO || ir->implicit_solvent != eisNO)
- {
- gmx_fatal(FARGS, "Will not convert old tpr files to the Verlet cut-off scheme with free-energy calculations or implicit solvent");
- }
-
- if (EI_DYNAMICS(ir->eI) && !(EI_MD(ir->eI) && ir->etc == etcNO))
- {
- verletbuf_list_setup_t ls;
-
- verletbuf_get_list_setup(FALSE, &ls);
- calc_verlet_buffer_size(mtop, box_vol, ir, -1, &ls, NULL, &ir->rlist);
- }
- else
- {
- real rlist_fac;
-
- if (EI_MD(ir->eI))
- {
- rlist_fac = 1 + verlet_buffer_ratio_NVE_T0;
- }
- else
- {
- rlist_fac = 1 + verlet_buffer_ratio_nodynamics;
- }
- ir->verletbuf_tol = -1;
- ir->rlist = rlist_fac*max(ir->rvdw, ir->rcoulomb);
- }
-
- gmx_mtop_remove_chargegroups(mtop);
-}
-
static void print_hw_opt(FILE *fp, const gmx_hw_opt_t *hw_opt)
{
fprintf(fp, "hw_opt: nt %d ntmpi %d ntomp %d ntomp_pme %d gpu_id '%s'\n",
t_inputrec *ir,
const t_commrec *cr)
{
- char sbuf[STEPSTRSIZE];
-
assert(ir);
assert(cr);
/* override with anything else than the default -2 */
if (nsteps_cmdline > -2)
{
- char stmp[STRLEN];
+ char sbuf_steps[STEPSTRSIZE];
+ char sbuf_msg[STRLEN];
ir->nsteps = nsteps_cmdline;
if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
{
- sprintf(stmp, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
- gmx_step_str(nsteps_cmdline, sbuf),
+ sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
+ gmx_step_str(nsteps_cmdline, sbuf_steps),
fabs(nsteps_cmdline*ir->delta_t));
}
else
{
- sprintf(stmp, "Overriding nsteps with value passed on the command line: %s steps",
- gmx_step_str(nsteps_cmdline, sbuf));
+ sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
+ gmx_step_str(nsteps_cmdline, sbuf_steps));
}
- md_print_warn(cr, fplog, "%s\n", stmp);
+ md_print_warn(cr, fplog, "%s\n", sbuf_msg);
}
-}
-
-/* Frees GPU memory and destroys the CUDA context.
- *
- * Note that this function needs to be called even if GPUs are not used
- * in this run because the PME ranks have no knowledge of whether GPUs
- * are used or not, but all ranks need to enter the barrier below.
- */
-static void free_gpu_resources(const t_forcerec *fr,
- const t_commrec *cr)
-{
- gmx_bool bIsPPrankUsingGPU;
- char gpu_err_str[STRLEN];
-
- bIsPPrankUsingGPU = (cr->duty & DUTY_PP) && fr->nbv != NULL && fr->nbv->bUseGPU;
-
- if (bIsPPrankUsingGPU)
+ else if (nsteps_cmdline < -2)
{
- /* free nbnxn data in GPU memory */
- nbnxn_cuda_free(fr->nbv->cu_nbv);
-
- /* With tMPI we need to wait for all ranks to finish deallocation before
- * destroying the context in free_gpu() as some ranks may be sharing
- * GPU and context.
- * Note: as only PP ranks need to free GPU resources, so it is safe to
- * not call the barrier on PME ranks.
- */
-#ifdef GMX_THREAD_MPI
- if (PAR(cr))
- {
- gmx_barrier(cr);
- }
-#endif /* GMX_THREAD_MPI */
-
- /* uninitialize GPU (by destroying the context) */
- if (!free_gpu(gpu_err_str))
- {
- gmx_warning("On rank %d failed to free GPU #%d: %s",
- cr->nodeid, get_current_gpu_device_id(), gpu_err_str);
- }
+ gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %d",
+ nsteps_cmdline);
}
+ /* Do nothing if nsteps_cmdline == -2 */
}
int mdrunner(gmx_hw_opt_t *hw_opt,
const char *deviceOptions, int imdport, unsigned long Flags)
{
gmx_bool bForceUseGPU, bTryUseGPU, bRerunMD, bCantUseGPU;
- double nodetime = 0, realtime;
t_inputrec *inputrec;
t_state *state = NULL;
matrix box;
gmx_ddbox_t ddbox = {0};
int npme_major, npme_minor;
- real tmpr1, tmpr2;
t_nrnb *nrnb;
gmx_mtop_t *mtop = NULL;
t_mdatoms *mdatoms = NULL;
t_fcdata *fcd = NULL;
real ewaldcoeff_q = 0;
real ewaldcoeff_lj = 0;
- gmx_pme_t *pmedata = NULL;
+ struct gmx_pme_t **pmedata = NULL;
gmx_vsite_t *vsite = NULL;
gmx_constr_t constr;
- int i, m, nChargePerturbed = -1, nTypePerturbed = 0, status, nalloc;
- char *gro;
+ int nChargePerturbed = -1, nTypePerturbed = 0, status;
gmx_wallcycle_t wcycle;
gmx_bool bReadEkin;
- int list;
gmx_walltime_accounting_t walltime_accounting = NULL;
int rc;
gmx_int64_t reset_counters;
gmx_edsam_t ed = NULL;
- t_commrec *cr_old = cr;
int nthreads_pme = 1;
int nthreads_pp = 1;
gmx_membed_t membed = NULL;
if (SIMMASTER(cr))
{
/* Read (nearly) all data required for the simulation */
- read_tpx_state(ftp2fn(efTPX, nfile, fnm), inputrec, state, NULL, mtop);
-
- if (inputrec->cutoff_scheme != ecutsVERLET &&
- ((Flags & MD_TESTVERLET) || getenv("GMX_VERLET_SCHEME") != NULL))
- {
- convert_to_verlet_scheme(fplog, inputrec, mtop, det(state->box));
- }
+ read_tpx_state(ftp2fn(efTPR, nfile, fnm), inputrec, state, NULL, mtop);
if (inputrec->cutoff_scheme == ecutsVERLET)
{
{
md_print_warn(cr, fplog,
"NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
- " To use a GPU, set the mdp option: cutoff-scheme = Verlet\n"
- " (for quick performance testing you can use the -testverlet option)\n");
+ " To use a GPU, set the mdp option: cutoff-scheme = Verlet\n");
}
if (bForceUseGPU)
}
}
- /* Check for externally set OpenMP affinity and turn off internal
- * pinning if any is found. We need to do this check early to tell
- * thread-MPI whether it should do pinning when spawning threads.
- * TODO: the above no longer holds, we should move these checks down
- */
- gmx_omp_check_thread_affinity(fplog, cr, hw_opt);
-
/* Check and update the hardware options for internal consistency */
check_and_update_hw_opt_1(hw_opt, SIMMASTER(cr));
+ /* Early check for externally set process affinity. */
+ gmx_check_thread_affinity_set(fplog, cr,
+ hw_opt, hwinfo->nthreads_hw_avail, FALSE);
if (SIMMASTER(cr))
{
-#ifdef GMX_THREAD_MPI
- /* Early check for externally set process affinity.
- * With thread-MPI this is needed as pinning might get turned off,
- * which needs to be known before starting thread-MPI.
- * With thread-MPI hw_opt is processed here on the master rank
- * and passed to the other ranks later, so we only do this on master.
- */
- gmx_check_thread_affinity_set(fplog,
- NULL,
- hw_opt, hwinfo->nthreads_hw_avail, FALSE);
-#endif
#ifdef GMX_THREAD_MPI
if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0)
if (hw_opt->nthreads_tmpi > 1)
{
+ t_commrec *cr_old = cr;
/* now start the threads. */
cr = mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm,
oenv, bVerbose, bCompact, nstglobalcomm,
"but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec"
#endif
#endif
- , ShortProgram()
+ , output_env_get_program_display_name(oenv)
);
}
}
}
- if (((MASTER(cr) || (Flags & MD_SEPPOT)) && (Flags & MD_APPENDFILES))
-#ifdef GMX_THREAD_MPI
- /* With thread MPI only the master node/thread exists in mdrun.c,
- * therefore non-master nodes need to open the "seppot" log file here.
- */
- || (!MASTER(cr) && (Flags & MD_SEPPOT))
-#endif
- )
+ if (MASTER(cr) && (Flags & MD_APPENDFILES))
{
- gmx_log_open(ftp2fn(efLOG, nfile, fnm), cr, !(Flags & MD_SEPPOT),
+ gmx_log_open(ftp2fn(efLOG, nfile, fnm), cr,
Flags, &fplog);
}
(cr->duty & DUTY_PP) == 0,
inputrec->cutoff_scheme == ecutsVERLET);
+#ifndef NDEBUG
+ if (integrator[inputrec->eI].func != do_tpi &&
+ inputrec->cutoff_scheme == ecutsVERLET)
+ {
+ gmx_feenableexcept();
+ }
+#endif
if (PAR(cr))
{
/* The master rank decided on the use of GPUs,
/*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
"nofile","nofile","nofile","nofile",FALSE,pforce);
*/
- fr->bSepDVDL = ((Flags & MD_SEPPOT) == MD_SEPPOT);
/* Initialize QM-MM */
if (fr->bQMMM)
/* Assumes uniform use of the number of OpenMP threads */
walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
- if (inputrec->ePull != epullNO)
+ if (inputrec->bPull)
{
/* Initialize pull code */
init_pull(fplog, inputrec, nfile, fnm, mtop, cr, oenv, inputrec->fepvals->init_lambda,
if (DOMAINDECOMP(cr))
{
+ GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
dd_init_bondeds(fplog, cr->dd, mtop, vsite, inputrec,
Flags & MD_DDBONDCHECK, fr->cginfo_mb);
Flags,
walltime_accounting);
- if (inputrec->ePull != epullNO)
+ if (inputrec->bPull)
{
finish_pull(inputrec->pull);
}
}
else
{
+ GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
/* do PME only */
walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
gmx_pmeonly(*pmedata, cr, nrnb, wcycle, walltime_accounting, ewaldcoeff_q, ewaldcoeff_lj, inputrec);
*/
finish_run(fplog, cr,
inputrec, nrnb, wcycle, walltime_accounting,
- fr != NULL && fr->nbv != NULL && fr->nbv->bUseGPU ?
- nbnxn_cuda_get_timings(fr->nbv->cu_nbv) : NULL,
+ fr ? fr->nbv : NULL,
EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
/* Free GPU memory and context */
- free_gpu_resources(fr, cr);
+ free_gpu_resources(fr, cr, &hwinfo->gpu_info, fr ? fr->gpu_opt : NULL);
if (opt2bSet("-membed", nfile, fnm))
{
rc = (int)gmx_get_stop_condition();
+ done_ed(&ed);
+
#ifdef GMX_THREAD_MPI
/* we need to join all threads. The sub-threads join when they
exit this function, but the master thread needs to be told to