Miscellaneous
^^^^^^^^^^^^^
+Added AMD Zen 2 detection
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The AMD Zen 2 architecture is now detected as distinct from Zen 1, and
+256-bit wide AVX2 SIMD instructions (GMX_SIMD=AVX2_256) are used for it by default.
+The non-bonded kernel parameters have also been tuned for Zen 2.
+These changes have a significant impact on performance.
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
{
const int ncore = hwinfo_g->hardwareTopology->numberOfCores();
/* Zen 1 is detected as AMD family 23 with model 1, 8, 17 or 24; other AMD CPUs are not treated as Zen 1 */
- const bool cpuIsAmdZen = (cpuInfo.vendor() == CpuInfo::Vendor::Amd &&
- cpuInfo.family() >= 23);
+ const bool cpuIsAmdZen1 = (cpuInfo.vendor() == CpuInfo::Vendor::Amd &&
+ cpuInfo.family() == 23 &&
+ (cpuInfo.model() == 1 || cpuInfo.model() == 17 ||
+ cpuInfo.model() == 8 || cpuInfo.model() == 24));
#if GMX_LIB_MPI
int nhwthread, ngpu, i;
maxMinLocal[7] = -maxMinLocal[2];
maxMinLocal[8] = -maxMinLocal[3];
maxMinLocal[9] = -maxMinLocal[4];
- maxMinLocal[10] = (cpuIsAmdZen ? 1 : 0);
+ maxMinLocal[10] = (cpuIsAmdZen1 ? 1 : 0);
MPI_Allreduce(maxMinLocal.data(), maxMinReduced.data(), maxMinLocal.size(),
MPI_INT, MPI_MAX, MPI_COMM_WORLD);
hwinfo_g->simd_suggest_min = -maxMinReduced[8];
hwinfo_g->simd_suggest_max = maxMinReduced[3];
hwinfo_g->bIdenticalGPUs = (maxMinReduced[4] == -maxMinReduced[9]);
- hwinfo_g->haveAmdZenCpu = (maxMinReduced[10] > 0);
+ hwinfo_g->haveAmdZen1Cpu = (maxMinReduced[10] > 0);
#else
/* All ranks use the same pointer, protected by a mutex in the caller */
hwinfo_g->nphysicalnode = 1;
hwinfo_g->simd_suggest_min = static_cast<int>(simdSuggested(cpuInfo));
hwinfo_g->simd_suggest_max = static_cast<int>(simdSuggested(cpuInfo));
hwinfo_g->bIdenticalGPUs = TRUE;
- hwinfo_g->haveAmdZenCpu = cpuIsAmdZen;
+ hwinfo_g->haveAmdZen1Cpu = cpuIsAmdZen1;
GMX_UNUSED_VALUE(physicalNodeComm);
#endif
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
int simd_suggest_max; /* Highest SIMD instruction set supported by at least one rank */
gmx_bool bIdenticalGPUs; /* TRUE if all ranks have the same type(s) and order of GPUs */
- bool haveAmdZenCpu; /* TRUE when at least one CPU in any of the nodes is AMD Zen */
+ bool haveAmdZen1Cpu; /* TRUE when at least one CPU in any of the nodes is AMD Zen of the first generation */
};
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
*kernel_type = nbnxnk4xN_SIMD_2xNN;
}
#endif
- if (hardwareInfo.haveAmdZenCpu)
+ if (hardwareInfo.haveAmdZen1Cpu)
{
/* One 256-bit FMA per cycle makes 2xNN faster */
*kernel_type = nbnxnk4xN_SIMD_2xNN;
/* On first-generation AMD Zen, tabulated Ewald kernels are faster on all
* 4 combinations of single or double precision and 128 or 256-bit AVX2.
*/
- if (!hardwareInfo.haveAmdZenCpu)
+ if (!hardwareInfo.haveAmdZen1Cpu)
{
*ewald_excl = ewaldexclAnalytical;
}