GMX_SIMD
"SIMD instruction set for CPU kernels and compiler optimization"
"${GMX_SUGGESTED_SIMD}"
- None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256 AVX_512 AVX_512_KNL MIC ARM_NEON ARM_NEON_ASIMD IBM_QPX IBM_VMX IBM_VSX Sparc64_HPC_ACE Reference)
+ None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256 AVX2_128 AVX_512 AVX_512_KNL MIC ARM_NEON ARM_NEON_ASIMD IBM_QPX IBM_VMX IBM_VSX Sparc64_HPC_ACE Reference)
if(GMX_TARGET_MIC)
set(GMX_FFT_LIBRARY_DEFAULT "mkl")
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
elseif(OUTPUT_TMP MATCHES " avx512f ")
set(OUTPUT_SIMD "AVX_512")
elseif(OUTPUT_TMP MATCHES " avx2 ")
- set(OUTPUT_SIMD "AVX2_256")
+ if(OUTPUT_TMP MATCHES " amd ")
+ set(OUTPUT_SIMD "AVX2_128")
+ else()
+ set(OUTPUT_SIMD "AVX2_256")
+ endif()
elseif(OUTPUT_TMP MATCHES " avx ")
if(OUTPUT_TMP MATCHES " fma4 ")
# AMD that works better with avx-128-fma
set(GMX_SIMD_X86_${GMX_SIMD} 1)
set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX SIMD instructions")
-elseif(GMX_SIMD STREQUAL "AVX2_256")
+elseif(GMX_SIMD MATCHES "AVX2_")
prepare_x86_toolchain(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)
set(SIMD_C_FLAGS "${TOOLCHAIN_C_FLAGS}")
set(SIMD_CXX_FLAGS "${TOOLCHAIN_CXX_FLAGS}")
set(GMX_SIMD_X86_${GMX_SIMD} 1)
- set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX2 SIMD instructions")
+
+ if(GMX_SIMD STREQUAL "AVX2_128")
+ set(SIMD_STATUS_MESSAGE "Enabling 128-bit AVX2 SIMD instructions")
+ else()
+ set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX2 SIMD instructions")
+ endif()
elseif(GMX_SIMD STREQUAL "MIC")
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2009,2010,2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2009,2010,2011,2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
/* AVX2 256-bit SIMD instruction set level was selected */
#cmakedefine01 GMX_SIMD_X86_AVX2_256
+/* AVX2 128-bit SIMD instruction set level was selected */
+#cmakedefine01 GMX_SIMD_X86_AVX2_128
+
/* MIC (Xeon Phi) SIMD instruction set level was selected */
#cmakedefine01 GMX_SIMD_X86_MIC
defined __x86_64__ || defined __amd64__ || defined _M_X64 || defined _M_AMD64
result.vendor_ = detectX86Vendor();
+
+ if (result.vendor_ == CpuInfo::Vendor::Intel)
+ {
+ result.features_.insert(CpuInfo::Feature::X86_Intel);
+ }
+ else if (result.vendor_ == CpuInfo::Vendor::Amd)
+ {
+ result.features_.insert(CpuInfo::Feature::X86_Amd);
+ }
detectX86Features(&result.brandString_, &result.family_, &result.model_,
&result.stepping_, &result.features_);
result.logicalProcessors_ = detectX86LogicalProcessors();
CpuInfo::s_featureStrings_ =
{
{ CpuInfo::Feature::X86_Aes, "aes" },
+ { CpuInfo::Feature::X86_Amd, "amd" },
{ CpuInfo::Feature::X86_Apic, "apic" },
{ CpuInfo::Feature::X86_Avx, "avx" },
{ CpuInfo::Feature::X86_Avx2, "avx2" },
{ CpuInfo::Feature::X86_Fma4, "fma4" },
{ CpuInfo::Feature::X86_Hle, "hle" },
{ CpuInfo::Feature::X86_Htt, "htt" },
+ { CpuInfo::Feature::X86_Intel, "intel" },
{ CpuInfo::Feature::X86_Lahf, "lahf" },
{ CpuInfo::Feature::X86_MisalignSse, "misalignsse" },
{ CpuInfo::Feature::X86_Mmx, "mmx" },
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2015,2016,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
enum class Feature
{
X86_Aes, //!< x86 advanced encryption standard accel.
+ X86_Amd, //!< This is an AMD x86 processor
X86_Apic, //!< APIC support
X86_Avx, //!< Advanced vector extensions
X86_Avx2, //!< AVX2 including gather support (not used yet)
X86_Fma4, //!< 4-operand FMA, only on AMD for now
X86_Hle, //!< Hardware lock elision
X86_Htt, //!< Hyper-Threading supported (but maybe not enabled)
+ X86_Intel, //!< This is an Intel x86 processor
X86_Lahf, //!< LAHF/SAHF support in 64 bits
X86_MisalignSse, //!< Support for misaligned SSE data instructions
X86_Mmx, //!< MMX registers and instructions
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+#ifndef GMX_SIMD_IMPL_X86_AVX2_128_H
+#define GMX_SIMD_IMPL_X86_AVX2_128_H
+
+#include "impl_x86_avx2_128_definitions.h"
+#include "impl_x86_avx2_128_general.h"
+#include "impl_x86_avx2_128_simd4_double.h"
+#include "impl_x86_avx2_128_simd4_float.h"
+#include "impl_x86_avx2_128_simd_double.h"
+#include "impl_x86_avx2_128_simd_float.h"
+#include "impl_x86_avx2_128_util_double.h"
+#include "impl_x86_avx2_128_util_float.h"
+
+#endif // GMX_SIMD_IMPL_X86_AVX2_128_H
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+#ifndef GMX_SIMD_IMPL_X86_AVX2_128_DEFINITIONS_H
+#define GMX_SIMD_IMPL_X86_AVX2_128_DEFINITIONS_H
+
+// Capability definitions for (mostly) 128-bit AVX2
+#define GMX_SIMD 1
+#define GMX_SIMD_HAVE_FLOAT 1
+#define GMX_SIMD_HAVE_DOUBLE 1
+#define GMX_SIMD_HAVE_LOADU 1
+#define GMX_SIMD_HAVE_STOREU 1
+#define GMX_SIMD_HAVE_LOGICAL 1
+#define GMX_SIMD_HAVE_FMA 1
+#define GMX_SIMD_HAVE_FINT32_EXTRACT 1
+#define GMX_SIMD_HAVE_FINT32_LOGICAL 1
+#define GMX_SIMD_HAVE_FINT32_ARITHMETICS 1
+#define GMX_SIMD_HAVE_DINT32_EXTRACT 1
+#define GMX_SIMD_HAVE_DINT32_LOGICAL 1
+#define GMX_SIMD_HAVE_DINT32_ARITHMETICS 1
+#define GMX_SIMD_HAVE_NATIVE_COPYSIGN_FLOAT 0
+#define GMX_SIMD_HAVE_NATIVE_RSQRT_ITER_FLOAT 0
+#define GMX_SIMD_HAVE_NATIVE_RCP_ITER_FLOAT 0
+#define GMX_SIMD_HAVE_NATIVE_LOG_FLOAT 0
+#define GMX_SIMD_HAVE_NATIVE_EXP2_FLOAT 0
+#define GMX_SIMD_HAVE_NATIVE_EXP_FLOAT 0
+#define GMX_SIMD_HAVE_NATIVE_COPYSIGN_DOUBLE 0
+#define GMX_SIMD_HAVE_NATIVE_RSQRT_ITER_DOUBLE 0
+#define GMX_SIMD_HAVE_NATIVE_RCP_ITER_DOUBLE 0
+#define GMX_SIMD_HAVE_NATIVE_LOG_DOUBLE 0
+#define GMX_SIMD_HAVE_NATIVE_EXP2_DOUBLE 0
+#define GMX_SIMD_HAVE_NATIVE_EXP_DOUBLE 0
+#define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT 1
+#define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE 1
+#define GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT 0 // No need for half-simd, width is 4
+#define GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE 0 // No need for half-simd, width is 2
+
+#define GMX_SIMD4_HAVE_FLOAT 1
+#define GMX_SIMD4_HAVE_DOUBLE 1
+
+// Implementation details
+#define GMX_SIMD_FLOAT_WIDTH 4
+#define GMX_SIMD_DOUBLE_WIDTH 2
+#define GMX_SIMD_FINT32_WIDTH 4
+#define GMX_SIMD_DINT32_WIDTH 2
+#define GMX_SIMD4_WIDTH 4
+#define GMX_SIMD_RSQRT_BITS 11
+#define GMX_SIMD_RCP_BITS 11
+
+#endif // GMX_SIMD_IMPL_X86_AVX2_128_DEFINITIONS_H
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+#ifndef GMX_SIMD_IMPL_X86_AVX2_128_GENERAL_H
+#define GMX_SIMD_IMPL_X86_AVX2_128_GENERAL_H
+
+#include "gromacs/simd/impl_x86_avx_256/impl_x86_avx_256_general.h"
+
+#endif // GMX_SIMD_IMPL_X86_AVX2_128_GENERAL_H
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+#ifndef GMX_SIMD_IMPL_X86_AVX2_128_SIMD4_DOUBLE_H
+#define GMX_SIMD_IMPL_X86_AVX2_128_SIMD4_DOUBLE_H
+
+#include "gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256_simd4_double.h"
+
+#endif // GMX_SIMD_IMPL_X86_AVX2_128_SIMD4_DOUBLE_H
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+#ifndef GMX_SIMD_IMPL_X86_AVX2_128_SIMD4_FLOAT_H
+#define GMX_SIMD_IMPL_X86_AVX2_128_SIMD4_FLOAT_H
+
+#include "gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256_simd4_float.h"
+
+#endif // GMX_SIMD_IMPL_X86_AVX2_128_SIMD4_FLOAT_H
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+#ifndef GMX_SIMD_IMPL_X86_AVX2_128_SIMD_DOUBLE_H
+#define GMX_SIMD_IMPL_X86_AVX2_128_SIMD_DOUBLE_H
+
+#include "config.h"
+
+#include <immintrin.h>
+
+#include "gromacs/simd/impl_x86_sse4_1/impl_x86_sse4_1_simd_double.h"
+
+namespace gmx
+{
+
+static inline double gmx_simdcall
+reduce(SimdDouble a)
+{
+ __m128d b = _mm_add_sd(a.simdInternal_, _mm_permute_pd(a.simdInternal_, _MM_SHUFFLE2(1, 1)));
+ return *reinterpret_cast<double *>(&b);
+}
+
+static inline SimdDouble gmx_simdcall
+fma(SimdDouble a, SimdDouble b, SimdDouble c)
+{
+ return {
+ _mm_fmadd_pd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
+ };
+}
+
+static inline SimdDouble gmx_simdcall
+fms(SimdDouble a, SimdDouble b, SimdDouble c)
+{
+ return {
+ _mm_fmsub_pd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
+ };
+}
+
+static inline SimdDouble gmx_simdcall
+fnma(SimdDouble a, SimdDouble b, SimdDouble c)
+{
+ return {
+ _mm_fnmadd_pd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
+ };
+}
+
+static inline SimdDouble gmx_simdcall
+fnms(SimdDouble a, SimdDouble b, SimdDouble c)
+{
+ return {
+ _mm_fnmsub_pd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
+ };
+}
+
+} // namespace gmx
+
+#endif // GMX_SIMD_IMPL_X86_AVX2_128_SIMD_DOUBLE_H
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+#ifndef GMX_SIMD_IMPL_X86_AVX2_128_SIMD_FLOAT_H
+#define GMX_SIMD_IMPL_X86_AVX2_128_SIMD_FLOAT_H
+
+#include "config.h"
+
+#include <immintrin.h>
+
+#include "gromacs/simd/impl_x86_sse4_1/impl_x86_sse4_1_simd_float.h"
+
+namespace gmx
+{
+
+static inline float gmx_simdcall
+reduce(SimdFloat a)
+{
+ a.simdInternal_ = _mm_add_ps(a.simdInternal_, _mm_permute_ps(a.simdInternal_, _MM_SHUFFLE(1, 0, 3, 2)));
+ a.simdInternal_ = _mm_add_ss(a.simdInternal_, _mm_permute_ps(a.simdInternal_, _MM_SHUFFLE(0, 3, 2, 1)));
+ return *reinterpret_cast<float *>(&a);
+}
+
+static inline SimdFloat gmx_simdcall
+fma(SimdFloat a, SimdFloat b, SimdFloat c)
+{
+ return {
+ _mm_fmadd_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_)
+ };
+}
+
+static inline SimdFloat gmx_simdcall
+fms(SimdFloat a, SimdFloat b, SimdFloat c)
+{
+ return {
+ _mm_fmsub_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_)
+ };
+}
+
+static inline SimdFloat gmx_simdcall
+fnma(SimdFloat a, SimdFloat b, SimdFloat c)
+{
+ return {
+ _mm_fnmadd_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_)
+ };
+}
+
+static inline SimdFloat gmx_simdcall
+fnms(SimdFloat a, SimdFloat b, SimdFloat c)
+{
+ return {
+ _mm_fnmsub_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_)
+ };
+}
+
+} // namespace gmx
+
+#endif // GMX_SIMD_IMPL_X86_AVX2_128_SIMD_FLOAT_H
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+#ifndef GMX_SIMD_IMPL_X86_AVX2_128_UTIL_DOUBLE_H
+#define GMX_SIMD_IMPL_X86_AVX2_128_UTIL_DOUBLE_H
+
+#include "config.h"
+
+#include <immintrin.h>
+
+#include "gromacs/simd/impl_x86_sse4_1/impl_x86_sse4_1_util_double.h"
+
+namespace gmx
+{
+
+static inline void gmx_simdcall
+expandScalarsToTriplets(SimdDouble scalar,
+ SimdDouble * triplets0,
+ SimdDouble * triplets1,
+ SimdDouble * triplets2)
+{
+ triplets0->simdInternal_ = _mm_permute_pd(scalar.simdInternal_, _MM_SHUFFLE2(0, 0));
+ triplets1->simdInternal_ = _mm_permute_pd(scalar.simdInternal_, _MM_SHUFFLE2(1, 0));
+ triplets2->simdInternal_ = _mm_permute_pd(scalar.simdInternal_, _MM_SHUFFLE2(1, 1));
+}
+
+static inline double
+reduceIncr4ReturnSum(double * m,
+ SimdDouble v0,
+ SimdDouble v1,
+ SimdDouble v2,
+ SimdDouble v3)
+{
+ __m128d t1, t2, t3, t4;
+
+ t1 = _mm_unpacklo_pd(v0.simdInternal_, v1.simdInternal_);
+ t2 = _mm_unpackhi_pd(v0.simdInternal_, v1.simdInternal_);
+ t3 = _mm_unpacklo_pd(v2.simdInternal_, v3.simdInternal_);
+ t4 = _mm_unpackhi_pd(v2.simdInternal_, v3.simdInternal_);
+
+ t1 = _mm_add_pd(t1, t2);
+ t3 = _mm_add_pd(t3, t4);
+
+ assert(std::size_t(m) % 16 == 0);
+
+ t2 = _mm_add_pd(t1, _mm_load_pd(m));
+ t4 = _mm_add_pd(t3, _mm_load_pd(m + 2));
+ _mm_store_pd(m, t2);
+ _mm_store_pd(m + 2, t4);
+
+ t1 = _mm_add_pd(t1, t3);
+
+ t2 = _mm_add_sd(t1, _mm_permute_pd(t1, _MM_SHUFFLE2(1, 1)));
+ return *reinterpret_cast<double *>(&t2);
+}
+
+} // namespace gmx
+
+#endif // GMX_SIMD_IMPL_X86_AVX2_128_UTIL_DOUBLE_H
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+#ifndef GMX_SIMD_IMPL_X86_AVX2_128_UTIL_FLOAT_H
+#define GMX_SIMD_IMPL_X86_AVX2_128_UTIL_FLOAT_H
+
+#include "config.h"
+
+#include <immintrin.h>
+
+#include "gromacs/simd/impl_x86_sse4_1/impl_x86_sse4_1_util_float.h"
+
+namespace gmx
+{
+
+static inline void gmx_simdcall
+expandScalarsToTriplets(SimdFloat scalar,
+ SimdFloat * triplets0,
+ SimdFloat * triplets1,
+ SimdFloat * triplets2)
+{
+ triplets0->simdInternal_ = _mm_permute_ps(scalar.simdInternal_, _MM_SHUFFLE(1, 0, 0, 0));
+ triplets1->simdInternal_ = _mm_permute_ps(scalar.simdInternal_, _MM_SHUFFLE(2, 2, 1, 1));
+ triplets2->simdInternal_ = _mm_permute_ps(scalar.simdInternal_, _MM_SHUFFLE(3, 3, 3, 2));
+}
+
+static inline float gmx_simdcall
+reduceIncr4ReturnSum(float * m,
+ SimdFloat v0,
+ SimdFloat v1,
+ SimdFloat v2,
+ SimdFloat v3)
+{
+ _MM_TRANSPOSE4_PS(v0.simdInternal_, v1.simdInternal_, v2.simdInternal_, v3.simdInternal_);
+ v0.simdInternal_ = _mm_add_ps(v0.simdInternal_, v1.simdInternal_);
+ v2.simdInternal_ = _mm_add_ps(v2.simdInternal_, v3.simdInternal_);
+ v0.simdInternal_ = _mm_add_ps(v0.simdInternal_, v2.simdInternal_);
+
+ assert(std::size_t(m) % 16 == 0);
+
+ v2.simdInternal_ = _mm_add_ps(v0.simdInternal_, _mm_load_ps(m));
+ _mm_store_ps(m, v2.simdInternal_);
+
+ __m128 b = _mm_add_ps(v0.simdInternal_, _mm_permute_ps(v0.simdInternal_, _MM_SHUFFLE(1, 0, 3, 2)));
+ b = _mm_add_ss(b, _mm_permute_ps(b, _MM_SHUFFLE(0, 3, 2, 1)));
+ return *reinterpret_cast<float *>(&b);
+}
+
+} // namespace gmx
+
+#endif // GMX_SIMD_IMPL_X86_AVX2_128_UTIL_FLOAT_H
# include "impl_x86_avx_256/impl_x86_avx_256.h"
#elif GMX_SIMD_X86_AVX2_256
# include "impl_x86_avx2_256/impl_x86_avx2_256.h"
+#elif GMX_SIMD_X86_AVX2_128
+# include "impl_x86_avx2_128/impl_x86_avx2_128.h"
#elif GMX_SIMD_X86_MIC
# include "impl_x86_mic/impl_x86_mic.h"
#elif GMX_SIMD_X86_AVX_512
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2015,2016,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
{ SimdType::X86_Avx128Fma, "AVX_128_FMA" },
{ SimdType::X86_Avx, "AVX_256" },
{ SimdType::X86_Avx2, "AVX2_256" },
+ { SimdType::X86_Avx2_128, "AVX2_128" },
{ SimdType::X86_Avx512, "AVX_512" },
{ SimdType::X86_Avx512Knl, "AVX_512_KNL" },
{ SimdType::X86_Mic, "X86_MIC" },
case CpuInfo::Vendor::Amd:
if (c.feature(CpuInfo::Feature::X86_Avx2))
{
- // When Amd starts supporting Avx2 we assume it will be 256 bits
- suggested = SimdType::X86_Avx2;
+ // AMD Ryzen supports 256-bit AVX2, but performs better with 128-bit
+ // since it can execute two independent such instructions per cycle,
+ // and wider SIMD has slightly lower efficiency in GROMACS.
+ suggested = SimdType::X86_Avx2_128;
}
else if (c.feature(CpuInfo::Feature::X86_Avx))
{
return SimdType::X86_Mic;
#elif GMX_SIMD_X86_AVX2_256
return SimdType::X86_Avx2;
+#elif GMX_SIMD_X86_AVX2_128
+ return SimdType::X86_Avx2_128;
#elif GMX_SIMD_X86_AVX_256
return SimdType::X86_Avx;
#elif GMX_SIMD_X86_AVX_128_FMA
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2015,2016,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
X86_Avx128Fma, //!< 128-bit Avx with FMA (Amd)
X86_Avx, //!< 256-bit Avx
X86_Avx2, //!< AVX2
+ X86_Avx2_128, //!< 128-bit AVX2, better than 256-bit for AMD Ryzen
X86_Avx512, //!< AVX_512
X86_Avx512Knl, //!< AVX_512_KNL
X86_Mic, //!< Knight's corner