From 99a1e1f7ada3e6718875e39eb789f24172b880ae Mon Sep 17 00:00:00 2001 From: Roland Schulz Date: Fri, 29 May 2015 16:06:43 -0400 Subject: [PATCH] Use stack buffer for LINCS&bonded gather/scatter Fixes #1726 Change-Id: Idd57adc02c8fc05529a50e75f3017475b476a90f --- src/gromacs/listed-forces/bonded.cpp | 10 +++++++-- src/gromacs/mdlib/clincs.cpp | 23 +++++++++++++------ src/gromacs/utility/basedefinitions.h | 32 ++++++++++++++++++++++++++- 3 files changed, 55 insertions(+), 10 deletions(-) diff --git a/src/gromacs/listed-forces/bonded.cpp b/src/gromacs/listed-forces/bonded.cpp index 2e2c859892..070547a203 100644 --- a/src/gromacs/listed-forces/bonded.cpp +++ b/src/gromacs/listed-forces/bonded.cpp @@ -177,7 +177,7 @@ gmx_hack_simd4_transpose_to_simd_r(const gmx_simd4_float_t *a, * \param[in] v Array of rvecs * \param[in] index0 Index into the vector array * \param[in] index1 Index into the vector array - * \param[in,out] buf_aligned Aligned tmp buffer of size 3*GMX_SIMD_REAL_WIDTH + * \param[in,out] buf Aligned tmp buffer of size 3*GMX_SIMD_REAL_WIDTH * \param[out] dx SIMD register with x difference * \param[out] dy SIMD register with y difference * \param[out] dz SIMD register with z difference @@ -186,7 +186,7 @@ static gmx_inline void gmx_simdcall gmx_hack_simd_gather_rvec_dist_two_index(const rvec *v, const int *index0, const int *index1, - real gmx_unused *buf_aligned, + real gmx_unused *buf, gmx_simd_real_t *dx, gmx_simd_real_t *dy, gmx_simd_real_t *dz) @@ -204,6 +204,12 @@ gmx_hack_simd_gather_rvec_dist_two_index(const rvec *v, } gmx_hack_simd4_transpose_to_simd_r(d, dx, dy, dz, &tmp); #else /* generic SIMD */ +#if GMX_ALIGNMENT + GMX_ALIGNED(real, GMX_SIMD_REAL_WIDTH) buf_aligned[3*GMX_SIMD_REAL_WIDTH]; +#else + real* buf_aligned = buf; +#endif + int i, m; for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++) diff --git a/src/gromacs/mdlib/clincs.cpp b/src/gromacs/mdlib/clincs.cpp index a1b9ed893c..7be3cef649 100644 --- a/src/gromacs/mdlib/clincs.cpp +++ 
b/src/gromacs/mdlib/clincs.cpp @@ -206,9 +206,6 @@ gmx_hack_simd_transpose_to_simd4_r(gmx_simd_float_t row0, #endif /* AVX */ - - - #ifdef GMX_SIMD_HAVE_REAL /*! \brief Store differences between indexed rvecs in SIMD registers. * @@ -217,7 +214,7 @@ gmx_hack_simd_transpose_to_simd4_r(gmx_simd_float_t row0, * * \param[in] v Array of rvecs * \param[in] pair_index Index pairs for GMX_SIMD_REAL_WIDTH vector pairs - * \param[in,out] buf_aligned Aligned tmp buffer of size 3*GMX_SIMD_REAL_WIDTH + * \param[in,out] buf Aligned tmp buffer of size 3*GMX_SIMD_REAL_WIDTH * \param[out] dx SIMD register with x difference * \param[out] dy SIMD register with y difference * \param[out] dz SIMD register with z difference @@ -225,7 +222,7 @@ gmx_hack_simd_transpose_to_simd4_r(gmx_simd_float_t row0, static gmx_inline void gmx_simdcall gmx_hack_simd_gather_rvec_dist_pair_index(const rvec *v, const int *pair_index, - real gmx_unused *buf_aligned, + real gmx_unused *buf, gmx_simd_real_t *dx, gmx_simd_real_t *dy, gmx_simd_real_t *dz) @@ -243,6 +240,12 @@ gmx_hack_simd_gather_rvec_dist_pair_index(const rvec *v, gmx_hack_simd4_transpose_to_simd_r(d, dx, dy, dz, &tmp); #else +#if GMX_ALIGNMENT + GMX_ALIGNED(real, GMX_SIMD_REAL_WIDTH) buf_aligned[3*GMX_SIMD_REAL_WIDTH]; +#else + real* buf_aligned = buf; +#endif + int i, m; for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++) @@ -266,14 +269,14 @@ gmx_hack_simd_gather_rvec_dist_pair_index(const rvec *v, * \param[in] x SIMD register with x-components of the vectors * \param[in] y SIMD register with y-components of the vectors * \param[in] z SIMD register with z-components of the vectors - * \param[in,out] buf_aligned Aligned tmp buffer of size 3*GMX_SIMD_REAL_WIDTH + * \param[in,out] buf Aligned tmp buffer of size 3*GMX_SIMD_REAL_WIDTH * \param[out] v Array of GMX_SIMD_REAL_WIDTH rvecs */ static gmx_inline void gmx_simdcall gmx_simd_store_vec_to_rvec(gmx_simd_real_t x, gmx_simd_real_t y, gmx_simd_real_t z, - real gmx_unused *buf_aligned, + real gmx_unused 
*buf, rvec *v) { #if defined(GMX_SIMD_X86_AVX_256) || defined(GMX_SIMD_X86_AVX2_256) @@ -288,6 +291,12 @@ gmx_simd_store_vec_to_rvec(gmx_simd_real_t x, gmx_hack_simd4_store3_r(v[i], s4[i]); } #else +#if GMX_ALIGNMENT + GMX_ALIGNED(real, GMX_SIMD_REAL_WIDTH) buf_aligned[3*GMX_SIMD_REAL_WIDTH]; +#else + real* buf_aligned = buf; +#endif + int i, m; gmx_simd_store_r(buf_aligned + 0*GMX_SIMD_REAL_WIDTH, x); diff --git a/src/gromacs/utility/basedefinitions.h b/src/gromacs/utility/basedefinitions.h index be20290f68..4e5dc6d3e9 100644 --- a/src/gromacs/utility/basedefinitions.h +++ b/src/gromacs/utility/basedefinitions.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -222,6 +222,36 @@ typedef uint64_t gmx_uint64_t; #endif #endif +/*! \def GMX_ALIGNMENT + * \brief + * Defined to 1 if the compiler supports aligned variable declarations via GMX_ALIGNED, 0 otherwise */ + +/*! \def GMX_ALIGNED(type, alignment) + * \brief + * Declare variable with data alignment + * + * \param[in] type Type of variable + * \param[in] alignment Alignment in multiples of type + * + * Typical usage: + * \code + GMX_ALIGNED(real, GMX_SIMD_REAL_WIDTH) buf[...]; + \endcode + */ +/* alignas(x) is not used even with GMX_CXX11 because it isn't in the list of + tested features and thus might not be supported. + MSVC2010 has align but doesn't support sizeof inside.
*/ +#if defined(_MSC_VER) && (_MSC_VER >= 1700 || defined(__ICL)) +# define GMX_ALIGNMENT 1 +# define GMX_ALIGNED(type, alignment) __declspec(align(alignment*sizeof(type))) type +#elif defined(__GNUC__) || defined(__clang__) +# define GMX_ALIGNMENT 1 +# define GMX_ALIGNED(type, alignment) __attribute__ ((__aligned__(alignment*sizeof(type)))) type +#else +# define GMX_ALIGNMENT 0 +# define GMX_ALIGNED(type, alignment) +#endif + /*! \brief * Macro to explicitly ignore an unused value. * -- 2.22.0