From b94ae4256fda537ef7b7d7af7e249b4d6416301a Mon Sep 17 00:00:00 2001 From: Berk Hess Date: Thu, 9 Jan 2014 20:54:28 +0100 Subject: [PATCH] Made reference SIMD work again The nbnxn reference SIMD code broke during reorganization. Added GMX_SIMD_HAVE_ERFC macro. Change-Id: I601dccab52ea2a7dc1341ce984c514679bdbfee1 --- .../nbnxn_kernels/nbnxn_kernel_simd_utils.h | 4 ++-- .../nbnxn_kernel_simd_utils_ref.h | 18 ++++++++++++------ .../simd_4xn/nbnxn_kernel_simd_4xn_common.h | 2 +- src/gromacs/mdlib/pme.c | 10 +++++----- src/gromacs/simd/general_x86_mic.h | 4 +++- src/gromacs/simd/macros.h | 7 +++++-- 6 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils.h b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils.h index 8ba1726f2e..75c1313f4f 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils.h +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -58,7 +58,7 @@ /* Align a stack-based thread-local working array. */ static gmx_inline int * -prepare_table_load_buffer(const int *array) +prepare_table_load_buffer(const int gmx_unused *array) { return NULL; } diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils_ref.h b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils_ref.h index 642a2a49a2..e757a8d561 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils_ref.h +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils_ref.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -96,10 +96,13 @@ gmx_add_pr4(gmx_mm_pr4 a, gmx_mm_pr4 b) return c; } - #else -typedef gmx_simd_ref_pr gmx_simd_ref_pr4; +typedef gmx_simd_ref_pr gmx_mm_pr4; + +#define gmx_load_pr4 gmx_load_pr +#define gmx_store_pr4 gmx_store_pr +#define gmx_add_pr4 gmx_add_pr #endif @@ -225,6 +228,7 @@ gmx_sum4_hpr(gmx_simd_ref_pr a, gmx_simd_ref_pr b) return c; } +#ifdef GMX_NBNXN_SIMD_2XNN /* Sum the elements of halfs of each input register and store sums in out */ static gmx_inline gmx_mm_pr4 gmx_mm_transpose_sum4h_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b) @@ -247,6 +251,7 @@ gmx_mm_transpose_sum4h_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b) return sum; } +#endif static gmx_inline void gmx_pr_to_2hpr(gmx_simd_ref_pr a, gmx_mm_hpr *b, gmx_mm_hpr *c) @@ -276,7 +281,8 @@ gmx_2hpr_to_pr(gmx_mm_hpr a, gmx_mm_hpr b, gmx_simd_ref_pr *c) #ifndef TAB_FDV0 static gmx_inline void -load_table_f(const real *tab_coul_F, gmx_simd_ref_epi32 ti_S, int *ti, +load_table_f(const real *tab_coul_F, gmx_simd_ref_epi32 ti_S, + int gmx_unused *ti, gmx_simd_ref_pr *ctab0_S, gmx_simd_ref_pr *ctab1_S) { int i; @@ -397,8 +403,8 @@ static gmx_inline void gmx_mm_invsqrt2_pd(gmx_simd_ref_pr in0, gmx_simd_ref_pr in1, gmx_simd_ref_pr *out0, gmx_simd_ref_pr *out1) { - out0 = gmx_invsqrt_pr(in0); - out1 = gmx_invsqrt_pr(in1); + *out0 = gmx_invsqrt_pr(in0); + *out1 = gmx_invsqrt_pr(in1); } #endif diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h index 4208713676..57b0e46099 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h @@ -68,7 +68,7 @@ gmx_load_simd_4xn_interactions(int excl, gmx_mm_pb *interact_S2, gmx_mm_pb *interact_S3) { -#ifdef GMX_X86_SSE2 +#if defined GMX_X86_SSE2 || defined GMX_SIMD_REFERENCE_PLAIN_C /* Load integer interaction mask */ gmx_exclfilter mask_pr_S = gmx_load1_exclfilter(excl); *interact_S0 = gmx_checkbitmask_pb(mask_pr_S, filter_S0); diff --git a/src/gromacs/mdlib/pme.c b/src/gromacs/mdlib/pme.c index ad2ae70f42..ff52b5b9fe 100644 --- a/src/gromacs/mdlib/pme.c +++ b/src/gromacs/mdlib/pme.c @@ -88,9 +88,9 @@ /* Include the SIMD macro file and then check for support */ #include "gromacs/simd/macros.h" -#if defined GMX_HAVE_SIMD_MACROS && defined GMX_SIMD_HAVE_EXP +#if defined GMX_HAVE_SIMD_MACROS /* Turn on arbitrary width SIMD intrinsics for PME solve */ -#define PME_SIMD +#define PME_SIMD_SOLVE #endif #define PME_GRID_QA 0 /* Gridindex for A-state for Q */ @@ -1876,7 +1876,7 @@ static void realloc_work(pme_work_t *work, int nkx) /* Allocate an aligned pointer for SIMD operations, including extra * elements at the end for padding. */ -#ifdef PME_SIMD +#ifdef PME_SIMD_SOLVE simd_width = GMX_SIMD_WIDTH_HERE; #else /* We can use any alignment, apart from 0, so we use 4 */ @@ -1909,7 +1909,7 @@ static void free_work(pme_work_t *work) } -#ifdef PME_SIMD +#if defined PME_SIMD_SOLVE && defined GMX_SIMD_HAVE_EXP /* Calculate exponentials through SIMD */ inline static void calc_exponentials_q(int gmx_unused start, int end, real f, real *d_aligned, real *r_aligned, real *e_aligned) { @@ -1954,7 +1954,7 @@ inline static void calc_exponentials_q(int start, int end, real f, real *d, real } #endif -#ifdef PME_SIMD +#if defined PME_SIMD_SOLVE && defined GMX_SIMD_HAVE_ERFC /* Calculate exponentials through SIMD */ inline static void calc_exponentials_lj(int gmx_unused start, int end, real *r_aligned, real *factor_aligned, real *d_aligned) { diff --git a/src/gromacs/simd/general_x86_mic.h b/src/gromacs/simd/general_x86_mic.h index d354e09871..b41b42e09c 100644 --- a/src/gromacs/simd/general_x86_mic.h +++ b/src/gromacs/simd/general_x86_mic.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2013, by the GROMACS development team, led by + * Copyright (c) 2013,2014, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -137,6 +137,8 @@ gmx_cvttpr_epi32(gmx_mm_ps a) #define GMX_SIMD_HAVE_EXP #define gmx_exp_pr _mm512_exp_ps + +#define GMX_SIMD_HAVE_ERFC #define gmx_erfc_pr _mm512_erfc_ps #define GMX_SIMD_HAVE_TRIGONOMETRIC diff --git a/src/gromacs/simd/macros.h b/src/gromacs/simd/macros.h index ca1e499a61..d95b30923c 100644 --- a/src/gromacs/simd/macros.h +++ b/src/gromacs/simd/macros.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -54,7 +54,8 @@ #define GMX_HAVE_SIMD_MACROS /* In general the reference SIMD supports any SIMD width, including 1. - * See types/nb_verlet.h for details + * But the nbnxn SIMD 4xN kernels only support 2, 4, 8 and 2xNN only 8, 16, + * see types/nb_verlet.h for details */ #define GMX_SIMD_REF_WIDTH 4 @@ -212,6 +213,7 @@ /* exp and trigonometric functions are included above */ #define GMX_SIMD_HAVE_EXP +#define GMX_SIMD_HAVE_ERFC #define GMX_SIMD_HAVE_TRIGONOMETRIC #if !defined GMX_X86_AVX_256 || defined GMX_USE_HALF_WIDTH_SIMD_HERE @@ -755,6 +757,7 @@ static gmx_inline gmx_mm_pr gmx_always_inline gmx_atan2_pr(gmx_mm_pr a, gmx_mm_p #endif } +#define GMX_SIMD_HAVE_ERFC static gmx_inline gmx_mm_pr gmx_always_inline gmx_erfc_pr(gmx_mm_pr a) { /* The BG/Q qpxmath.h vector math library intended for use with -- 2.22.0