From c7600299d3e5f753d807252f8e8dbe7b5d7bce01 Mon Sep 17 00:00:00 2001 From: Erik Lindahl Date: Thu, 21 Aug 2014 08:35:41 +0200 Subject: [PATCH] Added negative zero preprocessor constants We had some discussions already when we started using negative zero that it could be fragile on some compilers, and apparently PGI is one of them. Maybe not the most important target, but it makes sense to have a common constant in one place, and it can also save some cycles in tight loops to clearly separate float from double versions so we avoid extra precision conversions. Change-Id: Id30a536b4f99f0310bfb2ec5185275c466cf5e07 --- src/gromacs/legacyheaders/types/simple.h | 37 +++++++++++++++---- .../simd/impl_intel_mic/impl_intel_mic.h | 8 ++-- .../impl_x86_avx_128_fma.h | 4 +- .../simd/impl_x86_avx_256/impl_x86_avx_256.h | 4 +- .../simd/impl_x86_sse2/impl_x86_sse2.h | 8 ++-- src/gromacs/simd/simd_math.h | 24 ++++++------ src/testutils/tests/testasserts_tests.cpp | 6 +-- 7 files changed, 56 insertions(+), 35 deletions(-) diff --git a/src/gromacs/legacyheaders/types/simple.h b/src/gromacs/legacyheaders/types/simple.h index bf1a5216af..de1ebdcb77 100644 --- a/src/gromacs/legacyheaders/types/simple.h +++ b/src/gromacs/legacyheaders/types/simple.h @@ -109,6 +109,25 @@ typedef int atom_id; /* To indicate an atoms id */ /*! \brief Minimum single precision value */ #define GMX_FLOAT_MIN 1.175494351E-38F +#ifdef __PGI +/* The portland group x86 C/C++ compilers do not treat negative zero initializers + * correctly, but "optimizes" them to positive zero, so we implement it explicitly. + * These constructs are optimized to simple loads at compile time. If you want to + * use them on other compilers those have to support gcc preprocessor extensions. + * Note: These initializers might be sensitive to the endianness (which can + * be different for byte and word order), so check that it works for your platform + * and add a separate section if necessary before adding to the ifdef above. + */ +# define GMX_DOUBLE_NEGZERO ({ const union { int di[2]; double d; } _gmx_dzero = {0, -2147483648}; _gmx_dzero.d; }) +# define GMX_FLOAT_NEGZERO ({ const union { int fi; float f; } _gmx_fzero = {-2147483648}; _gmx_fzero.f; }) +#else +/*! \brief Negative zero in double */ +# define GMX_DOUBLE_NEGZERO (-0.0) + +/*! \brief Negative zero in float */ +# define GMX_FLOAT_NEGZERO (-0.0f) +#endif + /* Check whether we already have a real type! */ #ifdef GMX_DOUBLE @@ -118,10 +137,11 @@ typedef double real; #define HAVE_REAL #endif -#define GMX_MPI_REAL MPI_DOUBLE -#define GMX_REAL_EPS GMX_DOUBLE_EPS -#define GMX_REAL_MIN GMX_DOUBLE_MIN -#define GMX_REAL_MAX GMX_DOUBLE_MAX +#define GMX_MPI_REAL MPI_DOUBLE +#define GMX_REAL_EPS GMX_DOUBLE_EPS +#define GMX_REAL_MIN GMX_DOUBLE_MIN +#define GMX_REAL_MAX GMX_DOUBLE_MAX +#define GMX_REAL_NEGZERO GMX_DOUBLE_NEGZERO #define gmx_real_fullprecision_pfmt "%21.14e" #else @@ -130,10 +150,11 @@ typedef float real; #define HAVE_REAL #endif -#define GMX_MPI_REAL MPI_FLOAT -#define GMX_REAL_EPS GMX_FLOAT_EPS -#define GMX_REAL_MIN GMX_FLOAT_MIN -#define GMX_REAL_MAX GMX_FLOAT_MAX +#define GMX_MPI_REAL MPI_FLOAT +#define GMX_REAL_EPS GMX_FLOAT_EPS +#define GMX_REAL_MIN GMX_FLOAT_MIN +#define GMX_REAL_MAX GMX_FLOAT_MAX +#define GMX_REAL_NEGZERO GMX_FLOAT_NEGZERO #define gmx_real_fullprecision_pfmt "%14.7e" #endif diff --git a/src/gromacs/simd/impl_intel_mic/impl_intel_mic.h b/src/gromacs/simd/impl_intel_mic/impl_intel_mic.h index 8c62c3a43b..e3d49126f0 100644 --- a/src/gromacs/simd/impl_intel_mic/impl_intel_mic.h +++ b/src/gromacs/simd/impl_intel_mic/impl_intel_mic.h @@ -98,7 +98,7 @@ #define gmx_simd_xor_f(a, b) _mm512_castsi512_ps(_mm512_xor_epi32(_mm512_castps_si512(a), _mm512_castps_si512(b))) #define gmx_simd_rsqrt_f _mm512_rsqrt23_ps #define gmx_simd_rcp_f _mm512_rcp23_ps -#define gmx_simd_fabs_f(x) gmx_simd_andnot_f(_mm512_set1_ps(-0.0), x) +#define gmx_simd_fabs_f(x) gmx_simd_andnot_f(_mm512_set1_ps(GMX_FLOAT_NEGZERO), x) #define gmx_simd_fneg_f(x) _mm512_addn_ps(x, _mm512_setzero_ps()) #define gmx_simd_max_f _mm512_gmax_ps #define gmx_simd_min_f _mm512_gmin_ps @@ -189,7 +189,7 @@ #define gmx_simd_xor_d(a, b) _mm512_castsi512_pd(_mm512_xor_epi32(_mm512_castpd_si512(a), _mm512_castpd_si512(b))) #define gmx_simd_rsqrt_d(x) _mm512_cvtpslo_pd(_mm512_rsqrt23_ps(_mm512_cvtpd_pslo(x))) #define gmx_simd_rcp_d(x) _mm512_cvtpslo_pd(_mm512_rcp23_ps(_mm512_cvtpd_pslo(x))) -#define gmx_simd_fabs_d(x) gmx_simd_andnot_d(_mm512_set1_pd(-0.0), x) +#define gmx_simd_fabs_d(x) gmx_simd_andnot_d(_mm512_set1_pd(GMX_DOUBLE_NEGZERO), x) #define gmx_simd_fneg_d(x) _mm512_addn_pd(x, _mm512_setzero_pd()) #define gmx_simd_max_d _mm512_gmax_pd #define gmx_simd_min_d _mm512_gmin_pd @@ -282,7 +282,7 @@ #define gmx_simd4_or_f(a, b) _mm512_castsi512_ps(_mm512_mask_or_epi32(_mm512_undefined_epi32(), gmx_simd4_mask, _mm512_castps_si512(a), _mm512_castps_si512(b))) #define gmx_simd4_xor_f(a, b) _mm512_castsi512_ps(_mm512_mask_xor_epi32(_mm512_undefined_epi32(), gmx_simd4_mask, _mm512_castps_si512(a), _mm512_castps_si512(b))) #define gmx_simd4_rsqrt_f(a) _mm512_mask_rsqrt23_ps(_mm512_undefined_ps(), gmx_simd4_mask, a) -#define gmx_simd4_fabs_f(x) gmx_simd4_andnot_f(_mm512_set1_ps(-0.0), x) +#define gmx_simd4_fabs_f(x) gmx_simd4_andnot_f(_mm512_set1_ps(GMX_FLOAT_NEGZERO), x) #define gmx_simd4_fneg_f(x) _mm512_mask_addn_ps(_mm512_undefined_ps(), gmx_simd4_mask, x, _mm512_setzero_ps()) #define gmx_simd4_max_f(a, b) _mm512_mask_gmax_ps(_mm512_undefined_ps(), gmx_simd4_mask, a, b) #define gmx_simd4_min_f(a, b) _mm512_mask_gmin_ps(_mm512_undefined_ps(), gmx_simd4_mask, a, b) @@ -325,7 +325,7 @@ #define gmx_simd4_or_d(a, b) _mm512_castsi512_pd(_mm512_mask_or_epi32(_mm512_undefined_epi32(), mask_loh, _mm512_castpd_si512(a), _mm512_castpd_si512(b))) #define gmx_simd4_xor_d(a, b) _mm512_castsi512_pd(_mm512_mask_xor_epi32(_mm512_undefined_epi32(), mask_loh, _mm512_castpd_si512(a), _mm512_castpd_si512(b))) #define gmx_simd4_rsqrt_d(a) _mm512_mask_cvtpslo_pd(_mm512_undefined_pd(), gmx_simd4_mask, _mm512_mask_rsqrt23_ps(_mm512_undefined_ps(), gmx_simd4_mask, _mm512_mask_cvtpd_pslo(_mm512_undefined_ps(), gmx_simd4_mask, x))) -#define gmx_simd4_fabs_d(x) gmx_simd4_andnot_d(_mm512_set1_pd(-0.0), x) +#define gmx_simd4_fabs_d(x) gmx_simd4_andnot_d(_mm512_set1_pd(GMX_DOUBLE_NEGZERO), x) #define gmx_simd4_fneg_d(x) _mm512_mask_addn_pd(_mm512_undefined_pd(), gmx_simd4_mask, x, _mm512_setzero_pd()) #define gmx_simd4_max_d(a, b) _mm512_mask_gmax_pd(_mm512_undefined_pd(), gmx_simd4_mask, a, b) #define gmx_simd4_min_d(a, b) _mm512_mask_gmin_pd(_mm512_undefined_pd(), gmx_simd4_mask, a, b) diff --git a/src/gromacs/simd/impl_x86_avx_128_fma/impl_x86_avx_128_fma.h b/src/gromacs/simd/impl_x86_avx_128_fma/impl_x86_avx_128_fma.h index 823bfe81b9..5b3096bee3 100644 --- a/src/gromacs/simd/impl_x86_avx_128_fma/impl_x86_avx_128_fma.h +++ b/src/gromacs/simd/impl_x86_avx_128_fma/impl_x86_avx_128_fma.h @@ -104,8 +104,8 @@ #define gmx_simd4_or_d _mm256_or_pd #define gmx_simd4_xor_d _mm256_xor_pd #define gmx_simd4_rsqrt_d(x) _mm256_cvtps_pd(_mm_rsqrt_ps(_mm256_cvtpd_ps(x))) -#define gmx_simd4_fabs_d(x) _mm256_andnot_pd(_mm256_set1_pd(-0.0), x) -#define gmx_simd4_fneg_d(x) _mm256_xor_pd(x, _mm256_set1_pd(-0.0)) +#define gmx_simd4_fabs_d(x) _mm256_andnot_pd(_mm256_set1_pd(GMX_DOUBLE_NEGZERO), x) +#define gmx_simd4_fneg_d(x) _mm256_xor_pd(x, _mm256_set1_pd(GMX_DOUBLE_NEGZERO)) #define gmx_simd4_max_d _mm256_max_pd #define gmx_simd4_min_d _mm256_min_pd #define gmx_simd4_round_d(x) _mm256_round_pd(x, _MM_FROUND_NINT) diff --git a/src/gromacs/simd/impl_x86_avx_256/impl_x86_avx_256.h b/src/gromacs/simd/impl_x86_avx_256/impl_x86_avx_256.h index d787708b4c..cff1a7e2cb 100644 --- a/src/gromacs/simd/impl_x86_avx_256/impl_x86_avx_256.h +++ b/src/gromacs/simd/impl_x86_avx_256/impl_x86_avx_256.h @@ -105,8 +105,8 @@ #define gmx_simd_xor_f _mm256_xor_ps #define gmx_simd_rsqrt_f _mm256_rsqrt_ps #define gmx_simd_rcp_f _mm256_rcp_ps -#define gmx_simd_fabs_f(x) _mm256_andnot_ps(_mm256_set1_ps(-0.0), x) -#define gmx_simd_fneg_f(x) _mm256_xor_ps(x, _mm256_set1_ps(-0.0)) +#define gmx_simd_fabs_f(x) _mm256_andnot_ps(_mm256_set1_ps(GMX_FLOAT_NEGZERO), x) +#define gmx_simd_fneg_f(x) _mm256_xor_ps(x, _mm256_set1_ps(GMX_FLOAT_NEGZERO)) #define gmx_simd_max_f _mm256_max_ps #define gmx_simd_min_f _mm256_min_ps #define gmx_simd_round_f(x) _mm256_round_ps(x, _MM_FROUND_NINT) diff --git a/src/gromacs/simd/impl_x86_sse2/impl_x86_sse2.h b/src/gromacs/simd/impl_x86_sse2/impl_x86_sse2.h index 5d6b195965..7aab27ae21 100644 --- a/src/gromacs/simd/impl_x86_sse2/impl_x86_sse2.h +++ b/src/gromacs/simd/impl_x86_sse2/impl_x86_sse2.h @@ -99,8 +99,8 @@ #define gmx_simd_xor_f _mm_xor_ps #define gmx_simd_rsqrt_f _mm_rsqrt_ps #define gmx_simd_rcp_f _mm_rcp_ps -#define gmx_simd_fabs_f(x) _mm_andnot_ps(_mm_set1_ps(-0.0), x) -#define gmx_simd_fneg_f(x) _mm_xor_ps(x, _mm_set1_ps(-0.0)) +#define gmx_simd_fabs_f(x) _mm_andnot_ps(_mm_set1_ps(GMX_FLOAT_NEGZERO), x) +#define gmx_simd_fneg_f(x) _mm_xor_ps(x, _mm_set1_ps(GMX_FLOAT_NEGZERO)) #define gmx_simd_max_f _mm_max_ps #define gmx_simd_min_f _mm_min_ps #define gmx_simd_round_f(x) _mm_cvtepi32_ps(_mm_cvtps_epi32(x)) @@ -183,8 +183,8 @@ #define gmx_simd_rsqrt_d(x) _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(x))) /* Don't use FMA for sqrt N-R iterations - this saves 1 instruction without FMA hardware */ #define gmx_simd_rcp_d(x) _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(x))) -#define gmx_simd_fabs_d(x) _mm_andnot_pd(_mm_set1_pd(-0.0), x) -#define gmx_simd_fneg_d(x) _mm_xor_pd(x, _mm_set1_pd(-0.0)) +#define gmx_simd_fabs_d(x) _mm_andnot_pd(_mm_set1_pd(GMX_DOUBLE_NEGZERO), x) +#define gmx_simd_fneg_d(x) _mm_xor_pd(x, _mm_set1_pd(GMX_DOUBLE_NEGZERO)) #define gmx_simd_max_d _mm_max_pd #define gmx_simd_min_d _mm_min_pd #define gmx_simd_round_d(x) _mm_cvtepi32_pd(_mm_cvtpd_epi32(x)) diff --git a/src/gromacs/simd/simd_math.h b/src/gromacs/simd/simd_math.h index 1f6dc08361..68ab6774ef 100644 --- a/src/gromacs/simd/simd_math.h +++ b/src/gromacs/simd/simd_math.h @@ -124,7 +124,7 @@ static gmx_inline gmx_simd_float_t gmx_simdcall gmx_simd_xor_sign_f(gmx_simd_float_t a, gmx_simd_float_t b) { #ifdef GMX_SIMD_HAVE_LOGICAL - return gmx_simd_xor_f(a, gmx_simd_and_f(gmx_simd_set1_f(-0.0), b)); + return gmx_simd_xor_f(a, gmx_simd_and_f(gmx_simd_set1_f(GMX_FLOAT_NEGZERO), b)); #else return gmx_simd_blendv_f(a, gmx_simd_fneg_f(a), gmx_simd_cmplt_f(b, gmx_simd_setzero_f())); #endif @@ -729,8 +729,8 @@ gmx_simd_sincos_f(gmx_simd_float_t x, gmx_simd_float_t *sinval, gmx_simd_float_t y = gmx_simd_round_f(z); mask = gmx_simd_cvt_fib2fb(gmx_simd_cmpeq_fi(gmx_simd_and_fi(iy, ione), gmx_simd_setzero_fi())); - ssign = gmx_simd_blendzero_f(gmx_simd_set1_f(-0.0f), gmx_simd_cvt_fib2fb(gmx_simd_cmpeq_fi(gmx_simd_and_fi(iy, itwo), itwo))); - csign = gmx_simd_blendzero_f(gmx_simd_set1_f(-0.0f), gmx_simd_cvt_fib2fb(gmx_simd_cmpeq_fi(gmx_simd_and_fi(gmx_simd_add_fi(iy, ione), itwo), itwo))); + ssign = gmx_simd_blendzero_f(gmx_simd_set1_f(GMX_FLOAT_NEGZERO), gmx_simd_cvt_fib2fb(gmx_simd_cmpeq_fi(gmx_simd_and_fi(iy, itwo), itwo))); + csign = gmx_simd_blendzero_f(gmx_simd_set1_f(GMX_FLOAT_NEGZERO), gmx_simd_cvt_fib2fb(gmx_simd_cmpeq_fi(gmx_simd_and_fi(gmx_simd_add_fi(iy, ione), itwo), itwo))); #else const gmx_simd_float_t quarter = gmx_simd_set1_f(0.25f); const gmx_simd_float_t minusquarter = gmx_simd_set1_f(-0.25f); @@ -766,8 +766,8 @@ gmx_simd_sincos_f(gmx_simd_float_t x, gmx_simd_float_t *sinval, gmx_simd_float_t * active or inactive - you will get errors if only one is used. */ # ifdef GMX_SIMD_HAVE_LOGICAL - ssign = gmx_simd_and_f(ssign, gmx_simd_set1_f(-0.0f)); - csign = gmx_simd_andnot_f(q, gmx_simd_set1_f(-0.0f)); + ssign = gmx_simd_and_f(ssign, gmx_simd_set1_f(GMX_FLOAT_NEGZERO)); + csign = gmx_simd_andnot_f(q, gmx_simd_set1_f(GMX_FLOAT_NEGZERO)); ssign = gmx_simd_xor_f(ssign, csign); # else csign = gmx_simd_xor_sign_f(gmx_simd_set1_f(-1.0f), q); @@ -884,7 +884,7 @@ gmx_simd_tan_f(gmx_simd_float_t x) x = gmx_simd_fnmadd_f(y, argred1, x); x = gmx_simd_fnmadd_f(y, argred2, x); x = gmx_simd_fnmadd_f(y, argred3, x); - x = gmx_simd_xor_f(gmx_simd_blendzero_f(gmx_simd_set1_f(-0.0f), mask), x); + x = gmx_simd_xor_f(gmx_simd_blendzero_f(gmx_simd_set1_f(GMX_FLOAT_NEGZERO), mask), x); #else const gmx_simd_float_t quarter = gmx_simd_set1_f(0.25f); const gmx_simd_float_t half = gmx_simd_set1_f(0.5f); @@ -1338,7 +1338,7 @@ static gmx_inline gmx_simd_double_t gmx_simdcall gmx_simd_xor_sign_d(gmx_simd_double_t a, gmx_simd_double_t b) { #ifdef GMX_SIMD_HAVE_LOGICAL - return gmx_simd_xor_d(a, gmx_simd_and_d(gmx_simd_set1_d(-0.0), b)); + return gmx_simd_xor_d(a, gmx_simd_and_d(gmx_simd_set1_d(GMX_DOUBLE_NEGZERO), b)); #else return gmx_simd_blendv_d(a, gmx_simd_fneg_d(a), gmx_simd_cmplt_d(b, gmx_simd_setzero_d())); #endif @@ -2025,8 +2025,8 @@ gmx_simd_sincos_d(gmx_simd_double_t x, gmx_simd_double_t *sinval, gmx_simd_doubl y = gmx_simd_round_d(z); mask = gmx_simd_cvt_dib2db(gmx_simd_cmpeq_di(gmx_simd_and_di(iy, ione), gmx_simd_setzero_di())); - ssign = gmx_simd_blendzero_d(gmx_simd_set1_d(-0.0), gmx_simd_cvt_dib2db(gmx_simd_cmpeq_di(gmx_simd_and_di(iy, itwo), itwo))); - csign = gmx_simd_blendzero_d(gmx_simd_set1_d(-0.0), gmx_simd_cvt_dib2db(gmx_simd_cmpeq_di(gmx_simd_and_di(gmx_simd_add_di(iy, ione), itwo), itwo))); + ssign = gmx_simd_blendzero_d(gmx_simd_set1_d(GMX_DOUBLE_NEGZERO), gmx_simd_cvt_dib2db(gmx_simd_cmpeq_di(gmx_simd_and_di(iy, itwo), itwo))); + csign = gmx_simd_blendzero_d(gmx_simd_set1_d(GMX_DOUBLE_NEGZERO), gmx_simd_cvt_dib2db(gmx_simd_cmpeq_di(gmx_simd_and_di(gmx_simd_add_di(iy, ione), itwo), itwo))); #else const gmx_simd_double_t quarter = gmx_simd_set1_d(0.25); const gmx_simd_double_t minusquarter = gmx_simd_set1_d(-0.25); @@ -2062,8 +2062,8 @@ gmx_simd_sincos_d(gmx_simd_double_t x, gmx_simd_double_t *sinval, gmx_simd_doubl * active or inactive - you will get errors if only one is used. */ # ifdef GMX_SIMD_HAVE_LOGICAL - ssign = gmx_simd_and_d(ssign, gmx_simd_set1_d(-0.0)); - csign = gmx_simd_andnot_d(q, gmx_simd_set1_d(-0.0)); + ssign = gmx_simd_and_d(ssign, gmx_simd_set1_d(GMX_DOUBLE_NEGZERO)); + csign = gmx_simd_andnot_d(q, gmx_simd_set1_d(GMX_DOUBLE_NEGZERO)); ssign = gmx_simd_xor_d(ssign, csign); # else csign = gmx_simd_xor_sign_d(gmx_simd_set1_d(-1.0), q); @@ -2179,7 +2179,7 @@ gmx_simd_tan_d(gmx_simd_double_t x) x = gmx_simd_fnmadd_d(y, argred1, x); x = gmx_simd_fnmadd_d(y, argred2, x); x = gmx_simd_fnmadd_d(y, argred3, x); - x = gmx_simd_xor_d(gmx_simd_blendzero_d(gmx_simd_set1_d(-0.0), mask), x); + x = gmx_simd_xor_d(gmx_simd_blendzero_d(gmx_simd_set1_d(GMX_DOUBLE_NEGZERO), mask), x); #else const gmx_simd_double_t quarter = gmx_simd_set1_d(0.25); const gmx_simd_double_t half = gmx_simd_set1_d(0.5); diff --git a/src/testutils/tests/testasserts_tests.cpp b/src/testutils/tests/testasserts_tests.cpp index 111bbc5e62..c586374863 100644 --- a/src/testutils/tests/testasserts_tests.cpp +++ b/src/testutils/tests/testasserts_tests.cpp @@ -75,7 +75,7 @@ TEST(FloatingPointDifferenceTest, HandlesEqualValues) TEST(FloatingPointDifferenceTest, HandlesZerosOfDifferentSign) { - FloatingPointDifference diff(0.0, -0.0); + FloatingPointDifference diff(0.0, GMX_DOUBLE_NEGZERO); EXPECT_FALSE(diff.isNaN()); EXPECT_EQ(0.0, diff.asAbsolute()); EXPECT_EQ(0U, diff.asUlps()); @@ -91,7 +91,7 @@ TEST(FloatingPointDifferenceTest, HandlesSignComparisonWithZero) EXPECT_TRUE(diff.signsDiffer()); } { - FloatingPointDifference diff(-0.0, -1.2); + FloatingPointDifference diff(GMX_DOUBLE_NEGZERO, -1.2); EXPECT_FALSE(diff.isNaN()); EXPECT_DOUBLE_EQ(1.2, diff.asAbsolute()); EXPECT_FALSE(diff.signsDiffer()); @@ -120,7 +120,7 @@ TEST(FloatingPointDifferenceTest, HandlesUlpDifferences) TEST(FloatingPointDifferenceTest, HandlesUlpDifferenceAcrossZero) { - const double first = addUlps(-0.0, 2); + const double first = addUlps(GMX_DOUBLE_NEGZERO, 2); const double second = addUlps( 0.0, 2); FloatingPointDifference diff(first, second); EXPECT_FALSE(diff.isNaN()); -- 2.22.0