/*! \brief Minimum single precision value */
#define GMX_FLOAT_MIN 1.175494351E-38F
+#ifdef __PGI
+/* The Portland Group x86 C/C++ compilers do not treat negative zero initializers
+ * correctly, but "optimize" them to positive zero, so we implement it explicitly.
+ * These constructs are optimized to simple loads at compile time. If you want to
+ * use them on other compilers, those have to support gcc preprocessor extensions.
+ * Note: These initializers might be sensitive to the endianness (which can
+ * be different for byte and word order), so check that it works for your platform
+ * and add a separate section if necessary before adding to the ifdef above.
+ */
+# define GMX_DOUBLE_NEGZERO ({ const union { int di[2]; double d; } _gmx_dzero = {0, -2147483648}; _gmx_dzero.d; })
+# define GMX_FLOAT_NEGZERO ({ const union { int fi; float f; } _gmx_fzero = {-2147483648}; _gmx_fzero.f; })
+#else
+/*! \brief Negative zero in double */
+# define GMX_DOUBLE_NEGZERO (-0.0)
+
+/*! \brief Negative zero in float */
+# define GMX_FLOAT_NEGZERO (-0.0f)
+#endif
+
/* Check whether we already have a real type! */
#ifdef GMX_DOUBLE
#define HAVE_REAL
#endif
-#define GMX_MPI_REAL MPI_DOUBLE
-#define GMX_REAL_EPS GMX_DOUBLE_EPS
-#define GMX_REAL_MIN GMX_DOUBLE_MIN
-#define GMX_REAL_MAX GMX_DOUBLE_MAX
+#define GMX_MPI_REAL MPI_DOUBLE
+#define GMX_REAL_EPS GMX_DOUBLE_EPS
+#define GMX_REAL_MIN GMX_DOUBLE_MIN
+#define GMX_REAL_MAX GMX_DOUBLE_MAX
+#define GMX_REAL_NEGZERO GMX_DOUBLE_NEGZERO
#define gmx_real_fullprecision_pfmt "%21.14e"
#else
#define HAVE_REAL
#endif
-#define GMX_MPI_REAL MPI_FLOAT
-#define GMX_REAL_EPS GMX_FLOAT_EPS
-#define GMX_REAL_MIN GMX_FLOAT_MIN
-#define GMX_REAL_MAX GMX_FLOAT_MAX
+#define GMX_MPI_REAL MPI_FLOAT
+#define GMX_REAL_EPS GMX_FLOAT_EPS
+#define GMX_REAL_MIN GMX_FLOAT_MIN
+#define GMX_REAL_MAX GMX_FLOAT_MAX
+#define GMX_REAL_NEGZERO GMX_FLOAT_NEGZERO
#define gmx_real_fullprecision_pfmt "%14.7e"
#endif
#define gmx_simd_xor_f(a, b) _mm512_castsi512_ps(_mm512_xor_epi32(_mm512_castps_si512(a), _mm512_castps_si512(b)))
#define gmx_simd_rsqrt_f _mm512_rsqrt23_ps
#define gmx_simd_rcp_f _mm512_rcp23_ps
-#define gmx_simd_fabs_f(x) gmx_simd_andnot_f(_mm512_set1_ps(-0.0), x)
+#define gmx_simd_fabs_f(x) gmx_simd_andnot_f(_mm512_set1_ps(GMX_FLOAT_NEGZERO), x)
#define gmx_simd_fneg_f(x) _mm512_addn_ps(x, _mm512_setzero_ps())
#define gmx_simd_max_f _mm512_gmax_ps
#define gmx_simd_min_f _mm512_gmin_ps
#define gmx_simd_xor_d(a, b) _mm512_castsi512_pd(_mm512_xor_epi32(_mm512_castpd_si512(a), _mm512_castpd_si512(b)))
#define gmx_simd_rsqrt_d(x) _mm512_cvtpslo_pd(_mm512_rsqrt23_ps(_mm512_cvtpd_pslo(x)))
#define gmx_simd_rcp_d(x) _mm512_cvtpslo_pd(_mm512_rcp23_ps(_mm512_cvtpd_pslo(x)))
-#define gmx_simd_fabs_d(x) gmx_simd_andnot_d(_mm512_set1_pd(-0.0), x)
+#define gmx_simd_fabs_d(x) gmx_simd_andnot_d(_mm512_set1_pd(GMX_DOUBLE_NEGZERO), x)
#define gmx_simd_fneg_d(x) _mm512_addn_pd(x, _mm512_setzero_pd())
#define gmx_simd_max_d _mm512_gmax_pd
#define gmx_simd_min_d _mm512_gmin_pd
#define gmx_simd4_or_f(a, b) _mm512_castsi512_ps(_mm512_mask_or_epi32(_mm512_undefined_epi32(), gmx_simd4_mask, _mm512_castps_si512(a), _mm512_castps_si512(b)))
#define gmx_simd4_xor_f(a, b) _mm512_castsi512_ps(_mm512_mask_xor_epi32(_mm512_undefined_epi32(), gmx_simd4_mask, _mm512_castps_si512(a), _mm512_castps_si512(b)))
#define gmx_simd4_rsqrt_f(a) _mm512_mask_rsqrt23_ps(_mm512_undefined_ps(), gmx_simd4_mask, a)
-#define gmx_simd4_fabs_f(x) gmx_simd4_andnot_f(_mm512_set1_ps(-0.0), x)
+#define gmx_simd4_fabs_f(x) gmx_simd4_andnot_f(_mm512_set1_ps(GMX_FLOAT_NEGZERO), x)
#define gmx_simd4_fneg_f(x) _mm512_mask_addn_ps(_mm512_undefined_ps(), gmx_simd4_mask, x, _mm512_setzero_ps())
#define gmx_simd4_max_f(a, b) _mm512_mask_gmax_ps(_mm512_undefined_ps(), gmx_simd4_mask, a, b)
#define gmx_simd4_min_f(a, b) _mm512_mask_gmin_ps(_mm512_undefined_ps(), gmx_simd4_mask, a, b)
#define gmx_simd4_or_d(a, b) _mm512_castsi512_pd(_mm512_mask_or_epi32(_mm512_undefined_epi32(), mask_loh, _mm512_castpd_si512(a), _mm512_castpd_si512(b)))
#define gmx_simd4_xor_d(a, b) _mm512_castsi512_pd(_mm512_mask_xor_epi32(_mm512_undefined_epi32(), mask_loh, _mm512_castpd_si512(a), _mm512_castpd_si512(b)))
/* Approximate 1/sqrt for SIMD4 double on KNC: convert the low doubles to single,
 * use the 23-bit rsqrt approximation, and convert back to double.
 * Bug fix: the macro parameter is 'a', but the body previously referenced an
 * undefined identifier 'x', which would fail to compile (or silently capture an
 * unrelated 'x' at the expansion site). All uses now refer to 'a'.
 */
#define gmx_simd4_rsqrt_d(a) _mm512_mask_cvtpslo_pd(_mm512_undefined_pd(), gmx_simd4_mask, _mm512_mask_rsqrt23_ps(_mm512_undefined_ps(), gmx_simd4_mask, _mm512_mask_cvtpd_pslo(_mm512_undefined_ps(), gmx_simd4_mask, a)))
-#define gmx_simd4_fabs_d(x) gmx_simd4_andnot_d(_mm512_set1_pd(-0.0), x)
+#define gmx_simd4_fabs_d(x) gmx_simd4_andnot_d(_mm512_set1_pd(GMX_DOUBLE_NEGZERO), x)
#define gmx_simd4_fneg_d(x) _mm512_mask_addn_pd(_mm512_undefined_pd(), gmx_simd4_mask, x, _mm512_setzero_pd())
#define gmx_simd4_max_d(a, b) _mm512_mask_gmax_pd(_mm512_undefined_pd(), gmx_simd4_mask, a, b)
#define gmx_simd4_min_d(a, b) _mm512_mask_gmin_pd(_mm512_undefined_pd(), gmx_simd4_mask, a, b)
#define gmx_simd4_or_d _mm256_or_pd
#define gmx_simd4_xor_d _mm256_xor_pd
#define gmx_simd4_rsqrt_d(x) _mm256_cvtps_pd(_mm_rsqrt_ps(_mm256_cvtpd_ps(x)))
-#define gmx_simd4_fabs_d(x) _mm256_andnot_pd(_mm256_set1_pd(-0.0), x)
-#define gmx_simd4_fneg_d(x) _mm256_xor_pd(x, _mm256_set1_pd(-0.0))
+#define gmx_simd4_fabs_d(x) _mm256_andnot_pd(_mm256_set1_pd(GMX_DOUBLE_NEGZERO), x)
+#define gmx_simd4_fneg_d(x) _mm256_xor_pd(x, _mm256_set1_pd(GMX_DOUBLE_NEGZERO))
#define gmx_simd4_max_d _mm256_max_pd
#define gmx_simd4_min_d _mm256_min_pd
#define gmx_simd4_round_d(x) _mm256_round_pd(x, _MM_FROUND_NINT)
#define gmx_simd_xor_f _mm256_xor_ps
#define gmx_simd_rsqrt_f _mm256_rsqrt_ps
#define gmx_simd_rcp_f _mm256_rcp_ps
-#define gmx_simd_fabs_f(x) _mm256_andnot_ps(_mm256_set1_ps(-0.0), x)
-#define gmx_simd_fneg_f(x) _mm256_xor_ps(x, _mm256_set1_ps(-0.0))
+#define gmx_simd_fabs_f(x) _mm256_andnot_ps(_mm256_set1_ps(GMX_FLOAT_NEGZERO), x)
+#define gmx_simd_fneg_f(x) _mm256_xor_ps(x, _mm256_set1_ps(GMX_FLOAT_NEGZERO))
#define gmx_simd_max_f _mm256_max_ps
#define gmx_simd_min_f _mm256_min_ps
#define gmx_simd_round_f(x) _mm256_round_ps(x, _MM_FROUND_NINT)
#define gmx_simd_xor_f _mm_xor_ps
#define gmx_simd_rsqrt_f _mm_rsqrt_ps
#define gmx_simd_rcp_f _mm_rcp_ps
-#define gmx_simd_fabs_f(x) _mm_andnot_ps(_mm_set1_ps(-0.0), x)
-#define gmx_simd_fneg_f(x) _mm_xor_ps(x, _mm_set1_ps(-0.0))
+#define gmx_simd_fabs_f(x) _mm_andnot_ps(_mm_set1_ps(GMX_FLOAT_NEGZERO), x)
+#define gmx_simd_fneg_f(x) _mm_xor_ps(x, _mm_set1_ps(GMX_FLOAT_NEGZERO))
#define gmx_simd_max_f _mm_max_ps
#define gmx_simd_min_f _mm_min_ps
#define gmx_simd_round_f(x) _mm_cvtepi32_ps(_mm_cvtps_epi32(x))
#define gmx_simd_rsqrt_d(x) _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(x)))
/* Don't use FMA for sqrt N-R iterations - this saves 1 instruction without FMA hardware */
#define gmx_simd_rcp_d(x) _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(x)))
-#define gmx_simd_fabs_d(x) _mm_andnot_pd(_mm_set1_pd(-0.0), x)
-#define gmx_simd_fneg_d(x) _mm_xor_pd(x, _mm_set1_pd(-0.0))
+#define gmx_simd_fabs_d(x) _mm_andnot_pd(_mm_set1_pd(GMX_DOUBLE_NEGZERO), x)
+#define gmx_simd_fneg_d(x) _mm_xor_pd(x, _mm_set1_pd(GMX_DOUBLE_NEGZERO))
#define gmx_simd_max_d _mm_max_pd
#define gmx_simd_min_d _mm_min_pd
#define gmx_simd_round_d(x) _mm_cvtepi32_pd(_mm_cvtpd_epi32(x))
gmx_simd_xor_sign_f(gmx_simd_float_t a, gmx_simd_float_t b)
{
#ifdef GMX_SIMD_HAVE_LOGICAL
- return gmx_simd_xor_f(a, gmx_simd_and_f(gmx_simd_set1_f(-0.0), b));
+ return gmx_simd_xor_f(a, gmx_simd_and_f(gmx_simd_set1_f(GMX_FLOAT_NEGZERO), b));
#else
return gmx_simd_blendv_f(a, gmx_simd_fneg_f(a), gmx_simd_cmplt_f(b, gmx_simd_setzero_f()));
#endif
y = gmx_simd_round_f(z);
mask = gmx_simd_cvt_fib2fb(gmx_simd_cmpeq_fi(gmx_simd_and_fi(iy, ione), gmx_simd_setzero_fi()));
- ssign = gmx_simd_blendzero_f(gmx_simd_set1_f(-0.0f), gmx_simd_cvt_fib2fb(gmx_simd_cmpeq_fi(gmx_simd_and_fi(iy, itwo), itwo)));
- csign = gmx_simd_blendzero_f(gmx_simd_set1_f(-0.0f), gmx_simd_cvt_fib2fb(gmx_simd_cmpeq_fi(gmx_simd_and_fi(gmx_simd_add_fi(iy, ione), itwo), itwo)));
+ ssign = gmx_simd_blendzero_f(gmx_simd_set1_f(GMX_FLOAT_NEGZERO), gmx_simd_cvt_fib2fb(gmx_simd_cmpeq_fi(gmx_simd_and_fi(iy, itwo), itwo)));
+ csign = gmx_simd_blendzero_f(gmx_simd_set1_f(GMX_FLOAT_NEGZERO), gmx_simd_cvt_fib2fb(gmx_simd_cmpeq_fi(gmx_simd_and_fi(gmx_simd_add_fi(iy, ione), itwo), itwo)));
#else
const gmx_simd_float_t quarter = gmx_simd_set1_f(0.25f);
const gmx_simd_float_t minusquarter = gmx_simd_set1_f(-0.25f);
* active or inactive - you will get errors if only one is used.
*/
# ifdef GMX_SIMD_HAVE_LOGICAL
- ssign = gmx_simd_and_f(ssign, gmx_simd_set1_f(-0.0f));
- csign = gmx_simd_andnot_f(q, gmx_simd_set1_f(-0.0f));
+ ssign = gmx_simd_and_f(ssign, gmx_simd_set1_f(GMX_FLOAT_NEGZERO));
+ csign = gmx_simd_andnot_f(q, gmx_simd_set1_f(GMX_FLOAT_NEGZERO));
ssign = gmx_simd_xor_f(ssign, csign);
# else
csign = gmx_simd_xor_sign_f(gmx_simd_set1_f(-1.0f), q);
x = gmx_simd_fnmadd_f(y, argred1, x);
x = gmx_simd_fnmadd_f(y, argred2, x);
x = gmx_simd_fnmadd_f(y, argred3, x);
- x = gmx_simd_xor_f(gmx_simd_blendzero_f(gmx_simd_set1_f(-0.0f), mask), x);
+ x = gmx_simd_xor_f(gmx_simd_blendzero_f(gmx_simd_set1_f(GMX_FLOAT_NEGZERO), mask), x);
#else
const gmx_simd_float_t quarter = gmx_simd_set1_f(0.25f);
const gmx_simd_float_t half = gmx_simd_set1_f(0.5f);
gmx_simd_xor_sign_d(gmx_simd_double_t a, gmx_simd_double_t b)
{
#ifdef GMX_SIMD_HAVE_LOGICAL
- return gmx_simd_xor_d(a, gmx_simd_and_d(gmx_simd_set1_d(-0.0), b));
+ return gmx_simd_xor_d(a, gmx_simd_and_d(gmx_simd_set1_d(GMX_DOUBLE_NEGZERO), b));
#else
return gmx_simd_blendv_d(a, gmx_simd_fneg_d(a), gmx_simd_cmplt_d(b, gmx_simd_setzero_d()));
#endif
y = gmx_simd_round_d(z);
mask = gmx_simd_cvt_dib2db(gmx_simd_cmpeq_di(gmx_simd_and_di(iy, ione), gmx_simd_setzero_di()));
- ssign = gmx_simd_blendzero_d(gmx_simd_set1_d(-0.0), gmx_simd_cvt_dib2db(gmx_simd_cmpeq_di(gmx_simd_and_di(iy, itwo), itwo)));
- csign = gmx_simd_blendzero_d(gmx_simd_set1_d(-0.0), gmx_simd_cvt_dib2db(gmx_simd_cmpeq_di(gmx_simd_and_di(gmx_simd_add_di(iy, ione), itwo), itwo)));
+ ssign = gmx_simd_blendzero_d(gmx_simd_set1_d(GMX_DOUBLE_NEGZERO), gmx_simd_cvt_dib2db(gmx_simd_cmpeq_di(gmx_simd_and_di(iy, itwo), itwo)));
+ csign = gmx_simd_blendzero_d(gmx_simd_set1_d(GMX_DOUBLE_NEGZERO), gmx_simd_cvt_dib2db(gmx_simd_cmpeq_di(gmx_simd_and_di(gmx_simd_add_di(iy, ione), itwo), itwo)));
#else
const gmx_simd_double_t quarter = gmx_simd_set1_d(0.25);
const gmx_simd_double_t minusquarter = gmx_simd_set1_d(-0.25);
* active or inactive - you will get errors if only one is used.
*/
# ifdef GMX_SIMD_HAVE_LOGICAL
- ssign = gmx_simd_and_d(ssign, gmx_simd_set1_d(-0.0));
- csign = gmx_simd_andnot_d(q, gmx_simd_set1_d(-0.0));
+ ssign = gmx_simd_and_d(ssign, gmx_simd_set1_d(GMX_DOUBLE_NEGZERO));
+ csign = gmx_simd_andnot_d(q, gmx_simd_set1_d(GMX_DOUBLE_NEGZERO));
ssign = gmx_simd_xor_d(ssign, csign);
# else
csign = gmx_simd_xor_sign_d(gmx_simd_set1_d(-1.0), q);
x = gmx_simd_fnmadd_d(y, argred1, x);
x = gmx_simd_fnmadd_d(y, argred2, x);
x = gmx_simd_fnmadd_d(y, argred3, x);
- x = gmx_simd_xor_d(gmx_simd_blendzero_d(gmx_simd_set1_d(-0.0), mask), x);
+ x = gmx_simd_xor_d(gmx_simd_blendzero_d(gmx_simd_set1_d(GMX_DOUBLE_NEGZERO), mask), x);
#else
const gmx_simd_double_t quarter = gmx_simd_set1_d(0.25);
const gmx_simd_double_t half = gmx_simd_set1_d(0.5);
TEST(FloatingPointDifferenceTest, HandlesZerosOfDifferentSign)
{
- FloatingPointDifference diff(0.0, -0.0);
+ FloatingPointDifference diff(0.0, GMX_DOUBLE_NEGZERO);
EXPECT_FALSE(diff.isNaN());
EXPECT_EQ(0.0, diff.asAbsolute());
EXPECT_EQ(0U, diff.asUlps());
EXPECT_TRUE(diff.signsDiffer());
}
{
- FloatingPointDifference diff(-0.0, -1.2);
+ FloatingPointDifference diff(GMX_DOUBLE_NEGZERO, -1.2);
EXPECT_FALSE(diff.isNaN());
EXPECT_DOUBLE_EQ(1.2, diff.asAbsolute());
EXPECT_FALSE(diff.signsDiffer());
TEST(FloatingPointDifferenceTest, HandlesUlpDifferenceAcrossZero)
{
- const double first = addUlps(-0.0, 2);
+ const double first = addUlps(GMX_DOUBLE_NEGZERO, 2);
const double second = addUlps( 0.0, 2);
FloatingPointDifference diff(first, second);
EXPECT_FALSE(diff.isNaN());