2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2017,2018,2019,2020,2021, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gromacs/simd/simd_math.h"
48 #include "gromacs/math/units.h"
49 #include "gromacs/math/utilities.h"
50 #include "gromacs/options/basicoptions.h"
51 #include "gromacs/simd/simd.h"
53 #include "testutils/refdata.h"
54 #include "testutils/testasserts.h"
66 /*! \addtogroup module_simd */
69 # if GMX_SIMD_HAVE_REAL
71 class SimdMathTest : public SimdTest
74 /*! \brief Type for half-open intervals specifying test ranges */
75 typedef std::pair<real, real> Range;
77 /*! \brief Control what is considered matching values
79 * Normal simply means that we request the values to be equal
80 * to within the specified tolerance.
81 * However, there are also two more cases that are special:
83 * - Even if we only care about normal (i.e., not denormal) values, some math
84 * libraries might clamp the value to zero, which means our SIMD output
85 * might not match their values. By using MatchRule::Dtz, we will consider
86 * all values both from the reference and test functions that are within the
87 * requested ulp tolerance of a denormal number to be equivalent to 0.0.
88 * - For some older architectures without fused multiply-add units (e.g. x86 SSE2),
89 * we might end up clamping the results to zero just before reaching
90 * denormal output, since the intermediate results e.g. in polynomial
91 * approximations can be smaller than the final one. We often simply don't
92 * care about those values, and then one can use
93 * MatchRule::ReferenceOrZero to allow the test value to either match
94 * the reference or be zero.
98 Normal, //!< Match function values
99 Dtz, //!< Match function values after setting denormals to zero both in test and reference
100 ReferenceOrZero, //!< Test values can either match reference or be zero
103 const std::map<MatchRule, std::string> matchRuleNames_ = {
104 { MatchRule::Normal, "Test should match reference." },
105 { MatchRule::Dtz, "Test should match reference, with denormals treated as 0.0." },
106 { MatchRule::ReferenceOrZero, "Test should match reference or 0.0." }
109 /*! \brief Settings used for simd math function comparisons */
110 struct CompareSettings
112 Range range; //!< Range over which to test function
113 std::int64_t ulpTol; //!< Ulp tolerance
114 real absTol; //!< Absolute tolerance
115 MatchRule matchRule; //!< Decide what we consider a match
118 ::testing::AssertionResult compareSimdMathFunction(const char* refFuncExpr,
119 const char* simdFuncExpr,
120 const char* compareSettingsExpr,
121 real refFunc(real x),
122 SimdReal gmx_simdcall simdFunc(SimdReal x),
123 const CompareSettings& compareSettings);
125 /*! \brief Generate test point vector
127 * \param range The test interval, half open. Upper limit is not included.
128 * Pass by value, since we need to modify in method anyway.
129 * \param points Number of points to generate. This might be increased
130 * slightly to account both for extra special values like 0.0
131 * and the SIMD width.
133 * This routine generates a vector with test points separated by constant
134 * multiplicative factors, based on the range and number of points in the
135 * class. If the range includes both negative and positive values, points
136 * will be generated separately for the negative/positive intervals down
137 * to the smallest real number that can be represented, and we also include
140 * This is highly useful for large test ranges. For example, with a linear
141 * 1000-point division of the range (1,1e10) the first three values to test
142 * would be 1, 10000000.999, and 20000000.998, etc. For large values we would
143 * commonly hit the point where adding the small delta has no effect due to
144 * limited numerical precision.
145 * When we instead use this routine, the values will be 1, 1.0239, 1.0471, etc.
146 * This will spread the entropy over all bits in the IEEE754 representation,
147 * and be a much better test of all potential input values.
149 * \note We do not use the static variable s_nPoints in the parent class
150 * to avoid altering any value the user has set on the command line; since
151 * it's a static member, changing it would have permanent effect.
153 static std::vector<real> generateTestPoints(Range range, std::size_t points);
155 /*! \brief Test routine for the test point vector generation
157 static void generateTestPointsTest();
160 /*! \brief Test approximate equality of SIMD vs reference version of a function.
162 * This macro takes vanilla C and SIMD flavors of a function and tests it with
163 * the number of points, range, and tolerances specified by the test fixture class.
165 * The third option controls the range, tolerances, and match settings.
167 # define GMX_EXPECT_SIMD_FUNC_NEAR(refFunc, tstFunc, compareSettings) \
168 EXPECT_PRED_FORMAT3(compareSimdMathFunction, refFunc, tstFunc, compareSettings)
170 std::vector<real> SimdMathTest::generateTestPoints(Range inputRange, std::size_t inputPoints)
173 std::vector<real> testPoints;
174 testPoints.reserve(inputPoints);
176 GMX_RELEASE_ASSERT(inputRange.first < inputRange.second,
177 "The start of the interval must come before the end");
179 std::vector<Range> testRanges;
181 if (inputRange.first < 0 && inputRange.second > 0)
183 testRanges.emplace_back(Range({ inputRange.first, -std::numeric_limits<real>::min() }));
184 testRanges.emplace_back(Range({ 0.0, inputRange.second }));
188 if (inputRange.second == 0)
190 inputRange.second = -std::numeric_limits<real>::min();
191 inputRange.first = std::min(inputRange.first, inputRange.second);
193 testRanges.push_back(inputRange);
196 for (Range& range : testRanges)
198 std::size_t points = inputPoints / testRanges.size();
200 // The value 0 is special, and can only occur at the start of
201 // the interval after the corrections outside this loop.
202 // Add it explicitly, and adjust the interval to continue
203 // at the first valid non-zero positive number.
204 if (range.first == 0)
206 testPoints.push_back(0.0);
207 range.first = std::numeric_limits<real>::min();
208 points--; // Used one point
213 std::conditional<sizeof(real) == sizeof(double), std::int64_t, std::int32_t>::type i;
217 high.r = range.second;
219 // IEEE754 floating-point numbers have the cool property that for any range of
220 // constant sign, for all non-zero numbers a constant (i.e., linear) difference
221 // in the bitwise representation corresponds to a constant multiplicative factor.
223 // Divide the ulp difference evenly
224 std::int64_t ulpDiff = high.i - low.i;
225 // dividend and divisor must both be signed types
226 std::int64_t ulpDelta = ulpDiff / static_cast<std::int64_t>(points);
227 std::int64_t minUlpDelta = (ulpDiff > 0) ? 1 : -1;
231 // Very short interval or very many points caused round-to-zero.
232 // Select the smallest possible change, which is one ulp (with correct sign)
233 ulpDelta = minUlpDelta;
234 points = std::abs(ulpDiff);
238 // Use an index-based loop to avoid floating-point comparisons with
239 // values that might have overflowed. Save one point for the very last
240 // bitwise value that is part of the interval
241 for (std::size_t i = 0; i < points - 1; i++)
243 testPoints.push_back(x.r);
247 // Make sure we test the very last point that is inside the interval
250 testPoints.push_back(x.r);
255 /*! \brief Implementation routine to compare SIMD vs reference functions.
257 * \param refFuncExpr Description of reference function expression
258 * \param simdFuncExpr Description of SIMD function expression
259 * \param compareSettingsExpr Description of compareSettings
260 * \param refFunc Reference math function pointer
261 * \param simdFunc SIMD math function pointer
262 * \param compareSettings Structure with the range, tolerances, and
263 * matching rules to use for the comparison.
265 * \note You should not never call this function directly, but use the
266 * macro GMX_EXPECT_SIMD_FUNC_NEAR(refFunc,tstFunc,matchRule) instead.
268 ::testing::AssertionResult SimdMathTest::compareSimdMathFunction(const char* refFuncExpr,
269 const char* simdFuncExpr,
270 const char gmx_unused* compareSettingsExpr,
271 real refFunc(real x),
272 SimdReal gmx_simdcall simdFunc(SimdReal x),
273 const CompareSettings& compareSettings)
275 std::vector<real> vx(GMX_SIMD_REAL_WIDTH);
276 std::vector<real> vref(GMX_SIMD_REAL_WIDTH);
277 std::vector<real> vtst(GMX_SIMD_REAL_WIDTH);
279 std::int64_t ulpDiff;
280 std::int64_t maxUlpDiff = 0;
282 real refValMaxUlpDiff, simdValMaxUlpDiff;
283 const int niter = s_nPoints / GMX_SIMD_REAL_WIDTH;
287 std::conditional<sizeof(real) == sizeof(double), std::int64_t, std::int32_t>::type i;
290 // Allow zero-size intervals - nothing to test means we succeeded at it
291 if (compareSettings.range.first == compareSettings.range.second)
293 ::testing::AssertionSuccess();
296 // Calculate the tolerance limit to use for denormals - we want
297 // values that are within the ulp tolerance of denormals to be considered matching
298 conv0.r = std::numeric_limits<real>::min();
299 conv0.i += compareSettings.ulpTol - 1; // min() itself is not denormal, but one ulp larger
300 const real denormalLimit = conv0.r;
302 // We want to test as many diverse bit combinations as possible over the range requested,
303 // and in particular do it evenly spaced in bit-space.
304 // Due to the way IEEE754 floating-point is represented, that means we should have a
305 // constant multiplicative factor between adjacent values. This gets a bit complicated
306 // when we have both positive and negative values, so we offload the generation of the
307 // specific testing values to a separate routine
308 std::vector<real> testPoints = generateTestPoints(compareSettings.range, s_nPoints);
310 size_t pointIndex = 0;
312 for (int iter = 0; iter < niter; iter++)
314 for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
316 vx[i] = testPoints[pointIndex];
317 vref[i] = refFunc(vx[i]);
318 // If we reach the end of the points, stop increasing index so we pad with
319 // extra copies of the last element up to the SIMD width
320 if (pointIndex + 1 < testPoints.size())
325 vtst = simdReal2Vector(simdFunc(vector2SimdReal(vx)));
327 bool absOk = true, signOk = true;
328 for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
330 if (compareSettings.matchRule == MatchRule::Dtz && std::abs(vref[i]) <= denormalLimit
331 && std::abs(vtst[i]) <= denormalLimit)
336 if (compareSettings.matchRule == MatchRule::ReferenceOrZero && vtst[i] == 0.0)
338 // If we accept 0.0 for the test function, we can continue to the next loop iteration.
342 absDiff = std::abs(vref[i] - vtst[i]);
343 absOk = absOk && (absDiff < compareSettings.absTol);
344 signOk = signOk && ((vref[i] >= 0 && vtst[i] >= 0) || (vref[i] <= 0 && vtst[i] <= 0));
346 if (absDiff >= compareSettings.absTol)
348 /* We replicate the trivial ulp differences comparison here rather than
349 * calling the lower-level routine for comparing them, since this enables
350 * us to run through the entire test range and report the largest deviation
351 * without lots of extra glue routines.
355 ulpDiff = llabs(conv0.i - conv1.i);
356 if (ulpDiff > maxUlpDiff)
358 maxUlpDiff = ulpDiff;
359 maxUlpDiffPos = vx[i];
360 refValMaxUlpDiff = vref[i];
361 simdValMaxUlpDiff = vtst[i];
365 if ((!absOk) && (!signOk))
367 return ::testing::AssertionFailure()
368 << "Failing SIMD math function comparison due to sign differences." << std::endl
369 << "Reference function: " << refFuncExpr << std::endl
370 << "Simd function: " << simdFuncExpr << std::endl
371 << "Test range is ( " << compareSettings.range.first << " , "
372 << compareSettings.range.second << " ) " << std::endl
373 << "Match rule: " << matchRuleNames_.at(compareSettings.matchRule) << std::endl
374 << "First sign difference around x=" << std::setprecision(20)
375 << ::testing::PrintToString(vx) << std::endl
376 << "Ref values: " << std::setprecision(20) << ::testing::PrintToString(vref)
378 << "SIMD values: " << std::setprecision(20) << ::testing::PrintToString(vtst)
383 GMX_RELEASE_ASSERT(compareSettings.ulpTol >= 0, "Invalid ulp value.");
384 if (maxUlpDiff <= compareSettings.ulpTol)
386 return ::testing::AssertionSuccess();
390 return ::testing::AssertionFailure()
391 << "Failing SIMD math function ulp comparison between " << refFuncExpr << " and "
392 << simdFuncExpr << std::endl
393 << "Requested ulp tolerance: " << compareSettings.ulpTol << std::endl
394 << "Requested abs tolerance: " << compareSettings.absTol << std::endl
395 << "Match rule: " << matchRuleNames_.at(compareSettings.matchRule) << std::endl
396 << "Largest Ulp difference occurs for x=" << std::setprecision(20) << maxUlpDiffPos
398 << "Ref values: " << std::setprecision(20) << refValMaxUlpDiff << std::endl
399 << "SIMD values: " << std::setprecision(20) << simdValMaxUlpDiff << std::endl
400 << "Ulp diff.: " << std::setprecision(20) << maxUlpDiff << std::endl;
404 // Actual routine to generate a small set of test points in current precision. This will
405 // be called by either the double or single precision test fixture, since we need different
406 // test names to compare to the right reference data.
407 void SimdMathTest::generateTestPointsTest()
410 gmx::test::TestReferenceData data;
411 gmx::test::TestReferenceChecker checker(data.rootChecker());
413 std::vector<real> result;
415 result = generateTestPoints(Range(-1e10, -1), points);
416 checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10,-1[");
418 result = generateTestPoints(Range(-1e10, -1e-10), points);
419 checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, -1e-10[");
421 result = generateTestPoints(Range(1, 1e10), points);
422 checker.checkSequence(result.begin(), result.end(), "Test points for interval [1, 1e10[");
424 result = generateTestPoints(Range(1e-10, 1e10), points);
425 checker.checkSequence(result.begin(), result.end(), "Test points for interval [1e-10, 1e10[");
427 result = generateTestPoints(Range(-1e10, 1e-10), points);
428 checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, 1e-10[");
430 result = generateTestPoints(Range(-1e-10, 1e-10), points);
431 checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e-10, 1e-10[");
433 result = generateTestPoints(Range(-1e-10, 1e10), points);
434 checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e-10, 1e10[");
436 result = generateTestPoints(Range(-1e10, 1e10), points);
437 checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, 1e10[");
439 result = generateTestPoints(Range(-1000, 0), points);
440 checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1000, 0[");
442 result = generateTestPoints(Range(0, 1000), points);
443 checker.checkSequence(result.begin(), result.end(), "Test points for interval [0, 1000[");
450 // Actual math function tests below
452 /*! \cond internal */
453 /*! \addtogroup module_simd */
459 // Reference data is selected based on test name, so make the test name precision-dependent
461 TEST_F(SimdMathTest, generateTestPointsDouble)
463 generateTestPointsTest();
466 TEST_F(SimdMathTest, generateTestPointsFloat)
468 generateTestPointsTest();
472 TEST_F(SimdMathTest, copysign)
474 GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(-c0, c1, c2),
475 copysign(setSimdRealFrom3R(c0, c1, c2), setSimdRealFrom3R(-c3, c4, 0)));
476 GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(-c0, c1, c2),
477 copysign(setSimdRealFrom3R(-c0, -c1, -c2), setSimdRealFrom3R(-c3, c4, 0)));
478 GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(c0, -c1, c2),
479 copysign(setSimdRealFrom3R(c0, c1, c2), setSimdRealFrom3R(c3, -c4, 0)));
480 GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(c0, -c1, c2),
481 copysign(setSimdRealFrom3R(-c0, -c1, -c2), setSimdRealFrom3R(c3, -c4, 0)));
484 /*! \brief Function wrapper to evaluate reference 1/sqrt(x) */
485 real refInvsqrt(real x)
487 return 1.0 / std::sqrt(x);
490 TEST_F(SimdMathTest, invsqrt)
492 const real low = std::numeric_limits<float>::min();
493 const real high = std::numeric_limits<float>::max();
494 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
496 GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, invsqrt, settings);
499 TEST_F(SimdMathTest, maskzInvsqrt)
501 SimdReal x = setSimdRealFrom3R(c1, 0.0, c2);
502 SimdBool m = (setZero() < x);
503 SimdReal ref = setSimdRealFrom3R(1.0 / std::sqrt(c1), 0.0, 1.0 / std::sqrt(c2));
504 GMX_EXPECT_SIMD_REAL_NEAR(ref, maskzInvsqrt(x, m));
507 /*! \brief Function wrapper to return first result when testing \ref invsqrtPair */
508 SimdReal gmx_simdcall tstInvsqrtPair0(SimdReal x)
511 invsqrtPair(x, x, &r0, &r1);
515 /*! \brief Function wrapper to return second result when testing \ref invsqrtPair */
516 SimdReal gmx_simdcall tstInvsqrtPair1(SimdReal x)
519 invsqrtPair(x, x, &r0, &r1);
523 TEST_F(SimdMathTest, invsqrtPair)
525 const real low = std::numeric_limits<float>::min();
526 const real high = std::numeric_limits<float>::max();
528 // Accuracy conversions lose a bit of accuracy compared to all-double,
529 // so increase the tolerance to 4*ulpTol_
530 CompareSettings settings{ Range(low, high), 4 * ulpTol_, absTol_, MatchRule::Normal };
532 GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tstInvsqrtPair0, settings);
533 GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tstInvsqrtPair1, settings);
536 /*! \brief Function wrapper to evaluate reference sqrt(x) */
542 TEST_F(SimdMathTest, sqrt)
544 // Since the first lookup step is sometimes performed in single precision,
545 // our SIMD sqrt can only handle single-precision input values, even when
546 // compiled in double precision.
548 const real minFloat = std::numeric_limits<float>::min();
549 const real minSafeFloat = minFloat * 10;
550 const real maxSafeFloat = std::numeric_limits<float>::max() * 0.1;
551 CompareSettings settings;
552 // The accuracy conversions lose a bit of extra accuracy compared to
553 // doing the iterations in all-double.
554 setUlpTol(4 * ulpTol_);
556 // First test that 0.0 and a few other values work
557 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(0, std::sqrt(c1), std::sqrt(c2)),
558 sqrt(setSimdRealFrom3R(0, c1, c2)));
561 // As mentioned above, we cannot guarantee that very small double precision
562 // input values (below std::numeric_limits<float>::min()) are handled correctly,
563 // so our implementation will clamp it to zero. In this range we allow either
564 // the correct value or zero, but it's important that it does not result in NaN or Inf values.
566 // This test range must not be called for single precision, since if we try to divide
567 // the interval (0.0, low( in npoints we will try to multiply by factors so small that
568 // they end up being flushed to zero, and the loop would never end.
569 settings = { Range(0.0, minFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
570 GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
573 // Next range: Just about minFloat the lookup should always work, but the results
574 // might be a bit fragile due to issues with the N-R iterations being flushed to zero
575 // for denormals. We can probably relax the latter in double precision, but since we
576 // anyway cannot handle numbers that cannot be represented in single it's not worth
577 // worrying too much about whether we have zero or an exact values around 10^-38....
578 settings = { Range(minFloat, minSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
579 GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
581 settings = { Range(minSafeFloat, maxSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
582 GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
585 TEST_F(SimdMathTest, sqrtUnsafe)
587 const real minSafeFloat = std::numeric_limits<float>::min() * 10;
588 const real maxSafeFloat = std::numeric_limits<float>::max() * 0.1;
590 // The accuracy conversions lose a bit of extra accuracy compared to
591 // doing the iterations in all-double, so we use 4*ulpTol_
592 setUlpTol(4 * ulpTol_);
594 CompareSettings settings{ Range(minSafeFloat, maxSafeFloat), 4 * ulpTol_, absTol_, MatchRule::Normal };
595 GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt<MathOptimization::Unsafe>, settings);
598 /*! \brief Function wrapper to evaluate reference 1/x */
604 TEST_F(SimdMathTest, inv)
606 // Since the first lookup step is sometimes performed in single precision,
607 // our SIMD 1/x can only handle single-precision input values, even when
608 // compiled in double precision.
610 // Relevant threshold points
611 const real minSafeFloat = std::numeric_limits<float>::min()
612 * 10; // X value guaranteed not to result in Inf intermediates for 1/x calc.
613 const real maxSafeFloat = std::numeric_limits<float>::max()
614 * 0.1; // X value guaranteed not to result in DTZ intermediates for 1/x calc.
615 // Scale highest value by 1-eps, since we will do some arithmetics on this value
616 const real maxFloat =
617 std::numeric_limits<float>::max() * (1.0 - std::numeric_limits<float>::epsilon());
618 CompareSettings settings;
620 // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
621 settings = { Range(-maxFloat, -maxSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
622 GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
624 // Normal checks for x < 0
625 settings = { Range(-maxSafeFloat, -minSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
626 GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
628 // We do not care about the small range -minSafeFloat < x < +minSafeFloat where the result can be +/- Inf, since we don't require strict IEEE754.
630 // Normal checks for x > 0
631 settings = { Range(minSafeFloat, maxSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
632 GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
634 // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
635 settings = { Range(maxSafeFloat, maxFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
636 GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
639 TEST_F(SimdMathTest, maskzInv)
641 SimdReal x = setSimdRealFrom3R(c1, 0.0, c2);
642 SimdBool m = (setZero() < x);
643 SimdReal ref = setSimdRealFrom3R(1.0 / c1, 0.0, 1.0 / c2);
644 GMX_EXPECT_SIMD_REAL_NEAR(ref, maskzInv(x, m));
647 TEST_F(SimdMathTest, cbrt)
649 const real low = -std::numeric_limits<real>::max();
650 const real high = std::numeric_limits<real>::max();
652 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
653 GMX_EXPECT_SIMD_FUNC_NEAR(std::cbrt, cbrt, settings);
656 /*! \brief Function wrapper to evaluate reference 1/cbrt(x) */
657 real refInvCbrt(real x)
659 return 1.0 / std::cbrt(x);
662 TEST_F(SimdMathTest, invcbrt)
664 // Negative values first
665 real low = -std::numeric_limits<real>::max();
666 real high = -std::numeric_limits<real>::min();
668 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
669 GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrt, settings);
672 low = std::numeric_limits<real>::min();
673 high = std::numeric_limits<real>::max();
674 settings = { Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
675 GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrt, settings);
678 TEST_F(SimdMathTest, log2)
680 const real low = std::numeric_limits<real>::min();
681 const real high = std::numeric_limits<real>::max();
683 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
684 GMX_EXPECT_SIMD_FUNC_NEAR(std::log2, log2, settings);
687 TEST_F(SimdMathTest, log)
689 const real low = std::numeric_limits<real>::min();
690 const real high = std::numeric_limits<real>::max();
692 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
693 GMX_EXPECT_SIMD_FUNC_NEAR(std::log, log, settings);
696 TEST_F(SimdMathTest, exp2)
698 // Relevant threshold points
699 constexpr real lowestReal = -std::numeric_limits<real>::max();
700 constexpr real lowestRealThatProducesNormal =
701 std::numeric_limits<real>::min_exponent
702 - 1; // adding the significant corresponds to one more unit in exponent
703 constexpr real lowestRealThatProducesDenormal =
704 lowestRealThatProducesNormal
705 - std::numeric_limits<real>::digits; // digits refer to bits in significand, so 24/53 for float/double
706 constexpr real highestRealThatProducesNormal =
707 std::numeric_limits<real>::max_exponent
708 - 1; // adding the significant corresponds to one more unit in exponent
709 CompareSettings settings;
711 // Below subnormal range all results should be zero (so, match the reference)
712 settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
713 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
715 // Subnormal range, require matching, but DTZ is fine
716 settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal),
720 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
722 // Normal range, standard result expected
723 settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
727 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
730 TEST_F(SimdMathTest, exp2Unsafe)
732 // The unsafe version is only defined in the normal range
733 constexpr real lowestRealThatProducesNormal =
734 std::numeric_limits<real>::min_exponent
735 - 1; // adding the significant corresponds to one more unit in exponent
736 constexpr real highestRealThatProducesNormal =
737 std::numeric_limits<real>::max_exponent
738 - 1; // adding the significant corresponds to one more unit in exponent
740 CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
744 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2<MathOptimization::Unsafe>, settings);
747 TEST_F(SimdMathTest, exp)
749 // Relevant threshold points. See the exp2 test for more details about the values; these are
750 // simply scaled by log(2) due to the difference between exp2 and exp.
751 const real lowestReal = -std::numeric_limits<real>::max();
752 // In theory the smallest value should be (min_exponent-1)*log(2), but rounding after the multiplication will cause this
753 // value to be a single ulp too low. This might cause failed tests on CPUs that use different DTZ modes for SIMD vs.
754 // non-SIMD arithmetics (e.g. ARM v7), so multiply by (1.0-eps) to increase it by a single ulp.
755 const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
757 * (1 - std::numeric_limits<real>::epsilon());
758 const real lowestRealThatProducesDenormal =
759 lowestRealThatProducesNormal - std::numeric_limits<real>::digits * std::log(2.0);
760 const real highestRealThatProducesNormal =
761 (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
762 CompareSettings settings;
764 // Below subnormal range all results should be zero (so, match the reference)
765 settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
766 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
768 // Subnormal range, require matching, but DTZ is fine
769 settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal),
773 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
775 // Normal range, standard result expected
776 settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
780 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
783 TEST_F(SimdMathTest, expUnsafe)
785 // See test of exp() for comments about test ranges
786 const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
788 * (1 - std::numeric_limits<real>::epsilon());
789 const real highestRealThatProducesNormal =
790 (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
792 CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
796 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp<MathOptimization::Unsafe>, settings);
799 TEST_F(SimdMathTest, pow)
801 // We already test the log2/exp2 components of pow() extensively above, and it's a very
802 // simple single-line function, so here we just test a handful of values to catch typos
803 // and then some special values.
805 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
806 pow(rSimd_c0c1c2, rSimd_c3c4c5));
808 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
809 pow(rSimd_c0c1c2, rSimd_m3m0m4));
811 // 0^0 = 1 , 0^c1=0, -c1^0=1
812 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(1.0, 0.0, 1.0),
813 pow(setSimdRealFrom3R(0, 0.0, -c1), setSimdRealFrom3R(0.0, c1, 0.0)));
816 TEST_F(SimdMathTest, powUnsafe)
818 // We already test the log2/exp2 components of pow() extensively above, and it's a very
819 // simple single-line function, so here we just test a handful of values to catch typos
820 // and then some special values.
822 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
823 pow<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_c3c4c5));
825 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
826 pow<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_m3m0m4));
829 /*! \brief Function wrapper for erf(x), with argument/return in default Gromacs precision.
831 * \note Single-precision erf() in some libraries can be slightly lower precision
832 * than the SIMD flavor, so we use a cast to force double precision for reference.
836 return std::erf(static_cast<double>(x));
839 TEST_F(SimdMathTest, erf)
841 CompareSettings settings{ Range(-9, 9), ulpTol_, std::numeric_limits<real>::min(), MatchRule::Normal };
842 GMX_EXPECT_SIMD_FUNC_NEAR(refErf, erf, settings);
845 /*! \brief Function wrapper for erfc(x), with argument/return in default Gromacs precision.
847 * \note Single-precision erfc() in some libraries can be slightly lower precision
848 * than the SIMD flavor, so we use a cast to force double precision for reference.
852 return std::erfc(static_cast<double>(x));
855 TEST_F(SimdMathTest, erfc)
857 // Our erfc algorithm has 4 ulp accuracy, so relax tolerance a bit to 4*ulpTol
858 CompareSettings settings{ Range(-9, 9), 4 * ulpTol_, std::numeric_limits<real>::min(), MatchRule::Normal };
859 GMX_EXPECT_SIMD_FUNC_NEAR(refErfc, erfc, settings);
862 TEST_F(SimdMathTest, sin)
864 CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
865 GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sin, settings);
867 // Range reduction leads to accuracy loss, so we might want higher tolerance here
868 settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
869 GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sin, settings);
872 TEST_F(SimdMathTest, cos)
874 CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
875 GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cos, settings);
877 // Range reduction leads to accuracy loss, so we might want higher tolerance here
878 settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
879 GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cos, settings);
882 TEST_F(SimdMathTest, tan)
884 // Tan(x) is a little sensitive due to the division in the algorithm.
885 // Rather than using lots of extra FP operations, we accept the algorithm
886 // presently only achieves a ~3 ulp error and use the medium tolerance.
887 CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
888 GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tan, settings);
890 // Range reduction leads to accuracy loss, so we might want higher tolerance here
891 settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
892 GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tan, settings);
895 TEST_F(SimdMathTest, asin)
897 // Our present asin(x) algorithm achieves 2-3 ulp accuracy
898 CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
899 GMX_EXPECT_SIMD_FUNC_NEAR(std::asin, asin, settings);
902 TEST_F(SimdMathTest, acos)
904 // Our present acos(x) algorithm achieves 2-3 ulp accuracy
905 CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
906 GMX_EXPECT_SIMD_FUNC_NEAR(std::acos, acos, settings);
909 TEST_F(SimdMathTest, atan)
911 // Our present atan(x) algorithm achieves 1 ulp accuracy
912 CompareSettings settings{ Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
913 GMX_EXPECT_SIMD_FUNC_NEAR(std::atan, atan, settings);
916 TEST_F(SimdMathTest, atan2)
918 // test each quadrant
919 GMX_EXPECT_SIMD_REAL_NEAR(
920 setSimdRealFrom3R(std::atan2(c0, c3), std::atan2(c1, c4), std::atan2(c2, c5)),
921 atan2(rSimd_c0c1c2, rSimd_c3c4c5));
922 GMX_EXPECT_SIMD_REAL_NEAR(
923 setSimdRealFrom3R(std::atan2(-c0, c3), std::atan2(-c1, c4), std::atan2(-c2, c5)),
924 atan2(rSimd_m0m1m2, rSimd_c3c4c5));
925 GMX_EXPECT_SIMD_REAL_NEAR(
926 setSimdRealFrom3R(std::atan2(-c0, -c3), std::atan2(-c1, -c0), std::atan2(-c2, -c4)),
927 atan2(rSimd_m0m1m2, rSimd_m3m0m4));
928 GMX_EXPECT_SIMD_REAL_NEAR(
929 setSimdRealFrom3R(std::atan2(c0, -c3), std::atan2(c1, -c0), std::atan2(c2, -c4)),
930 atan2(rSimd_c0c1c2, rSimd_m3m0m4));
932 // cases important for calculating angles
933 // values on coordinate axes
934 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(0, c0), std::atan2(0, c1), std::atan2(0, c2)),
935 atan2(setZero(), rSimd_c0c1c2));
936 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(c0, 0), std::atan2(c1, 0), std::atan2(c2, 0)),
937 atan2(rSimd_c0c1c2, setZero()));
938 GMX_EXPECT_SIMD_REAL_NEAR(
939 setSimdRealFrom3R(std::atan2(0, -c0), std::atan2(0, -c1), std::atan2(0, -c2)),
940 atan2(setZero(), rSimd_m0m1m2));
941 GMX_EXPECT_SIMD_REAL_NEAR(
942 setSimdRealFrom3R(std::atan2(-c0, 0), std::atan2(-c1, 0), std::atan2(-c2, 0)),
943 atan2(rSimd_m0m1m2, setZero()));
944 // degenerate value (origin) should return 0.0. At least IBM xlc 13.1.5 gets the reference
945 // value wrong (-nan) at -O3 optimization, so we compare to the correct value (0.0) instead.
946 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom1R(0.0), atan2(setSimdRealFrom3R(0.0, 0.0, 0.0), setZero()));
949 /*! \brief Evaluate reference version of PME force correction. */
950 real refPmeForceCorrection(real x)
954 real y = std::sqrt(x);
955 return 2 * std::exp(-x) / (std::sqrt(M_PI) * x) - std::erf(static_cast<double>(y)) / (x * y);
959 return -4 / (3 * std::sqrt(M_PI));
963 // The PME corrections will be added to ~1/r2, so absolute tolerance of EPS is fine.
964 TEST_F(SimdMathTest, pmeForceCorrection)
966 // Pme correction relative accuracy only needs to be ~1e-6 accuracy single, 1e-10 double
967 const std::int64_t ulpTol = (GMX_DOUBLE ? 5e-10 : 5e-6) / GMX_REAL_EPS;
969 CompareSettings settings{ Range(0.15, 4), ulpTol, GMX_REAL_EPS, MatchRule::Normal };
970 GMX_EXPECT_SIMD_FUNC_NEAR(refPmeForceCorrection, pmeForceCorrection, settings);
973 /*! \brief Evaluate reference version of PME potential correction. */
974 real refPmePotentialCorrection(real x)
976 real y = std::sqrt(x);
977 return std::erf(static_cast<double>(y)) / y;
980 // The PME corrections will be added to ~1/r, so absolute tolerance of EPS is fine.
981 TEST_F(SimdMathTest, pmePotentialCorrection)
983 // Pme correction relative accuracy only needs to be ~1e-6 accuracy single, 1e-10 double
984 const std::int64_t ulpTol = (GMX_DOUBLE ? 5e-10 : 5e-6) / GMX_REAL_EPS;
986 CompareSettings settings{ Range(0.15, 4), ulpTol, GMX_REAL_EPS, MatchRule::Normal };
987 GMX_EXPECT_SIMD_FUNC_NEAR(refPmePotentialCorrection, pmePotentialCorrection, settings);
990 // Functions that only target single accuracy, even for double SIMD data
992 TEST_F(SimdMathTest, invsqrtSingleAccuracy)
994 // Here we always use float limits, since the lookup is not defined for numbers that
995 // cannot be represented in single precision.
996 const real low = std::numeric_limits<float>::min();
997 const real high = std::numeric_limits<float>::max();
998 /* Increase the allowed error by the difference between the actual precision and single */
999 setUlpTolSingleAccuracy(ulpTol_);
1001 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1002 GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, invsqrtSingleAccuracy, settings);
1005 /*! \brief Function wrapper to return first result when testing \ref invsqrtPairSingleAccuracy */
1006 SimdReal gmx_simdcall tst_invsqrt_SingleAccuracy_pair0(SimdReal x)
1009 invsqrtPairSingleAccuracy(x, x, &r0, &r1);
1013 /*! \brief Function wrapper to return second result when testing \ref invsqrtPairSingleAccuracy */
1014 SimdReal gmx_simdcall tst_invsqrt_SingleAccuracy_pair1(SimdReal x)
1017 invsqrtPairSingleAccuracy(x, x, &r0, &r1);
1021 TEST_F(SimdMathTest, invsqrtPairSingleAccuracy)
1023 // Float limits since lookup is always performed in single
1024 const real low = std::numeric_limits<float>::min();
1025 const real high = std::numeric_limits<float>::max();
1026 /* Increase the allowed error by the difference between the actual precision and single */
1027 setUlpTolSingleAccuracy(ulpTol_);
1029 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1030 GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tst_invsqrt_SingleAccuracy_pair0, settings);
1031 GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tst_invsqrt_SingleAccuracy_pair1, settings);
1034 TEST_F(SimdMathTest, sqrtSingleAccuracy)
1036 // Since the first lookup step is sometimes performed in single precision,
1037 // our SIMD sqrt can only handle single-precision input values, even when
1038 // compiled in double precision - thus we use single precision limits here.
1040 // Scale lowest value by 1+eps, since we will do some arithmetics on this value
1041 const real low = std::numeric_limits<float>::min() * (1.0 + std::numeric_limits<float>::epsilon());
1042 const real high = std::numeric_limits<float>::max();
1043 CompareSettings settings;
1045 // Increase the allowed error by the difference between the actual precision and single
1046 setUlpTolSingleAccuracy(ulpTol_);
1048 // First test that 0.0 and a few other values works
1049 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(0, std::sqrt(c0), std::sqrt(c1)),
1050 sqrtSingleAccuracy(setSimdRealFrom3R(0, c0, c1)));
1053 // As mentioned above, we cannot guarantee that very small double precision
1054 // input values (below std::numeric_limits<float>::min()) are handled correctly,
1055 // so our implementation will clamp it to zero. In this range we allow either
1056 // the correct value or zero, but it's important that it does not result in NaN or Inf values.
1058 // This test range must not be called for single precision, since if we try to divide
1059 // the interval (0.0, low( in npoints we will try to multiply by factors so small that
1060 // they end up being flushed to zero, and the loop would never end.
1061 settings = { Range(0.0, low), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
1062 GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy, settings);
1065 settings = { Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1066 GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy, settings);
1069 TEST_F(SimdMathTest, sqrtSingleAccuracyUnsafe)
1071 // Test the full range, but stick to float limits since lookup is done in single.
1072 const real low = std::numeric_limits<float>::min();
1073 const real high = std::numeric_limits<float>::max();
1075 /* Increase the allowed error by the difference between the actual precision and single */
1076 setUlpTolSingleAccuracy(ulpTol_);
1078 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1079 GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy<MathOptimization::Unsafe>, settings);
1082 TEST_F(SimdMathTest, invSingleAccuracy)
1084 // Since the first lookup step is sometimes performed in single precision,
1085 // our SIMD 1/x can only handle single-precision input values, even when
1086 // compiled in double precision.
1088 // Relevant threshold points
1089 const real minSafeFloat = std::numeric_limits<float>::min()
1090 * 10; // X value guaranteed not to result in Inf intermediates for 1/x calc.
1091 const real maxSafeFloat = std::numeric_limits<float>::max()
1092 * 0.1; // X value guaranteed not to result in DTZ intermediates for 1/x calc.
1093 // Scale highest value by 1-eps, since we will do some arithmetics on this value
1094 const real maxFloat =
1095 std::numeric_limits<float>::max() * (1.0 - std::numeric_limits<float>::epsilon());
1096 CompareSettings settings;
1098 // Increase the allowed error by the difference between the actual precision and single
1099 setUlpTolSingleAccuracy(ulpTol_);
1101 // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
1102 settings = { Range(-maxFloat, -maxSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
1103 GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1105 // Normal checks for x < 0
1106 settings = { Range(-maxSafeFloat, -minSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
1107 GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1109 // We do not care about the small range -minSafeFloat < x < +minSafeFloat where the result can be +/- Inf, since we don't require strict IEEE754.
1111 // Normal checks for x > 0
1112 settings = { Range(minSafeFloat, maxSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
1113 GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1115 // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
1116 settings = { Range(maxSafeFloat, maxFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
1117 GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1120 TEST_F(SimdMathTest, cbrtSingleAccuracy)
1122 const real low = -std::numeric_limits<real>::max();
1123 const real high = std::numeric_limits<real>::max();
1125 // Increase the allowed error by the difference between the actual precision and single
1126 setUlpTolSingleAccuracy(ulpTol_);
1128 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1129 GMX_EXPECT_SIMD_FUNC_NEAR(std::cbrt, cbrtSingleAccuracy, settings);
1132 TEST_F(SimdMathTest, invcbrtSingleAccuracy)
1134 // Increase the allowed error by the difference between the actual precision and single
1135 setUlpTolSingleAccuracy(ulpTol_);
1137 // Negative values first
1138 real low = -std::numeric_limits<real>::max();
1139 real high = -std::numeric_limits<real>::min();
1141 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1142 GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrtSingleAccuracy, settings);
1145 low = std::numeric_limits<real>::min();
1146 high = std::numeric_limits<real>::max();
1147 settings = { Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1148 GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrtSingleAccuracy, settings);
1151 TEST_F(SimdMathTest, log2SingleAccuracy)
1153 const real low = std::numeric_limits<real>::min();
1154 const real high = std::numeric_limits<real>::max();
1156 // Increase the allowed error by the difference between the actual precision and single
1157 setUlpTolSingleAccuracy(ulpTol_);
1159 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1160 GMX_EXPECT_SIMD_FUNC_NEAR(std::log2, log2SingleAccuracy, settings);
1163 TEST_F(SimdMathTest, logSingleAccuracy)
1165 const real low = std::numeric_limits<real>::min();
1166 const real high = std::numeric_limits<real>::max();
1168 // Increase the allowed error by the difference between the actual precision and single
1169 setUlpTolSingleAccuracy(ulpTol_);
1171 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1172 GMX_EXPECT_SIMD_FUNC_NEAR(std::log, logSingleAccuracy, settings);
1175 TEST_F(SimdMathTest, exp2SingleAccuracy)
1177 // Relevant threshold points - float limits since we only target single accuracy
1178 constexpr real lowestReal = -std::numeric_limits<real>::max();
1179 constexpr real lowestRealThatProducesNormal =
1180 std::numeric_limits<real>::min_exponent
1181 - 1; // adding the significant corresponds to one more unit in exponent
1182 constexpr real lowestRealThatProducesDenormal =
1183 lowestRealThatProducesNormal
1184 - std::numeric_limits<real>::digits; // digits refer to bits in significand, so 24/53 for float/double
1185 constexpr real highestRealThatProducesNormal =
1186 std::numeric_limits<real>::max_exponent
1187 - 1; // adding the significant corresponds to one more unit in exponent
1188 CompareSettings settings;
1190 // Increase the allowed error by the difference between the actual precision and single
1191 setUlpTolSingleAccuracy(ulpTol_);
1193 // Below subnormal range all results should be zero
1194 settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
1195 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy, settings);
1197 // Subnormal range, require matching, but DTZ is fine
1198 settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal),
1202 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy, settings);
1204 // Normal range, standard result expected
1205 settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1208 MatchRule::Normal };
1209 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy, settings);
1212 TEST_F(SimdMathTest, exp2SingleAccuracyUnsafe)
1214 // The unsafe version is only defined in the normal range
1215 constexpr real lowestRealThatProducesNormal =
1216 std::numeric_limits<real>::min_exponent
1217 - 1; // adding the significant corresponds to one more unit in exponent
1218 constexpr real highestRealThatProducesNormal =
1219 std::numeric_limits<real>::max_exponent
1220 - 1; // adding the significant corresponds to one more unit in exponent
1222 /* Increase the allowed error by the difference between the actual precision and single */
1223 setUlpTolSingleAccuracy(ulpTol_);
1225 CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1228 MatchRule::Normal };
1229 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy<MathOptimization::Unsafe>, settings);
1232 TEST_F(SimdMathTest, expSingleAccuracy)
1234 // See threshold point comments in normal exp() test
1235 const real lowestReal = -std::numeric_limits<real>::max();
1236 // In theory the smallest value should be (min_exponent-1)*log(2), but rounding after the multiplication will cause this
1237 // value to be a single ulp too low. This might cause failed tests on CPUs that use different DTZ modes for SIMD vs.
1238 // non-SIMD arithmetics (e.g. ARM v7), so multiply by (1.0-eps) to increase it by a single ulp.
1239 const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
1241 * (1.0 - std::numeric_limits<real>::epsilon());
1242 const real lowestRealThatProducesDenormal =
1243 lowestRealThatProducesNormal - std::numeric_limits<real>::digits * std::log(2.0);
1244 const real highestRealThatProducesNormal =
1245 (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
1246 CompareSettings settings;
1248 // Increase the allowed error by the difference between the actual precision and single
1249 setUlpTolSingleAccuracy(ulpTol_);
1251 // Below subnormal range all results should be zero (so, match the reference)
1252 settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
1253 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy, settings);
1255 // Subnormal range, require matching, but DTZ is fine
1256 settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal),
1260 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy, settings);
1262 // Normal range, standard result expected
1263 settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1266 MatchRule::Normal };
1267 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy, settings);
1270 TEST_F(SimdMathTest, expSingleAccuracyUnsafe)
1272 // See test of exp() for comments about test ranges
1273 const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
1275 * (1 - std::numeric_limits<real>::epsilon());
1276 const real highestRealThatProducesNormal =
1277 (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
1279 // Increase the allowed error by the difference between the actual precision and single
1280 setUlpTolSingleAccuracy(ulpTol_);
1282 CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1285 MatchRule::Normal };
1286 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy<MathOptimization::Unsafe>, settings);
1289 TEST_F(SimdMathTest, powSingleAccuracy)
1291 // We already test the log2/exp2 components of pow() extensively above, and it's a very
1292 // simple single-line function, so here we just test a handful of values to catch typos
1293 // and then some special values.
1295 // Increase the allowed error by the difference between the actual precision and single
1296 setUlpTolSingleAccuracy(ulpTol_);
1298 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
1299 powSingleAccuracy(rSimd_c0c1c2, rSimd_c3c4c5));
1301 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
1302 powSingleAccuracy(rSimd_c0c1c2, rSimd_m3m0m4));
1304 // 0^0 = 1 , 0^c1=0, -c1^0=1
1305 GMX_EXPECT_SIMD_REAL_NEAR(
1306 setSimdRealFrom3R(1.0, 0.0, 1.0),
1307 powSingleAccuracy(setSimdRealFrom3R(0, 0.0, -c1), setSimdRealFrom3R(0.0, c1, 0.0)));
1310 TEST_F(SimdMathTest, powSingleAccuracyUnsafe)
1312 // We already test the log2/exp2 components of pow() extensively above, and it's a very
1313 // simple single-line function, so here we just test a handful of values to catch typos
1314 // and then some special values.
1316 // Increase the allowed error by the difference between the actual precision and single
1317 setUlpTolSingleAccuracy(ulpTol_);
1319 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
1320 powSingleAccuracy<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_c3c4c5));
1322 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
1323 powSingleAccuracy<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_m3m0m4));
1326 TEST_F(SimdMathTest, erfSingleAccuracy)
1328 // Increase the allowed error by the difference between the actual precision and single
1329 setUlpTolSingleAccuracy(ulpTol_);
1331 CompareSettings settings{ Range(-9, 9), ulpTol_, GMX_REAL_MIN, MatchRule::Normal };
1332 GMX_EXPECT_SIMD_FUNC_NEAR(refErf, erfSingleAccuracy, settings);
1335 TEST_F(SimdMathTest, erfcSingleAccuracy)
1337 // Increase the allowed error by the difference between the actual precision and single
1338 setUlpTolSingleAccuracy(ulpTol_);
1340 // Our erfc algorithm has 4 ulp accuracy, so relax tolerance a bit
1341 CompareSettings settings{ Range(-9, 9), 4 * ulpTol_, GMX_REAL_MIN, MatchRule::Normal };
1342 GMX_EXPECT_SIMD_FUNC_NEAR(refErfc, erfcSingleAccuracy, settings);
1346 TEST_F(SimdMathTest, sinSingleAccuracy)
1348 /* Increase the allowed error by the difference between the actual precision and single */
1349 setUlpTolSingleAccuracy(ulpTol_);
1351 CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
1352 GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sinSingleAccuracy, settings);
1354 // Range reduction leads to accuracy loss, so we might want higher tolerance here
1355 settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
1356 GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sinSingleAccuracy, settings);
1359 TEST_F(SimdMathTest, cosSingleAccuracy)
1361 /* Increase the allowed error by the difference between the actual precision and single */
1362 setUlpTolSingleAccuracy(ulpTol_);
1364 CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
1365 GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cosSingleAccuracy, settings);
1367 // Range reduction leads to accuracy loss, so we might want higher tolerance here
1368 settings = { Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
1369 GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cosSingleAccuracy, settings);
1372 TEST_F(SimdMathTest, tanSingleAccuracy)
1374 /* Increase the allowed error by the difference between the actual precision and single */
1375 setUlpTolSingleAccuracy(ulpTol_);
1377 // Tan(x) is a little sensitive due to the division in the algorithm.
1378 // Rather than using lots of extra FP operations, we accept the algorithm
1379 // presently only achieves a ~3 ulp error and use the medium tolerance.
1380 CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
1381 GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tanSingleAccuracy, settings);
1383 // Range reduction leads to accuracy loss, so we might want higher tolerance here
1384 settings = { Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
1385 GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tanSingleAccuracy, settings);
1388 TEST_F(SimdMathTest, asinSingleAccuracy)
1390 /* Increase the allowed error by the difference between the actual precision and single */
1391 setUlpTolSingleAccuracy(ulpTol_);
1393 // Our present asin(x) algorithm achieves 2-3 ulp accuracy
1394 CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
1395 GMX_EXPECT_SIMD_FUNC_NEAR(std::asin, asinSingleAccuracy, settings);
1398 TEST_F(SimdMathTest, acosSingleAccuracy)
1400 /* Increase the allowed error by the difference between the actual precision and single */
1401 setUlpTolSingleAccuracy(ulpTol_);
1403 // Our present acos(x) algorithm achieves 2-3 ulp accuracy
1404 CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
1405 GMX_EXPECT_SIMD_FUNC_NEAR(std::acos, acosSingleAccuracy, settings);
1408 TEST_F(SimdMathTest, atanSingleAccuracy)
1410 /* Increase the allowed error by the difference between the actual precision and single */
1411 setUlpTolSingleAccuracy(ulpTol_);
1413 // Our present atan(x) algorithm achieves 1 ulp accuracy
1414 CompareSettings settings{ Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
1415 GMX_EXPECT_SIMD_FUNC_NEAR(std::atan, atanSingleAccuracy, settings);
1418 TEST_F(SimdMathTest, atan2SingleAccuracy)
1420 /* Increase the allowed error by the difference between the actual precision and single */
1421 setUlpTolSingleAccuracy(ulpTol_);
1423 // test each quadrant
1424 GMX_EXPECT_SIMD_REAL_NEAR(
1425 setSimdRealFrom3R(std::atan2(c0, c3), std::atan2(c1, c4), std::atan2(c2, c5)),
1426 atan2SingleAccuracy(rSimd_c0c1c2, rSimd_c3c4c5));
1427 GMX_EXPECT_SIMD_REAL_NEAR(
1428 setSimdRealFrom3R(std::atan2(-c0, c3), std::atan2(-c1, c4), std::atan2(-c2, c5)),
1429 atan2SingleAccuracy(rSimd_m0m1m2, rSimd_c3c4c5));
1430 GMX_EXPECT_SIMD_REAL_NEAR(
1431 setSimdRealFrom3R(std::atan2(-c0, -c3), std::atan2(-c1, -c0), std::atan2(-c2, -c4)),
1432 atan2SingleAccuracy(rSimd_m0m1m2, rSimd_m3m0m4));
1433 GMX_EXPECT_SIMD_REAL_NEAR(
1434 setSimdRealFrom3R(std::atan2(c0, -c3), std::atan2(c1, -c0), std::atan2(c2, -c4)),
1435 atan2SingleAccuracy(rSimd_c0c1c2, rSimd_m3m0m4));
1436 // cases important for calculating angles
1437 // values on coordinate axes
1438 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(0, c0), std::atan2(0, c1), std::atan2(0, c2)),
1439 atan2SingleAccuracy(setZero(), rSimd_c0c1c2));
1440 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(c0, 0), std::atan2(c1, 0), std::atan2(c2, 0)),
1441 atan2SingleAccuracy(rSimd_c0c1c2, setZero()));
1442 GMX_EXPECT_SIMD_REAL_NEAR(
1443 setSimdRealFrom3R(std::atan2(0, -c0), std::atan2(0, -c1), std::atan2(0, -c2)),
1444 atan2SingleAccuracy(setZero(), rSimd_m0m1m2));
1445 GMX_EXPECT_SIMD_REAL_NEAR(
1446 setSimdRealFrom3R(std::atan2(-c0, 0), std::atan2(-c1, 0), std::atan2(-c2, 0)),
1447 atan2SingleAccuracy(rSimd_m0m1m2, setZero()));
1449 // degenerate value (origin) should return 0.0. At least IBM xlc 13.1.5 gets the reference
1450 // value wrong (-nan) at -O3 optimization, so we compare to the correct value (0.0) instead.
1451 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom1R(0.0),
1452 atan2SingleAccuracy(setSimdRealFrom3R(0.0, 0.0, 0.0), setZero()));
1455 TEST_F(SimdMathTest, pmeForceCorrectionSingleAccuracy)
1457 // The PME corrections will be added to ~1/r2, so absolute tolerance of EPS is fine.
1458 // Pme correction only needs to be ~1e-6 accuracy single.
1459 // Then increase the allowed error by the difference between the actual precision and single.
1460 setUlpTolSingleAccuracy(std::int64_t(5e-6 / GMX_FLOAT_EPS));
1462 CompareSettings settings{ Range(0.15, 4), ulpTol_, GMX_FLOAT_EPS, MatchRule::Normal };
1463 GMX_EXPECT_SIMD_FUNC_NEAR(refPmeForceCorrection, pmeForceCorrectionSingleAccuracy, settings);
1466 TEST_F(SimdMathTest, pmePotentialCorrectionSingleAccuracy)
1468 // The PME corrections will be added to ~1/r, so absolute tolerance of EPS is fine.
1469 // Pme correction only needs to be ~1e-6 accuracy single.
1470 // Then increase the allowed error by the difference between the actual precision and single.
1471 setUlpTolSingleAccuracy(std::int64_t(5e-6 / GMX_FLOAT_EPS));
1473 CompareSettings settings{ Range(0.15, 4), ulpTol_, GMX_FLOAT_EPS, MatchRule::Normal };
1474 GMX_EXPECT_SIMD_FUNC_NEAR(refPmePotentialCorrection, pmePotentialCorrectionSingleAccuracy, settings);
1479 # endif // GMX_SIMD_HAVE_REAL