2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2017,2018,2019,2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gromacs/simd/simd_math.h"
48 #include "gromacs/math/utilities.h"
49 #include "gromacs/options/basicoptions.h"
50 #include "gromacs/simd/simd.h"
52 #include "testutils/refdata.h"
53 #include "testutils/testasserts.h"
65 /*! \addtogroup module_simd */
68 # if GMX_SIMD_HAVE_REAL
70 class SimdMathTest : public SimdTest
73 /*! \brief Type for half-open intervals specifying test ranges */
74 typedef std::pair<real, real> Range;
76 /*! \brief Control what is considered matching values
78 * Normal simply means that we request the values to be equal
79 * to within the specified tolerance.
80 * However, there are also two more cases that are special:
82 * - Even if we only care about normal (i.e., not denormal) values, some math
83 * libraries might clamp the value to zero, which means our SIMD output
84 * might not match their values. By using MatchRule::Dtz, we will consider
85 * all values both from the reference and test functions that are within the
86 * requested ulp tolerance of a denormal number to be equivalent to 0.0.
87 * - For some older architectures without fused multiply-add units (e.g. x86 SSE2),
88 * we might end up clamping the results to zero just before reaching
89 * denormal output, since the intermediate results e.g. in polynomial
90 * approximations can be smaller than the final one. We often simply don't
91 * care about those values, and then one can use
92 * MatchRule::ReferenceOrZero to allow the test value to either match
93 * the reference or be zero.
97 Normal, //!< Match function values
98 Dtz, //!< Match function values after setting denormals to zero both in test and reference
99 ReferenceOrZero, //!< Test values can either match reference or be zero
102 const std::map<MatchRule, std::string> matchRuleNames_ = {
103 { MatchRule::Normal, "Test should match reference." },
104 { MatchRule::Dtz, "Test should match reference, with denormals treated as 0.0." },
105 { MatchRule::ReferenceOrZero, "Test should match reference or 0.0." }
108 /*! \brief Settings used for simd math function comparisons */
109 struct CompareSettings
111 Range range; //!< Range over which to test function
112 std::int64_t ulpTol; //!< Ulp tolerance
113 real absTol; //!< Absolute tolerance
114 MatchRule matchRule; //!< Decide what we consider a match
117 ::testing::AssertionResult compareSimdMathFunction(const char* refFuncExpr,
118 const char* simdFuncExpr,
119 const char* compareSettingsExpr,
120 real refFunc(real x),
121 SimdReal gmx_simdcall simdFunc(SimdReal x),
122 const CompareSettings& compareSettings);
124 /*! \brief Generate test point vector
126 * \param range The test interval, half open. Upper limit is not included.
127 * Pass by value, since we need to modify in method anyway.
128 * \param points Number of points to generate. This might be increased
129 * slightly to account both for extra special values like 0.0
130 * and the SIMD width.
132 * This routine generates a vector with test points separated by constant
133 * multiplicative factors, based on the range and number of points in the
134 * class. If the range includes both negative and positive values, points
135 * will be generated separately for the negative/positive intervals down
136 * to the smallest real number that can be represented, and we also include
139 * This is highly useful for large test ranges. For example, with a linear
140 * 1000-point division of the range (1,1e10) the first three values to test
141 * would be 1, 10000000.999, and 20000000.998, etc. For large values we would
142 * commonly hit the point where adding the small delta has no effect due to
143 * limited numerical precision.
144 * When we instead use this routine, the values will be 1, 1.0239, 1.0471, etc.
145 * This will spread the entropy over all bits in the IEEE754 representation,
146 * and be a much better test of all potential input values.
148 * \note We do not use the static variable s_nPoints in the parent class
149 * to avoid altering any value the user has set on the command line; since
150 * it's a static member, changing it would have permanent effect.
152 static std::vector<real> generateTestPoints(Range range, std::size_t points);
154 /*! \brief Test routine for the test point vector generation
156 static void generateTestPointsTest();
159 /*! \brief Test approximate equality of SIMD vs reference version of a function.
161 * This macro takes vanilla C and SIMD flavors of a function and tests it with
162 * the number of points, range, and tolerances specified by the test fixture class.
164 * The third option controls the range, tolerances, and match settings.
166 # define GMX_EXPECT_SIMD_FUNC_NEAR(refFunc, tstFunc, compareSettings) \
167 EXPECT_PRED_FORMAT3(compareSimdMathFunction, refFunc, tstFunc, compareSettings)
169 std::vector<real> SimdMathTest::generateTestPoints(Range inputRange, std::size_t inputPoints)
172 std::vector<real> testPoints;
173 testPoints.reserve(inputPoints);
175 GMX_RELEASE_ASSERT(inputRange.first < inputRange.second,
176 "The start of the interval must come before the end");
178 std::vector<Range> testRanges;
180 if (inputRange.first < 0 && inputRange.second > 0)
182 testRanges.emplace_back(Range({ inputRange.first, -std::numeric_limits<real>::min() }));
183 testRanges.emplace_back(Range({ 0.0, inputRange.second }));
187 if (inputRange.second == 0)
189 inputRange.second = -std::numeric_limits<real>::min();
190 inputRange.first = std::min(inputRange.first, inputRange.second);
192 testRanges.push_back(inputRange);
195 for (Range& range : testRanges)
197 std::size_t points = inputPoints / testRanges.size();
199 // The value 0 is special, and can only occur at the start of
200 // the interval after the corrections outside this loop.
201 // Add it explicitly, and adjust the interval to continue
202 // at the first valid non-zero positive number.
203 if (range.first == 0)
205 testPoints.push_back(0.0);
206 range.first = std::numeric_limits<real>::min();
207 points--; // Used one point
212 std::conditional<sizeof(real) == sizeof(double), std::int64_t, std::int32_t>::type i;
216 high.r = range.second;
218 // IEEE754 floating-point numbers have the cool property that for any range of
219 // constant sign, for all non-zero numbers a constant (i.e., linear) difference
220 // in the bitwise representation corresponds to a constant multiplicative factor.
222 // Divide the ulp difference evenly
223 std::int64_t ulpDiff = high.i - low.i;
224 // dividend and divisor must both be signed types
225 std::int64_t ulpDelta = ulpDiff / static_cast<std::int64_t>(points);
226 std::int64_t minUlpDelta = (ulpDiff > 0) ? 1 : -1;
230 // Very short interval or very many points caused round-to-zero.
231 // Select the smallest possible change, which is one ulp (with correct sign)
232 ulpDelta = minUlpDelta;
233 points = std::abs(ulpDiff);
237 // Use an index-based loop to avoid floating-point comparisons with
238 // values that might have overflowed. Save one point for the very last
239 // bitwise value that is part of the interval
240 for (std::size_t i = 0; i < points - 1; i++)
242 testPoints.push_back(x.r);
246 // Make sure we test the very last point that is inside the interval
249 testPoints.push_back(x.r);
254 /*! \brief Implementation routine to compare SIMD vs reference functions.
256 * \param refFuncExpr Description of reference function expression
257 * \param simdFuncExpr Description of SIMD function expression
258 * \param compareSettingsExpr Description of compareSettings
259 * \param refFunc Reference math function pointer
260 * \param simdFunc SIMD math function pointer
261 * \param compareSettings Structure with the range, tolerances, and
262 * matching rules to use for the comparison.
264 * \note You should not never call this function directly, but use the
265 * macro GMX_EXPECT_SIMD_FUNC_NEAR(refFunc,tstFunc,matchRule) instead.
267 ::testing::AssertionResult SimdMathTest::compareSimdMathFunction(const char* refFuncExpr,
268 const char* simdFuncExpr,
269 const char gmx_unused* compareSettingsExpr,
270 real refFunc(real x),
271 SimdReal gmx_simdcall simdFunc(SimdReal x),
272 const CompareSettings& compareSettings)
274 std::vector<real> vx(GMX_SIMD_REAL_WIDTH);
275 std::vector<real> vref(GMX_SIMD_REAL_WIDTH);
276 std::vector<real> vtst(GMX_SIMD_REAL_WIDTH);
278 std::int64_t ulpDiff;
279 std::int64_t maxUlpDiff = 0;
281 real refValMaxUlpDiff, simdValMaxUlpDiff;
282 const int niter = s_nPoints / GMX_SIMD_REAL_WIDTH;
286 std::conditional<sizeof(real) == sizeof(double), std::int64_t, std::int32_t>::type i;
289 // Allow zero-size intervals - nothing to test means we succeeded at it
290 if (compareSettings.range.first == compareSettings.range.second)
292 ::testing::AssertionSuccess();
295 // Calculate the tolerance limit to use for denormals - we want
296 // values that are within the ulp tolerance of denormals to be considered matching
297 conv0.r = std::numeric_limits<real>::min();
298 conv0.i += compareSettings.ulpTol - 1; // min() itself is not denormal, but one ulp larger
299 const real denormalLimit = conv0.r;
301 // We want to test as many diverse bit combinations as possible over the range requested,
302 // and in particular do it evenly spaced in bit-space.
303 // Due to the way IEEE754 floating-point is represented, that means we should have a
304 // constant multiplicative factor between adjacent values. This gets a bit complicated
305 // when we have both positive and negative values, so we offload the generation of the
306 // specific testing values to a separate routine
307 std::vector<real> testPoints = generateTestPoints(compareSettings.range, s_nPoints);
309 size_t pointIndex = 0;
311 for (int iter = 0; iter < niter; iter++)
313 for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
315 vx[i] = testPoints[pointIndex];
316 vref[i] = refFunc(vx[i]);
317 // If we reach the end of the points, stop increasing index so we pad with
318 // extra copies of the last element up to the SIMD width
319 if (pointIndex + 1 < testPoints.size())
324 vtst = simdReal2Vector(simdFunc(vector2SimdReal(vx)));
326 bool absOk = true, signOk = true;
327 for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
329 if (compareSettings.matchRule == MatchRule::Dtz && std::abs(vref[i]) <= denormalLimit
330 && std::abs(vtst[i]) <= denormalLimit)
335 if (compareSettings.matchRule == MatchRule::ReferenceOrZero && vtst[i] == 0.0)
337 // If we accept 0.0 for the test function, we can continue to the next loop iteration.
341 absDiff = std::abs(vref[i] - vtst[i]);
342 absOk = absOk && (absDiff < compareSettings.absTol);
343 signOk = signOk && ((vref[i] >= 0 && vtst[i] >= 0) || (vref[i] <= 0 && vtst[i] <= 0));
345 if (absDiff >= compareSettings.absTol)
347 /* We replicate the trivial ulp differences comparison here rather than
348 * calling the lower-level routine for comparing them, since this enables
349 * us to run through the entire test range and report the largest deviation
350 * without lots of extra glue routines.
354 ulpDiff = llabs(conv0.i - conv1.i);
355 if (ulpDiff > maxUlpDiff)
357 maxUlpDiff = ulpDiff;
358 maxUlpDiffPos = vx[i];
359 refValMaxUlpDiff = vref[i];
360 simdValMaxUlpDiff = vtst[i];
364 if ((!absOk) && (!signOk))
366 return ::testing::AssertionFailure()
367 << "Failing SIMD math function comparison due to sign differences." << std::endl
368 << "Reference function: " << refFuncExpr << std::endl
369 << "Simd function: " << simdFuncExpr << std::endl
370 << "Test range is ( " << compareSettings.range.first << " , "
371 << compareSettings.range.second << " ) " << std::endl
372 << "Match rule: " << matchRuleNames_.at(compareSettings.matchRule) << std::endl
373 << "First sign difference around x=" << std::setprecision(20)
374 << ::testing::PrintToString(vx) << std::endl
375 << "Ref values: " << std::setprecision(20) << ::testing::PrintToString(vref)
377 << "SIMD values: " << std::setprecision(20) << ::testing::PrintToString(vtst)
382 GMX_RELEASE_ASSERT(compareSettings.ulpTol >= 0, "Invalid ulp value.");
383 if (maxUlpDiff <= compareSettings.ulpTol)
385 return ::testing::AssertionSuccess();
389 return ::testing::AssertionFailure()
390 << "Failing SIMD math function ulp comparison between " << refFuncExpr << " and "
391 << simdFuncExpr << std::endl
392 << "Requested ulp tolerance: " << compareSettings.ulpTol << std::endl
393 << "Requested abs tolerance: " << compareSettings.absTol << std::endl
394 << "Match rule: " << matchRuleNames_.at(compareSettings.matchRule) << std::endl
395 << "Largest Ulp difference occurs for x=" << std::setprecision(20) << maxUlpDiffPos
397 << "Ref values: " << std::setprecision(20) << refValMaxUlpDiff << std::endl
398 << "SIMD values: " << std::setprecision(20) << simdValMaxUlpDiff << std::endl
399 << "Ulp diff.: " << std::setprecision(20) << maxUlpDiff << std::endl;
403 // Actual routine to generate a small set of test points in current precision. This will
404 // be called by either the double or single precision test fixture, since we need different
405 // test names to compare to the right reference data.
406 void SimdMathTest::generateTestPointsTest()
409 gmx::test::TestReferenceData data;
410 gmx::test::TestReferenceChecker checker(data.rootChecker());
412 std::vector<real> result;
414 result = generateTestPoints(Range(-1e10, -1), points);
415 checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10,-1[");
417 result = generateTestPoints(Range(-1e10, -1e-10), points);
418 checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, -1e-10[");
420 result = generateTestPoints(Range(1, 1e10), points);
421 checker.checkSequence(result.begin(), result.end(), "Test points for interval [1, 1e10[");
423 result = generateTestPoints(Range(1e-10, 1e10), points);
424 checker.checkSequence(result.begin(), result.end(), "Test points for interval [1e-10, 1e10[");
426 result = generateTestPoints(Range(-1e10, 1e-10), points);
427 checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, 1e-10[");
429 result = generateTestPoints(Range(-1e-10, 1e-10), points);
430 checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e-10, 1e-10[");
432 result = generateTestPoints(Range(-1e-10, 1e10), points);
433 checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e-10, 1e10[");
435 result = generateTestPoints(Range(-1e10, 1e10), points);
436 checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, 1e10[");
438 result = generateTestPoints(Range(-1000, 0), points);
439 checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1000, 0[");
441 result = generateTestPoints(Range(0, 1000), points);
442 checker.checkSequence(result.begin(), result.end(), "Test points for interval [0, 1000[");
449 // Actual math function tests below
451 /*! \cond internal */
452 /*! \addtogroup module_simd */
458 // Reference data is selected based on test name, so make the test name precision-dependent
460 TEST_F(SimdMathTest, generateTestPointsDouble)
462 generateTestPointsTest();
465 TEST_F(SimdMathTest, generateTestPointsFloat)
467 generateTestPointsTest();
471 TEST_F(SimdMathTest, copysign)
473 GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(-c0, c1, c2),
474 copysign(setSimdRealFrom3R(c0, c1, c2), setSimdRealFrom3R(-c3, c4, 0)));
475 GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(-c0, c1, c2),
476 copysign(setSimdRealFrom3R(-c0, -c1, -c2), setSimdRealFrom3R(-c3, c4, 0)));
477 GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(c0, -c1, c2),
478 copysign(setSimdRealFrom3R(c0, c1, c2), setSimdRealFrom3R(c3, -c4, 0)));
479 GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(c0, -c1, c2),
480 copysign(setSimdRealFrom3R(-c0, -c1, -c2), setSimdRealFrom3R(c3, -c4, 0)));
483 /*! \brief Function wrapper to evaluate reference 1/sqrt(x) */
484 real refInvsqrt(real x)
486 return 1.0 / std::sqrt(x);
489 TEST_F(SimdMathTest, invsqrt)
491 const real low = std::numeric_limits<float>::min();
492 const real high = std::numeric_limits<float>::max();
493 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
495 GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, invsqrt, settings);
498 TEST_F(SimdMathTest, maskzInvsqrt)
500 SimdReal x = setSimdRealFrom3R(c1, 0.0, c2);
501 SimdBool m = (setZero() < x);
502 SimdReal ref = setSimdRealFrom3R(1.0 / std::sqrt(c1), 0.0, 1.0 / std::sqrt(c2));
503 GMX_EXPECT_SIMD_REAL_NEAR(ref, maskzInvsqrt(x, m));
506 /*! \brief Function wrapper to return first result when testing \ref invsqrtPair */
507 SimdReal gmx_simdcall tstInvsqrtPair0(SimdReal x)
510 invsqrtPair(x, x, &r0, &r1);
514 /*! \brief Function wrapper to return second result when testing \ref invsqrtPair */
515 SimdReal gmx_simdcall tstInvsqrtPair1(SimdReal x)
518 invsqrtPair(x, x, &r0, &r1);
522 TEST_F(SimdMathTest, invsqrtPair)
524 const real low = std::numeric_limits<float>::min();
525 const real high = std::numeric_limits<float>::max();
527 // Accuracy conversions lose a bit of accuracy compared to all-double,
528 // so increase the tolerance to 4*ulpTol_
529 CompareSettings settings{ Range(low, high), 4 * ulpTol_, absTol_, MatchRule::Normal };
531 GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tstInvsqrtPair0, settings);
532 GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tstInvsqrtPair1, settings);
535 /*! \brief Function wrapper to evaluate reference sqrt(x) */
541 TEST_F(SimdMathTest, sqrt)
543 // Since the first lookup step is sometimes performed in single precision,
544 // our SIMD sqrt can only handle single-precision input values, even when
545 // compiled in double precision.
547 const real minFloat = std::numeric_limits<float>::min();
548 const real minSafeFloat = minFloat * 10;
549 const real maxSafeFloat = std::numeric_limits<float>::max() * 0.1;
550 CompareSettings settings;
551 // The accuracy conversions lose a bit of extra accuracy compared to
552 // doing the iterations in all-double.
553 setUlpTol(4 * ulpTol_);
555 // First test that 0.0 and a few other values work
556 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(0, std::sqrt(c1), std::sqrt(c2)),
557 sqrt(setSimdRealFrom3R(0, c1, c2)));
560 // As mentioned above, we cannot guarantee that very small double precision
561 // input values (below std::numeric_limits<float>::min()) are handled correctly,
562 // so our implementation will clamp it to zero. In this range we allow either
563 // the correct value or zero, but it's important that it does not result in NaN or Inf values.
565 // This test range must not be called for single precision, since if we try to divide
566 // the interval (0.0, low( in npoints we will try to multiply by factors so small that
567 // they end up being flushed to zero, and the loop would never end.
568 settings = { Range(0.0, minFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
569 GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
572 // Next range: Just about minFloat the lookup should always work, but the results
573 // might be a bit fragile due to issues with the N-R iterations being flushed to zero
574 // for denormals. We can probably relax the latter in double precision, but since we
575 // anyway cannot handle numbers that cannot be represented in single it's not worth
576 // worrying too much about whether we have zero or an exact values around 10^-38....
577 settings = { Range(minFloat, minSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
578 GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
580 settings = { Range(minSafeFloat, maxSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
581 GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
584 TEST_F(SimdMathTest, sqrtUnsafe)
586 const real minSafeFloat = std::numeric_limits<float>::min() * 10;
587 const real maxSafeFloat = std::numeric_limits<float>::max() * 0.1;
589 // The accuracy conversions lose a bit of extra accuracy compared to
590 // doing the iterations in all-double, so we use 4*ulpTol_
591 setUlpTol(4 * ulpTol_);
593 CompareSettings settings{ Range(minSafeFloat, maxSafeFloat), 4 * ulpTol_, absTol_, MatchRule::Normal };
594 GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt<MathOptimization::Unsafe>, settings);
597 /*! \brief Function wrapper to evaluate reference 1/x */
603 TEST_F(SimdMathTest, inv)
605 // Since the first lookup step is sometimes performed in single precision,
606 // our SIMD 1/x can only handle single-precision input values, even when
607 // compiled in double precision.
609 // Relevant threshold points
610 const real minSafeFloat = std::numeric_limits<float>::min()
611 * 10; // X value guaranteed not to result in Inf intermediates for 1/x calc.
612 const real maxSafeFloat = std::numeric_limits<float>::max()
613 * 0.1; // X value guaranteed not to result in DTZ intermediates for 1/x calc.
614 // Scale highest value by 1-eps, since we will do some arithmetics on this value
615 const real maxFloat =
616 std::numeric_limits<float>::max() * (1.0 - std::numeric_limits<float>::epsilon());
617 CompareSettings settings;
619 // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
620 settings = { Range(-maxFloat, -maxSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
621 GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
623 // Normal checks for x < 0
624 settings = { Range(-maxSafeFloat, -minSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
625 GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
627 // We do not care about the small range -minSafeFloat < x < +minSafeFloat where the result can be +/- Inf, since we don't require strict IEEE754.
629 // Normal checks for x > 0
630 settings = { Range(minSafeFloat, maxSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
631 GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
633 // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
634 settings = { Range(maxSafeFloat, maxFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
635 GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
638 TEST_F(SimdMathTest, maskzInv)
640 SimdReal x = setSimdRealFrom3R(c1, 0.0, c2);
641 SimdBool m = (setZero() < x);
642 SimdReal ref = setSimdRealFrom3R(1.0 / c1, 0.0, 1.0 / c2);
643 GMX_EXPECT_SIMD_REAL_NEAR(ref, maskzInv(x, m));
646 TEST_F(SimdMathTest, cbrt)
648 const real low = -std::numeric_limits<real>::max();
649 const real high = std::numeric_limits<real>::max();
651 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
652 GMX_EXPECT_SIMD_FUNC_NEAR(std::cbrt, cbrt, settings);
655 /*! \brief Function wrapper to evaluate reference 1/cbrt(x) */
656 real refInvCbrt(real x)
658 return 1.0 / std::cbrt(x);
661 TEST_F(SimdMathTest, invcbrt)
663 // Negative values first
664 real low = -std::numeric_limits<real>::max();
665 real high = -std::numeric_limits<real>::min();
667 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
668 GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrt, settings);
671 low = std::numeric_limits<real>::min();
672 high = std::numeric_limits<real>::max();
673 settings = { Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
674 GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrt, settings);
677 TEST_F(SimdMathTest, log2)
679 const real low = std::numeric_limits<real>::min();
680 const real high = std::numeric_limits<real>::max();
682 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
683 GMX_EXPECT_SIMD_FUNC_NEAR(std::log2, log2, settings);
686 TEST_F(SimdMathTest, log)
688 const real low = std::numeric_limits<real>::min();
689 const real high = std::numeric_limits<real>::max();
691 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
692 GMX_EXPECT_SIMD_FUNC_NEAR(std::log, log, settings);
695 TEST_F(SimdMathTest, exp2)
697 // Relevant threshold points
698 constexpr real lowestReal = -std::numeric_limits<real>::max();
699 constexpr real lowestRealThatProducesNormal =
700 std::numeric_limits<real>::min_exponent
701 - 1; // adding the significant corresponds to one more unit in exponent
702 constexpr real lowestRealThatProducesDenormal =
703 lowestRealThatProducesNormal
704 - std::numeric_limits<real>::digits; // digits refer to bits in significand, so 24/53 for float/double
705 constexpr real highestRealThatProducesNormal =
706 std::numeric_limits<real>::max_exponent
707 - 1; // adding the significant corresponds to one more unit in exponent
708 CompareSettings settings;
710 // Below subnormal range all results should be zero (so, match the reference)
711 settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
712 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
714 // Subnormal range, require matching, but DTZ is fine
715 settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal), ulpTol_,
716 absTol_, MatchRule::Dtz };
717 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
719 // Normal range, standard result expected
720 settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal), ulpTol_,
721 absTol_, MatchRule::Normal };
722 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
725 TEST_F(SimdMathTest, exp2Unsafe)
727 // The unsafe version is only defined in the normal range
728 constexpr real lowestRealThatProducesNormal =
729 std::numeric_limits<real>::min_exponent
730 - 1; // adding the significant corresponds to one more unit in exponent
731 constexpr real highestRealThatProducesNormal =
732 std::numeric_limits<real>::max_exponent
733 - 1; // adding the significant corresponds to one more unit in exponent
735 CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
736 ulpTol_, absTol_, MatchRule::Normal };
737 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2<MathOptimization::Unsafe>, settings);
740 TEST_F(SimdMathTest, exp)
742 // Relevant threshold points. See the exp2 test for more details about the values; these are
743 // simply scaled by log(2) due to the difference between exp2 and exp.
744 const real lowestReal = -std::numeric_limits<real>::max();
745 // In theory the smallest value should be (min_exponent-1)*log(2), but rounding after the multiplication will cause this
746 // value to be a single ulp too low. This might cause failed tests on CPUs that use different DTZ modes for SIMD vs.
747 // non-SIMD arithmetics (ARM v7), so multiply by (1.0-eps) to increase it by a single ulp.
748 const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
750 * (1 - std::numeric_limits<real>::epsilon());
751 const real lowestRealThatProducesDenormal =
752 lowestRealThatProducesNormal - std::numeric_limits<real>::digits * std::log(2.0);
753 const real highestRealThatProducesNormal =
754 (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
755 CompareSettings settings;
757 // Below subnormal range all results should be zero (so, match the reference)
758 settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
759 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
761 // Subnormal range, require matching, but DTZ is fine
762 settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal), ulpTol_,
763 absTol_, MatchRule::Dtz };
764 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
766 // Normal range, standard result expected
767 settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal), ulpTol_,
768 absTol_, MatchRule::Normal };
769 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
772 TEST_F(SimdMathTest, expUnsafe)
774 // See test of exp() for comments about test ranges
775 const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
777 * (1 - std::numeric_limits<real>::epsilon());
778 const real highestRealThatProducesNormal =
779 (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
781 CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
782 ulpTol_, absTol_, MatchRule::Normal };
783 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp<MathOptimization::Unsafe>, settings);
786 TEST_F(SimdMathTest, pow)
788 // We already test the log2/exp2 components of pow() extensively above, and it's a very
789 // simple single-line function, so here we just test a handful of values to catch typos
790 // and then some special values.
792 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
793 pow(rSimd_c0c1c2, rSimd_c3c4c5));
795 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
796 pow(rSimd_c0c1c2, rSimd_m3m0m4));
798 // 0^0 = 1 , 0^c1=0, -c1^0=1
799 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(1.0, 0.0, 1.0),
800 pow(setSimdRealFrom3R(0, 0.0, -c1), setSimdRealFrom3R(0.0, c1, 0.0)));
803 TEST_F(SimdMathTest, powUnsafe)
805 // We already test the log2/exp2 components of pow() extensively above, and it's a very
806 // simple single-line function, so here we just test a handful of values to catch typos
807 // and then some special values.
809 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
810 pow<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_c3c4c5));
812 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
813 pow<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_m3m0m4));
816 /*! \brief Function wrapper for erf(x), with argument/return in default Gromacs precision.
818 * \note Single-precision erf() in some libraries can be slightly lower precision
819 * than the SIMD flavor, so we use a cast to force double precision for reference.
823 return std::erf(static_cast<double>(x));
826 TEST_F(SimdMathTest, erf)
828 CompareSettings settings{ Range(-9, 9), ulpTol_, std::numeric_limits<real>::min(), MatchRule::Normal };
829 GMX_EXPECT_SIMD_FUNC_NEAR(refErf, erf, settings);
832 /*! \brief Function wrapper for erfc(x), with argument/return in default Gromacs precision.
834 * \note Single-precision erfc() in some libraries can be slightly lower precision
835 * than the SIMD flavor, so we use a cast to force double precision for reference.
839 return std::erfc(static_cast<double>(x));
842 TEST_F(SimdMathTest, erfc)
844 // Our erfc algorithm has 4 ulp accuracy, so relax tolerance a bit to 4*ulpTol
845 CompareSettings settings{ Range(-9, 9), 4 * ulpTol_, std::numeric_limits<real>::min(),
847 GMX_EXPECT_SIMD_FUNC_NEAR(refErfc, erfc, settings);
850 TEST_F(SimdMathTest, sin)
852 CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
853 GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sin, settings);
855 // Range reduction leads to accuracy loss, so we might want higher tolerance here
856 settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
857 GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sin, settings);
860 TEST_F(SimdMathTest, cos)
862 CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
863 GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cos, settings);
865 // Range reduction leads to accuracy loss, so we might want higher tolerance here
866 settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
867 GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cos, settings);
870 TEST_F(SimdMathTest, tan)
872 // Tan(x) is a little sensitive due to the division in the algorithm.
873 // Rather than using lots of extra FP operations, we accept the algorithm
874 // presently only achieves a ~3 ulp error and use the medium tolerance.
875 CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
876 GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tan, settings);
878 // Range reduction leads to accuracy loss, so we might want higher tolerance here
879 settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
880 GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tan, settings);
883 TEST_F(SimdMathTest, asin)
885 // Our present asin(x) algorithm achieves 2-3 ulp accuracy
886 CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
887 GMX_EXPECT_SIMD_FUNC_NEAR(std::asin, asin, settings);
890 TEST_F(SimdMathTest, acos)
892 // Our present acos(x) algorithm achieves 2-3 ulp accuracy
893 CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
894 GMX_EXPECT_SIMD_FUNC_NEAR(std::acos, acos, settings);
897 TEST_F(SimdMathTest, atan)
899 // Our present atan(x) algorithm achieves 1 ulp accuracy
900 CompareSettings settings{ Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
901 GMX_EXPECT_SIMD_FUNC_NEAR(std::atan, atan, settings);
904 TEST_F(SimdMathTest, atan2)
906 // test each quadrant
907 GMX_EXPECT_SIMD_REAL_NEAR(
908 setSimdRealFrom3R(std::atan2(c0, c3), std::atan2(c1, c4), std::atan2(c2, c5)),
909 atan2(rSimd_c0c1c2, rSimd_c3c4c5));
910 GMX_EXPECT_SIMD_REAL_NEAR(
911 setSimdRealFrom3R(std::atan2(-c0, c3), std::atan2(-c1, c4), std::atan2(-c2, c5)),
912 atan2(rSimd_m0m1m2, rSimd_c3c4c5));
913 GMX_EXPECT_SIMD_REAL_NEAR(
914 setSimdRealFrom3R(std::atan2(-c0, -c3), std::atan2(-c1, -c0), std::atan2(-c2, -c4)),
915 atan2(rSimd_m0m1m2, rSimd_m3m0m4));
916 GMX_EXPECT_SIMD_REAL_NEAR(
917 setSimdRealFrom3R(std::atan2(c0, -c3), std::atan2(c1, -c0), std::atan2(c2, -c4)),
918 atan2(rSimd_c0c1c2, rSimd_m3m0m4));
920 // cases important for calculating angles
921 // values on coordinate axes
922 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(0, c0), std::atan2(0, c1), std::atan2(0, c2)),
923 atan2(setZero(), rSimd_c0c1c2));
924 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(c0, 0), std::atan2(c1, 0), std::atan2(c2, 0)),
925 atan2(rSimd_c0c1c2, setZero()));
926 GMX_EXPECT_SIMD_REAL_NEAR(
927 setSimdRealFrom3R(std::atan2(0, -c0), std::atan2(0, -c1), std::atan2(0, -c2)),
928 atan2(setZero(), rSimd_m0m1m2));
929 GMX_EXPECT_SIMD_REAL_NEAR(
930 setSimdRealFrom3R(std::atan2(-c0, 0), std::atan2(-c1, 0), std::atan2(-c2, 0)),
931 atan2(rSimd_m0m1m2, setZero()));
932 // degenerate value (origin) should return 0.0. At least IBM xlc 13.1.5 gets the reference
933 // value wrong (-nan) at -O3 optimization, so we compare to the correct value (0.0) instead.
934 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom1R(0.0), atan2(setSimdRealFrom3R(0.0, 0.0, 0.0), setZero()));
937 /*! \brief Evaluate reference version of PME force correction. */
938 real refPmeForceCorrection(real x)
942 real y = std::sqrt(x);
943 return 2 * std::exp(-x) / (std::sqrt(M_PI) * x) - std::erf(static_cast<double>(y)) / (x * y);
947 return -4 / (3 * std::sqrt(M_PI));
951 // The PME corrections will be added to ~1/r2, so absolute tolerance of EPS is fine.
952 TEST_F(SimdMathTest, pmeForceCorrection)
954 // Pme correction relative accuracy only needs to be ~1e-6 accuracy single, 1e-10 double
955 const std::int64_t ulpTol = (GMX_DOUBLE ? 5e-10 : 5e-6) / GMX_REAL_EPS;
957 CompareSettings settings{ Range(0.15, 4), ulpTol, GMX_REAL_EPS, MatchRule::Normal };
958 GMX_EXPECT_SIMD_FUNC_NEAR(refPmeForceCorrection, pmeForceCorrection, settings);
961 /*! \brief Evaluate reference version of PME potential correction. */
962 real refPmePotentialCorrection(real x)
964 real y = std::sqrt(x);
965 return std::erf(static_cast<double>(y)) / y;
968 // The PME corrections will be added to ~1/r, so absolute tolerance of EPS is fine.
969 TEST_F(SimdMathTest, pmePotentialCorrection)
971 // Pme correction relative accuracy only needs to be ~1e-6 accuracy single, 1e-10 double
972 const std::int64_t ulpTol = (GMX_DOUBLE ? 5e-10 : 5e-6) / GMX_REAL_EPS;
974 CompareSettings settings{ Range(0.15, 4), ulpTol, GMX_REAL_EPS, MatchRule::Normal };
975 GMX_EXPECT_SIMD_FUNC_NEAR(refPmePotentialCorrection, pmePotentialCorrection, settings);
978 // Functions that only target single accuracy, even for double SIMD data
980 TEST_F(SimdMathTest, invsqrtSingleAccuracy)
982 // Here we always use float limits, since the lookup is not defined for numbers that
983 // cannot be represented in single precision.
984 const real low = std::numeric_limits<float>::min();
985 const real high = std::numeric_limits<float>::max();
986 /* Increase the allowed error by the difference between the actual precision and single */
987 setUlpTolSingleAccuracy(ulpTol_);
989 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
990 GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, invsqrtSingleAccuracy, settings);
993 /*! \brief Function wrapper to return first result when testing \ref invsqrtPairSingleAccuracy */
994 SimdReal gmx_simdcall tst_invsqrt_SingleAccuracy_pair0(SimdReal x)
997 invsqrtPairSingleAccuracy(x, x, &r0, &r1);
1001 /*! \brief Function wrapper to return second result when testing \ref invsqrtPairSingleAccuracy */
1002 SimdReal gmx_simdcall tst_invsqrt_SingleAccuracy_pair1(SimdReal x)
1005 invsqrtPairSingleAccuracy(x, x, &r0, &r1);
1009 TEST_F(SimdMathTest, invsqrtPairSingleAccuracy)
1011 // Float limits since lookup is always performed in single
1012 const real low = std::numeric_limits<float>::min();
1013 const real high = std::numeric_limits<float>::max();
1014 /* Increase the allowed error by the difference between the actual precision and single */
1015 setUlpTolSingleAccuracy(ulpTol_);
1017 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1018 GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tst_invsqrt_SingleAccuracy_pair0, settings);
1019 GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tst_invsqrt_SingleAccuracy_pair1, settings);
1022 TEST_F(SimdMathTest, sqrtSingleAccuracy)
1024 // Since the first lookup step is sometimes performed in single precision,
1025 // our SIMD sqrt can only handle single-precision input values, even when
1026 // compiled in double precision - thus we use single precision limits here.
1028 // Scale lowest value by 1+eps, since we will do some arithmetics on this value
1029 const real low = std::numeric_limits<float>::min() * (1.0 + std::numeric_limits<float>::epsilon());
1030 const real high = std::numeric_limits<float>::max();
1031 CompareSettings settings;
1033 // Increase the allowed error by the difference between the actual precision and single
1034 setUlpTolSingleAccuracy(ulpTol_);
1036 // First test that 0.0 and a few other values works
1037 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(0, std::sqrt(c0), std::sqrt(c1)),
1038 sqrtSingleAccuracy(setSimdRealFrom3R(0, c0, c1)));
1041 // As mentioned above, we cannot guarantee that very small double precision
1042 // input values (below std::numeric_limits<float>::min()) are handled correctly,
1043 // so our implementation will clamp it to zero. In this range we allow either
1044 // the correct value or zero, but it's important that it does not result in NaN or Inf values.
1046 // This test range must not be called for single precision, since if we try to divide
1047 // the interval (0.0, low( in npoints we will try to multiply by factors so small that
1048 // they end up being flushed to zero, and the loop would never end.
1049 settings = { Range(0.0, low), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
1050 GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy, settings);
1053 settings = { Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1054 GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy, settings);
1057 TEST_F(SimdMathTest, sqrtSingleAccuracyUnsafe)
1059 // Test the full range, but stick to float limits since lookup is done in single.
1060 const real low = std::numeric_limits<float>::min();
1061 const real high = std::numeric_limits<float>::max();
1063 /* Increase the allowed error by the difference between the actual precision and single */
1064 setUlpTolSingleAccuracy(ulpTol_);
1066 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1067 GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy<MathOptimization::Unsafe>, settings);
1070 TEST_F(SimdMathTest, invSingleAccuracy)
1072 // Since the first lookup step is sometimes performed in single precision,
1073 // our SIMD 1/x can only handle single-precision input values, even when
1074 // compiled in double precision.
1076 // Relevant threshold points
1077 const real minSafeFloat = std::numeric_limits<float>::min()
1078 * 10; // X value guaranteed not to result in Inf intermediates for 1/x calc.
1079 const real maxSafeFloat = std::numeric_limits<float>::max()
1080 * 0.1; // X value guaranteed not to result in DTZ intermediates for 1/x calc.
1081 // Scale highest value by 1-eps, since we will do some arithmetics on this value
1082 const real maxFloat =
1083 std::numeric_limits<float>::max() * (1.0 - std::numeric_limits<float>::epsilon());
1084 CompareSettings settings;
1086 // Increase the allowed error by the difference between the actual precision and single
1087 setUlpTolSingleAccuracy(ulpTol_);
1089 // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
1090 settings = { Range(-maxFloat, -maxSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
1091 GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1093 // Normal checks for x < 0
1094 settings = { Range(-maxSafeFloat, -minSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
1095 GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1097 // We do not care about the small range -minSafeFloat < x < +minSafeFloat where the result can be +/- Inf, since we don't require strict IEEE754.
1099 // Normal checks for x > 0
1100 settings = { Range(minSafeFloat, maxSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
1101 GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1103 // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
1104 settings = { Range(maxSafeFloat, maxFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
1105 GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1108 TEST_F(SimdMathTest, cbrtSingleAccuracy)
1110 const real low = -std::numeric_limits<real>::max();
1111 const real high = std::numeric_limits<real>::max();
1113 // Increase the allowed error by the difference between the actual precision and single
1114 setUlpTolSingleAccuracy(ulpTol_);
1116 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1117 GMX_EXPECT_SIMD_FUNC_NEAR(std::cbrt, cbrtSingleAccuracy, settings);
1120 TEST_F(SimdMathTest, invcbrtSingleAccuracy)
1122 // Increase the allowed error by the difference between the actual precision and single
1123 setUlpTolSingleAccuracy(ulpTol_);
1125 // Negative values first
1126 real low = -std::numeric_limits<real>::max();
1127 real high = -std::numeric_limits<real>::min();
1129 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1130 GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrtSingleAccuracy, settings);
1133 low = std::numeric_limits<real>::min();
1134 high = std::numeric_limits<real>::max();
1135 settings = { Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1136 GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrtSingleAccuracy, settings);
1139 TEST_F(SimdMathTest, log2SingleAccuracy)
1141 const real low = std::numeric_limits<real>::min();
1142 const real high = std::numeric_limits<real>::max();
1144 // Increase the allowed error by the difference between the actual precision and single
1145 setUlpTolSingleAccuracy(ulpTol_);
1147 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1148 GMX_EXPECT_SIMD_FUNC_NEAR(std::log2, log2SingleAccuracy, settings);
1151 TEST_F(SimdMathTest, logSingleAccuracy)
1153 const real low = std::numeric_limits<real>::min();
1154 const real high = std::numeric_limits<real>::max();
1156 // Increase the allowed error by the difference between the actual precision and single
1157 setUlpTolSingleAccuracy(ulpTol_);
1159 CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1160 GMX_EXPECT_SIMD_FUNC_NEAR(std::log, logSingleAccuracy, settings);
1163 TEST_F(SimdMathTest, exp2SingleAccuracy)
1165 // Relevant threshold points - float limits since we only target single accuracy
1166 constexpr real lowestReal = -std::numeric_limits<real>::max();
1167 constexpr real lowestRealThatProducesNormal =
1168 std::numeric_limits<real>::min_exponent
1169 - 1; // adding the significant corresponds to one more unit in exponent
1170 constexpr real lowestRealThatProducesDenormal =
1171 lowestRealThatProducesNormal
1172 - std::numeric_limits<real>::digits; // digits refer to bits in significand, so 24/53 for float/double
1173 constexpr real highestRealThatProducesNormal =
1174 std::numeric_limits<real>::max_exponent
1175 - 1; // adding the significant corresponds to one more unit in exponent
1176 CompareSettings settings;
1178 // Increase the allowed error by the difference between the actual precision and single
1179 setUlpTolSingleAccuracy(ulpTol_);
1181 // Below subnormal range all results should be zero
1182 settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
1183 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy, settings);
1185 // Subnormal range, require matching, but DTZ is fine
1186 settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal), ulpTol_,
1187 absTol_, MatchRule::Dtz };
1188 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy, settings);
1190 // Normal range, standard result expected
1191 settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal), ulpTol_,
1192 absTol_, MatchRule::Normal };
1193 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy, settings);
1196 TEST_F(SimdMathTest, exp2SingleAccuracyUnsafe)
1198 // The unsafe version is only defined in the normal range
1199 constexpr real lowestRealThatProducesNormal =
1200 std::numeric_limits<real>::min_exponent
1201 - 1; // adding the significant corresponds to one more unit in exponent
1202 constexpr real highestRealThatProducesNormal =
1203 std::numeric_limits<real>::max_exponent
1204 - 1; // adding the significant corresponds to one more unit in exponent
1206 /* Increase the allowed error by the difference between the actual precision and single */
1207 setUlpTolSingleAccuracy(ulpTol_);
1209 CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1210 ulpTol_, absTol_, MatchRule::Normal };
1211 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy<MathOptimization::Unsafe>, settings);
1214 TEST_F(SimdMathTest, expSingleAccuracy)
1216 // See threshold point comments in normal exp() test
1217 const real lowestReal = -std::numeric_limits<real>::max();
1218 // In theory the smallest value should be (min_exponent-1)*log(2), but rounding after the multiplication will cause this
1219 // value to be a single ulp too low. This might cause failed tests on CPUs that use different DTZ modes for SIMD vs.
1220 // non-SIMD arithmetics (ARM v7), so multiply by (1.0-eps) to increase it by a single ulp.
1221 const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
1223 * (1.0 - std::numeric_limits<real>::epsilon());
1224 const real lowestRealThatProducesDenormal =
1225 lowestRealThatProducesNormal - std::numeric_limits<real>::digits * std::log(2.0);
1226 const real highestRealThatProducesNormal =
1227 (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
1228 CompareSettings settings;
1230 // Increase the allowed error by the difference between the actual precision and single
1231 setUlpTolSingleAccuracy(ulpTol_);
1233 // Below subnormal range all results should be zero (so, match the reference)
1234 settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
1235 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy, settings);
1237 // Subnormal range, require matching, but DTZ is fine
1238 settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal), ulpTol_,
1239 absTol_, MatchRule::Dtz };
1240 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy, settings);
1242 // Normal range, standard result expected
1243 settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal), ulpTol_,
1244 absTol_, MatchRule::Normal };
1245 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy, settings);
1248 TEST_F(SimdMathTest, expSingleAccuracyUnsafe)
1250 // See test of exp() for comments about test ranges
1251 const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
1253 * (1 - std::numeric_limits<real>::epsilon());
1254 const real highestRealThatProducesNormal =
1255 (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
1257 // Increase the allowed error by the difference between the actual precision and single
1258 setUlpTolSingleAccuracy(ulpTol_);
1260 CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1261 ulpTol_, absTol_, MatchRule::Normal };
1262 GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy<MathOptimization::Unsafe>, settings);
1265 TEST_F(SimdMathTest, powSingleAccuracy)
1267 // We already test the log2/exp2 components of pow() extensively above, and it's a very
1268 // simple single-line function, so here we just test a handful of values to catch typos
1269 // and then some special values.
1271 // Increase the allowed error by the difference between the actual precision and single
1272 setUlpTolSingleAccuracy(ulpTol_);
1274 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
1275 powSingleAccuracy(rSimd_c0c1c2, rSimd_c3c4c5));
1277 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
1278 powSingleAccuracy(rSimd_c0c1c2, rSimd_m3m0m4));
1280 // 0^0 = 1 , 0^c1=0, -c1^0=1
1281 GMX_EXPECT_SIMD_REAL_NEAR(
1282 setSimdRealFrom3R(1.0, 0.0, 1.0),
1283 powSingleAccuracy(setSimdRealFrom3R(0, 0.0, -c1), setSimdRealFrom3R(0.0, c1, 0.0)));
1286 TEST_F(SimdMathTest, powSingleAccuracyUnsafe)
1288 // We already test the log2/exp2 components of pow() extensively above, and it's a very
1289 // simple single-line function, so here we just test a handful of values to catch typos
1290 // and then some special values.
1292 // Increase the allowed error by the difference between the actual precision and single
1293 setUlpTolSingleAccuracy(ulpTol_);
1295 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
1296 powSingleAccuracy<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_c3c4c5));
1298 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
1299 powSingleAccuracy<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_m3m0m4));
1302 TEST_F(SimdMathTest, erfSingleAccuracy)
1304 // Increase the allowed error by the difference between the actual precision and single
1305 setUlpTolSingleAccuracy(ulpTol_);
1307 CompareSettings settings{ Range(-9, 9), ulpTol_, GMX_REAL_MIN, MatchRule::Normal };
1308 GMX_EXPECT_SIMD_FUNC_NEAR(refErf, erfSingleAccuracy, settings);
1311 TEST_F(SimdMathTest, erfcSingleAccuracy)
1313 // Increase the allowed error by the difference between the actual precision and single
1314 setUlpTolSingleAccuracy(ulpTol_);
1316 // Our erfc algorithm has 4 ulp accuracy, so relax tolerance a bit
1317 CompareSettings settings{ Range(-9, 9), 4 * ulpTol_, GMX_REAL_MIN, MatchRule::Normal };
1318 GMX_EXPECT_SIMD_FUNC_NEAR(refErfc, erfcSingleAccuracy, settings);
1322 TEST_F(SimdMathTest, sinSingleAccuracy)
1324 /* Increase the allowed error by the difference between the actual precision and single */
1325 setUlpTolSingleAccuracy(ulpTol_);
1327 CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
1328 GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sinSingleAccuracy, settings);
1330 // Range reduction leads to accuracy loss, so we might want higher tolerance here
1331 settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
1332 GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sinSingleAccuracy, settings);
1335 TEST_F(SimdMathTest, cosSingleAccuracy)
1337 /* Increase the allowed error by the difference between the actual precision and single */
1338 setUlpTolSingleAccuracy(ulpTol_);
1340 CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
1341 GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cosSingleAccuracy, settings);
1343 // Range reduction leads to accuracy loss, so we might want higher tolerance here
1344 settings = { Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
1345 GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cosSingleAccuracy, settings);
1348 TEST_F(SimdMathTest, tanSingleAccuracy)
1350 /* Increase the allowed error by the difference between the actual precision and single */
1351 setUlpTolSingleAccuracy(ulpTol_);
1353 // Tan(x) is a little sensitive due to the division in the algorithm.
1354 // Rather than using lots of extra FP operations, we accept the algorithm
1355 // presently only achieves a ~3 ulp error and use the medium tolerance.
1356 CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
1357 GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tanSingleAccuracy, settings);
1359 // Range reduction leads to accuracy loss, so we might want higher tolerance here
1360 settings = { Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
1361 GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tanSingleAccuracy, settings);
1364 TEST_F(SimdMathTest, asinSingleAccuracy)
1366 /* Increase the allowed error by the difference between the actual precision and single */
1367 setUlpTolSingleAccuracy(ulpTol_);
1369 // Our present asin(x) algorithm achieves 2-3 ulp accuracy
1370 CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
1371 GMX_EXPECT_SIMD_FUNC_NEAR(std::asin, asinSingleAccuracy, settings);
1374 TEST_F(SimdMathTest, acosSingleAccuracy)
1376 /* Increase the allowed error by the difference between the actual precision and single */
1377 setUlpTolSingleAccuracy(ulpTol_);
1379 // Our present acos(x) algorithm achieves 2-3 ulp accuracy
1380 CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
1381 GMX_EXPECT_SIMD_FUNC_NEAR(std::acos, acosSingleAccuracy, settings);
1384 TEST_F(SimdMathTest, atanSingleAccuracy)
1386 /* Increase the allowed error by the difference between the actual precision and single */
1387 setUlpTolSingleAccuracy(ulpTol_);
1389 // Our present atan(x) algorithm achieves 1 ulp accuracy
1390 CompareSettings settings{ Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
1391 GMX_EXPECT_SIMD_FUNC_NEAR(std::atan, atanSingleAccuracy, settings);
1394 TEST_F(SimdMathTest, atan2SingleAccuracy)
1396 /* Increase the allowed error by the difference between the actual precision and single */
1397 setUlpTolSingleAccuracy(ulpTol_);
1399 // test each quadrant
1400 GMX_EXPECT_SIMD_REAL_NEAR(
1401 setSimdRealFrom3R(std::atan2(c0, c3), std::atan2(c1, c4), std::atan2(c2, c5)),
1402 atan2SingleAccuracy(rSimd_c0c1c2, rSimd_c3c4c5));
1403 GMX_EXPECT_SIMD_REAL_NEAR(
1404 setSimdRealFrom3R(std::atan2(-c0, c3), std::atan2(-c1, c4), std::atan2(-c2, c5)),
1405 atan2SingleAccuracy(rSimd_m0m1m2, rSimd_c3c4c5));
1406 GMX_EXPECT_SIMD_REAL_NEAR(
1407 setSimdRealFrom3R(std::atan2(-c0, -c3), std::atan2(-c1, -c0), std::atan2(-c2, -c4)),
1408 atan2SingleAccuracy(rSimd_m0m1m2, rSimd_m3m0m4));
1409 GMX_EXPECT_SIMD_REAL_NEAR(
1410 setSimdRealFrom3R(std::atan2(c0, -c3), std::atan2(c1, -c0), std::atan2(c2, -c4)),
1411 atan2SingleAccuracy(rSimd_c0c1c2, rSimd_m3m0m4));
1412 // cases important for calculating angles
1413 // values on coordinate axes
1414 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(0, c0), std::atan2(0, c1), std::atan2(0, c2)),
1415 atan2SingleAccuracy(setZero(), rSimd_c0c1c2));
1416 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(c0, 0), std::atan2(c1, 0), std::atan2(c2, 0)),
1417 atan2SingleAccuracy(rSimd_c0c1c2, setZero()));
1418 GMX_EXPECT_SIMD_REAL_NEAR(
1419 setSimdRealFrom3R(std::atan2(0, -c0), std::atan2(0, -c1), std::atan2(0, -c2)),
1420 atan2SingleAccuracy(setZero(), rSimd_m0m1m2));
1421 GMX_EXPECT_SIMD_REAL_NEAR(
1422 setSimdRealFrom3R(std::atan2(-c0, 0), std::atan2(-c1, 0), std::atan2(-c2, 0)),
1423 atan2SingleAccuracy(rSimd_m0m1m2, setZero()));
1425 // degenerate value (origin) should return 0.0. At least IBM xlc 13.1.5 gets the reference
1426 // value wrong (-nan) at -O3 optimization, so we compare to the correct value (0.0) instead.
1427 GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom1R(0.0),
1428 atan2SingleAccuracy(setSimdRealFrom3R(0.0, 0.0, 0.0), setZero()));
1431 TEST_F(SimdMathTest, pmeForceCorrectionSingleAccuracy)
1433 // The PME corrections will be added to ~1/r2, so absolute tolerance of EPS is fine.
1434 // Pme correction only needs to be ~1e-6 accuracy single.
1435 // Then increase the allowed error by the difference between the actual precision and single.
1436 setUlpTolSingleAccuracy(std::int64_t(5e-6 / GMX_FLOAT_EPS));
1438 CompareSettings settings{ Range(0.15, 4), ulpTol_, GMX_FLOAT_EPS, MatchRule::Normal };
1439 GMX_EXPECT_SIMD_FUNC_NEAR(refPmeForceCorrection, pmeForceCorrectionSingleAccuracy, settings);
1442 TEST_F(SimdMathTest, pmePotentialCorrectionSingleAccuracy)
1444 // The PME corrections will be added to ~1/r, so absolute tolerance of EPS is fine.
1445 // Pme correction only needs to be ~1e-6 accuracy single.
1446 // Then increase the allowed error by the difference between the actual precision and single.
1447 setUlpTolSingleAccuracy(std::int64_t(5e-6 / GMX_FLOAT_EPS));
1449 CompareSettings settings{ Range(0.15, 4), ulpTol_, GMX_FLOAT_EPS, MatchRule::Normal };
1450 GMX_EXPECT_SIMD_FUNC_NEAR(refPmePotentialCorrection, pmePotentialCorrectionSingleAccuracy, settings);
1455 # endif // GMX_SIMD_HAVE_REAL