src/gromacs/simd/tests/simd_math.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2017,2018,2019,2020,2021, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "gmxpre.h"
  36
  37 #include "gromacs/simd/simd_math.h"
  38
  39 #include "config.h"
  40
  41 #include <cmath>
  42 #include <cstdint>
  43
  44 #include <map>
  45 #include <utility>
  46 #include <vector>
  47
  48 #include "gromacs/math/units.h"
  49 #include "gromacs/math/utilities.h"
  50 #include "gromacs/options/basicoptions.h"
  51 #include "gromacs/simd/simd.h"
  52
  53 #include "testutils/refdata.h"
  54 #include "testutils/testasserts.h"
  55
  56 #include "simd.h"
  57
  58 #if GMX_SIMD
  59
  60 namespace gmx
  61 {
  62 namespace test
  63 {
  64
  65 /*! \cond internal */
  66 /*! \addtogroup module_simd */
  67 /*! \{ */
  68
  69 #    if GMX_SIMD_HAVE_REAL
  70
  71 class SimdMathTest : public SimdTest
  72 {
  73 public:
  74     /*! \brief Type for half-open intervals specifying test ranges */
  75     typedef std::pair<real, real> Range;
  76
  77     /*! \brief Control what is considered matching values
  78      *
  79      * Normal simply means that we request the values to be equal
  80      * to within the specified tolerance.
  81      * However, there are also two more cases that are special:
  82      *
  83      * - Even if we only care about normal (i.e., not denormal) values, some math
  84      *   libraries might clamp the value to zero, which means our SIMD output
  85      *   might not match their values. By using MatchRule::Dtz, we will consider
  86      *   all values both from the reference and test functions that are within the
  87      *   requested ulp tolerance of a denormal number to be equivalent to 0.0.
  88      * - For some older architectures without fused multiply-add units (e.g. x86 SSE2),
  89      *   we might end up clamping the results to zero just before reaching
  90      *   denormal output, since the intermediate results e.g. in polynomial
  91      *   approximations can be smaller than the final one. We often simply don't
  92      *   care about those values, and then one can use
  93      *   MatchRule::ReferenceOrZero to allow the test value to either match
  94      *   the reference or be zero.
  95      */
  96     enum class MatchRule
  97     {
  98         Normal, //!< Match function values
  99         Dtz, //!< Match function values after setting denormals to zero both in test and reference
 100         ReferenceOrZero, //!< Test values can either match reference or be zero
 101     };
 102
 103     const std::map<MatchRule, std::string> matchRuleNames_ = {
 104         { MatchRule::Normal, "Test should match reference." },
 105         { MatchRule::Dtz, "Test should match reference, with denormals treated as 0.0." },
 106         { MatchRule::ReferenceOrZero, "Test should match reference or 0.0." }
 107     };
 108
 109     /*! \brief Settings used for simd math function comparisons */
 110     struct CompareSettings
 111     {
 112         Range        range;     //!< Range over which to test function
 113         std::int64_t ulpTol;    //!< Ulp tolerance
 114         real         absTol;    //!< Absolute tolerance
 115         MatchRule    matchRule; //!< Decide what we consider a match
 116     };
 117
 118     ::testing::AssertionResult compareSimdMathFunction(const char*            refFuncExpr,
 119                                                        const char*            simdFuncExpr,
 120                                                        const char*            compareSettingsExpr,
 121                                                        real                   refFunc(real x),
 122                                                        SimdReal gmx_simdcall  simdFunc(SimdReal x),
 123                                                        const CompareSettings& compareSettings);
 124
 125     /*! \brief Generate test point vector
 126      *
 127      *  \param range  The test interval, half open. Upper limit is not included.
 128      *                Pass by value, since we need to modify in method anyway.
 129      *  \param points Number of points to generate. This might be increased
 130      *                slightly to account both for extra special values like 0.0
 131      *                and the SIMD width.
 132      *
 133      * This routine generates a vector with test points separated by constant
 134      * multiplicative factors, based on the range and number of points in the
 135      * class. If the range includes both negative and positive values, points
 136      * will be generated separately for the negative/positive intervals down
 137      * to the smallest real number that can be represented, and we also include
 138      * 0.0 explicitly.
 139      *
 140      * This is highly useful for large test ranges. For example, with a linear
 141      * 1000-point division of the range (1,1e10) the first three values to test
 142      * would be 1, 10000000.999, and 20000000.998, etc. For large values we would
 143      * commonly hit the point where adding the small delta has no effect due to
 144      * limited numerical precision.
 145      * When we instead use this routine, the values will be 1, 1.0239, 1.0471, etc.
 146      * This will spread the entropy over all bits in the IEEE754 representation,
 147      * and be a much better test of all potential input values.
 148      *
 149      *  \note We do not use the static variable s_nPoints in the parent class
 150      *        to avoid altering any value the user has set on the command line; since
 151      *        it's a static member, changing it would have permanent effect.
 152      */
 153     static std::vector<real> generateTestPoints(Range range, std::size_t points);
 154
 155     /*! \brief Test routine for the test point vector generation
 156      */
 157     static void generateTestPointsTest();
 158 };
 159
 160 /*! \brief Test approximate equality of SIMD vs reference version of a function.
 161  *
 162  * This macro takes vanilla C and SIMD flavors of a function and tests it with
 163  * the number of points, range, and tolerances specified by the test fixture class.
 164  *
 165  * The third option controls the range, tolerances, and match settings.
 166  */
 167 #        define GMX_EXPECT_SIMD_FUNC_NEAR(refFunc, tstFunc, compareSettings) \
 168             EXPECT_PRED_FORMAT3(compareSimdMathFunction, refFunc, tstFunc, compareSettings)
 169
 170 std::vector<real> SimdMathTest::generateTestPoints(Range inputRange, std::size_t inputPoints)
 171 {
 172
 173     std::vector<real> testPoints;
 174     testPoints.reserve(inputPoints);
 175
 176     GMX_RELEASE_ASSERT(inputRange.first < inputRange.second,
 177                        "The start of the interval must come before the end");
 178
 179     std::vector<Range> testRanges;
 180
 181     if (inputRange.first < 0 && inputRange.second > 0)
 182     {
 183         testRanges.emplace_back(Range({ inputRange.first, -std::numeric_limits<real>::min() }));
 184         testRanges.emplace_back(Range({ 0.0, inputRange.second }));
 185     }
 186     else
 187     {
 188         if (inputRange.second == 0)
 189         {
 190             inputRange.second = -std::numeric_limits<real>::min();
 191             inputRange.first  = std::min(inputRange.first, inputRange.second);
 192         }
 193         testRanges.push_back(inputRange);
 194     }
 195
 196     for (Range& range : testRanges)
 197     {
 198         std::size_t points = inputPoints / testRanges.size();
 199
 200         // The value 0 is special, and can only occur at the start of
 201         // the interval after the corrections outside this loop.
 202         // Add it explicitly, and adjust the interval to continue
 203         // at the first valid non-zero positive number.
 204         if (range.first == 0)
 205         {
 206             testPoints.push_back(0.0);
 207             range.first = std::numeric_limits<real>::min();
 208             points--; // Used one point
 209         }
 210
 211         union
 212         {
 213             real                                                                               r;
 214             std::conditional<sizeof(real) == sizeof(double), std::int64_t, std::int32_t>::type i;
 215         } low, high, x;
 216
 217         low.r  = range.first;
 218         high.r = range.second;
 219
 220         // IEEE754 floating-point numbers have the cool property that for any range of
 221         // constant sign, for all non-zero numbers a constant (i.e., linear) difference
 222         // in the bitwise representation corresponds to a constant multiplicative factor.
 223         //
 224         // Divide the ulp difference evenly
 225         std::int64_t ulpDiff = high.i - low.i;
 226         // dividend and divisor must both be signed types
 227         std::int64_t ulpDelta    = ulpDiff / static_cast<std::int64_t>(points);
 228         std::int64_t minUlpDelta = (ulpDiff > 0) ? 1 : -1;
 229
 230         if (ulpDelta == 0)
 231         {
 232             // Very short interval or very many points caused round-to-zero.
 233             // Select the smallest possible change, which is one ulp (with correct sign)
 234             ulpDelta = minUlpDelta;
 235             points   = std::abs(ulpDiff);
 236         }
 237
 238         x.r = low.r;
 239         // Use an index-based loop to avoid floating-point comparisons with
 240         // values that might have overflowed. Save one point for the very last
 241         // bitwise value that is part of the interval
 242         for (std::size_t i = 0; i < points - 1; i++)
 243         {
 244             testPoints.push_back(x.r);
 245             x.i += ulpDelta;
 246         }
 247
 248         // Make sure we test the very last point that is inside the interval
 249         x.r = high.r;
 250         x.i -= minUlpDelta;
 251         testPoints.push_back(x.r);
 252     }
 253     return testPoints;
 254 }
 255
 256 /*! \brief Implementation routine to compare SIMD vs reference functions.
 257  *
 258  * \param refFuncExpr         Description of reference function expression
 259  * \param simdFuncExpr        Description of SIMD function expression
 260  * \param compareSettingsExpr Description of compareSettings
 261  * \param refFunc             Reference math function pointer
 262  * \param simdFunc            SIMD math function pointer
 263  * \param compareSettings     Structure with the range, tolerances, and
 264  *                            matching rules to use for the comparison.
 265  *
 266  * \note You should not never call this function directly,  but use the
 267  *       macro GMX_EXPECT_SIMD_FUNC_NEAR(refFunc,tstFunc,matchRule) instead.
 268  */
 269 ::testing::AssertionResult SimdMathTest::compareSimdMathFunction(const char* refFuncExpr,
 270                                                                  const char* simdFuncExpr,
 271                                                                  const char gmx_unused* compareSettingsExpr,
 272                                                                  real refFunc(real x),
 273                                                                  SimdReal gmx_simdcall simdFunc(SimdReal x),
 274                                                                  const CompareSettings& compareSettings)
 275 {
 276     std::vector<real> vx(GMX_SIMD_REAL_WIDTH);
 277     std::vector<real> vref(GMX_SIMD_REAL_WIDTH);
 278     std::vector<real> vtst(GMX_SIMD_REAL_WIDTH);
 279     real              absDiff;
 280     std::int64_t      ulpDiff;
 281     std::int64_t      maxUlpDiff = 0;
 282     real              maxUlpDiffPos;
 283     real              refValMaxUlpDiff, simdValMaxUlpDiff;
 284     const int         niter = s_nPoints / GMX_SIMD_REAL_WIDTH;
 285
 286     union
 287     {
 288         real                                                                               r;
 289         std::conditional<sizeof(real) == sizeof(double), std::int64_t, std::int32_t>::type i;
 290     } conv0, conv1;
 291
 292     // Allow zero-size intervals - nothing to test means we succeeded at it
 293     if (compareSettings.range.first == compareSettings.range.second)
 294     {
 295         ::testing::AssertionSuccess();
 296     }
 297
 298     // Calculate the tolerance limit to use for denormals - we want
 299     // values that are within the ulp tolerance of denormals to be considered matching
 300     conv0.r = std::numeric_limits<real>::min();
 301     conv0.i += compareSettings.ulpTol - 1; // min() itself is not denormal, but one ulp larger
 302     const real denormalLimit = conv0.r;
 303
 304     // We want to test as many diverse bit combinations as possible over the range requested,
 305     // and in particular do it evenly spaced in bit-space.
 306     // Due to the way IEEE754 floating-point is represented, that means we should have a
 307     // constant multiplicative factor between adjacent values. This gets a bit complicated
 308     // when we have both positive and negative values, so we offload the generation of the
 309     // specific testing values to a separate routine
 310     std::vector<real> testPoints = generateTestPoints(compareSettings.range, s_nPoints);
 311
 312     size_t pointIndex = 0;
 313
 314     for (int iter = 0; iter < niter; iter++)
 315     {
 316         for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
 317         {
 318             vx[i]   = testPoints[pointIndex];
 319             vref[i] = refFunc(vx[i]);
 320             // If we reach the end of the points, stop increasing index so we pad with
 321             // extra copies of the last element up to the SIMD width
 322             if (pointIndex + 1 < testPoints.size())
 323             {
 324                 pointIndex++;
 325             }
 326         }
 327         vtst = simdReal2Vector(simdFunc(vector2SimdReal(vx)));
 328
 329         bool absOk = true, signOk = true;
 330         for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
 331         {
 332             if (compareSettings.matchRule == MatchRule::Dtz && std::abs(vref[i]) <= denormalLimit
 333                 && std::abs(vtst[i]) <= denormalLimit)
 334             {
 335                 continue;
 336             }
 337
 338             if (compareSettings.matchRule == MatchRule::ReferenceOrZero && vtst[i] == 0.0)
 339             {
 340                 // If we accept 0.0 for the test function, we can continue to the next loop iteration.
 341                 continue;
 342             }
 343
 344             absDiff = std::abs(vref[i] - vtst[i]);
 345             absOk   = absOk && (absDiff < compareSettings.absTol);
 346             signOk  = signOk && ((vref[i] >= 0 && vtst[i] >= 0) || (vref[i] <= 0 && vtst[i] <= 0));
 347
 348             if (absDiff >= compareSettings.absTol)
 349             {
 350                 /* We replicate the trivial ulp differences comparison here rather than
 351                  * calling the lower-level routine for comparing them, since this enables
 352                  * us to run through the entire test range and report the largest deviation
 353                  * without lots of extra glue routines.
 354                  */
 355                 conv0.r = vref[i];
 356                 conv1.r = vtst[i];
 357                 ulpDiff = llabs(conv0.i - conv1.i);
 358                 if (ulpDiff > maxUlpDiff)
 359                 {
 360                     maxUlpDiff        = ulpDiff;
 361                     maxUlpDiffPos     = vx[i];
 362                     refValMaxUlpDiff  = vref[i];
 363                     simdValMaxUlpDiff = vtst[i];
 364                 }
 365             }
 366         }
 367         if ((!absOk) && (!signOk))
 368         {
 369             return ::testing::AssertionFailure()
 370                    << "Failing SIMD math function comparison due to sign differences." << std::endl
 371                    << "Reference function: " << refFuncExpr << std::endl
 372                    << "Simd function:      " << simdFuncExpr << std::endl
 373                    << "Test range is ( " << compareSettings.range.first << " , "
 374                    << compareSettings.range.second << " ) " << std::endl
 375                    << "Match rule: " << matchRuleNames_.at(compareSettings.matchRule) << std::endl
 376                    << "First sign difference around x=" << std::setprecision(20)
 377                    << ::testing::PrintToString(vx) << std::endl
 378                    << "Ref values:  " << std::setprecision(20) << ::testing::PrintToString(vref)
 379                    << std::endl
 380                    << "SIMD values: " << std::setprecision(20) << ::testing::PrintToString(vtst)
 381                    << std::endl;
 382         }
 383     }
 384
 385     GMX_RELEASE_ASSERT(compareSettings.ulpTol >= 0, "Invalid ulp value.");
 386     if (maxUlpDiff <= compareSettings.ulpTol)
 387     {
 388         return ::testing::AssertionSuccess();
 389     }
 390     else
 391     {
 392         return ::testing::AssertionFailure()
 393                << "Failing SIMD math function ulp comparison between " << refFuncExpr << " and "
 394                << simdFuncExpr << std::endl
 395                << "Requested ulp tolerance: " << compareSettings.ulpTol << std::endl
 396                << "Requested abs tolerance: " << compareSettings.absTol << std::endl
 397                << "Match rule: " << matchRuleNames_.at(compareSettings.matchRule) << std::endl
 398                << "Largest Ulp difference occurs for x=" << std::setprecision(20) << maxUlpDiffPos
 399                << std::endl
 400                << "Ref  values: " << std::setprecision(20) << refValMaxUlpDiff << std::endl
 401                << "SIMD values: " << std::setprecision(20) << simdValMaxUlpDiff << std::endl
 402                << "Ulp diff.:   " << std::setprecision(20) << maxUlpDiff << std::endl;
 403     }
 404 }
 405
 406 // Actual routine to generate a small set of test points in current precision. This will
 407 // be called by either the double or single precision test fixture, since we need different
 408 // test names to compare to the right reference data.
 409 void SimdMathTest::generateTestPointsTest()
 410 {
 411     int                             points(10);
 412     gmx::test::TestReferenceData    data;
 413     gmx::test::TestReferenceChecker checker(data.rootChecker());
 414
 415     std::vector<real> result;
 416
 417     result = generateTestPoints(Range(-1e10, -1), points);
 418     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10,-1[");
 419
 420     result = generateTestPoints(Range(-1e10, -1e-10), points);
 421     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, -1e-10[");
 422
 423     result = generateTestPoints(Range(1, 1e10), points);
 424     checker.checkSequence(result.begin(), result.end(), "Test points for interval [1, 1e10[");
 425
 426     result = generateTestPoints(Range(1e-10, 1e10), points);
 427     checker.checkSequence(result.begin(), result.end(), "Test points for interval [1e-10, 1e10[");
 428
 429     result = generateTestPoints(Range(-1e10, 1e-10), points);
 430     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, 1e-10[");
 431
 432     result = generateTestPoints(Range(-1e-10, 1e-10), points);
 433     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e-10, 1e-10[");
 434
 435     result = generateTestPoints(Range(-1e-10, 1e10), points);
 436     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e-10, 1e10[");
 437
 438     result = generateTestPoints(Range(-1e10, 1e10), points);
 439     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, 1e10[");
 440
 441     result = generateTestPoints(Range(-1000, 0), points);
 442     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1000, 0[");
 443
 444     result = generateTestPoints(Range(0, 1000), points);
 445     checker.checkSequence(result.begin(), result.end(), "Test points for interval [0, 1000[");
 446 }
 447
 448 /*! \} */
 449 /*! \endcond */
 450
 451
 452 // Actual math function tests below
 453
 454 /*! \cond internal */
 455 /*! \addtogroup module_simd */
 456 /*! \{ */
 457
 458 namespace
 459 {
 460
 461 // Reference data is selected based on test name, so make the test name precision-dependent
 462 #        if GMX_DOUBLE
 463 TEST_F(SimdMathTest, generateTestPointsDouble)
 464 {
 465     generateTestPointsTest();
 466 }
 467 #        else
 468 TEST_F(SimdMathTest, generateTestPointsFloat)
 469 {
 470     generateTestPointsTest();
 471 }
 472 #        endif
 473
 474 TEST_F(SimdMathTest, copysign)
 475 {
 476     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(-c0, c1, c2),
 477                             copysign(setSimdRealFrom3R(c0, c1, c2), setSimdRealFrom3R(-c3, c4, 0)));
 478     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(-c0, c1, c2),
 479                             copysign(setSimdRealFrom3R(-c0, -c1, -c2), setSimdRealFrom3R(-c3, c4, 0)));
 480     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(c0, -c1, c2),
 481                             copysign(setSimdRealFrom3R(c0, c1, c2), setSimdRealFrom3R(c3, -c4, 0)));
 482     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(c0, -c1, c2),
 483                             copysign(setSimdRealFrom3R(-c0, -c1, -c2), setSimdRealFrom3R(c3, -c4, 0)));
 484 }
 485
 486 /*! \brief Function wrapper to evaluate reference 1/sqrt(x) */
 487 real refInvsqrt(real x)
 488 {
 489     return 1.0 / std::sqrt(x);
 490 }
 491
 492 TEST_F(SimdMathTest, invsqrt)
 493 {
 494     const real      low  = std::numeric_limits<float>::min();
 495     const real      high = std::numeric_limits<float>::max();
 496     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 497
 498     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, invsqrt, settings);
 499 }
 500
 501 TEST_F(SimdMathTest, maskzInvsqrt)
 502 {
 503     SimdReal x   = setSimdRealFrom3R(c1, 0.0, c2);
 504     SimdBool m   = (setZero() < x);
 505     SimdReal ref = setSimdRealFrom3R(1.0 / std::sqrt(c1), 0.0, 1.0 / std::sqrt(c2));
 506     GMX_EXPECT_SIMD_REAL_NEAR(ref, maskzInvsqrt(x, m));
 507 }
 508
 509 /*! \brief Function wrapper to return first result when testing \ref invsqrtPair */
 510 SimdReal gmx_simdcall tstInvsqrtPair0(SimdReal x)
 511 {
 512     SimdReal r0, r1;
 513     invsqrtPair(x, x, &r0, &r1);
 514     return r0;
 515 }
 516
 517 /*! \brief Function wrapper to return second result when testing \ref invsqrtPair */
 518 SimdReal gmx_simdcall tstInvsqrtPair1(SimdReal x)
 519 {
 520     SimdReal r0, r1;
 521     invsqrtPair(x, x, &r0, &r1);
 522     return r1;
 523 }
 524
 525 TEST_F(SimdMathTest, invsqrtPair)
 526 {
 527     const real low  = std::numeric_limits<float>::min();
 528     const real high = std::numeric_limits<float>::max();
 529
 530     // Accuracy conversions lose a bit of accuracy compared to all-double,
 531     // so increase the tolerance to 4*ulpTol_
 532     CompareSettings settings{ Range(low, high), 4 * ulpTol_, absTol_, MatchRule::Normal };
 533
 534     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tstInvsqrtPair0, settings);
 535     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tstInvsqrtPair1, settings);
 536 }
 537
 538 /*! \brief Function wrapper to evaluate reference sqrt(x) */
 539 real refSqrt(real x)
 540 {
 541     return std::sqrt(x);
 542 }
 543
 544 TEST_F(SimdMathTest, sqrt)
 545 {
 546     // Since the first lookup step is sometimes performed in single precision,
 547     // our SIMD sqrt can only handle single-precision input values, even when
 548     // compiled in double precision.
 549
 550     const real      minFloat     = std::numeric_limits<float>::min();
 551     const real      minSafeFloat = minFloat * 10;
 552     const real      maxSafeFloat = std::numeric_limits<float>::max() * 0.1;
 553     CompareSettings settings;
 554     // The accuracy conversions lose a bit of extra accuracy compared to
 555     // doing the iterations in all-double.
 556     setUlpTol(4 * ulpTol_);
 557
 558     // First test that 0.0 and a few other values work
 559     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(0, std::sqrt(c1), std::sqrt(c2)),
 560                               sqrt(setSimdRealFrom3R(0, c1, c2)));
 561
 562 #        if GMX_DOUBLE
 563     // As mentioned above, we cannot guarantee that very small double precision
 564     // input values (below std::numeric_limits<float>::min()) are handled correctly,
 565     // so our implementation will clamp it to zero. In this range we allow either
 566     // the correct value or zero, but it's important that it does not result in NaN or Inf values.
 567     //
 568     // This test range must not be called for single precision, since if we try to divide
 569     // the interval (0.0, low( in npoints we will try to multiply by factors so small that
 570     // they end up being flushed to zero, and the loop would never end.
 571     settings = { Range(0.0, minFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
 572     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
 573 #        endif
 574
 575     // Next range: Just about minFloat the lookup should always work, but the results
 576     // might be a bit fragile due to issues with the N-R iterations being flushed to zero
 577     // for denormals. We can probably relax the latter in double precision, but since we
 578     // anyway cannot handle numbers that cannot be represented in single it's not worth
 579     // worrying too much about whether we have zero or an exact values around 10^-38....
 580     settings = { Range(minFloat, minSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
 581     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
 582
 583     settings = { Range(minSafeFloat, maxSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
 584     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
 585 }
 586
 587 TEST_F(SimdMathTest, sqrtUnsafe)
 588 {
 589     const real minSafeFloat = std::numeric_limits<float>::min() * 10;
 590     const real maxSafeFloat = std::numeric_limits<float>::max() * 0.1;
 591
 592     // The accuracy conversions lose a bit of extra accuracy compared to
 593     // doing the iterations in all-double, so we use 4*ulpTol_
 594     setUlpTol(4 * ulpTol_);
 595
 596     CompareSettings settings{ Range(minSafeFloat, maxSafeFloat), 4 * ulpTol_, absTol_, MatchRule::Normal };
 597     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt<MathOptimization::Unsafe>, settings);
 598 }
 599
 600 /*! \brief Function wrapper to evaluate reference 1/x */
 601 real refInv(real x)
 602 {
 603     return 1.0 / x;
 604 }
 605
 606 TEST_F(SimdMathTest, inv)
 607 {
 608     // Since the first lookup step is sometimes performed in single precision,
 609     // our SIMD 1/x can only handle single-precision input values, even when
 610     // compiled in double precision.
 611
 612     // Relevant threshold points
 613     const real minSafeFloat = std::numeric_limits<float>::min()
 614                               * 10; // X value guaranteed not to result in Inf intermediates for 1/x calc.
 615     const real maxSafeFloat = std::numeric_limits<float>::max()
 616                               * 0.1; // X value guaranteed not to result in DTZ intermediates for 1/x calc.
 617     // Scale highest value by 1-eps, since we will do some arithmetics on this value
 618     const real maxFloat =
 619             std::numeric_limits<float>::max() * (1.0 - std::numeric_limits<float>::epsilon());
 620     CompareSettings settings;
 621
 622     // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
 623     settings = { Range(-maxFloat, -maxSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
 624     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
 625
 626     // Normal checks for x < 0
 627     settings = { Range(-maxSafeFloat, -minSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
 628     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
 629
 630     // We do not care about the small range -minSafeFloat < x < +minSafeFloat where the result can be +/- Inf, since we don't require strict IEEE754.
 631
 632     // Normal checks for x > 0
 633     settings = { Range(minSafeFloat, maxSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
 634     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
 635
 636     // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
 637     settings = { Range(maxSafeFloat, maxFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
 638     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
 639 }
 640
 641 TEST_F(SimdMathTest, maskzInv)
 642 {
 643     SimdReal x   = setSimdRealFrom3R(c1, 0.0, c2);
 644     SimdBool m   = (setZero() < x);
 645     SimdReal ref = setSimdRealFrom3R(1.0 / c1, 0.0, 1.0 / c2);
 646     GMX_EXPECT_SIMD_REAL_NEAR(ref, maskzInv(x, m));
 647 }
 648
 649 TEST_F(SimdMathTest, cbrt)
 650 {
 651     const real low  = -std::numeric_limits<real>::max();
 652     const real high = std::numeric_limits<real>::max();
 653
 654     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 655     GMX_EXPECT_SIMD_FUNC_NEAR(std::cbrt, cbrt, settings);
 656 }
 657
 658 /*! \brief Function wrapper to evaluate reference 1/cbrt(x) */
 659 real refInvCbrt(real x)
 660 {
 661     return 1.0 / std::cbrt(x);
 662 }
 663
 664 TEST_F(SimdMathTest, invcbrt)
 665 {
 666     // Negative values first
 667     real low  = -std::numeric_limits<real>::max();
 668     real high = -std::numeric_limits<real>::min();
 669
 670     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 671     GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrt, settings);
 672
 673     // Positive values
 674     low      = std::numeric_limits<real>::min();
 675     high     = std::numeric_limits<real>::max();
 676     settings = { Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 677     GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrt, settings);
 678 }
 679
 680 TEST_F(SimdMathTest, log2)
 681 {
 682     const real low  = std::numeric_limits<real>::min();
 683     const real high = std::numeric_limits<real>::max();
 684
 685     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 686     GMX_EXPECT_SIMD_FUNC_NEAR(std::log2, log2, settings);
 687 }
 688
 689 TEST_F(SimdMathTest, log)
 690 {
 691     const real low  = std::numeric_limits<real>::min();
 692     const real high = std::numeric_limits<real>::max();
 693
 694     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 695     GMX_EXPECT_SIMD_FUNC_NEAR(std::log, log, settings);
 696 }
 697
 698 TEST_F(SimdMathTest, exp2)
 699 {
 700     // Relevant threshold points
 701     constexpr real lowestReal = -std::numeric_limits<real>::max();
 702     constexpr real lowestRealThatProducesNormal =
 703             std::numeric_limits<real>::min_exponent
 704             - 1; // adding the significant corresponds to one more unit in exponent
 705     constexpr real lowestRealThatProducesDenormal =
 706             lowestRealThatProducesNormal
 707             - std::numeric_limits<real>::digits; // digits refer to bits in significand, so 24/53 for float/double
 708     constexpr real highestRealThatProducesNormal =
 709             std::numeric_limits<real>::max_exponent
 710             - 1; // adding the significant corresponds to one more unit in exponent
 711     CompareSettings settings;
 712
 713     // Below subnormal range all results should be zero (so, match the reference)
 714     settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
 715     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
 716
 717     // Subnormal range, require matching, but DTZ is fine
 718     settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal),
 719                  ulpTol_,
 720                  absTol_,
 721                  MatchRule::Dtz };
 722     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
 723
 724     // Normal range, standard result expected
 725     settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
 726                  ulpTol_,
 727                  absTol_,
 728                  MatchRule::Normal };
 729     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
 730 }
 731
 732 TEST_F(SimdMathTest, exp2Unsafe)
 733 {
 734     // The unsafe version is only defined in the normal range
 735     constexpr real lowestRealThatProducesNormal =
 736             std::numeric_limits<real>::min_exponent
 737             - 1; // adding the significant corresponds to one more unit in exponent
 738     constexpr real highestRealThatProducesNormal =
 739             std::numeric_limits<real>::max_exponent
 740             - 1; // adding the significant corresponds to one more unit in exponent
 741
 742     CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
 743                               ulpTol_,
 744                               absTol_,
 745                               MatchRule::Normal };
 746     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2<MathOptimization::Unsafe>, settings);
 747 }
 748
 749 TEST_F(SimdMathTest, exp)
 750 {
 751     // Relevant threshold points. See the exp2 test for more details about the values; these are
 752     // simply scaled by log(2) due to the difference between exp2 and exp.
 753     const real lowestReal = -std::numeric_limits<real>::max();
 754     // In theory the smallest value should be (min_exponent-1)*log(2), but rounding after the multiplication will cause this
 755     // value to be a single ulp too low. This might cause failed tests on CPUs that use different DTZ modes for SIMD vs.
 756     // non-SIMD arithmetics (e.g. ARM v7), so multiply by (1.0-eps) to increase it by a single ulp.
 757     const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
 758                                               * std::log(2.0)
 759                                               * (1 - std::numeric_limits<real>::epsilon());
 760     const real lowestRealThatProducesDenormal =
 761             lowestRealThatProducesNormal - std::numeric_limits<real>::digits * std::log(2.0);
 762     const real highestRealThatProducesNormal =
 763             (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
 764     CompareSettings settings;
 765
 766     // Below subnormal range all results should be zero (so, match the reference)
 767     settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
 768     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
 769
 770     // Subnormal range, require matching, but DTZ is fine
 771     settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal),
 772                  ulpTol_,
 773                  absTol_,
 774                  MatchRule::Dtz };
 775     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
 776
 777     // Normal range, standard result expected
 778     settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
 779                  ulpTol_,
 780                  absTol_,
 781                  MatchRule::Normal };
 782     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
 783 }
 784
 785 TEST_F(SimdMathTest, expUnsafe)
 786 {
 787     // See test of exp() for comments about test ranges
 788     const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
 789                                               * std::log(2.0)
 790                                               * (1 - std::numeric_limits<real>::epsilon());
 791     const real highestRealThatProducesNormal =
 792             (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
 793
 794     CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
 795                               ulpTol_,
 796                               absTol_,
 797                               MatchRule::Normal };
 798     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp<MathOptimization::Unsafe>, settings);
 799 }
 800
 801 TEST_F(SimdMathTest, pow)
 802 {
 803     // We already test the log2/exp2 components of pow() extensively above, and it's a very
 804     // simple single-line function, so here we just test a handful of values to catch typos
 805     // and then some special values.
 806
 807     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
 808                               pow(rSimd_c0c1c2, rSimd_c3c4c5));
 809
 810     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
 811                               pow(rSimd_c0c1c2, rSimd_m3m0m4));
 812
 813     // 0^0 = 1 , 0^c1=0, -c1^0=1
 814     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(1.0, 0.0, 1.0),
 815                               pow(setSimdRealFrom3R(0, 0.0, -c1), setSimdRealFrom3R(0.0, c1, 0.0)));
 816 }
 817
 818 TEST_F(SimdMathTest, powUnsafe)
 819 {
 820     // We already test the log2/exp2 components of pow() extensively above, and it's a very
 821     // simple single-line function, so here we just test a handful of values to catch typos
 822     // and then some special values.
 823
 824     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
 825                               pow<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_c3c4c5));
 826
 827     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
 828                               pow<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_m3m0m4));
 829 }
 830
 831 /*! \brief Function wrapper for erf(x), with argument/return in default Gromacs precision.
 832  *
 833  * \note Single-precision erf() in some libraries can be slightly lower precision
 834  * than the SIMD flavor, so we use a cast to force double precision for reference.
 835  */
 836 real refErf(real x)
 837 {
 838     return std::erf(static_cast<double>(x));
 839 }
 840
 841 TEST_F(SimdMathTest, erf)
 842 {
 843     CompareSettings settings{ Range(-9, 9), ulpTol_, std::numeric_limits<real>::min(), MatchRule::Normal };
 844     GMX_EXPECT_SIMD_FUNC_NEAR(refErf, erf, settings);
 845 }
 846
 847 /*! \brief Function wrapper for erfc(x), with argument/return in default Gromacs precision.
 848  *
 849  * \note Single-precision erfc() in some libraries can be slightly lower precision
 850  * than the SIMD flavor, so we use a cast to force double precision for reference.
 851  */
 852 real refErfc(real x)
 853 {
 854     return std::erfc(static_cast<double>(x));
 855 }
 856
 857 TEST_F(SimdMathTest, erfc)
 858 {
 859     // Our erfc algorithm has 4 ulp accuracy, so relax tolerance a bit to 4*ulpTol
 860     CompareSettings settings{ Range(-9, 9), 4 * ulpTol_, std::numeric_limits<real>::min(), MatchRule::Normal };
 861     GMX_EXPECT_SIMD_FUNC_NEAR(refErfc, erfc, settings);
 862 }
 863
 864 TEST_F(SimdMathTest, sin)
 865 {
 866     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
 867     GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sin, settings);
 868
 869     // Range reduction leads to accuracy loss, so we might want higher tolerance here
 870     settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
 871     GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sin, settings);
 872 }
 873
 874 TEST_F(SimdMathTest, cos)
 875 {
 876     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
 877     GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cos, settings);
 878
 879     // Range reduction leads to accuracy loss, so we might want higher tolerance here
 880     settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
 881     GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cos, settings);
 882 }
 883
 884 TEST_F(SimdMathTest, tan)
 885 {
 886     // Tan(x) is a little sensitive due to the division in the algorithm.
 887     // Rather than using lots of extra FP operations, we accept the algorithm
 888     // presently only achieves a ~3 ulp error and use the medium tolerance.
 889     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
 890     GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tan, settings);
 891
 892     // Range reduction leads to accuracy loss, so we might want higher tolerance here
 893     settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
 894     GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tan, settings);
 895 }
 896
 897 TEST_F(SimdMathTest, asin)
 898 {
 899     // Our present asin(x) algorithm achieves 2-3 ulp accuracy
 900     CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
 901     GMX_EXPECT_SIMD_FUNC_NEAR(std::asin, asin, settings);
 902 }
 903
 904 TEST_F(SimdMathTest, acos)
 905 {
 906     // Our present acos(x) algorithm achieves 2-3 ulp accuracy
 907     CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
 908     GMX_EXPECT_SIMD_FUNC_NEAR(std::acos, acos, settings);
 909 }
 910
 911 TEST_F(SimdMathTest, atan)
 912 {
 913     // Our present atan(x) algorithm achieves 1 ulp accuracy
 914     CompareSettings settings{ Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
 915     GMX_EXPECT_SIMD_FUNC_NEAR(std::atan, atan, settings);
 916 }
 917
 918 TEST_F(SimdMathTest, atan2)
 919 {
 920     // test each quadrant
 921     GMX_EXPECT_SIMD_REAL_NEAR(
 922             setSimdRealFrom3R(std::atan2(c0, c3), std::atan2(c1, c4), std::atan2(c2, c5)),
 923             atan2(rSimd_c0c1c2, rSimd_c3c4c5));
 924     GMX_EXPECT_SIMD_REAL_NEAR(
 925             setSimdRealFrom3R(std::atan2(-c0, c3), std::atan2(-c1, c4), std::atan2(-c2, c5)),
 926             atan2(rSimd_m0m1m2, rSimd_c3c4c5));
 927     GMX_EXPECT_SIMD_REAL_NEAR(
 928             setSimdRealFrom3R(std::atan2(-c0, -c3), std::atan2(-c1, -c0), std::atan2(-c2, -c4)),
 929             atan2(rSimd_m0m1m2, rSimd_m3m0m4));
 930     GMX_EXPECT_SIMD_REAL_NEAR(
 931             setSimdRealFrom3R(std::atan2(c0, -c3), std::atan2(c1, -c0), std::atan2(c2, -c4)),
 932             atan2(rSimd_c0c1c2, rSimd_m3m0m4));
 933
 934     // cases important for calculating angles
 935     // values on coordinate axes
 936     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(0, c0), std::atan2(0, c1), std::atan2(0, c2)),
 937                               atan2(setZero(), rSimd_c0c1c2));
 938     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(c0, 0), std::atan2(c1, 0), std::atan2(c2, 0)),
 939                               atan2(rSimd_c0c1c2, setZero()));
 940     GMX_EXPECT_SIMD_REAL_NEAR(
 941             setSimdRealFrom3R(std::atan2(0, -c0), std::atan2(0, -c1), std::atan2(0, -c2)),
 942             atan2(setZero(), rSimd_m0m1m2));
 943     GMX_EXPECT_SIMD_REAL_NEAR(
 944             setSimdRealFrom3R(std::atan2(-c0, 0), std::atan2(-c1, 0), std::atan2(-c2, 0)),
 945             atan2(rSimd_m0m1m2, setZero()));
 946     // degenerate value (origin) should return 0.0. At least IBM xlc 13.1.5 gets the reference
 947     // value wrong (-nan) at -O3 optimization, so we compare to the correct value (0.0) instead.
 948     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom1R(0.0), atan2(setSimdRealFrom3R(0.0, 0.0, 0.0), setZero()));
 949 }
 950
 951 /*! \brief Evaluate reference version of PME force correction. */
 952 real refPmeForceCorrection(real x)
 953 {
 954     if (x != 0)
 955     {
 956         real y = std::sqrt(x);
 957         return 2 * std::exp(-x) / (std::sqrt(M_PI) * x) - std::erf(static_cast<double>(y)) / (x * y);
 958     }
 959     else
 960     {
 961         return -4 / (3 * std::sqrt(M_PI));
 962     }
 963 }
 964
 965 // The PME corrections will be added to ~1/r2, so absolute tolerance of EPS is fine.
 966 TEST_F(SimdMathTest, pmeForceCorrection)
 967 {
 968     // Pme correction relative accuracy only needs to be ~1e-6 accuracy single, 1e-10 double
 969     const std::int64_t ulpTol = (GMX_DOUBLE ? 5e-10 : 5e-6) / GMX_REAL_EPS;
 970
 971     CompareSettings settings{ Range(0.15, 4), ulpTol, GMX_REAL_EPS, MatchRule::Normal };
 972     GMX_EXPECT_SIMD_FUNC_NEAR(refPmeForceCorrection, pmeForceCorrection, settings);
 973 }
 974
 975 /*! \brief Evaluate reference version of PME potential correction. */
 976 real refPmePotentialCorrection(real x)
 977 {
 978     real y = std::sqrt(x);
 979     return std::erf(static_cast<double>(y)) / y;
 980 }
 981
 982 // The PME corrections will be added to ~1/r, so absolute tolerance of EPS is fine.
 983 TEST_F(SimdMathTest, pmePotentialCorrection)
 984 {
 985     // Pme correction relative accuracy only needs to be ~1e-6 accuracy single, 1e-10 double
 986     const std::int64_t ulpTol = (GMX_DOUBLE ? 5e-10 : 5e-6) / GMX_REAL_EPS;
 987
 988     CompareSettings settings{ Range(0.15, 4), ulpTol, GMX_REAL_EPS, MatchRule::Normal };
 989     GMX_EXPECT_SIMD_FUNC_NEAR(refPmePotentialCorrection, pmePotentialCorrection, settings);
 990 }
 991
 992 // Functions that only target single accuracy, even for double SIMD data
 993
 994 TEST_F(SimdMathTest, invsqrtSingleAccuracy)
 995 {
 996     // Here we always use float limits, since the lookup is not defined for numbers that
 997     // cannot be represented in single precision.
 998     const real low  = std::numeric_limits<float>::min();
 999     const real high = std::numeric_limits<float>::max();
1000     /* Increase the allowed error by the difference between the actual precision and single */
1001     setUlpTolSingleAccuracy(ulpTol_);
1002
1003     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1004     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, invsqrtSingleAccuracy, settings);
1005 }
1006
1007 /*! \brief Function wrapper to return first result when testing \ref invsqrtPairSingleAccuracy */
1008 SimdReal gmx_simdcall tst_invsqrt_SingleAccuracy_pair0(SimdReal x)
1009 {
1010     SimdReal r0, r1;
1011     invsqrtPairSingleAccuracy(x, x, &r0, &r1);
1012     return r0;
1013 }
1014
1015 /*! \brief Function wrapper to return second result when testing \ref invsqrtPairSingleAccuracy */
1016 SimdReal gmx_simdcall tst_invsqrt_SingleAccuracy_pair1(SimdReal x)
1017 {
1018     SimdReal r0, r1;
1019     invsqrtPairSingleAccuracy(x, x, &r0, &r1);
1020     return r1;
1021 }
1022
1023 TEST_F(SimdMathTest, invsqrtPairSingleAccuracy)
1024 {
1025     // Float limits since lookup is always performed in single
1026     const real low  = std::numeric_limits<float>::min();
1027     const real high = std::numeric_limits<float>::max();
1028     /* Increase the allowed error by the difference between the actual precision and single */
1029     setUlpTolSingleAccuracy(ulpTol_);
1030
1031     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1032     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tst_invsqrt_SingleAccuracy_pair0, settings);
1033     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tst_invsqrt_SingleAccuracy_pair1, settings);
1034 }
1035
1036 TEST_F(SimdMathTest, sqrtSingleAccuracy)
1037 {
1038     // Since the first lookup step is sometimes performed in single precision,
1039     // our SIMD sqrt can only handle single-precision input values, even when
1040     // compiled in double precision - thus we use single precision limits here.
1041
1042     // Scale lowest value by 1+eps, since we will do some arithmetics on this value
1043     const real low = std::numeric_limits<float>::min() * (1.0 + std::numeric_limits<float>::epsilon());
1044     const real      high = std::numeric_limits<float>::max();
1045     CompareSettings settings;
1046
1047     // Increase the allowed error by the difference between the actual precision and single
1048     setUlpTolSingleAccuracy(ulpTol_);
1049
1050     // First test that 0.0 and a few other values works
1051     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(0, std::sqrt(c0), std::sqrt(c1)),
1052                               sqrtSingleAccuracy(setSimdRealFrom3R(0, c0, c1)));
1053
1054 #        if GMX_DOUBLE
1055     // As mentioned above, we cannot guarantee that very small double precision
1056     // input values (below std::numeric_limits<float>::min()) are handled correctly,
1057     // so our implementation will clamp it to zero. In this range we allow either
1058     // the correct value or zero, but it's important that it does not result in NaN or Inf values.
1059     //
1060     // This test range must not be called for single precision, since if we try to divide
1061     // the interval (0.0, low( in npoints we will try to multiply by factors so small that
1062     // they end up being flushed to zero, and the loop would never end.
1063     settings = { Range(0.0, low), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
1064     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy, settings);
1065 #        endif
1066
1067     settings = { Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1068     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy, settings);
1069 }
1070
1071 TEST_F(SimdMathTest, sqrtSingleAccuracyUnsafe)
1072 {
1073     // Test the full range, but stick to float limits since lookup is done in single.
1074     const real low  = std::numeric_limits<float>::min();
1075     const real high = std::numeric_limits<float>::max();
1076
1077     /* Increase the allowed error by the difference between the actual precision and single */
1078     setUlpTolSingleAccuracy(ulpTol_);
1079
1080     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1081     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy<MathOptimization::Unsafe>, settings);
1082 }
1083
1084 TEST_F(SimdMathTest, invSingleAccuracy)
1085 {
1086     // Since the first lookup step is sometimes performed in single precision,
1087     // our SIMD 1/x can only handle single-precision input values, even when
1088     // compiled in double precision.
1089
1090     // Relevant threshold points
1091     const real minSafeFloat = std::numeric_limits<float>::min()
1092                               * 10; // X value guaranteed not to result in Inf intermediates for 1/x calc.
1093     const real maxSafeFloat = std::numeric_limits<float>::max()
1094                               * 0.1; // X value guaranteed not to result in DTZ intermediates for 1/x calc.
1095     // Scale highest value by 1-eps, since we will do some arithmetics on this value
1096     const real maxFloat =
1097             std::numeric_limits<float>::max() * (1.0 - std::numeric_limits<float>::epsilon());
1098     CompareSettings settings;
1099
1100     // Increase the allowed error by the difference between the actual precision and single
1101     setUlpTolSingleAccuracy(ulpTol_);
1102
1103     // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
1104     settings = { Range(-maxFloat, -maxSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
1105     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1106
1107     // Normal checks for x < 0
1108     settings = { Range(-maxSafeFloat, -minSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
1109     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1110
1111     // We do not care about the small range -minSafeFloat < x < +minSafeFloat where the result can be +/- Inf, since we don't require strict IEEE754.
1112
1113     // Normal checks for x > 0
1114     settings = { Range(minSafeFloat, maxSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
1115     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1116
1117     // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
1118     settings = { Range(maxSafeFloat, maxFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
1119     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1120 }
1121
1122 TEST_F(SimdMathTest, cbrtSingleAccuracy)
1123 {
1124     const real low  = -std::numeric_limits<real>::max();
1125     const real high = std::numeric_limits<real>::max();
1126
1127     // Increase the allowed error by the difference between the actual precision and single
1128     setUlpTolSingleAccuracy(ulpTol_);
1129
1130     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1131     GMX_EXPECT_SIMD_FUNC_NEAR(std::cbrt, cbrtSingleAccuracy, settings);
1132 }
1133
1134 TEST_F(SimdMathTest, invcbrtSingleAccuracy)
1135 {
1136     // Increase the allowed error by the difference between the actual precision and single
1137     setUlpTolSingleAccuracy(ulpTol_);
1138
1139     // Negative values first
1140     real low  = -std::numeric_limits<real>::max();
1141     real high = -std::numeric_limits<real>::min();
1142
1143     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1144     GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrtSingleAccuracy, settings);
1145
1146     // Positive values
1147     low      = std::numeric_limits<real>::min();
1148     high     = std::numeric_limits<real>::max();
1149     settings = { Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1150     GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrtSingleAccuracy, settings);
1151 }
1152
1153 TEST_F(SimdMathTest, log2SingleAccuracy)
1154 {
1155     const real low  = std::numeric_limits<real>::min();
1156     const real high = std::numeric_limits<real>::max();
1157
1158     // Increase the allowed error by the difference between the actual precision and single
1159     setUlpTolSingleAccuracy(ulpTol_);
1160
1161     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1162     GMX_EXPECT_SIMD_FUNC_NEAR(std::log2, log2SingleAccuracy, settings);
1163 }
1164
1165 TEST_F(SimdMathTest, logSingleAccuracy)
1166 {
1167     const real low  = std::numeric_limits<real>::min();
1168     const real high = std::numeric_limits<real>::max();
1169
1170     // Increase the allowed error by the difference between the actual precision and single
1171     setUlpTolSingleAccuracy(ulpTol_);
1172
1173     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1174     GMX_EXPECT_SIMD_FUNC_NEAR(std::log, logSingleAccuracy, settings);
1175 }
1176
1177 TEST_F(SimdMathTest, exp2SingleAccuracy)
1178 {
1179     // Relevant threshold points - float limits since we only target single accuracy
1180     constexpr real lowestReal = -std::numeric_limits<real>::max();
1181     constexpr real lowestRealThatProducesNormal =
1182             std::numeric_limits<real>::min_exponent
1183             - 1; // adding the significant corresponds to one more unit in exponent
1184     constexpr real lowestRealThatProducesDenormal =
1185             lowestRealThatProducesNormal
1186             - std::numeric_limits<real>::digits; // digits refer to bits in significand, so 24/53 for float/double
1187     constexpr real highestRealThatProducesNormal =
1188             std::numeric_limits<real>::max_exponent
1189             - 1; // adding the significant corresponds to one more unit in exponent
1190     CompareSettings settings;
1191
1192     // Increase the allowed error by the difference between the actual precision and single
1193     setUlpTolSingleAccuracy(ulpTol_);
1194
1195     // Below subnormal range all results should be zero
1196     settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
1197     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy, settings);
1198
1199     // Subnormal range, require matching, but DTZ is fine
1200     settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal),
1201                  ulpTol_,
1202                  absTol_,
1203                  MatchRule::Dtz };
1204     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy, settings);
1205
1206     // Normal range, standard result expected
1207     settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1208                  ulpTol_,
1209                  absTol_,
1210                  MatchRule::Normal };
1211     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy, settings);
1212 }
1213
1214 TEST_F(SimdMathTest, exp2SingleAccuracyUnsafe)
1215 {
1216     // The unsafe version is only defined in the normal range
1217     constexpr real lowestRealThatProducesNormal =
1218             std::numeric_limits<real>::min_exponent
1219             - 1; // adding the significant corresponds to one more unit in exponent
1220     constexpr real highestRealThatProducesNormal =
1221             std::numeric_limits<real>::max_exponent
1222             - 1; // adding the significant corresponds to one more unit in exponent
1223
1224     /* Increase the allowed error by the difference between the actual precision and single */
1225     setUlpTolSingleAccuracy(ulpTol_);
1226
1227     CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1228                               ulpTol_,
1229                               absTol_,
1230                               MatchRule::Normal };
1231     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy<MathOptimization::Unsafe>, settings);
1232 }
1233
1234 TEST_F(SimdMathTest, expSingleAccuracy)
1235 {
1236     // See threshold point comments in normal exp() test
1237     const real lowestReal = -std::numeric_limits<real>::max();
1238     // In theory the smallest value should be (min_exponent-1)*log(2), but rounding after the multiplication will cause this
1239     // value to be a single ulp too low. This might cause failed tests on CPUs that use different DTZ modes for SIMD vs.
1240     // non-SIMD arithmetics (e.g. ARM v7), so multiply by (1.0-eps) to increase it by a single ulp.
1241     const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
1242                                               * std::log(2.0)
1243                                               * (1.0 - std::numeric_limits<real>::epsilon());
1244     const real lowestRealThatProducesDenormal =
1245             lowestRealThatProducesNormal - std::numeric_limits<real>::digits * std::log(2.0);
1246     const real highestRealThatProducesNormal =
1247             (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
1248     CompareSettings settings;
1249
1250     // Increase the allowed error by the difference between the actual precision and single
1251     setUlpTolSingleAccuracy(ulpTol_);
1252
1253     // Below subnormal range all results should be zero (so, match the reference)
1254     settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
1255     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy, settings);
1256
1257     // Subnormal range, require matching, but DTZ is fine
1258     settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal),
1259                  ulpTol_,
1260                  absTol_,
1261                  MatchRule::Dtz };
1262     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy, settings);
1263
1264     // Normal range, standard result expected
1265     settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1266                  ulpTol_,
1267                  absTol_,
1268                  MatchRule::Normal };
1269     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy, settings);
1270 }
1271
1272 TEST_F(SimdMathTest, expSingleAccuracyUnsafe)
1273 {
1274     // See test of exp() for comments about test ranges
1275     const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
1276                                               * std::log(2.0)
1277                                               * (1 - std::numeric_limits<real>::epsilon());
1278     const real highestRealThatProducesNormal =
1279             (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
1280
1281     // Increase the allowed error by the difference between the actual precision and single
1282     setUlpTolSingleAccuracy(ulpTol_);
1283
1284     CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1285                               ulpTol_,
1286                               absTol_,
1287                               MatchRule::Normal };
1288     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy<MathOptimization::Unsafe>, settings);
1289 }
1290
1291 TEST_F(SimdMathTest, powSingleAccuracy)
1292 {
1293     // We already test the log2/exp2 components of pow() extensively above, and it's a very
1294     // simple single-line function, so here we just test a handful of values to catch typos
1295     // and then some special values.
1296
1297     // Increase the allowed error by the difference between the actual precision and single
1298     setUlpTolSingleAccuracy(ulpTol_);
1299
1300     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
1301                               powSingleAccuracy(rSimd_c0c1c2, rSimd_c3c4c5));
1302
1303     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
1304                               powSingleAccuracy(rSimd_c0c1c2, rSimd_m3m0m4));
1305
1306     // 0^0 = 1 , 0^c1=0, -c1^0=1
1307     GMX_EXPECT_SIMD_REAL_NEAR(
1308             setSimdRealFrom3R(1.0, 0.0, 1.0),
1309             powSingleAccuracy(setSimdRealFrom3R(0, 0.0, -c1), setSimdRealFrom3R(0.0, c1, 0.0)));
1310 }
1311
1312 TEST_F(SimdMathTest, powSingleAccuracyUnsafe)
1313 {
1314     // We already test the log2/exp2 components of pow() extensively above, and it's a very
1315     // simple single-line function, so here we just test a handful of values to catch typos
1316     // and then some special values.
1317
1318     // Increase the allowed error by the difference between the actual precision and single
1319     setUlpTolSingleAccuracy(ulpTol_);
1320
1321     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
1322                               powSingleAccuracy<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_c3c4c5));
1323
1324     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
1325                               powSingleAccuracy<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_m3m0m4));
1326 }
1327
1328 TEST_F(SimdMathTest, erfSingleAccuracy)
1329 {
1330     // Increase the allowed error by the difference between the actual precision and single
1331     setUlpTolSingleAccuracy(ulpTol_);
1332
1333     CompareSettings settings{ Range(-9, 9), ulpTol_, GMX_REAL_MIN, MatchRule::Normal };
1334     GMX_EXPECT_SIMD_FUNC_NEAR(refErf, erfSingleAccuracy, settings);
1335 }
1336
1337 TEST_F(SimdMathTest, erfcSingleAccuracy)
1338 {
1339     // Increase the allowed error by the difference between the actual precision and single
1340     setUlpTolSingleAccuracy(ulpTol_);
1341
1342     // Our erfc algorithm has 4 ulp accuracy, so relax tolerance a bit
1343     CompareSettings settings{ Range(-9, 9), 4 * ulpTol_, GMX_REAL_MIN, MatchRule::Normal };
1344     GMX_EXPECT_SIMD_FUNC_NEAR(refErfc, erfcSingleAccuracy, settings);
1345 }
1346
1347
1348 TEST_F(SimdMathTest, sinSingleAccuracy)
1349 {
1350     /* Increase the allowed error by the difference between the actual precision and single */
1351     setUlpTolSingleAccuracy(ulpTol_);
1352
1353     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
1354     GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sinSingleAccuracy, settings);
1355
1356     // Range reduction leads to accuracy loss, so we might want higher tolerance here
1357     settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
1358     GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sinSingleAccuracy, settings);
1359 }
1360
1361 TEST_F(SimdMathTest, cosSingleAccuracy)
1362 {
1363     /* Increase the allowed error by the difference between the actual precision and single */
1364     setUlpTolSingleAccuracy(ulpTol_);
1365
1366     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
1367     GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cosSingleAccuracy, settings);
1368
1369     // Range reduction leads to accuracy loss, so we might want higher tolerance here
1370     settings = { Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
1371     GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cosSingleAccuracy, settings);
1372 }
1373
1374 TEST_F(SimdMathTest, tanSingleAccuracy)
1375 {
1376     /* Increase the allowed error by the difference between the actual precision and single */
1377     setUlpTolSingleAccuracy(ulpTol_);
1378
1379     // Tan(x) is a little sensitive due to the division in the algorithm.
1380     // Rather than using lots of extra FP operations, we accept the algorithm
1381     // presently only achieves a ~3 ulp error and use the medium tolerance.
1382     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
1383     GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tanSingleAccuracy, settings);
1384
1385     // Range reduction leads to accuracy loss, so we might want higher tolerance here
1386     settings = { Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
1387     GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tanSingleAccuracy, settings);
1388 }
1389
1390 TEST_F(SimdMathTest, asinSingleAccuracy)
1391 {
1392     /* Increase the allowed error by the difference between the actual precision and single */
1393     setUlpTolSingleAccuracy(ulpTol_);
1394
1395     // Our present asin(x) algorithm achieves 2-3 ulp accuracy
1396     CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
1397     GMX_EXPECT_SIMD_FUNC_NEAR(std::asin, asinSingleAccuracy, settings);
1398 }
1399
1400 TEST_F(SimdMathTest, acosSingleAccuracy)
1401 {
1402     /* Increase the allowed error by the difference between the actual precision and single */
1403     setUlpTolSingleAccuracy(ulpTol_);
1404
1405     // Our present acos(x) algorithm achieves 2-3 ulp accuracy
1406     CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
1407     GMX_EXPECT_SIMD_FUNC_NEAR(std::acos, acosSingleAccuracy, settings);
1408 }
1409
1410 TEST_F(SimdMathTest, atanSingleAccuracy)
1411 {
1412     /* Increase the allowed error by the difference between the actual precision and single */
1413     setUlpTolSingleAccuracy(ulpTol_);
1414
1415     // Our present atan(x) algorithm achieves 1 ulp accuracy
1416     CompareSettings settings{ Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
1417     GMX_EXPECT_SIMD_FUNC_NEAR(std::atan, atanSingleAccuracy, settings);
1418 }
1419
1420 TEST_F(SimdMathTest, atan2SingleAccuracy)
1421 {
1422     /* Increase the allowed error by the difference between the actual precision and single */
1423     setUlpTolSingleAccuracy(ulpTol_);
1424
1425     // test each quadrant
1426     GMX_EXPECT_SIMD_REAL_NEAR(
1427             setSimdRealFrom3R(std::atan2(c0, c3), std::atan2(c1, c4), std::atan2(c2, c5)),
1428             atan2SingleAccuracy(rSimd_c0c1c2, rSimd_c3c4c5));
1429     GMX_EXPECT_SIMD_REAL_NEAR(
1430             setSimdRealFrom3R(std::atan2(-c0, c3), std::atan2(-c1, c4), std::atan2(-c2, c5)),
1431             atan2SingleAccuracy(rSimd_m0m1m2, rSimd_c3c4c5));
1432     GMX_EXPECT_SIMD_REAL_NEAR(
1433             setSimdRealFrom3R(std::atan2(-c0, -c3), std::atan2(-c1, -c0), std::atan2(-c2, -c4)),
1434             atan2SingleAccuracy(rSimd_m0m1m2, rSimd_m3m0m4));
1435     GMX_EXPECT_SIMD_REAL_NEAR(
1436             setSimdRealFrom3R(std::atan2(c0, -c3), std::atan2(c1, -c0), std::atan2(c2, -c4)),
1437             atan2SingleAccuracy(rSimd_c0c1c2, rSimd_m3m0m4));
1438     // cases important for calculating angles
1439     // values on coordinate axes
1440     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(0, c0), std::atan2(0, c1), std::atan2(0, c2)),
1441                               atan2SingleAccuracy(setZero(), rSimd_c0c1c2));
1442     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(c0, 0), std::atan2(c1, 0), std::atan2(c2, 0)),
1443                               atan2SingleAccuracy(rSimd_c0c1c2, setZero()));
1444     GMX_EXPECT_SIMD_REAL_NEAR(
1445             setSimdRealFrom3R(std::atan2(0, -c0), std::atan2(0, -c1), std::atan2(0, -c2)),
1446             atan2SingleAccuracy(setZero(), rSimd_m0m1m2));
1447     GMX_EXPECT_SIMD_REAL_NEAR(
1448             setSimdRealFrom3R(std::atan2(-c0, 0), std::atan2(-c1, 0), std::atan2(-c2, 0)),
1449             atan2SingleAccuracy(rSimd_m0m1m2, setZero()));
1450
1451     // degenerate value (origin) should return 0.0. At least IBM xlc 13.1.5 gets the reference
1452     // value wrong (-nan) at -O3 optimization, so we compare to the correct value (0.0) instead.
1453     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom1R(0.0),
1454                               atan2SingleAccuracy(setSimdRealFrom3R(0.0, 0.0, 0.0), setZero()));
1455 }
1456
1457 TEST_F(SimdMathTest, pmeForceCorrectionSingleAccuracy)
1458 {
1459     // The PME corrections will be added to ~1/r2, so absolute tolerance of EPS is fine.
1460     // Pme correction only needs to be ~1e-6 accuracy single.
1461     // Then increase the allowed error by the difference between the actual precision and single.
1462     setUlpTolSingleAccuracy(std::int64_t(5e-6 / GMX_FLOAT_EPS));
1463
1464     CompareSettings settings{ Range(0.15, 4), ulpTol_, GMX_FLOAT_EPS, MatchRule::Normal };
1465     GMX_EXPECT_SIMD_FUNC_NEAR(refPmeForceCorrection, pmeForceCorrectionSingleAccuracy, settings);
1466 }
1467
1468 TEST_F(SimdMathTest, pmePotentialCorrectionSingleAccuracy)
1469 {
1470     // The PME corrections will be added to ~1/r, so absolute tolerance of EPS is fine.
1471     // Pme correction only needs to be ~1e-6 accuracy single.
1472     // Then increase the allowed error by the difference between the actual precision and single.
1473     setUlpTolSingleAccuracy(std::int64_t(5e-6 / GMX_FLOAT_EPS));
1474
1475     CompareSettings settings{ Range(0.15, 4), ulpTol_, GMX_FLOAT_EPS, MatchRule::Normal };
1476     GMX_EXPECT_SIMD_FUNC_NEAR(refPmePotentialCorrection, pmePotentialCorrectionSingleAccuracy, settings);
1477 }
1478
1479 } // namespace
1480
1481 #    endif // GMX_SIMD_HAVE_REAL
1482
1483 /*! \} */
1484 /*! \endcond */
1485
1486 } // namespace test
1487 } // namespace gmx
1488
1489 #endif // GMX_SIMD