src/gromacs/simd/tests/simd_math.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2017,2018,2019,2020,2021, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "gmxpre.h"
  36
  37 #include "gromacs/simd/simd_math.h"
  38
  39 #include "config.h"
  40
  41 #include <cmath>
  42 #include <cstdint>
  43
  44 #include <map>
  45 #include <utility>
  46 #include <vector>
  47
  48 #include "gromacs/math/units.h"
  49 #include "gromacs/math/utilities.h"
  50 #include "gromacs/options/basicoptions.h"
  51 #include "gromacs/simd/simd.h"
  52
  53 #include "testutils/refdata.h"
  54 #include "testutils/testasserts.h"
  55
  56 #include "simd.h"
  57
  58 #if GMX_SIMD
  59
  60 namespace gmx
  61 {
  62 namespace test
  63 {
  64
  65 /*! \cond internal */
  66 /*! \addtogroup module_simd */
  67 /*! \{ */
  68
  69 #    if GMX_SIMD_HAVE_REAL
  70
  71 class SimdMathTest : public SimdTest
  72 {
  73 public:
  74     /*! \brief Type for half-open intervals specifying test ranges */
  75     typedef std::pair<real, real> Range;
  76
  77     /*! \brief Control what is considered matching values
  78      *
  79      * Normal simply means that we request the values to be equal
  80      * to within the specified tolerance.
  81      * However, there are also two more cases that are special:
  82      *
  83      * - Even if we only care about normal (i.e., not denormal) values, some math
  84      *   libraries might clamp the value to zero, which means our SIMD output
  85      *   might not match their values. By using MatchRule::Dtz, we will consider
  86      *   all values both from the reference and test functions that are within the
  87      *   requested ulp tolerance of a denormal number to be equivalent to 0.0.
  88      * - For some older architectures without fused multiply-add units (e.g. x86 SSE2),
  89      *   we might end up clamping the results to zero just before reaching
  90      *   denormal output, since the intermediate results e.g. in polynomial
  91      *   approximations can be smaller than the final one. We often simply don't
  92      *   care about those values, and then one can use
  93      *   MatchRule::ReferenceOrZero to allow the test value to either match
  94      *   the reference or be zero.
  95      */
  96     enum class MatchRule
  97     {
  98         Normal, //!< Match function values
  99         Dtz, //!< Match function values after setting denormals to zero both in test and reference
 100         ReferenceOrZero, //!< Test values can either match reference or be zero
 101     };
 102
 103     const std::map<MatchRule, std::string> matchRuleNames_ = {
 104         { MatchRule::Normal, "Test should match reference." },
 105         { MatchRule::Dtz, "Test should match reference, with denormals treated as 0.0." },
 106         { MatchRule::ReferenceOrZero, "Test should match reference or 0.0." }
 107     };
 108
 109     /*! \brief Settings used for simd math function comparisons */
 110     struct CompareSettings
 111     {
 112         Range        range;     //!< Range over which to test function
 113         std::int64_t ulpTol;    //!< Ulp tolerance
 114         real         absTol;    //!< Absolute tolerance
 115         MatchRule    matchRule; //!< Decide what we consider a match
 116     };
 117
 118     ::testing::AssertionResult compareSimdMathFunction(const char* refFuncExpr,
 119                                                        const char* simdFuncExpr,
 120                                                        const char* compareSettingsExpr,
 121                                                        real        refFunc(real x),
 122                                                        SimdReal gmx_simdcall  simdFunc(SimdReal x),
 123                                                        const CompareSettings& compareSettings);
 124
 125     /*! \brief Generate test point vector
 126      *
 127      *  \param range  The test interval, half open. Upper limit is not included.
 128      *                Pass by value, since we need to modify in method anyway.
 129      *  \param points Number of points to generate. This might be increased
 130      *                slightly to account both for extra special values like 0.0
 131      *                and the SIMD width.
 132      *
 133      * This routine generates a vector with test points separated by constant
 134      * multiplicative factors, based on the range and number of points in the
 135      * class. If the range includes both negative and positive values, points
 136      * will be generated separately for the negative/positive intervals down
 137      * to the smallest real number that can be represented, and we also include
 138      * 0.0 explicitly.
 139      *
 140      * This is highly useful for large test ranges. For example, with a linear
 141      * 1000-point division of the range (1,1e10) the first three values to test
 142      * would be 1, 10000000.999, and 20000000.998, etc. For large values we would
 143      * commonly hit the point where adding the small delta has no effect due to
 144      * limited numerical precision.
 145      * When we instead use this routine, the values will be 1, 1.0239, 1.0471, etc.
 146      * This will spread the entropy over all bits in the IEEE754 representation,
 147      * and be a much better test of all potential input values.
 148      *
 149      *  \note We do not use the static variable s_nPoints in the parent class
 150      *        to avoid altering any value the user has set on the command line; since
 151      *        it's a static member, changing it would have permanent effect.
 152      */
 153     static std::vector<real> generateTestPoints(Range range, std::size_t points);
 154
 155     /*! \brief Test routine for the test point vector generation
 156      */
 157     static void generateTestPointsTest();
 158 };
 159
 160 /*! \brief Test approximate equality of SIMD vs reference version of a function.
 161  *
 162  * This macro takes vanilla C and SIMD flavors of a function and tests it with
 163  * the number of points, range, and tolerances specified by the test fixture class.
 164  *
 165  * The third option controls the range, tolerances, and match settings.
 166  */
 167 #        define GMX_EXPECT_SIMD_FUNC_NEAR(refFunc, tstFunc, compareSettings) \
 168             EXPECT_PRED_FORMAT3(compareSimdMathFunction, refFunc, tstFunc, compareSettings)
 169
 170 std::vector<real> SimdMathTest::generateTestPoints(Range inputRange, std::size_t inputPoints)
 171 {
 172
 173     std::vector<real> testPoints;
 174     testPoints.reserve(inputPoints);
 175
 176     GMX_RELEASE_ASSERT(inputRange.first < inputRange.second,
 177                        "The start of the interval must come before the end");
 178
 179     std::vector<Range> testRanges;
 180
 181     if (inputRange.first < 0 && inputRange.second > 0)
 182     {
 183         testRanges.emplace_back(Range({ inputRange.first, -std::numeric_limits<real>::min() }));
 184         testRanges.emplace_back(Range({ 0.0, inputRange.second }));
 185     }
 186     else
 187     {
 188         if (inputRange.second == 0)
 189         {
 190             inputRange.second = -std::numeric_limits<real>::min();
 191             inputRange.first  = std::min(inputRange.first, inputRange.second);
 192         }
 193         testRanges.push_back(inputRange);
 194     }
 195
 196     for (Range& range : testRanges)
 197     {
 198         std::size_t points = inputPoints / testRanges.size();
 199
 200         // The value 0 is special, and can only occur at the start of
 201         // the interval after the corrections outside this loop.
 202         // Add it explicitly, and adjust the interval to continue
 203         // at the first valid non-zero positive number.
 204         if (range.first == 0)
 205         {
 206             testPoints.push_back(0.0);
 207             range.first = std::numeric_limits<real>::min();
 208             points--; // Used one point
 209         }
 210
 211         union {
 212             real                                                                               r;
 213             std::conditional<sizeof(real) == sizeof(double), std::int64_t, std::int32_t>::type i;
 214         } low, high, x;
 215
 216         low.r  = range.first;
 217         high.r = range.second;
 218
 219         // IEEE754 floating-point numbers have the cool property that for any range of
 220         // constant sign, for all non-zero numbers a constant (i.e., linear) difference
 221         // in the bitwise representation corresponds to a constant multiplicative factor.
 222         //
 223         // Divide the ulp difference evenly
 224         std::int64_t ulpDiff = high.i - low.i;
 225         // dividend and divisor must both be signed types
 226         std::int64_t ulpDelta    = ulpDiff / static_cast<std::int64_t>(points);
 227         std::int64_t minUlpDelta = (ulpDiff > 0) ? 1 : -1;
 228
 229         if (ulpDelta == 0)
 230         {
 231             // Very short interval or very many points caused round-to-zero.
 232             // Select the smallest possible change, which is one ulp (with correct sign)
 233             ulpDelta = minUlpDelta;
 234             points   = std::abs(ulpDiff);
 235         }
 236
 237         x.r = low.r;
 238         // Use an index-based loop to avoid floating-point comparisons with
 239         // values that might have overflowed. Save one point for the very last
 240         // bitwise value that is part of the interval
 241         for (std::size_t i = 0; i < points - 1; i++)
 242         {
 243             testPoints.push_back(x.r);
 244             x.i += ulpDelta;
 245         }
 246
 247         // Make sure we test the very last point that is inside the interval
 248         x.r = high.r;
 249         x.i -= minUlpDelta;
 250         testPoints.push_back(x.r);
 251     }
 252     return testPoints;
 253 }
 254
 255 /*! \brief Implementation routine to compare SIMD vs reference functions.
 256  *
 257  * \param refFuncExpr         Description of reference function expression
 258  * \param simdFuncExpr        Description of SIMD function expression
 259  * \param compareSettingsExpr Description of compareSettings
 260  * \param refFunc             Reference math function pointer
 261  * \param simdFunc            SIMD math function pointer
 262  * \param compareSettings     Structure with the range, tolerances, and
 263  *                            matching rules to use for the comparison.
 264  *
 265  * \note You should not never call this function directly,  but use the
 266  *       macro GMX_EXPECT_SIMD_FUNC_NEAR(refFunc,tstFunc,matchRule) instead.
 267  */
 268 ::testing::AssertionResult SimdMathTest::compareSimdMathFunction(const char* refFuncExpr,
 269                                                                  const char* simdFuncExpr,
 270                                                                  const char gmx_unused* compareSettingsExpr,
 271                                                                  real refFunc(real x),
 272                                                                  SimdReal gmx_simdcall simdFunc(SimdReal x),
 273                                                                  const CompareSettings& compareSettings)
 274 {
 275     std::vector<real> vx(GMX_SIMD_REAL_WIDTH);
 276     std::vector<real> vref(GMX_SIMD_REAL_WIDTH);
 277     std::vector<real> vtst(GMX_SIMD_REAL_WIDTH);
 278     real              absDiff;
 279     std::int64_t      ulpDiff;
 280     std::int64_t      maxUlpDiff = 0;
 281     real              maxUlpDiffPos;
 282     real              refValMaxUlpDiff, simdValMaxUlpDiff;
 283     const int         niter = s_nPoints / GMX_SIMD_REAL_WIDTH;
 284
 285     union {
 286         real                                                                               r;
 287         std::conditional<sizeof(real) == sizeof(double), std::int64_t, std::int32_t>::type i;
 288     } conv0, conv1;
 289
 290     // Allow zero-size intervals - nothing to test means we succeeded at it
 291     if (compareSettings.range.first == compareSettings.range.second)
 292     {
 293         ::testing::AssertionSuccess();
 294     }
 295
 296     // Calculate the tolerance limit to use for denormals - we want
 297     // values that are within the ulp tolerance of denormals to be considered matching
 298     conv0.r = std::numeric_limits<real>::min();
 299     conv0.i += compareSettings.ulpTol - 1; // min() itself is not denormal, but one ulp larger
 300     const real denormalLimit = conv0.r;
 301
 302     // We want to test as many diverse bit combinations as possible over the range requested,
 303     // and in particular do it evenly spaced in bit-space.
 304     // Due to the way IEEE754 floating-point is represented, that means we should have a
 305     // constant multiplicative factor between adjacent values. This gets a bit complicated
 306     // when we have both positive and negative values, so we offload the generation of the
 307     // specific testing values to a separate routine
 308     std::vector<real> testPoints = generateTestPoints(compareSettings.range, s_nPoints);
 309
 310     size_t pointIndex = 0;
 311
 312     for (int iter = 0; iter < niter; iter++)
 313     {
 314         for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
 315         {
 316             vx[i]   = testPoints[pointIndex];
 317             vref[i] = refFunc(vx[i]);
 318             // If we reach the end of the points, stop increasing index so we pad with
 319             // extra copies of the last element up to the SIMD width
 320             if (pointIndex + 1 < testPoints.size())
 321             {
 322                 pointIndex++;
 323             }
 324         }
 325         vtst = simdReal2Vector(simdFunc(vector2SimdReal(vx)));
 326
 327         bool absOk = true, signOk = true;
 328         for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
 329         {
 330             if (compareSettings.matchRule == MatchRule::Dtz && std::abs(vref[i]) <= denormalLimit
 331                 && std::abs(vtst[i]) <= denormalLimit)
 332             {
 333                 continue;
 334             }
 335
 336             if (compareSettings.matchRule == MatchRule::ReferenceOrZero && vtst[i] == 0.0)
 337             {
 338                 // If we accept 0.0 for the test function, we can continue to the next loop iteration.
 339                 continue;
 340             }
 341
 342             absDiff = std::abs(vref[i] - vtst[i]);
 343             absOk   = absOk && (absDiff < compareSettings.absTol);
 344             signOk  = signOk && ((vref[i] >= 0 && vtst[i] >= 0) || (vref[i] <= 0 && vtst[i] <= 0));
 345
 346             if (absDiff >= compareSettings.absTol)
 347             {
 348                 /* We replicate the trivial ulp differences comparison here rather than
 349                  * calling the lower-level routine for comparing them, since this enables
 350                  * us to run through the entire test range and report the largest deviation
 351                  * without lots of extra glue routines.
 352                  */
 353                 conv0.r = vref[i];
 354                 conv1.r = vtst[i];
 355                 ulpDiff = llabs(conv0.i - conv1.i);
 356                 if (ulpDiff > maxUlpDiff)
 357                 {
 358                     maxUlpDiff        = ulpDiff;
 359                     maxUlpDiffPos     = vx[i];
 360                     refValMaxUlpDiff  = vref[i];
 361                     simdValMaxUlpDiff = vtst[i];
 362                 }
 363             }
 364         }
 365         if ((!absOk) && (!signOk))
 366         {
 367             return ::testing::AssertionFailure()
 368                    << "Failing SIMD math function comparison due to sign differences." << std::endl
 369                    << "Reference function: " << refFuncExpr << std::endl
 370                    << "Simd function:      " << simdFuncExpr << std::endl
 371                    << "Test range is ( " << compareSettings.range.first << " , "
 372                    << compareSettings.range.second << " ) " << std::endl
 373                    << "Match rule: " << matchRuleNames_.at(compareSettings.matchRule) << std::endl
 374                    << "First sign difference around x=" << std::setprecision(20)
 375                    << ::testing::PrintToString(vx) << std::endl
 376                    << "Ref values:  " << std::setprecision(20) << ::testing::PrintToString(vref)
 377                    << std::endl
 378                    << "SIMD values: " << std::setprecision(20) << ::testing::PrintToString(vtst)
 379                    << std::endl;
 380         }
 381     }
 382
 383     GMX_RELEASE_ASSERT(compareSettings.ulpTol >= 0, "Invalid ulp value.");
 384     if (maxUlpDiff <= compareSettings.ulpTol)
 385     {
 386         return ::testing::AssertionSuccess();
 387     }
 388     else
 389     {
 390         return ::testing::AssertionFailure()
 391                << "Failing SIMD math function ulp comparison between " << refFuncExpr << " and "
 392                << simdFuncExpr << std::endl
 393                << "Requested ulp tolerance: " << compareSettings.ulpTol << std::endl
 394                << "Requested abs tolerance: " << compareSettings.absTol << std::endl
 395                << "Match rule: " << matchRuleNames_.at(compareSettings.matchRule) << std::endl
 396                << "Largest Ulp difference occurs for x=" << std::setprecision(20) << maxUlpDiffPos
 397                << std::endl
 398                << "Ref  values: " << std::setprecision(20) << refValMaxUlpDiff << std::endl
 399                << "SIMD values: " << std::setprecision(20) << simdValMaxUlpDiff << std::endl
 400                << "Ulp diff.:   " << std::setprecision(20) << maxUlpDiff << std::endl;
 401     }
 402 }
 403
 404 // Actual routine to generate a small set of test points in current precision. This will
 405 // be called by either the double or single precision test fixture, since we need different
 406 // test names to compare to the right reference data.
 407 void SimdMathTest::generateTestPointsTest()
 408 {
 409     int                             points(10);
 410     gmx::test::TestReferenceData    data;
 411     gmx::test::TestReferenceChecker checker(data.rootChecker());
 412
 413     std::vector<real> result;
 414
 415     result = generateTestPoints(Range(-1e10, -1), points);
 416     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10,-1[");
 417
 418     result = generateTestPoints(Range(-1e10, -1e-10), points);
 419     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, -1e-10[");
 420
 421     result = generateTestPoints(Range(1, 1e10), points);
 422     checker.checkSequence(result.begin(), result.end(), "Test points for interval [1, 1e10[");
 423
 424     result = generateTestPoints(Range(1e-10, 1e10), points);
 425     checker.checkSequence(result.begin(), result.end(), "Test points for interval [1e-10, 1e10[");
 426
 427     result = generateTestPoints(Range(-1e10, 1e-10), points);
 428     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, 1e-10[");
 429
 430     result = generateTestPoints(Range(-1e-10, 1e-10), points);
 431     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e-10, 1e-10[");
 432
 433     result = generateTestPoints(Range(-1e-10, 1e10), points);
 434     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e-10, 1e10[");
 435
 436     result = generateTestPoints(Range(-1e10, 1e10), points);
 437     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, 1e10[");
 438
 439     result = generateTestPoints(Range(-1000, 0), points);
 440     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1000, 0[");
 441
 442     result = generateTestPoints(Range(0, 1000), points);
 443     checker.checkSequence(result.begin(), result.end(), "Test points for interval [0, 1000[");
 444 }
 445
 446 /*! \} */
 447 /*! \endcond */
 448
 449
 450 // Actual math function tests below
 451
 452 /*! \cond internal */
 453 /*! \addtogroup module_simd */
 454 /*! \{ */
 455
 456 namespace
 457 {
 458
 459 // Reference data is selected based on test name, so make the test name precision-dependent
 460 #        if GMX_DOUBLE
 461 TEST_F(SimdMathTest, generateTestPointsDouble)
 462 {
 463     generateTestPointsTest();
 464 }
 465 #        else
 466 TEST_F(SimdMathTest, generateTestPointsFloat)
 467 {
 468     generateTestPointsTest();
 469 }
 470 #        endif
 471
 472 TEST_F(SimdMathTest, copysign)
 473 {
 474     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(-c0, c1, c2),
 475                             copysign(setSimdRealFrom3R(c0, c1, c2), setSimdRealFrom3R(-c3, c4, 0)));
 476     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(-c0, c1, c2),
 477                             copysign(setSimdRealFrom3R(-c0, -c1, -c2), setSimdRealFrom3R(-c3, c4, 0)));
 478     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(c0, -c1, c2),
 479                             copysign(setSimdRealFrom3R(c0, c1, c2), setSimdRealFrom3R(c3, -c4, 0)));
 480     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(c0, -c1, c2),
 481                             copysign(setSimdRealFrom3R(-c0, -c1, -c2), setSimdRealFrom3R(c3, -c4, 0)));
 482 }
 483
 484 /*! \brief Function wrapper to evaluate reference 1/sqrt(x) */
 485 real refInvsqrt(real x)
 486 {
 487     return 1.0 / std::sqrt(x);
 488 }
 489
 490 TEST_F(SimdMathTest, invsqrt)
 491 {
 492     const real      low  = std::numeric_limits<float>::min();
 493     const real      high = std::numeric_limits<float>::max();
 494     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 495
 496     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, invsqrt, settings);
 497 }
 498
 499 TEST_F(SimdMathTest, maskzInvsqrt)
 500 {
 501     SimdReal x   = setSimdRealFrom3R(c1, 0.0, c2);
 502     SimdBool m   = (setZero() < x);
 503     SimdReal ref = setSimdRealFrom3R(1.0 / std::sqrt(c1), 0.0, 1.0 / std::sqrt(c2));
 504     GMX_EXPECT_SIMD_REAL_NEAR(ref, maskzInvsqrt(x, m));
 505 }
 506
 507 /*! \brief Function wrapper to return first result when testing \ref invsqrtPair */
 508 SimdReal gmx_simdcall tstInvsqrtPair0(SimdReal x)
 509 {
 510     SimdReal r0, r1;
 511     invsqrtPair(x, x, &r0, &r1);
 512     return r0;
 513 }
 514
 515 /*! \brief Function wrapper to return second result when testing \ref invsqrtPair */
 516 SimdReal gmx_simdcall tstInvsqrtPair1(SimdReal x)
 517 {
 518     SimdReal r0, r1;
 519     invsqrtPair(x, x, &r0, &r1);
 520     return r1;
 521 }
 522
 523 TEST_F(SimdMathTest, invsqrtPair)
 524 {
 525     const real low  = std::numeric_limits<float>::min();
 526     const real high = std::numeric_limits<float>::max();
 527
 528     // Accuracy conversions lose a bit of accuracy compared to all-double,
 529     // so increase the tolerance to 4*ulpTol_
 530     CompareSettings settings{ Range(low, high), 4 * ulpTol_, absTol_, MatchRule::Normal };
 531
 532     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tstInvsqrtPair0, settings);
 533     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tstInvsqrtPair1, settings);
 534 }
 535
 536 /*! \brief Function wrapper to evaluate reference sqrt(x) */
 537 real refSqrt(real x)
 538 {
 539     return std::sqrt(x);
 540 }
 541
 542 TEST_F(SimdMathTest, sqrt)
 543 {
 544     // Since the first lookup step is sometimes performed in single precision,
 545     // our SIMD sqrt can only handle single-precision input values, even when
 546     // compiled in double precision.
 547
 548     const real      minFloat     = std::numeric_limits<float>::min();
 549     const real      minSafeFloat = minFloat * 10;
 550     const real      maxSafeFloat = std::numeric_limits<float>::max() * 0.1;
 551     CompareSettings settings;
 552     // The accuracy conversions lose a bit of extra accuracy compared to
 553     // doing the iterations in all-double.
 554     setUlpTol(4 * ulpTol_);
 555
 556     // First test that 0.0 and a few other values work
 557     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(0, std::sqrt(c1), std::sqrt(c2)),
 558                               sqrt(setSimdRealFrom3R(0, c1, c2)));
 559
 560 #        if GMX_DOUBLE
 561     // As mentioned above, we cannot guarantee that very small double precision
 562     // input values (below std::numeric_limits<float>::min()) are handled correctly,
 563     // so our implementation will clamp it to zero. In this range we allow either
 564     // the correct value or zero, but it's important that it does not result in NaN or Inf values.
 565     //
 566     // This test range must not be called for single precision, since if we try to divide
 567     // the interval (0.0, low( in npoints we will try to multiply by factors so small that
 568     // they end up being flushed to zero, and the loop would never end.
 569     settings = { Range(0.0, minFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
 570     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
 571 #        endif
 572
 573     // Next range: Just about minFloat the lookup should always work, but the results
 574     // might be a bit fragile due to issues with the N-R iterations being flushed to zero
 575     // for denormals. We can probably relax the latter in double precision, but since we
 576     // anyway cannot handle numbers that cannot be represented in single it's not worth
 577     // worrying too much about whether we have zero or an exact values around 10^-38....
 578     settings = { Range(minFloat, minSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
 579     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
 580
 581     settings = { Range(minSafeFloat, maxSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
 582     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
 583 }
 584
 585 TEST_F(SimdMathTest, sqrtUnsafe)
 586 {
 587     const real minSafeFloat = std::numeric_limits<float>::min() * 10;
 588     const real maxSafeFloat = std::numeric_limits<float>::max() * 0.1;
 589
 590     // The accuracy conversions lose a bit of extra accuracy compared to
 591     // doing the iterations in all-double, so we use 4*ulpTol_
 592     setUlpTol(4 * ulpTol_);
 593
 594     CompareSettings settings{ Range(minSafeFloat, maxSafeFloat), 4 * ulpTol_, absTol_, MatchRule::Normal };
 595     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt<MathOptimization::Unsafe>, settings);
 596 }
 597
 598 /*! \brief Function wrapper to evaluate reference 1/x */
 599 real refInv(real x)
 600 {
 601     return 1.0 / x;
 602 }
 603
 604 TEST_F(SimdMathTest, inv)
 605 {
 606     // Since the first lookup step is sometimes performed in single precision,
 607     // our SIMD 1/x can only handle single-precision input values, even when
 608     // compiled in double precision.
 609
 610     // Relevant threshold points
 611     const real minSafeFloat = std::numeric_limits<float>::min()
 612                               * 10; // X value guaranteed not to result in Inf intermediates for 1/x calc.
 613     const real maxSafeFloat = std::numeric_limits<float>::max()
 614                               * 0.1; // X value guaranteed not to result in DTZ intermediates for 1/x calc.
 615     // Scale highest value by 1-eps, since we will do some arithmetics on this value
 616     const real maxFloat =
 617             std::numeric_limits<float>::max() * (1.0 - std::numeric_limits<float>::epsilon());
 618     CompareSettings settings;
 619
 620     // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
 621     settings = { Range(-maxFloat, -maxSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
 622     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
 623
 624     // Normal checks for x < 0
 625     settings = { Range(-maxSafeFloat, -minSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
 626     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
 627
 628     // We do not care about the small range -minSafeFloat < x < +minSafeFloat where the result can be +/- Inf, since we don't require strict IEEE754.
 629
 630     // Normal checks for x > 0
 631     settings = { Range(minSafeFloat, maxSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
 632     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
 633
 634     // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
 635     settings = { Range(maxSafeFloat, maxFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
 636     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
 637 }
 638
 639 TEST_F(SimdMathTest, maskzInv)
 640 {
 641     SimdReal x   = setSimdRealFrom3R(c1, 0.0, c2);
 642     SimdBool m   = (setZero() < x);
 643     SimdReal ref = setSimdRealFrom3R(1.0 / c1, 0.0, 1.0 / c2);
 644     GMX_EXPECT_SIMD_REAL_NEAR(ref, maskzInv(x, m));
 645 }
 646
 647 TEST_F(SimdMathTest, cbrt)
 648 {
 649     const real low  = -std::numeric_limits<real>::max();
 650     const real high = std::numeric_limits<real>::max();
 651
 652     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 653     GMX_EXPECT_SIMD_FUNC_NEAR(std::cbrt, cbrt, settings);
 654 }
 655
 656 /*! \brief Function wrapper to evaluate reference 1/cbrt(x) */
 657 real refInvCbrt(real x)
 658 {
 659     return 1.0 / std::cbrt(x);
 660 }
 661
 662 TEST_F(SimdMathTest, invcbrt)
 663 {
 664     // Negative values first
 665     real low  = -std::numeric_limits<real>::max();
 666     real high = -std::numeric_limits<real>::min();
 667
 668     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 669     GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrt, settings);
 670
 671     // Positive values
 672     low      = std::numeric_limits<real>::min();
 673     high     = std::numeric_limits<real>::max();
 674     settings = { Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 675     GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrt, settings);
 676 }
 677
 678 TEST_F(SimdMathTest, log2)
 679 {
 680     const real low  = std::numeric_limits<real>::min();
 681     const real high = std::numeric_limits<real>::max();
 682
 683     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 684     GMX_EXPECT_SIMD_FUNC_NEAR(std::log2, log2, settings);
 685 }
 686
 687 TEST_F(SimdMathTest, log)
 688 {
 689     const real low  = std::numeric_limits<real>::min();
 690     const real high = std::numeric_limits<real>::max();
 691
 692     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 693     GMX_EXPECT_SIMD_FUNC_NEAR(std::log, log, settings);
 694 }
 695
 696 TEST_F(SimdMathTest, exp2)
 697 {
 698     // Relevant threshold points
 699     constexpr real lowestReal = -std::numeric_limits<real>::max();
 700     constexpr real lowestRealThatProducesNormal =
 701             std::numeric_limits<real>::min_exponent
 702             - 1; // adding the significant corresponds to one more unit in exponent
 703     constexpr real lowestRealThatProducesDenormal =
 704             lowestRealThatProducesNormal
 705             - std::numeric_limits<real>::digits; // digits refer to bits in significand, so 24/53 for float/double
 706     constexpr real highestRealThatProducesNormal =
 707             std::numeric_limits<real>::max_exponent
 708             - 1; // adding the significant corresponds to one more unit in exponent
 709     CompareSettings settings;
 710
 711     // Below subnormal range all results should be zero (so, match the reference)
 712     settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
 713     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
 714
 715     // Subnormal range, require matching, but DTZ is fine
 716     settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal),
 717                  ulpTol_,
 718                  absTol_,
 719                  MatchRule::Dtz };
 720     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
 721
 722     // Normal range, standard result expected
 723     settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
 724                  ulpTol_,
 725                  absTol_,
 726                  MatchRule::Normal };
 727     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
 728 }
 729
 730 TEST_F(SimdMathTest, exp2Unsafe)
 731 {
 732     // The unsafe version is only defined in the normal range
 733     constexpr real lowestRealThatProducesNormal =
 734             std::numeric_limits<real>::min_exponent
 735             - 1; // adding the significant corresponds to one more unit in exponent
 736     constexpr real highestRealThatProducesNormal =
 737             std::numeric_limits<real>::max_exponent
 738             - 1; // adding the significant corresponds to one more unit in exponent
 739
 740     CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
 741                               ulpTol_,
 742                               absTol_,
 743                               MatchRule::Normal };
 744     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2<MathOptimization::Unsafe>, settings);
 745 }
 746
 747 TEST_F(SimdMathTest, exp)
 748 {
 749     // Relevant threshold points. See the exp2 test for more details about the values; these are
 750     // simply scaled by log(2) due to the difference between exp2 and exp.
 751     const real lowestReal = -std::numeric_limits<real>::max();
 752     // In theory the smallest value should be (min_exponent-1)*log(2), but rounding after the multiplication will cause this
 753     // value to be a single ulp too low. This might cause failed tests on CPUs that use different DTZ modes for SIMD vs.
 754     // non-SIMD arithmetics (e.g. ARM v7), so multiply by (1.0-eps) to increase it by a single ulp.
 755     const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
 756                                               * std::log(2.0)
 757                                               * (1 - std::numeric_limits<real>::epsilon());
 758     const real lowestRealThatProducesDenormal =
 759             lowestRealThatProducesNormal - std::numeric_limits<real>::digits * std::log(2.0);
 760     const real highestRealThatProducesNormal =
 761             (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
 762     CompareSettings settings;
 763
 764     // Below subnormal range all results should be zero (so, match the reference)
 765     settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
 766     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
 767
 768     // Subnormal range, require matching, but DTZ is fine
 769     settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal),
 770                  ulpTol_,
 771                  absTol_,
 772                  MatchRule::Dtz };
 773     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
 774
 775     // Normal range, standard result expected
 776     settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
 777                  ulpTol_,
 778                  absTol_,
 779                  MatchRule::Normal };
 780     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
 781 }
 782
 783 TEST_F(SimdMathTest, expUnsafe)
 784 {
 785     // See test of exp() for comments about test ranges
 786     const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
 787                                               * std::log(2.0)
 788                                               * (1 - std::numeric_limits<real>::epsilon());
 789     const real highestRealThatProducesNormal =
 790             (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
 791
 792     CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
 793                               ulpTol_,
 794                               absTol_,
 795                               MatchRule::Normal };
 796     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp<MathOptimization::Unsafe>, settings);
 797 }
 798
 799 TEST_F(SimdMathTest, pow)
 800 {
 801     // We already test the log2/exp2 components of pow() extensively above, and it's a very
 802     // simple single-line function, so here we just test a handful of values to catch typos
 803     // and then some special values.
 804
 805     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
 806                               pow(rSimd_c0c1c2, rSimd_c3c4c5));
 807
 808     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
 809                               pow(rSimd_c0c1c2, rSimd_m3m0m4));
 810
 811     // 0^0 = 1 , 0^c1=0, -c1^0=1
 812     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(1.0, 0.0, 1.0),
 813                               pow(setSimdRealFrom3R(0, 0.0, -c1), setSimdRealFrom3R(0.0, c1, 0.0)));
 814 }
 815
 816 TEST_F(SimdMathTest, powUnsafe)
 817 {
 818     // We already test the log2/exp2 components of pow() extensively above, and it's a very
 819     // simple single-line function, so here we just test a handful of values to catch typos
 820     // and then some special values.
 821
 822     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
 823                               pow<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_c3c4c5));
 824
 825     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
 826                               pow<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_m3m0m4));
 827 }
 828
 829 /*! \brief Function wrapper for erf(x), with argument/return in default Gromacs precision.
 830  *
 831  * \note Single-precision erf() in some libraries can be slightly lower precision
 832  * than the SIMD flavor, so we use a cast to force double precision for reference.
 833  */
 834 real refErf(real x)
 835 {
 836     return std::erf(static_cast<double>(x));
 837 }
 838
 839 TEST_F(SimdMathTest, erf)
 840 {
 841     CompareSettings settings{ Range(-9, 9), ulpTol_, std::numeric_limits<real>::min(), MatchRule::Normal };
 842     GMX_EXPECT_SIMD_FUNC_NEAR(refErf, erf, settings);
 843 }
 844
 845 /*! \brief Function wrapper for erfc(x), with argument/return in default Gromacs precision.
 846  *
 847  * \note Single-precision erfc() in some libraries can be slightly lower precision
 848  * than the SIMD flavor, so we use a cast to force double precision for reference.
 849  */
 850 real refErfc(real x)
 851 {
 852     return std::erfc(static_cast<double>(x));
 853 }
 854
 855 TEST_F(SimdMathTest, erfc)
 856 {
 857     // Our erfc algorithm has 4 ulp accuracy, so relax tolerance a bit to 4*ulpTol
 858     CompareSettings settings{ Range(-9, 9), 4 * ulpTol_, std::numeric_limits<real>::min(), MatchRule::Normal };
 859     GMX_EXPECT_SIMD_FUNC_NEAR(refErfc, erfc, settings);
 860 }
 861
 862 TEST_F(SimdMathTest, sin)
 863 {
 864     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
 865     GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sin, settings);
 866
 867     // Range reduction leads to accuracy loss, so we might want higher tolerance here
 868     settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
 869     GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sin, settings);
 870 }
 871
 872 TEST_F(SimdMathTest, cos)
 873 {
 874     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
 875     GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cos, settings);
 876
 877     // Range reduction leads to accuracy loss, so we might want higher tolerance here
 878     settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
 879     GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cos, settings);
 880 }
 881
 882 TEST_F(SimdMathTest, tan)
 883 {
 884     // Tan(x) is a little sensitive due to the division in the algorithm.
 885     // Rather than using lots of extra FP operations, we accept the algorithm
 886     // presently only achieves a ~3 ulp error and use the medium tolerance.
 887     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
 888     GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tan, settings);
 889
 890     // Range reduction leads to accuracy loss, so we might want higher tolerance here
 891     settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
 892     GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tan, settings);
 893 }
 894
 895 TEST_F(SimdMathTest, asin)
 896 {
 897     // Our present asin(x) algorithm achieves 2-3 ulp accuracy
 898     CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
 899     GMX_EXPECT_SIMD_FUNC_NEAR(std::asin, asin, settings);
 900 }
 901
 902 TEST_F(SimdMathTest, acos)
 903 {
 904     // Our present acos(x) algorithm achieves 2-3 ulp accuracy
 905     CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
 906     GMX_EXPECT_SIMD_FUNC_NEAR(std::acos, acos, settings);
 907 }
 908
 909 TEST_F(SimdMathTest, atan)
 910 {
 911     // Our present atan(x) algorithm achieves 1 ulp accuracy
 912     CompareSettings settings{ Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
 913     GMX_EXPECT_SIMD_FUNC_NEAR(std::atan, atan, settings);
 914 }
 915
 916 TEST_F(SimdMathTest, atan2)
 917 {
 918     // test each quadrant
 919     GMX_EXPECT_SIMD_REAL_NEAR(
 920             setSimdRealFrom3R(std::atan2(c0, c3), std::atan2(c1, c4), std::atan2(c2, c5)),
 921             atan2(rSimd_c0c1c2, rSimd_c3c4c5));
 922     GMX_EXPECT_SIMD_REAL_NEAR(
 923             setSimdRealFrom3R(std::atan2(-c0, c3), std::atan2(-c1, c4), std::atan2(-c2, c5)),
 924             atan2(rSimd_m0m1m2, rSimd_c3c4c5));
 925     GMX_EXPECT_SIMD_REAL_NEAR(
 926             setSimdRealFrom3R(std::atan2(-c0, -c3), std::atan2(-c1, -c0), std::atan2(-c2, -c4)),
 927             atan2(rSimd_m0m1m2, rSimd_m3m0m4));
 928     GMX_EXPECT_SIMD_REAL_NEAR(
 929             setSimdRealFrom3R(std::atan2(c0, -c3), std::atan2(c1, -c0), std::atan2(c2, -c4)),
 930             atan2(rSimd_c0c1c2, rSimd_m3m0m4));
 931
 932     // cases important for calculating angles
 933     // values on coordinate axes
 934     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(0, c0), std::atan2(0, c1), std::atan2(0, c2)),
 935                               atan2(setZero(), rSimd_c0c1c2));
 936     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(c0, 0), std::atan2(c1, 0), std::atan2(c2, 0)),
 937                               atan2(rSimd_c0c1c2, setZero()));
 938     GMX_EXPECT_SIMD_REAL_NEAR(
 939             setSimdRealFrom3R(std::atan2(0, -c0), std::atan2(0, -c1), std::atan2(0, -c2)),
 940             atan2(setZero(), rSimd_m0m1m2));
 941     GMX_EXPECT_SIMD_REAL_NEAR(
 942             setSimdRealFrom3R(std::atan2(-c0, 0), std::atan2(-c1, 0), std::atan2(-c2, 0)),
 943             atan2(rSimd_m0m1m2, setZero()));
 944     // degenerate value (origin) should return 0.0. At least IBM xlc 13.1.5 gets the reference
 945     // value wrong (-nan) at -O3 optimization, so we compare to the correct value (0.0) instead.
 946     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom1R(0.0), atan2(setSimdRealFrom3R(0.0, 0.0, 0.0), setZero()));
 947 }
 948
 949 /*! \brief Evaluate reference version of PME force correction. */
 950 real refPmeForceCorrection(real x)
 951 {
 952     if (x != 0)
 953     {
 954         real y = std::sqrt(x);
 955         return 2 * std::exp(-x) / (std::sqrt(M_PI) * x) - std::erf(static_cast<double>(y)) / (x * y);
 956     }
 957     else
 958     {
 959         return -4 / (3 * std::sqrt(M_PI));
 960     }
 961 }
 962
 963 // The PME corrections will be added to ~1/r2, so absolute tolerance of EPS is fine.
 964 TEST_F(SimdMathTest, pmeForceCorrection)
 965 {
 966     // Pme correction relative accuracy only needs to be ~1e-6 accuracy single, 1e-10 double
 967     const std::int64_t ulpTol = (GMX_DOUBLE ? 5e-10 : 5e-6) / GMX_REAL_EPS;
 968
 969     CompareSettings settings{ Range(0.15, 4), ulpTol, GMX_REAL_EPS, MatchRule::Normal };
 970     GMX_EXPECT_SIMD_FUNC_NEAR(refPmeForceCorrection, pmeForceCorrection, settings);
 971 }
 972
 973 /*! \brief Evaluate reference version of PME potential correction. */
 974 real refPmePotentialCorrection(real x)
 975 {
 976     real y = std::sqrt(x);
 977     return std::erf(static_cast<double>(y)) / y;
 978 }
 979
 980 // The PME corrections will be added to ~1/r, so absolute tolerance of EPS is fine.
 981 TEST_F(SimdMathTest, pmePotentialCorrection)
 982 {
 983     // Pme correction relative accuracy only needs to be ~1e-6 accuracy single, 1e-10 double
 984     const std::int64_t ulpTol = (GMX_DOUBLE ? 5e-10 : 5e-6) / GMX_REAL_EPS;
 985
 986     CompareSettings settings{ Range(0.15, 4), ulpTol, GMX_REAL_EPS, MatchRule::Normal };
 987     GMX_EXPECT_SIMD_FUNC_NEAR(refPmePotentialCorrection, pmePotentialCorrection, settings);
 988 }
 989
 990 // Functions that only target single accuracy, even for double SIMD data
 991
 992 TEST_F(SimdMathTest, invsqrtSingleAccuracy)
 993 {
 994     // Here we always use float limits, since the lookup is not defined for numbers that
 995     // cannot be represented in single precision.
 996     const real low  = std::numeric_limits<float>::min();
 997     const real high = std::numeric_limits<float>::max();
 998     /* Increase the allowed error by the difference between the actual precision and single */
 999     setUlpTolSingleAccuracy(ulpTol_);
1000
1001     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1002     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, invsqrtSingleAccuracy, settings);
1003 }
1004
1005 /*! \brief Function wrapper to return first result when testing \ref invsqrtPairSingleAccuracy */
1006 SimdReal gmx_simdcall tst_invsqrt_SingleAccuracy_pair0(SimdReal x)
1007 {
1008     SimdReal r0, r1;
1009     invsqrtPairSingleAccuracy(x, x, &r0, &r1);
1010     return r0;
1011 }
1012
1013 /*! \brief Function wrapper to return second result when testing \ref invsqrtPairSingleAccuracy */
1014 SimdReal gmx_simdcall tst_invsqrt_SingleAccuracy_pair1(SimdReal x)
1015 {
1016     SimdReal r0, r1;
1017     invsqrtPairSingleAccuracy(x, x, &r0, &r1);
1018     return r1;
1019 }
1020
1021 TEST_F(SimdMathTest, invsqrtPairSingleAccuracy)
1022 {
1023     // Float limits since lookup is always performed in single
1024     const real low  = std::numeric_limits<float>::min();
1025     const real high = std::numeric_limits<float>::max();
1026     /* Increase the allowed error by the difference between the actual precision and single */
1027     setUlpTolSingleAccuracy(ulpTol_);
1028
1029     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1030     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tst_invsqrt_SingleAccuracy_pair0, settings);
1031     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tst_invsqrt_SingleAccuracy_pair1, settings);
1032 }
1033
1034 TEST_F(SimdMathTest, sqrtSingleAccuracy)
1035 {
1036     // Since the first lookup step is sometimes performed in single precision,
1037     // our SIMD sqrt can only handle single-precision input values, even when
1038     // compiled in double precision - thus we use single precision limits here.
1039
1040     // Scale lowest value by 1+eps, since we will do some arithmetics on this value
1041     const real low = std::numeric_limits<float>::min() * (1.0 + std::numeric_limits<float>::epsilon());
1042     const real      high = std::numeric_limits<float>::max();
1043     CompareSettings settings;
1044
1045     // Increase the allowed error by the difference between the actual precision and single
1046     setUlpTolSingleAccuracy(ulpTol_);
1047
1048     // First test that 0.0 and a few other values works
1049     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(0, std::sqrt(c0), std::sqrt(c1)),
1050                               sqrtSingleAccuracy(setSimdRealFrom3R(0, c0, c1)));
1051
1052 #        if GMX_DOUBLE
1053     // As mentioned above, we cannot guarantee that very small double precision
1054     // input values (below std::numeric_limits<float>::min()) are handled correctly,
1055     // so our implementation will clamp it to zero. In this range we allow either
1056     // the correct value or zero, but it's important that it does not result in NaN or Inf values.
1057     //
1058     // This test range must not be called for single precision, since if we try to divide
1059     // the interval (0.0, low( in npoints we will try to multiply by factors so small that
1060     // they end up being flushed to zero, and the loop would never end.
1061     settings = { Range(0.0, low), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
1062     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy, settings);
1063 #        endif
1064
1065     settings = { Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1066     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy, settings);
1067 }
1068
1069 TEST_F(SimdMathTest, sqrtSingleAccuracyUnsafe)
1070 {
1071     // Test the full range, but stick to float limits since lookup is done in single.
1072     const real low  = std::numeric_limits<float>::min();
1073     const real high = std::numeric_limits<float>::max();
1074
1075     /* Increase the allowed error by the difference between the actual precision and single */
1076     setUlpTolSingleAccuracy(ulpTol_);
1077
1078     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1079     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy<MathOptimization::Unsafe>, settings);
1080 }
1081
1082 TEST_F(SimdMathTest, invSingleAccuracy)
1083 {
1084     // Since the first lookup step is sometimes performed in single precision,
1085     // our SIMD 1/x can only handle single-precision input values, even when
1086     // compiled in double precision.
1087
1088     // Relevant threshold points
1089     const real minSafeFloat = std::numeric_limits<float>::min()
1090                               * 10; // X value guaranteed not to result in Inf intermediates for 1/x calc.
1091     const real maxSafeFloat = std::numeric_limits<float>::max()
1092                               * 0.1; // X value guaranteed not to result in DTZ intermediates for 1/x calc.
1093     // Scale highest value by 1-eps, since we will do some arithmetics on this value
1094     const real maxFloat =
1095             std::numeric_limits<float>::max() * (1.0 - std::numeric_limits<float>::epsilon());
1096     CompareSettings settings;
1097
1098     // Increase the allowed error by the difference between the actual precision and single
1099     setUlpTolSingleAccuracy(ulpTol_);
1100
1101     // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
1102     settings = { Range(-maxFloat, -maxSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
1103     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1104
1105     // Normal checks for x < 0
1106     settings = { Range(-maxSafeFloat, -minSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
1107     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1108
1109     // We do not care about the small range -minSafeFloat < x < +minSafeFloat where the result can be +/- Inf, since we don't require strict IEEE754.
1110
1111     // Normal checks for x > 0
1112     settings = { Range(minSafeFloat, maxSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
1113     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1114
1115     // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
1116     settings = { Range(maxSafeFloat, maxFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
1117     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1118 }
1119
1120 TEST_F(SimdMathTest, cbrtSingleAccuracy)
1121 {
1122     const real low  = -std::numeric_limits<real>::max();
1123     const real high = std::numeric_limits<real>::max();
1124
1125     // Increase the allowed error by the difference between the actual precision and single
1126     setUlpTolSingleAccuracy(ulpTol_);
1127
1128     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1129     GMX_EXPECT_SIMD_FUNC_NEAR(std::cbrt, cbrtSingleAccuracy, settings);
1130 }
1131
1132 TEST_F(SimdMathTest, invcbrtSingleAccuracy)
1133 {
1134     // Increase the allowed error by the difference between the actual precision and single
1135     setUlpTolSingleAccuracy(ulpTol_);
1136
1137     // Negative values first
1138     real low  = -std::numeric_limits<real>::max();
1139     real high = -std::numeric_limits<real>::min();
1140
1141     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1142     GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrtSingleAccuracy, settings);
1143
1144     // Positive values
1145     low      = std::numeric_limits<real>::min();
1146     high     = std::numeric_limits<real>::max();
1147     settings = { Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1148     GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrtSingleAccuracy, settings);
1149 }
1150
1151 TEST_F(SimdMathTest, log2SingleAccuracy)
1152 {
1153     const real low  = std::numeric_limits<real>::min();
1154     const real high = std::numeric_limits<real>::max();
1155
1156     // Increase the allowed error by the difference between the actual precision and single
1157     setUlpTolSingleAccuracy(ulpTol_);
1158
1159     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1160     GMX_EXPECT_SIMD_FUNC_NEAR(std::log2, log2SingleAccuracy, settings);
1161 }
1162
1163 TEST_F(SimdMathTest, logSingleAccuracy)
1164 {
1165     const real low  = std::numeric_limits<real>::min();
1166     const real high = std::numeric_limits<real>::max();
1167
1168     // Increase the allowed error by the difference between the actual precision and single
1169     setUlpTolSingleAccuracy(ulpTol_);
1170
1171     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1172     GMX_EXPECT_SIMD_FUNC_NEAR(std::log, logSingleAccuracy, settings);
1173 }
1174
1175 TEST_F(SimdMathTest, exp2SingleAccuracy)
1176 {
1177     // Relevant threshold points - float limits since we only target single accuracy
1178     constexpr real lowestReal = -std::numeric_limits<real>::max();
1179     constexpr real lowestRealThatProducesNormal =
1180             std::numeric_limits<real>::min_exponent
1181             - 1; // adding the significant corresponds to one more unit in exponent
1182     constexpr real lowestRealThatProducesDenormal =
1183             lowestRealThatProducesNormal
1184             - std::numeric_limits<real>::digits; // digits refer to bits in significand, so 24/53 for float/double
1185     constexpr real highestRealThatProducesNormal =
1186             std::numeric_limits<real>::max_exponent
1187             - 1; // adding the significant corresponds to one more unit in exponent
1188     CompareSettings settings;
1189
1190     // Increase the allowed error by the difference between the actual precision and single
1191     setUlpTolSingleAccuracy(ulpTol_);
1192
1193     // Below subnormal range all results should be zero
1194     settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
1195     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy, settings);
1196
1197     // Subnormal range, require matching, but DTZ is fine
1198     settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal),
1199                  ulpTol_,
1200                  absTol_,
1201                  MatchRule::Dtz };
1202     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy, settings);
1203
1204     // Normal range, standard result expected
1205     settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1206                  ulpTol_,
1207                  absTol_,
1208                  MatchRule::Normal };
1209     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy, settings);
1210 }
1211
1212 TEST_F(SimdMathTest, exp2SingleAccuracyUnsafe)
1213 {
1214     // The unsafe version is only defined in the normal range
1215     constexpr real lowestRealThatProducesNormal =
1216             std::numeric_limits<real>::min_exponent
1217             - 1; // adding the significant corresponds to one more unit in exponent
1218     constexpr real highestRealThatProducesNormal =
1219             std::numeric_limits<real>::max_exponent
1220             - 1; // adding the significant corresponds to one more unit in exponent
1221
1222     /* Increase the allowed error by the difference between the actual precision and single */
1223     setUlpTolSingleAccuracy(ulpTol_);
1224
1225     CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1226                               ulpTol_,
1227                               absTol_,
1228                               MatchRule::Normal };
1229     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy<MathOptimization::Unsafe>, settings);
1230 }
1231
1232 TEST_F(SimdMathTest, expSingleAccuracy)
1233 {
1234     // See threshold point comments in normal exp() test
1235     const real lowestReal = -std::numeric_limits<real>::max();
1236     // In theory the smallest value should be (min_exponent-1)*log(2), but rounding after the multiplication will cause this
1237     // value to be a single ulp too low. This might cause failed tests on CPUs that use different DTZ modes for SIMD vs.
1238     // non-SIMD arithmetics (e.g. ARM v7), so multiply by (1.0-eps) to increase it by a single ulp.
1239     const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
1240                                               * std::log(2.0)
1241                                               * (1.0 - std::numeric_limits<real>::epsilon());
1242     const real lowestRealThatProducesDenormal =
1243             lowestRealThatProducesNormal - std::numeric_limits<real>::digits * std::log(2.0);
1244     const real highestRealThatProducesNormal =
1245             (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
1246     CompareSettings settings;
1247
1248     // Increase the allowed error by the difference between the actual precision and single
1249     setUlpTolSingleAccuracy(ulpTol_);
1250
1251     // Below subnormal range all results should be zero (so, match the reference)
1252     settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
1253     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy, settings);
1254
1255     // Subnormal range, require matching, but DTZ is fine
1256     settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal),
1257                  ulpTol_,
1258                  absTol_,
1259                  MatchRule::Dtz };
1260     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy, settings);
1261
1262     // Normal range, standard result expected
1263     settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1264                  ulpTol_,
1265                  absTol_,
1266                  MatchRule::Normal };
1267     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy, settings);
1268 }
1269
1270 TEST_F(SimdMathTest, expSingleAccuracyUnsafe)
1271 {
1272     // See test of exp() for comments about test ranges
1273     const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
1274                                               * std::log(2.0)
1275                                               * (1 - std::numeric_limits<real>::epsilon());
1276     const real highestRealThatProducesNormal =
1277             (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
1278
1279     // Increase the allowed error by the difference between the actual precision and single
1280     setUlpTolSingleAccuracy(ulpTol_);
1281
1282     CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1283                               ulpTol_,
1284                               absTol_,
1285                               MatchRule::Normal };
1286     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy<MathOptimization::Unsafe>, settings);
1287 }
1288
1289 TEST_F(SimdMathTest, powSingleAccuracy)
1290 {
1291     // We already test the log2/exp2 components of pow() extensively above, and it's a very
1292     // simple single-line function, so here we just test a handful of values to catch typos
1293     // and then some special values.
1294
1295     // Increase the allowed error by the difference between the actual precision and single
1296     setUlpTolSingleAccuracy(ulpTol_);
1297
1298     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
1299                               powSingleAccuracy(rSimd_c0c1c2, rSimd_c3c4c5));
1300
1301     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
1302                               powSingleAccuracy(rSimd_c0c1c2, rSimd_m3m0m4));
1303
1304     // 0^0 = 1 , 0^c1=0, -c1^0=1
1305     GMX_EXPECT_SIMD_REAL_NEAR(
1306             setSimdRealFrom3R(1.0, 0.0, 1.0),
1307             powSingleAccuracy(setSimdRealFrom3R(0, 0.0, -c1), setSimdRealFrom3R(0.0, c1, 0.0)));
1308 }
1309
1310 TEST_F(SimdMathTest, powSingleAccuracyUnsafe)
1311 {
1312     // We already test the log2/exp2 components of pow() extensively above, and it's a very
1313     // simple single-line function, so here we just test a handful of values to catch typos
1314     // and then some special values.
1315
1316     // Increase the allowed error by the difference between the actual precision and single
1317     setUlpTolSingleAccuracy(ulpTol_);
1318
1319     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
1320                               powSingleAccuracy<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_c3c4c5));
1321
1322     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
1323                               powSingleAccuracy<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_m3m0m4));
1324 }
1325
1326 TEST_F(SimdMathTest, erfSingleAccuracy)
1327 {
1328     // Increase the allowed error by the difference between the actual precision and single
1329     setUlpTolSingleAccuracy(ulpTol_);
1330
1331     CompareSettings settings{ Range(-9, 9), ulpTol_, GMX_REAL_MIN, MatchRule::Normal };
1332     GMX_EXPECT_SIMD_FUNC_NEAR(refErf, erfSingleAccuracy, settings);
1333 }
1334
1335 TEST_F(SimdMathTest, erfcSingleAccuracy)
1336 {
1337     // Increase the allowed error by the difference between the actual precision and single
1338     setUlpTolSingleAccuracy(ulpTol_);
1339
1340     // Our erfc algorithm has 4 ulp accuracy, so relax tolerance a bit
1341     CompareSettings settings{ Range(-9, 9), 4 * ulpTol_, GMX_REAL_MIN, MatchRule::Normal };
1342     GMX_EXPECT_SIMD_FUNC_NEAR(refErfc, erfcSingleAccuracy, settings);
1343 }
1344
1345
1346 TEST_F(SimdMathTest, sinSingleAccuracy)
1347 {
1348     /* Increase the allowed error by the difference between the actual precision and single */
1349     setUlpTolSingleAccuracy(ulpTol_);
1350
1351     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
1352     GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sinSingleAccuracy, settings);
1353
1354     // Range reduction leads to accuracy loss, so we might want higher tolerance here
1355     settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
1356     GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sinSingleAccuracy, settings);
1357 }
1358
1359 TEST_F(SimdMathTest, cosSingleAccuracy)
1360 {
1361     /* Increase the allowed error by the difference between the actual precision and single */
1362     setUlpTolSingleAccuracy(ulpTol_);
1363
1364     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
1365     GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cosSingleAccuracy, settings);
1366
1367     // Range reduction leads to accuracy loss, so we might want higher tolerance here
1368     settings = { Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
1369     GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cosSingleAccuracy, settings);
1370 }
1371
1372 TEST_F(SimdMathTest, tanSingleAccuracy)
1373 {
1374     /* Increase the allowed error by the difference between the actual precision and single */
1375     setUlpTolSingleAccuracy(ulpTol_);
1376
1377     // Tan(x) is a little sensitive due to the division in the algorithm.
1378     // Rather than using lots of extra FP operations, we accept the algorithm
1379     // presently only achieves a ~3 ulp error and use the medium tolerance.
1380     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
1381     GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tanSingleAccuracy, settings);
1382
1383     // Range reduction leads to accuracy loss, so we might want higher tolerance here
1384     settings = { Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
1385     GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tanSingleAccuracy, settings);
1386 }
1387
1388 TEST_F(SimdMathTest, asinSingleAccuracy)
1389 {
1390     /* Increase the allowed error by the difference between the actual precision and single */
1391     setUlpTolSingleAccuracy(ulpTol_);
1392
1393     // Our present asin(x) algorithm achieves 2-3 ulp accuracy
1394     CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
1395     GMX_EXPECT_SIMD_FUNC_NEAR(std::asin, asinSingleAccuracy, settings);
1396 }
1397
1398 TEST_F(SimdMathTest, acosSingleAccuracy)
1399 {
1400     /* Increase the allowed error by the difference between the actual precision and single */
1401     setUlpTolSingleAccuracy(ulpTol_);
1402
1403     // Our present acos(x) algorithm achieves 2-3 ulp accuracy
1404     CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
1405     GMX_EXPECT_SIMD_FUNC_NEAR(std::acos, acosSingleAccuracy, settings);
1406 }
1407
1408 TEST_F(SimdMathTest, atanSingleAccuracy)
1409 {
1410     /* Increase the allowed error by the difference between the actual precision and single */
1411     setUlpTolSingleAccuracy(ulpTol_);
1412
1413     // Our present atan(x) algorithm achieves 1 ulp accuracy
1414     CompareSettings settings{ Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
1415     GMX_EXPECT_SIMD_FUNC_NEAR(std::atan, atanSingleAccuracy, settings);
1416 }
1417
1418 TEST_F(SimdMathTest, atan2SingleAccuracy)
1419 {
1420     /* Increase the allowed error by the difference between the actual precision and single */
1421     setUlpTolSingleAccuracy(ulpTol_);
1422
1423     // test each quadrant
1424     GMX_EXPECT_SIMD_REAL_NEAR(
1425             setSimdRealFrom3R(std::atan2(c0, c3), std::atan2(c1, c4), std::atan2(c2, c5)),
1426             atan2SingleAccuracy(rSimd_c0c1c2, rSimd_c3c4c5));
1427     GMX_EXPECT_SIMD_REAL_NEAR(
1428             setSimdRealFrom3R(std::atan2(-c0, c3), std::atan2(-c1, c4), std::atan2(-c2, c5)),
1429             atan2SingleAccuracy(rSimd_m0m1m2, rSimd_c3c4c5));
1430     GMX_EXPECT_SIMD_REAL_NEAR(
1431             setSimdRealFrom3R(std::atan2(-c0, -c3), std::atan2(-c1, -c0), std::atan2(-c2, -c4)),
1432             atan2SingleAccuracy(rSimd_m0m1m2, rSimd_m3m0m4));
1433     GMX_EXPECT_SIMD_REAL_NEAR(
1434             setSimdRealFrom3R(std::atan2(c0, -c3), std::atan2(c1, -c0), std::atan2(c2, -c4)),
1435             atan2SingleAccuracy(rSimd_c0c1c2, rSimd_m3m0m4));
1436     // cases important for calculating angles
1437     // values on coordinate axes
1438     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(0, c0), std::atan2(0, c1), std::atan2(0, c2)),
1439                               atan2SingleAccuracy(setZero(), rSimd_c0c1c2));
1440     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(c0, 0), std::atan2(c1, 0), std::atan2(c2, 0)),
1441                               atan2SingleAccuracy(rSimd_c0c1c2, setZero()));
1442     GMX_EXPECT_SIMD_REAL_NEAR(
1443             setSimdRealFrom3R(std::atan2(0, -c0), std::atan2(0, -c1), std::atan2(0, -c2)),
1444             atan2SingleAccuracy(setZero(), rSimd_m0m1m2));
1445     GMX_EXPECT_SIMD_REAL_NEAR(
1446             setSimdRealFrom3R(std::atan2(-c0, 0), std::atan2(-c1, 0), std::atan2(-c2, 0)),
1447             atan2SingleAccuracy(rSimd_m0m1m2, setZero()));
1448
1449     // degenerate value (origin) should return 0.0. At least IBM xlc 13.1.5 gets the reference
1450     // value wrong (-nan) at -O3 optimization, so we compare to the correct value (0.0) instead.
1451     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom1R(0.0),
1452                               atan2SingleAccuracy(setSimdRealFrom3R(0.0, 0.0, 0.0), setZero()));
1453 }
1454
1455 TEST_F(SimdMathTest, pmeForceCorrectionSingleAccuracy)
1456 {
1457     // The PME corrections will be added to ~1/r2, so absolute tolerance of EPS is fine.
1458     // Pme correction only needs to be ~1e-6 accuracy single.
1459     // Then increase the allowed error by the difference between the actual precision and single.
1460     setUlpTolSingleAccuracy(std::int64_t(5e-6 / GMX_FLOAT_EPS));
1461
1462     CompareSettings settings{ Range(0.15, 4), ulpTol_, GMX_FLOAT_EPS, MatchRule::Normal };
1463     GMX_EXPECT_SIMD_FUNC_NEAR(refPmeForceCorrection, pmeForceCorrectionSingleAccuracy, settings);
1464 }
1465
1466 TEST_F(SimdMathTest, pmePotentialCorrectionSingleAccuracy)
1467 {
1468     // The PME corrections will be added to ~1/r, so absolute tolerance of EPS is fine.
1469     // Pme correction only needs to be ~1e-6 accuracy single.
1470     // Then increase the allowed error by the difference between the actual precision and single.
1471     setUlpTolSingleAccuracy(std::int64_t(5e-6 / GMX_FLOAT_EPS));
1472
1473     CompareSettings settings{ Range(0.15, 4), ulpTol_, GMX_FLOAT_EPS, MatchRule::Normal };
1474     GMX_EXPECT_SIMD_FUNC_NEAR(refPmePotentialCorrection, pmePotentialCorrectionSingleAccuracy, settings);
1475 }
1476
1477 } // namespace
1478
1479 #    endif // GMX_SIMD_HAVE_REAL
1480
1481 /*! \} */
1482 /*! \endcond */
1483
1484 } // namespace test
1485 } // namespace gmx
1486
1487 #endif // GMX_SIMD