src/gromacs/simd/tests/simd_math.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2017,2018,2019,2020,2021, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "gmxpre.h"
  36
  37 #include "gromacs/simd/simd_math.h"
  38
  39 #include "config.h"
  40
  41 #include <cmath>
  42 #include <cstdint>
  43
  44 #include <map>
  45 #include <utility>
  46 #include <vector>
  47
  48 #include "gromacs/math/utilities.h"
  49 #include "gromacs/options/basicoptions.h"
  50 #include "gromacs/simd/simd.h"
  51
  52 #include "testutils/refdata.h"
  53 #include "testutils/testasserts.h"
  54
  55 #include "simd.h"
  56
  57 #if GMX_SIMD
  58
  59 namespace gmx
  60 {
  61 namespace test
  62 {
  63
  64 /*! \cond internal */
  65 /*! \addtogroup module_simd */
  66 /*! \{ */
  67
  68 #    if GMX_SIMD_HAVE_REAL
  69
  70 class SimdMathTest : public SimdTest
  71 {
  72 public:
  73     /*! \brief Type for half-open intervals specifying test ranges */
  74     typedef std::pair<real, real> Range;
  75
  76     /*! \brief Control what is considered matching values
  77      *
  78      * Normal simply means that we request the values to be equal
  79      * to within the specified tolerance.
  80      * However, there are also two more cases that are special:
  81      *
  82      * - Even if we only care about normal (i.e., not denormal) values, some math
  83      *   libraries might clamp the value to zero, which means our SIMD output
  84      *   might not match their values. By using MatchRule::Dtz, we will consider
  85      *   all values both from the reference and test functions that are within the
  86      *   requested ulp tolerance of a denormal number to be equivalent to 0.0.
  87      * - For some older architectures without fused multiply-add units (e.g. x86 SSE2),
  88      *   we might end up clamping the results to zero just before reaching
  89      *   denormal output, since the intermediate results e.g. in polynomial
  90      *   approximations can be smaller than the final one. We often simply don't
  91      *   care about those values, and then one can use
  92      *   MatchRule::ReferenceOrZero to allow the test value to either match
  93      *   the reference or be zero.
  94      */
  95     enum class MatchRule
  96     {
  97         Normal, //!< Match function values
  98         Dtz, //!< Match function values after setting denormals to zero both in test and reference
  99         ReferenceOrZero, //!< Test values can either match reference or be zero
 100     };
 101
 102     const std::map<MatchRule, std::string> matchRuleNames_ = {
 103         { MatchRule::Normal, "Test should match reference." },
 104         { MatchRule::Dtz, "Test should match reference, with denormals treated as 0.0." },
 105         { MatchRule::ReferenceOrZero, "Test should match reference or 0.0." }
 106     };
 107
 108     /*! \brief Settings used for simd math function comparisons */
 109     struct CompareSettings
 110     {
 111         Range        range;     //!< Range over which to test function
 112         std::int64_t ulpTol;    //!< Ulp tolerance
 113         real         absTol;    //!< Absolute tolerance
 114         MatchRule    matchRule; //!< Decide what we consider a match
 115     };
 116
 117     ::testing::AssertionResult compareSimdMathFunction(const char* refFuncExpr,
 118                                                        const char* simdFuncExpr,
 119                                                        const char* compareSettingsExpr,
 120                                                        real        refFunc(real x),
 121                                                        SimdReal gmx_simdcall  simdFunc(SimdReal x),
 122                                                        const CompareSettings& compareSettings);
 123
 124     /*! \brief Generate test point vector
 125      *
 126      *  \param range  The test interval, half open. Upper limit is not included.
 127      *                Pass by value, since we need to modify in method anyway.
 128      *  \param points Number of points to generate. This might be increased
 129      *                slightly to account both for extra special values like 0.0
 130      *                and the SIMD width.
 131      *
 132      * This routine generates a vector with test points separated by constant
 133      * multiplicative factors, based on the range and number of points in the
 134      * class. If the range includes both negative and positive values, points
 135      * will be generated separately for the negative/positive intervals down
 136      * to the smallest real number that can be represented, and we also include
 137      * 0.0 explicitly.
 138      *
 139      * This is highly useful for large test ranges. For example, with a linear
 140      * 1000-point division of the range (1,1e10) the first three values to test
 141      * would be 1, 10000000.999, and 20000000.998, etc. For large values we would
 142      * commonly hit the point where adding the small delta has no effect due to
 143      * limited numerical precision.
 144      * When we instead use this routine, the values will be 1, 1.0239, 1.0471, etc.
 145      * This will spread the entropy over all bits in the IEEE754 representation,
 146      * and be a much better test of all potential input values.
 147      *
 148      *  \note We do not use the static variable s_nPoints in the parent class
 149      *        to avoid altering any value the user has set on the command line; since
 150      *        it's a static member, changing it would have permanent effect.
 151      */
 152     static std::vector<real> generateTestPoints(Range range, std::size_t points);
 153
 154     /*! \brief Test routine for the test point vector generation
 155      */
 156     static void generateTestPointsTest();
 157 };
 158
 159 /*! \brief Test approximate equality of SIMD vs reference version of a function.
 160  *
 161  * This macro takes vanilla C and SIMD flavors of a function and tests it with
 162  * the number of points, range, and tolerances specified by the test fixture class.
 163  *
 164  * The third option controls the range, tolerances, and match settings.
 165  */
 166 #        define GMX_EXPECT_SIMD_FUNC_NEAR(refFunc, tstFunc, compareSettings) \
 167             EXPECT_PRED_FORMAT3(compareSimdMathFunction, refFunc, tstFunc, compareSettings)
 168
 169 std::vector<real> SimdMathTest::generateTestPoints(Range inputRange, std::size_t inputPoints)
 170 {
 171
 172     std::vector<real> testPoints;
 173     testPoints.reserve(inputPoints);
 174
 175     GMX_RELEASE_ASSERT(inputRange.first < inputRange.second,
 176                        "The start of the interval must come before the end");
 177
 178     std::vector<Range> testRanges;
 179
 180     if (inputRange.first < 0 && inputRange.second > 0)
 181     {
 182         testRanges.emplace_back(Range({ inputRange.first, -std::numeric_limits<real>::min() }));
 183         testRanges.emplace_back(Range({ 0.0, inputRange.second }));
 184     }
 185     else
 186     {
 187         if (inputRange.second == 0)
 188         {
 189             inputRange.second = -std::numeric_limits<real>::min();
 190             inputRange.first  = std::min(inputRange.first, inputRange.second);
 191         }
 192         testRanges.push_back(inputRange);
 193     }
 194
 195     for (Range& range : testRanges)
 196     {
 197         std::size_t points = inputPoints / testRanges.size();
 198
 199         // The value 0 is special, and can only occur at the start of
 200         // the interval after the corrections outside this loop.
 201         // Add it explicitly, and adjust the interval to continue
 202         // at the first valid non-zero positive number.
 203         if (range.first == 0)
 204         {
 205             testPoints.push_back(0.0);
 206             range.first = std::numeric_limits<real>::min();
 207             points--; // Used one point
 208         }
 209
 210         union {
 211             real                                                                               r;
 212             std::conditional<sizeof(real) == sizeof(double), std::int64_t, std::int32_t>::type i;
 213         } low, high, x;
 214
 215         low.r  = range.first;
 216         high.r = range.second;
 217
 218         // IEEE754 floating-point numbers have the cool property that for any range of
 219         // constant sign, for all non-zero numbers a constant (i.e., linear) difference
 220         // in the bitwise representation corresponds to a constant multiplicative factor.
 221         //
 222         // Divide the ulp difference evenly
 223         std::int64_t ulpDiff = high.i - low.i;
 224         // dividend and divisor must both be signed types
 225         std::int64_t ulpDelta    = ulpDiff / static_cast<std::int64_t>(points);
 226         std::int64_t minUlpDelta = (ulpDiff > 0) ? 1 : -1;
 227
 228         if (ulpDelta == 0)
 229         {
 230             // Very short interval or very many points caused round-to-zero.
 231             // Select the smallest possible change, which is one ulp (with correct sign)
 232             ulpDelta = minUlpDelta;
 233             points   = std::abs(ulpDiff);
 234         }
 235
 236         x.r = low.r;
 237         // Use an index-based loop to avoid floating-point comparisons with
 238         // values that might have overflowed. Save one point for the very last
 239         // bitwise value that is part of the interval
 240         for (std::size_t i = 0; i < points - 1; i++)
 241         {
 242             testPoints.push_back(x.r);
 243             x.i += ulpDelta;
 244         }
 245
 246         // Make sure we test the very last point that is inside the interval
 247         x.r = high.r;
 248         x.i -= minUlpDelta;
 249         testPoints.push_back(x.r);
 250     }
 251     return testPoints;
 252 }
 253
 254 /*! \brief Implementation routine to compare SIMD vs reference functions.
 255  *
 256  * \param refFuncExpr         Description of reference function expression
 257  * \param simdFuncExpr        Description of SIMD function expression
 258  * \param compareSettingsExpr Description of compareSettings
 259  * \param refFunc             Reference math function pointer
 260  * \param simdFunc            SIMD math function pointer
 261  * \param compareSettings     Structure with the range, tolerances, and
 262  *                            matching rules to use for the comparison.
 263  *
 264  * \note You should not never call this function directly,  but use the
 265  *       macro GMX_EXPECT_SIMD_FUNC_NEAR(refFunc,tstFunc,matchRule) instead.
 266  */
 267 ::testing::AssertionResult SimdMathTest::compareSimdMathFunction(const char* refFuncExpr,
 268                                                                  const char* simdFuncExpr,
 269                                                                  const char gmx_unused* compareSettingsExpr,
 270                                                                  real refFunc(real x),
 271                                                                  SimdReal gmx_simdcall simdFunc(SimdReal x),
 272                                                                  const CompareSettings& compareSettings)
 273 {
 274     std::vector<real> vx(GMX_SIMD_REAL_WIDTH);
 275     std::vector<real> vref(GMX_SIMD_REAL_WIDTH);
 276     std::vector<real> vtst(GMX_SIMD_REAL_WIDTH);
 277     real              absDiff;
 278     std::int64_t      ulpDiff;
 279     std::int64_t      maxUlpDiff = 0;
 280     real              maxUlpDiffPos;
 281     real              refValMaxUlpDiff, simdValMaxUlpDiff;
 282     const int         niter = s_nPoints / GMX_SIMD_REAL_WIDTH;
 283
 284     union {
 285         real                                                                               r;
 286         std::conditional<sizeof(real) == sizeof(double), std::int64_t, std::int32_t>::type i;
 287     } conv0, conv1;
 288
 289     // Allow zero-size intervals - nothing to test means we succeeded at it
 290     if (compareSettings.range.first == compareSettings.range.second)
 291     {
 292         ::testing::AssertionSuccess();
 293     }
 294
 295     // Calculate the tolerance limit to use for denormals - we want
 296     // values that are within the ulp tolerance of denormals to be considered matching
 297     conv0.r = std::numeric_limits<real>::min();
 298     conv0.i += compareSettings.ulpTol - 1; // min() itself is not denormal, but one ulp larger
 299     const real denormalLimit = conv0.r;
 300
 301     // We want to test as many diverse bit combinations as possible over the range requested,
 302     // and in particular do it evenly spaced in bit-space.
 303     // Due to the way IEEE754 floating-point is represented, that means we should have a
 304     // constant multiplicative factor between adjacent values. This gets a bit complicated
 305     // when we have both positive and negative values, so we offload the generation of the
 306     // specific testing values to a separate routine
 307     std::vector<real> testPoints = generateTestPoints(compareSettings.range, s_nPoints);
 308
 309     size_t pointIndex = 0;
 310
 311     for (int iter = 0; iter < niter; iter++)
 312     {
 313         for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
 314         {
 315             vx[i]   = testPoints[pointIndex];
 316             vref[i] = refFunc(vx[i]);
 317             // If we reach the end of the points, stop increasing index so we pad with
 318             // extra copies of the last element up to the SIMD width
 319             if (pointIndex + 1 < testPoints.size())
 320             {
 321                 pointIndex++;
 322             }
 323         }
 324         vtst = simdReal2Vector(simdFunc(vector2SimdReal(vx)));
 325
 326         bool absOk = true, signOk = true;
 327         for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
 328         {
 329             if (compareSettings.matchRule == MatchRule::Dtz && std::abs(vref[i]) <= denormalLimit
 330                 && std::abs(vtst[i]) <= denormalLimit)
 331             {
 332                 continue;
 333             }
 334
 335             if (compareSettings.matchRule == MatchRule::ReferenceOrZero && vtst[i] == 0.0)
 336             {
 337                 // If we accept 0.0 for the test function, we can continue to the next loop iteration.
 338                 continue;
 339             }
 340
 341             absDiff = std::abs(vref[i] - vtst[i]);
 342             absOk   = absOk && (absDiff < compareSettings.absTol);
 343             signOk  = signOk && ((vref[i] >= 0 && vtst[i] >= 0) || (vref[i] <= 0 && vtst[i] <= 0));
 344
 345             if (absDiff >= compareSettings.absTol)
 346             {
 347                 /* We replicate the trivial ulp differences comparison here rather than
 348                  * calling the lower-level routine for comparing them, since this enables
 349                  * us to run through the entire test range and report the largest deviation
 350                  * without lots of extra glue routines.
 351                  */
 352                 conv0.r = vref[i];
 353                 conv1.r = vtst[i];
 354                 ulpDiff = llabs(conv0.i - conv1.i);
 355                 if (ulpDiff > maxUlpDiff)
 356                 {
 357                     maxUlpDiff        = ulpDiff;
 358                     maxUlpDiffPos     = vx[i];
 359                     refValMaxUlpDiff  = vref[i];
 360                     simdValMaxUlpDiff = vtst[i];
 361                 }
 362             }
 363         }
 364         if ((!absOk) && (!signOk))
 365         {
 366             return ::testing::AssertionFailure()
 367                    << "Failing SIMD math function comparison due to sign differences." << std::endl
 368                    << "Reference function: " << refFuncExpr << std::endl
 369                    << "Simd function:      " << simdFuncExpr << std::endl
 370                    << "Test range is ( " << compareSettings.range.first << " , "
 371                    << compareSettings.range.second << " ) " << std::endl
 372                    << "Match rule: " << matchRuleNames_.at(compareSettings.matchRule) << std::endl
 373                    << "First sign difference around x=" << std::setprecision(20)
 374                    << ::testing::PrintToString(vx) << std::endl
 375                    << "Ref values:  " << std::setprecision(20) << ::testing::PrintToString(vref)
 376                    << std::endl
 377                    << "SIMD values: " << std::setprecision(20) << ::testing::PrintToString(vtst)
 378                    << std::endl;
 379         }
 380     }
 381
 382     GMX_RELEASE_ASSERT(compareSettings.ulpTol >= 0, "Invalid ulp value.");
 383     if (maxUlpDiff <= compareSettings.ulpTol)
 384     {
 385         return ::testing::AssertionSuccess();
 386     }
 387     else
 388     {
 389         return ::testing::AssertionFailure()
 390                << "Failing SIMD math function ulp comparison between " << refFuncExpr << " and "
 391                << simdFuncExpr << std::endl
 392                << "Requested ulp tolerance: " << compareSettings.ulpTol << std::endl
 393                << "Requested abs tolerance: " << compareSettings.absTol << std::endl
 394                << "Match rule: " << matchRuleNames_.at(compareSettings.matchRule) << std::endl
 395                << "Largest Ulp difference occurs for x=" << std::setprecision(20) << maxUlpDiffPos
 396                << std::endl
 397                << "Ref  values: " << std::setprecision(20) << refValMaxUlpDiff << std::endl
 398                << "SIMD values: " << std::setprecision(20) << simdValMaxUlpDiff << std::endl
 399                << "Ulp diff.:   " << std::setprecision(20) << maxUlpDiff << std::endl;
 400     }
 401 }
 402
 403 // Actual routine to generate a small set of test points in current precision. This will
 404 // be called by either the double or single precision test fixture, since we need different
 405 // test names to compare to the right reference data.
 406 void SimdMathTest::generateTestPointsTest()
 407 {
 408     int                             points(10);
 409     gmx::test::TestReferenceData    data;
 410     gmx::test::TestReferenceChecker checker(data.rootChecker());
 411
 412     std::vector<real> result;
 413
 414     result = generateTestPoints(Range(-1e10, -1), points);
 415     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10,-1[");
 416
 417     result = generateTestPoints(Range(-1e10, -1e-10), points);
 418     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, -1e-10[");
 419
 420     result = generateTestPoints(Range(1, 1e10), points);
 421     checker.checkSequence(result.begin(), result.end(), "Test points for interval [1, 1e10[");
 422
 423     result = generateTestPoints(Range(1e-10, 1e10), points);
 424     checker.checkSequence(result.begin(), result.end(), "Test points for interval [1e-10, 1e10[");
 425
 426     result = generateTestPoints(Range(-1e10, 1e-10), points);
 427     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, 1e-10[");
 428
 429     result = generateTestPoints(Range(-1e-10, 1e-10), points);
 430     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e-10, 1e-10[");
 431
 432     result = generateTestPoints(Range(-1e-10, 1e10), points);
 433     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e-10, 1e10[");
 434
 435     result = generateTestPoints(Range(-1e10, 1e10), points);
 436     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, 1e10[");
 437
 438     result = generateTestPoints(Range(-1000, 0), points);
 439     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1000, 0[");
 440
 441     result = generateTestPoints(Range(0, 1000), points);
 442     checker.checkSequence(result.begin(), result.end(), "Test points for interval [0, 1000[");
 443 }
 444
 445 /*! \} */
 446 /*! \endcond */
 447
 448
 449 // Actual math function tests below
 450
 451 /*! \cond internal */
 452 /*! \addtogroup module_simd */
 453 /*! \{ */
 454
 455 namespace
 456 {
 457
 458 // Reference data is selected based on test name, so make the test name precision-dependent
 459 #        if GMX_DOUBLE
 460 TEST_F(SimdMathTest, generateTestPointsDouble)
 461 {
 462     generateTestPointsTest();
 463 }
 464 #        else
 465 TEST_F(SimdMathTest, generateTestPointsFloat)
 466 {
 467     generateTestPointsTest();
 468 }
 469 #        endif
 470
 471 TEST_F(SimdMathTest, copysign)
 472 {
 473     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(-c0, c1, c2),
 474                             copysign(setSimdRealFrom3R(c0, c1, c2), setSimdRealFrom3R(-c3, c4, 0)));
 475     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(-c0, c1, c2),
 476                             copysign(setSimdRealFrom3R(-c0, -c1, -c2), setSimdRealFrom3R(-c3, c4, 0)));
 477     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(c0, -c1, c2),
 478                             copysign(setSimdRealFrom3R(c0, c1, c2), setSimdRealFrom3R(c3, -c4, 0)));
 479     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(c0, -c1, c2),
 480                             copysign(setSimdRealFrom3R(-c0, -c1, -c2), setSimdRealFrom3R(c3, -c4, 0)));
 481 }
 482
 483 /*! \brief Function wrapper to evaluate reference 1/sqrt(x) */
 484 real refInvsqrt(real x)
 485 {
 486     return 1.0 / std::sqrt(x);
 487 }
 488
 489 TEST_F(SimdMathTest, invsqrt)
 490 {
 491     const real      low  = std::numeric_limits<float>::min();
 492     const real      high = std::numeric_limits<float>::max();
 493     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 494
 495     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, invsqrt, settings);
 496 }
 497
 498 TEST_F(SimdMathTest, maskzInvsqrt)
 499 {
 500     SimdReal x   = setSimdRealFrom3R(c1, 0.0, c2);
 501     SimdBool m   = (setZero() < x);
 502     SimdReal ref = setSimdRealFrom3R(1.0 / std::sqrt(c1), 0.0, 1.0 / std::sqrt(c2));
 503     GMX_EXPECT_SIMD_REAL_NEAR(ref, maskzInvsqrt(x, m));
 504 }
 505
 506 /*! \brief Function wrapper to return first result when testing \ref invsqrtPair */
 507 SimdReal gmx_simdcall tstInvsqrtPair0(SimdReal x)
 508 {
 509     SimdReal r0, r1;
 510     invsqrtPair(x, x, &r0, &r1);
 511     return r0;
 512 }
 513
 514 /*! \brief Function wrapper to return second result when testing \ref invsqrtPair */
 515 SimdReal gmx_simdcall tstInvsqrtPair1(SimdReal x)
 516 {
 517     SimdReal r0, r1;
 518     invsqrtPair(x, x, &r0, &r1);
 519     return r1;
 520 }
 521
 522 TEST_F(SimdMathTest, invsqrtPair)
 523 {
 524     const real low  = std::numeric_limits<float>::min();
 525     const real high = std::numeric_limits<float>::max();
 526
 527     // Accuracy conversions lose a bit of accuracy compared to all-double,
 528     // so increase the tolerance to 4*ulpTol_
 529     CompareSettings settings{ Range(low, high), 4 * ulpTol_, absTol_, MatchRule::Normal };
 530
 531     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tstInvsqrtPair0, settings);
 532     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tstInvsqrtPair1, settings);
 533 }
 534
 535 /*! \brief Function wrapper to evaluate reference sqrt(x) */
 536 real refSqrt(real x)
 537 {
 538     return std::sqrt(x);
 539 }
 540
 541 TEST_F(SimdMathTest, sqrt)
 542 {
 543     // Since the first lookup step is sometimes performed in single precision,
 544     // our SIMD sqrt can only handle single-precision input values, even when
 545     // compiled in double precision.
 546
 547     const real      minFloat     = std::numeric_limits<float>::min();
 548     const real      minSafeFloat = minFloat * 10;
 549     const real      maxSafeFloat = std::numeric_limits<float>::max() * 0.1;
 550     CompareSettings settings;
 551     // The accuracy conversions lose a bit of extra accuracy compared to
 552     // doing the iterations in all-double.
 553     setUlpTol(4 * ulpTol_);
 554
 555     // First test that 0.0 and a few other values work
 556     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(0, std::sqrt(c1), std::sqrt(c2)),
 557                               sqrt(setSimdRealFrom3R(0, c1, c2)));
 558
 559 #        if GMX_DOUBLE
 560     // As mentioned above, we cannot guarantee that very small double precision
 561     // input values (below std::numeric_limits<float>::min()) are handled correctly,
 562     // so our implementation will clamp it to zero. In this range we allow either
 563     // the correct value or zero, but it's important that it does not result in NaN or Inf values.
 564     //
 565     // This test range must not be called for single precision, since if we try to divide
 566     // the interval (0.0, low( in npoints we will try to multiply by factors so small that
 567     // they end up being flushed to zero, and the loop would never end.
 568     settings = { Range(0.0, minFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
 569     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
 570 #        endif
 571
 572     // Next range: Just about minFloat the lookup should always work, but the results
 573     // might be a bit fragile due to issues with the N-R iterations being flushed to zero
 574     // for denormals. We can probably relax the latter in double precision, but since we
 575     // anyway cannot handle numbers that cannot be represented in single it's not worth
 576     // worrying too much about whether we have zero or an exact values around 10^-38....
 577     settings = { Range(minFloat, minSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
 578     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
 579
 580     settings = { Range(minSafeFloat, maxSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
 581     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
 582 }
 583
 584 TEST_F(SimdMathTest, sqrtUnsafe)
 585 {
 586     const real minSafeFloat = std::numeric_limits<float>::min() * 10;
 587     const real maxSafeFloat = std::numeric_limits<float>::max() * 0.1;
 588
 589     // The accuracy conversions lose a bit of extra accuracy compared to
 590     // doing the iterations in all-double, so we use 4*ulpTol_
 591     setUlpTol(4 * ulpTol_);
 592
 593     CompareSettings settings{ Range(minSafeFloat, maxSafeFloat), 4 * ulpTol_, absTol_, MatchRule::Normal };
 594     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt<MathOptimization::Unsafe>, settings);
 595 }
 596
 597 /*! \brief Function wrapper to evaluate reference 1/x */
 598 real refInv(real x)
 599 {
 600     return 1.0 / x;
 601 }
 602
 603 TEST_F(SimdMathTest, inv)
 604 {
 605     // Since the first lookup step is sometimes performed in single precision,
 606     // our SIMD 1/x can only handle single-precision input values, even when
 607     // compiled in double precision.
 608
 609     // Relevant threshold points
 610     const real minSafeFloat = std::numeric_limits<float>::min()
 611                               * 10; // X value guaranteed not to result in Inf intermediates for 1/x calc.
 612     const real maxSafeFloat = std::numeric_limits<float>::max()
 613                               * 0.1; // X value guaranteed not to result in DTZ intermediates for 1/x calc.
 614     // Scale highest value by 1-eps, since we will do some arithmetics on this value
 615     const real maxFloat =
 616             std::numeric_limits<float>::max() * (1.0 - std::numeric_limits<float>::epsilon());
 617     CompareSettings settings;
 618
 619     // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
 620     settings = { Range(-maxFloat, -maxSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
 621     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
 622
 623     // Normal checks for x < 0
 624     settings = { Range(-maxSafeFloat, -minSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
 625     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
 626
 627     // We do not care about the small range -minSafeFloat < x < +minSafeFloat where the result can be +/- Inf, since we don't require strict IEEE754.
 628
 629     // Normal checks for x > 0
 630     settings = { Range(minSafeFloat, maxSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
 631     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
 632
 633     // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
 634     settings = { Range(maxSafeFloat, maxFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
 635     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
 636 }
 637
 638 TEST_F(SimdMathTest, maskzInv)
 639 {
 640     SimdReal x   = setSimdRealFrom3R(c1, 0.0, c2);
 641     SimdBool m   = (setZero() < x);
 642     SimdReal ref = setSimdRealFrom3R(1.0 / c1, 0.0, 1.0 / c2);
 643     GMX_EXPECT_SIMD_REAL_NEAR(ref, maskzInv(x, m));
 644 }
 645
 646 TEST_F(SimdMathTest, cbrt)
 647 {
 648     const real low  = -std::numeric_limits<real>::max();
 649     const real high = std::numeric_limits<real>::max();
 650
 651     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 652     GMX_EXPECT_SIMD_FUNC_NEAR(std::cbrt, cbrt, settings);
 653 }
 654
 655 /*! \brief Function wrapper to evaluate reference 1/cbrt(x) */
 656 real refInvCbrt(real x)
 657 {
 658     return 1.0 / std::cbrt(x);
 659 }
 660
 661 TEST_F(SimdMathTest, invcbrt)
 662 {
 663     // Negative values first
 664     real low  = -std::numeric_limits<real>::max();
 665     real high = -std::numeric_limits<real>::min();
 666
 667     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 668     GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrt, settings);
 669
 670     // Positive values
 671     low      = std::numeric_limits<real>::min();
 672     high     = std::numeric_limits<real>::max();
 673     settings = { Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 674     GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrt, settings);
 675 }
 676
 677 TEST_F(SimdMathTest, log2)
 678 {
 679     const real low  = std::numeric_limits<real>::min();
 680     const real high = std::numeric_limits<real>::max();
 681
 682     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 683     GMX_EXPECT_SIMD_FUNC_NEAR(std::log2, log2, settings);
 684 }
 685
 686 TEST_F(SimdMathTest, log)
 687 {
 688     const real low  = std::numeric_limits<real>::min();
 689     const real high = std::numeric_limits<real>::max();
 690
 691     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 692     GMX_EXPECT_SIMD_FUNC_NEAR(std::log, log, settings);
 693 }
 694
 695 TEST_F(SimdMathTest, exp2)
 696 {
 697     // Relevant threshold points
 698     constexpr real lowestReal = -std::numeric_limits<real>::max();
 699     constexpr real lowestRealThatProducesNormal =
 700             std::numeric_limits<real>::min_exponent
 701             - 1; // adding the significant corresponds to one more unit in exponent
 702     constexpr real lowestRealThatProducesDenormal =
 703             lowestRealThatProducesNormal
 704             - std::numeric_limits<real>::digits; // digits refer to bits in significand, so 24/53 for float/double
 705     constexpr real highestRealThatProducesNormal =
 706             std::numeric_limits<real>::max_exponent
 707             - 1; // adding the significant corresponds to one more unit in exponent
 708     CompareSettings settings;
 709
 710     // Below subnormal range all results should be zero (so, match the reference)
 711     settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
 712     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
 713
 714     // Subnormal range, require matching, but DTZ is fine
 715     settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal),
 716                  ulpTol_,
 717                  absTol_,
 718                  MatchRule::Dtz };
 719     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
 720
 721     // Normal range, standard result expected
 722     settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
 723                  ulpTol_,
 724                  absTol_,
 725                  MatchRule::Normal };
 726     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
 727 }
 728
 729 TEST_F(SimdMathTest, exp2Unsafe)
 730 {
 731     // The unsafe version is only defined in the normal range
 732     constexpr real lowestRealThatProducesNormal =
 733             std::numeric_limits<real>::min_exponent
 734             - 1; // adding the significant corresponds to one more unit in exponent
 735     constexpr real highestRealThatProducesNormal =
 736             std::numeric_limits<real>::max_exponent
 737             - 1; // adding the significant corresponds to one more unit in exponent
 738
 739     CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
 740                               ulpTol_,
 741                               absTol_,
 742                               MatchRule::Normal };
 743     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2<MathOptimization::Unsafe>, settings);
 744 }
 745
 746 TEST_F(SimdMathTest, exp)
 747 {
 748     // Relevant threshold points. See the exp2 test for more details about the values; these are
 749     // simply scaled by log(2) due to the difference between exp2 and exp.
 750     const real lowestReal = -std::numeric_limits<real>::max();
 751     // In theory the smallest value should be (min_exponent-1)*log(2), but rounding after the multiplication will cause this
 752     // value to be a single ulp too low. This might cause failed tests on CPUs that use different DTZ modes for SIMD vs.
 753     // non-SIMD arithmetics (e.g. ARM v7), so multiply by (1.0-eps) to increase it by a single ulp.
 754     const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
 755                                               * std::log(2.0)
 756                                               * (1 - std::numeric_limits<real>::epsilon());
 757     const real lowestRealThatProducesDenormal =
 758             lowestRealThatProducesNormal - std::numeric_limits<real>::digits * std::log(2.0);
 759     const real highestRealThatProducesNormal =
 760             (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
 761     CompareSettings settings;
 762
 763     // Below subnormal range all results should be zero (so, match the reference)
 764     settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
 765     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
 766
 767     // Subnormal range, require matching, but DTZ is fine
 768     settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal),
 769                  ulpTol_,
 770                  absTol_,
 771                  MatchRule::Dtz };
 772     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
 773
 774     // Normal range, standard result expected
 775     settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
 776                  ulpTol_,
 777                  absTol_,
 778                  MatchRule::Normal };
 779     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
 780 }
 781
 782 TEST_F(SimdMathTest, expUnsafe)
 783 {
 784     // See test of exp() for comments about test ranges
 785     const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
 786                                               * std::log(2.0)
 787                                               * (1 - std::numeric_limits<real>::epsilon());
 788     const real highestRealThatProducesNormal =
 789             (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
 790
 791     CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
 792                               ulpTol_,
 793                               absTol_,
 794                               MatchRule::Normal };
 795     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp<MathOptimization::Unsafe>, settings);
 796 }
 797
 798 TEST_F(SimdMathTest, pow)
 799 {
 800     // We already test the log2/exp2 components of pow() extensively above, and it's a very
 801     // simple single-line function, so here we just test a handful of values to catch typos
 802     // and then some special values.
 803
 804     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
 805                               pow(rSimd_c0c1c2, rSimd_c3c4c5));
 806
 807     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
 808                               pow(rSimd_c0c1c2, rSimd_m3m0m4));
 809
 810     // 0^0 = 1 , 0^c1=0, -c1^0=1
 811     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(1.0, 0.0, 1.0),
 812                               pow(setSimdRealFrom3R(0, 0.0, -c1), setSimdRealFrom3R(0.0, c1, 0.0)));
 813 }
 814
 815 TEST_F(SimdMathTest, powUnsafe)
 816 {
 817     // We already test the log2/exp2 components of pow() extensively above, and it's a very
 818     // simple single-line function, so here we just test a handful of values to catch typos
 819     // and then some special values.
 820
 821     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
 822                               pow<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_c3c4c5));
 823
 824     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
 825                               pow<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_m3m0m4));
 826 }
 827
 828 /*! \brief Function wrapper for erf(x), with argument/return in default Gromacs precision.
 829  *
 830  * \note Single-precision erf() in some libraries can be slightly lower precision
 831  * than the SIMD flavor, so we use a cast to force double precision for reference.
 832  */
 833 real refErf(real x)
 834 {
 835     return std::erf(static_cast<double>(x));
 836 }
 837
 838 TEST_F(SimdMathTest, erf)
 839 {
 840     CompareSettings settings{ Range(-9, 9), ulpTol_, std::numeric_limits<real>::min(), MatchRule::Normal };
 841     GMX_EXPECT_SIMD_FUNC_NEAR(refErf, erf, settings);
 842 }
 843
 844 /*! \brief Function wrapper for erfc(x), with argument/return in default Gromacs precision.
 845  *
 846  * \note Single-precision erfc() in some libraries can be slightly lower precision
 847  * than the SIMD flavor, so we use a cast to force double precision for reference.
 848  */
 849 real refErfc(real x)
 850 {
 851     return std::erfc(static_cast<double>(x));
 852 }
 853
 854 TEST_F(SimdMathTest, erfc)
 855 {
 856     // Our erfc algorithm has 4 ulp accuracy, so relax tolerance a bit to 4*ulpTol
 857     CompareSettings settings{ Range(-9, 9), 4 * ulpTol_, std::numeric_limits<real>::min(), MatchRule::Normal };
 858     GMX_EXPECT_SIMD_FUNC_NEAR(refErfc, erfc, settings);
 859 }
 860
 861 TEST_F(SimdMathTest, sin)
 862 {
 863     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
 864     GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sin, settings);
 865
 866     // Range reduction leads to accuracy loss, so we might want higher tolerance here
 867     settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
 868     GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sin, settings);
 869 }
 870
 871 TEST_F(SimdMathTest, cos)
 872 {
 873     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
 874     GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cos, settings);
 875
 876     // Range reduction leads to accuracy loss, so we might want higher tolerance here
 877     settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
 878     GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cos, settings);
 879 }
 880
 881 TEST_F(SimdMathTest, tan)
 882 {
 883     // Tan(x) is a little sensitive due to the division in the algorithm.
 884     // Rather than using lots of extra FP operations, we accept the algorithm
 885     // presently only achieves a ~3 ulp error and use the medium tolerance.
 886     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
 887     GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tan, settings);
 888
 889     // Range reduction leads to accuracy loss, so we might want higher tolerance here
 890     settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
 891     GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tan, settings);
 892 }
 893
 894 TEST_F(SimdMathTest, asin)
 895 {
 896     // Our present asin(x) algorithm achieves 2-3 ulp accuracy
 897     CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
 898     GMX_EXPECT_SIMD_FUNC_NEAR(std::asin, asin, settings);
 899 }
 900
 901 TEST_F(SimdMathTest, acos)
 902 {
 903     // Our present acos(x) algorithm achieves 2-3 ulp accuracy
 904     CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
 905     GMX_EXPECT_SIMD_FUNC_NEAR(std::acos, acos, settings);
 906 }
 907
 908 TEST_F(SimdMathTest, atan)
 909 {
 910     // Our present atan(x) algorithm achieves 1 ulp accuracy
 911     CompareSettings settings{ Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
 912     GMX_EXPECT_SIMD_FUNC_NEAR(std::atan, atan, settings);
 913 }
 914
 915 TEST_F(SimdMathTest, atan2)
 916 {
 917     // test each quadrant
 918     GMX_EXPECT_SIMD_REAL_NEAR(
 919             setSimdRealFrom3R(std::atan2(c0, c3), std::atan2(c1, c4), std::atan2(c2, c5)),
 920             atan2(rSimd_c0c1c2, rSimd_c3c4c5));
 921     GMX_EXPECT_SIMD_REAL_NEAR(
 922             setSimdRealFrom3R(std::atan2(-c0, c3), std::atan2(-c1, c4), std::atan2(-c2, c5)),
 923             atan2(rSimd_m0m1m2, rSimd_c3c4c5));
 924     GMX_EXPECT_SIMD_REAL_NEAR(
 925             setSimdRealFrom3R(std::atan2(-c0, -c3), std::atan2(-c1, -c0), std::atan2(-c2, -c4)),
 926             atan2(rSimd_m0m1m2, rSimd_m3m0m4));
 927     GMX_EXPECT_SIMD_REAL_NEAR(
 928             setSimdRealFrom3R(std::atan2(c0, -c3), std::atan2(c1, -c0), std::atan2(c2, -c4)),
 929             atan2(rSimd_c0c1c2, rSimd_m3m0m4));
 930
 931     // cases important for calculating angles
 932     // values on coordinate axes
 933     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(0, c0), std::atan2(0, c1), std::atan2(0, c2)),
 934                               atan2(setZero(), rSimd_c0c1c2));
 935     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(c0, 0), std::atan2(c1, 0), std::atan2(c2, 0)),
 936                               atan2(rSimd_c0c1c2, setZero()));
 937     GMX_EXPECT_SIMD_REAL_NEAR(
 938             setSimdRealFrom3R(std::atan2(0, -c0), std::atan2(0, -c1), std::atan2(0, -c2)),
 939             atan2(setZero(), rSimd_m0m1m2));
 940     GMX_EXPECT_SIMD_REAL_NEAR(
 941             setSimdRealFrom3R(std::atan2(-c0, 0), std::atan2(-c1, 0), std::atan2(-c2, 0)),
 942             atan2(rSimd_m0m1m2, setZero()));
 943     // degenerate value (origin) should return 0.0. At least IBM xlc 13.1.5 gets the reference
 944     // value wrong (-nan) at -O3 optimization, so we compare to the correct value (0.0) instead.
 945     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom1R(0.0), atan2(setSimdRealFrom3R(0.0, 0.0, 0.0), setZero()));
 946 }
 947
 948 /*! \brief Evaluate reference version of PME force correction. */
 949 real refPmeForceCorrection(real x)
 950 {
 951     if (x != 0)
 952     {
 953         real y = std::sqrt(x);
 954         return 2 * std::exp(-x) / (std::sqrt(M_PI) * x) - std::erf(static_cast<double>(y)) / (x * y);
 955     }
 956     else
 957     {
 958         return -4 / (3 * std::sqrt(M_PI));
 959     }
 960 }
 961
 962 // The PME corrections will be added to ~1/r2, so absolute tolerance of EPS is fine.
 963 TEST_F(SimdMathTest, pmeForceCorrection)
 964 {
 965     // Pme correction relative accuracy only needs to be ~1e-6 accuracy single, 1e-10 double
 966     const std::int64_t ulpTol = (GMX_DOUBLE ? 5e-10 : 5e-6) / GMX_REAL_EPS;
 967
 968     CompareSettings settings{ Range(0.15, 4), ulpTol, GMX_REAL_EPS, MatchRule::Normal };
 969     GMX_EXPECT_SIMD_FUNC_NEAR(refPmeForceCorrection, pmeForceCorrection, settings);
 970 }
 971
 972 /*! \brief Evaluate reference version of PME potential correction. */
 973 real refPmePotentialCorrection(real x)
 974 {
 975     real y = std::sqrt(x);
 976     return std::erf(static_cast<double>(y)) / y;
 977 }
 978
 979 // The PME corrections will be added to ~1/r, so absolute tolerance of EPS is fine.
 980 TEST_F(SimdMathTest, pmePotentialCorrection)
 981 {
 982     // Pme correction relative accuracy only needs to be ~1e-6 accuracy single, 1e-10 double
 983     const std::int64_t ulpTol = (GMX_DOUBLE ? 5e-10 : 5e-6) / GMX_REAL_EPS;
 984
 985     CompareSettings settings{ Range(0.15, 4), ulpTol, GMX_REAL_EPS, MatchRule::Normal };
 986     GMX_EXPECT_SIMD_FUNC_NEAR(refPmePotentialCorrection, pmePotentialCorrection, settings);
 987 }
 988
 989 // Functions that only target single accuracy, even for double SIMD data
 990
 991 TEST_F(SimdMathTest, invsqrtSingleAccuracy)
 992 {
 993     // Here we always use float limits, since the lookup is not defined for numbers that
 994     // cannot be represented in single precision.
 995     const real low  = std::numeric_limits<float>::min();
 996     const real high = std::numeric_limits<float>::max();
 997     /* Increase the allowed error by the difference between the actual precision and single */
 998     setUlpTolSingleAccuracy(ulpTol_);
 999
1000     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1001     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, invsqrtSingleAccuracy, settings);
1002 }
1003
1004 /*! \brief Function wrapper to return first result when testing \ref invsqrtPairSingleAccuracy */
1005 SimdReal gmx_simdcall tst_invsqrt_SingleAccuracy_pair0(SimdReal x)
1006 {
1007     SimdReal r0, r1;
1008     invsqrtPairSingleAccuracy(x, x, &r0, &r1);
1009     return r0;
1010 }
1011
1012 /*! \brief Function wrapper to return second result when testing \ref invsqrtPairSingleAccuracy */
1013 SimdReal gmx_simdcall tst_invsqrt_SingleAccuracy_pair1(SimdReal x)
1014 {
1015     SimdReal r0, r1;
1016     invsqrtPairSingleAccuracy(x, x, &r0, &r1);
1017     return r1;
1018 }
1019
1020 TEST_F(SimdMathTest, invsqrtPairSingleAccuracy)
1021 {
1022     // Float limits since lookup is always performed in single
1023     const real low  = std::numeric_limits<float>::min();
1024     const real high = std::numeric_limits<float>::max();
1025     /* Increase the allowed error by the difference between the actual precision and single */
1026     setUlpTolSingleAccuracy(ulpTol_);
1027
1028     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1029     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tst_invsqrt_SingleAccuracy_pair0, settings);
1030     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tst_invsqrt_SingleAccuracy_pair1, settings);
1031 }
1032
1033 TEST_F(SimdMathTest, sqrtSingleAccuracy)
1034 {
1035     // Since the first lookup step is sometimes performed in single precision,
1036     // our SIMD sqrt can only handle single-precision input values, even when
1037     // compiled in double precision - thus we use single precision limits here.
1038
1039     // Scale lowest value by 1+eps, since we will do some arithmetics on this value
1040     const real low = std::numeric_limits<float>::min() * (1.0 + std::numeric_limits<float>::epsilon());
1041     const real      high = std::numeric_limits<float>::max();
1042     CompareSettings settings;
1043
1044     // Increase the allowed error by the difference between the actual precision and single
1045     setUlpTolSingleAccuracy(ulpTol_);
1046
1047     // First test that 0.0 and a few other values works
1048     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(0, std::sqrt(c0), std::sqrt(c1)),
1049                               sqrtSingleAccuracy(setSimdRealFrom3R(0, c0, c1)));
1050
1051 #        if GMX_DOUBLE
1052     // As mentioned above, we cannot guarantee that very small double precision
1053     // input values (below std::numeric_limits<float>::min()) are handled correctly,
1054     // so our implementation will clamp it to zero. In this range we allow either
1055     // the correct value or zero, but it's important that it does not result in NaN or Inf values.
1056     //
1057     // This test range must not be called for single precision, since if we try to divide
1058     // the interval (0.0, low( in npoints we will try to multiply by factors so small that
1059     // they end up being flushed to zero, and the loop would never end.
1060     settings = { Range(0.0, low), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
1061     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy, settings);
1062 #        endif
1063
1064     settings = { Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1065     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy, settings);
1066 }
1067
1068 TEST_F(SimdMathTest, sqrtSingleAccuracyUnsafe)
1069 {
1070     // Test the full range, but stick to float limits since lookup is done in single.
1071     const real low  = std::numeric_limits<float>::min();
1072     const real high = std::numeric_limits<float>::max();
1073
1074     /* Increase the allowed error by the difference between the actual precision and single */
1075     setUlpTolSingleAccuracy(ulpTol_);
1076
1077     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1078     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy<MathOptimization::Unsafe>, settings);
1079 }
1080
1081 TEST_F(SimdMathTest, invSingleAccuracy)
1082 {
1083     // Since the first lookup step is sometimes performed in single precision,
1084     // our SIMD 1/x can only handle single-precision input values, even when
1085     // compiled in double precision.
1086
1087     // Relevant threshold points
1088     const real minSafeFloat = std::numeric_limits<float>::min()
1089                               * 10; // X value guaranteed not to result in Inf intermediates for 1/x calc.
1090     const real maxSafeFloat = std::numeric_limits<float>::max()
1091                               * 0.1; // X value guaranteed not to result in DTZ intermediates for 1/x calc.
1092     // Scale highest value by 1-eps, since we will do some arithmetics on this value
1093     const real maxFloat =
1094             std::numeric_limits<float>::max() * (1.0 - std::numeric_limits<float>::epsilon());
1095     CompareSettings settings;
1096
1097     // Increase the allowed error by the difference between the actual precision and single
1098     setUlpTolSingleAccuracy(ulpTol_);
1099
1100     // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
1101     settings = { Range(-maxFloat, -maxSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
1102     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1103
1104     // Normal checks for x < 0
1105     settings = { Range(-maxSafeFloat, -minSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
1106     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1107
1108     // We do not care about the small range -minSafeFloat < x < +minSafeFloat where the result can be +/- Inf, since we don't require strict IEEE754.
1109
1110     // Normal checks for x > 0
1111     settings = { Range(minSafeFloat, maxSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
1112     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1113
1114     // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
1115     settings = { Range(maxSafeFloat, maxFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
1116     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1117 }
1118
1119 TEST_F(SimdMathTest, cbrtSingleAccuracy)
1120 {
1121     const real low  = -std::numeric_limits<real>::max();
1122     const real high = std::numeric_limits<real>::max();
1123
1124     // Increase the allowed error by the difference between the actual precision and single
1125     setUlpTolSingleAccuracy(ulpTol_);
1126
1127     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1128     GMX_EXPECT_SIMD_FUNC_NEAR(std::cbrt, cbrtSingleAccuracy, settings);
1129 }
1130
1131 TEST_F(SimdMathTest, invcbrtSingleAccuracy)
1132 {
1133     // Increase the allowed error by the difference between the actual precision and single
1134     setUlpTolSingleAccuracy(ulpTol_);
1135
1136     // Negative values first
1137     real low  = -std::numeric_limits<real>::max();
1138     real high = -std::numeric_limits<real>::min();
1139
1140     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1141     GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrtSingleAccuracy, settings);
1142
1143     // Positive values
1144     low      = std::numeric_limits<real>::min();
1145     high     = std::numeric_limits<real>::max();
1146     settings = { Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1147     GMX_EXPECT_SIMD_FUNC_NEAR(refInvCbrt, invcbrtSingleAccuracy, settings);
1148 }
1149
1150 TEST_F(SimdMathTest, log2SingleAccuracy)
1151 {
1152     const real low  = std::numeric_limits<real>::min();
1153     const real high = std::numeric_limits<real>::max();
1154
1155     // Increase the allowed error by the difference between the actual precision and single
1156     setUlpTolSingleAccuracy(ulpTol_);
1157
1158     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1159     GMX_EXPECT_SIMD_FUNC_NEAR(std::log2, log2SingleAccuracy, settings);
1160 }
1161
1162 TEST_F(SimdMathTest, logSingleAccuracy)
1163 {
1164     const real low  = std::numeric_limits<real>::min();
1165     const real high = std::numeric_limits<real>::max();
1166
1167     // Increase the allowed error by the difference between the actual precision and single
1168     setUlpTolSingleAccuracy(ulpTol_);
1169
1170     CompareSettings settings{ Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
1171     GMX_EXPECT_SIMD_FUNC_NEAR(std::log, logSingleAccuracy, settings);
1172 }
1173
1174 TEST_F(SimdMathTest, exp2SingleAccuracy)
1175 {
1176     // Relevant threshold points - float limits since we only target single accuracy
1177     constexpr real lowestReal = -std::numeric_limits<real>::max();
1178     constexpr real lowestRealThatProducesNormal =
1179             std::numeric_limits<real>::min_exponent
1180             - 1; // adding the significant corresponds to one more unit in exponent
1181     constexpr real lowestRealThatProducesDenormal =
1182             lowestRealThatProducesNormal
1183             - std::numeric_limits<real>::digits; // digits refer to bits in significand, so 24/53 for float/double
1184     constexpr real highestRealThatProducesNormal =
1185             std::numeric_limits<real>::max_exponent
1186             - 1; // adding the significant corresponds to one more unit in exponent
1187     CompareSettings settings;
1188
1189     // Increase the allowed error by the difference between the actual precision and single
1190     setUlpTolSingleAccuracy(ulpTol_);
1191
1192     // Below subnormal range all results should be zero
1193     settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
1194     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy, settings);
1195
1196     // Subnormal range, require matching, but DTZ is fine
1197     settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal),
1198                  ulpTol_,
1199                  absTol_,
1200                  MatchRule::Dtz };
1201     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy, settings);
1202
1203     // Normal range, standard result expected
1204     settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1205                  ulpTol_,
1206                  absTol_,
1207                  MatchRule::Normal };
1208     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy, settings);
1209 }
1210
1211 TEST_F(SimdMathTest, exp2SingleAccuracyUnsafe)
1212 {
1213     // The unsafe version is only defined in the normal range
1214     constexpr real lowestRealThatProducesNormal =
1215             std::numeric_limits<real>::min_exponent
1216             - 1; // adding the significant corresponds to one more unit in exponent
1217     constexpr real highestRealThatProducesNormal =
1218             std::numeric_limits<real>::max_exponent
1219             - 1; // adding the significant corresponds to one more unit in exponent
1220
1221     /* Increase the allowed error by the difference between the actual precision and single */
1222     setUlpTolSingleAccuracy(ulpTol_);
1223
1224     CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1225                               ulpTol_,
1226                               absTol_,
1227                               MatchRule::Normal };
1228     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy<MathOptimization::Unsafe>, settings);
1229 }
1230
1231 TEST_F(SimdMathTest, expSingleAccuracy)
1232 {
1233     // See threshold point comments in normal exp() test
1234     const real lowestReal = -std::numeric_limits<real>::max();
1235     // In theory the smallest value should be (min_exponent-1)*log(2), but rounding after the multiplication will cause this
1236     // value to be a single ulp too low. This might cause failed tests on CPUs that use different DTZ modes for SIMD vs.
1237     // non-SIMD arithmetics (e.g. ARM v7), so multiply by (1.0-eps) to increase it by a single ulp.
1238     const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
1239                                               * std::log(2.0)
1240                                               * (1.0 - std::numeric_limits<real>::epsilon());
1241     const real lowestRealThatProducesDenormal =
1242             lowestRealThatProducesNormal - std::numeric_limits<real>::digits * std::log(2.0);
1243     const real highestRealThatProducesNormal =
1244             (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
1245     CompareSettings settings;
1246
1247     // Increase the allowed error by the difference between the actual precision and single
1248     setUlpTolSingleAccuracy(ulpTol_);
1249
1250     // Below subnormal range all results should be zero (so, match the reference)
1251     settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
1252     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy, settings);
1253
1254     // Subnormal range, require matching, but DTZ is fine
1255     settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal),
1256                  ulpTol_,
1257                  absTol_,
1258                  MatchRule::Dtz };
1259     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy, settings);
1260
1261     // Normal range, standard result expected
1262     settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1263                  ulpTol_,
1264                  absTol_,
1265                  MatchRule::Normal };
1266     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy, settings);
1267 }
1268
1269 TEST_F(SimdMathTest, expSingleAccuracyUnsafe)
1270 {
1271     // See test of exp() for comments about test ranges
1272     const real lowestRealThatProducesNormal = (std::numeric_limits<real>::min_exponent - 1)
1273                                               * std::log(2.0)
1274                                               * (1 - std::numeric_limits<real>::epsilon());
1275     const real highestRealThatProducesNormal =
1276             (std::numeric_limits<real>::max_exponent - 1) * std::log(2.0);
1277
1278     // Increase the allowed error by the difference between the actual precision and single
1279     setUlpTolSingleAccuracy(ulpTol_);
1280
1281     CompareSettings settings{ Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),
1282                               ulpTol_,
1283                               absTol_,
1284                               MatchRule::Normal };
1285     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy<MathOptimization::Unsafe>, settings);
1286 }
1287
1288 TEST_F(SimdMathTest, powSingleAccuracy)
1289 {
1290     // We already test the log2/exp2 components of pow() extensively above, and it's a very
1291     // simple single-line function, so here we just test a handful of values to catch typos
1292     // and then some special values.
1293
1294     // Increase the allowed error by the difference between the actual precision and single
1295     setUlpTolSingleAccuracy(ulpTol_);
1296
1297     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
1298                               powSingleAccuracy(rSimd_c0c1c2, rSimd_c3c4c5));
1299
1300     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
1301                               powSingleAccuracy(rSimd_c0c1c2, rSimd_m3m0m4));
1302
1303     // 0^0 = 1 , 0^c1=0, -c1^0=1
1304     GMX_EXPECT_SIMD_REAL_NEAR(
1305             setSimdRealFrom3R(1.0, 0.0, 1.0),
1306             powSingleAccuracy(setSimdRealFrom3R(0, 0.0, -c1), setSimdRealFrom3R(0.0, c1, 0.0)));
1307 }
1308
1309 TEST_F(SimdMathTest, powSingleAccuracyUnsafe)
1310 {
1311     // We already test the log2/exp2 components of pow() extensively above, and it's a very
1312     // simple single-line function, so here we just test a handful of values to catch typos
1313     // and then some special values.
1314
1315     // Increase the allowed error by the difference between the actual precision and single
1316     setUlpTolSingleAccuracy(ulpTol_);
1317
1318     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, c3), std::pow(c1, c4), std::pow(c2, c5)),
1319                               powSingleAccuracy<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_c3c4c5));
1320
1321     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::pow(c0, -c3), std::pow(c1, -c0), std::pow(c2, -c4)),
1322                               powSingleAccuracy<MathOptimization::Unsafe>(rSimd_c0c1c2, rSimd_m3m0m4));
1323 }
1324
1325 TEST_F(SimdMathTest, erfSingleAccuracy)
1326 {
1327     // Increase the allowed error by the difference between the actual precision and single
1328     setUlpTolSingleAccuracy(ulpTol_);
1329
1330     CompareSettings settings{ Range(-9, 9), ulpTol_, GMX_REAL_MIN, MatchRule::Normal };
1331     GMX_EXPECT_SIMD_FUNC_NEAR(refErf, erfSingleAccuracy, settings);
1332 }
1333
1334 TEST_F(SimdMathTest, erfcSingleAccuracy)
1335 {
1336     // Increase the allowed error by the difference between the actual precision and single
1337     setUlpTolSingleAccuracy(ulpTol_);
1338
1339     // Our erfc algorithm has 4 ulp accuracy, so relax tolerance a bit
1340     CompareSettings settings{ Range(-9, 9), 4 * ulpTol_, GMX_REAL_MIN, MatchRule::Normal };
1341     GMX_EXPECT_SIMD_FUNC_NEAR(refErfc, erfcSingleAccuracy, settings);
1342 }
1343
1344
1345 TEST_F(SimdMathTest, sinSingleAccuracy)
1346 {
1347     /* Increase the allowed error by the difference between the actual precision and single */
1348     setUlpTolSingleAccuracy(ulpTol_);
1349
1350     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
1351     GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sinSingleAccuracy, settings);
1352
1353     // Range reduction leads to accuracy loss, so we might want higher tolerance here
1354     settings = { Range(-10000, 10000), 2 * ulpTol_, absTol_, MatchRule::Normal };
1355     GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sinSingleAccuracy, settings);
1356 }
1357
1358 TEST_F(SimdMathTest, cosSingleAccuracy)
1359 {
1360     /* Increase the allowed error by the difference between the actual precision and single */
1361     setUlpTolSingleAccuracy(ulpTol_);
1362
1363     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
1364     GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cosSingleAccuracy, settings);
1365
1366     // Range reduction leads to accuracy loss, so we might want higher tolerance here
1367     settings = { Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
1368     GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cosSingleAccuracy, settings);
1369 }
1370
1371 TEST_F(SimdMathTest, tanSingleAccuracy)
1372 {
1373     /* Increase the allowed error by the difference between the actual precision and single */
1374     setUlpTolSingleAccuracy(ulpTol_);
1375
1376     // Tan(x) is a little sensitive due to the division in the algorithm.
1377     // Rather than using lots of extra FP operations, we accept the algorithm
1378     // presently only achieves a ~3 ulp error and use the medium tolerance.
1379     CompareSettings settings{ Range(-8 * M_PI, 8 * M_PI), ulpTol_, absTol_, MatchRule::Normal };
1380     GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tanSingleAccuracy, settings);
1381
1382     // Range reduction leads to accuracy loss, so we might want higher tolerance here
1383     settings = { Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
1384     GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tanSingleAccuracy, settings);
1385 }
1386
1387 TEST_F(SimdMathTest, asinSingleAccuracy)
1388 {
1389     /* Increase the allowed error by the difference between the actual precision and single */
1390     setUlpTolSingleAccuracy(ulpTol_);
1391
1392     // Our present asin(x) algorithm achieves 2-3 ulp accuracy
1393     CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
1394     GMX_EXPECT_SIMD_FUNC_NEAR(std::asin, asinSingleAccuracy, settings);
1395 }
1396
1397 TEST_F(SimdMathTest, acosSingleAccuracy)
1398 {
1399     /* Increase the allowed error by the difference between the actual precision and single */
1400     setUlpTolSingleAccuracy(ulpTol_);
1401
1402     // Our present acos(x) algorithm achieves 2-3 ulp accuracy
1403     CompareSettings settings{ Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal };
1404     GMX_EXPECT_SIMD_FUNC_NEAR(std::acos, acosSingleAccuracy, settings);
1405 }
1406
1407 TEST_F(SimdMathTest, atanSingleAccuracy)
1408 {
1409     /* Increase the allowed error by the difference between the actual precision and single */
1410     setUlpTolSingleAccuracy(ulpTol_);
1411
1412     // Our present atan(x) algorithm achieves 1 ulp accuracy
1413     CompareSettings settings{ Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal };
1414     GMX_EXPECT_SIMD_FUNC_NEAR(std::atan, atanSingleAccuracy, settings);
1415 }
1416
1417 TEST_F(SimdMathTest, atan2SingleAccuracy)
1418 {
1419     /* Increase the allowed error by the difference between the actual precision and single */
1420     setUlpTolSingleAccuracy(ulpTol_);
1421
1422     // test each quadrant
1423     GMX_EXPECT_SIMD_REAL_NEAR(
1424             setSimdRealFrom3R(std::atan2(c0, c3), std::atan2(c1, c4), std::atan2(c2, c5)),
1425             atan2SingleAccuracy(rSimd_c0c1c2, rSimd_c3c4c5));
1426     GMX_EXPECT_SIMD_REAL_NEAR(
1427             setSimdRealFrom3R(std::atan2(-c0, c3), std::atan2(-c1, c4), std::atan2(-c2, c5)),
1428             atan2SingleAccuracy(rSimd_m0m1m2, rSimd_c3c4c5));
1429     GMX_EXPECT_SIMD_REAL_NEAR(
1430             setSimdRealFrom3R(std::atan2(-c0, -c3), std::atan2(-c1, -c0), std::atan2(-c2, -c4)),
1431             atan2SingleAccuracy(rSimd_m0m1m2, rSimd_m3m0m4));
1432     GMX_EXPECT_SIMD_REAL_NEAR(
1433             setSimdRealFrom3R(std::atan2(c0, -c3), std::atan2(c1, -c0), std::atan2(c2, -c4)),
1434             atan2SingleAccuracy(rSimd_c0c1c2, rSimd_m3m0m4));
1435     // cases important for calculating angles
1436     // values on coordinate axes
1437     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(0, c0), std::atan2(0, c1), std::atan2(0, c2)),
1438                               atan2SingleAccuracy(setZero(), rSimd_c0c1c2));
1439     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(c0, 0), std::atan2(c1, 0), std::atan2(c2, 0)),
1440                               atan2SingleAccuracy(rSimd_c0c1c2, setZero()));
1441     GMX_EXPECT_SIMD_REAL_NEAR(
1442             setSimdRealFrom3R(std::atan2(0, -c0), std::atan2(0, -c1), std::atan2(0, -c2)),
1443             atan2SingleAccuracy(setZero(), rSimd_m0m1m2));
1444     GMX_EXPECT_SIMD_REAL_NEAR(
1445             setSimdRealFrom3R(std::atan2(-c0, 0), std::atan2(-c1, 0), std::atan2(-c2, 0)),
1446             atan2SingleAccuracy(rSimd_m0m1m2, setZero()));
1447
1448     // degenerate value (origin) should return 0.0. At least IBM xlc 13.1.5 gets the reference
1449     // value wrong (-nan) at -O3 optimization, so we compare to the correct value (0.0) instead.
1450     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom1R(0.0),
1451                               atan2SingleAccuracy(setSimdRealFrom3R(0.0, 0.0, 0.0), setZero()));
1452 }
1453
1454 TEST_F(SimdMathTest, pmeForceCorrectionSingleAccuracy)
1455 {
1456     // The PME corrections will be added to ~1/r2, so absolute tolerance of EPS is fine.
1457     // Pme correction only needs to be ~1e-6 accuracy single.
1458     // Then increase the allowed error by the difference between the actual precision and single.
1459     setUlpTolSingleAccuracy(std::int64_t(5e-6 / GMX_FLOAT_EPS));
1460
1461     CompareSettings settings{ Range(0.15, 4), ulpTol_, GMX_FLOAT_EPS, MatchRule::Normal };
1462     GMX_EXPECT_SIMD_FUNC_NEAR(refPmeForceCorrection, pmeForceCorrectionSingleAccuracy, settings);
1463 }
1464
1465 TEST_F(SimdMathTest, pmePotentialCorrectionSingleAccuracy)
1466 {
1467     // The PME corrections will be added to ~1/r, so absolute tolerance of EPS is fine.
1468     // Pme correction only needs to be ~1e-6 accuracy single.
1469     // Then increase the allowed error by the difference between the actual precision and single.
1470     setUlpTolSingleAccuracy(std::int64_t(5e-6 / GMX_FLOAT_EPS));
1471
1472     CompareSettings settings{ Range(0.15, 4), ulpTol_, GMX_FLOAT_EPS, MatchRule::Normal };
1473     GMX_EXPECT_SIMD_FUNC_NEAR(refPmePotentialCorrection, pmePotentialCorrectionSingleAccuracy, settings);
1474 }
1475
1476 } // namespace
1477
1478 #    endif // GMX_SIMD_HAVE_REAL
1479
1480 /*! \} */
1481 /*! \endcond */
1482
1483 } // namespace test
1484 } // namespace gmx
1485
1486 #endif // GMX_SIMD