src/gromacs/simd/scalar/scalar_math.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2016,2017,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #ifndef GMX_SIMD_SCALAR_MATH_H
  36 #define GMX_SIMD_SCALAR_MATH_H
  37
  38 #include <cmath>
  39
  40 #include "gromacs/math/functions.h"
  41 #include "gromacs/math/utilities.h"
  42 #include "gromacs/simd/scalar/scalar.h"
  43
  44 /*! \libinternal \file
  45  *
  46  * \brief Scalar math functions mimicking GROMACS SIMD math functions
  47  *
  48  * These versions make it possible to write functions that are templated with
  49  * either a SIMD or scalar type. While some of these functions might not appear
  50  * SIMD-specific, we have placed them here because the only reason to use these
  51  * instead of generic function is in templated combined SIMD/non-SIMD code.
  52  * It is important that these functions match the SIMD versions exactly in their
  53  * arguments and template arguments so that overload resolution works correctly.
  54  *
  55  * \author Erik Lindahl <erik.lindahl@gmail.com>
  56  *
  57  * \inlibraryapi
  58  * \ingroup module_simd
  59  */
  60
  61 namespace gmx
  62 {
  63
  64 /*****************************************************************************
  65  *   Single-precision floating point math functions mimicking SIMD versions  *
  66  *****************************************************************************/
  67
  68
  69 /*! \brief Composes single value with the magnitude of x and the sign of y.
  70  *
  71  * \param x Value to set sign for
  72  * \param y Value used to set sign
  73  * \return  Magnitude of x, sign of y
  74  *
  75  * \note This function might be superficially meaningless, but it helps us to
  76  *       write templated SIMD/non-SIMD code. For clarity it should not be used
  77  *       outside such code.
  78  */
  79 static inline float copysign(float x, float y)
  80 {
  81     return std::copysign(x, y);
  82 }
  83
  84 // invsqrt(x) is already defined in math/functions.h
  85
  86 /*! \brief Calculate 1/sqrt(x) for two floats.
  87  *
  88  * \param x0  First argument, x0 must be positive - no argument checking.
  89  * \param x1  Second argument, x1 must be positive - no argument checking.
  90  * \param[out] out0  Result 1/sqrt(x0)
  91  * \param[out] out1  Result 1/sqrt(x1)
  92  *
  93  * \note This function might be superficially meaningless, but it helps us to
  94  *       write templated SIMD/non-SIMD code. For clarity it should not be used
  95  *       outside such code.
  96  */
  97 static inline void invsqrtPair(float x0, float x1, float* out0, float* out1)
  98 {
  99     *out0 = invsqrt(x0);
 100     *out1 = invsqrt(x1);
 101 }
 102
 103 /*! \brief Calculate 1/x for float.
 104  *
 105  *  \param x Argument that must be nonzero. This routine does not check arguments.
 106  *  \return 1/x. Result is undefined if your argument was invalid.
 107  *
 108  * \note This function might be superficially meaningless, but it helps us to
 109  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 110  *       outside such code.
 111  */
 112 static inline float inv(float x)
 113 {
 114     return 1.0F / x;
 115 }
 116
 117 /*! \brief Calculate 1/sqrt(x) for masked entry of float.
 118  *
 119  *  This routine only evaluates 1/sqrt(x) if mask is true.
 120  *  Illegal values for a masked-out float will not lead to
 121  *  floating-point exceptions.
 122  *
 123  *  \param x Argument that must be >0 if masked-in.
 124  *  \param m Mask
 125  *  \return 1/sqrt(x). Result is undefined if your argument was invalid or
 126  *          entry was not masked, and 0.0 for masked-out entries.
 127  *
 128  * \note This function might be superficially meaningless, but it helps us to
 129  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 130  *       outside such code.
 131  */
 132 static inline float maskzInvsqrt(float x, bool m)
 133 {
 134     return m ? invsqrt(x) : 0.0F;
 135 }
 136
 137 /*! \brief Calculate 1/x for masked entry of float.
 138  *
 139  *  This routine only evaluates 1/x if mask is true.
 140  *  Illegal values for a masked-out float will not lead to
 141  *  floating-point exceptions.
 142  *
 143  *  \param x Argument that must be nonzero if masked-in.
 144  *  \param m Mask
 145  *  \return 1/x. Result is undefined if your argument was invalid or
 146  *          entry was not masked, and 0.0 for masked-out entries.
 147  *
 148  * \note This function might be superficially meaningless, but it helps us to
 149  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 150  *       outside such code.
 151  */
 152 static inline float maskzInv(float x, bool m)
 153 {
 154     return m ? inv(x) : 0.0F;
 155 }
 156
 157 /*! \brief Float sqrt(x). This is the square root.
 158  *
 159  * \param x Argument, should be >= 0.
 160  * \result The square root of x. Undefined if argument is invalid.
 161  *
 162  * \note This function might be superficially meaningless, but it helps us to
 163  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 164  *       outside such code.
 165  */
 166 template<MathOptimization opt = MathOptimization::Safe>
 167 static inline float sqrt(float x)
 168 {
 169     return std::sqrt(x);
 170 }
 171
 172 /*! \brief Float log(x). This is the natural logarithm.
 173  *
 174  * \param x Argument, should be >0.
 175  * \result The natural logarithm of x. Undefined if argument is invalid.
 176  *
 177  * \note This function might be superficially meaningless, but it helps us to
 178  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 179  *       outside such code.
 180  */
 181 static inline float log(float x)
 182 {
 183     return std::log(x);
 184 }
 185
 186 /*! \brief Float 2^x.
 187  *
 188  * \param x Argument.
 189  * \result 2^x. Undefined if input argument caused overflow.
 190  *
 191  * \note This function might be superficially meaningless, but it helps us to
 192  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 193  *       outside such code.
 194  */
 195 template<MathOptimization opt = MathOptimization::Safe>
 196 static inline float exp2(float x)
 197 {
 198     return std::exp2(x);
 199 }
 200
 201 /*! \brief Float exp(x).
 202  *
 203  * \param x Argument.
 204  * \result exp(x). Undefined if input argument caused overflow.
 205  *
 206  * \note This function might be superficially meaningless, but it helps us to
 207  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 208  *       outside such code.
 209  */
 210 template<MathOptimization opt = MathOptimization::Safe>
 211 static inline float exp(float x)
 212 {
 213     return std::exp(x);
 214 }
 215
 216 /*! \brief Float erf(x).
 217  *
 218  * \param x Argument.
 219  * \result erf(x)
 220  *
 221  * \note This function might be superficially meaningless, but it helps us to
 222  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 223  *       outside such code.
 224  */
 225 static inline float erf(float x)
 226 {
 227     return std::erf(x);
 228 }
 229
 230 /*! \brief Float erfc(x).
 231  *
 232  * \param x Argument.
 233  * \result erfc(x)
 234  *
 235  * \note This function might be superficially meaningless, but it helps us to
 236  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 237  *       outside such code.
 238  */
 239 static inline float erfc(float x)
 240 {
 241     return std::erfc(x);
 242 }
 243
 244
 245 /*! \brief Float sin \& cos.
 246  *
 247  * \param x The argument to evaluate sin/cos for
 248  * \param[out] sinval Sin(x)
 249  * \param[out] cosval Cos(x)
 250  *
 251  * \note This function might be superficially meaningless, but it helps us to
 252  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 253  *       outside such code.
 254  */
 255 static inline void sincos(float x, float* sinval, float* cosval)
 256 {
 257     *sinval = std::sin(x);
 258     *cosval = std::cos(x);
 259 }
 260
 261 /*! \brief Float sin.
 262  *
 263  * \param x The argument to evaluate sin for
 264  * \result Sin(x)
 265  *
 266  * \note This function might be superficially meaningless, but it helps us to
 267  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 268  *       outside such code.
 269  */
 270 static inline float sin(float x)
 271 {
 272     return std::sin(x);
 273 }
 274
 275 /*! \brief Float cos.
 276  *
 277  * \param x The argument to evaluate cos for
 278  * \result Cos(x)
 279  *
 280  * \note This function might be superficially meaningless, but it helps us to
 281  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 282  *       outside such code.
 283  */
 284 static inline float cos(float x)
 285 {
 286     return std::cos(x);
 287 }
 288
 289 /*! \brief Float tan.
 290  *
 291  * \param x The argument to evaluate tan for
 292  * \result Tan(x)
 293  *
 294  * \note This function might be superficially meaningless, but it helps us to
 295  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 296  *       outside such code.
 297  */
 298 static inline float tan(float x)
 299 {
 300     return std::tan(x);
 301 }
 302
 303 /*! \brief float asin.
 304  *
 305  * \param x The argument to evaluate asin for
 306  * \result Asin(x)
 307  *
 308  * \note This function might be superficially meaningless, but it helps us to
 309  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 310  *       outside such code.
 311  */
 312 static inline float asin(float x)
 313 {
 314     return std::asin(x);
 315 }
 316
 317 /*! \brief Float acos.
 318  *
 319  * \param x The argument to evaluate acos for
 320  * \result Acos(x)
 321  *
 322  * \note This function might be superficially meaningless, but it helps us to
 323  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 324  *       outside such code.
 325  */
 326 static inline float acos(float x)
 327 {
 328     return std::acos(x);
 329 }
 330
 331 /*! \brief Float atan.
 332  *
 333  * \param x The argument to evaluate atan for
 334  * \result Atan(x)
 335  *
 336  * \note This function might be superficially meaningless, but it helps us to
 337  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 338  *       outside such code.
 339  */
 340 static inline float atan(float x)
 341 {
 342     return std::atan(x);
 343 }
 344
 345 /*! \brief Float atan2(y,x).
 346  *
 347  * \param y Y component of vector, any quartile
 348  * \param x X component of vector, any quartile
 349  * \result Atan(y,x)
 350  *
 351  * \note This function might be superficially meaningless, but it helps us to
 352  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 353  *       outside such code.
 354  */
 355 static inline float atan2(float y, float x)
 356 {
 357     return std::atan2(y, x);
 358 }
 359
 360 /*! \brief Calculate the force correction due to PME analytically in float.
 361  *
 362  * See the SIMD version of this function for details.
 363  *
 364  * \param z2 input parameter
 365  * \returns Correction to use on force
 366  *
 367  * \note This function might be superficially meaningless, but it helps us to
 368  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 369  *       outside such code.
 370  */
 371 static inline float pmeForceCorrection(float z2)
 372 {
 373     const float FN6(-1.7357322914161492954e-8F);
 374     const float FN5(1.4703624142580877519e-6F);
 375     const float FN4(-0.000053401640219807709149F);
 376     const float FN3(0.0010054721316683106153F);
 377     const float FN2(-0.019278317264888380590F);
 378     const float FN1(0.069670166153766424023F);
 379     const float FN0(-0.75225204789749321333F);
 380
 381     const float FD4(0.0011193462567257629232F);
 382     const float FD3(0.014866955030185295499F);
 383     const float FD2(0.11583842382862377919F);
 384     const float FD1(0.50736591960530292870F);
 385     const float FD0(1.0F);
 386
 387     float z4;
 388     float polyFN0, polyFN1, polyFD0, polyFD1;
 389
 390     z4 = z2 * z2;
 391
 392     polyFD0 = fma(FD4, z4, FD2);
 393     polyFD1 = fma(FD3, z4, FD1);
 394     polyFD0 = fma(polyFD0, z4, FD0);
 395     polyFD0 = fma(polyFD1, z2, polyFD0);
 396
 397     polyFN0 = fma(FN6, z4, FN4);
 398     polyFN1 = fma(FN5, z4, FN3);
 399     polyFN0 = fma(polyFN0, z4, FN2);
 400     polyFN1 = fma(polyFN1, z4, FN1);
 401     polyFN0 = fma(polyFN0, z4, FN0);
 402     polyFN0 = fma(polyFN1, z2, polyFN0);
 403
 404     return polyFN0 / polyFD0;
 405 }
 406
 407 /*! \brief Calculate the potential correction due to PME analytically in float.
 408  *
 409  * See the SIMD version of this function for details.
 410  *
 411  * \param z2 input parameter
 412  * \returns Correction to use on potential.
 413  *
 414  * \note This function might be superficially meaningless, but it helps us to
 415  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 416  *       outside such code.
 417  */
 418 static inline float pmePotentialCorrection(float z2)
 419 {
 420     const float VN6(1.9296833005951166339e-8F);
 421     const float VN5(-1.4213390571557850962e-6F);
 422     const float VN4(0.000041603292906656984871F);
 423     const float VN3(-0.00013134036773265025626F);
 424     const float VN2(0.038657983986041781264F);
 425     const float VN1(0.11285044772717598220F);
 426     const float VN0(1.1283802385263030286F);
 427
 428     const float VD3(0.0066752224023576045451F);
 429     const float VD2(0.078647795836373922256F);
 430     const float VD1(0.43336185284710920150F);
 431     const float VD0(1.0F);
 432
 433     float z4;
 434     float polyVN0, polyVN1, polyVD0, polyVD1;
 435
 436     z4 = z2 * z2;
 437
 438     polyVD1 = fma(VD3, z4, VD1);
 439     polyVD0 = fma(VD2, z4, VD0);
 440     polyVD0 = fma(polyVD1, z2, polyVD0);
 441
 442     polyVN0 = fma(VN6, z4, VN4);
 443     polyVN1 = fma(VN5, z4, VN3);
 444     polyVN0 = fma(polyVN0, z4, VN2);
 445     polyVN1 = fma(polyVN1, z4, VN1);
 446     polyVN0 = fma(polyVN0, z4, VN0);
 447     polyVN0 = fma(polyVN1, z2, polyVN0);
 448
 449     return polyVN0 / polyVD0;
 450 }
 451
 452 /*****************************************************************************
 453  *   Double-precision floating point math functions mimicking SIMD versions  *
 454  *****************************************************************************/
 455
 456
 457 /*! \brief Composes double value with the magnitude of x and the sign of y.
 458  *
 459  * \param x Value to set sign for
 460  * \param y Value used to set sign
 461  * \return  Magnitude of x, sign of y
 462  *
 463  * \note This function might be superficially meaningless, but it helps us to
 464  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 465  *       outside such code.
 466  */
 467 static inline double copysign(double x, double y)
 468 {
 469     return std::copysign(x, y);
 470 }
 471
 472 // invsqrt(x) is already defined in math/functions.h
 473
 474 /*! \brief Calculate 1/sqrt(x) for two doubles.
 475  *
 476  * \param x0  First argument, x0 must be positive - no argument checking.
 477  * \param x1  Second argument, x1 must be positive - no argument checking.
 478  * \param[out] out0  Result 1/sqrt(x0)
 479  * \param[out] out1  Result 1/sqrt(x1)
 480  *
 481  * \note This function might be superficially meaningless, but it helps us to
 482  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 483  *       outside such code.
 484  */
 485 static inline void invsqrtPair(double x0, double x1, double* out0, double* out1)
 486 {
 487     *out0 = invsqrt(x0);
 488     *out1 = invsqrt(x1);
 489 }
 490
 491 /*! \brief Calculate 1/x for double.
 492  *
 493  *  \param x Argument that must be nonzero. This routine does not check arguments.
 494  *  \return 1/x. Result is undefined if your argument was invalid.
 495  *
 496  * \note This function might be superficially meaningless, but it helps us to
 497  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 498  *       outside such code.
 499  */
 500 static inline double inv(double x)
 501 {
 502     return 1.0 / x;
 503 }
 504
 505 /*! \brief Calculate 1/sqrt(x) for masked entry of double.
 506  *
 507  *  This routine only evaluates 1/sqrt(x) if mask is true.
 508  *  Illegal values for a masked-out double will not lead to
 509  *  floating-point exceptions.
 510  *
 511  *  \param x Argument that must be >0 if masked-in.
 512  *  \param m Mask
 513  *  \return 1/sqrt(x). Result is undefined if your argument was invalid or
 514  *          entry was not masked, and 0.0 for masked-out entries.
 515  *
 516  * \note This function might be superficially meaningless, but it helps us to
 517  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 518  *       outside such code.
 519  */
 520 static inline double maskzInvsqrt(double x, bool m)
 521 {
 522     return m ? invsqrt(x) : 0.0;
 523 }
 524
 525 /*! \brief Calculate 1/x for masked entry of double.
 526  *
 527  *  This routine only evaluates 1/x if mask is true.
 528  *  Illegal values for a masked-out double will not lead to
 529  *  floating-point exceptions.
 530  *
 531  *  \param x Argument that must be nonzero if masked-in.
 532  *  \param m Mask
 533  *  \return 1/x. Result is undefined if your argument was invalid or
 534  *          entry was not masked, and 0.0 for masked-out entries.
 535  *
 536  * \note This function might be superficially meaningless, but it helps us to
 537  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 538  *       outside such code.
 539  */
 540 static inline double maskzInv(double x, bool m)
 541 {
 542     return m ? inv(x) : 0.0;
 543 }
 544
 545 /*! \brief Double sqrt(x). This is the square root.
 546  *
 547  * \param x Argument, should be >= 0.
 548  * \result The square root of x. Undefined if argument is invalid.
 549  *
 550  * \note This function might be superficially meaningless, but it helps us to
 551  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 552  *       outside such code.
 553  */
 554 template<MathOptimization opt = MathOptimization::Safe>
 555 static inline double sqrt(double x)
 556 {
 557     return std::sqrt(x);
 558 }
 559
 560 /*! \brief Double log(x). This is the natural logarithm.
 561  *
 562  * \param x Argument, should be >0.
 563  * \result The natural logarithm of x. Undefined if argument is invalid.
 564  *
 565  * \note This function might be superficially meaningless, but it helps us to
 566  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 567  *       outside such code.
 568  */
 569 static inline double log(double x)
 570 {
 571     return std::log(x);
 572 }
 573
 574 /*! \brief Double 2^x.
 575  *
 576  * \param x Argument.
 577  * \result 2^x. Undefined if input argument caused overflow.
 578  *
 579  * \note This function might be superficially meaningless, but it helps us to
 580  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 581  *       outside such code.
 582  */
 583 template<MathOptimization opt = MathOptimization::Safe>
 584 static inline double exp2(double x)
 585 {
 586     return std::exp2(x);
 587 }
 588
 589 /*! \brief Double exp(x).
 590  *
 591  * \param x Argument.
 592  * \result exp(x). Undefined if input argument caused overflow.
 593  *
 594  * \note This function might be superficially meaningless, but it helps us to
 595  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 596  *       outside such code.
 597  */
 598 template<MathOptimization opt = MathOptimization::Safe>
 599 static inline double exp(double x)
 600 {
 601     return std::exp(x);
 602 }
 603
 604 /*! \brief Double erf(x).
 605  *
 606  * \param x Argument.
 607  * \result erf(x)
 608  *
 609  * \note This function might be superficially meaningless, but it helps us to
 610  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 611  *       outside such code.
 612  */
 613 static inline double erf(double x)
 614 {
 615     return std::erf(x);
 616 }
 617
 618 /*! \brief Double erfc(x).
 619  *
 620  * \param x Argument.
 621  * \result erfc(x)
 622  *
 623  * \note This function might be superficially meaningless, but it helps us to
 624  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 625  *       outside such code.
 626  */
 627 static inline double erfc(double x)
 628 {
 629     return std::erfc(x);
 630 }
 631
 632
 633 /*! \brief Double sin \& cos.
 634  *
 635  * \param x The argument to evaluate sin/cos for
 636  * \param[out] sinval Sin(x)
 637  * \param[out] cosval Cos(x)
 638  *
 639  * \note This function might be superficially meaningless, but it helps us to
 640  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 641  *       outside such code.
 642  */
 643 static inline void sincos(double x, double* sinval, double* cosval)
 644 {
 645     *sinval = std::sin(x);
 646     *cosval = std::cos(x);
 647 }
 648
 649 /*! \brief Double sin.
 650  *
 651  * \param x The argument to evaluate sin for
 652  * \result Sin(x)
 653  *
 654  * \note This function might be superficially meaningless, but it helps us to
 655  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 656  *       outside such code.
 657  */
 658 static inline double sin(double x)
 659 {
 660     return std::sin(x);
 661 }
 662
 663 /*! \brief Double cos.
 664  *
 665  * \param x The argument to evaluate cos for
 666  * \result Cos(x)
 667  *
 668  * \note This function might be superficially meaningless, but it helps us to
 669  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 670  *       outside such code.
 671  */
 672 static inline double cos(double x)
 673 {
 674     return std::cos(x);
 675 }
 676
 677 /*! \brief Double tan.
 678  *
 679  * \param x The argument to evaluate tan for
 680  * \result Tan(x)
 681  *
 682  * \note This function might be superficially meaningless, but it helps us to
 683  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 684  *       outside such code.
 685  */
 686 static inline double tan(double x)
 687 {
 688     return std::tan(x);
 689 }
 690
 691 /*! \brief Double asin.
 692  *
 693  * \param x The argument to evaluate asin for
 694  * \result Asin(x)
 695  *
 696  * \note This function might be superficially meaningless, but it helps us to
 697  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 698  *       outside such code.
 699  */
 700 static inline double asin(double x)
 701 {
 702     return std::asin(x);
 703 }
 704
 705 /*! \brief Double acos.
 706  *
 707  * \param x The argument to evaluate acos for
 708  * \result Acos(x)
 709  *
 710  * \note This function might be superficially meaningless, but it helps us to
 711  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 712  *       outside such code.
 713  */
 714 static inline double acos(double x)
 715 {
 716     return std::acos(x);
 717 }
 718
 719 /*! \brief Double atan.
 720  *
 721  * \param x The argument to evaluate atan for
 722  * \result Atan(x)
 723  *
 724  * \note This function might be superficially meaningless, but it helps us to
 725  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 726  *       outside such code.
 727  */
 728 static inline double atan(double x)
 729 {
 730     return std::atan(x);
 731 }
 732
 733 /*! \brief Double atan2(y,x).
 734  *
 735  * \param y Y component of vector, any quartile
 736  * \param x X component of vector, any quartile
 737  * \result Atan(y,x)
 738  *
 739  * \note This function might be superficially meaningless, but it helps us to
 740  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 741  *       outside such code.
 742  */
 743 static inline double atan2(double y, double x)
 744 {
 745     return std::atan2(y, x);
 746 }
 747
 748 /*! \brief Calculate the force correction due to PME analytically in double.
 749  *
 750  * See the SIMD version of this function for details.
 751  *
 752  * \param z2 input parameter
 753  * \returns Correction to use on force
 754  *
 755  * \note This function might be superficially meaningless, but it helps us to
 756  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 757  *       outside such code.
 758  */
 759 static inline double pmeForceCorrection(double z2)
 760 {
 761     const double FN10(-8.0072854618360083154e-14);
 762     const double FN9(1.1859116242260148027e-11);
 763     const double FN8(-8.1490406329798423616e-10);
 764     const double FN7(3.4404793543907847655e-8);
 765     const double FN6(-9.9471420832602741006e-7);
 766     const double FN5(0.000020740315999115847456);
 767     const double FN4(-0.00031991745139313364005);
 768     const double FN3(0.0035074449373659008203);
 769     const double FN2(-0.031750380176100813405);
 770     const double FN1(0.13884101728898463426);
 771     const double FN0(-0.75225277815249618847);
 772
 773     const double FD5(0.000016009278224355026701);
 774     const double FD4(0.00051055686934806966046);
 775     const double FD3(0.0081803507497974289008);
 776     const double FD2(0.077181146026670287235);
 777     const double FD1(0.41543303143712535988);
 778     const double FD0(1.0);
 779
 780     double z4;
 781     double polyFN0, polyFN1, polyFD0, polyFD1;
 782
 783     z4 = z2 * z2;
 784
 785     polyFD1 = fma(FD5, z4, FD3);
 786     polyFD1 = fma(polyFD1, z4, FD1);
 787     polyFD1 = polyFD1 * z2;
 788     polyFD0 = fma(FD4, z4, FD2);
 789     polyFD0 = fma(polyFD0, z4, FD0);
 790     polyFD0 = polyFD0 + polyFD1;
 791
 792     polyFD0 = inv(polyFD0);
 793
 794     polyFN0 = fma(FN10, z4, FN8);
 795     polyFN0 = fma(polyFN0, z4, FN6);
 796     polyFN0 = fma(polyFN0, z4, FN4);
 797     polyFN0 = fma(polyFN0, z4, FN2);
 798     polyFN0 = fma(polyFN0, z4, FN0);
 799     polyFN1 = fma(FN9, z4, FN7);
 800     polyFN1 = fma(polyFN1, z4, FN5);
 801     polyFN1 = fma(polyFN1, z4, FN3);
 802     polyFN1 = fma(polyFN1, z4, FN1);
 803     polyFN0 = fma(polyFN1, z2, polyFN0);
 804
 805     return polyFN0 * polyFD0;
 806 }
 807
 808 /*! \brief Calculate the potential correction due to PME analytically in double.
 809  *
 810  * See the SIMD version of this function for details.
 811  *
 812  * \param z2 input parameter
 813  * \returns Correction to use on potential.
 814  *
 815  * \note This function might be superficially meaningless, but it helps us to
 816  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 817  *       outside such code.
 818  */
 819 static inline double pmePotentialCorrection(double z2)
 820 {
 821     const double VN9(-9.3723776169321855475e-13);
 822     const double VN8(1.2280156762674215741e-10);
 823     const double VN7(-7.3562157912251309487e-9);
 824     const double VN6(2.6215886208032517509e-7);
 825     const double VN5(-4.9532491651265819499e-6);
 826     const double VN4(0.00025907400778966060389);
 827     const double VN3(0.0010585044856156469792);
 828     const double VN2(0.045247661136833092885);
 829     const double VN1(0.11643931522926034421);
 830     const double VN0(1.1283791671726767970);
 831
 832     const double VD5(0.000021784709867336150342);
 833     const double VD4(0.00064293662010911388448);
 834     const double VD3(0.0096311444822588683504);
 835     const double VD2(0.085608012351550627051);
 836     const double VD1(0.43652499166614811084);
 837     const double VD0(1.0);
 838
 839     double z4;
 840     double polyVN0, polyVN1, polyVD0, polyVD1;
 841
 842     z4 = z2 * z2;
 843
 844     polyVD1 = fma(VD5, z4, VD3);
 845     polyVD0 = fma(VD4, z4, VD2);
 846     polyVD1 = fma(polyVD1, z4, VD1);
 847     polyVD0 = fma(polyVD0, z4, VD0);
 848     polyVD0 = fma(polyVD1, z2, polyVD0);
 849
 850     polyVD0 = inv(polyVD0);
 851
 852     polyVN1 = fma(VN9, z4, VN7);
 853     polyVN0 = fma(VN8, z4, VN6);
 854     polyVN1 = fma(polyVN1, z4, VN5);
 855     polyVN0 = fma(polyVN0, z4, VN4);
 856     polyVN1 = fma(polyVN1, z4, VN3);
 857     polyVN0 = fma(polyVN0, z4, VN2);
 858     polyVN1 = fma(polyVN1, z4, VN1);
 859     polyVN0 = fma(polyVN0, z4, VN0);
 860     polyVN0 = fma(polyVN1, z2, polyVN0);
 861
 862     return polyVN0 * polyVD0;
 863 }
 864
 865
 866 /*****************************************************************************
 867  *           Floating point math functions mimicking SIMD versions.          *
 868  *              Double precision data, single precision accuracy.            *
 869  *****************************************************************************/
 870
 871
 872 /*! \brief Calculate 1/sqrt(x) for double, but with single accuracy.
 873  *
 874  *  \param x Argument that must be >0. This routine does not check arguments.
 875  *  \return 1/sqrt(x). Result is undefined if your argument was invalid.
 876  *
 877  * \note This function might be superficially meaningless, but it helps us to
 878  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 879  *       outside such code.
 880  */
 881 static inline double invsqrtSingleAccuracy(double x)
 882 {
 883     return invsqrt(static_cast<float>(x));
 884 }
 885
 886 /*! \brief Calculate 1/sqrt(x) for two doubles, but with single accuracy.
 887  *
 888  * \param x0  First argument, x0 must be positive - no argument checking.
 889  * \param x1  Second argument, x1 must be positive - no argument checking.
 890  * \param[out] out0  Result 1/sqrt(x0)
 891  * \param[out] out1  Result 1/sqrt(x1)
 892  *
 893  * \note This function might be superficially meaningless, but it helps us to
 894  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 895  *       outside such code.
 896  */
 897 static inline void invsqrtPairSingleAccuracy(double x0, double x1, double* out0, double* out1)
 898 {
 899     *out0 = invsqrt(static_cast<float>(x0));
 900     *out1 = invsqrt(static_cast<float>(x1));
 901 }
 902
 903 /*! \brief Calculate 1/x for double, but with single accuracy.
 904  *
 905  *  \param x Argument that must be nonzero. This routine does not check arguments.
 906  *  \return 1/x. Result is undefined if your argument was invalid.
 907  *
 908  * \note This function might be superficially meaningless, but it helps us to
 909  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 910  *       outside such code.
 911  */
 912 static inline double invSingleAccuracy(double x)
 913 {
 914     return 1.0F / x;
 915 }
 916
 917 /*! \brief Calculate 1/sqrt(x) for masked entry of double, but with single accuracy.
 918  *
 919  *  This routine only evaluates 1/sqrt(x) if mask is true.
 920  *  Illegal values for a masked-out double will not lead to
 921  *  floating-point exceptions.
 922  *
 923  *  \param x Argument that must be >0 if masked-in.
 924  *  \param m Mask
 925  *  \return 1/sqrt(x). Result is undefined if your argument was invalid or
 926  *          entry was not masked, and 0.0 for masked-out entries.
 927  *
 928  * \note This function might be superficially meaningless, but it helps us to
 929  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 930  *       outside such code.
 931  */
 932 static inline double maskzInvsqrtSingleAccuracy(double x, bool m)
 933 {
 934     return m ? invsqrtSingleAccuracy(x) : 0.0;
 935 }
 936
 937 /*! \brief Calculate 1/x for masked entry of double, but with single accuracy.
 938  *
 939  *  This routine only evaluates 1/x if mask is true.
 940  *  Illegal values for a masked-out double will not lead to
 941  *  floating-point exceptions.
 942  *
 943  *  \param x Argument that must be nonzero if masked-in.
 944  *  \param m Mask
 945  *  \return 1/x. Result is undefined if your argument was invalid or
 946  *          entry was not masked, and 0.0 for masked-out entries.
 947  *
 948  * \note This function might be superficially meaningless, but it helps us to
 949  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 950  *       outside such code.
 951  */
 952 static inline double maskzInvSingleAccuracy(double x, bool m)
 953 {
 954     return m ? invSingleAccuracy(x) : 0.0;
 955 }
 956
 957 /*! \brief Calculate sqrt(x) for double, but with single accuracy.
 958  *
 959  *  \param x Argument that must be >=0.
 960  *  \return sqrt(x).
 961  *
 962  * \note This function might be superficially meaningless, but it helps us to
 963  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 964  *       outside such code.
 965  */
 966 static inline double sqrtSingleAccuracy(double x)
 967 {
 968     return std::sqrt(static_cast<float>(x));
 969 }
 970
 971 /*! \brief Double log(x), but with single accuracy. This is the natural logarithm.
 972  *
 973  * \param x Argument, should be >0.
 974  * \result The natural logarithm of x. Undefined if argument is invalid.
 975  *
 976  * \note This function might be superficially meaningless, but it helps us to
 977  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 978  *       outside such code.
 979  */
 980 static inline double logSingleAccuracy(double x)
 981 {
 982     return std::log(static_cast<float>(x));
 983 }
 984
 985 /*! \brief Double 2^x, but with single accuracy.
 986  *
 987  * \param x Argument.
 988  * \result 2^x. Undefined if input argument caused overflow.
 989  *
 990  * \note This function might be superficially meaningless, but it helps us to
 991  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 992  *       outside such code.
 993  */
 994 static inline double exp2SingleAccuracy(double x)
 995 {
 996     return std::exp2(static_cast<float>(x));
 997 }
 998
 999 /*! \brief Double exp(x), but with single accuracy.
1000  *
1001  * \param x Argument.
1002  * \result exp(x). Undefined if input argument caused overflow.
1003  *
1004  * \note This function might be superficially meaningless, but it helps us to
1005  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1006  *       outside such code.
1007  */
1008 static inline double expSingleAccuracy(double x)
1009 {
1010     return std::exp(static_cast<float>(x));
1011 }
1012
1013 /*! \brief Double erf(x), but with single accuracy.
1014  *
1015  * \param x Argument.
1016  * \result erf(x)
1017  *
1018  * \note This function might be superficially meaningless, but it helps us to
1019  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1020  *       outside such code.
1021  */
1022 static inline double erfSingleAccuracy(double x)
1023 {
1024     return std::erf(static_cast<float>(x));
1025 }
1026
1027 /*! \brief Double erfc(x), but with single accuracy.
1028  *
1029  * \param x Argument.
1030  * \result erfc(x)
1031  *
1032  * \note This function might be superficially meaningless, but it helps us to
1033  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1034  *       outside such code.
1035  */
1036 static inline double erfcSingleAccuracy(double x)
1037 {
1038     return std::erfc(static_cast<float>(x));
1039 }
1040
1041
1042 /*! \brief Double sin \& cos, but with single accuracy.
1043  *
1044  * \param x The argument to evaluate sin/cos for
1045  * \param[out] sinval Sin(x)
1046  * \param[out] cosval Cos(x)
1047  *
1048  * \note This function might be superficially meaningless, but it helps us to
1049  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1050  *       outside such code.
1051  */
1052 static inline void sincosSingleAccuracy(double x, double* sinval, double* cosval)
1053 {
1054     // There is no single-precision sincos guaranteed in C++11, so use
1055     // separate functions and hope the compiler optimizes it for us.
1056     *sinval = std::sin(static_cast<float>(x));
1057     *cosval = std::cos(static_cast<float>(x));
1058 }
1059
1060 /*! \brief Double sin, but with single accuracy.
1061  *
1062  * \param x The argument to evaluate sin for
1063  * \result Sin(x)
1064  *
1065  * \note This function might be superficially meaningless, but it helps us to
1066  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1067  *       outside such code.
1068  */
1069 static inline double sinSingleAccuracy(double x)
1070 {
1071     return std::sin(static_cast<float>(x));
1072 }
1073
1074 /*! \brief Double cos, but with single accuracy.
1075  *
1076  * \param x The argument to evaluate cos for
1077  * \result Cos(x)
1078  *
1079  * \note This function might be superficially meaningless, but it helps us to
1080  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1081  *       outside such code.
1082  */
1083 static inline double cosSingleAccuracy(double x)
1084 {
1085     return std::cos(static_cast<float>(x));
1086 }
1087
1088 /*! \brief Double tan, but with single accuracy.
1089  *
1090  * \param x The argument to evaluate tan for
1091  * \result Tan(x)
1092  *
1093  * \note This function might be superficially meaningless, but it helps us to
1094  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1095  *       outside such code.
1096  */
1097 static inline double tanSingleAccuracy(double x)
1098 {
1099     return std::tan(static_cast<float>(x));
1100 }
1101
1102 /*! \brief Double asin, but with single accuracy.
1103  *
1104  * \param x The argument to evaluate asin for
1105  * \result Asin(x)
1106  *
1107  * \note This function might be superficially meaningless, but it helps us to
1108  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1109  *       outside such code.
1110  */
1111 static inline double asinSingleAccuracy(double x)
1112 {
1113     return std::asin(static_cast<float>(x));
1114 }
1115
1116 /*! \brief Double acos, but with single accuracy.
1117  *
1118  * \param x The argument to evaluate acos for
1119  * \result Acos(x)
1120  *
1121  * \note This function might be superficially meaningless, but it helps us to
1122  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1123  *       outside such code.
1124  */
1125 static inline double acosSingleAccuracy(double x)
1126 {
1127     return std::acos(static_cast<float>(x));
1128 }
1129
1130 /*! \brief Double atan, but with single accuracy.
1131  *
1132  * \param x The argument to evaluate atan for
1133  * \result Atan(x)
1134  *
1135  * \note This function might be superficially meaningless, but it helps us to
1136  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1137  *       outside such code.
1138  */
1139 static inline double atanSingleAccuracy(double x)
1140 {
1141     return std::atan(static_cast<float>(x));
1142 }
1143
1144 /*! \brief Double atan2(y,x), but with single accuracy.
1145  *
1146  * \param y Y component of vector, any quartile
1147  * \param x X component of vector, any quartile
1148  * \result Atan(y,x)
1149  *
1150  * \note This function might be superficially meaningless, but it helps us to
1151  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1152  *       outside such code.
1153  */
1154 static inline double atan2SingleAccuracy(double y, double x)
1155 {
1156     return std::atan2(static_cast<float>(y), static_cast<float>(x));
1157 }
1158
1159 /*! \brief Force correction due to PME in double, but with single accuracy.
1160  *
1161  * See the SIMD version of this function for details.
1162  *
1163  * \param z2 input parameter
1164  * \returns Correction to use on force
1165  *
1166  * \note This function might be superficially meaningless, but it helps us to
1167  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1168  *       outside such code.
1169  */
1170 static inline double pmeForceCorrectionSingleAccuracy(double z2)
1171 {
1172     const float FN6(-1.7357322914161492954e-8F);
1173     const float FN5(1.4703624142580877519e-6F);
1174     const float FN4(-0.000053401640219807709149F);
1175     const float FN3(0.0010054721316683106153F);
1176     const float FN2(-0.019278317264888380590F);
1177     const float FN1(0.069670166153766424023F);
1178     const float FN0(-0.75225204789749321333F);
1179
1180     const float FD4(0.0011193462567257629232F);
1181     const float FD3(0.014866955030185295499F);
1182     const float FD2(0.11583842382862377919F);
1183     const float FD1(0.50736591960530292870F);
1184     const float FD0(1.0F);
1185
1186     float z4;
1187     float polyFN0, polyFN1, polyFD0, polyFD1;
1188
1189     float z2f = z2;
1190
1191     z4 = z2f * z2f;
1192
1193     polyFD0 = fma(FD4, z4, FD2);
1194     polyFD1 = fma(FD3, z4, FD1);
1195     polyFD0 = fma(polyFD0, z4, FD0);
1196     polyFD0 = fma(polyFD1, z2f, polyFD0);
1197
1198     polyFN0 = fma(FN6, z4, FN4);
1199     polyFN1 = fma(FN5, z4, FN3);
1200     polyFN0 = fma(polyFN0, z4, FN2);
1201     polyFN1 = fma(polyFN1, z4, FN1);
1202     polyFN0 = fma(polyFN0, z4, FN0);
1203     polyFN0 = fma(polyFN1, z2f, polyFN0);
1204
1205     return polyFN0 / polyFD0;
1206 }
1207
1208 /*! \brief Potential correction due to PME in double, but with single accuracy.
1209  *
1210  * See the SIMD version of this function for details.
1211  *
1212  * \param z2 input parameter
1213  * \returns Correction to use on potential.
1214  *
1215  * \note This function might be superficially meaningless, but it helps us to
1216  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1217  *       outside such code.
1218  */
1219 static inline double pmePotentialCorrectionSingleAccuracy(double z2)
1220 {
1221     const float VN6(1.9296833005951166339e-8F);
1222     const float VN5(-1.4213390571557850962e-6F);
1223     const float VN4(0.000041603292906656984871F);
1224     const float VN3(-0.00013134036773265025626F);
1225     const float VN2(0.038657983986041781264F);
1226     const float VN1(0.11285044772717598220F);
1227     const float VN0(1.1283802385263030286F);
1228
1229     const float VD3(0.0066752224023576045451F);
1230     const float VD2(0.078647795836373922256F);
1231     const float VD1(0.43336185284710920150F);
1232     const float VD0(1.0F);
1233
1234     float z4;
1235     float polyVN0, polyVN1, polyVD0, polyVD1;
1236
1237     float z2f = z2;
1238
1239     z4 = z2f * z2f;
1240
1241     polyVD1 = fma(VD3, z4, VD1);
1242     polyVD0 = fma(VD2, z4, VD0);
1243     polyVD0 = fma(polyVD1, z2f, polyVD0);
1244
1245     polyVN0 = fma(VN6, z4, VN4);
1246     polyVN1 = fma(VN5, z4, VN3);
1247     polyVN0 = fma(polyVN0, z4, VN2);
1248     polyVN1 = fma(polyVN1, z4, VN1);
1249     polyVN0 = fma(polyVN0, z4, VN0);
1250     polyVN0 = fma(polyVN1, z2f, polyVN0);
1251
1252     return polyVN0 / polyVD0;
1253 }
1254
1255
1256 } // namespace gmx
1257
1258
1259 #endif // GMX_SIMD_SCALAR_MATH_H