2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2016,2017,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #ifndef GMX_SIMD_SCALAR_MATH_H
36 #define GMX_SIMD_SCALAR_MATH_H
40 #include "gromacs/math/functions.h"
41 #include "gromacs/math/utilities.h"
42 #include "gromacs/simd/scalar/scalar.h"
44 /*! \libinternal \file
46 * \brief Scalar math functions mimicking GROMACS SIMD math functions
48 * These versions make it possible to write functions that are templated with
49 * either a SIMD or scalar type. While some of these functions might not appear
50 * SIMD-specific, we have placed them here because the only reason to use these
51 * instead of generic function is in templated combined SIMD/non-SIMD code.
52 * It is important that these functions match the SIMD versions exactly in their
53 * arguments and template arguments so that overload resolution works correctly.
55 * \author Erik Lindahl <erik.lindahl@gmail.com>
58 * \ingroup module_simd
64 /*****************************************************************************
65 * Single-precision floating point math functions mimicking SIMD versions *
66 *****************************************************************************/
69 /*! \brief Composes single value with the magnitude of x and the sign of y.
71 * \param x Value to set sign for
72 * \param y Value used to set sign
73 * \return Magnitude of x, sign of y
75 * \note This function might be superficially meaningless, but it helps us to
76 * write templated SIMD/non-SIMD code. For clarity it should not be used
79 static inline float copysign(float x, float y)
81 return std::copysign(x, y);
84 // invsqrt(x) is already defined in math/functions.h
86 /*! \brief Calculate 1/sqrt(x) for two floats.
88 * \param x0 First argument, x0 must be positive - no argument checking.
89 * \param x1 Second argument, x1 must be positive - no argument checking.
90 * \param[out] out0 Result 1/sqrt(x0)
91 * \param[out] out1 Result 1/sqrt(x1)
93 * \note This function might be superficially meaningless, but it helps us to
94 * write templated SIMD/non-SIMD code. For clarity it should not be used
97 static inline void invsqrtPair(float x0, float x1, float* out0, float* out1)
103 /*! \brief Calculate 1/x for float.
105 * \param x Argument that must be nonzero. This routine does not check arguments.
106 * \return 1/x. Result is undefined if your argument was invalid.
108 * \note This function might be superficially meaningless, but it helps us to
109 * write templated SIMD/non-SIMD code. For clarity it should not be used
112 static inline float inv(float x)
117 /*! \brief Calculate 1/sqrt(x) for masked entry of float.
119 * This routine only evaluates 1/sqrt(x) if mask is true.
120 * Illegal values for a masked-out float will not lead to
121 * floating-point exceptions.
123 * \param x Argument that must be >0 if masked-in.
125 * \return 1/sqrt(x). Result is undefined if your argument was invalid or
126 * entry was not masked, and 0.0 for masked-out entries.
128 * \note This function might be superficially meaningless, but it helps us to
129 * write templated SIMD/non-SIMD code. For clarity it should not be used
132 static inline float maskzInvsqrt(float x, bool m)
134 return m ? invsqrt(x) : 0.0F;
137 /*! \brief Calculate 1/x for masked entry of float.
139 * This routine only evaluates 1/x if mask is true.
140 * Illegal values for a masked-out float will not lead to
141 * floating-point exceptions.
143 * \param x Argument that must be nonzero if masked-in.
145 * \return 1/x. Result is undefined if your argument was invalid or
146 * entry was not masked, and 0.0 for masked-out entries.
148 * \note This function might be superficially meaningless, but it helps us to
149 * write templated SIMD/non-SIMD code. For clarity it should not be used
152 static inline float maskzInv(float x, bool m)
154 return m ? inv(x) : 0.0F;
157 /*! \brief Float sqrt(x). This is the square root.
159 * \param x Argument, should be >= 0.
160 * \result The square root of x. Undefined if argument is invalid.
162 * \note This function might be superficially meaningless, but it helps us to
163 * write templated SIMD/non-SIMD code. For clarity it should not be used
166 template<MathOptimization opt = MathOptimization::Safe>
167 static inline float sqrt(float x)
172 /*! \brief Float log(x). This is the natural logarithm.
174 * \param x Argument, should be >0.
175 * \result The natural logarithm of x. Undefined if argument is invalid.
177 * \note This function might be superficially meaningless, but it helps us to
178 * write templated SIMD/non-SIMD code. For clarity it should not be used
181 static inline float log(float x)
186 /*! \brief Float 2^x.
189 * \result 2^x. Undefined if input argument caused overflow.
191 * \note This function might be superficially meaningless, but it helps us to
192 * write templated SIMD/non-SIMD code. For clarity it should not be used
195 template<MathOptimization opt = MathOptimization::Safe>
196 static inline float exp2(float x)
201 /*! \brief Float exp(x).
204 * \result exp(x). Undefined if input argument caused overflow.
206 * \note This function might be superficially meaningless, but it helps us to
207 * write templated SIMD/non-SIMD code. For clarity it should not be used
210 template<MathOptimization opt = MathOptimization::Safe>
211 static inline float exp(float x)
216 /*! \brief Float erf(x).
221 * \note This function might be superficially meaningless, but it helps us to
222 * write templated SIMD/non-SIMD code. For clarity it should not be used
225 static inline float erf(float x)
230 /*! \brief Float erfc(x).
235 * \note This function might be superficially meaningless, but it helps us to
236 * write templated SIMD/non-SIMD code. For clarity it should not be used
239 static inline float erfc(float x)
245 /*! \brief Float sin \& cos.
247 * \param x The argument to evaluate sin/cos for
248 * \param[out] sinval Sin(x)
249 * \param[out] cosval Cos(x)
251 * \note This function might be superficially meaningless, but it helps us to
252 * write templated SIMD/non-SIMD code. For clarity it should not be used
255 static inline void sincos(float x, float* sinval, float* cosval)
257 *sinval = std::sin(x);
258 *cosval = std::cos(x);
261 /*! \brief Float sin.
263 * \param x The argument to evaluate sin for
266 * \note This function might be superficially meaningless, but it helps us to
267 * write templated SIMD/non-SIMD code. For clarity it should not be used
270 static inline float sin(float x)
275 /*! \brief Float cos.
277 * \param x The argument to evaluate cos for
280 * \note This function might be superficially meaningless, but it helps us to
281 * write templated SIMD/non-SIMD code. For clarity it should not be used
284 static inline float cos(float x)
289 /*! \brief Float tan.
291 * \param x The argument to evaluate tan for
294 * \note This function might be superficially meaningless, but it helps us to
295 * write templated SIMD/non-SIMD code. For clarity it should not be used
298 static inline float tan(float x)
303 /*! \brief float asin.
305 * \param x The argument to evaluate asin for
308 * \note This function might be superficially meaningless, but it helps us to
309 * write templated SIMD/non-SIMD code. For clarity it should not be used
312 static inline float asin(float x)
317 /*! \brief Float acos.
319 * \param x The argument to evaluate acos for
322 * \note This function might be superficially meaningless, but it helps us to
323 * write templated SIMD/non-SIMD code. For clarity it should not be used
326 static inline float acos(float x)
331 /*! \brief Float atan.
333 * \param x The argument to evaluate atan for
336 * \note This function might be superficially meaningless, but it helps us to
337 * write templated SIMD/non-SIMD code. For clarity it should not be used
340 static inline float atan(float x)
345 /*! \brief Float atan2(y,x).
347 * \param y Y component of vector, any quartile
348 * \param x X component of vector, any quartile
351 * \note This function might be superficially meaningless, but it helps us to
352 * write templated SIMD/non-SIMD code. For clarity it should not be used
355 static inline float atan2(float y, float x)
357 return std::atan2(y, x);
360 /*! \brief Calculate the force correction due to PME analytically in float.
362 * See the SIMD version of this function for details.
364 * \param z2 input parameter
365 * \returns Correction to use on force
367 * \note This function might be superficially meaningless, but it helps us to
368 * write templated SIMD/non-SIMD code. For clarity it should not be used
371 static inline float pmeForceCorrection(float z2)
373 const float FN6(-1.7357322914161492954e-8F);
374 const float FN5(1.4703624142580877519e-6F);
375 const float FN4(-0.000053401640219807709149F);
376 const float FN3(0.0010054721316683106153F);
377 const float FN2(-0.019278317264888380590F);
378 const float FN1(0.069670166153766424023F);
379 const float FN0(-0.75225204789749321333F);
381 const float FD4(0.0011193462567257629232F);
382 const float FD3(0.014866955030185295499F);
383 const float FD2(0.11583842382862377919F);
384 const float FD1(0.50736591960530292870F);
385 const float FD0(1.0F);
388 float polyFN0, polyFN1, polyFD0, polyFD1;
392 polyFD0 = fma(FD4, z4, FD2);
393 polyFD1 = fma(FD3, z4, FD1);
394 polyFD0 = fma(polyFD0, z4, FD0);
395 polyFD0 = fma(polyFD1, z2, polyFD0);
397 polyFN0 = fma(FN6, z4, FN4);
398 polyFN1 = fma(FN5, z4, FN3);
399 polyFN0 = fma(polyFN0, z4, FN2);
400 polyFN1 = fma(polyFN1, z4, FN1);
401 polyFN0 = fma(polyFN0, z4, FN0);
402 polyFN0 = fma(polyFN1, z2, polyFN0);
404 return polyFN0 / polyFD0;
407 /*! \brief Calculate the potential correction due to PME analytically in float.
409 * See the SIMD version of this function for details.
411 * \param z2 input parameter
412 * \returns Correction to use on potential.
414 * \note This function might be superficially meaningless, but it helps us to
415 * write templated SIMD/non-SIMD code. For clarity it should not be used
418 static inline float pmePotentialCorrection(float z2)
420 const float VN6(1.9296833005951166339e-8F);
421 const float VN5(-1.4213390571557850962e-6F);
422 const float VN4(0.000041603292906656984871F);
423 const float VN3(-0.00013134036773265025626F);
424 const float VN2(0.038657983986041781264F);
425 const float VN1(0.11285044772717598220F);
426 const float VN0(1.1283802385263030286F);
428 const float VD3(0.0066752224023576045451F);
429 const float VD2(0.078647795836373922256F);
430 const float VD1(0.43336185284710920150F);
431 const float VD0(1.0F);
434 float polyVN0, polyVN1, polyVD0, polyVD1;
438 polyVD1 = fma(VD3, z4, VD1);
439 polyVD0 = fma(VD2, z4, VD0);
440 polyVD0 = fma(polyVD1, z2, polyVD0);
442 polyVN0 = fma(VN6, z4, VN4);
443 polyVN1 = fma(VN5, z4, VN3);
444 polyVN0 = fma(polyVN0, z4, VN2);
445 polyVN1 = fma(polyVN1, z4, VN1);
446 polyVN0 = fma(polyVN0, z4, VN0);
447 polyVN0 = fma(polyVN1, z2, polyVN0);
449 return polyVN0 / polyVD0;
452 /*****************************************************************************
453 * Double-precision floating point math functions mimicking SIMD versions *
454 *****************************************************************************/
457 /*! \brief Composes double value with the magnitude of x and the sign of y.
459 * \param x Value to set sign for
460 * \param y Value used to set sign
461 * \return Magnitude of x, sign of y
463 * \note This function might be superficially meaningless, but it helps us to
464 * write templated SIMD/non-SIMD code. For clarity it should not be used
467 static inline double copysign(double x, double y)
469 return std::copysign(x, y);
472 // invsqrt(x) is already defined in math/functions.h
474 /*! \brief Calculate 1/sqrt(x) for two doubles.
476 * \param x0 First argument, x0 must be positive - no argument checking.
477 * \param x1 Second argument, x1 must be positive - no argument checking.
478 * \param[out] out0 Result 1/sqrt(x0)
479 * \param[out] out1 Result 1/sqrt(x1)
481 * \note This function might be superficially meaningless, but it helps us to
482 * write templated SIMD/non-SIMD code. For clarity it should not be used
485 static inline void invsqrtPair(double x0, double x1, double* out0, double* out1)
491 /*! \brief Calculate 1/x for double.
493 * \param x Argument that must be nonzero. This routine does not check arguments.
494 * \return 1/x. Result is undefined if your argument was invalid.
496 * \note This function might be superficially meaningless, but it helps us to
497 * write templated SIMD/non-SIMD code. For clarity it should not be used
500 static inline double inv(double x)
505 /*! \brief Calculate 1/sqrt(x) for masked entry of double.
507 * This routine only evaluates 1/sqrt(x) if mask is true.
508 * Illegal values for a masked-out double will not lead to
509 * floating-point exceptions.
511 * \param x Argument that must be >0 if masked-in.
513 * \return 1/sqrt(x). Result is undefined if your argument was invalid or
514 * entry was not masked, and 0.0 for masked-out entries.
516 * \note This function might be superficially meaningless, but it helps us to
517 * write templated SIMD/non-SIMD code. For clarity it should not be used
520 static inline double maskzInvsqrt(double x, bool m)
522 return m ? invsqrt(x) : 0.0;
525 /*! \brief Calculate 1/x for masked entry of double.
527 * This routine only evaluates 1/x if mask is true.
528 * Illegal values for a masked-out double will not lead to
529 * floating-point exceptions.
531 * \param x Argument that must be nonzero if masked-in.
533 * \return 1/x. Result is undefined if your argument was invalid or
534 * entry was not masked, and 0.0 for masked-out entries.
536 * \note This function might be superficially meaningless, but it helps us to
537 * write templated SIMD/non-SIMD code. For clarity it should not be used
540 static inline double maskzInv(double x, bool m)
542 return m ? inv(x) : 0.0;
545 /*! \brief Double sqrt(x). This is the square root.
547 * \param x Argument, should be >= 0.
548 * \result The square root of x. Undefined if argument is invalid.
550 * \note This function might be superficially meaningless, but it helps us to
551 * write templated SIMD/non-SIMD code. For clarity it should not be used
554 template<MathOptimization opt = MathOptimization::Safe>
555 static inline double sqrt(double x)
560 /*! \brief Double log(x). This is the natural logarithm.
562 * \param x Argument, should be >0.
563 * \result The natural logarithm of x. Undefined if argument is invalid.
565 * \note This function might be superficially meaningless, but it helps us to
566 * write templated SIMD/non-SIMD code. For clarity it should not be used
569 static inline double log(double x)
574 /*! \brief Double 2^x.
577 * \result 2^x. Undefined if input argument caused overflow.
579 * \note This function might be superficially meaningless, but it helps us to
580 * write templated SIMD/non-SIMD code. For clarity it should not be used
583 template<MathOptimization opt = MathOptimization::Safe>
584 static inline double exp2(double x)
589 /*! \brief Double exp(x).
592 * \result exp(x). Undefined if input argument caused overflow.
594 * \note This function might be superficially meaningless, but it helps us to
595 * write templated SIMD/non-SIMD code. For clarity it should not be used
598 template<MathOptimization opt = MathOptimization::Safe>
599 static inline double exp(double x)
604 /*! \brief Double erf(x).
609 * \note This function might be superficially meaningless, but it helps us to
610 * write templated SIMD/non-SIMD code. For clarity it should not be used
613 static inline double erf(double x)
618 /*! \brief Double erfc(x).
623 * \note This function might be superficially meaningless, but it helps us to
624 * write templated SIMD/non-SIMD code. For clarity it should not be used
627 static inline double erfc(double x)
633 /*! \brief Double sin \& cos.
635 * \param x The argument to evaluate sin/cos for
636 * \param[out] sinval Sin(x)
637 * \param[out] cosval Cos(x)
639 * \note This function might be superficially meaningless, but it helps us to
640 * write templated SIMD/non-SIMD code. For clarity it should not be used
643 static inline void sincos(double x, double* sinval, double* cosval)
645 *sinval = std::sin(x);
646 *cosval = std::cos(x);
649 /*! \brief Double sin.
651 * \param x The argument to evaluate sin for
654 * \note This function might be superficially meaningless, but it helps us to
655 * write templated SIMD/non-SIMD code. For clarity it should not be used
658 static inline double sin(double x)
663 /*! \brief Double cos.
665 * \param x The argument to evaluate cos for
668 * \note This function might be superficially meaningless, but it helps us to
669 * write templated SIMD/non-SIMD code. For clarity it should not be used
672 static inline double cos(double x)
677 /*! \brief Double tan.
679 * \param x The argument to evaluate tan for
682 * \note This function might be superficially meaningless, but it helps us to
683 * write templated SIMD/non-SIMD code. For clarity it should not be used
686 static inline double tan(double x)
691 /*! \brief Double asin.
693 * \param x The argument to evaluate asin for
696 * \note This function might be superficially meaningless, but it helps us to
697 * write templated SIMD/non-SIMD code. For clarity it should not be used
700 static inline double asin(double x)
705 /*! \brief Double acos.
707 * \param x The argument to evaluate acos for
710 * \note This function might be superficially meaningless, but it helps us to
711 * write templated SIMD/non-SIMD code. For clarity it should not be used
714 static inline double acos(double x)
719 /*! \brief Double atan.
721 * \param x The argument to evaluate atan for
724 * \note This function might be superficially meaningless, but it helps us to
725 * write templated SIMD/non-SIMD code. For clarity it should not be used
728 static inline double atan(double x)
733 /*! \brief Double atan2(y,x).
735 * \param y Y component of vector, any quartile
736 * \param x X component of vector, any quartile
739 * \note This function might be superficially meaningless, but it helps us to
740 * write templated SIMD/non-SIMD code. For clarity it should not be used
743 static inline double atan2(double y, double x)
745 return std::atan2(y, x);
748 /*! \brief Calculate the force correction due to PME analytically in double.
750 * See the SIMD version of this function for details.
752 * \param z2 input parameter
753 * \returns Correction to use on force
755 * \note This function might be superficially meaningless, but it helps us to
756 * write templated SIMD/non-SIMD code. For clarity it should not be used
759 static inline double pmeForceCorrection(double z2)
761 const double FN10(-8.0072854618360083154e-14);
762 const double FN9(1.1859116242260148027e-11);
763 const double FN8(-8.1490406329798423616e-10);
764 const double FN7(3.4404793543907847655e-8);
765 const double FN6(-9.9471420832602741006e-7);
766 const double FN5(0.000020740315999115847456);
767 const double FN4(-0.00031991745139313364005);
768 const double FN3(0.0035074449373659008203);
769 const double FN2(-0.031750380176100813405);
770 const double FN1(0.13884101728898463426);
771 const double FN0(-0.75225277815249618847);
773 const double FD5(0.000016009278224355026701);
774 const double FD4(0.00051055686934806966046);
775 const double FD3(0.0081803507497974289008);
776 const double FD2(0.077181146026670287235);
777 const double FD1(0.41543303143712535988);
778 const double FD0(1.0);
781 double polyFN0, polyFN1, polyFD0, polyFD1;
785 polyFD1 = fma(FD5, z4, FD3);
786 polyFD1 = fma(polyFD1, z4, FD1);
787 polyFD1 = polyFD1 * z2;
788 polyFD0 = fma(FD4, z4, FD2);
789 polyFD0 = fma(polyFD0, z4, FD0);
790 polyFD0 = polyFD0 + polyFD1;
792 polyFD0 = inv(polyFD0);
794 polyFN0 = fma(FN10, z4, FN8);
795 polyFN0 = fma(polyFN0, z4, FN6);
796 polyFN0 = fma(polyFN0, z4, FN4);
797 polyFN0 = fma(polyFN0, z4, FN2);
798 polyFN0 = fma(polyFN0, z4, FN0);
799 polyFN1 = fma(FN9, z4, FN7);
800 polyFN1 = fma(polyFN1, z4, FN5);
801 polyFN1 = fma(polyFN1, z4, FN3);
802 polyFN1 = fma(polyFN1, z4, FN1);
803 polyFN0 = fma(polyFN1, z2, polyFN0);
805 return polyFN0 * polyFD0;
808 /*! \brief Calculate the potential correction due to PME analytically in double.
810 * See the SIMD version of this function for details.
812 * \param z2 input parameter
813 * \returns Correction to use on potential.
815 * \note This function might be superficially meaningless, but it helps us to
816 * write templated SIMD/non-SIMD code. For clarity it should not be used
819 static inline double pmePotentialCorrection(double z2)
821 const double VN9(-9.3723776169321855475e-13);
822 const double VN8(1.2280156762674215741e-10);
823 const double VN7(-7.3562157912251309487e-9);
824 const double VN6(2.6215886208032517509e-7);
825 const double VN5(-4.9532491651265819499e-6);
826 const double VN4(0.00025907400778966060389);
827 const double VN3(0.0010585044856156469792);
828 const double VN2(0.045247661136833092885);
829 const double VN1(0.11643931522926034421);
830 const double VN0(1.1283791671726767970);
832 const double VD5(0.000021784709867336150342);
833 const double VD4(0.00064293662010911388448);
834 const double VD3(0.0096311444822588683504);
835 const double VD2(0.085608012351550627051);
836 const double VD1(0.43652499166614811084);
837 const double VD0(1.0);
840 double polyVN0, polyVN1, polyVD0, polyVD1;
844 polyVD1 = fma(VD5, z4, VD3);
845 polyVD0 = fma(VD4, z4, VD2);
846 polyVD1 = fma(polyVD1, z4, VD1);
847 polyVD0 = fma(polyVD0, z4, VD0);
848 polyVD0 = fma(polyVD1, z2, polyVD0);
850 polyVD0 = inv(polyVD0);
852 polyVN1 = fma(VN9, z4, VN7);
853 polyVN0 = fma(VN8, z4, VN6);
854 polyVN1 = fma(polyVN1, z4, VN5);
855 polyVN0 = fma(polyVN0, z4, VN4);
856 polyVN1 = fma(polyVN1, z4, VN3);
857 polyVN0 = fma(polyVN0, z4, VN2);
858 polyVN1 = fma(polyVN1, z4, VN1);
859 polyVN0 = fma(polyVN0, z4, VN0);
860 polyVN0 = fma(polyVN1, z2, polyVN0);
862 return polyVN0 * polyVD0;
866 /*****************************************************************************
867 * Floating point math functions mimicking SIMD versions. *
868 * Double precision data, single precision accuracy. *
869 *****************************************************************************/
872 /*! \brief Calculate 1/sqrt(x) for double, but with single accuracy.
874 * \param x Argument that must be >0. This routine does not check arguments.
875 * \return 1/sqrt(x). Result is undefined if your argument was invalid.
877 * \note This function might be superficially meaningless, but it helps us to
878 * write templated SIMD/non-SIMD code. For clarity it should not be used
881 static inline double invsqrtSingleAccuracy(double x)
883 return invsqrt(static_cast<float>(x));
886 /*! \brief Calculate 1/sqrt(x) for two doubles, but with single accuracy.
888 * \param x0 First argument, x0 must be positive - no argument checking.
889 * \param x1 Second argument, x1 must be positive - no argument checking.
890 * \param[out] out0 Result 1/sqrt(x0)
891 * \param[out] out1 Result 1/sqrt(x1)
893 * \note This function might be superficially meaningless, but it helps us to
894 * write templated SIMD/non-SIMD code. For clarity it should not be used
897 static inline void invsqrtPairSingleAccuracy(double x0, double x1, double* out0, double* out1)
899 *out0 = invsqrt(static_cast<float>(x0));
900 *out1 = invsqrt(static_cast<float>(x1));
903 /*! \brief Calculate 1/x for double, but with single accuracy.
905 * \param x Argument that must be nonzero. This routine does not check arguments.
906 * \return 1/x. Result is undefined if your argument was invalid.
908 * \note This function might be superficially meaningless, but it helps us to
909 * write templated SIMD/non-SIMD code. For clarity it should not be used
912 static inline double invSingleAccuracy(double x)
917 /*! \brief Calculate 1/sqrt(x) for masked entry of double, but with single accuracy.
919 * This routine only evaluates 1/sqrt(x) if mask is true.
920 * Illegal values for a masked-out double will not lead to
921 * floating-point exceptions.
923 * \param x Argument that must be >0 if masked-in.
925 * \return 1/sqrt(x). Result is undefined if your argument was invalid or
926 * entry was not masked, and 0.0 for masked-out entries.
928 * \note This function might be superficially meaningless, but it helps us to
929 * write templated SIMD/non-SIMD code. For clarity it should not be used
932 static inline double maskzInvsqrtSingleAccuracy(double x, bool m)
934 return m ? invsqrtSingleAccuracy(x) : 0.0;
937 /*! \brief Calculate 1/x for masked entry of double, but with single accuracy.
939 * This routine only evaluates 1/x if mask is true.
940 * Illegal values for a masked-out double will not lead to
941 * floating-point exceptions.
943 * \param x Argument that must be nonzero if masked-in.
945 * \return 1/x. Result is undefined if your argument was invalid or
946 * entry was not masked, and 0.0 for masked-out entries.
948 * \note This function might be superficially meaningless, but it helps us to
949 * write templated SIMD/non-SIMD code. For clarity it should not be used
952 static inline double maskzInvSingleAccuracy(double x, bool m)
954 return m ? invSingleAccuracy(x) : 0.0;
957 /*! \brief Calculate sqrt(x) for double, but with single accuracy.
959 * \param x Argument that must be >=0.
962 * \note This function might be superficially meaningless, but it helps us to
963 * write templated SIMD/non-SIMD code. For clarity it should not be used
966 static inline double sqrtSingleAccuracy(double x)
968 return std::sqrt(static_cast<float>(x));
971 /*! \brief Double log(x), but with single accuracy. This is the natural logarithm.
973 * \param x Argument, should be >0.
974 * \result The natural logarithm of x. Undefined if argument is invalid.
976 * \note This function might be superficially meaningless, but it helps us to
977 * write templated SIMD/non-SIMD code. For clarity it should not be used
980 static inline double logSingleAccuracy(double x)
982 return std::log(static_cast<float>(x));
985 /*! \brief Double 2^x, but with single accuracy.
988 * \result 2^x. Undefined if input argument caused overflow.
990 * \note This function might be superficially meaningless, but it helps us to
991 * write templated SIMD/non-SIMD code. For clarity it should not be used
994 static inline double exp2SingleAccuracy(double x)
996 return std::exp2(static_cast<float>(x));
999 /*! \brief Double exp(x), but with single accuracy.
1001 * \param x Argument.
1002 * \result exp(x). Undefined if input argument caused overflow.
1004 * \note This function might be superficially meaningless, but it helps us to
1005 * write templated SIMD/non-SIMD code. For clarity it should not be used
1006 * outside such code.
1008 static inline double expSingleAccuracy(double x)
1010 return std::exp(static_cast<float>(x));
1013 /*! \brief Double erf(x), but with single accuracy.
1015 * \param x Argument.
1018 * \note This function might be superficially meaningless, but it helps us to
1019 * write templated SIMD/non-SIMD code. For clarity it should not be used
1020 * outside such code.
1022 static inline double erfSingleAccuracy(double x)
1024 return std::erf(static_cast<float>(x));
1027 /*! \brief Double erfc(x), but with single accuracy.
1029 * \param x Argument.
1032 * \note This function might be superficially meaningless, but it helps us to
1033 * write templated SIMD/non-SIMD code. For clarity it should not be used
1034 * outside such code.
1036 static inline double erfcSingleAccuracy(double x)
1038 return std::erfc(static_cast<float>(x));
1042 /*! \brief Double sin \& cos, but with single accuracy.
1044 * \param x The argument to evaluate sin/cos for
1045 * \param[out] sinval Sin(x)
1046 * \param[out] cosval Cos(x)
1048 * \note This function might be superficially meaningless, but it helps us to
1049 * write templated SIMD/non-SIMD code. For clarity it should not be used
1050 * outside such code.
1052 static inline void sincosSingleAccuracy(double x, double* sinval, double* cosval)
1054 // There is no single-precision sincos guaranteed in C++11, so use
1055 // separate functions and hope the compiler optimizes it for us.
1056 *sinval = std::sin(static_cast<float>(x));
1057 *cosval = std::cos(static_cast<float>(x));
1060 /*! \brief Double sin, but with single accuracy.
1062 * \param x The argument to evaluate sin for
1065 * \note This function might be superficially meaningless, but it helps us to
1066 * write templated SIMD/non-SIMD code. For clarity it should not be used
1067 * outside such code.
1069 static inline double sinSingleAccuracy(double x)
1071 return std::sin(static_cast<float>(x));
1074 /*! \brief Double cos, but with single accuracy.
1076 * \param x The argument to evaluate cos for
1079 * \note This function might be superficially meaningless, but it helps us to
1080 * write templated SIMD/non-SIMD code. For clarity it should not be used
1081 * outside such code.
1083 static inline double cosSingleAccuracy(double x)
1085 return std::cos(static_cast<float>(x));
1088 /*! \brief Double tan, but with single accuracy.
1090 * \param x The argument to evaluate tan for
1093 * \note This function might be superficially meaningless, but it helps us to
1094 * write templated SIMD/non-SIMD code. For clarity it should not be used
1095 * outside such code.
1097 static inline double tanSingleAccuracy(double x)
1099 return std::tan(static_cast<float>(x));
1102 /*! \brief Double asin, but with single accuracy.
1104 * \param x The argument to evaluate asin for
1107 * \note This function might be superficially meaningless, but it helps us to
1108 * write templated SIMD/non-SIMD code. For clarity it should not be used
1109 * outside such code.
1111 static inline double asinSingleAccuracy(double x)
1113 return std::asin(static_cast<float>(x));
1116 /*! \brief Double acos, but with single accuracy.
1118 * \param x The argument to evaluate acos for
1121 * \note This function might be superficially meaningless, but it helps us to
1122 * write templated SIMD/non-SIMD code. For clarity it should not be used
1123 * outside such code.
1125 static inline double acosSingleAccuracy(double x)
1127 return std::acos(static_cast<float>(x));
1130 /*! \brief Double atan, but with single accuracy.
1132 * \param x The argument to evaluate atan for
1135 * \note This function might be superficially meaningless, but it helps us to
1136 * write templated SIMD/non-SIMD code. For clarity it should not be used
1137 * outside such code.
1139 static inline double atanSingleAccuracy(double x)
1141 return std::atan(static_cast<float>(x));
1144 /*! \brief Double atan2(y,x), but with single accuracy.
1146 * \param y Y component of vector, any quartile
1147 * \param x X component of vector, any quartile
1150 * \note This function might be superficially meaningless, but it helps us to
1151 * write templated SIMD/non-SIMD code. For clarity it should not be used
1152 * outside such code.
1154 static inline double atan2SingleAccuracy(double y, double x)
1156 return std::atan2(static_cast<float>(y), static_cast<float>(x));
1159 /*! \brief Force correction due to PME in double, but with single accuracy.
1161 * See the SIMD version of this function for details.
1163 * \param z2 input parameter
1164 * \returns Correction to use on force
1166 * \note This function might be superficially meaningless, but it helps us to
1167 * write templated SIMD/non-SIMD code. For clarity it should not be used
1168 * outside such code.
1170 static inline double pmeForceCorrectionSingleAccuracy(double z2)
1172 const float FN6(-1.7357322914161492954e-8F);
1173 const float FN5(1.4703624142580877519e-6F);
1174 const float FN4(-0.000053401640219807709149F);
1175 const float FN3(0.0010054721316683106153F);
1176 const float FN2(-0.019278317264888380590F);
1177 const float FN1(0.069670166153766424023F);
1178 const float FN0(-0.75225204789749321333F);
1180 const float FD4(0.0011193462567257629232F);
1181 const float FD3(0.014866955030185295499F);
1182 const float FD2(0.11583842382862377919F);
1183 const float FD1(0.50736591960530292870F);
1184 const float FD0(1.0F);
1187 float polyFN0, polyFN1, polyFD0, polyFD1;
1193 polyFD0 = fma(FD4, z4, FD2);
1194 polyFD1 = fma(FD3, z4, FD1);
1195 polyFD0 = fma(polyFD0, z4, FD0);
1196 polyFD0 = fma(polyFD1, z2f, polyFD0);
1198 polyFN0 = fma(FN6, z4, FN4);
1199 polyFN1 = fma(FN5, z4, FN3);
1200 polyFN0 = fma(polyFN0, z4, FN2);
1201 polyFN1 = fma(polyFN1, z4, FN1);
1202 polyFN0 = fma(polyFN0, z4, FN0);
1203 polyFN0 = fma(polyFN1, z2f, polyFN0);
1205 return polyFN0 / polyFD0;
1208 /*! \brief Potential correction due to PME in double, but with single accuracy.
1210 * See the SIMD version of this function for details.
1212 * \param z2 input parameter
1213 * \returns Correction to use on potential.
1215 * \note This function might be superficially meaningless, but it helps us to
1216 * write templated SIMD/non-SIMD code. For clarity it should not be used
1217 * outside such code.
1219 static inline double pmePotentialCorrectionSingleAccuracy(double z2)
1221 const float VN6(1.9296833005951166339e-8F);
1222 const float VN5(-1.4213390571557850962e-6F);
1223 const float VN4(0.000041603292906656984871F);
1224 const float VN3(-0.00013134036773265025626F);
1225 const float VN2(0.038657983986041781264F);
1226 const float VN1(0.11285044772717598220F);
1227 const float VN0(1.1283802385263030286F);
1229 const float VD3(0.0066752224023576045451F);
1230 const float VD2(0.078647795836373922256F);
1231 const float VD1(0.43336185284710920150F);
1232 const float VD0(1.0F);
1235 float polyVN0, polyVN1, polyVD0, polyVD1;
1241 polyVD1 = fma(VD3, z4, VD1);
1242 polyVD0 = fma(VD2, z4, VD0);
1243 polyVD0 = fma(polyVD1, z2f, polyVD0);
1245 polyVN0 = fma(VN6, z4, VN4);
1246 polyVN1 = fma(VN5, z4, VN3);
1247 polyVN0 = fma(polyVN0, z4, VN2);
1248 polyVN1 = fma(polyVN1, z4, VN1);
1249 polyVN0 = fma(polyVN0, z4, VN0);
1250 polyVN0 = fma(polyVN1, z2f, polyVN0);
1252 return polyVN0 / polyVD0;
1259 #endif // GMX_SIMD_SCALAR_MATH_H