2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #ifndef GMX_SIMD_SCALAR_H
36 #define GMX_SIMD_SCALAR_H
44 /*! \libinternal \file
46 * \brief Scalar float functions corresponding to GROMACS SIMD functions
48 * These versions make it possible to write functions that are templated with
49 * either a SIMD or scalar type. While some of these functions might not appear
50 * SIMD-specific, we have placed them here because the only reason to use these
51 * instead of generic functions is in templated combined SIMD/non-SIMD code.
53 * There are a handful of limitations, in particular that it is impossible
54 * to overload the bitwise logical operators on built-in types.
56 * \author Erik Lindahl <erik.lindahl@gmail.com>
59 * \ingroup module_simd
65 /************************************************************************
66 * Single-precision floating point functions mimicking SIMD versions *
67 ************************************************************************/
69 /*! \brief Store contents of float variable to aligned memory m.
71 * \param[out] m Pointer to memory.
72 * \param a float variable to store
74 * \note This function might be superficially meaningless, but it helps us to
75 * write templated SIMD/non-SIMD code. For clarity it should not be used
78 static inline void store(float* m, float a)
83 /*! \brief Store contents of float variable to unaligned memory m.
85 * \param[out] m Pointer to memory, no alignment requirement.
86 * \param a float variable to store.
88 * \note This function might be superficially meaningless, but it helps us to
89 * write templated SIMD/non-SIMD code. For clarity it should not be used
92 static inline void storeU(float* m, float a)
97 // We cannot overload the logical operators and, or, andNot, xor for
100 /*! \brief Float Fused-multiply-add. Result is a*b + c.
107 * \note This function might be superficially meaningless, but it helps us to
108 * write templated SIMD/non-SIMD code. For clarity it should not be used
111 static inline float fma(float a, float b, float c)
113 // Note that we purposely do not use the single-rounding std::fma
114 // as that can be very slow without hardware support
118 /*! \brief Float Fused-multiply-subtract. Result is a*b - c.
125 * \note This function might be superficially meaningless, but it helps us to
126 * write templated SIMD/non-SIMD code. For clarity it should not be used
129 static inline float fms(float a, float b, float c)
134 /*! \brief Float Fused-negated-multiply-add. Result is -a*b + c.
141 * \note This function might be superficially meaningless, but it helps us to
142 * write templated SIMD/non-SIMD code. For clarity it should not be used
145 static inline float fnma(float a, float b, float c)
150 /*! \brief Float Fused-negated-multiply-subtract. Result is -a*b - c.
157 * \note This function might be superficially meaningless, but it helps us to
158 * write templated SIMD/non-SIMD code. For clarity it should not be used
161 static inline float fnms(float a, float b, float c)
166 /*! \brief Add two float variables, masked version.
171 * \return a+b where mask is true, a otherwise.
173 * \note This function might be superficially meaningless, but it helps us to
174 * write templated SIMD/non-SIMD code. For clarity it should not be used
177 static inline float maskAdd(float a, float b, float m)
179 return a + (m != 0.0F ? b : 0.0F);
182 /*! \brief Multiply two float variables, masked version.
187 * \return a*b where mask is true, 0.0 otherwise.
189 * \note This function might be superficially meaningless, but it helps us to
190 * write templated SIMD/non-SIMD code. For clarity it should not be used
193 static inline float maskzMul(float a, float b, float m)
195 return m != 0.0F ? (a * b) : 0.0F;
198 /*! \brief Float fused multiply-add, masked version.
204 * \return a*b+c where mask is true, 0.0 otherwise.
206 * \note This function might be superficially meaningless, but it helps us to
207 * write templated SIMD/non-SIMD code. For clarity it should not be used
210 static inline float maskzFma(float a, float b, float c, float m)
212 return m != 0.0F ? (a * b + c) : 0.0F;
215 /*! \brief Float Floating-point abs().
217 * \param a any floating point values
218 * \return abs(a) for each element.
220 * \note This function might be superficially meaningless, but it helps us to
221 * write templated SIMD/non-SIMD code. For clarity it should not be used
224 static inline float abs(float a)
229 /*! \brief Set each float element to the largest from two variables.
231 * \param a Any floating-point value
232 * \param b Any floating-point value
233 * \return max(a,b) for each element.
235 * \note This function might be superficially meaningless, but it helps us to
236 * write templated SIMD/non-SIMD code. For clarity it should not be used
239 static inline float max(float a, float b)
241 return std::max(a, b);
244 /*! \brief Set each float element to the smallest from two variables.
246 * \param a Any floating-point value
247 * \param b Any floating-point value
248 * \return min(a,b) for each element.
250 * \note This function might be superficially meaningless, but it helps us to
251 * write templated SIMD/non-SIMD code. For clarity it should not be used
254 static inline float min(float a, float b)
256 return std::min(a, b);
259 /*! \brief Float round to nearest integer value (in floating-point format).
261 * \param a Any floating-point value
262 * \return The nearest integer, represented in floating-point format.
264 * \note This function might be superficially meaningless, but it helps us to
265 * write templated SIMD/non-SIMD code. For clarity it should not be used
268 static inline float round(float a)
270 return std::round(a);
273 /*! \brief Truncate float, i.e. round towards zero - common hardware instruction.
275 * \param a Any floating-point value
276 * \return Integer rounded towards zero, represented in floating-point format.
278 * \note This function might be superficially meaningless, but it helps us to
279 * write templated SIMD/non-SIMD code. For clarity it should not be used
282 static inline float trunc(float a)
284 return std::trunc(a);
287 /*! \brief Return sum of all elements in float variable (i.e., the variable itself).
289 * \param a variable to reduce/sum.
290 * \return The argument variable itself.
292 * \note This function might be superficially meaningless, but it helps us to
293 * write templated SIMD/non-SIMD code. For clarity it should not be used
296 static inline float reduce(float a)
301 /*! \brief Bitwise andnot for two scalar float variables.
305 * \return (~data1) & data2
307 * \note This function might be superficially meaningless, but it helps us to
308 * write templated SIMD/non-SIMD code. For clarity it should not be used
311 static inline float andNot(float a, float b)
321 conv1.i = (~conv1.i) & conv2.i;
326 /*! \brief Return true if any bits are set in the float variable.
328 * This function is used to handle bitmasks, mainly for exclusions in the
329 * inner kernels. Note that it will return true even for -0.0f (sign bit set),
330 * so it is not identical to not-equal.
333 * \return True if any bit in a is nonzero.
335 * \note This function might be superficially meaningless, but it helps us to
336 * write templated SIMD/non-SIMD code. For clarity it should not be used
339 static inline bool testBits(float a)
347 return (conv.i != 0);
350 /*! \brief Returns whether the boolean is true.
352 * \param a Logical variable.
353 * \return true if a is true, otherwise false.
355 * \note This function might be superficially meaningless, but it helps us to
356 * write templated SIMD/non-SIMD code. For clarity it should not be used
359 static inline bool anyTrue(bool a)
364 /*! \brief Select from single precision variable where boolean is true.
366 * \param a Floating-point variable to select from
367 * \param mask Boolean selector
368 * \return a is selected for true, 0 for false.
370 * \note This function might be superficially meaningless, but it helps us to
371 * write templated SIMD/non-SIMD code. For clarity it should not be used
374 static inline float selectByMask(float a, bool mask)
376 return mask ? a : 0.0F;
379 /*! \brief Select from single precision variable where boolean is false.
381 * \param a Floating-point variable to select from
382 * \param mask Boolean selector
383 * \return a is selected for false, 0 for true.
385 * \note This function might be superficially meaningless, but it helps us to
386 * write templated SIMD/non-SIMD code. For clarity it should not be used
389 static inline float selectByNotMask(float a, bool mask)
391 return mask ? 0.0F : a;
394 /*! \brief Blend float selection.
396 * \param a First source
397 * \param b Second source
398 * \param sel Boolean selector
399 * \return Select b if sel is true, a otherwise.
401 * \note This function might be superficially meaningless, but it helps us to
402 * write templated SIMD/non-SIMD code. For clarity it should not be used
405 static inline float blend(float a, float b, bool sel)
410 /*! \brief Round single precision floating point to integer.
413 * \return Integer format, a rounded to nearest integer.
415 * \note This function might be superficially meaningless, but it helps us to
416 * write templated SIMD/non-SIMD code. For clarity it should not be used
419 static inline std::int32_t cvtR2I(float a)
421 return static_cast<std::int32_t>(std::round(a));
424 /*! \brief Truncate single precision floating point to integer.
427 * \return Integer format, a truncated to integer.
429 * \note This function might be superficially meaningless, but it helps us to
430 * write templated SIMD/non-SIMD code. For clarity it should not be used
433 static inline std::int32_t cvttR2I(float a)
435 return static_cast<std::int32_t>(std::trunc(a));
438 /*! \brief Return integer.
440 * This function mimics the SIMD integer-to-real conversion routines. By
441 * simply returning an integer, we let the compiler sort out whether the
442 * conversion should be to float or double rather than using proxy objects.
445 * \return same value (a)
447 * \note This function might be superficially meaningless, but it helps us to
448 * write templated SIMD/non-SIMD code. For clarity it should not be used
451 static inline std::int32_t cvtI2R(std::int32_t a)
456 /************************************************************************
457 * Double-precision floating point functions mimicking SIMD versions *
458 ************************************************************************/
460 /*! \brief Store contents of double variable to aligned memory m.
462 * \param[out] m Pointer to memory.
463 * \param a double variable to store
465 * \note This function might be superficially meaningless, but it helps us to
466 * write templated SIMD/non-SIMD code. For clarity it should not be used
469 static inline void store(double* m, double a)
474 /*! \brief Store contents of double variable to unaligned memory m.
476 * \param[out] m Pointer to memory, no alignment requirement.
477 * \param a double variable to store.
479 * \note This function might be superficially meaningless, but it helps us to
480 * write templated SIMD/non-SIMD code. For clarity it should not be used
483 static inline void storeU(double* m, double a)
488 // We cannot overload the logical operators and, or, andNot, xor for
491 /*! \brief double Fused-multiply-add. Result is a*b + c.
498 * \note This function might be superficially meaningless, but it helps us to
499 * write templated SIMD/non-SIMD code. For clarity it should not be used
502 static inline double fma(double a, double b, double c)
504 // Note that we purposely do not use the single-rounding std::fma
505 // as that can be very slow without hardware support
509 /*! \brief double Fused-multiply-subtract. Result is a*b - c.
516 * \note This function might be superficially meaningless, but it helps us to
517 * write templated SIMD/non-SIMD code. For clarity it should not be used
520 static inline double fms(double a, double b, double c)
525 /*! \brief double Fused-negated-multiply-add. Result is - a*b + c.
532 * \note This function might be superficially meaningless, but it helps us to
533 * write templated SIMD/non-SIMD code. For clarity it should not be used
536 static inline double fnma(double a, double b, double c)
541 /*! \brief double Fused-negated-multiply-subtract. Result is -a*b - c.
548 * \note This function might be superficially meaningless, but it helps us to
549 * write templated SIMD/non-SIMD code. For clarity it should not be used
552 static inline double fnms(double a, double b, double c)
557 /*! \brief Add two double variables, masked version.
562 * \return a+b where mask is true, a otherwise.
564 * \note This function might be superficially meaningless, but it helps us to
565 * write templated SIMD/non-SIMD code. For clarity it should not be used
568 static inline double maskAdd(double a, double b, double m)
570 return a + (m != 0.0 ? b : 0.0);
573 /*! \brief Multiply two double variables, masked version.
578 * \return a*b where mask is true, 0.0 otherwise.
580 * \note This function might be superficially meaningless, but it helps us to
581 * write templated SIMD/non-SIMD code. For clarity it should not be used
584 static inline double maskzMul(double a, double b, double m)
586 return m != 0.0 ? (a * b) : 0.0;
589 /*! \brief double fused multiply-add, masked version.
595 * \return a*b+c where mask is true, 0.0 otherwise.
597 * \note This function might be superficially meaningless, but it helps us to
598 * write templated SIMD/non-SIMD code. For clarity it should not be used
601 static inline double maskzFma(double a, double b, double c, double m)
603 return m != 0.0 ? (a * b + c) : 0.0;
606 /*! \brief double floating-point abs().
608 * \param a any floating point values
609 * \return abs(a) for each element.
611 * \note This function might be superficially meaningless, but it helps us to
612 * write templated SIMD/non-SIMD code. For clarity it should not be used
615 static inline double abs(double a)
620 /*! \brief Set each double element to the largest from two variables.
622 * \param a Any floating-point value
623 * \param b Any floating-point value
624 * \return max(a,b) for each element.
626 * \note This function might be superficially meaningless, but it helps us to
627 * write templated SIMD/non-SIMD code. For clarity it should not be used
630 static inline double max(double a, double b)
632 return std::max(a, b);
635 /*! \brief Set each double element to the smallest from two variables.
637 * \param a Any floating-point value
638 * \param b Any floating-point value
639 * \return min(a,b) for each element.
641 * \note This function might be superficially meaningless, but it helps us to
642 * write templated SIMD/non-SIMD code. For clarity it should not be used
645 static inline double min(double a, double b)
647 return std::min(a, b);
650 /*! \brief double round to nearest integer value (in floating-point format).
652 * \param a Any floating-point value
653 * \return The nearest integer, represented in floating-point format.
655 * \note This function might be superficially meaningless, but it helps us to
656 * write templated SIMD/non-SIMD code. For clarity it should not be used
659 static inline double round(double a)
661 return std::round(a);
664 /*! \brief Truncate double, i.e. round towards zero - common hardware instruction.
666 * \param a Any floating-point value
667 * \return Integer rounded towards zero, represented in floating-point format.
669 * \note This function might be superficially meaningless, but it helps us to
670 * write templated SIMD/non-SIMD code. For clarity it should not be used
673 static inline double trunc(double a)
675 return std::trunc(a);
678 /*! \brief Return sum of all elements in double variable (i.e., the variable itself).
680 * \param a variable to reduce/sum.
681 * \return The argument variable itself.
683 * \note This function might be superficially meaningless, but it helps us to
684 * write templated SIMD/non-SIMD code. For clarity it should not be used
687 static inline double reduce(double a)
692 /*! \brief Bitwise andnot for two scalar double variables.
696 * \return (~data1) & data2
698 * \note This function might be superficially meaningless, but it helps us to
699 * write templated SIMD/non-SIMD code. For clarity it should not be used
702 static inline double andNot(double a, double b)
712 conv1.i = (~conv1.i) & conv2.i;
717 /*! \brief Return true if any bits are set in the double variable.
719 * This function is used to handle bitmasks, mainly for exclusions in the
720 * inner kernels. Note that it will return true even for -0.0 (sign bit set),
721 * so it is not identical to not-equal.
724 * \return True if any bit in a is nonzero.
726 * \note This function might be superficially meaningless, but it helps us to
727 * write templated SIMD/non-SIMD code. For clarity it should not be used
730 static inline bool testBits(double a)
738 return (conv.i != 0);
741 /*! \brief Select from double precision variable where boolean is true.
743 * \param a double variable to select from
744 * \param mask Boolean selector
745 * \return a is selected for true, 0 for false.
747 * \note This function might be superficially meaningless, but it helps us to
748 * write templated SIMD/non-SIMD code. For clarity it should not be used
751 static inline double selectByMask(double a, bool mask)
753 return mask ? a : 0.0;
756 /*! \brief Select from double precision variable where boolean is false.
758 * \param a double variable to select from
759 * \param mask Boolean selector
760 * \return a is selected for false, 0 for true.
762 * \note This function might be superficially meaningless, but it helps us to
763 * write templated SIMD/non-SIMD code. For clarity it should not be used
766 static inline double selectByNotMask(double a, bool mask)
768 return mask ? 0.0 : a;
771 /*! \brief Blend double selection.
773 * \param a First source
774 * \param b Second source
775 * \param sel Boolean selector
776 * \return Select b if sel is true, a otherwise.
778 * \note This function might be superficially meaningless, but it helps us to
779 * write templated SIMD/non-SIMD code. For clarity it should not be used
782 static inline double blend(double a, double b, bool sel)
787 /*! \brief Round double precision floating point to integer.
790 * \return Integer format, a rounded to nearest integer.
792 * \note This function might be superficially meaningless, but it helps us to
793 * write templated SIMD/non-SIMD code. For clarity it should not be used
796 static inline std::int32_t cvtR2I(double a)
798 return static_cast<std::int32_t>(std::round(a));
801 /*! \brief Truncate double precision floating point to integer.
804 * \return Integer format, a truncated to integer.
806 * \note This function might be superficially meaningless, but it helps us to
807 * write templated SIMD/non-SIMD code. For clarity it should not be used
810 static inline std::int32_t cvttR2I(double a)
812 return static_cast<std::int32_t>(std::trunc(a));
815 // We do not have a separate cvtI2R for double, since that would require
816 // proxy objects. Instead, the float version returns an integer and lets the
817 // compiler sort out the conversion type.
820 /*! \brief Convert float to double (mimics SIMD conversion)
823 * \return a, as double
825 * \note This function might be superficially meaningless, but it helps us to
826 * write templated SIMD/non-SIMD code. For clarity it should not be used
829 static inline double cvtF2D(float a)
834 /*! \brief Convert double to float (mimics SIMD conversion)
837 * \return a, as float
839 * \note This function might be superficially meaningless, but it helps us to
840 * write templated SIMD/non-SIMD code. For clarity it should not be used
843 static inline float cvtD2F(double a)
848 /************************************************
849 * Integer functions mimicking SIMD versions *
850 ************************************************/
852 /*! \brief Store contents of integer variable to aligned memory m.
854 * \param[out] m Pointer to memory.
855 * \param a integer variable to store
857 * \note This function might be superficially meaningless, but it helps us to
858 * write templated SIMD/non-SIMD code. For clarity it should not be used
861 static inline void store(std::int32_t* m, std::int32_t a)
866 /*! \brief Store contents of integer variable to unaligned memory m.
868 * \param[out] m Pointer to memory, no alignment requirement.
869 * \param a integer variable to store.
871 * \note This function might be superficially meaningless, but it helps us to
872 * write templated SIMD/non-SIMD code. For clarity it should not be used
875 static inline void storeU(std::int32_t* m, std::int32_t a)
880 /*! \brief Bitwise andnot for two scalar integer variables.
884 * \return (~data1) & data2
886 * \note This function might be superficially meaningless, but it helps us to
887 * write templated SIMD/non-SIMD code. For clarity it should not be used
890 static inline std::int32_t andNot(std::int32_t a, std::int32_t b)
895 /*! \brief Return true if any bits are set in the integer variable.
897 * This function is used to handle bitmasks, mainly for exclusions in the
901 * \return True if any bit in a is nonzero.
903 * \note This function might be superficially meaningless, but it helps us to
904 * write templated SIMD/non-SIMD code. For clarity it should not be used
907 static inline bool testBits(std::int32_t a)
912 /*! \brief Select from integer variable where boolean is true.
914 * \param a Integer variable to select from
915 * \param mask Boolean selector
916 * \return a is selected for true, 0 for false.
918 * \note This function might be superficially meaningless, but it helps us to
919 * write templated SIMD/non-SIMD code. For clarity it should not be used
922 static inline std::int32_t selectByMask(std::int32_t a, bool mask)
927 /*! \brief Select from integer variable where boolean is false.
929 * \param a Integer variable to select from
930 * \param mask Boolean selector
931 * \return a is selected for false, 0 for true.
933 * \note This function might be superficially meaningless, but it helps us to
934 * write templated SIMD/non-SIMD code. For clarity it should not be used
937 static inline std::int32_t selectByNotMask(std::int32_t a, bool mask)
942 /*! \brief Blend integer selection.
944 * \param a First source
945 * \param b Second source
946 * \param sel Boolean selector
947 * \return Select b if sel is true, a otherwise.
949 * \note This function might be superficially meaningless, but it helps us to
950 * write templated SIMD/non-SIMD code. For clarity it should not be used
953 static inline std::int32_t blend(std::int32_t a, std::int32_t b, bool sel)
958 /*! \brief Just return a boolean (mimics SIMD real-to-int bool conversions)
961 * \return same boolean
963 * \note This function might be superficially meaningless, but it helps us to
964 * write templated SIMD/non-SIMD code. For clarity it should not be used
967 static inline bool cvtB2IB(bool a)
972 /*! \brief Just return a boolean (mimics SIMD int-to-real bool conversions)
975 * \return same boolean
977 * \note This function might be superficially meaningless, but it helps us to
978 * write templated SIMD/non-SIMD code. For clarity it should not be used
981 static inline bool cvtIB2B(bool a)
989 #endif // GMX_SIMD_SCALAR_H