2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2019,2020,2021, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H
37 #define GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H
39 /*! \libinternal \file
41 * \brief Reference implementation, SIMD single precision.
43 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
45 * \ingroup module_simd
58 #include "gromacs/math/utilities.h"
60 #include "impl_reference_definitions.h"
66 /*! \addtogroup module_simd */
69 /*! \name SIMD implementation data types and built-in conversions between types
73 /*! \libinternal \brief Float SIMD variable. Available if GMX_SIMD_HAVE_FLOAT is 1.
75 * \note This variable cannot be placed inside other structures or classes, since
76 * some compilers (including at least clang-3.7) appear to lose the
77 * alignment. This is likely particularly severe when allocating such
78 * memory on the heap, but it occurs for stack structures too.
85 //! \brief Construct from scalar
86 SimdFloat(float f) { simdInternal_.fill(f); }
88 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
90 * This has to be public to enable usage in combination with static inline
91 * functions, but it should never, EVER, be accessed by any code outside
92 * the corresponding implementation directory since the type will depend
93 * on the architecture.
95 std::array<float, GMX_SIMD_FLOAT_WIDTH> simdInternal_;
98 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from float.
100 * This is also the widest integer SIMD type. Available if GMX_SIMD_HAVE_FLOAT is 1.
102 * \note The integer SIMD type will always be available, but on architectures
103 * that do not have any real integer SIMD support it might be defined as the
104 * floating-point type. This will work fine, since there are separate defines
105 * for whether the implementation can actually do any operations on integer
107 * \note This variable cannot be placed inside other structures or classes, since
108 * some compilers (including at least clang-3.7) appear to lose the
109 * alignment. This is likely particularly severe when allocating such
110 * memory on the heap, but it occurs for stack structures too.
117 //! \brief Construct from scalar
118 SimdFInt32(std::int32_t i) { simdInternal_.fill(i); }
120 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
122 * This has to be public to enable usage in combination with static inline
123 * functions, but it should never, EVER, be accessed by any code outside
124 * the corresponding implementation directory since the type will depend
125 * on the architecture.
127 std::array<std::int32_t, GMX_SIMD_FINT32_WIDTH> simdInternal_;
130 /*! \libinternal \brief Boolean type for float SIMD data.
132 * Available if GMX_SIMD_HAVE_FLOAT is 1.
134 * \note This variable cannot be placed inside other structures or classes, since
135 * some compilers (including at least clang-3.7) appear to lose the
136 * alignment. This is likely particularly severe when allocating such
137 * memory on the heap, but it occurs for stack structures too.
144 //! \brief Construct from scalar
145 SimdFBool(bool b) { simdInternal_.fill(b); }
147 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
149 * This has to be public to enable usage in combination with static inline
150 * functions, but it should never, EVER, be accessed by any code outside
151 * the corresponding implementation directory since the type will depend
152 * on the architecture.
154 std::array<bool, GMX_SIMD_FLOAT_WIDTH> simdInternal_;
157 /*! \libinternal \brief Boolean type for integer datatypes corresponding to float SIMD.
159 * Available if GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
161 * \note This variable cannot be placed inside other structures or classes, since
162 * some compilers (including at least clang-3.7) appear to lose the
163 * alignment. This is likely particularly severe when allocating such
164 * memory on the heap, but it occurs for stack structures too.
171 //! \brief Construct from scalar
172 SimdFIBool(bool b) { simdInternal_.fill(b); }
174 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
176 * This has to be public to enable usage in combination with static inline
177 * functions, but it should never, EVER, be accessed by any code outside
178 * the corresponding implementation directory since the type will depend
179 * on the architecture.
181 std::array<bool, GMX_SIMD_FINT32_WIDTH> simdInternal_;
186 * \name SIMD implementation load/store operations for single precision floating point
190 /*! \brief Load \ref GMX_SIMD_FLOAT_WIDTH float numbers from aligned memory.
192 * \param m Pointer to memory aligned to the SIMD width.
193 * \return SIMD variable with data loaded.
195 static inline SimdFloat gmx_simdcall simdLoad(const float* m, SimdFloatTag = {})
199 assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(float)) == 0);
201 std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
205 /*! \brief Store the contents of SIMD float variable to aligned memory m.
207 * \param[out] m Pointer to memory, aligned to SIMD width.
208 * \param a SIMD variable to store
210 static inline void gmx_simdcall store(float* m, SimdFloat a)
212 assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(float)) == 0);
214 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
217 /*! \brief Load SIMD float from unaligned memory.
219 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
221 * \param m Pointer to memory, no alignment requirement.
222 * \return SIMD variable with data loaded.
224 static inline SimdFloat gmx_simdcall simdLoadU(const float* m, SimdFloatTag = {})
227 std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
231 /*! \brief Store SIMD float to unaligned memory.
233 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
235 * \param[out] m Pointer to memory, no alignment requirement.
236 * \param a SIMD variable to store.
238 static inline void gmx_simdcall storeU(float* m, SimdFloat a)
240 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
243 /*! \brief Set all SIMD float variable elements to 0.0.
245 * You should typically just call \ref gmx::setZero(), which uses proxy objects
246 * internally to handle all types rather than adding the suffix used here.
250 static inline SimdFloat gmx_simdcall setZeroF()
252 return SimdFloat(0.0F);
259 * \name SIMD implementation load/store operations for integers (corresponding to float)
263 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
265 * You should typically just call \ref gmx::load(), which uses proxy objects
266 * internally to handle all types rather than adding the suffix used here.
268 * \param m Pointer to memory, aligned to (float) integer SIMD width.
269 * \return SIMD integer variable.
271 static inline SimdFInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdFInt32Tag)
275 assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(std::int32_t)) == 0);
277 std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
281 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
283 * \param m Memory aligned to (float) integer SIMD width.
284 * \param a SIMD variable to store.
286 static inline void gmx_simdcall store(std::int32_t* m, SimdFInt32 a)
288 assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(std::int32_t)) == 0);
290 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
293 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx::SimdFloat.
295 * You should typically just call \ref gmx::loadU(), which uses proxy objects
296 * internally to handle all types rather than adding the suffix used here.
298 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
300 * \param m Pointer to memory, no alignment requirements.
301 * \return SIMD integer variable.
303 static inline SimdFInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdFInt32Tag)
306 std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
310 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
312 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
314 * \param m Memory pointer, no alignment requirements.
315 * \param a SIMD variable to store.
317 static inline void gmx_simdcall storeU(std::int32_t* m, SimdFInt32 a)
319 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
322 /*! \brief Set all SIMD (float) integer variable elements to 0.
324 * You should typically just call \ref gmx::setZero(), which uses proxy objects
325 * internally to handle all types rather than adding the suffix used here.
329 static inline SimdFInt32 gmx_simdcall setZeroFI()
331 return SimdFInt32(0);
334 /*! \brief Extract element with index i from \ref gmx::SimdFInt32.
336 * Available if \ref GMX_SIMD_HAVE_FINT32_EXTRACT is 1.
338 * \tparam index Compile-time constant, position to extract (first position is 0)
339 * \param a SIMD variable from which to extract value.
340 * \return Single integer from position index in SIMD variable.
343 static inline std::int32_t gmx_simdcall extract(SimdFInt32 a)
345 return a.simdInternal_[index];
350 * \name SIMD implementation single precision floating-point bitwise logical operations
354 /*! \brief Bitwise and for two SIMD float variables.
356 * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
360 * \return data1 & data2
362 static inline SimdFloat gmx_simdcall operator&(SimdFloat a, SimdFloat b)
372 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
374 conv1.r = a.simdInternal_[i];
375 conv2.r = b.simdInternal_[i];
376 conv1.i = conv1.i & conv2.i;
377 res.simdInternal_[i] = conv1.r;
382 /*! \brief Bitwise andnot for SIMD float.
384 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
388 * \return (~data1) & data2
390 static inline SimdFloat gmx_simdcall andNot(SimdFloat a, SimdFloat b)
400 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
402 conv1.r = a.simdInternal_[i];
403 conv2.r = b.simdInternal_[i];
404 conv1.i = ~conv1.i & conv2.i;
405 res.simdInternal_[i] = conv1.r;
410 /*! \brief Bitwise or for SIMD float.
412 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
416 * \return data1 | data2
418 static inline SimdFloat gmx_simdcall operator|(SimdFloat a, SimdFloat b)
428 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
430 conv1.r = a.simdInternal_[i];
431 conv2.r = b.simdInternal_[i];
432 conv1.i = conv1.i | conv2.i;
433 res.simdInternal_[i] = conv1.r;
438 /*! \brief Bitwise xor for SIMD float.
440 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
444 * \return data1 ^ data2
446 static inline SimdFloat gmx_simdcall operator^(SimdFloat a, SimdFloat b)
456 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
458 conv1.r = a.simdInternal_[i];
459 conv2.r = b.simdInternal_[i];
460 conv1.i = conv1.i ^ conv2.i;
461 res.simdInternal_[i] = conv1.r;
468 * \name SIMD implementation single precision floating-point arithmetics
472 /*! \brief Add two float SIMD variables.
478 static inline SimdFloat gmx_simdcall operator+(SimdFloat a, SimdFloat b)
482 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
484 res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
489 /*! \brief Subtract two float SIMD variables.
495 static inline SimdFloat gmx_simdcall operator-(SimdFloat a, SimdFloat b)
499 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
501 res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
506 /*! \brief SIMD single precision negate.
508 * \param a SIMD double precision value
511 static inline SimdFloat gmx_simdcall operator-(SimdFloat a)
515 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
517 res.simdInternal_[i] = -a.simdInternal_[i];
522 /*! \brief Multiply two float SIMD variables.
528 static inline SimdFloat gmx_simdcall operator*(SimdFloat a, SimdFloat b)
532 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
534 res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
539 /*! \brief SIMD float Fused-multiply-add. Result is a*b+c.
546 static inline SimdFloat gmx_simdcall fma(SimdFloat a, SimdFloat b, SimdFloat c)
551 /*! \brief SIMD float Fused-multiply-subtract. Result is a*b-c.
558 static inline SimdFloat gmx_simdcall fms(SimdFloat a, SimdFloat b, SimdFloat c)
563 /*! \brief SIMD float Fused-negated-multiply-add. Result is -a*b+c.
570 static inline SimdFloat gmx_simdcall fnma(SimdFloat a, SimdFloat b, SimdFloat c)
575 /*! \brief SIMD float Fused-negated-multiply-subtract. Result is -a*b-c.
582 static inline SimdFloat gmx_simdcall fnms(SimdFloat a, SimdFloat b, SimdFloat c)
587 /*! \brief SIMD float 1.0/sqrt(x) lookup.
589 * This is a low-level instruction that should only be called from routines
590 * implementing the inverse square root in simd_math.h.
592 * \param x Argument, x>0
593 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
595 static inline SimdFloat gmx_simdcall rsqrt(SimdFloat x)
599 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
601 res.simdInternal_[i] = 1.0F / std::sqrt(x.simdInternal_[i]);
606 /*! \brief SIMD float 1.0/x lookup.
608 * This is a low-level instruction that should only be called from routines
609 * implementing the reciprocal in simd_math.h.
611 * \param x Argument, x!=0
612 * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
614 static inline SimdFloat gmx_simdcall rcp(SimdFloat x)
618 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
620 res.simdInternal_[i] = 1.0F / x.simdInternal_[i];
625 /*! \brief Add two float SIMD variables, masked version.
630 * \return a+b where mask is true, a otherwise.
632 static inline SimdFloat gmx_simdcall maskAdd(SimdFloat a, SimdFloat b, SimdFBool m)
636 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
638 res.simdInternal_[i] = a.simdInternal_[i] + (m.simdInternal_[i] ? b.simdInternal_[i] : 0.0F);
643 /*! \brief Multiply two float SIMD variables, masked version.
648 * \return a*b where mask is true, 0.0 otherwise.
650 static inline SimdFloat gmx_simdcall maskzMul(SimdFloat a, SimdFloat b, SimdFBool m)
654 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
656 res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i]) : 0.0F;
661 /*! \brief SIMD float fused multiply-add, masked version.
667 * \return a*b+c where mask is true, 0.0 otherwise.
669 static inline SimdFloat gmx_simdcall maskzFma(SimdFloat a, SimdFloat b, SimdFloat c, SimdFBool m)
673 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
675 res.simdInternal_[i] =
676 m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i] + c.simdInternal_[i]) : 0.0F;
681 /*! \brief SIMD float 1.0/sqrt(x) lookup, masked version.
683 * This is a low-level instruction that should only be called from routines
684 * implementing the inverse square root in simd_math.h.
686 * \param x Argument, x>0 for entries where mask is true.
688 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
689 * The result for masked-out entries will be 0.0.
691 static inline SimdFloat gmx_simdcall maskzRsqrt(SimdFloat x, SimdFBool m)
695 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
697 res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0F / std::sqrt(x.simdInternal_[i]) : 0.0F;
702 /*! \brief SIMD float 1.0/x lookup, masked version.
704 * This is a low-level instruction that should only be called from routines
705 * implementing the reciprocal in simd_math.h.
707 * \param x Argument, x>0 for entries where mask is true.
709 * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
710 * The result for masked-out entries will be 0.0.
712 static inline SimdFloat gmx_simdcall maskzRcp(SimdFloat x, SimdFBool m)
716 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
718 res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0F / x.simdInternal_[i] : 0.0F;
723 /*! \brief SIMD float Floating-point abs().
725 * \param a any floating point values
726 * \return abs(a) for each element.
728 static inline SimdFloat gmx_simdcall abs(SimdFloat a)
732 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
734 res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
739 /*! \brief Set each SIMD float element to the largest from two variables.
741 * \param a Any floating-point value
742 * \param b Any floating-point value
743 * \return max(a,b) for each element.
745 static inline SimdFloat gmx_simdcall max(SimdFloat a, SimdFloat b)
749 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
751 res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
756 /*! \brief Set each SIMD float element to the smallest from two variables.
758 * \param a Any floating-point value
759 * \param b Any floating-point value
760 * \return min(a,b) for each element.
762 static inline SimdFloat gmx_simdcall min(SimdFloat a, SimdFloat b)
766 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
768 res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
773 /*! \brief SIMD float round to nearest integer value (in floating-point format).
775 * \param a Any floating-point value
776 * \return The nearest integer, represented in floating-point format.
778 * \note Round mode is implementation defined. The only guarantee is that it
779 * is consistent between rounding functions (round, cvtR2I).
781 static inline SimdFloat gmx_simdcall round(SimdFloat a)
785 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
787 res.simdInternal_[i] = std::round(a.simdInternal_[i]);
792 /*! \brief Truncate SIMD float, i.e. round towards zero - common hardware instruction.
794 * \param a Any floating-point value
795 * \return Integer rounded towards zero, represented in floating-point format.
797 * \note This is truncation towards zero, not floor(). The reason for this
798 * is that truncation is virtually always present as a dedicated hardware
799 * instruction, but floor() frequently isn't.
801 static inline SimdFloat gmx_simdcall trunc(SimdFloat a)
805 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
807 res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
812 /*! \brief Extract (integer) exponent and fraction from single precision SIMD.
814 * \tparam opt By default this function behaves like the standard
815 * library such that frexp(+-0,exp) returns +-0 and
816 * stores 0 in the exponent when value is 0. If you
817 * know the argument is always nonzero, you can set
818 * the template parameter to MathOptimization::Unsafe
819 * to make it slightly faster.
821 * \param value Floating-point value to extract from
822 * \param[out] exponent Returned exponent of value, integer SIMD format.
823 * \return Fraction of value, floating-point SIMD format.
825 template<MathOptimization opt = MathOptimization::Safe>
826 static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent)
830 for (std::size_t i = 0; i < fraction.simdInternal_.size(); i++)
832 fraction.simdInternal_[i] = std::frexp(value.simdInternal_[i], &exponent->simdInternal_[i]);
837 /*! \brief Multiply a SIMD float value by the number 2 raised to an exp power.
839 * \tparam opt By default, this routine will return zero for input arguments
840 * that are so small they cannot be reproduced in the current
841 * precision. If the unsafe math optimization template parameter
842 * setting is used, these tests are skipped, and the result will
843 * be undefined (possible even NaN). This might happen below -127
844 * in single precision or -1023 in double, although some
845 * might use denormal support to extend the range.
847 * \param value Floating-point number to multiply with new exponent
848 * \param exponent Integer that will not overflow as 2^exponent.
849 * \return value*2^exponent
851 template<MathOptimization opt = MathOptimization::Safe>
852 static inline SimdFloat gmx_simdcall ldexp(SimdFloat value, SimdFInt32 exponent)
856 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
858 // std::ldexp already takes care of clamping arguments, so we do not
859 // need to do anything in the reference implementation
860 res.simdInternal_[i] = std::ldexp(value.simdInternal_[i], exponent.simdInternal_[i]);
865 /*! \brief Return sum of all elements in SIMD float variable.
867 * \param a SIMD variable to reduce/sum.
868 * \return The sum of all elements in the argument variable.
871 static inline float gmx_simdcall reduce(SimdFloat a)
875 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
877 sum += a.simdInternal_[i];
884 * \name SIMD implementation single precision floating-point comparisons, boolean, selection.
888 /*! \brief SIMD a==b for single SIMD.
892 * \return Each element of the boolean will be set to true if a==b.
894 * Beware that exact floating-point comparisons are difficult.
896 static inline SimdFBool gmx_simdcall operator==(SimdFloat a, SimdFloat b)
900 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
902 res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
907 /*! \brief SIMD a!=b for single SIMD.
911 * \return Each element of the boolean will be set to true if a!=b.
913 * Beware that exact floating-point comparisons are difficult.
915 static inline SimdFBool gmx_simdcall operator!=(SimdFloat a, SimdFloat b)
919 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
921 res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
926 /*! \brief SIMD a<b for single SIMD.
930 * \return Each element of the boolean will be set to true if a<b.
932 static inline SimdFBool gmx_simdcall operator<(SimdFloat a, SimdFloat b)
936 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
938 res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
943 /*! \brief SIMD a<=b for single SIMD.
947 * \return Each element of the boolean will be set to true if a<=b.
949 static inline SimdFBool gmx_simdcall operator<=(SimdFloat a, SimdFloat b)
953 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
955 res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
960 /*! \brief Return true if any bits are set in the single precision SIMD.
962 * This function is used to handle bitmasks, mainly for exclusions in the
963 * inner kernels. Note that it will return true even for -0.0F (sign bit set),
964 * so it is not identical to not-equal.
967 * \return Each element of the boolean will be true if any bit in a is nonzero.
969 static inline SimdFBool gmx_simdcall testBits(SimdFloat a)
973 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
981 conv.f = a.simdInternal_[i];
982 res.simdInternal_[i] = (conv.i != 0);
987 /*! \brief Logical \a and on single precision SIMD booleans.
989 * \param a logical vars 1
990 * \param b logical vars 2
991 * \return For each element, the result boolean is true if a \& b are true.
993 * \note This is not necessarily a bitwise operation - the storage format
994 * of booleans is implementation-dependent.
996 static inline SimdFBool gmx_simdcall operator&&(SimdFBool a, SimdFBool b)
1000 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1002 res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1007 /*! \brief Logical \a or on single precision SIMD booleans.
1009 * \param a logical vars 1
1010 * \param b logical vars 2
1011 * \return For each element, the result boolean is true if a or b is true.
1013 * Note that this is not necessarily a bitwise operation - the storage format
1014 * of booleans is implementation-dependent.
1017 static inline SimdFBool gmx_simdcall operator||(SimdFBool a, SimdFBool b)
1021 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1023 res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1028 /*! \brief Returns non-zero if any of the boolean in SIMD a is True, otherwise 0.
1030 * \param a Logical variable.
1031 * \return true if any element in a is true, otherwise false.
1033 * The actual return value for truth will depend on the architecture,
1034 * so any non-zero value is considered truth.
1036 static inline bool gmx_simdcall anyTrue(SimdFBool a)
1040 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1042 res = res || a.simdInternal_[i];
1047 /*! \brief Select from single precision SIMD variable where boolean is true.
1049 * \param a Floating-point variable to select from
1050 * \param mask Boolean selector
1051 * \return For each element, a is selected for true, 0 for false.
1053 static inline SimdFloat gmx_simdcall selectByMask(SimdFloat a, SimdFBool mask)
1057 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1059 res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0F;
1064 /*! \brief Select from single precision SIMD variable where boolean is false.
1066 * \param a Floating-point variable to select from
1067 * \param mask Boolean selector
1068 * \return For each element, a is selected for false, 0 for true (sic).
1070 static inline SimdFloat gmx_simdcall selectByNotMask(SimdFloat a, SimdFBool mask)
1074 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1076 res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0F : a.simdInternal_[i];
1081 /*! \brief Vector-blend SIMD float selection.
1083 * \param a First source
1084 * \param b Second source
1085 * \param sel Boolean selector
1086 * \return For each element, select b if sel is true, a otherwise.
1088 static inline SimdFloat gmx_simdcall blend(SimdFloat a, SimdFloat b, SimdFBool sel)
1092 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1094 res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1101 * \name SIMD implementation integer (corresponding to float) bitwise logical operations
1105 /*! \brief Integer SIMD bitwise and.
1107 * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1109 * \note You can \a not use this operation directly to select based on a boolean
1110 * SIMD variable, since booleans are separate from integer SIMD. If that
1111 * is what you need, have a look at \ref gmx::selectByMask instead.
1113 * \param a first integer SIMD
1114 * \param b second integer SIMD
1115 * \return a \& b (bitwise and)
1117 static inline SimdFInt32 gmx_simdcall operator&(SimdFInt32 a, SimdFInt32 b)
1121 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1123 res.simdInternal_[i] = a.simdInternal_[i] & b.simdInternal_[i];
1128 /*! \brief Integer SIMD bitwise not/complement.
1130 * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1132 * \note You can \a not use this operation directly to select based on a boolean
1133 * SIMD variable, since booleans are separate from integer SIMD. If that
1134 * is what you need, have a look at \ref gmx::selectByMask instead.
1136 * \param a integer SIMD
1137 * \param b integer SIMD
1140 static inline SimdFInt32 gmx_simdcall andNot(SimdFInt32 a, SimdFInt32 b)
1144 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1146 res.simdInternal_[i] = ~a.simdInternal_[i] & b.simdInternal_[i];
1151 /*! \brief Integer SIMD bitwise or.
1153 * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1155 * \param a first integer SIMD
1156 * \param b second integer SIMD
1157 * \return a \| b (bitwise or)
1159 static inline SimdFInt32 gmx_simdcall operator|(SimdFInt32 a, SimdFInt32 b)
1163 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1165 res.simdInternal_[i] = a.simdInternal_[i] | b.simdInternal_[i];
1170 /*! \brief Integer SIMD bitwise xor.
1172 * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1174 * \param a first integer SIMD
1175 * \param b second integer SIMD
1176 * \return a ^ b (bitwise xor)
1178 static inline SimdFInt32 gmx_simdcall operator^(SimdFInt32 a, SimdFInt32 b)
1182 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1184 res.simdInternal_[i] = a.simdInternal_[i] ^ b.simdInternal_[i];
1191 * \name SIMD implementation integer (corresponding to float) arithmetics
1195 /*! \brief Add SIMD integers.
1197 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1198 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1204 static inline SimdFInt32 gmx_simdcall operator+(SimdFInt32 a, SimdFInt32 b)
1208 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1210 res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
1215 /*! \brief Subtract SIMD integers.
1217 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1218 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1224 static inline SimdFInt32 gmx_simdcall operator-(SimdFInt32 a, SimdFInt32 b)
1228 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1230 res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
1235 /*! \brief Multiply SIMD integers.
1237 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1238 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1244 * \note Only the low 32 bits are retained, so this can overflow.
1246 static inline SimdFInt32 gmx_simdcall operator*(SimdFInt32 a, SimdFInt32 b)
1250 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1252 res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
1259 * \name SIMD implementation integer (corresponding to float) comparisons, boolean, selection
1263 /*! \brief Equality comparison of two integers corresponding to float values.
1265 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1267 * \param a SIMD integer1
1268 * \param b SIMD integer2
1269 * \return SIMD integer boolean with true for elements where a==b
1271 static inline SimdFIBool gmx_simdcall operator==(SimdFInt32 a, SimdFInt32 b)
1275 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1277 res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
1282 /*! \brief Less-than comparison of two SIMD integers corresponding to float values.
1284 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1286 * \param a SIMD integer1
1287 * \param b SIMD integer2
1288 * \return SIMD integer boolean with true for elements where a<b
1290 static inline SimdFIBool gmx_simdcall operator<(SimdFInt32 a, SimdFInt32 b)
1294 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1296 res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
1301 /*! \brief Check if any bit is set in each element
1303 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1305 * \param a SIMD integer
1306 * \return SIMD integer boolean with true for elements where any bit is set
1308 static inline SimdFIBool gmx_simdcall testBits(SimdFInt32 a)
1312 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1314 res.simdInternal_[i] = (a.simdInternal_[i] != 0);
1319 /*! \brief Logical AND on SimdFIBool.
1321 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1323 * \param a SIMD boolean 1
1324 * \param b SIMD boolean 2
1325 * \return True for elements where both a and b are true.
1327 static inline SimdFIBool gmx_simdcall operator&&(SimdFIBool a, SimdFIBool b)
1331 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1333 res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1338 /*! \brief Logical OR on SimdFIBool.
1340 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1342 * \param a SIMD boolean 1
1343 * \param b SIMD boolean 2
1344 * \return True for elements where both a and b are true.
1346 static inline SimdFIBool gmx_simdcall operator||(SimdFIBool a, SimdFIBool b)
1350 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1352 res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1357 /*! \brief Returns true if any of the boolean in x is True, otherwise 0.
1359 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1361 * The actual return value for "any true" will depend on the architecture.
1362 * Any non-zero value should be considered truth.
1364 * \param a SIMD boolean
1365 * \return True if any of the elements in a is true, otherwise 0.
1367 static inline bool gmx_simdcall anyTrue(SimdFIBool a)
1371 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1373 res = res || a.simdInternal_[i];
1378 /*! \brief Select from \ref gmx::SimdFInt32 variable where boolean is true.
1380 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1382 * \param a SIMD integer to select from
1383 * \param mask Boolean selector
1384 * \return Elements from a where sel is true, 0 otherwise.
1386 static inline SimdFInt32 gmx_simdcall selectByMask(SimdFInt32 a, SimdFIBool mask)
1390 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1392 res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0F;
1397 /*! \brief Select from \ref gmx::SimdFInt32 variable where boolean is false.
1399 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1401 * \param a SIMD integer to select from
1402 * \param mask Boolean selector
1403 * \return Elements from a where sel is false, 0 otherwise (sic).
1405 static inline SimdFInt32 gmx_simdcall selectByNotMask(SimdFInt32 a, SimdFIBool mask)
1409 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1411 res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0F : a.simdInternal_[i];
1416 /*! \brief Vector-blend SIMD integer selection.
1418 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1420 * \param a First source
1421 * \param b Second source
1422 * \param sel Boolean selector
1423 * \return For each element, select b if sel is true, a otherwise.
1425 static inline SimdFInt32 gmx_simdcall blend(SimdFInt32 a, SimdFInt32 b, SimdFIBool sel)
1429 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1431 res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1438 * \name SIMD implementation conversion operations
1442 /*! \brief Round single precision floating point to integer.
1444 * \param a SIMD floating-point
1445 * \return SIMD integer, rounded to nearest integer.
1447 * \note Round mode is implementation defined. The only guarantee is that it
1448 * is consistent between rounding functions (round, cvtR2I).
1450 static inline SimdFInt32 gmx_simdcall cvtR2I(SimdFloat a)
1454 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1456 b.simdInternal_[i] = std::round(a.simdInternal_[i]);
1461 /*! \brief Truncate single precision floating point to integer.
1463 * \param a SIMD floating-point
1464 * \return SIMD integer, truncated to nearest integer.
1466 static inline SimdFInt32 gmx_simdcall cvttR2I(SimdFloat a)
1470 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1472 b.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
1477 /*! \brief Convert integer to single precision floating point.
1479 * \param a SIMD integer
1480 * \return SIMD floating-point
1482 static inline SimdFloat gmx_simdcall cvtI2R(SimdFInt32 a)
1486 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1488 b.simdInternal_[i] = a.simdInternal_[i];
1493 /*! \brief Convert from single precision boolean to corresponding integer boolean
1495 * \param a SIMD floating-point boolean
1496 * \return SIMD integer boolean
1498 static inline SimdFIBool gmx_simdcall cvtB2IB(SimdFBool a)
1502 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1504 b.simdInternal_[i] = a.simdInternal_[i];
1509 /*! \brief Convert from integer boolean to corresponding single precision boolean
1511 * \param a SIMD integer boolean
1512 * \return SIMD floating-point boolean
1514 static inline SimdFBool gmx_simdcall cvtIB2B(SimdFIBool a)
1518 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1520 b.simdInternal_[i] = a.simdInternal_[i];
1532 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H