2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2019,2020,2021, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
37 #define GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
39 /*! \libinternal \file
41 * \brief Reference implementation, SIMD double precision.
43 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
45 * \ingroup module_simd
58 #include "gromacs/math/utilities.h"
59 #include "gromacs/utility/fatalerror.h"
61 #include "impl_reference_definitions.h"
62 #include "impl_reference_simd_float.h"
68 /*! \addtogroup module_simd */
71 /*! \name SIMD implementation data types
75 /*! \libinternal \brief Double SIMD variable. Available if GMX_SIMD_HAVE_DOUBLE is 1.
77 * \note This variable cannot be placed inside other structures or classes, since
78 * some compilers (including at least clang-3.7) appear to lose the
79 * alignment. This is likely particularly severe when allocating such
80 * memory on the heap, but it occurs for stack structures too.
87 //! \brief Construct from scalar
88 SimdDouble(double d) { simdInternal_.fill(d); }
90 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
92 * This has to be public to enable usage in combination with static inline
93 * functions, but it should never, EVER, be accessed by any code outside
94 * the corresponding implementation directory since the type will depend
95 * on the architecture.
97 std::array<double, GMX_SIMD_DOUBLE_WIDTH> simdInternal_;
100 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from double.
102 * Available if GMX_SIMD_HAVE_DOUBLE is 1.
104 * \note The integer SIMD type will always be available, but on architectures
105 * that do not have any real integer SIMD support it might be defined as the
106 * floating-point type. This will work fine, since there are separate defines
107 * for whether the implementation can actually do any operations on integer
110 * \note This variable cannot be placed inside other structures or classes, since
111 * some compilers (including at least clang-3.7) appear to lose the
112 * alignment. This is likely particularly severe when allocating such
113 * memory on the heap, but it occurs for stack structures too.
120 //! \brief Construct from scalar
121 SimdDInt32(std::int32_t i) { simdInternal_.fill(i); }
123 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
125 * This has to be public to enable usage in combination with static inline
126 * functions, but it should never, EVER, be accessed by any code outside
127 * the corresponding implementation directory since the type will depend
128 * on the architecture.
130 std::array<std::int32_t, GMX_SIMD_DINT32_WIDTH> simdInternal_;
133 /*! \libinternal \brief Boolean type for double SIMD data.
135 * Available if GMX_SIMD_HAVE_DOUBLE is 1.
137 * \note This variable cannot be placed inside other structures or classes, since
138 * some compilers (including at least clang-3.7) appear to lose the
139 * alignment. This is likely particularly severe when allocating such
140 * memory on the heap, but it occurs for stack structures too.
147 //! \brief Construct from scalar bool
148 SimdDBool(bool b) { simdInternal_.fill(b); }
150 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
152 * This has to be public to enable usage in combination with static inline
153 * functions, but it should never, EVER, be accessed by any code outside
154 * the corresponding implementation directory since the type will depend
155 * on the architecture.
157 std::array<bool, GMX_SIMD_DOUBLE_WIDTH> simdInternal_;
160 /*! \libinternal \brief Boolean type for integer datatypes corresponding to double SIMD.
162 * Available if GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
164 * \note This variable cannot be placed inside other structures or classes, since
165 * some compilers (including at least clang-3.7) appear to lose the
166 * alignment. This is likely particularly severe when allocating such
167 * memory on the heap, but it occurs for stack structures too.
174 //! \brief Construct from scalar
175 SimdDIBool(bool b) { simdInternal_.fill(b); }
177 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
179 * This has to be public to enable usage in combination with static inline
180 * functions, but it should never, EVER, be accessed by any code outside
181 * the corresponding implementation directory since the type will depend
182 * on the architecture.
184 std::array<bool, GMX_SIMD_DINT32_WIDTH> simdInternal_;
189 * \name SIMD implementation load/store operations for double precision floating point
193 /*! \brief Load \ref GMX_SIMD_DOUBLE_WIDTH numbers from aligned memory.
195 * \param m Pointer to memory aligned to the SIMD width.
196 * \return SIMD variable with data loaded.
198 static inline SimdDouble gmx_simdcall simdLoad(const double* m, SimdDoubleTag = {})
202 assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(double)) == 0);
204 std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
208 /*! \brief Store the contents of SIMD double variable to aligned memory m.
210 * \param[out] m Pointer to memory, aligned to SIMD width.
211 * \param a SIMD variable to store
213 static inline void gmx_simdcall store(double* m, SimdDouble a)
215 assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(double)) == 0);
217 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
220 /*! \brief Load SIMD double from unaligned memory.
222 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
224 * \param m Pointer to memory, no alignment requirement.
225 * \return SIMD variable with data loaded.
227 static inline SimdDouble gmx_simdcall simdLoadU(const double* m, SimdDoubleTag = {})
230 std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
234 /*! \brief Store SIMD double to unaligned memory.
236 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
238 * \param[out] m Pointer to memory, no alignment requirement.
239 * \param a SIMD variable to store.
241 static inline void gmx_simdcall storeU(double* m, SimdDouble a)
243 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
246 /*! \brief Set all SIMD double variable elements to 0.0.
248 * You should typically just call \ref gmx::setZero(), which uses proxy objects
249 * internally to handle all types rather than adding the suffix used here.
253 static inline SimdDouble gmx_simdcall setZeroD()
255 return SimdDouble(0.0);
260 * \name SIMD implementation load/store operations for integers (corresponding to double)
264 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
266 * You should typically just call \ref gmx::load(), which uses proxy objects
267 * internally to handle all types rather than adding the suffix used here.
269 * \param m Pointer to memory, aligned to (double) integer SIMD width.
270 * \return SIMD integer variable.
272 static inline SimdDInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdDInt32Tag)
276 assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(std::int32_t)) == 0);
278 std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
282 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
284 * \param m Memory aligned to (double) integer SIMD width.
285 * \param a SIMD (double) integer variable to store.
287 static inline void gmx_simdcall store(std::int32_t* m, SimdDInt32 a)
289 assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(std::int32_t)) == 0);
291 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
294 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx::SimdDouble.
296 * You should typically just call \ref gmx::loadU(), which uses proxy objects
297 * internally to handle all types rather than adding the suffix used here.
299 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
301 * \param m Pointer to memory, no alignment requirements.
302 * \return SIMD integer variable.
304 static inline SimdDInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdDInt32Tag)
307 std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
311 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
313 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
315 * \param m Memory pointer, no alignment requirements.
316 * \param a SIMD (double) integer variable to store.
318 static inline void gmx_simdcall storeU(std::int32_t* m, SimdDInt32 a)
320 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
323 /*! \brief Set all SIMD (double) integer variable elements to 0.
325 * You should typically just call \ref gmx::setZero(), which uses proxy objects
326 * internally to handle all types rather than adding the suffix used here.
330 static inline SimdDInt32 gmx_simdcall setZeroDI()
332 return SimdDInt32(0);
335 /*! \brief Extract element with index i from \ref gmx::SimdDInt32.
337 * Available if \ref GMX_SIMD_HAVE_DINT32_EXTRACT is 1.
339 * \tparam index Compile-time constant, position to extract (first position is 0)
340 * \param a SIMD variable from which to extract value.
341 * \return Single integer from position index in SIMD variable.
344 static inline std::int32_t gmx_simdcall extract(SimdDInt32 a)
346 return a.simdInternal_[index];
351 * \name SIMD implementation double precision floating-point bitwise logical operations
355 /*! \brief Bitwise and for two SIMD double variables.
357 * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
361 * \return data1 & data2
363 static inline SimdDouble gmx_simdcall operator&(SimdDouble a, SimdDouble b)
373 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
375 conv1.r = a.simdInternal_[i];
376 conv2.r = b.simdInternal_[i];
377 conv1.i = conv1.i & conv2.i;
378 res.simdInternal_[i] = conv1.r;
383 /*! \brief Bitwise andnot for SIMD double.
385 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
389 * \return (~data1) & data2
391 static inline SimdDouble gmx_simdcall andNot(SimdDouble a, SimdDouble b)
401 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
403 conv1.r = a.simdInternal_[i];
404 conv2.r = b.simdInternal_[i];
405 conv1.i = ~conv1.i & conv2.i;
406 res.simdInternal_[i] = conv1.r;
411 /*! \brief Bitwise or for SIMD double.
413 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
417 * \return data1 | data2
419 static inline SimdDouble gmx_simdcall operator|(SimdDouble a, SimdDouble b)
429 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
431 conv1.r = a.simdInternal_[i];
432 conv2.r = b.simdInternal_[i];
433 conv1.i = conv1.i | conv2.i;
434 res.simdInternal_[i] = conv1.r;
439 /*! \brief Bitwise xor for SIMD double.
441 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
445 * \return data1 ^ data2
447 static inline SimdDouble gmx_simdcall operator^(SimdDouble a, SimdDouble b)
457 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
459 conv1.r = a.simdInternal_[i];
460 conv2.r = b.simdInternal_[i];
461 conv1.i = conv1.i ^ conv2.i;
462 res.simdInternal_[i] = conv1.r;
469 * \name SIMD implementation double precision floating-point arithmetics
473 /*! \brief Add two double SIMD variables.
479 static inline SimdDouble gmx_simdcall operator+(SimdDouble a, SimdDouble b)
483 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
485 res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
490 /*! \brief Subtract two double SIMD variables.
496 static inline SimdDouble gmx_simdcall operator-(SimdDouble a, SimdDouble b)
500 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
502 res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
507 /*! \brief SIMD double precision negate.
509 * \param a SIMD double precision value
512 static inline SimdDouble gmx_simdcall operator-(SimdDouble a)
516 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
518 res.simdInternal_[i] = -a.simdInternal_[i];
523 /*! \brief Multiply two double SIMD variables.
529 static inline SimdDouble gmx_simdcall operator*(SimdDouble a, SimdDouble b)
533 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
535 res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
540 /*! \brief SIMD double Fused-multiply-add. Result is a*b+c.
547 static inline SimdDouble gmx_simdcall fma(SimdDouble a, SimdDouble b, SimdDouble c)
552 /*! \brief SIMD double Fused-multiply-subtract. Result is a*b-c.
559 static inline SimdDouble gmx_simdcall fms(SimdDouble a, SimdDouble b, SimdDouble c)
564 /*! \brief SIMD double Fused-negated-multiply-add. Result is -a*b+c.
571 static inline SimdDouble gmx_simdcall fnma(SimdDouble a, SimdDouble b, SimdDouble c)
576 /*! \brief SIMD double Fused-negated-multiply-subtract. Result is -a*b-c.
583 static inline SimdDouble gmx_simdcall fnms(SimdDouble a, SimdDouble b, SimdDouble c)
588 /*! \brief double SIMD 1.0/sqrt(x) lookup.
590 * This is a low-level instruction that should only be called from routines
591 * implementing the inverse square root in simd_math.h.
593 * \param x Argument, x>0
594 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
596 static inline SimdDouble gmx_simdcall rsqrt(SimdDouble x)
600 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
602 // sic - we only use single precision for the lookup
603 res.simdInternal_[i] = 1.0F / std::sqrt(static_cast<float>(x.simdInternal_[i]));
608 /*! \brief SIMD double 1.0/x lookup.
610 * This is a low-level instruction that should only be called from routines
611 * implementing the reciprocal in simd_math.h.
613 * \param x Argument, x!=0
614 * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
616 static inline SimdDouble gmx_simdcall rcp(SimdDouble x)
620 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
622 // sic - we only use single precision for the lookup
623 res.simdInternal_[i] = 1.0F / static_cast<float>(x.simdInternal_[i]);
628 /*! \brief Add two double SIMD variables, masked version.
633 * \return a+b where mask is true, 0.0 otherwise.
635 static inline SimdDouble gmx_simdcall maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
639 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
641 res.simdInternal_[i] = a.simdInternal_[i] + (m.simdInternal_[i] ? b.simdInternal_[i] : 0.0);
646 /*! \brief Multiply two double SIMD variables, masked version.
651 * \return a*b where mask is true, 0.0 otherwise.
653 static inline SimdDouble gmx_simdcall maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
657 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
659 res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i]) : 0.0;
664 /*! \brief SIMD double fused multiply-add, masked version.
670 * \return a*b+c where mask is true, 0.0 otherwise.
672 static inline SimdDouble gmx_simdcall maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
676 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
678 res.simdInternal_[i] =
679 m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i] + c.simdInternal_[i]) : 0.0;
684 /*! \brief SIMD double 1.0/sqrt(x) lookup, masked version.
686 * This is a low-level instruction that should only be called from routines
687 * implementing the inverse square root in simd_math.h.
689 * \param x Argument, x>0 for entries where mask is true.
691 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
692 * The result for masked-out entries will be 0.0.
694 static inline SimdDouble gmx_simdcall maskzRsqrt(SimdDouble x, SimdDBool m)
698 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
700 // sic - we only use single precision for the lookup
701 res.simdInternal_[i] = (m.simdInternal_[i] != 0)
702 ? 1.0F / std::sqrt(static_cast<float>(x.simdInternal_[i]))
708 /*! \brief SIMD double 1.0/x lookup, masked version.
710 * This is a low-level instruction that should only be called from routines
711 * implementing the reciprocal in simd_math.h.
713 * \param x Argument, x>0 for entries where mask is true.
715 * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
716 * The result for masked-out entries will be 0.0.
718 static inline SimdDouble gmx_simdcall maskzRcp(SimdDouble x, SimdDBool m)
722 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
724 res.simdInternal_[i] =
725 (m.simdInternal_[i] != 0) ? 1.0F / static_cast<float>(x.simdInternal_[i]) : 0.0;
730 /*! \brief SIMD double floating-point fabs().
732 * \param a any floating point values
733 * \return fabs(a) for each element.
735 static inline SimdDouble gmx_simdcall abs(SimdDouble a)
739 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
741 res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
746 /*! \brief Set each SIMD double element to the largest from two variables.
748 * \param a Any floating-point value
749 * \param b Any floating-point value
750 * \return max(a,b) for each element.
752 static inline SimdDouble gmx_simdcall max(SimdDouble a, SimdDouble b)
756 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
758 res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
763 /*! \brief Set each SIMD double element to the smallest from two variables.
765 * \param a Any floating-point value
766 * \param b Any floating-point value
767 * \return min(a,b) for each element.
769 static inline SimdDouble gmx_simdcall min(SimdDouble a, SimdDouble b)
773 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
775 res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
780 /*! \brief SIMD double round to nearest integer value (in floating-point format).
782 * \param a Any floating-point value
783 * \return The nearest integer, represented in floating-point format.
785 * \note Round mode is implementation defined. The only guarantee is that it
786 * is consistent between rounding functions (round, cvtR2I).
788 static inline SimdDouble gmx_simdcall round(SimdDouble a)
792 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
794 res.simdInternal_[i] = std::round(a.simdInternal_[i]);
799 /*! \brief Truncate SIMD double, i.e. round towards zero - common hardware instruction.
801 * \param a Any floating-point value
802 * \return Integer rounded towards zero, represented in floating-point format.
804 * \note This is truncation towards zero, not floor(). The reason for this
805 * is that truncation is virtually always present as a dedicated hardware
806 * instruction, but floor() frequently isn't.
808 static inline SimdDouble gmx_simdcall trunc(SimdDouble a)
812 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
814 res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
819 /*! \brief Extract (integer) exponent and fraction from double precision SIMD.
821 * \tparam opt By default this function behaves like the standard
822 * library such that frexp(+-0,exp) returns +-0 and
823 * stores 0 in the exponent when value is 0. If you
824 * know the argument is always nonzero, you can set
825 * the template parameter to MathOptimization::Unsafe
826 * to make it slightly faster.
828 * \param value Floating-point value to extract from
829 * \param[out] exponent Returned exponent of value, integer SIMD format.
830 * \return Fraction of value, floating-point SIMD format.
832 template<MathOptimization opt = MathOptimization::Safe>
833 static inline SimdDouble gmx_simdcall frexp(SimdDouble value, SimdDInt32* exponent)
837 for (std::size_t i = 0; i < fraction.simdInternal_.size(); i++)
839 fraction.simdInternal_[i] = std::frexp(value.simdInternal_[i], &exponent->simdInternal_[i]);
844 /*! \brief Multiply a SIMD double value by the number 2 raised to an exp power.
846 * \tparam opt By default, this routine will return zero for input arguments
847 * that are so small they cannot be reproduced in the current
848 * precision. If the unsafe math optimization template parameter
849 * setting is used, these tests are skipped, and the result will
850 * be undefined (possible even NaN). This might happen below -127
851 * in single precision or -1023 in double, although some
852 * might use denormal support to extend the range.
854 * \param value Floating-point number to multiply with new exponent
855 * \param exponent Integer that will not overflow as 2^exponent.
856 * \return value*2^exponent
858 template<MathOptimization opt = MathOptimization::Safe>
859 static inline SimdDouble gmx_simdcall ldexp(SimdDouble value, SimdDInt32 exponent)
863 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
865 // std::ldexp already takes care of clamping arguments, so we do not
866 // need to do anything in the reference implementation
867 res.simdInternal_[i] = std::ldexp(value.simdInternal_[i], exponent.simdInternal_[i]);
872 /*! \brief Return sum of all elements in SIMD double variable.
874 * \param a SIMD variable to reduce/sum.
875 * \return The sum of all elements in the argument variable.
878 static inline double gmx_simdcall reduce(SimdDouble a)
882 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
884 sum += a.simdInternal_[i];
891 * \name SIMD implementation double precision floating-point comparison, boolean, selection.
895 /*! \brief SIMD a==b for double SIMD.
899 * \return Each element of the boolean will be set to true if a==b.
901 * Beware that exact floating-point comparisons are difficult.
903 static inline SimdDBool gmx_simdcall operator==(SimdDouble a, SimdDouble b)
907 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
909 res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
914 /*! \brief SIMD a!=b for double SIMD.
918 * \return Each element of the boolean will be set to true if a!=b.
920 * Beware that exact floating-point comparisons are difficult.
922 static inline SimdDBool gmx_simdcall operator!=(SimdDouble a, SimdDouble b)
926 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
928 res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
933 /*! \brief SIMD a<b for double SIMD.
937 * \return Each element of the boolean will be set to true if a<b.
939 static inline SimdDBool gmx_simdcall operator<(SimdDouble a, SimdDouble b)
943 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
945 res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
950 /*! \brief SIMD a<=b for double SIMD.
954 * \return Each element of the boolean will be set to true if a<=b.
956 static inline SimdDBool gmx_simdcall operator<=(SimdDouble a, SimdDouble b)
960 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
962 res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
967 /*! \brief Return true if any bits are set in the single precision SIMD.
969 * This function is used to handle bitmasks, mainly for exclusions in the
970 * inner kernels. Note that it will return true even for -0.0 (sign bit set),
971 * so it is not identical to not-equal.
974 * \return Each element of the boolean will be true if any bit in a is nonzero.
976 static inline SimdDBool gmx_simdcall testBits(SimdDouble a)
980 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
988 conv.d = a.simdInternal_[i];
989 res.simdInternal_[i] = (conv.i != 0);
994 /*! \brief Logical \a and on double precision SIMD booleans.
996 * \param a logical vars 1
997 * \param b logical vars 2
998 * \return For each element, the result boolean is true if a \& b are true.
1000 * \note This is not necessarily a bitwise operation - the storage format
1001 * of booleans is implementation-dependent.
1003 static inline SimdDBool gmx_simdcall operator&&(SimdDBool a, SimdDBool b)
1007 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1009 res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1014 /*! \brief Logical \a or on double precision SIMD booleans.
1016 * \param a logical vars 1
1017 * \param b logical vars 2
1018 * \return For each element, the result boolean is true if a or b is true.
1020 * Note that this is not necessarily a bitwise operation - the storage format
1021 * of booleans is implementation-dependent.
1024 static inline SimdDBool gmx_simdcall operator||(SimdDBool a, SimdDBool b)
1028 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1030 res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1035 /*! \brief Returns non-zero if any of the boolean in SIMD a is True, otherwise 0.
1037 * \param a Logical variable.
1038 * \return true if any element in a is true, otherwise false.
1040 * The actual return value for truth will depend on the architecture,
1041 * so any non-zero value is considered truth.
1043 static inline bool gmx_simdcall anyTrue(SimdDBool a)
1047 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1049 res = res || a.simdInternal_[i];
1054 /*! \brief Select from double precision SIMD variable where boolean is true.
1056 * \param a Floating-point variable to select from
1057 * \param mask Boolean selector
1058 * \return For each element, a is selected for true, 0 for false.
1060 static inline SimdDouble gmx_simdcall selectByMask(SimdDouble a, SimdDBool mask)
1064 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1066 res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0;
1071 /*! \brief Select from double precision SIMD variable where boolean is false.
1073 * \param a Floating-point variable to select from
1074 * \param mask Boolean selector
1075 * \return For each element, a is selected for false, 0 for true (sic).
1077 static inline SimdDouble gmx_simdcall selectByNotMask(SimdDouble a, SimdDBool mask)
1081 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1083 res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0 : a.simdInternal_[i];
1088 /*! \brief Vector-blend SIMD double selection.
1090 * \param a First source
1091 * \param b Second source
1092 * \param sel Boolean selector
1093 * \return For each element, select b if sel is true, a otherwise.
1095 static inline SimdDouble gmx_simdcall blend(SimdDouble a, SimdDouble b, SimdDBool sel)
1099 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1101 res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1108 * \name SIMD implementation integer (corresponding to double) bitwise logical operations
1112 /*! \brief Integer SIMD bitwise and.
1114 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1116 * \note You can \a not use this operation directly to select based on a boolean
1117 * SIMD variable, since booleans are separate from integer SIMD. If that
1118 * is what you need, have a look at \ref gmx::selectByMask instead.
1120 * \param a first integer SIMD
1121 * \param b second integer SIMD
1122 * \return a \& b (bitwise and)
1124 static inline SimdDInt32 gmx_simdcall operator&(SimdDInt32 a, SimdDInt32 b)
1128 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1130 res.simdInternal_[i] = a.simdInternal_[i] & b.simdInternal_[i];
1135 /*! \brief Integer SIMD bitwise not/complement.
1137 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1139 * \note You can \a not use this operation directly to select based on a boolean
1140 * SIMD variable, since booleans are separate from integer SIMD. If that
1141 * is what you need, have a look at \ref gmx::selectByMask instead.
1143 * \param a integer SIMD
1144 * \param b integer SIMD
1147 static inline SimdDInt32 gmx_simdcall andNot(SimdDInt32 a, SimdDInt32 b)
1151 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1153 res.simdInternal_[i] = ~a.simdInternal_[i] & b.simdInternal_[i];
1158 /*! \brief Integer SIMD bitwise or.
1160 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1162 * \param a first integer SIMD
1163 * \param b second integer SIMD
1164 * \return a \| b (bitwise or)
1166 static inline SimdDInt32 gmx_simdcall operator|(SimdDInt32 a, SimdDInt32 b)
1170 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1172 res.simdInternal_[i] = a.simdInternal_[i] | b.simdInternal_[i];
1177 /*! \brief Integer SIMD bitwise xor.
1179 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1181 * \param a first integer SIMD
1182 * \param b second integer SIMD
1183 * \return a ^ b (bitwise xor)
1185 static inline SimdDInt32 gmx_simdcall operator^(SimdDInt32 a, SimdDInt32 b)
1189 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1191 res.simdInternal_[i] = a.simdInternal_[i] ^ b.simdInternal_[i];
1198 * \name SIMD implementation integer (corresponding to double) arithmetics
1202 /*! \brief Add SIMD integers.
1204 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1210 static inline SimdDInt32 gmx_simdcall operator+(SimdDInt32 a, SimdDInt32 b)
1214 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1216 res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
1221 /*! \brief Subtract SIMD integers.
1223 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1229 static inline SimdDInt32 gmx_simdcall operator-(SimdDInt32 a, SimdDInt32 b)
1233 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1235 res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
1240 /*! \brief Multiply SIMD integers.
1242 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1248 * \note Only the low 32 bits are retained, so this can overflow.
1250 static inline SimdDInt32 gmx_simdcall operator*(SimdDInt32 a, SimdDInt32 b)
1254 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1256 res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
1263 * \name SIMD implementation integer (corresponding to double) comparisons, boolean selection
1267 /*! \brief Equality comparison of two integers corresponding to double values.
1269 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1271 * \param a SIMD integer1
1272 * \param b SIMD integer2
1273 * \return SIMD integer boolean with true for elements where a==b
1275 static inline SimdDIBool gmx_simdcall operator==(SimdDInt32 a, SimdDInt32 b)
1279 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1281 res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
1286 /*! \brief Less-than comparison of two SIMD integers corresponding to double values.
1288 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1290 * \param a SIMD integer1
1291 * \param b SIMD integer2
1292 * \return SIMD integer boolean with true for elements where a<b
1294 static inline SimdDIBool gmx_simdcall operator<(SimdDInt32 a, SimdDInt32 b)
1298 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1300 res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
1305 /*! \brief Check if any bit is set in each element
1307 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1309 * \param a SIMD integer
1310 * \return SIMD integer boolean with true for elements where any bit is set
1312 static inline SimdDIBool gmx_simdcall testBits(SimdDInt32 a)
1316 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1318 res.simdInternal_[i] = (a.simdInternal_[i] != 0);
1323 /*! \brief Logical AND on SimdDIBool.
1325 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1327 * \param a SIMD boolean 1
1328 * \param b SIMD boolean 2
1329 * \return True for elements where both a and b are true.
1331 static inline SimdDIBool gmx_simdcall operator&&(SimdDIBool a, SimdDIBool b)
1335 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1337 res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1342 /*! \brief Logical OR on SimdDIBool.
1344 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1346 * \param a SIMD boolean 1
1347 * \param b SIMD boolean 2
1348 * \return True for elements where both a and b are true.
1350 static inline SimdDIBool gmx_simdcall operator||(SimdDIBool a, SimdDIBool b)
1354 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1356 res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1361 /*! \brief Returns true if any of the boolean in x is True, otherwise 0.
1363 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1365 * The actual return value for "any true" will depend on the architecture.
1366 * Any non-zero value should be considered truth.
1368 * \param a SIMD boolean
1369 * \return True if any of the elements in a is true, otherwise 0.
1371 static inline bool gmx_simdcall anyTrue(SimdDIBool a)
1375 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1377 res = res || a.simdInternal_[i];
1382 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is true.
1384 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1386 * \param a SIMD integer to select from
1387 * \param mask Boolean selector
1388 * \return Elements from a where sel is true, 0 otherwise.
1390 static inline SimdDInt32 gmx_simdcall selectByMask(SimdDInt32 a, SimdDIBool mask)
1394 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1396 res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0;
1401 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is false.
1403 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1405 * \param a SIMD integer to select from
1406 * \param mask Boolean selector
1407 * \return Elements from a where sel is false, 0 otherwise (sic).
1409 static inline SimdDInt32 gmx_simdcall selectByNotMask(SimdDInt32 a, SimdDIBool mask)
1413 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1415 res.simdInternal_[i] = mask.simdInternal_[i] ? 0 : a.simdInternal_[i];
1420 /*! \brief Vector-blend SIMD integer selection.
1422 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1424 * \param a First source
1425 * \param b Second source
1426 * \param sel Boolean selector
1427 * \return For each element, select b if sel is true, a otherwise.
1429 static inline SimdDInt32 gmx_simdcall blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
1433 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1435 res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1442 * \name SIMD implementation conversion operations
1446 /*! \brief Round double precision floating point to integer.
1448 * \param a SIMD floating-point
1449 * \return SIMD integer, rounded to nearest integer.
1451 * \note Round mode is implementation defined. The only guarantee is that it
1452 * is consistent between rounding functions (round, cvtR2I).
1454 static inline SimdDInt32 gmx_simdcall cvtR2I(SimdDouble a)
1458 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1460 b.simdInternal_[i] = std::round(a.simdInternal_[i]);
1465 /*! \brief Truncate double precision floating point to integer.
1467 * \param a SIMD floating-point
1468 * \return SIMD integer, truncated to nearest integer.
1470 static inline SimdDInt32 gmx_simdcall cvttR2I(SimdDouble a)
1474 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1476 b.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
1481 /*! \brief Convert integer to double precision floating point.
1483 * \param a SIMD integer
1484 * \return SIMD floating-point
1486 static inline SimdDouble gmx_simdcall cvtI2R(SimdDInt32 a)
1490 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1492 b.simdInternal_[i] = a.simdInternal_[i];
1497 /*! \brief Convert from double precision boolean to corresponding integer boolean
1499 * \param a SIMD floating-point boolean
1500 * \return SIMD integer boolean
1502 static inline SimdDIBool gmx_simdcall cvtB2IB(SimdDBool a)
1506 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1508 b.simdInternal_[i] = a.simdInternal_[i];
1513 /*! \brief Convert from integer boolean to corresponding double precision boolean
1515 * \param a SIMD integer boolean
1516 * \return SIMD floating-point boolean
1518 static inline SimdDBool gmx_simdcall cvtIB2B(SimdDIBool a)
1522 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1524 b.simdInternal_[i] = a.simdInternal_[i];
1529 /*! \brief Convert SIMD float to double.
1531 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1532 * \ref GMX_SIMD_DOUBLE_WIDTH.
1534 * Float/double conversions are complex since the SIMD width could either
1535 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1536 * need to check for the width in the code, and have different code paths.
1538 * \param f Single-precision SIMD variable
1539 * \return Double-precision SIMD variable of the same width
1541 static inline SimdDouble gmx_simdcall cvtF2D(SimdFloat gmx_unused f)
1543 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1545 for (std::size_t i = 0; i < d.simdInternal_.size(); i++)
1547 d.simdInternal_[i] = f.simdInternal_[i];
1551 gmx_fatal(FARGS, "cvtF2D() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1555 /*! \brief Convert SIMD double to float.
1557 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1558 * \ref GMX_SIMD_DOUBLE_WIDTH.
1560 * Float/double conversions are complex since the SIMD width could either
1561 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1562 * need to check for the width in the code, and have different code paths.
1564 * \param d Double-precision SIMD variable
1565 * \return Single-precision SIMD variable of the same width
1567 static inline SimdFloat gmx_simdcall cvtD2F(SimdDouble gmx_unused d)
1569 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1571 for (std::size_t i = 0; i < f.simdInternal_.size(); i++)
1573 f.simdInternal_[i] = d.simdInternal_[i];
1577 gmx_fatal(FARGS, "cvtD2F() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1581 /*! \brief Convert SIMD float to double.
1583 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1584 * as \ref GMX_SIMD_DOUBLE_WIDTH.
1586 * Float/double conversions are complex since the SIMD width could either
1587 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1588 * need to check for the width in the code, and have different code paths.
1590 * \param f Single-precision SIMD variable
1591 * \param[out] d0 Double-precision SIMD variable, first half of values from f.
1592 * \param[out] d1 Double-precision SIMD variable, second half of values from f.
1594 static inline void gmx_simdcall cvtF2DD(SimdFloat gmx_unused f,
1595 SimdDouble gmx_unused* d0,
1596 SimdDouble gmx_unused* d1)
1598 #if (GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH)
1599 for (std::size_t i = 0; i < d0->simdInternal_.size(); i++)
1601 d0->simdInternal_[i] = f.simdInternal_[i];
1602 d1->simdInternal_[i] = f.simdInternal_[f.simdInternal_.size() / 2 + i];
1605 gmx_fatal(FARGS, "simdCvtF2DD() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1609 /*! \brief Convert SIMD double to float.
1611 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1612 * as \ref GMX_SIMD_DOUBLE_WIDTH.
1614 * Float/double conversions are complex since the SIMD width could either
1615 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1616 * need to check for the width in the code, and have different code paths.
1618 * \param d0 Double-precision SIMD variable, first half of values to put in f.
1619 * \param d1 Double-precision SIMD variable, second half of values to put in f.
1620 * \return Single-precision SIMD variable with all values.
1622 static inline SimdFloat gmx_simdcall cvtDD2F(SimdDouble gmx_unused d0, SimdDouble gmx_unused d1)
1624 #if (GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH)
1626 for (std::size_t i = 0; i < d0.simdInternal_.size(); i++)
1628 f.simdInternal_[i] = d0.simdInternal_[i];
1629 f.simdInternal_[f.simdInternal_.size() / 2 + i] = d1.simdInternal_[i];
1633 gmx_fatal(FARGS, "simdCvtDD2F() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1644 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H