2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
37 #define GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
39 /*! \libinternal \file
41 * \brief Reference implementation, SIMD double precision.
43 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
45 * \ingroup module_simd
58 #include "gromacs/math/utilities.h"
59 #include "gromacs/utility/fatalerror.h"
61 #include "impl_reference_definitions.h"
62 #include "impl_reference_simd_float.h"
68 /*! \addtogroup module_simd */
71 /* \name SIMD implementation data types
75 /*! \libinternal \brief Double SIMD variable. Available if GMX_SIMD_HAVE_DOUBLE is 1.
77 * \note This variable cannot be placed inside other structures or classes, since
78 * some compilers (including at least clang-3.7) appear to lose the
79 * alignment. This is likely particularly severe when allocating such
80 * memory on the heap, but it occurs for stack structures too.
87 //! \brief Construct from scalar
88 SimdDouble(double d) { simdInternal_.fill(d); }
90 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
92 * This has to be public to enable usage in combination with static inline
93 * functions, but it should never, EVER, be accessed by any code outside
94 * the corresponding implementation directory since the type will depend
95 * on the architecture.
97 std::array<double, GMX_SIMD_DOUBLE_WIDTH> simdInternal_;
100 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from double.
102 * Available if GMX_SIMD_HAVE_DOUBLE is 1.
104 * \note The integer SIMD type will always be available, but on architectures
105 * that do not have any real integer SIMD support it might be defined as the
106 * floating-point type. This will work fine, since there are separate defines
107 * for whether the implementation can actually do any operations on integer
110 * \note This variable cannot be placed inside other structures or classes, since
111 * some compilers (including at least clang-3.7) appear to lose the
112 * alignment. This is likely particularly severe when allocating such
113 * memory on the heap, but it occurs for stack structures too.
120 //! \brief Construct from scalar
121 SimdDInt32(std::int32_t i) { simdInternal_.fill(i); }
123 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
125 * This has to be public to enable usage in combination with static inline
126 * functions, but it should never, EVER, be accessed by any code outside
127 * the corresponding implementation directory since the type will depend
128 * on the architecture.
130 std::array<std::int32_t, GMX_SIMD_DINT32_WIDTH> simdInternal_;
133 /*! \libinternal \brief Boolean type for double SIMD data.
135 * Available if GMX_SIMD_HAVE_DOUBLE is 1.
137 * \note This variable cannot be placed inside other structures or classes, since
138 * some compilers (including at least clang-3.7) appear to lose the
139 * alignment. This is likely particularly severe when allocating such
140 * memory on the heap, but it occurs for stack structures too.
147 //! \brief Construct from scalar bool
148 SimdDBool(bool b) { simdInternal_.fill(b); }
150 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
152 * This has to be public to enable usage in combination with static inline
153 * functions, but it should never, EVER, be accessed by any code outside
154 * the corresponding implementation directory since the type will depend
155 * on the architecture.
157 std::array<bool, GMX_SIMD_DOUBLE_WIDTH> simdInternal_;
160 /*! \libinternal \brief Boolean type for integer datatypes corresponding to double SIMD.
162 * Available if GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
164 * \note This variable cannot be placed inside other structures or classes, since
165 * some compilers (including at least clang-3.7) appear to lose the
166 * alignment. This is likely particularly severe when allocating such
167 * memory on the heap, but it occurs for stack structures too.
174 //! \brief Construct from scalar
175 SimdDIBool(bool b) { simdInternal_.fill(b); }
177 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
179 * This has to be public to enable usage in combination with static inline
180 * functions, but it should never, EVER, be accessed by any code outside
181 * the corresponding implementation directory since the type will depend
182 * on the architecture.
184 std::array<bool, GMX_SIMD_DINT32_WIDTH> simdInternal_;
189 * \name SIMD implementation load/store operations for double precision floating point
193 /*! \brief Load \ref GMX_SIMD_DOUBLE_WIDTH numbers from aligned memory.
195 * \param m Pointer to memory aligned to the SIMD width.
196 * \return SIMD variable with data loaded.
198 static inline SimdDouble gmx_simdcall
199 simdLoad(const double *m, SimdDoubleTag = {})
203 assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(double)) == 0);
205 std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
209 /*! \brief Store the contents of SIMD double variable to aligned memory m.
211 * \param[out] m Pointer to memory, aligned to SIMD width.
212 * \param a SIMD variable to store
214 static inline void gmx_simdcall
215 store(double *m, SimdDouble a)
217 assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(double)) == 0);
219 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
222 /*! \brief Load SIMD double from unaligned memory.
224 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
226 * \param m Pointer to memory, no alignment requirement.
227 * \return SIMD variable with data loaded.
229 static inline SimdDouble gmx_simdcall
230 simdLoadU(const double *m, SimdDoubleTag = {})
233 std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
237 /*! \brief Store SIMD double to unaligned memory.
239 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
241 * \param[out] m Pointer to memory, no alignment requirement.
242 * \param a SIMD variable to store.
244 static inline void gmx_simdcall
245 storeU(double *m, SimdDouble a)
247 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
250 /*! \brief Set all SIMD double variable elements to 0.0.
252 * You should typically just call \ref gmx::setZero(), which uses proxy objects
253 * internally to handle all types rather than adding the suffix used here.
257 static inline SimdDouble gmx_simdcall
260 return SimdDouble(0.0);
265 * \name SIMD implementation load/store operations for integers (corresponding to double)
269 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
271 * You should typically just call \ref gmx::load(), which uses proxy objects
272 * internally to handle all types rather than adding the suffix used here.
274 * \param m Pointer to memory, aligned to (double) integer SIMD width.
275 * \return SIMD integer variable.
277 static inline SimdDInt32 gmx_simdcall
278 simdLoad(const std::int32_t * m, SimdDInt32Tag)
282 assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(std::int32_t)) == 0);
284 std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
288 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
290 * \param m Memory aligned to (double) integer SIMD width.
291 * \param a SIMD (double) integer variable to store.
293 static inline void gmx_simdcall
294 store(std::int32_t * m, SimdDInt32 a)
296 assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(std::int32_t)) == 0);
298 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
301 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx::SimdDouble.
303 * You should typically just call \ref gmx::loadU(), which uses proxy objects
304 * internally to handle all types rather than adding the suffix used here.
306 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
308 * \param m Pointer to memory, no alignment requirements.
309 * \return SIMD integer variable.
311 static inline SimdDInt32 gmx_simdcall
312 simdLoadU(const std::int32_t *m, SimdDInt32Tag)
315 std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
319 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
321 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
323 * \param m Memory pointer, no alignment requirements.
324 * \param a SIMD (double) integer variable to store.
326 static inline void gmx_simdcall
327 storeU(std::int32_t * m, SimdDInt32 a)
329 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
332 /*! \brief Set all SIMD (double) integer variable elements to 0.
334 * You should typically just call \ref gmx::setZero(), which uses proxy objects
335 * internally to handle all types rather than adding the suffix used here.
339 static inline SimdDInt32 gmx_simdcall
342 return SimdDInt32(0);
345 /*! \brief Extract element with index i from \ref gmx::SimdDInt32.
347 * Available if \ref GMX_SIMD_HAVE_DINT32_EXTRACT is 1.
349 * \tparam index Compile-time constant, position to extract (first position is 0)
350 * \param a SIMD variable from which to extract value.
351 * \return Single integer from position index in SIMD variable.
354 static inline std::int32_t gmx_simdcall
355 extract(SimdDInt32 a)
357 return a.simdInternal_[index];
362 * \name SIMD implementation double precision floating-point bitwise logical operations
366 /*! \brief Bitwise and for two SIMD double variables.
368 * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
372 * \return data1 & data2
374 static inline SimdDouble gmx_simdcall
375 operator&(SimdDouble a, SimdDouble b)
386 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
388 conv1.r = a.simdInternal_[i];
389 conv2.r = b.simdInternal_[i];
390 conv1.i = conv1.i & conv2.i;
391 res.simdInternal_[i] = conv1.r;
396 /*! \brief Bitwise andnot for SIMD double.
398 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
402 * \return (~data1) & data2
404 static inline SimdDouble gmx_simdcall
405 andNot(SimdDouble a, SimdDouble b)
416 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
418 conv1.r = a.simdInternal_[i];
419 conv2.r = b.simdInternal_[i];
420 conv1.i = ~conv1.i & conv2.i;
421 res.simdInternal_[i] = conv1.r;
426 /*! \brief Bitwise or for SIMD double.
428 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
432 * \return data1 | data2
434 static inline SimdDouble gmx_simdcall
435 operator|(SimdDouble a, SimdDouble b)
446 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
448 conv1.r = a.simdInternal_[i];
449 conv2.r = b.simdInternal_[i];
450 conv1.i = conv1.i | conv2.i;
451 res.simdInternal_[i] = conv1.r;
456 /*! \brief Bitwise xor for SIMD double.
458 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
462 * \return data1 ^ data2
464 static inline SimdDouble gmx_simdcall
465 operator^(SimdDouble a, SimdDouble b)
476 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
478 conv1.r = a.simdInternal_[i];
479 conv2.r = b.simdInternal_[i];
480 conv1.i = conv1.i ^ conv2.i;
481 res.simdInternal_[i] = conv1.r;
488 * \name SIMD implementation double precision floating-point arithmetics
492 /*! \brief Add two double SIMD variables.
498 static inline SimdDouble gmx_simdcall
499 operator+(SimdDouble a, SimdDouble b)
503 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
505 res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
510 /*! \brief Subtract two double SIMD variables.
516 static inline SimdDouble gmx_simdcall
517 operator-(SimdDouble a, SimdDouble b)
521 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
523 res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
528 /*! \brief SIMD double precision negate.
530 * \param a SIMD double precision value
533 static inline SimdDouble gmx_simdcall
534 operator-(SimdDouble a)
538 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
540 res.simdInternal_[i] = -a.simdInternal_[i];
545 /*! \brief Multiply two double SIMD variables.
551 static inline SimdDouble gmx_simdcall
552 operator*(SimdDouble a, SimdDouble b)
556 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
558 res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
563 /*! \brief SIMD double Fused-multiply-add. Result is a*b+c.
570 static inline SimdDouble gmx_simdcall
571 fma(SimdDouble a, SimdDouble b, SimdDouble c)
576 /*! \brief SIMD double Fused-multiply-subtract. Result is a*b-c.
583 static inline SimdDouble gmx_simdcall
584 fms(SimdDouble a, SimdDouble b, SimdDouble c)
589 /*! \brief SIMD double Fused-negated-multiply-add. Result is -a*b+c.
596 static inline SimdDouble gmx_simdcall
597 fnma(SimdDouble a, SimdDouble b, SimdDouble c)
602 /*! \brief SIMD double Fused-negated-multiply-subtract. Result is -a*b-c.
609 static inline SimdDouble gmx_simdcall
610 fnms(SimdDouble a, SimdDouble b, SimdDouble c)
615 /*! \brief double SIMD 1.0/sqrt(x) lookup.
617 * This is a low-level instruction that should only be called from routines
618 * implementing the inverse square root in simd_math.h.
620 * \param x Argument, x>0
621 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
623 static inline SimdDouble gmx_simdcall
628 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
630 // sic - we only use single precision for the lookup
631 res.simdInternal_[i] = 1.0F / std::sqrt(static_cast<float>(x.simdInternal_[i]));
636 /*! \brief SIMD double 1.0/x lookup.
638 * This is a low-level instruction that should only be called from routines
639 * implementing the reciprocal in simd_math.h.
641 * \param x Argument, x!=0
642 * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
644 static inline SimdDouble gmx_simdcall
649 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
651 // sic - we only use single precision for the lookup
652 res.simdInternal_[i] = 1.0F / static_cast<float>(x.simdInternal_[i]);
657 /*! \brief Add two double SIMD variables, masked version.
662 * \return a+b where mask is true, 0.0 otherwise.
664 static inline SimdDouble gmx_simdcall
665 maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
669 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
671 res.simdInternal_[i] = a.simdInternal_[i] + (m.simdInternal_[i] ? b.simdInternal_[i] : 0.0);
676 /*! \brief Multiply two double SIMD variables, masked version.
681 * \return a*b where mask is true, 0.0 otherwise.
683 static inline SimdDouble gmx_simdcall
684 maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
688 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
690 res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i]) : 0.0;
695 /*! \brief SIMD double fused multiply-add, masked version.
701 * \return a*b+c where mask is true, 0.0 otherwise.
703 static inline SimdDouble gmx_simdcall
704 maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
708 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
710 res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i] + c.simdInternal_[i]) : 0.0;
715 /*! \brief SIMD double 1.0/sqrt(x) lookup, masked version.
717 * This is a low-level instruction that should only be called from routines
718 * implementing the inverse square root in simd_math.h.
720 * \param x Argument, x>0 for entries where mask is true.
722 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
723 * The result for masked-out entries will be 0.0.
725 static inline SimdDouble gmx_simdcall
726 maskzRsqrt(SimdDouble x, SimdDBool m)
730 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
732 // sic - we only use single precision for the lookup
733 res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0F / std::sqrt(static_cast<float>(x.simdInternal_[i])) : 0.0;
738 /*! \brief SIMD double 1.0/x lookup, masked version.
740 * This is a low-level instruction that should only be called from routines
741 * implementing the reciprocal in simd_math.h.
743 * \param x Argument, x>0 for entries where mask is true.
745 * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
746 * The result for masked-out entries will be 0.0.
748 static inline SimdDouble gmx_simdcall
749 maskzRcp(SimdDouble x, SimdDBool m)
753 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
755 res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0F / static_cast<float>(x.simdInternal_[i]) : 0.0;
760 /*! \brief SIMD double floating-point fabs().
762 * \param a any floating point values
763 * \return fabs(a) for each element.
765 static inline SimdDouble gmx_simdcall
770 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
772 res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
777 /*! \brief Set each SIMD double element to the largest from two variables.
779 * \param a Any floating-point value
780 * \param b Any floating-point value
781 * \return max(a,b) for each element.
783 static inline SimdDouble gmx_simdcall
784 max(SimdDouble a, SimdDouble b)
788 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
790 res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
795 /*! \brief Set each SIMD double element to the smallest from two variables.
797 * \param a Any floating-point value
798 * \param b Any floating-point value
799 * \return min(a,b) for each element.
801 static inline SimdDouble gmx_simdcall
802 min(SimdDouble a, SimdDouble b)
806 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
808 res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
813 /*! \brief SIMD double round to nearest integer value (in floating-point format).
815 * \param a Any floating-point value
816 * \return The nearest integer, represented in floating-point format.
818 * \note Round mode is implementation defined. The only guarantee is that it
819 * is consistent between rounding functions (round, cvtR2I).
821 static inline SimdDouble gmx_simdcall
826 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
828 res.simdInternal_[i] = std::round(a.simdInternal_[i]);
833 /*! \brief Truncate SIMD double, i.e. round towards zero - common hardware instruction.
835 * \param a Any floating-point value
836 * \return Integer rounded towards zero, represented in floating-point format.
838 * \note This is truncation towards zero, not floor(). The reason for this
839 * is that truncation is virtually always present as a dedicated hardware
840 * instruction, but floor() frequently isn't.
842 static inline SimdDouble gmx_simdcall
847 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
849 res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
854 /*! \brief Extract (integer) exponent and fraction from double precision SIMD.
856 * \param value Floating-point value to extract from
857 * \param[out] exponent Returned exponent of value, integer SIMD format.
858 * \return Fraction of value, floating-point SIMD format.
860 static inline SimdDouble gmx_simdcall
861 frexp(SimdDouble value, SimdDInt32 * exponent)
865 for (std::size_t i = 0; i < fraction.simdInternal_.size(); i++)
867 fraction.simdInternal_[i] = std::frexp(value.simdInternal_[i], &exponent->simdInternal_[i]);
872 /*! \brief Multiply a SIMD double value by the number 2 raised to an exp power.
874 * \tparam opt By default, this routine will return zero for input arguments
875 * that are so small they cannot be reproduced in the current
876 * precision. If the unsafe math optimization template parameter
877 * setting is used, these tests are skipped, and the result will
878 * be undefined (possible even NaN). This might happen below -127
879 * in single precision or -1023 in double, although some
880 * might use denormal support to extend the range.
882 * \param value Floating-point number to multiply with new exponent
883 * \param exponent Integer that will not overflow as 2^exponent.
884 * \return value*2^exponent
886 template <MathOptimization opt = MathOptimization::Safe>
887 static inline SimdDouble gmx_simdcall
888 ldexp(SimdDouble value, SimdDInt32 exponent)
892 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
894 // std::ldexp already takes care of clamping arguments, so we do not
895 // need to do anything in the reference implementation
896 res.simdInternal_[i] = std::ldexp(value.simdInternal_[i], exponent.simdInternal_[i]);
901 /*! \brief Return sum of all elements in SIMD double variable.
903 * \param a SIMD variable to reduce/sum.
904 * \return The sum of all elements in the argument variable.
907 static inline double gmx_simdcall
912 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
914 sum += a.simdInternal_[i];
921 * \name SIMD implementation double precision floating-point comparison, boolean, selection.
925 /*! \brief SIMD a==b for double SIMD.
929 * \return Each element of the boolean will be set to true if a==b.
931 * Beware that exact floating-point comparisons are difficult.
933 static inline SimdDBool gmx_simdcall
934 operator==(SimdDouble a, SimdDouble b)
938 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
940 res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
945 /*! \brief SIMD a!=b for double SIMD.
949 * \return Each element of the boolean will be set to true if a!=b.
951 * Beware that exact floating-point comparisons are difficult.
953 static inline SimdDBool gmx_simdcall
954 operator!=(SimdDouble a, SimdDouble b)
958 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
960 res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
965 /*! \brief SIMD a<b for double SIMD.
969 * \return Each element of the boolean will be set to true if a<b.
971 static inline SimdDBool gmx_simdcall
972 operator<(SimdDouble a, SimdDouble b)
976 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
978 res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
983 /*! \brief SIMD a<=b for double SIMD.
987 * \return Each element of the boolean will be set to true if a<=b.
989 static inline SimdDBool gmx_simdcall
990 operator<=(SimdDouble a, SimdDouble b)
994 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
996 res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
1001 /*! \brief Return true if any bits are set in the single precision SIMD.
1003 * This function is used to handle bitmasks, mainly for exclusions in the
1004 * inner kernels. Note that it will return true even for -0.0 (sign bit set),
1005 * so it is not identical to not-equal.
1008 * \return Each element of the boolean will be true if any bit in a is nonzero.
1010 static inline SimdDBool gmx_simdcall
1011 testBits(SimdDouble a)
1015 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1023 conv.d = a.simdInternal_[i];
1024 res.simdInternal_[i] = (conv.i != 0);
1029 /*! \brief Logical \a and on double precision SIMD booleans.
1031 * \param a logical vars 1
1032 * \param b logical vars 2
1033 * \return For each element, the result boolean is true if a \& b are true.
1035 * \note This is not necessarily a bitwise operation - the storage format
1036 * of booleans is implementation-dependent.
1038 static inline SimdDBool gmx_simdcall
1039 operator&&(SimdDBool a, SimdDBool b)
1043 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1045 res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1050 /*! \brief Logical \a or on double precision SIMD booleans.
1052 * \param a logical vars 1
1053 * \param b logical vars 2
1054 * \return For each element, the result boolean is true if a or b is true.
1056 * Note that this is not necessarily a bitwise operation - the storage format
1057 * of booleans is implementation-dependent.
1060 static inline SimdDBool gmx_simdcall
1061 operator||(SimdDBool a, SimdDBool b)
1065 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1067 res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1072 /*! \brief Returns non-zero if any of the boolean in SIMD a is True, otherwise 0.
1074 * \param a Logical variable.
1075 * \return true if any element in a is true, otherwise false.
1077 * The actual return value for truth will depend on the architecture,
1078 * so any non-zero value is considered truth.
1080 static inline bool gmx_simdcall
1081 anyTrue(SimdDBool a)
1085 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1087 res = res || a.simdInternal_[i];
1092 /*! \brief Select from double precision SIMD variable where boolean is true.
1094 * \param a Floating-point variable to select from
1095 * \param mask Boolean selector
1096 * \return For each element, a is selected for true, 0 for false.
1098 static inline SimdDouble gmx_simdcall
1099 selectByMask(SimdDouble a, SimdDBool mask)
1103 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1105 res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0;
1110 /*! \brief Select from double precision SIMD variable where boolean is false.
1112 * \param a Floating-point variable to select from
1113 * \param mask Boolean selector
1114 * \return For each element, a is selected for false, 0 for true (sic).
1116 static inline SimdDouble gmx_simdcall
1117 selectByNotMask(SimdDouble a, SimdDBool mask)
1121 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1123 res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0 : a.simdInternal_[i];
1128 /*! \brief Vector-blend SIMD double selection.
1130 * \param a First source
1131 * \param b Second source
1132 * \param sel Boolean selector
1133 * \return For each element, select b if sel is true, a otherwise.
1135 static inline SimdDouble gmx_simdcall
1136 blend(SimdDouble a, SimdDouble b, SimdDBool sel)
1140 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1142 res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1149 * \name SIMD implementation integer (corresponding to double) bitwise logical operations
1153 /*! \brief Integer SIMD bitwise and.
1155 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1157 * \note You can \a not use this operation directly to select based on a boolean
1158 * SIMD variable, since booleans are separate from integer SIMD. If that
1159 * is what you need, have a look at \ref gmx::selectByMask instead.
1161 * \param a first integer SIMD
1162 * \param b second integer SIMD
1163 * \return a \& b (bitwise and)
1165 static inline SimdDInt32 gmx_simdcall
1166 operator&(SimdDInt32 a, SimdDInt32 b)
1170 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1172 res.simdInternal_[i] = a.simdInternal_[i] & b.simdInternal_[i];
1177 /*! \brief Integer SIMD bitwise not/complement.
1179 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1181 * \note You can \a not use this operation directly to select based on a boolean
1182 * SIMD variable, since booleans are separate from integer SIMD. If that
1183 * is what you need, have a look at \ref gmx::selectByMask instead.
1185 * \param a integer SIMD
1186 * \param b integer SIMD
1189 static inline SimdDInt32 gmx_simdcall
1190 andNot(SimdDInt32 a, SimdDInt32 b)
1194 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1196 res.simdInternal_[i] = ~a.simdInternal_[i] & b.simdInternal_[i];
1201 /*! \brief Integer SIMD bitwise or.
1203 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1205 * \param a first integer SIMD
1206 * \param b second integer SIMD
1207 * \return a \| b (bitwise or)
1209 static inline SimdDInt32 gmx_simdcall
1210 operator|(SimdDInt32 a, SimdDInt32 b)
1214 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1216 res.simdInternal_[i] = a.simdInternal_[i] | b.simdInternal_[i];
1221 /*! \brief Integer SIMD bitwise xor.
1223 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1225 * \param a first integer SIMD
1226 * \param b second integer SIMD
1227 * \return a ^ b (bitwise xor)
1229 static inline SimdDInt32 gmx_simdcall
1230 operator^(SimdDInt32 a, SimdDInt32 b)
1234 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1236 res.simdInternal_[i] = a.simdInternal_[i] ^ b.simdInternal_[i];
1243 * \name SIMD implementation integer (corresponding to double) arithmetics
1247 /*! \brief Add SIMD integers.
1249 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1255 static inline SimdDInt32 gmx_simdcall
1256 operator+(SimdDInt32 a, SimdDInt32 b)
1260 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1262 res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
1267 /*! \brief Subtract SIMD integers.
1269 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1275 static inline SimdDInt32 gmx_simdcall
1276 operator-(SimdDInt32 a, SimdDInt32 b)
1280 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1282 res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
1287 /*! \brief Multiply SIMD integers.
1289 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1295 * \note Only the low 32 bits are retained, so this can overflow.
1297 static inline SimdDInt32 gmx_simdcall
1298 operator*(SimdDInt32 a, SimdDInt32 b)
1302 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1304 res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
1311 * \name SIMD implementation integer (corresponding to double) comparisons, boolean selection
1315 /*! \brief Equality comparison of two integers corresponding to double values.
1317 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1319 * \param a SIMD integer1
1320 * \param b SIMD integer2
1321 * \return SIMD integer boolean with true for elements where a==b
1323 static inline SimdDIBool gmx_simdcall
1324 operator==(SimdDInt32 a, SimdDInt32 b)
1328 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1330 res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
1335 /*! \brief Less-than comparison of two SIMD integers corresponding to double values.
1337 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1339 * \param a SIMD integer1
1340 * \param b SIMD integer2
1341 * \return SIMD integer boolean with true for elements where a<b
1343 static inline SimdDIBool gmx_simdcall
1344 operator<(SimdDInt32 a, SimdDInt32 b)
1348 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1350 res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
1355 /*! \brief Check if any bit is set in each element
1357 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1359 * \param a SIMD integer
1360 * \return SIMD integer boolean with true for elements where any bit is set
1362 static inline SimdDIBool gmx_simdcall
1363 testBits(SimdDInt32 a)
1367 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1369 res.simdInternal_[i] = (a.simdInternal_[i] != 0);
1374 /*! \brief Logical AND on SimdDIBool.
1376 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1378 * \param a SIMD boolean 1
1379 * \param b SIMD boolean 2
1380 * \return True for elements where both a and b are true.
1382 static inline SimdDIBool gmx_simdcall
1383 operator&&(SimdDIBool a, SimdDIBool b)
1387 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1389 res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1394 /*! \brief Logical OR on SimdDIBool.
1396 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1398 * \param a SIMD boolean 1
1399 * \param b SIMD boolean 2
1400 * \return True for elements where both a and b are true.
1402 static inline SimdDIBool gmx_simdcall
1403 operator||(SimdDIBool a, SimdDIBool b)
1407 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1409 res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1414 /*! \brief Returns true if any of the boolean in x is True, otherwise 0.
1416 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1418 * The actual return value for "any true" will depend on the architecture.
1419 * Any non-zero value should be considered truth.
1421 * \param a SIMD boolean
1422 * \return True if any of the elements in a is true, otherwise 0.
1424 static inline bool gmx_simdcall
1425 anyTrue(SimdDIBool a)
1429 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1431 res = res || a.simdInternal_[i];
1436 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is true.
1438 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1440 * \param a SIMD integer to select from
1441 * \param mask Boolean selector
1442 * \return Elements from a where sel is true, 0 otherwise.
1444 static inline SimdDInt32 gmx_simdcall
1445 selectByMask(SimdDInt32 a, SimdDIBool mask)
1449 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1451 res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0;
1456 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is false.
1458 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1460 * \param a SIMD integer to select from
1461 * \param mask Boolean selector
1462 * \return Elements from a where sel is false, 0 otherwise (sic).
1464 static inline SimdDInt32 gmx_simdcall
1465 selectByNotMask(SimdDInt32 a, SimdDIBool mask)
1469 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1471 res.simdInternal_[i] = mask.simdInternal_[i] ? 0 : a.simdInternal_[i];
1476 /*! \brief Vector-blend SIMD integer selection.
1478 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1480 * \param a First source
1481 * \param b Second source
1482 * \param sel Boolean selector
1483 * \return For each element, select b if sel is true, a otherwise.
1485 static inline SimdDInt32 gmx_simdcall
1486 blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
1490 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1492 res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1499 * \name SIMD implementation conversion operations
1503 /*! \brief Round double precision floating point to integer.
1505 * \param a SIMD floating-point
1506 * \return SIMD integer, rounded to nearest integer.
1508 * \note Round mode is implementation defined. The only guarantee is that it
1509 * is consistent between rounding functions (round, cvtR2I).
1511 static inline SimdDInt32 gmx_simdcall
1512 cvtR2I(SimdDouble a)
1516 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1518 b.simdInternal_[i] = std::round(a.simdInternal_[i]);
1523 /*! \brief Truncate double precision floating point to integer.
1525 * \param a SIMD floating-point
1526 * \return SIMD integer, truncated to nearest integer.
1528 static inline SimdDInt32 gmx_simdcall
1529 cvttR2I(SimdDouble a)
1533 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1535 b.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
1540 /*! \brief Convert integer to double precision floating point.
1542 * \param a SIMD integer
1543 * \return SIMD floating-point
1545 static inline SimdDouble gmx_simdcall
1546 cvtI2R(SimdDInt32 a)
1550 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1552 b.simdInternal_[i] = a.simdInternal_[i];
1557 /*! \brief Convert from double precision boolean to corresponding integer boolean
1559 * \param a SIMD floating-point boolean
1560 * \return SIMD integer boolean
1562 static inline SimdDIBool gmx_simdcall
1563 cvtB2IB(SimdDBool a)
1567 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1569 b.simdInternal_[i] = a.simdInternal_[i];
1574 /*! \brief Convert from integer boolean to corresponding double precision boolean
1576 * \param a SIMD integer boolean
1577 * \return SIMD floating-point boolean
1579 static inline SimdDBool gmx_simdcall
1580 cvtIB2B(SimdDIBool a)
1584 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1586 b.simdInternal_[i] = a.simdInternal_[i];
1591 /*! \brief Convert SIMD float to double.
1593 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1594 * \ref GMX_SIMD_DOUBLE_WIDTH.
1596 * Float/double conversions are complex since the SIMD width could either
1597 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1598 * need to check for the width in the code, and have different code paths.
1600 * \param f Single-precision SIMD variable
1601 * \return Double-precision SIMD variable of the same width
1603 static inline SimdDouble gmx_simdcall
1604 cvtF2D(SimdFloat gmx_unused f)
1606 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1608 for (std::size_t i = 0; i < d.simdInternal_.size(); i++)
1610 d.simdInternal_[i] = f.simdInternal_[i];
1614 gmx_fatal(FARGS, "cvtF2D() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1618 /*! \brief Convert SIMD double to float.
1620 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1621 * \ref GMX_SIMD_DOUBLE_WIDTH.
1623 * Float/double conversions are complex since the SIMD width could either
1624 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1625 * need to check for the width in the code, and have different code paths.
1627 * \param d Double-precision SIMD variable
1628 * \return Single-precision SIMD variable of the same width
1630 static inline SimdFloat gmx_simdcall
1631 cvtD2F(SimdDouble gmx_unused d)
1633 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1635 for (std::size_t i = 0; i < f.simdInternal_.size(); i++)
1637 f.simdInternal_[i] = d.simdInternal_[i];
1641 gmx_fatal(FARGS, "cvtD2F() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1645 /*! \brief Convert SIMD float to double.
1647 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1648 * as \ref GMX_SIMD_DOUBLE_WIDTH.
1650 * Float/double conversions are complex since the SIMD width could either
1651 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1652 * need to check for the width in the code, and have different code paths.
1654 * \param f Single-precision SIMD variable
1655 * \param[out] d0 Double-precision SIMD variable, first half of values from f.
1656 * \param[out] d1 Double-precision SIMD variable, second half of values from f.
1658 static inline void gmx_simdcall
1659 cvtF2DD(SimdFloat gmx_unused f, SimdDouble gmx_unused * d0, SimdDouble gmx_unused * d1)
1661 #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
1662 for (std::size_t i = 0; i < d0->simdInternal_.size(); i++)
1664 d0->simdInternal_[i] = f.simdInternal_[i];
1665 d1->simdInternal_[i] = f.simdInternal_[f.simdInternal_.size()/2 + i];
1668 gmx_fatal(FARGS, "simdCvtF2DD() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1672 /*! \brief Convert SIMD double to float.
1674 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1675 * as \ref GMX_SIMD_DOUBLE_WIDTH.
1677 * Float/double conversions are complex since the SIMD width could either
1678 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1679 * need to check for the width in the code, and have different code paths.
1681 * \param d0 Double-precision SIMD variable, first half of values to put in f.
1682 * \param d1 Double-precision SIMD variable, second half of values to put in f.
1683 * \return Single-precision SIMD variable with all values.
1685 static inline SimdFloat gmx_simdcall
1686 cvtDD2F(SimdDouble gmx_unused d0, SimdDouble gmx_unused d1)
1688 #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
1690 for (std::size_t i = 0; i < d0.simdInternal_.size(); i++)
1692 f.simdInternal_[i] = d0.simdInternal_[i];
1693 f.simdInternal_[f.simdInternal_.size()/2 + i] = d1.simdInternal_[i];
1697 gmx_fatal(FARGS, "simdCvtDD2F() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1708 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H