2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPL_REFERENCE_H
37 #define GMX_SIMD_IMPL_REFERENCE_H
39 /*! \libinternal \file
41 * \brief Reference SIMD implementation, including SIMD documentation.
43 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
45 * \ingroup module_simd
51 #include "gromacs/utility/fatalerror.h"
54 /*! \addtogroup module_simd */
57 /*! \name SIMD implementation capability definitions
62 * Defined when SIMD float support is present.
64 * You should only use this to specifically check for single precision SIMD,
65 * support, even when the rest of Gromacs uses double precision.
66 * \sa GMX_SIMD_HAVE_REAL, GMX_SIMD_HAVE_DOUBLE
68 #define GMX_SIMD_HAVE_FLOAT
70 /*! \brief Defined if SIMD double support is present. */
71 #define GMX_SIMD_HAVE_DOUBLE
73 /*! \brief Defined if SIMD is implemented with real hardware instructions. */
74 #define GMX_SIMD_HAVE_HARDWARE /* For Doxygen */
75 #undef GMX_SIMD_HAVE_HARDWARE /* Reference implementation setting */
77 /*! \brief Defined if the SIMD implementation supports unaligned loads. */
78 #define GMX_SIMD_HAVE_LOADU
80 /*! \brief Defined if the SIMD implementation supports unaligned stores. */
81 #define GMX_SIMD_HAVE_STOREU
83 /*! \brief Defined if SIMD implementation has logical operations on floating-point data. */
84 #define GMX_SIMD_HAVE_LOGICAL
86 /*! \brief Defined if SIMD fused multiply-add uses hardware instructions */
87 #define GMX_SIMD_HAVE_FMA /* For Doxygen */
88 #undef GMX_SIMD_HAVE_FMA /* Reference implementation setting */
90 /*! \brief Defined if the SIMD fraction has a direct hardware instruction. */
91 #define GMX_SIMD_HAVE_FRACTION /* For Doxygen */
92 #undef GMX_SIMD_HAVE_FRACTION /* Reference implementation setting */
94 /*! \brief Defined if the SIMD implementation has \ref gmx_simd_fint32_t. */
95 #define GMX_SIMD_HAVE_FINT32
97 /*! \brief Support for extracting integers from \ref gmx_simd_fint32_t. */
98 #define GMX_SIMD_HAVE_FINT32_EXTRACT
100 /*! \brief Defined if SIMD logical operations are supported for \ref gmx_simd_fint32_t */
101 #define GMX_SIMD_HAVE_FINT32_LOGICAL
103 /*! \brief Defined if SIMD arithmetic operations are supported for \ref gmx_simd_fint32_t */
104 #define GMX_SIMD_HAVE_FINT32_ARITHMETICS
106 /*! \brief Defined if the SIMD implementation has \ref gmx_simd_dint32_t.
108 * \note The Gromacs SIMD module works entirely with 32 bit integers, both
109 * in single and double precision, since some platforms do not support 64 bit
110 * SIMD integers at all. In particular, this means it is up to each
111 * implementation to get this working even if the architectures internal
112 * representation uses 64 bit integers when converting to/from double SIMD
113 * variables. For now we will try HARD to use conversions, packing or shuffling
114 * so the integer datatype has the same width as the floating-point type, i.e.
115 * if you use double precision SIMD with a width of 8, we want the integers
116 * we work with to also use a SIMD width of 8 to make it easy to load/store
117 * indices from arrays. This refers entirely to the function calls
118 * and how many integers we load/store in one call; the actual SIMD registers
119 * might be wider for integers internally (e.g. on x86 gmx_simd_dint32_t will
120 * only fill half the register), but this is none of the user's business.
121 * While this works for all current architectures, and we think it will work
122 * for future ones, we might have to alter this decision in the future. To
123 * avoid rewriting every single instance that refers to the SIMD width we still
124 * provide separate defines for the width of SIMD integer variables that you
127 #define GMX_SIMD_HAVE_DINT32
129 /*! \brief Support for extracting integer from \ref gmx_simd_dint32_t */
130 #define GMX_SIMD_HAVE_DINT32_EXTRACT
132 /*! \brief Defined if logical operations are supported for \ref gmx_simd_dint32_t */
133 #define GMX_SIMD_HAVE_DINT32_LOGICAL
135 /*! \brief Defined if SIMD arithmetic operations are supported for \ref gmx_simd_dint32_t */
136 #define GMX_SIMD_HAVE_DINT32_ARITHMETICS
138 /*! \brief Defined if the implementation provides \ref gmx_simd4_float_t. */
139 #define GMX_SIMD4_HAVE_FLOAT
141 /*! \brief Defined if the implementation provides \ref gmx_simd4_double_t. */
142 #define GMX_SIMD4_HAVE_DOUBLE
144 #ifdef GMX_SIMD_REF_FLOAT_WIDTH
145 # define GMX_SIMD_FLOAT_WIDTH GMX_SIMD_REF_FLOAT_WIDTH
147 /*! \brief Width of the \ref gmx_simd_float_t datatype. */
148 # define GMX_SIMD_FLOAT_WIDTH 4
151 #ifdef GMX_SIMD_REF_DOUBLE_WIDTH
152 # define GMX_SIMD_DOUBLE_WIDTH GMX_SIMD_REF_DOUBLE_WIDTH
154 /*! \brief Width of the \ref gmx_simd_double_t datatype. */
155 # define GMX_SIMD_DOUBLE_WIDTH 4
158 /*! \brief Width of the \ref gmx_simd_fint32_t datatype. */
159 #define GMX_SIMD_FINT32_WIDTH GMX_SIMD_FLOAT_WIDTH
161 /*! \brief Width of the \ref gmx_simd_dint32_t datatype. */
162 #define GMX_SIMD_DINT32_WIDTH GMX_SIMD_DOUBLE_WIDTH
164 /*! \brief Accuracy of SIMD 1/sqrt(x) lookup. Used to determine number of iterations. */
165 #define GMX_SIMD_RSQRT_BITS 23
167 /*! \brief Accuracy of SIMD 1/x lookup. Used to determine number of iterations. */
168 #define GMX_SIMD_RCP_BITS 23
172 * \name SIMD implementation data types
175 /*! \libinternal \brief Float SIMD variable. Supported with GMX_SIMD_HAVE_FLOAT.
179 float r[GMX_SIMD_FLOAT_WIDTH]; /**< Implementation dependent. Don't touch. */
183 /*! \libinternal \brief Floating-point SIMD variable type in double precision.
185 * Supported with GMX_SIMD_HAVE_DOUBLE.
189 double r[GMX_SIMD_DOUBLE_WIDTH]; /**< Implementation dependent. Don't touch. */
193 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from float.
195 * This is also the widest integer SIMD type.
199 gmx_int32_t i[GMX_SIMD_FINT32_WIDTH]; /**< Implementation dependent. Don't touch. */
203 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from double.
205 * Available with GMX_SIMD_HAVE_DINT32.
209 gmx_int32_t i[GMX_SIMD_DINT32_WIDTH]; /**< Implementation dependent. Don't touch. */
213 /*! \libinternal \brief Boolean type for float SIMD data.
215 * You should likely use gmx_simd_bool_t
216 * (for gmx_simd_real_t) instead, unless you really know what you are doing.
220 gmx_int32_t b[GMX_SIMD_FLOAT_WIDTH]; /**< Implementation dependent. Don't touch. */
224 /*! \libinternal \brief Boolean type for double precision SIMD data.
226 * Use the generic gmx_simd_bool_t
227 * (for gmx_simd_real_t) instead, unless you really know what you are doing.
231 gmx_int32_t b[GMX_SIMD_DOUBLE_WIDTH]; /**< Implementation dependent. Don't touch. */
235 /*! \libinternal \brief Boolean type for integer datatypes corresponding to float SIMD. */
238 gmx_int32_t b[GMX_SIMD_FINT32_WIDTH]; /**< Implementation dependent. Don't touch. */
242 /*! \libinternal \brief Boolean type for integer datatypes corresponding to double SIMD.
244 * You should likely use gmx_simd_ibool_t (for gmx_simd_int32_t) instead,
245 * unless you really know what you are doing.
249 gmx_int32_t b[GMX_SIMD_DINT32_WIDTH]; /**< Implementation dependent. Don't touch. */
255 * \name SIMD implementation load/store operations for single precision floating point
259 /*! \brief Load \ref GMX_SIMD_FLOAT_WIDTH numbers from aligned memory.
261 * \param m Pointer to memory aligned to the SIMD width.
262 * \return SIMD variable with data loaded.
264 static gmx_inline gmx_simd_float_t
265 gmx_simd_load_f(const float *m)
270 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
277 /*! \brief Set all SIMD variable elements to float pointed to by m (unaligned).
279 * \param m Pointer to single value in memory.
280 * \return SIMD variable with all elements set to *m.
282 static gmx_inline gmx_simd_float_t
283 gmx_simd_load1_f(const float *m)
289 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
296 /*! \brief Set all SIMD float variable elements to the value r.
298 * \param r floating-point constant
299 * \return SIMD variable with all elements set to r.
301 static gmx_inline gmx_simd_float_t
302 gmx_simd_set1_f(float r)
307 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
314 /*! \brief Set all SIMD float variable elements to 0.0f.
316 * \return The value 0.0 in all elements of a SIMD variable.
318 static gmx_inline gmx_simd_float_t
324 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
331 /*! \brief Store the contents of the SIMD float variable pr to aligned memory m.
333 * \param[out] m Pointer to memory, aligned to SIMD width.
334 * \param a SIMD variable to store
336 static gmx_inline void
337 gmx_simd_store_f(float *m, gmx_simd_float_t a)
341 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
347 /*! \brief Load SIMD float from unaligned memory.
349 * Available with \ref GMX_SIMD_HAVE_LOADU.
351 * \param m Pointer to memory, no alignment requirement.
352 * \return SIMD variable with data loaded.
354 #define gmx_simd_loadu_f gmx_simd_load_f
356 /*! \brief Store SIMD float to unaligned memory.
358 * Available with \ref GMX_SIMD_HAVE_STOREU.
360 * \param[out] m Pointer to memory, no alignment requirement.
361 * \param a SIMD variable to store.
363 #define gmx_simd_storeu_f gmx_simd_store_f
367 * \name SIMD implementation load/store operations for double precision floating point
371 /*! \brief Load \ref GMX_SIMD_DOUBLE_WIDTH numbers from aligned memory.
373 * \copydetails gmx_simd_load_f
375 static gmx_inline gmx_simd_double_t
376 gmx_simd_load_d(const double *m)
381 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
388 /*! \brief Set all SIMD variable elements to double pointed to by m (unaligned).
390 * \copydetails gmx_simd_load1_f
392 static gmx_inline gmx_simd_double_t
393 gmx_simd_load1_d(const double *m)
399 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
406 /*! \brief Set all SIMD double variable elements to the value r.
408 * \copydetails gmx_simd_set1_f
410 static gmx_inline gmx_simd_double_t
411 gmx_simd_set1_d(double r)
416 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
423 /*! \brief Set all SIMD double variable elements to 0.0.
425 * \copydetails gmx_simd_setzero_f
427 static gmx_inline gmx_simd_double_t
433 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
440 /*! \brief Store the contents of the SIMD double variable pr to aligned memory m.
442 * \copydetails gmx_simd_store_f
444 static gmx_inline void
445 gmx_simd_store_d(double *m, gmx_simd_double_t a)
449 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
455 /*! \brief Load SIMD double from unaligned memory.
457 * Available with \ref GMX_SIMD_HAVE_LOADU.
459 * \copydetails gmx_simd_loadu_f
461 #define gmx_simd_loadu_d gmx_simd_load_d
463 /*! \brief Store SIMD double to unaligned memory.
465 * Available with \ref GMX_SIMD_HAVE_STOREU.
467 * \copydetails gmx_simd_storeu_f
469 #define gmx_simd_storeu_d gmx_simd_store_d
473 * \name SIMD implementation load/store operations for integers (corresponding to float)
477 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx_simd_float_t.
479 * You should typically call the real-precision \ref gmx_simd_load_i.
481 * \param m Pointer to memory, aligned to integer SIMD width.
482 * \return SIMD integer variable.
484 static gmx_inline gmx_simd_fint32_t
485 gmx_simd_load_fi(const gmx_int32_t * m)
489 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
496 /*! \brief Set SIMD from integer, width corresponds to \ref gmx_simd_float_t.
498 * You should typically call the real-precision \ref gmx_simd_set1_i.
500 * \param b integer value to set variable to.
501 * \return SIMD variable with all elements set to b.
503 static gmx_inline gmx_simd_fint32_t
504 gmx_simd_set1_fi(gmx_int32_t b)
508 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
515 /*! \brief Set all SIMD variable elements to 0, width corresponds to \ref gmx_simd_float_t.
517 * You should typically call the real-precision \ref gmx_simd_setzero_i.
519 * \return SIMD integer variable with all bits set to zero.
521 static gmx_inline gmx_simd_fint32_t
522 gmx_simd_setzero_fi()
527 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
534 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx_simd_float_t.
536 * You should typically call the real-precision \ref gmx_simd_store_i.
538 * \param m Memory aligned to integer SIMD width.
539 * \param a SIMD variable to store.
541 static gmx_inline gmx_simd_fint32_t
542 gmx_simd_store_fi(int * m, gmx_simd_fint32_t a)
545 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
552 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx_simd_float_t.
554 * You should typically call the real-precision \ref gmx_simd_loadu_i.
556 * Supported with \ref GMX_SIMD_HAVE_LOADU.
558 * \param m Pointer to memory, no alignment requirements.
559 * \return SIMD integer variable.
561 #define gmx_simd_loadu_fi gmx_simd_load_fi
563 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx_simd_float_t.
565 * You should typically call the real-precision \ref gmx_simd_storeu_i.
567 * Supported with \ref GMX_SIMD_HAVE_STOREU.
569 * \param m Memory pointer, no alignment requirements.
570 * \param a SIMD variable to store.
572 #define gmx_simd_storeu_fi gmx_simd_store_fi
574 /*! \brief Extract element with index i from \ref gmx_simd_fint32_t.
576 * You should typically call the real-precision \ref gmx_simd_extract_i.
578 * Available with \ref GMX_SIMD_HAVE_FINT32_EXTRACT.
580 * \param a SIMD variable
581 * \param index Position to extract integer from
582 * \return Single integer from position index in SIMD variable.
584 static gmx_inline gmx_int32_t
585 gmx_simd_extract_fi(gmx_simd_fint32_t a, int index)
592 * \name SIMD implementation load/store operations for integers (corresponding to double)
596 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx_simd_double_t.
598 * \copydetails gmx_simd_load_fi
600 static gmx_inline gmx_simd_dint32_t
601 gmx_simd_load_di(const gmx_int32_t * m)
605 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
612 /*! \brief Set SIMD from integer, width corresponds to \ref gmx_simd_double_t.
614 * \copydetails gmx_simd_set1_fi
616 static gmx_inline gmx_simd_dint32_t
617 gmx_simd_set1_di(gmx_int32_t b)
621 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
628 /*! \brief Set all SIMD variable elements to 0, width corresponds to \ref gmx_simd_double_t.
630 * \copydetails gmx_simd_setzero_fi
632 static gmx_inline gmx_simd_dint32_t
633 gmx_simd_setzero_di()
638 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
645 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx_simd_double_t.
647 * \copydetails gmx_simd_store_fi
649 static gmx_inline gmx_simd_dint32_t
650 gmx_simd_store_di(gmx_int32_t * m, gmx_simd_dint32_t a)
653 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
660 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx_simd_double_t.
662 * \copydetails gmx_simd_loadu_fi
664 #define gmx_simd_loadu_di gmx_simd_load_di
666 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx_simd_double_t.
668 * \copydetails gmx_simd_storeu_fi
670 #define gmx_simd_storeu_di gmx_simd_store_di
672 /*! \brief Extract element with index i from \ref gmx_simd_dint32_t.
674 * \copydetails gmx_simd_extract_fi
676 static gmx_inline gmx_int32_t
677 gmx_simd_extract_di(gmx_simd_dint32_t a, int index)
684 * \name SIMD implementation single precision floating-point bitwise logical operations
688 /*! \brief Bitwise and for two SIMD float variables. Supported with \ref GMX_SIMD_HAVE_LOGICAL.
690 * You should typically call the real-precision \ref gmx_simd_and_r.
694 * \return data1 & data2
696 static gmx_inline gmx_simd_float_t
697 gmx_simd_and_f(gmx_simd_float_t a, gmx_simd_float_t b)
702 gmx_int32_t val1, val2, res;
712 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
715 val1 = reinterpret_cast<int &>(a.r[i]);
716 val2 = reinterpret_cast<int &>(b.r[i]);
718 c.r[i] = reinterpret_cast<float &>(res);
722 conv1.i = conv1.i & conv2.i;
729 /*! \brief Bitwise andnot for SIMD float. c=(~a) & b. Supported with \ref GMX_SIMD_HAVE_LOGICAL.
731 * You should typically call the real-precision \ref gmx_simd_andnot_r.
735 * \return (~data1) & data2
737 static gmx_inline gmx_simd_float_t
738 gmx_simd_andnot_f(gmx_simd_float_t a, gmx_simd_float_t b)
743 gmx_int32_t val1, val2, res;
753 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
756 val1 = reinterpret_cast<int &>(a.r[i]);
757 val2 = reinterpret_cast<int &>(b.r[i]);
758 res = (~val1) & val2;
759 c.r[i] = reinterpret_cast<float &>(res);
763 conv1.i = (~conv1.i) & conv2.i;
770 /*! \brief Bitwise or for SIMD float. Supported with \ref GMX_SIMD_HAVE_LOGICAL.
772 * You should typically call the real-precision \ref gmx_simd_or_r.
776 * \return data1 | data2
778 static gmx_inline gmx_simd_float_t
779 gmx_simd_or_f(gmx_simd_float_t a, gmx_simd_float_t b)
784 gmx_int32_t val1, val2, res;
794 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
797 val1 = reinterpret_cast<int &>(a.r[i]);
798 val2 = reinterpret_cast<int &>(b.r[i]);
800 c.r[i] = reinterpret_cast<float &>(res);
804 conv1.i = conv1.i | conv2.i;
811 /*! \brief Bitwise xor for SIMD float. Supported with \ref GMX_SIMD_HAVE_LOGICAL.
813 * You should typically call the real-precision \ref gmx_simd_xor_r.
817 * \return data1 ^ data2
819 static gmx_inline gmx_simd_float_t
820 gmx_simd_xor_f(gmx_simd_float_t a, gmx_simd_float_t b)
825 gmx_int32_t val1, val2, res;
835 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
838 val1 = reinterpret_cast<int &>(a.r[i]);
839 val2 = reinterpret_cast<int &>(b.r[i]);
841 c.r[i] = reinterpret_cast<float &>(res);
845 conv1.i = conv1.i ^ conv2.i;
854 * \name SIMD implementation single precision floating-point arithmetics
857 /*! \brief Add two float SIMD variables.
859 * You should typically call the real-precision \ref gmx_simd_add_r.
865 static gmx_inline gmx_simd_float_t
866 gmx_simd_add_f(gmx_simd_float_t a, gmx_simd_float_t b)
871 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
873 c.r[i] = a.r[i] + b.r[i];
878 /*! \brief Subtract two SIMD variables.
880 * You should typically call the real-precision \ref gmx_simd_sub_r.
886 static gmx_inline gmx_simd_float_t
887 gmx_simd_sub_f(gmx_simd_float_t a, gmx_simd_float_t b)
892 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
894 c.r[i] = a.r[i] - b.r[i];
899 /*! \brief Multiply two SIMD variables.
901 * You should typically call the real-precision \ref gmx_simd_mul_r.
907 static gmx_inline gmx_simd_float_t
908 gmx_simd_mul_f(gmx_simd_float_t a, gmx_simd_float_t b)
913 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
915 c.r[i] = a.r[i]*b.r[i];
920 /*! \brief Fused-multiply-add. Result is a*b+c.
922 * You should typically call the real-precision \ref gmx_simd_fmadd_r.
924 * If \ref GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.
931 * For some implementations you save an instruction if you assign the result
934 #define gmx_simd_fmadd_f(a, b, c) gmx_simd_add_f(gmx_simd_mul_f(a, b), c)
937 /*! \brief Fused-multiply-subtract. Result is a*b-c.
939 * You should typically call the real-precision \ref gmx_simd_fmsub_r.
941 * If \ref GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.
948 * For some implementations you save an instruction if you assign the result
951 #define gmx_simd_fmsub_f(a, b, c) gmx_simd_sub_f(gmx_simd_mul_f(a, b), c)
954 /*! \brief Fused-negated-multiply-add. Result is -a*b+c.
956 * You should typically call the real-precision \ref gmx_simd_fnmadd_r.
958 * If \ref GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.
965 * For some implementations you save an instruction if you assign the result
968 #define gmx_simd_fnmadd_f(a, b, c) gmx_simd_sub_f(c, gmx_simd_mul_f(a, b))
971 /*! \brief Fused-negated-multiply-sub. Result is -a*b-c.
973 * You should typically call the real-precision \ref gmx_simd_fnmsub_r.
975 * If \ref GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction.
982 * For some implementations you save an instruction if you assign the result
985 #define gmx_simd_fnmsub_f(a, b, c) gmx_simd_sub_f(gmx_simd_setzero_f(), gmx_simd_fmadd_f(a, b, c))
987 /*! \brief SIMD 1.0/sqrt(x) lookup.
989 * You should typically call the real-precision \ref gmx_simd_rsqrt_r.
991 * This is a low-level instruction that should only be called from routines
992 * implementing the inverse square root in simd_math.h.
994 * \param x Argument, x>0
995 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
997 static gmx_inline gmx_simd_float_t
998 gmx_simd_rsqrt_f(gmx_simd_float_t x)
1003 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1005 b.r[i] = (x.r[i] > 0.0f) ? 1.0f/sqrtf(x.r[i]) : 0.0f;
1010 /*! \brief SIMD 1.0/x lookup.
1012 * You should typically call the real-precision \ref gmx_simd_rcp_r.
1014 * This is a low-level instruction that should only be called from routines
1015 * implementing the reciprocal in simd_math.h.
1017 * \param x Argument, x!=0
1018 * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
1020 static gmx_inline gmx_simd_float_t
1021 gmx_simd_rcp_f(gmx_simd_float_t x)
1026 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1028 b.r[i] = (x.r[i] != 0.0f) ? 1.0f/x.r[i] : 0.0f;
1033 /*! \brief SIMD Floating-point fabs().
1035 * You should typically call the real-precision \ref gmx_simd_fabs_r.
1037 * \param a any floating point values
1038 * \return fabs(a) for each element.
1040 static gmx_inline gmx_simd_float_t
1041 gmx_simd_fabs_f(gmx_simd_float_t a)
1046 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1048 c.r[i] = fabsf(a.r[i]);
1053 /*! \brief SIMD floating-point negate.
1055 * You should typically call the real-precision \ref gmx_simd_fneg_r.
1057 * \param a Any floating-point value
1060 static gmx_inline gmx_simd_float_t
1061 gmx_simd_fneg_f(gmx_simd_float_t a)
1066 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1073 /*! \brief Set each SIMD element to the largest from two variables.
1075 * You should typically call the real-precision \ref gmx_simd_max_r.
1077 * \param a Any floating-point value
1078 * \param b Any floating-point value
1079 * \return max(a,b) for each element.
1081 static gmx_inline gmx_simd_float_t
1082 gmx_simd_max_f(gmx_simd_float_t a, gmx_simd_float_t b)
1087 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1089 c.r[i] = (a.r[i] >= b.r[i] ? a.r[i] : b.r[i]);
1094 /*! \brief Set each SIMD element to the smallest from two variables.
1096 * You should typically call the real-precision \ref gmx_simd_min_r.
1098 * \param a Any floating-point value
1099 * \param b Any floating-point value
1100 * \return min(a,b) for each element.
1102 static gmx_inline gmx_simd_float_t
1103 gmx_simd_min_f(gmx_simd_float_t a, gmx_simd_float_t b)
1108 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1110 c.r[i] = (a.r[i] <= b.r[i] ? a.r[i] : b.r[i]);
1115 /*! \brief Round to nearest integer value (in floating-point format).
1117 * You should typically call the real-precision \ref gmx_simd_round_r.
1119 * \param a Any floating-point value
1120 * \return The nearest integer, represented in floating-point format.
1122 * \note The reference implementation rounds exact half-way cases
1123 * away from zero, whereas most SIMD intrinsics will round to nearest even.
1124 * This could be fixed by using rint/rintf, but the bigger problem is that
1125 * MSVC does not support full C99, and none of the round or rint
1126 * functions are defined. It's much easier to approximately implement
1127 * round() than rint(), so we do that and hope we never get bitten in
1128 * testing. (Thanks, Microsoft.)
1130 static gmx_inline gmx_simd_float_t
1131 gmx_simd_round_f(gmx_simd_float_t a)
1136 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1139 int temp = (a.r[i] >= 0.0f) ? (a.r[i] + 0.5f) : (a.r[i] - 0.5f);
1142 b.r[i] = roundf(a.r[i]);
1148 /*! \brief Truncate SIMD, i.e. round towards zero - common hardware instruction.
1150 * You should typically call the real-precision \ref gmx_simd_trunc_r.
1152 * \param a Any floating-point value
1153 * \return Integer rounded towards zero, represented in floating-point format.
1155 * \note This is truncation towards zero, not floor(). The reason for this
1156 * is that truncation is virtually always present as a dedicated hardware
1157 * instruction, but floor() frequently isn't.
1159 static gmx_inline gmx_simd_float_t
1160 gmx_simd_trunc_f(gmx_simd_float_t a)
1165 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1167 b.r[i] = truncf(a.r[i]);
1173 /*! \brief Fraction of the SIMD floating point number.
1175 * You should typically call the real-precision \ref gmx_simd_fraction_r.
1177 * \param a Any floating-point value
1178 * \return a-trunc(r)
1180 * To maximize compatibility, we use the same definition of fractions as used
1181 * e.g. for the AMD64 hardware instructions. This relies on truncation towards
1182 * zero for the integer part, and the remaining fraction can thus be either
1183 * positive or negative. As an example, -1.42 would return the fraction -0.42.
1185 * Hardware support with \ref GMX_SIMD_HAVE_FRACTION, otherwise emulated.
1187 static gmx_inline gmx_simd_float_t
1188 gmx_simd_fraction_f(gmx_simd_float_t a)
1190 return gmx_simd_sub_f(a, gmx_simd_trunc_f(a));
1193 /*! \brief Extract (integer) exponent from single precision SIMD.
1195 * You should typically call the real-precision \ref gmx_simd_get_exponent_r.
1197 * \param a Any floating-point value
1198 * \return Exponent value, represented in floating-point format.
1200 * The IEEE754 exponent field is selected, the bias removed, and it is converted to
1201 * a normal floating-point SIMD.
1203 static gmx_inline gmx_simd_float_t
1204 gmx_simd_get_exponent_f(gmx_simd_float_t a)
1206 /* Mask with ones for the exponent field of single precision fp */
1207 const gmx_int32_t expmask = 0x7f800000;
1217 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1220 /* Keep exponent, shift 23 right (float mantissa), remove bias (127) */
1221 b.r[i] = ((conv.i & expmask) >> 23) - 127;
1226 /*! \brief Get SIMD mantissa.
1228 * You should typically call the real-precision \ref gmx_simd_get_mantissa_r.
1230 * \param a Any floating-point value
1231 * \return Mantissa, represented in floating-point format.
1233 * The mantissa field is selected, and a new neutral exponent created.
1235 static gmx_inline gmx_simd_float_t
1236 gmx_simd_get_mantissa_f(gmx_simd_float_t a)
1238 const gmx_int32_t mantmask = 0x007fffff;
1239 const gmx_int32_t one = 0x3f800000;
1249 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1252 /* remove current exponent, add a biased exponent for 1.0 (i.e., 2^0=1) */
1253 conv.i = (conv.i & (mantmask)) | one;
1259 /*! \brief Set (integer) exponent from single precision floating-point SIMD.
1261 * You should typically call the real-precision \ref gmx_simd_set_exponent_r.
1263 * \param a A floating point value that will not overflow as 2^a.
1264 * \return 2^(round(a)).
1266 * The input is \a rounded to the nearest integer, the exponent bias is added
1267 * to this integer, and the bits are shifted to the IEEE754 exponent part of the number.
1269 * \note The argument will be \a rounded to nearest integer since that is what
1270 * we need for the exponential functions, and this integer x will be set as the
1271 * exponent so the new floating-point number will be 2^x.
1273 static gmx_inline gmx_simd_float_t
1274 gmx_simd_set_exponent_f(gmx_simd_float_t a)
1286 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1288 /* Critical to use same algorithm as for gmx_simd_round_f() */
1290 iexp = (a.r[i] >= 0.0f) ? (a.r[i] + 0.5f) : (a.r[i] - 0.5f);
1292 iexp = roundf(a.r[i]);
1294 /* Add bias (127), and shift 23 bits left (mantissa size) */
1295 conv.i = (iexp + 127) << 23;
1303 * \name SIMD implementation single precision floating-point comparisons, boolean, selection.
1306 /*! \brief SIMD a==b for single SIMD.
1308 * You should typically call the real-precision \ref gmx_simd_cmpeq_r.
1312 * \return Each element of the boolean will be set to true if a==b.
1314 * Beware that exact floating-point comparisons are difficult.
1316 static gmx_inline gmx_simd_fbool_t
1317 gmx_simd_cmpeq_f(gmx_simd_float_t a, gmx_simd_float_t b)
1322 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1324 c.b[i] = (a.r[i] == b.r[i]);
1329 /*! \brief SIMD a<b for single SIMD.
1331 * You should typically call the real-precision \ref gmx_simd_cmplt_r.
1335 * \return Each element of the boolean will be set to true if a<b.
1337 static gmx_inline gmx_simd_fbool_t
1338 gmx_simd_cmplt_f(gmx_simd_float_t a, gmx_simd_float_t b)
1343 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1345 c.b[i] = (a.r[i] < b.r[i]);
1350 /*! \brief SIMD a<=b for single SIMD.
1352 * You should typically call the real-precision \ref gmx_simd_cmple_r.
1356 * \return Each element of the boolean will be set to true if a<=b.
1358 static gmx_inline gmx_simd_fbool_t
1359 gmx_simd_cmple_f(gmx_simd_float_t a, gmx_simd_float_t b)
1364 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1366 c.b[i] = (a.r[i] <= b.r[i]);
1371 /*! \brief Logical \a and on single precision SIMD booleans.
1373 * You should typically call the real-precision \ref gmx_simd_and_r.
1375 * \param a logical vars 1
1376 * \param b logical vars 2
1377 * \return For each element, the result boolean is true if a \& b are true.
1379 * \note This is not necessarily a bitwise operation - the storage format
1380 * of booleans is implementation-dependent.
1382 * \sa gmx_simd_and_ib
1384 static gmx_inline gmx_simd_fbool_t
1385 gmx_simd_and_fb(gmx_simd_fbool_t a, gmx_simd_fbool_t b)
1390 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1392 c.b[i] = (a.b[i] && b.b[i]);
1397 /*! \brief Logical \a or on single precision SIMD booleans.
1399 * You should typically call the real-precision \ref gmx_simd_or_r.
1401 * \param a logical vars 1
1402 * \param b logical vars 2
1403 * \return For each element, the result boolean is true if a or b is true.
1405 * Note that this is not necessarily a bitwise operation - the storage format
1406 * of booleans is implementation-dependent.
1408 * \sa gmx_simd_or_ib
1410 static gmx_inline gmx_simd_fbool_t
1411 gmx_simd_or_fb(gmx_simd_fbool_t a, gmx_simd_fbool_t b)
1416 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1418 c.b[i] = (a.b[i] || b.b[i]);
1423 /*! \brief Returns non-zero if any of the boolean in x is True, otherwise 0.
1425 * You should typically call the real-precision \ref gmx_simd_anytrue_b.
1427 * \param a Logical variable.
1428 * \return non-zero if any element in a is true, otherwise 0.
1430 * The actual return value for truth will depend on the architecture,
1431 * so any non-zero value is considered truth.
1433 static gmx_inline int
1434 gmx_simd_anytrue_fb(gmx_simd_fbool_t a)
1440 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1442 anytrue = anytrue || a.b[i];
1447 /*! \brief Select from single precision SIMD variable where boolean is true.
1449 * You should typically call the real-precision \ref gmx_simd_blendzero_r.
1451 * \param a Floating-point variable to select from
1452 * \param sel Boolean selector
1453 * \return For each element, a is selected for true, 0 for false.
1455 static gmx_inline gmx_simd_float_t
1456 gmx_simd_blendzero_f(gmx_simd_float_t a, gmx_simd_fbool_t sel)
1461 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1463 c.r[i] = sel.b[i] ? a.r[i] : 0.0;
1468 /*! \brief Select from single precision SIMD variable where boolean is false.
1470 * You should typically call the real-precision \ref gmx_simd_blendnotzero_r.
1472 * \param a Floating-point variable to select from
1473 * \param sel Boolean selector
1474 * \return For each element, a is selected for false, 0 for true (sic).
1476 static gmx_inline gmx_simd_float_t
1477 gmx_simd_blendnotzero_f(gmx_simd_float_t a, gmx_simd_fbool_t sel)
1482 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1484 c.r[i] = sel.b[i] ? 0.0 : a.r[i];
1489 /*! \brief Vector-blend SIMD selection.
1491 * You should typically call the real-precision \ref gmx_simd_blendv_r.
1493 * \param a First source
1494 * \param b Second source
1495 * \param sel Boolean selector
1496 * \return For each element, select b if sel is true, a otherwise.
1498 static gmx_inline gmx_simd_float_t
1499 gmx_simd_blendv_f(gmx_simd_float_t a, gmx_simd_float_t b, gmx_simd_fbool_t sel)
1504 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1506 d.r[i] = sel.b[i] ? b.r[i] : a.r[i];
1511 /*! \brief Return sum of all elements in SIMD float variable.
1513 * You should typically call the real-precision \ref gmx_simd_reduce_r.
1515 * \param a SIMD variable to reduce/sum.
1516 * \return The sum of all elements in the argument variable.
1519 static gmx_inline float
1520 gmx_simd_reduce_f(gmx_simd_float_t a)
1525 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
1534 * \name SIMD implementation double precision floating-point bitwise logical operations
1537 /*! \brief Bitwise and for two SIMD double variables. Supported with \ref GMX_SIMD_HAVE_LOGICAL.
1539 * \copydetails gmx_simd_and_f
1541 static gmx_inline gmx_simd_double_t
1542 gmx_simd_and_d(gmx_simd_double_t a, gmx_simd_double_t b)
1544 gmx_simd_double_t c;
1547 gmx_int64_t val1, val2, res;
1557 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1560 val1 = reinterpret_cast<gmx_int64_t &>(a.r[i]);
1561 val2 = reinterpret_cast<gmx_int64_t &>(b.r[i]);
1563 c.r[i] = reinterpret_cast<double &>(res);
1567 conv1.i = conv1.i & conv2.i;
1574 /*! \brief Bitwise andnot for SIMD double. c=(~a) & b. Supported with \ref GMX_SIMD_HAVE_LOGICAL.
1576 * \copydetails gmx_simd_andnot_f
1578 static gmx_inline gmx_simd_double_t
1579 gmx_simd_andnot_d(gmx_simd_double_t a, gmx_simd_double_t b)
1581 gmx_simd_double_t c;
1584 gmx_int64_t val1, val2, res;
1594 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1597 val1 = reinterpret_cast<gmx_int64_t &>(a.r[i]);
1598 val2 = reinterpret_cast<gmx_int64_t &>(b.r[i]);
1599 res = (~val1) & val2;
1600 c.r[i] = reinterpret_cast<double &>(res);
1604 conv1.i = conv1.i & conv2.i;
1611 /*! \brief Bitwise or for SIMD double. Supported with \ref GMX_SIMD_HAVE_LOGICAL.
1613 * \copydetails gmx_simd_or_f
1615 static gmx_inline gmx_simd_double_t
1616 gmx_simd_or_d(gmx_simd_double_t a, gmx_simd_double_t b)
1618 gmx_simd_double_t c;
1621 gmx_int64_t val1, val2, res;
1631 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1634 val1 = reinterpret_cast<gmx_int64_t &>(a.r[i]);
1635 val2 = reinterpret_cast<gmx_int64_t &>(b.r[i]);
1637 c.r[i] = reinterpret_cast<double &>(res);
1641 conv1.i = conv1.i & conv2.i;
1648 /*! \brief Bitwise xor for SIMD double. Supported with \ref GMX_SIMD_HAVE_LOGICAL.
1650 * \copydetails gmx_simd_xor_f
1652 static gmx_inline gmx_simd_double_t
1653 gmx_simd_xor_d(gmx_simd_double_t a, gmx_simd_double_t b)
1655 gmx_simd_double_t c;
1658 gmx_int64_t val1, val2, res;
1668 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1671 val1 = reinterpret_cast<gmx_int64_t &>(a.r[i]);
1672 val2 = reinterpret_cast<gmx_int64_t &>(b.r[i]);
1674 c.r[i] = reinterpret_cast<double &>(res);
1678 conv1.i = conv1.i & conv2.i;
1687 * \name SIMD implementation double precision floating-point arithmetics
1690 /*! \brief Add two double SIMD variables.
1692 * \copydetails gmx_simd_add_f
1694 static gmx_inline gmx_simd_double_t
1695 gmx_simd_add_d(gmx_simd_double_t a, gmx_simd_double_t b)
1697 gmx_simd_double_t c;
1700 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1702 c.r[i] = a.r[i] + b.r[i];
1707 /*! \brief Add two float SIMD variables.
1709 * \copydetails gmx_simd_sub_f
1711 static gmx_inline gmx_simd_double_t
1712 gmx_simd_sub_d(gmx_simd_double_t a, gmx_simd_double_t b)
1714 gmx_simd_double_t c;
1717 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1719 c.r[i] = a.r[i] - b.r[i];
1724 /*! \brief Multiply two SIMD variables.
1726 * \copydetails gmx_simd_mul_f
1728 static gmx_inline gmx_simd_double_t
1729 gmx_simd_mul_d(gmx_simd_double_t a, gmx_simd_double_t b)
1731 gmx_simd_double_t c;
1734 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1736 c.r[i] = a.r[i]*b.r[i];
1741 /*! \brief Fused-multiply-add. Result is a*b+c.
1743 * \copydetails gmx_simd_fmadd_f
1745 #define gmx_simd_fmadd_d(a, b, c) gmx_simd_add_d(gmx_simd_mul_d(a, b), c)
1747 /*! \brief Fused-multiply-subtract. Result is a*b-c.
1749 * \copydetails gmx_simd_fmsub_f
1751 #define gmx_simd_fmsub_d(a, b, c) gmx_simd_sub_d(gmx_simd_mul_d(a, b), c)
1753 /*! \brief Fused-negated-multiply-add. Result is -a*b+c.
1755 * \copydetails gmx_simd_fnmadd_f
1757 #define gmx_simd_fnmadd_d(a, b, c) gmx_simd_sub_d(c, gmx_simd_mul_d(a, b))
1759 /*! \brief Fused-negated-multiply-add. Result is -a*b-c.
1761 * \copydetails gmx_simd_fnmsub_f
1763 #define gmx_simd_fnmsub_d(a, b, c) gmx_simd_sub_d(gmx_simd_setzero_d(), gmx_simd_fmadd_d(a, b, c))
1765 /*! \brief SIMD 1.0/sqrt(x) lookup.
1767 * \copydetails gmx_simd_rsqrt_f
1769 static gmx_inline gmx_simd_double_t
1770 gmx_simd_rsqrt_d(gmx_simd_double_t x)
1772 gmx_simd_double_t b;
1775 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1777 /* Sic - we only need single precision for the reference lookup, since
1778 * we have defined GMX_SIMD_RSQRT_BITS to 23.
1780 b.r[i] = (x.r[i] > 0.0) ? 1.0f/sqrtf(x.r[i]) : 0.0;
1785 /*! \brief 1.0/x lookup.
1787 * \copydetails gmx_simd_rcp_f
1789 static gmx_inline gmx_simd_double_t
1790 gmx_simd_rcp_d(gmx_simd_double_t x)
1792 gmx_simd_double_t b;
1795 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1797 /* Sic - we only need single precision for the reference lookup, since
1798 * we have defined GMX_SIMD_RCP_BITS to 23.
1800 b.r[i] = (x.r[i] != 0.0) ? 1.0f/x.r[i] : 0.0;
1805 /*! \brief SIMD Floating-point fabs().
1807 * \copydetails gmx_simd_fabs_f
1809 static gmx_inline gmx_simd_double_t
1810 gmx_simd_fabs_d(gmx_simd_double_t a)
1812 gmx_simd_double_t c;
1815 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1817 c.r[i] = fabs(a.r[i]);
1822 /*! \brief SIMD floating-point negate.
1824 * \copydetails gmx_simd_fneg_f
1826 static gmx_inline gmx_simd_double_t
1827 gmx_simd_fneg_d(gmx_simd_double_t a)
1829 gmx_simd_double_t c;
1832 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1839 /*! \brief Set each SIMD element to the largest from two variables.
1841 * \copydetails gmx_simd_max_f
1843 static gmx_inline gmx_simd_double_t
1844 gmx_simd_max_d(gmx_simd_double_t a, gmx_simd_double_t b)
1846 gmx_simd_double_t c;
1849 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1851 c.r[i] = (a.r[i] >= b.r[i] ? a.r[i] : b.r[i]);
1856 /*! \brief Set each SIMD element to the smallest from two variables.
1858 * \copydetails gmx_simd_min_f
1860 static gmx_inline gmx_simd_double_t
1861 gmx_simd_min_d(gmx_simd_double_t a, gmx_simd_double_t b)
1863 gmx_simd_double_t c;
1866 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1868 c.r[i] = (a.r[i] <= b.r[i] ? a.r[i] : b.r[i]);
1873 /*! \brief Round to nearest integer value (in double floating-point format).
1875 * \copydetails gmx_simd_round_f
1877 static gmx_inline gmx_simd_double_t
1878 gmx_simd_round_d(gmx_simd_double_t a)
1880 gmx_simd_double_t b;
1883 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1886 int temp = (a.r[i] >= 0.0) ? (a.r[i] + 0.5) : (a.r[i] - 0.5);
1889 b.r[i] = round(a.r[i]);
1895 /*! \brief Truncate SIMD, i.e. round towards zero - common hardware instruction.
1897 * \copydetails gmx_simd_trunc_f
1899 static gmx_inline gmx_simd_double_t
1900 gmx_simd_trunc_d(gmx_simd_double_t a)
1902 gmx_simd_double_t b;
1905 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1907 b.r[i] = trunc(a.r[i]);
1912 /*! \brief Fraction of the SIMD floating point number.
1914 * \copydetails gmx_simd_fraction_f
1916 static gmx_inline gmx_simd_double_t
1917 gmx_simd_fraction_d(gmx_simd_double_t a)
1919 return gmx_simd_sub_d(a, gmx_simd_trunc_d(a));
1923 /*! \brief Extract (integer) exponent from double precision SIMD.
1925 * \copydetails gmx_simd_get_exponent_f
1927 static gmx_inline gmx_simd_double_t
1928 gmx_simd_get_exponent_d(gmx_simd_double_t a)
1930 /* Mask with ones for the exponent field of double precision fp */
1931 const gmx_int64_t expmask = 0x7ff0000000000000LL;
1932 gmx_simd_double_t b;
1941 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1944 /* Zero everything but exponent field (remove sign),
1945 * shift 23 bits right (mantissa size), and remove exponent bias (1023).
1947 b.r[i] = ((conv.i & expmask) >> 52) - 1023;
1952 /*! \brief Get SIMD doublemantissa.
1954 * \copydetails gmx_simd_get_mantissa_f
1956 static gmx_inline gmx_simd_double_t
1957 gmx_simd_get_mantissa_d(gmx_simd_double_t a)
1959 const gmx_int64_t mantmask = 0x000fffffffffffffLL;
1960 const gmx_int64_t one = 0x3ff0000000000000LL;
1961 gmx_simd_double_t b;
1970 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1973 conv.i = (conv.i & (mantmask)) | one;
1979 /*! \brief Set (integer) exponent from single precision floating-point SIMD.
1981 * \copydetails gmx_simd_set_exponent_f
1983 static gmx_inline gmx_simd_double_t
1984 gmx_simd_set_exponent_d(gmx_simd_double_t a)
1986 gmx_simd_double_t b;
1996 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
1998 /* Critical to use same algorithm as for gmx_simd_round_d() */
2000 iexp = (a.r[i] >= 0.0) ? (a.r[i] + 0.5) : (a.r[i] - 0.5);
2002 iexp = round(a.r[i]);
2004 /* Add bias (1023), and shift 52 bits left (mantissa size) */
2005 conv.i = (iexp + 1023) << 52;
2013 * \name SIMD implementation double precision floating-point comparison, boolean, selection.
2016 /*! \brief SIMD a==b for double SIMD.
2018 * \copydetails gmx_simd_cmpeq_f
2020 static gmx_inline gmx_simd_dbool_t
2021 gmx_simd_cmpeq_d(gmx_simd_double_t a, gmx_simd_double_t b)
2026 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
2028 c.b[i] = (a.r[i] == b.r[i]);
2033 /*! \brief SIMD a<b for double SIMD.
2035 * \copydetails gmx_simd_cmplt_f
2037 static gmx_inline gmx_simd_dbool_t
2038 gmx_simd_cmplt_d(gmx_simd_double_t a, gmx_simd_double_t b)
2043 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
2045 c.b[i] = (a.r[i] < b.r[i]);
2050 /*! \brief SIMD a<=b for double SIMD.
2052 * \copydetails gmx_simd_cmple_f
2054 static gmx_inline gmx_simd_dbool_t
2055 gmx_simd_cmple_d(gmx_simd_double_t a, gmx_simd_double_t b)
2060 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
2062 c.b[i] = (a.r[i] <= b.r[i]);
2068 /*! \brief Logical \a and on double precision SIMD booleans.
2070 * \copydetails gmx_simd_and_fb
2072 static gmx_inline gmx_simd_dbool_t
2073 gmx_simd_and_db(gmx_simd_dbool_t a, gmx_simd_dbool_t b)
2078 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
2080 c.b[i] = (a.b[i] && b.b[i]);
2085 /*! \brief Logical \a or on double precision SIMD booleans.
2087 * \copydetails gmx_simd_or_fb
2089 static gmx_inline gmx_simd_dbool_t
2090 gmx_simd_or_db(gmx_simd_dbool_t a, gmx_simd_dbool_t b)
2095 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
2097 c.b[i] = (a.b[i] || b.b[i]);
2103 /*! \brief Returns non-zero if any of the boolean in x is True, otherwise 0.
2105 * \copydetails gmx_simd_anytrue_fb
2107 static gmx_inline int
2108 gmx_simd_anytrue_db(gmx_simd_dbool_t a)
2114 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
2116 anytrue = anytrue || a.b[i];
2122 /*! \brief Select from double SIMD variable where boolean is true.
2124 * \copydetails gmx_simd_blendzero_f
2126 static gmx_inline gmx_simd_double_t
2127 gmx_simd_blendzero_d(gmx_simd_double_t a, gmx_simd_dbool_t sel)
2129 gmx_simd_double_t c;
2132 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
2134 c.r[i] = sel.b[i] ? a.r[i] : 0.0;
2139 /*! \brief Select from double SIMD variable where boolean is false.
2141 * \copydetails gmx_simd_blendnotzero_f
2143 static gmx_inline gmx_simd_double_t
2144 gmx_simd_blendnotzero_d(gmx_simd_double_t a, gmx_simd_dbool_t sel)
2146 gmx_simd_double_t c;
2149 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
2151 c.r[i] = sel.b[i] ? 0.0 : a.r[i];
2156 /*! \brief Vector-blend double SIMD selection.
2158 * \copydetails gmx_simd_blendv_f
2160 static gmx_inline gmx_simd_double_t
2161 gmx_simd_blendv_d(gmx_simd_double_t a, gmx_simd_double_t b, gmx_simd_dbool_t sel)
2163 gmx_simd_double_t d;
2166 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
2168 d.r[i] = sel.b[i] ? b.r[i] : a.r[i];
2173 /*! \brief Return sum of all elements in SIMD double variable.
2175 * \copydetails gmx_simd_reduce_f
2178 static gmx_inline double
2179 gmx_simd_reduce_d(gmx_simd_double_t a)
2184 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
2193 * \name SIMD implementation integer (corresponding to float) bitwise logical operations
2197 /*! \brief SIMD integer shift left logical, based on immediate value.
2199 * You should typically call the real-precision \ref gmx_simd_slli_i.
2201 * Logical shift. Each element is shifted (independently) up to 32 positions
2202 * left, while zeros are shifted in from the right. Only available if
2203 * \ref GMX_SIMD_HAVE_FINT32_LOGICAL (single) or \ref GMX_SIMD_HAVE_DINT32_LOGICAL
2204 * (double) is defined.
2206 * \param a integer data to shift
2207 * \param n number of positions to shift left. n<=32.
2208 * \return shifted values
2210 static gmx_inline gmx_simd_fint32_t
2211 gmx_simd_slli_fi(gmx_simd_fint32_t a, int n)
2213 gmx_simd_fint32_t c;
2216 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2218 c.i[i] = a.i[i] << n;
2223 /*! \brief SIMD integer shift right logical, based on immediate value.
2225 * You should typically call the real-precision \ref gmx_simd_srli_i.
2227 * Logical shift. Each element is shifted (independently) up to 32 positions
2228 * right, while zeros are shifted in from the left. Only available if
2229 * \ref GMX_SIMD_HAVE_FINT32_LOGICAL (single) or \ref GMX_SIMD_HAVE_DINT32_LOGICAL
2230 * (double) is defined.
2232 * \param a integer data to shift
2233 * \param n number of positions to shift right. n<=32.
2234 * \return shifted values
2236 static gmx_inline gmx_simd_fint32_t
2237 gmx_simd_srli_fi(gmx_simd_fint32_t a, int n)
2239 gmx_simd_fint32_t c;
2242 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2244 c.i[i] = a.i[i] >> n;
2249 /*! \brief Integer SIMD bitwise and.
2251 * You should typically call the real-precision \ref gmx_simd_and_i.
2253 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL (single)
2254 * or \ref GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.
2256 * \note You can \a not use this operation directly to select based on a boolean
2257 * SIMD variable, since booleans are separate from integer SIMD. If that
2258 * is what you need, have a look at \ref gmx_simd_blendzero_i instead.
2260 * \param a first integer SIMD
2261 * \param b second integer SIMD
2262 * \return a \& b (bitwise and)
2264 static gmx_inline gmx_simd_fint32_t
2265 gmx_simd_and_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b)
2267 gmx_simd_fint32_t c;
2270 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2272 c.i[i] = a.i[i] & b.i[i];
2277 /*! \brief Integer SIMD bitwise not-and.
2279 * You should typically call the real-precision \ref gmx_simd_andnot_i.
2281 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL (single)
2282 * or \ref GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.
2284 * Note that you can NOT use this operation directly to select based on a boolean
2285 * SIMD variable, since booleans are separate from integer SIMD. If that
2286 * is what you need, have a look at \ref gmx_simd_blendnotzero_i instead.
2288 * \param a first integer SIMD
2289 * \param b second integer SIMD
2290 * \return (~a) \& b (bitwise andnot)
2292 static gmx_inline gmx_simd_fint32_t
2293 gmx_simd_andnot_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b)
2295 gmx_simd_fint32_t c;
2298 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2300 c.i[i] = (~a.i[i]) & b.i[i];
2305 /*! \brief Integer SIMD bitwise or.
2307 * You should typically call the real-precision \ref gmx_simd_or_i.
2309 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL (single)
2310 * or \ref GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.
2312 * \param a first integer SIMD
2313 * \param b second integer SIMD
2314 * \return a \| b (bitwise or)
2316 static gmx_inline gmx_simd_fint32_t
2317 gmx_simd_or_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b)
2319 gmx_simd_fint32_t c;
2322 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2324 c.i[i] = a.i[i] | b.i[i];
2329 /*! \brief Integer SIMD bitwise xor.
2331 * You should typically call the real-precision \ref gmx_simd_xor_i.
2333 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL (single)
2334 * or \ref GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined.
2336 * \param a first integer SIMD
2337 * \param b second integer SIMD
2338 * \return a ^ b (bitwise xor)
2340 static gmx_inline gmx_simd_fint32_t
2341 gmx_simd_xor_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b)
2343 gmx_simd_fint32_t c;
2346 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2348 c.i[i] = a.i[i] ^ b.i[i];
2355 * \name SIMD implementation integer (corresponding to float) arithmetics
2358 /*! \brief Add SIMD integers.
2360 * You should typically call the real-precision \ref gmx_simd_xor_i.
2362 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
2363 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.
2369 static gmx_inline gmx_simd_fint32_t
2370 gmx_simd_add_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b)
2372 gmx_simd_fint32_t c;
2375 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2377 c.i[i] = a.i[i] + b.i[i];
2382 /*! \brief Subtract SIMD integers.
2384 * You should typically call the real-precision \ref gmx_simd_xor_i.
2386 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
2387 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.
2393 static gmx_inline gmx_simd_fint32_t
2394 gmx_simd_sub_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b)
2396 gmx_simd_fint32_t c;
2399 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2401 c.i[i] = a.i[i] - b.i[i];
2406 /*! \brief Multiply SIMD integers.
2408 * You should typically call the real-precision \ref gmx_simd_xor_i.
2410 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
2411 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.
2417 * \note Only the low 32 bits are retained, so this can overflow.
2419 static gmx_inline gmx_simd_fint32_t
2420 gmx_simd_mul_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b)
2422 gmx_simd_fint32_t c;
2425 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2427 c.i[i] = a.i[i]*b.i[i];
2434 * \name SIMD implementation integer (corresponding to float) comparisons, boolean, selection
2438 /*! \brief Equality comparison of two integers corresponding to float values.
2440 * You should typically call the real-precision \ref gmx_simd_cmpeq_i.
2442 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
2443 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.
2445 * \param a SIMD integer1
2446 * \param b SIMD integer2
2447 * \return SIMD integer boolean with true for elements where a==b
2449 static gmx_inline gmx_simd_fibool_t
2450 gmx_simd_cmpeq_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b)
2452 gmx_simd_fibool_t c;
2455 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2457 c.b[i] = (a.i[i] == b.i[i]);
2462 /*! \brief Less-than comparison of two SIMD integers corresponding to float values.
2464 * You should typically call the real-precision \ref gmx_simd_cmplt_i.
2466 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
2467 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.
2469 * \param a SIMD integer1
2470 * \param b SIMD integer2
2471 * \return SIMD integer boolean with true for elements where a<b
2473 static gmx_inline gmx_simd_fibool_t
2474 gmx_simd_cmplt_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b)
2476 gmx_simd_fibool_t c;
2479 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2481 c.b[i] = (a.i[i] < b.i[i]);
2486 /*! \brief Logical AND on gmx_simd_fibool_t.
2488 * You should typically call the real-precision \ref gmx_simd_and_ib.
2490 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
2491 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.
2493 * \param a SIMD boolean 1
2494 * \param b SIMD boolean 2
2495 * \return True for elements where both a and b are true.
2497 static gmx_inline gmx_simd_fibool_t
2498 gmx_simd_and_fib(gmx_simd_fibool_t a, gmx_simd_fibool_t b)
2500 gmx_simd_fibool_t c;
2503 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2505 c.b[i] = (a.b[i] && b.b[i]);
2510 /*! \brief Logical OR on gmx_simd_fibool_t.
2512 * You should typically call the real-precision \ref gmx_simd_or_ib.
2514 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
2515 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.
2517 * \param a SIMD boolean 1
2518 * \param b SIMD boolean 2
2519 * \return True for elements where both a and b are true.
2521 static gmx_inline gmx_simd_fibool_t
2522 gmx_simd_or_fib(gmx_simd_fibool_t a, gmx_simd_fibool_t b)
2524 gmx_simd_fibool_t c;
2527 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2529 c.b[i] = (a.b[i] || b.b[i]);
2534 /*! \brief Returns non-zero if any of the boolean in x is True, otherwise 0.
2536 * You should typically call the real-precision \ref gmx_simd_anytrue_ib.
2538 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
2539 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.
2541 * The actual return value for "any true" will depend on the architecture.
2542 * Any non-zero value should be considered truth.
2544 * \param a SIMD boolean
2545 * \return Nonzero integer if any of the elements in a is true, otherwise 0.
2547 static gmx_inline int
2548 gmx_simd_anytrue_fib(gmx_simd_fibool_t a)
2554 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2556 anytrue = anytrue || a.b[i];
2561 /*! \brief Select from \ref gmx_simd_fint32_t variable where boolean is true.
2563 * You should typically call the real-precision \ref gmx_simd_blendzero_i.
2565 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
2566 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.
2568 * \param a SIMD integer to select from
2569 * \param sel Boolean selector
2570 * \return Elements from a where sel is true, 0 otherwise.
2572 static gmx_inline gmx_simd_fint32_t
2573 gmx_simd_blendzero_fi(gmx_simd_fint32_t a, gmx_simd_fibool_t sel)
2575 gmx_simd_fint32_t c;
2578 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2580 c.i[i] = sel.b[i] ? a.i[i] : 0.0;
2585 /*! \brief Select from \ref gmx_simd_fint32_t variable where boolean is false.
2587 * You should typically call the real-precision \ref gmx_simd_blendnotzero_i.
2589 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
2590 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.
2592 * \param a SIMD integer to select from
2593 * \param sel Boolean selector
2594 * \return Elements from a where sel is false, 0 otherwise (sic).
2596 static gmx_inline gmx_simd_fint32_t
2597 gmx_simd_blendnotzero_fi(gmx_simd_fint32_t a, gmx_simd_fibool_t sel)
2599 gmx_simd_fint32_t c;
2602 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2604 c.i[i] = sel.b[i] ? 0.0 : a.i[i];
2609 /*! \brief Vector-blend SIMD selection.
2611 * You should typically call the real-precision \ref gmx_simd_blendv_i.
2613 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
2614 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined.
2616 * \param a First source
2617 * \param b Second source
2618 * \param sel Boolean selector
2619 * \return For each element, select b if sel is true, a otherwise.
2621 static gmx_inline gmx_simd_fint32_t
2622 gmx_simd_blendv_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b, gmx_simd_fibool_t sel)
2624 gmx_simd_fint32_t d;
2627 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2629 d.i[i] = sel.b[i] ? b.i[i] : a.i[i];
2636 * \name SIMD implementation integer (corresponding to double) bitwise logical operations
2640 /*! \brief SIMD integer shift left, based on immediate value.
2642 * \copydetails gmx_simd_slli_fi
2644 static gmx_inline gmx_simd_dint32_t
2645 gmx_simd_slli_di(gmx_simd_dint32_t a, int n)
2647 gmx_simd_dint32_t c;
2650 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
2652 c.i[i] = a.i[i] << n;
2657 /*! \brief SIMD integer shift right, based on immediate value.
2659 * \copydetails gmx_simd_srli_fi
2661 static gmx_inline gmx_simd_dint32_t
2662 gmx_simd_srli_di(gmx_simd_dint32_t a, int n)
2664 gmx_simd_dint32_t c;
2667 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
2669 c.i[i] = a.i[i] >> n;
2674 /*! \brief Integer bitwise and for SIMD variables.
2676 * \copydetails gmx_simd_and_fi
2678 static gmx_inline gmx_simd_dint32_t
2679 gmx_simd_and_di(gmx_simd_dint32_t a, gmx_simd_dint32_t b)
2681 gmx_simd_dint32_t c;
2684 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
2686 c.i[i] = a.i[i] & b.i[i];
2691 /*! \brief Integer bitwise not-and for SIMD variables.
2693 * \copydetails gmx_simd_andnot_fi
2695 static gmx_inline gmx_simd_dint32_t
2696 gmx_simd_andnot_di(gmx_simd_dint32_t a, gmx_simd_dint32_t b)
2698 gmx_simd_dint32_t c;
2701 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
2703 c.i[i] = (~a.i[i]) & b.i[i];
2708 /*! \brief Integer bitwise or for SIMD variables.
2710 * \copydetails gmx_simd_or_fi
2712 static gmx_inline gmx_simd_dint32_t
2713 gmx_simd_or_di(gmx_simd_dint32_t a, gmx_simd_dint32_t b)
2715 gmx_simd_dint32_t c;
2718 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
2720 c.i[i] = a.i[i] | b.i[i];
2725 /*! \brief Integer bitwise xor for SIMD variables.
2727 * \copydetails gmx_simd_xor_fi
2729 static gmx_inline gmx_simd_dint32_t
2730 gmx_simd_xor_di(gmx_simd_dint32_t a, gmx_simd_dint32_t b)
2732 gmx_simd_dint32_t c;
2735 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
2737 c.i[i] = a.i[i] ^ b.i[i];
2744 * \name SIMD implementation integer (corresponding to double) arithmetics
2747 /*! \brief Add SIMD integers, corresponding to double precision.
2749 * \copydetails gmx_simd_add_fi
2751 static gmx_inline gmx_simd_dint32_t
2752 gmx_simd_add_di(gmx_simd_dint32_t a, gmx_simd_dint32_t b)
2754 gmx_simd_dint32_t c;
2757 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
2759 c.i[i] = a.i[i] + b.i[i];
2764 /*! \brief Subtract SIMD integers, corresponding to double precision.
2766 * \copydetails gmx_simd_sub_fi
2768 static gmx_inline gmx_simd_dint32_t
2769 gmx_simd_sub_di(gmx_simd_dint32_t a, gmx_simd_dint32_t b)
2771 gmx_simd_dint32_t c;
2774 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
2776 c.i[i] = a.i[i] - b.i[i];
2781 /*! \brief Multiply SIMD integers, corresponding to double precision.
2783 * \copydetails gmx_simd_mul_fi
2785 static gmx_inline gmx_simd_dint32_t
2786 gmx_simd_mul_di(gmx_simd_dint32_t a, gmx_simd_dint32_t b)
2788 gmx_simd_dint32_t c;
2791 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
2793 c.i[i] = a.i[i]*b.i[i];
2800 * \name SIMD implementation integer (corresponding to double) comparisons, boolean selection
2804 /*! \brief Equality comparison of two ints corresponding to double SIMD data.
2806 * \copydetails gmx_simd_cmpeq_fi
2808 static gmx_inline gmx_simd_dibool_t
2809 gmx_simd_cmpeq_di(gmx_simd_dint32_t a, gmx_simd_dint32_t b)
2811 gmx_simd_dibool_t c;
2814 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
2816 c.b[i] = (a.i[i] == b.i[i]);
2821 /*! \brief Less-than comparison of two ints corresponding to double SIMD data.
2823 * \copydetails gmx_simd_cmplt_fi
2825 static gmx_inline gmx_simd_dibool_t
2826 gmx_simd_cmplt_di(gmx_simd_dint32_t a, gmx_simd_dint32_t b)
2828 gmx_simd_dibool_t c;
2831 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
2833 c.b[i] = (a.i[i] < b.i[i]);
2838 /*! \brief Logical AND on gmx_simd_dibool_t.
2840 * \copydetails gmx_simd_and_fib
2842 static gmx_inline gmx_simd_dibool_t
2843 gmx_simd_and_dib(gmx_simd_dibool_t a, gmx_simd_dibool_t b)
2845 gmx_simd_dibool_t c;
2848 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
2850 c.b[i] = (a.b[i] && b.b[i]);
2855 /*! \brief Logical OR on gmx_simd_dibool_t.
2857 * \copydetails gmx_simd_or_fib
2859 static gmx_inline gmx_simd_dibool_t
2860 gmx_simd_or_dib(gmx_simd_dibool_t a, gmx_simd_dibool_t b)
2862 gmx_simd_dibool_t c;
2865 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
2867 c.b[i] = (a.b[i] || b.b[i]);
2872 /*! \brief Returns non-zero if any of the double-int SIMD booleans in x is True, otherwise 0.
2874 * \copydetails gmx_simd_anytrue_fib
2876 static gmx_inline int
2877 gmx_simd_anytrue_dib(gmx_simd_dibool_t a)
2883 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
2885 anytrue = anytrue || a.b[i];
2890 /*! \brief Select from SIMD ints (corresponding to double) where boolean is true.
2892 * \copydetails gmx_simd_blendzero_fi
2894 static gmx_inline gmx_simd_dint32_t
2895 gmx_simd_blendzero_di(gmx_simd_dint32_t a, gmx_simd_dibool_t sel)
2897 gmx_simd_dint32_t c;
2900 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
2902 c.i[i] = sel.b[i] ? a.i[i] : 0.0;
2907 /*! \brief Select from SIMD ints (corresponding to double) where boolean is false.
2909 * \copydetails gmx_simd_blendnotzero_fi
2911 static gmx_inline gmx_simd_dint32_t
2912 gmx_simd_blendnotzero_di(gmx_simd_dint32_t a, gmx_simd_dibool_t sel)
2914 gmx_simd_dint32_t c;
2917 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
2919 c.i[i] = sel.b[i] ? 0.0 : a.i[i];
2924 /*! \brief Vector-blend SIMD selection for double-int SIMD.
2926 * \copydetails gmx_simd_blendv_fi
2928 static gmx_inline gmx_simd_dint32_t
2929 gmx_simd_blendv_di(gmx_simd_dint32_t a, gmx_simd_dint32_t b, gmx_simd_dibool_t sel)
2931 gmx_simd_dint32_t d;
2934 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
2936 d.i[i] = sel.b[i] ? b.i[i] : a.i[i];
2943 * \name SIMD implementation conversion operations
2947 /*! \brief Round single precision floating point to integer.
2949 * You should typically call the real-precision \ref gmx_simd_cvt_r2i.
2951 * \param a SIMD floating-point
2952 * \return SIMD integer, rounded to nearest integer.
2954 static gmx_inline gmx_simd_fint32_t
2955 gmx_simd_cvt_f2i(gmx_simd_float_t a)
2957 gmx_simd_fint32_t b;
2960 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2963 b.i[i] = (a.r[i] >= 0.0) ? (a.r[i] + 0.5) : (a.r[i] - 0.5);
2965 b.i[i] = roundf(a.r[i]);
2971 /*! \brief Truncate single precision floating point to integer.
2973 * You should typically call the real-precision \ref gmx_simd_cvtt_r2i.
2975 * \param a SIMD floating-point
2976 * \return SIMD integer, truncated towards zero.
2978 static gmx_inline gmx_simd_fint32_t
2979 gmx_simd_cvtt_f2i(gmx_simd_float_t a)
2981 gmx_simd_fint32_t b;
2984 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
2991 /*! \brief Convert integer to single precision floating-point.
2993 * You should typically call the real-precision \ref gmx_simd_cvt_i2r.
2995 * \param a SIMD integer
2996 * \return SIMD floating-pint
2998 static gmx_inline gmx_simd_float_t
2999 gmx_simd_cvt_i2f(gmx_simd_fint32_t a)
3004 for (i = 0; i < GMX_SIMD_FINT32_WIDTH; i++)
3011 /*! \brief Round double precision floating point to integer.
3013 * \copydetails gmx_simd_cvt_f2i
3015 static gmx_inline gmx_simd_dint32_t
3016 gmx_simd_cvt_d2i(gmx_simd_double_t a)
3018 gmx_simd_dint32_t b;
3021 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
3024 b.i[i] = (a.r[i] >= 0.0) ? (a.r[i] + 0.5) : (a.r[i] - 0.5);
3026 b.i[i] = round(a.r[i]);
3032 /*! \brief Truncate double precision floating point to integer.
3034 * \copydetails gmx_simd_cvtt_f2i
3036 static gmx_inline gmx_simd_dint32_t
3037 gmx_simd_cvtt_d2i(gmx_simd_double_t a)
3039 gmx_simd_dint32_t b;
3042 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
3049 /*! \brief Convert integer to single precision floating-point.
3051 * \copydetails gmx_simd_cvt_i2f
3053 static gmx_inline gmx_simd_double_t
3054 gmx_simd_cvt_i2d(gmx_simd_dint32_t a)
3056 gmx_simd_double_t b;
3059 for (i = 0; i < GMX_SIMD_DINT32_WIDTH; i++)
3066 /*! \brief Convert from float boolean to corresponding integer boolean.
3068 * You should typically call the real-precision \ref gmx_simd_cvt_b2ib.
3070 * \param a Boolean corresponding to SIMD floating-point
3071 * \return Boolean that can be applied to SIMD integer operations.
3073 static gmx_inline gmx_simd_fibool_t
3074 gmx_simd_cvt_fb2fib(gmx_simd_fbool_t a)
3076 gmx_simd_fibool_t b;
3079 /* Integer width >= float width */
3080 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
3087 /*! \brief Convert from integer boolean (corresponding to float) to float boolean.
3089 * You should typically call the real-precision \ref gmx_simd_cvt_ib2b.
3091 * \param a Boolean corresponding to SIMD integer
3092 * \return Boolean that can be applied to SIMD floating-point.
3094 static gmx_inline gmx_simd_fbool_t
3095 gmx_simd_cvt_fib2fb(gmx_simd_fibool_t a)
3100 /* Integer width >= float width */
3101 for (i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
3108 /*! \brief Convert from double boolean to corresponding integer boolean.
3110 * \copydetails gmx_simd_cvt_fb2fib
3112 static gmx_inline gmx_simd_dibool_t
3113 gmx_simd_cvt_db2dib(gmx_simd_dbool_t a)
3115 gmx_simd_dibool_t b;
3118 /* Integer width >= double width */
3119 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
3126 /*! \brief Convert from integer boolean (corresponding to double) to double boolean.
3128 * \copydetails gmx_simd_cvt_fib2fb
3130 static gmx_inline gmx_simd_dbool_t
3131 gmx_simd_cvt_dib2db(gmx_simd_dibool_t a)
3136 /* Integer width >= double width */
3137 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
3144 /*! \brief Convert SIMD float to double.
3146 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
3147 * \ref GMX_SIMD_DOUBLE_WIDTH.
3149 * Float/double conversions are complex since the SIMD width could either
3150 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
3151 * need to check for the width in the code, and have different code paths.
3153 * \param f Single-precision SIMD variable
3154 * \return Double-precision SIMD variable of the same width
3156 static gmx_inline gmx_simd_double_t
3157 gmx_simd_cvt_f2d(gmx_simd_float_t f)
3159 gmx_simd_double_t d;
3160 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
3162 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
3167 gmx_fatal(FARGS, "gmx_simd_cvt_f2d() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
3168 /* Avoid compiler warnings */
3174 /*! \brief Convert SIMD double to float.
3176 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
3177 * \ref GMX_SIMD_DOUBLE_WIDTH.
3179 * Float/double conversions are complex since the SIMD width could either
3180 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
3181 * need to check for the width in the code, and have different code paths.
3183 * \param d Double-precision SIMD variable
3184 * \return Single-precision SIMD variable of the same width
3186 static gmx_inline gmx_simd_float_t
3187 gmx_simd_cvt_d2f(gmx_simd_double_t d)
3190 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
3192 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
3197 gmx_fatal(FARGS, "gmx_simd_cvt_d2f() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
3198 /* Avoid compiler warnings */
3204 /*! \brief Convert SIMD float to double.
3206 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
3207 * as \ref GMX_SIMD_DOUBLE_WIDTH.
3209 * Float/double conversions are complex since the SIMD width could either
3210 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
3211 * need to check for the width in the code, and have different code paths.
3213 * \param f Single-precision SIMD variable
3214 * \param[out] d0 Double-precision SIMD variable, first half of values from f.
3215 * \param[out] d1 Double-precision SIMD variable, second half of values from f.
3217 static gmx_inline void
3218 gmx_simd_cvt_f2dd(gmx_simd_float_t f, gmx_simd_double_t *d0, gmx_simd_double_t *d1)
3220 #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
3222 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
3225 d1->r[i] = f.r[GMX_SIMD_DOUBLE_WIDTH+i];
3228 gmx_fatal(FARGS, "gmx_simd_cvt_f2dd() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
3229 /* Avoid compiler warnings about unused arguments */
3235 /*! \brief Convert SIMD double to float.
3237 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
3238 * as \ref GMX_SIMD_DOUBLE_WIDTH.
3240 * Float/double conversions are complex since the SIMD width could either
3241 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
3242 * need to check for the width in the code, and have different code paths.
3244 * \param d0 Double-precision SIMD variable, first half of values to put in f.
3245 * \param d1 Double-precision SIMD variable, second half of values to put in f.
3246 * \return Single-precision SIMD variable with all values.
3248 static gmx_inline gmx_simd_float_t
3249 gmx_simd_cvt_dd2f(gmx_simd_double_t d0, gmx_simd_double_t d1)
3252 #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
3254 for (i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
3257 f.r[GMX_SIMD_DOUBLE_WIDTH+i] = d1.r[i];
3260 gmx_fatal(FARGS, "gmx_simd_cvt_dd2f() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
3261 /* Avoid compiler warnings about unused arguments & uninitialized f */
3262 f.r[0] = d0.r[0] + d1.r[0];
3269 /*! \name SIMD4. Constant width-4 SIMD types and instructions
3273 #if (GMX_SIMD_FLOAT_WIDTH == 4) || (defined DOXYGEN)
3276 /*! \brief SIMD4 float type. Available with \ref GMX_SIMD4_HAVE_FLOAT.
3278 * Unless you specifically want a single-precision type you should check
3279 * \ref gmx_simd4_real_t instead.
3281 * While the SIMD4 datatype is identical to the normal SIMD type in the
3282 * reference implementation, this will often not be the case for
3283 * other architectures.
3285 # define gmx_simd4_float_t gmx_simd_float_t
3287 /*! \brief Load SIMD4 float from aligned memory.
3288 * \copydetails gmx_simd_load_f
3290 # define gmx_simd4_load_f gmx_simd_load_f
3292 /*! \brief Set all elements of SIMD4 float from single pointer.
3293 * \copydetails gmx_simd_load1_f
3295 # define gmx_simd4_load1_f gmx_simd_load1_f
3297 /*! \brief Set all SIMD4 float elements to the value r.
3298 * \copydetails gmx_simd_set1_f
3300 # define gmx_simd4_set1_f gmx_simd_set1_f
3302 /*! \brief Store the contents of SIMD4 float pr to aligned memory m.
3303 * \copydetails gmx_simd_store_f
3305 # define gmx_simd4_store_f gmx_simd_store_f
3307 /*! \brief Load SIMD4 float from unaligned memory.
3308 * \copydetails gmx_simd_loadu_f
3310 # define gmx_simd4_loadu_f gmx_simd_loadu_f
3312 /*! \brief Store SIMD4 float to unaligned memory.
3313 * \copydetails gmx_simd_storeu_f
3315 # define gmx_simd4_storeu_f gmx_simd_storeu_f
3317 /*! \brief Set all SIMD4 float elements to 0.
3318 * \copydetails gmx_simd_setzero_f
3320 # define gmx_simd4_setzero_f gmx_simd_setzero_f
3322 /*! \brief Bitwise and for two SIMD4 float variables.
3323 * \copydetails gmx_simd_and_f
3325 # define gmx_simd4_and_f gmx_simd_and_f
3327 /*! \brief Bitwise andnot for two SIMD4 float variables. c=(~a) & b.
3328 * \copydetails gmx_simd_andnot_f
3330 # define gmx_simd4_andnot_f gmx_simd_andnot_f
3332 /*! \brief Bitwise or for two SIMD4 float variables.
3333 * \copydetails gmx_simd_or_f
3335 # define gmx_simd4_or_f gmx_simd_or_f
3337 /*! \brief Bitwise xor for two SIMD4 float variables.
3338 * \copydetails gmx_simd_xor_f
3340 # define gmx_simd4_xor_f gmx_simd_xor_f
3342 /*! \brief Add two SIMD4 float variables.
3343 * \copydetails gmx_simd_add_f
3345 # define gmx_simd4_add_f gmx_simd_add_f
3347 /*! \brief Subtract two SIMD4 float variables.
3348 * \copydetails gmx_simd_sub_f
3350 # define gmx_simd4_sub_f gmx_simd_sub_f
3352 /*! \brief Multiply two SIMD4 float variables.
3353 * \copydetails gmx_simd_mul_f
3355 # define gmx_simd4_mul_f gmx_simd_mul_f
3357 /*! \brief Fused-multiply-add for SIMD4 float. Result is a*b+c.
3358 * \copydetails gmx_simd_fmadd_f
3360 # define gmx_simd4_fmadd_f gmx_simd_fmadd_f
3362 /*! \brief Fused-multiply-subtract for SIMD4 float. Result is a*b-c.
3363 * \copydetails gmx_simd_fmsub_f
3365 # define gmx_simd4_fmsub_f gmx_simd_fmsub_f
3367 /*! \brief Fused-negated-multiply-add for SIMD4 float. Result is -a*b+c.
3368 * \copydetails gmx_simd_fnmadd_f
3370 # define gmx_simd4_fnmadd_f gmx_simd_fnmadd_f
3372 /*! \brief Fused-negated-multiply-add for SIMD4 float. Result is -a*b-c.
3373 * \copydetails gmx_simd_fnmsub_f
3375 # define gmx_simd4_fnmsub_f gmx_simd_fnmsub_f
3377 /*! \brief Lookup of approximate 1/sqrt(x) for SIMD4 float.
3378 * \copydetails gmx_simd_rsqrt_f
3380 # define gmx_simd4_rsqrt_f gmx_simd_rsqrt_f
3382 /*! \brief Floating-point absolute value for SIMD4 float.
3383 * \copydetails gmx_simd_fabs_f
3385 # define gmx_simd4_fabs_f gmx_simd_fabs_f
3387 /*! \brief Floating-point negate for SIMD4 float.
3388 * \copydetails gmx_simd_fneg_f
3390 # define gmx_simd4_fneg_f gmx_simd_fneg_f
3392 /*! \brief Set each SIMD4 float element to the largest from two variables.
3393 * \copydetails gmx_simd_max_f
3395 # define gmx_simd4_max_f gmx_simd_max_f
3397 /*! \brief Set each SIMD4 float element to the smallest from two variables.
3398 * \copydetails gmx_simd_min_f
3400 # define gmx_simd4_min_f gmx_simd_min_f
3402 /*! \brief Round to nearest integer value for SIMD4 float.
3403 * \copydetails gmx_simd_round_f
3405 # define gmx_simd4_round_f gmx_simd_round_f
3407 /*! \brief Round to largest integral value for SIMD4 float.
3408 * \copydetails gmx_simd_trunc_f
3410 # define gmx_simd4_trunc_f gmx_simd_trunc_f
3412 /*! \brief Return dot product of two single precision SIMD4 variables.
3414 * The dot product is calculated between the first three elements in the two
3415 * vectors, while the fourth is ignored. The result is returned as a scalar.
3419 * \result a[0]*b[0]+a[1]*b[1]+a[2]*b[2], returned as scalar. Last element is ignored.
3421 static gmx_inline float
3422 gmx_simd4_dotproduct3_f(gmx_simd_float_t a, gmx_simd_float_t b)
3424 return a.r[0]*b.r[0]+a.r[1]*b.r[1]+a.r[2]*b.r[2];
3427 /*! \brief SIMD4 variable type to use for logical comparisons on floats.
3428 * \copydetails gmx_simd_fbool_t
3430 # define gmx_simd4_fbool_t gmx_simd_fbool_t
3432 /*! \brief Equality comparison of two single precision SIMD4.
3433 * \copydetails gmx_simd_cmpeq_f
3435 # define gmx_simd4_cmpeq_f gmx_simd_cmpeq_f
3437 /*! \brief Less-than comparison of two single precision SIMD4.
3438 * \copydetails gmx_simd_cmplt_f
3440 # define gmx_simd4_cmplt_f gmx_simd_cmplt_f
3442 /*! \brief Less-than comparison of two single precision SIMD4.
3443 * \copydetails gmx_simd_cmple_f
3445 # define gmx_simd4_cmple_f gmx_simd_cmple_f
3447 /*! \brief Logical AND on float SIMD4 booleans.
3448 * \copydetails gmx_simd_and_fb
3450 # define gmx_simd4_and_fb gmx_simd_and_fb
3452 /*! \brief Logical OR on float SIMD4 booleans.
3453 * \copydetails gmx_simd_or_fb
3455 # define gmx_simd4_or_fb gmx_simd_or_fb
3457 /*! \brief Returns non-zero if any of the SIMD4 boolean in x is True.
3458 * \copydetails gmx_simd_anytrue_fb
3460 # define gmx_simd4_anytrue_fb gmx_simd_anytrue_fb
3462 /*! \brief Select from single precision SIMD4 variable where boolean is true.
3463 * \copydetails gmx_simd_blendzero_f
3465 # define gmx_simd4_blendzero_f gmx_simd_blendzero_f
3467 /*! \brief Select from single precision SIMD4 variable where boolean is false.
3468 * \copydetails gmx_simd_blendnotzero_f
3470 # define gmx_simd4_blendnotzero_f gmx_simd_blendnotzero_f
3472 /*! \brief Vector-blend instruction form SIMD4 float.
3473 * \copydetails gmx_simd_blendv_f
3475 # define gmx_simd4_blendv_f gmx_simd_blendv_f
3477 /*! \brief Return sum of all elements in SIMD4 float.
3478 * \copydetails gmx_simd_reduce_f
3480 # define gmx_simd4_reduce_f gmx_simd_reduce_f
3482 #else /* GMX_SIMD_FLOAT_WIDTH!=4 */
3483 # undef GMX_SIMD4_HAVE_FLOAT
3487 #if (GMX_SIMD_DOUBLE_WIDTH == 4) || (defined DOXYGEN)
3489 /*! \brief SIMD4 double type. Available with \ref GMX_SIMD4_HAVE_DOUBLE.
3491 * Unless you specifically want a double-precision type you should check
3492 * \ref gmx_simd4_real_t instead.
3494 * While the SIMD4 datatype is identical to the normal SIMD type in the
3495 * reference implementation, this will often not be the case for
3496 * other architectures.
3498 # define gmx_simd4_double_t gmx_simd_double_t
3500 /*! \brief Double precision SIMD4 load aligned.
3501 * \copydetails gmx_simd_load_d
3503 # define gmx_simd4_load_d gmx_simd_load_d
3505 /*! \brief Double precision SIMD4 load single value to all elements.
3506 * \copydetails gmx_simd_load1_d
3508 # define gmx_simd4_load1_d gmx_simd_load1_d
3510 /*! \brief Double precision SIMD4 set all elements from value.
3511 * \copydetails gmx_simd_set1_d
3513 # define gmx_simd4_set1_d gmx_simd_set1_d
3515 /*! \brief Double precision SIMD4 store to aligned memory.
3516 * \copydetails gmx_simd_store_d
3518 # define gmx_simd4_store_d gmx_simd_store_d
3520 /*! \brief Load unaligned SIMD4 double.
3521 * \copydetails gmx_simd_loadu_d
3523 # define gmx_simd4_loadu_d gmx_simd_loadu_d
3525 /*! \brief Store unaligned SIMD4 double.
3526 * \copydetails gmx_simd_storeu_d
3528 # define gmx_simd4_storeu_d gmx_simd_storeu_d
3530 /*! \brief Set all elements in SIMD4 double to 0.0.
3531 * \copydetails gmx_simd_setzero_d
3533 # define gmx_simd4_setzero_d gmx_simd_setzero_d
3535 /*! \brief Bitwise and for two SIMD4 double variables.
3536 * \copydetails gmx_simd_and_d
3538 # define gmx_simd4_and_d gmx_simd_and_d
3540 /*! \brief Bitwise andnot for SIMD4 double. c=(~a) & b.
3541 * \copydetails gmx_simd_andnot_d
3543 # define gmx_simd4_andnot_d gmx_simd_andnot_d
3545 /*! \brief Bitwise or for SIMD4 double.
3546 * \copydetails gmx_simd_or_d
3548 # define gmx_simd4_or_d gmx_simd_or_d
3550 /*! \brief Bitwise xor for SIMD4 double.
3551 * \copydetails gmx_simd_xor_d
3553 # define gmx_simd4_xor_d gmx_simd_xor_d
3555 /*! \brief Add two SIMD4 double values.
3556 * \copydetails gmx_simd_add_d
3558 # define gmx_simd4_add_d gmx_simd_add_d
3560 /*! \brief Subtract two SIMD4 double values.
3561 * \copydetails gmx_simd_sub_d
3563 # define gmx_simd4_sub_d gmx_simd_sub_d
3565 /*! \brief Multiply two SIMD4 double values.
3566 * \copydetails gmx_simd_mul_d
3568 # define gmx_simd4_mul_d gmx_simd_mul_d
3570 /*! \brief Fused-multiply-add for SIMD4 double. Result is a*b+c.
3571 * \copydetails gmx_simd_fmadd_d
3573 # define gmx_simd4_fmadd_d gmx_simd_fmadd_d
3575 /*! \brief Fused-multiply-subtract for SIMD4 double. Result is a*b-c.
3576 * \copydetails gmx_simd_fmsub_d
3578 # define gmx_simd4_fmsub_d gmx_simd_fmsub_d
3580 /*! \brief Fused-negated-multiply-add for SIMD4 double. Result is -a*b+c.
3581 * \copydetails gmx_simd_fnmadd_d
3583 # define gmx_simd4_fnmadd_d gmx_simd_fnmadd_d
3585 /*! \brief Fused-negated-multiply-sub for SIMD4 double. Result is -a*b-c.
3586 * \copydetails gmx_simd_fnmsub_d
3588 # define gmx_simd4_fnmsub_d gmx_simd_fnmsub_d
3590 /*! \brief SIMD4 double 1.0/sqrt(x) lookup.
3591 * \copydetails gmx_simd_rsqrt_d
3593 # define gmx_simd4_rsqrt_d gmx_simd_rsqrt_d
3595 /*! \brief SIMD4 double Floating-point fabs().
3596 * \copydetails gmx_simd_fabs_d
3598 # define gmx_simd4_fabs_d gmx_simd_fabs_d
3600 /*! \brief SIMD4 double floating-point negate.
3601 * \copydetails gmx_simd_fneg_d
3603 # define gmx_simd4_fneg_d gmx_simd_fneg_d
3605 /*! \brief Set each SIMD4 element to the largest from two variables.
3606 * \copydetails gmx_simd_max_d
3608 # define gmx_simd4_max_d gmx_simd_max_d
3610 /*! \brief Set each SIMD4 element to the smallest from two variables.
3611 * \copydetails gmx_simd_min_d
3613 # define gmx_simd4_min_d gmx_simd_min_d
3615 /*! \brief Round SIMD4 double to nearest integer value (in floating-point format).
3616 * \copydetails gmx_simd_round_d
3618 # define gmx_simd4_round_d gmx_simd_round_d
3620 /*! \brief Truncate SIMD4 double, i.e. round towards zero.
3621 * \copydetails gmx_simd_trunc_d
3623 # define gmx_simd4_trunc_d gmx_simd_trunc_d
3625 /*! \brief Return dot product of two double precision SIMD4 variables.
3626 * \copydetails gmx_simd_setzero_f
3628 static gmx_inline double
3629 gmx_simd4_dotproduct3_d(gmx_simd_double_t a, gmx_simd_double_t b)
3631 return a.r[0]*b.r[0]+a.r[1]*b.r[1]+a.r[2]*b.r[2];
3634 /*! \brief SIMD4 variable type to use for logical comparisons on doubles.
3635 * \copydetails gmx_simd_dbool_t
3637 # define gmx_simd4_dbool_t gmx_simd_dbool_t
3639 /*! \brief Equality comparison of two double precision SIMD4 values.
3640 * \copydetails gmx_simd_cmpeq_d
3642 # define gmx_simd4_cmpeq_d gmx_simd_cmpeq_d
3644 /*! \brief Less-than comparison of two double precision SIMD4 values.
3645 * \copydetails gmx_simd_cmplt_d
3647 # define gmx_simd4_cmplt_d gmx_simd_cmplt_d
3649 /*! \brief Less-than comparison of two double precision SIMD4 values.
3650 * \copydetails gmx_simd_cmple_d
3652 # define gmx_simd4_cmple_d gmx_simd_cmple_d
3654 /*! \brief Logical AND on double SIMD4 booleans.
3655 * \copydetails gmx_simd_and_db
3657 # define gmx_simd4_and_db gmx_simd_and_db
3659 /*! \brief Logical OR on double SIMD4 booleans.
3660 * \copydetails gmx_simd_or_db
3662 # define gmx_simd4_or_db gmx_simd_or_db
3664 /*! \brief Returns non-zero if any of the SIMD4 booleans in x is True.
3665 * \copydetails gmx_simd_anytrue_db
3667 # define gmx_simd4_anytrue_db gmx_simd_anytrue_db
3669 /*! \brief Select from double precision SIMD4 variable where boolean is true.
3670 * \copydetails gmx_simd_blendzero_d
3672 # define gmx_simd4_blendzero_d gmx_simd_blendzero_d
3674 /*! \brief Select from double precision SIMD4 variable where boolean is false.
3675 * \copydetails gmx_simd_blendnotzero_d
3677 # define gmx_simd4_blendnotzero_d gmx_simd_blendnotzero_d
3679 /*! \brief Vector-blend instruction for SIMD4 double.
3680 * \copydetails gmx_simd_blendv_d
3682 # define gmx_simd4_blendv_d gmx_simd_blendv_d
3684 /*! \brief Return sum of all elements in SIMD4 double.
3685 * \copydetails gmx_simd_reduce_d
3687 # define gmx_simd4_reduce_d gmx_simd_reduce_d
3689 #else /* GMX_SIMD4_DOUBLE_WIDTH!=4 */
3690 # undef GMX_SIMD4_HAVE_DOUBLE
3696 /*! \brief Return 1 if SIMD floating-point ops have overflowed, and reset check.
3698 * This function to check whether SIMD operations have resulted in overflow,
3699 * and returns 1 if it occured, 0 otherwise.
3700 * For now, this is unfortunately a dummy for all architectures except x86.
3703 gmx_simd_check_and_reset_overflow(void)
3711 #endif /* GMX_SIMD_IMPL_REFERENCE_H */