2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * \defgroup module_simd SIMD intrinsics interface (simd)
38 * \ingroup group_utilitymodules
40 * \brief Provides an architecture-independent way of doing SIMD coding.
42 * Overview of the SIMD implementation is provided in \ref page_simd.
43 * The details are documented in simd.h and the reference implementation
46 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
49 #ifndef GMX_SIMD_SIMD_H
50 #define GMX_SIMD_SIMD_H
52 /*! \libinternal \file
54 * \brief Definitions, capabilities, and wrappers for SIMD module.
56 * The macros in this file are intended to be used for writing
57 * architecture-independent SIMD intrinsics code.
58 * To support a new architecture, adding a new sub-include with macros here
59 * should be (nearly) all that is needed.
61 * The defines in this top-level file will set default Gromacs real precision
62 * operations to either single or double precision based on whether
63 * GMX_DOUBLE is defined. The actual implementation - including e.g.
64 * conversion operations specifically between single and double - is documented
65 * in impl_reference.h.
67 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
70 * \ingroup module_simd
78 #include "gromacs/legacyheaders/types/simple.h"
80 /* Forward declarations so memory allocation can be used in implementations */
81 static gmx_inline float * gmx_simd_align_f(float *p);
82 static gmx_inline double * gmx_simd_align_d(double *p);
83 static gmx_inline int * gmx_simd_align_fi(int *p);
84 static gmx_inline int * gmx_simd_align_di(int *p);
85 static gmx_inline float * gmx_simd4_align_f(float *p);
86 static gmx_inline double * gmx_simd4_align_d(double *p);
89 /*! \addtogroup module_simd */
92 /*! \name SIMD predefined macros to describe high-level capabilities
94 * These macros are used to describe the features available in default
95 * Gromacs real precision. They are set from the lower-level implementation
96 * files that have macros describing single and double precision individually,
97 * as well as the implementation details.
102 * GMX_SIMD indicates that some sort of SIMD support is present in software.
104 * It is disabled if no architecture, not even the reference SIMD, has been selected.
109 /* Intel MIC is a bit special since it is a co-processor. This means the rest
110 * of GROMACS (which runs on the CPU) should use a default SIMD set like AVX,
111 * while the part running on the coprocessor defines __MIC__. All functions in
112 * this SIMD module are static, so it will work perfectly fine to include this
113 * file with different SIMD definitions for different files.
116 # include "gromacs/simd/impl_intel_mic/impl_intel_mic.h"
117 #elif defined GMX_SIMD_X86_AVX2_256
118 # include "gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256.h"
119 #elif defined GMX_SIMD_X86_AVX_256
120 # include "gromacs/simd/impl_x86_avx_256/impl_x86_avx_256.h"
121 #elif defined GMX_SIMD_X86_AVX_128_FMA
122 # include "gromacs/simd/impl_x86_avx_128_fma/impl_x86_avx_128_fma.h"
123 #elif defined GMX_SIMD_X86_SSE4_1
124 # include "gromacs/simd/impl_x86_sse4_1/impl_x86_sse4_1.h"
125 #elif defined GMX_SIMD_X86_SSE2
126 # include "gromacs/simd/impl_x86_sse2/impl_x86_sse2.h"
127 #elif defined GMX_SIMD_IBM_QPX
128 # include "gromacs/simd/impl_ibm_qpx/impl_ibm_qpx.h"
129 #elif (defined GMX_SIMD_REFERENCE) || (defined DOXYGEN)
130 /* Plain C SIMD reference implementation, also serves as documentation.
131 * For now this code path will also be taken for Sparc64_HPC_ACE since we have
132 * not yet added the verlet kernel extensions there. The group kernels do not
133 * depend on this file, so they will still be accelerated with SIMD.
135 # include "gromacs/simd/impl_reference/impl_reference.h"
137 /* Turn off the GMX_SIMD flag if we do not even have reference support */
142 * SIMD4 width is always 4, but use this for clarity in definitions.
144 * It improves code readability to allocate e.g. 2*GMX_SIMD4_WIDTH instead of 8.
146 #define GMX_SIMD4_WIDTH 4
150 /*! \name SIMD memory alignment operations
155 * Align a float pointer for usage with SIMD instructions.
157 * You should typically \a not call this function directly (unless you explicitly
158 * want single precision even when GMX_DOUBLE is set), but use the
159 * \ref gmx_simd_align_r macro to align memory in default Gromacs real precision.
161 * \param p Pointer to memory, allocate at least \ref GMX_SIMD_FLOAT_WIDTH extra elements.
163 * \return Aligned pointer (>=p) suitable for loading/storing float fp SIMD.
164 * If \ref GMX_SIMD_HAVE_FLOAT is not set, p will be returned unchanged.
166 * Start by allocating an extra \ref GMX_SIMD_FLOAT_WIDTH float elements of memory,
167 * and then call this function. The returned pointer will be greater or equal
168 * to the one you provided, and point to an address inside your provided memory
169 * that is aligned to the SIMD width.
171 static gmx_inline float *
172 gmx_simd_align_f(float *p)
174 # ifdef GMX_SIMD_HAVE_FLOAT
/* Step forward by GMX_SIMD_FLOAT_WIDTH-1 floats, then clear the low address
 * bits so the result is a multiple of GMX_SIMD_FLOAT_WIDTH*sizeof(float) bytes.
 * NOTE(review): the mask assumes that byte count is a power of two - confirm.
 */
175 return (float *)(((size_t)((p)+GMX_SIMD_FLOAT_WIDTH-1)) & (~((size_t)(GMX_SIMD_FLOAT_WIDTH*sizeof(float)-1))));
182 * Align a double pointer for usage with SIMD instructions.
184 * You should typically \a not call this function directly (unless you explicitly
185 * want double precision even when GMX_DOUBLE is not set), but use the
186 * \ref gmx_simd_align_r macro to align memory in default Gromacs real precision.
188 * \param p Pointer to memory, allocate at least \ref GMX_SIMD_DOUBLE_WIDTH extra elements.
190 * \return Aligned pointer (>=p) suitable for loading/storing double fp SIMD.
191 * If \ref GMX_SIMD_HAVE_DOUBLE is not set, p will be returned unchanged.
193 * Start by allocating an extra \ref GMX_SIMD_DOUBLE_WIDTH double elements of memory,
194 * and then call this function. The returned pointer will be greater or equal
195 * to the one you provided, and point to an address inside your provided memory
196 * that is aligned to the SIMD width.
198 static gmx_inline double *
199 gmx_simd_align_d(double *p)
201 # ifdef GMX_SIMD_HAVE_DOUBLE
/* Step forward by GMX_SIMD_DOUBLE_WIDTH-1 doubles, then clear the low address
 * bits so the result is a multiple of GMX_SIMD_DOUBLE_WIDTH*sizeof(double) bytes.
 * NOTE(review): the mask assumes that byte count is a power of two - confirm.
 */
202 return (double *)(((size_t)((p)+GMX_SIMD_DOUBLE_WIDTH-1)) & (~((size_t)(GMX_SIMD_DOUBLE_WIDTH*sizeof(double)-1))));
209 * Align a (float) integer pointer for usage with SIMD instructions.
211 * You should typically \a not call this function directly (unless you explicitly
212 * want integers corresponding to single precision even when GMX_DOUBLE is
213 * set), but use the \ref gmx_simd_align_i macro to align integer memory
214 * corresponding to Gromacs default floating-point precision.
216 * \param p Pointer to memory, allocate at least \ref GMX_SIMD_FINT32_WIDTH extra elements.
218 * \return Aligned pointer (>=p) suitable for loading/storing float-integer SIMD.
219 * If \ref GMX_SIMD_HAVE_FINT32 is not set, p will be returned unchanged.
221 * This routine provides aligned memory for usage with \ref gmx_simd_fint32_t. You
222 * should have allocated an extra \ref GMX_SIMD_FINT32_WIDTH * sizeof(int) bytes. The
223 * reason why we need to separate float-integer vs. double-integer is that the
224 * width of registers after conversions from the floating-point types might not
225 * be identical, or even supported, in both cases.
227 static gmx_inline int *
228 gmx_simd_align_fi(int *p)
230 # ifdef GMX_SIMD_HAVE_FINT32
/* Step forward by GMX_SIMD_FINT32_WIDTH-1 ints, then clear the low address
 * bits so the result is a multiple of GMX_SIMD_FINT32_WIDTH*sizeof(int) bytes.
 * NOTE(review): the mask assumes that byte count is a power of two - confirm.
 */
231 return (int *)(((size_t)((p)+GMX_SIMD_FINT32_WIDTH-1)) & (~((size_t)(GMX_SIMD_FINT32_WIDTH*sizeof(int)-1))));
238 * Align a (double) integer pointer for usage with SIMD instructions.
240 * You should typically \a not call this function directly (unless you explicitly
241 * want integers corresponding to double precision even when GMX_DOUBLE is
242 * not set), but use the \ref gmx_simd_align_i macro to align integer memory
243 * corresponding to Gromacs default floating-point precision.
245 * \param p Pointer to memory, allocate at least \ref GMX_SIMD_DINT32_WIDTH extra elements.
247 * \return Aligned pointer (>=p) suitable for loading/storing double-integer SIMD.
248 * If \ref GMX_SIMD_HAVE_DINT32 is not set, p will be returned unchanged.
250 * This routine provides aligned memory for usage with \ref gmx_simd_dint32_t. You
251 * should have allocated an extra \ref GMX_SIMD_DINT32_WIDTH*sizeof(int) bytes. The
252 * reason why we need to separate float-integer vs. double-integer is that the
253 * width of registers after conversions from the floating-point types might not
254 * be identical, or even supported, in both cases.
256 static gmx_inline int *
257 gmx_simd_align_di(int *p)
259 # ifdef GMX_SIMD_HAVE_DINT32
/* Step forward by GMX_SIMD_DINT32_WIDTH-1 ints, then clear the low address
 * bits so the result is a multiple of GMX_SIMD_DINT32_WIDTH*sizeof(int) bytes.
 * NOTE(review): the mask assumes that byte count is a power of two - confirm.
 */
260 return (int *)(((size_t)((p)+GMX_SIMD_DINT32_WIDTH-1)) & (~((size_t)(GMX_SIMD_DINT32_WIDTH*sizeof(int)-1))));
267 * Align a float pointer for usage with SIMD4 instructions.
269 * You should typically \a not call this function directly (unless you explicitly
270 * want single precision even when GMX_DOUBLE is set), but use the
271 * \ref gmx_simd4_align_r macro to align memory in default Gromacs real precision.
273 * \param p Pointer to memory, allocate at least \ref GMX_SIMD4_WIDTH extra elements.
275 * \return Aligned pointer (>=p) suitable for loading/storing float SIMD.
276 * If \ref GMX_SIMD4_HAVE_FLOAT is not set, p will be returned unchanged.
278 * This routine provides aligned memory for usage with \ref gmx_simd4_float_t. You
279 * should have allocated an extra \ref GMX_SIMD4_WIDTH * sizeof(float) bytes.
281 static gmx_inline float *
282 gmx_simd4_align_f(float *p)
284 # ifdef GMX_SIMD4_HAVE_FLOAT
/* Step forward by GMX_SIMD4_WIDTH-1 floats, then clear the low address bits
 * so the result is a multiple of GMX_SIMD4_WIDTH*sizeof(float) bytes.
 */
285 return (float *)(((size_t)((p)+GMX_SIMD4_WIDTH-1)) & (~((size_t)(GMX_SIMD4_WIDTH*sizeof(float)-1))));
292 * Align a double pointer for usage with SIMD4 instructions.
294 * You should typically \a not call this function directly (unless you explicitly
295 * want double precision even when GMX_DOUBLE is not set), but use the
296 * \ref gmx_simd4_align_r macro to align memory in default Gromacs real precision.
298 * \param p Pointer to memory, allocate at least \ref GMX_SIMD4_WIDTH extra elements.
300 * \return Aligned pointer (>=p) suitable for loading/storing double SIMD.
301 * If \ref GMX_SIMD4_HAVE_DOUBLE is not set, p will be returned unchanged.
303 * This routine provides aligned memory for usage with \ref gmx_simd4_double_t. You
304 * should have allocated an extra \ref GMX_SIMD4_WIDTH * sizeof(double) bytes.
306 static gmx_inline double *
307 gmx_simd4_align_d(double *p)
309 # ifdef GMX_SIMD4_HAVE_DOUBLE
/* Step forward by GMX_SIMD4_WIDTH-1 doubles, then clear the low address bits
 * so the result is a multiple of GMX_SIMD4_WIDTH*sizeof(double) bytes.
 */
310 return (double *)(((size_t)((p)+GMX_SIMD4_WIDTH-1)) & (~((size_t)(GMX_SIMD4_WIDTH*sizeof(double)-1))));
319 /* Define Gromacs "real" precision macros depending on Gromacs config. Note
320 * that conversions float-to-double and v.v. are not included here since they
321 * are not precision-dependent - find them in the implementation files.
324 /* Double floating-point. The documentation is in the float part below */
325 # define gmx_simd_real_t gmx_simd_double_t
326 # define gmx_simd_load_r gmx_simd_load_d
327 # define gmx_simd_load1_r gmx_simd_load1_d
328 # define gmx_simd_set1_r gmx_simd_set1_d
329 # define gmx_simd_store_r gmx_simd_store_d
330 # define gmx_simd_loadu_r gmx_simd_loadu_d
331 # define gmx_simd_storeu_r gmx_simd_storeu_d
332 # define gmx_simd_setzero_r gmx_simd_setzero_d
333 # define gmx_simd_add_r gmx_simd_add_d
334 # define gmx_simd_sub_r gmx_simd_sub_d
335 # define gmx_simd_mul_r gmx_simd_mul_d
336 # define gmx_simd_fmadd_r gmx_simd_fmadd_d
337 # define gmx_simd_fmsub_r gmx_simd_fmsub_d
338 # define gmx_simd_fnmadd_r gmx_simd_fnmadd_d
339 # define gmx_simd_fnmsub_r gmx_simd_fnmsub_d
340 # define gmx_simd_and_r gmx_simd_and_d
341 # define gmx_simd_andnot_r gmx_simd_andnot_d
342 # define gmx_simd_or_r gmx_simd_or_d
343 # define gmx_simd_xor_r gmx_simd_xor_d
344 # define gmx_simd_rsqrt_r gmx_simd_rsqrt_d
345 # define gmx_simd_rcp_r gmx_simd_rcp_d
346 # define gmx_simd_fabs_r gmx_simd_fabs_d
347 # define gmx_simd_fneg_r gmx_simd_fneg_d
348 # define gmx_simd_max_r gmx_simd_max_d
349 # define gmx_simd_min_r gmx_simd_min_d
350 # define gmx_simd_round_r gmx_simd_round_d
351 # define gmx_simd_trunc_r gmx_simd_trunc_d
352 # define gmx_simd_fraction_r gmx_simd_fraction_d
353 # define gmx_simd_get_exponent_r gmx_simd_get_exponent_d
354 # define gmx_simd_get_mantissa_r gmx_simd_get_mantissa_d
355 # define gmx_simd_set_exponent_r gmx_simd_set_exponent_d
356 /* Double integer and conversions */
357 # define gmx_simd_int32_t gmx_simd_dint32_t
358 # define gmx_simd_load_i gmx_simd_load_di
359 # define gmx_simd_set1_i gmx_simd_set1_di
360 # define gmx_simd_store_i gmx_simd_store_di
361 # define gmx_simd_loadu_i gmx_simd_loadu_di
362 # define gmx_simd_storeu_i gmx_simd_storeu_di
363 # define gmx_simd_setzero_i gmx_simd_setzero_di
364 # define gmx_simd_cvt_r2i gmx_simd_cvt_d2i
365 # define gmx_simd_cvtt_r2i gmx_simd_cvtt_d2i
366 # define gmx_simd_cvt_i2r gmx_simd_cvt_i2d
367 # define gmx_simd_extract_i gmx_simd_extract_di
368 # define gmx_simd_slli_i gmx_simd_slli_di
369 # define gmx_simd_srli_i gmx_simd_srli_di
370 # define gmx_simd_and_i gmx_simd_and_di
371 # define gmx_simd_andnot_i gmx_simd_andnot_di
372 # define gmx_simd_or_i gmx_simd_or_di
373 # define gmx_simd_xor_i gmx_simd_xor_di
374 # define gmx_simd_add_i gmx_simd_add_di
375 # define gmx_simd_sub_i gmx_simd_sub_di
376 # define gmx_simd_mul_i gmx_simd_mul_di
377 /* Double booleans and selection */
378 # define gmx_simd_bool_t gmx_simd_dbool_t
379 # define gmx_simd_cmpeq_r gmx_simd_cmpeq_d
380 # define gmx_simd_cmplt_r gmx_simd_cmplt_d
381 # define gmx_simd_cmple_r gmx_simd_cmple_d
382 # define gmx_simd_and_b gmx_simd_and_db
383 # define gmx_simd_or_b gmx_simd_or_db
384 # define gmx_simd_anytrue_b gmx_simd_anytrue_db
385 # define gmx_simd_blendzero_r gmx_simd_blendzero_d
386 # define gmx_simd_blendnotzero_r gmx_simd_blendnotzero_d
387 # define gmx_simd_blendv_r gmx_simd_blendv_d
388 # define gmx_simd_reduce_r gmx_simd_reduce_d
389 # define gmx_simd_ibool_t gmx_simd_dibool_t
390 # define gmx_simd_cmpeq_i gmx_simd_cmpeq_di
391 # define gmx_simd_cmplt_i gmx_simd_cmplt_di
392 # define gmx_simd_and_ib gmx_simd_and_dib
393 # define gmx_simd_or_ib gmx_simd_or_dib
394 # define gmx_simd_anytrue_ib gmx_simd_anytrue_dib
395 # define gmx_simd_blendzero_i gmx_simd_blendzero_di
396 # define gmx_simd_blendnotzero_i gmx_simd_blendnotzero_di
397 # define gmx_simd_blendv_i gmx_simd_blendv_di
398 /* Conversions between integer and double floating-point booleans */
399 # define gmx_simd_cvt_b2ib gmx_simd_cvt_db2dib
400 # define gmx_simd_cvt_ib2b gmx_simd_cvt_dib2db
402 /* SIMD4 double fp - we only support a subset of SIMD instructions for SIMD4 */
403 # define gmx_simd4_real_t gmx_simd4_double_t
404 # define gmx_simd4_load_r gmx_simd4_load_d
405 # define gmx_simd4_load1_r gmx_simd4_load1_d
406 # define gmx_simd4_set1_r gmx_simd4_set1_d
407 # define gmx_simd4_store_r gmx_simd4_store_d
408 # define gmx_simd4_loadu_r gmx_simd4_loadu_d
409 # define gmx_simd4_storeu_r gmx_simd4_storeu_d
410 # define gmx_simd4_setzero_r gmx_simd4_setzero_d
411 # define gmx_simd4_add_r gmx_simd4_add_d
412 # define gmx_simd4_sub_r gmx_simd4_sub_d
413 # define gmx_simd4_mul_r gmx_simd4_mul_d
414 # define gmx_simd4_fmadd_r gmx_simd4_fmadd_d
415 # define gmx_simd4_fmsub_r gmx_simd4_fmsub_d
416 # define gmx_simd4_fnmadd_r gmx_simd4_fnmadd_d
417 # define gmx_simd4_fnmsub_r gmx_simd4_fnmsub_d
418 # define gmx_simd4_and_r gmx_simd4_and_d
419 # define gmx_simd4_andnot_r gmx_simd4_andnot_d
420 # define gmx_simd4_or_r gmx_simd4_or_d
421 # define gmx_simd4_xor_r gmx_simd4_xor_d
422 # define gmx_simd4_rsqrt_r gmx_simd4_rsqrt_d
423 # define gmx_simd4_fabs_r gmx_simd4_fabs_d
424 # define gmx_simd4_fneg_r gmx_simd4_fneg_d
425 # define gmx_simd4_max_r gmx_simd4_max_d
426 # define gmx_simd4_min_r gmx_simd4_min_d
427 # define gmx_simd4_round_r gmx_simd4_round_d
428 # define gmx_simd4_trunc_r gmx_simd4_trunc_d
429 # define gmx_simd4_dotproduct3_r gmx_simd4_dotproduct3_d
430 # define gmx_simd4_bool_t gmx_simd4_dbool_t
431 # define gmx_simd4_cmpeq_r gmx_simd4_cmpeq_d
432 # define gmx_simd4_cmplt_r gmx_simd4_cmplt_d
433 # define gmx_simd4_cmple_r gmx_simd4_cmple_d
434 # define gmx_simd4_and_b gmx_simd4_and_db
435 # define gmx_simd4_or_b gmx_simd4_or_db
436 # define gmx_simd4_anytrue_b gmx_simd4_anytrue_db
437 # define gmx_simd4_blendzero_r gmx_simd4_blendzero_d
438 # define gmx_simd4_blendnotzero_r gmx_simd4_blendnotzero_d
439 # define gmx_simd4_blendv_r gmx_simd4_blendv_d
440 # define gmx_simd4_reduce_r gmx_simd4_reduce_d
442 /* Memory allocation */
443 # define gmx_simd_align_r gmx_simd_align_d
444 # define gmx_simd_align_i gmx_simd_align_di
445 # define gmx_simd4_align_r gmx_simd4_align_d
447 # ifdef GMX_SIMD_HAVE_DOUBLE
448 # define GMX_SIMD_HAVE_REAL
449 # define GMX_SIMD_REAL_WIDTH GMX_SIMD_DOUBLE_WIDTH
451 # ifdef GMX_SIMD_HAVE_DINT32
452 # define GMX_SIMD_HAVE_INT32
453 # define GMX_SIMD_INT32_WIDTH GMX_SIMD_DINT32_WIDTH
455 # ifdef GMX_SIMD_HAVE_DINT32_EXTRACT
456 # define GMX_SIMD_HAVE_INT32_EXTRACT
458 # ifdef GMX_SIMD_HAVE_DINT32_LOGICAL
459 # define GMX_SIMD_HAVE_INT32_LOGICAL
461 # ifdef GMX_SIMD_HAVE_DINT32_ARITHMETICS
462 # define GMX_SIMD_HAVE_INT32_ARITHMETICS
464 # ifdef GMX_SIMD4_HAVE_DOUBLE
465 # define GMX_SIMD4_HAVE_REAL
468 #else /* GMX_DOUBLE */
470 /*! \name SIMD data types
472 * The actual storage of these types is implementation dependent. The
473 * documentation is generated from the reference implementation, but for
474 * normal usage this will likely not be what you are using.
477 /*! \brief Real precision floating-point SIMD datatype.
479 * This type is only available if \ref GMX_SIMD_HAVE_REAL is defined.
481 * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_double_t
482 * internally, otherwise \ref gmx_simd_float_t.
484 # define gmx_simd_real_t gmx_simd_float_t
486 /*! \brief 32-bit integer SIMD type.
488 * This type is only available if \ref GMX_SIMD_HAVE_INT32 is defined.
490 * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_dint32_t
491 * internally, otherwise \ref gmx_simd_fint32_t. This might seem a strange
492 * implementation detail, but it is because some SIMD implementations use
493 * different types/widths of integers registers when converting from
494 * double vs. single precision floating point. As long as you just use
495 * this type you will not have to worry about precision.
497 # define gmx_simd_int32_t gmx_simd_fint32_t
499 /*! \brief Boolean SIMD type for usage with \ref gmx_simd_real_t.
501 * This type is only available if \ref GMX_SIMD_HAVE_REAL is defined.
503 * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_dbool_t
504 * internally, otherwise \ref gmx_simd_fbool_t. This is necessary since some
505 * SIMD implementations use bitpatterns for marking truth, so single-
506 * vs. double precision booleans are not necessarily exchangeable.
507 * As long as you just use this type you will not have to worry about precision.
509 * See \ref gmx_simd_ibool_t for an explanation of real vs. integer booleans.
511 # define gmx_simd_bool_t gmx_simd_fbool_t
513 /*! \brief Boolean SIMD type for usage with \ref gmx_simd_int32_t.
515 * This type is only available if \ref GMX_SIMD_HAVE_INT32 is defined.
517 * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_dibool_t
518 * internally, otherwise \ref gmx_simd_fibool_t. This is necessary since some
519 * SIMD implementations use bitpatterns for marking truth, so single-
520 * vs. double precision booleans are not necessarily exchangeable, and while
521 * a double-precision boolean might be represented with a 64-bit mask, the
522 * corresponding integer might only use a 32-bit mask.
524 * We provide conversion routines for these cases, so the only thing you need to
525 * keep in mind is to use \ref gmx_simd_bool_t when working with
526 * \ref gmx_simd_real_t while you pick \ref gmx_simd_ibool_t when working with
527 * \ref gmx_simd_int32_t.
529 * To convert between them, use \ref gmx_simd_cvt_b2ib and \ref gmx_simd_cvt_ib2b.
531 # define gmx_simd_ibool_t gmx_simd_fibool_t
535 * \name SIMD load/store operations on gmx_simd_real_t
537 * \note Unaligned load/stores are only available when
538 * \ref GMX_SIMD_HAVE_LOADU and \ref GMX_SIMD_HAVE_STOREU are set, respectively.
542 /*! \brief Load \ref GMX_SIMD_REAL_WIDTH values from aligned memory to \ref gmx_simd_real_t
544 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_load_d,
545 * otherwise \ref gmx_simd_load_f.
547 * \copydetails gmx_simd_load_f
549 # define gmx_simd_load_r gmx_simd_load_f
551 /*! \brief Set all elements in \ref gmx_simd_real_t from single value in memory.
553 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_load1_d,
554 * otherwise \ref gmx_simd_load1_f.
556 * \copydetails gmx_simd_load1_f
558 # define gmx_simd_load1_r gmx_simd_load1_f
560 /*! \brief Set all elements in \ref gmx_simd_real_t from a scalar.
562 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_set1_d,
563 * otherwise \ref gmx_simd_set1_f.
565 * \copydetails gmx_simd_set1_f
567 # define gmx_simd_set1_r gmx_simd_set1_f
569 /*! \brief Store \ref GMX_SIMD_REAL_WIDTH values from \ref gmx_simd_real_t to aligned memory.
571 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_store_d,
572 * otherwise \ref gmx_simd_store_f.
574 * \copydetails gmx_simd_store_f
576 # define gmx_simd_store_r gmx_simd_store_f
578 /*! \brief Load \ref GMX_SIMD_REAL_WIDTH values from unaligned memory to \ref gmx_simd_real_t.
580 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_loadu_d,
581 * otherwise \ref gmx_simd_loadu_f.
583 * \copydetails gmx_simd_loadu_f
585 # define gmx_simd_loadu_r gmx_simd_loadu_f
587 /*! \brief Store \ref GMX_SIMD_REAL_WIDTH values from \ref gmx_simd_real_t to unaligned memory.
589 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_storeu_d,
590 * otherwise \ref gmx_simd_storeu_f.
592 * \copydetails gmx_simd_storeu_f
594 # define gmx_simd_storeu_r gmx_simd_storeu_f
596 /*! \brief Set all elements in \ref gmx_simd_real_t to 0.0.
598 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_setzero_d,
599 * otherwise \ref gmx_simd_setzero_f.
601 * \copydetails gmx_simd_setzero_f
603 # define gmx_simd_setzero_r gmx_simd_setzero_f
606 * \name SIMD load/store operations on gmx_simd_int32_t
608 * \note Unaligned load/stores are only available when
609 * \ref GMX_SIMD_HAVE_LOADU and \ref GMX_SIMD_HAVE_STOREU are set, respectively.
613 /*! \brief Load \ref GMX_SIMD_INT32_WIDTH values from aligned memory to \ref gmx_simd_int32_t .
615 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_load_di ,
616 * otherwise \ref gmx_simd_load_fi .
618 * \copydetails gmx_simd_load_fi
620 # define gmx_simd_load_i gmx_simd_load_fi
622 /*! \brief Set all elements in \ref gmx_simd_int32_t from a single integer.
624 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_set1_di ,
625 * otherwise \ref gmx_simd_set1_fi .
627 * \copydetails gmx_simd_set1_fi
629 # define gmx_simd_set1_i gmx_simd_set1_fi
631 /*! \brief Store \ref GMX_SIMD_INT32_WIDTH values from \ref gmx_simd_int32_t to aligned memory.
633 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_store_di ,
634 * otherwise \ref gmx_simd_store_fi .
636 * \copydetails gmx_simd_store_fi
638 # define gmx_simd_store_i gmx_simd_store_fi
640 /*! \brief Load \ref GMX_SIMD_INT32_WIDTH values from unaligned memory to \ref gmx_simd_int32_t.
642 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_loadu_di ,
643 * otherwise \ref gmx_simd_loadu_fi .
645 * \copydetails gmx_simd_loadu_fi
647 # define gmx_simd_loadu_i gmx_simd_loadu_fi
649 /*! \brief Store \ref GMX_SIMD_INT32_WIDTH values from \ref gmx_simd_int32_t to unaligned memory.
651 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_storeu_di ,
652 * otherwise \ref gmx_simd_storeu_fi .
654 * \copydetails gmx_simd_storeu_fi
656 # define gmx_simd_storeu_i gmx_simd_storeu_fi
658 /*! \brief Extract single integer from \ref gmx_simd_int32_t element.
660 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_extract_di ,
661 * otherwise \ref gmx_simd_extract_fi .
663 * \copydetails gmx_simd_extract_fi
665 # define gmx_simd_extract_i gmx_simd_extract_fi
667 /*! \brief Set all elements in \ref gmx_simd_int32_t to 0.
669 * If GMX_DOUBLE is defined, it will be aliased to \ref gmx_simd_setzero_di ,
670 * otherwise \ref gmx_simd_setzero_fi .
672 * \copydetails gmx_simd_setzero_fi
674 # define gmx_simd_setzero_i gmx_simd_setzero_fi
678 * \name SIMD floating-point logical operations on gmx_simd_real_t
680 * These instructions are available if \ref GMX_SIMD_HAVE_LOGICAL is defined.
684 /*! \brief Bitwise \a and on two \ref gmx_simd_real_t.
686 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_and_d,
687 * otherwise \ref gmx_simd_and_f.
689 * \copydetails gmx_simd_and_f
691 # define gmx_simd_and_r gmx_simd_and_f
693 /*! \brief Bitwise \a and-not on two \ref gmx_simd_real_t; 1st arg is complemented.
695 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_andnot_d,
696 * otherwise \ref gmx_simd_andnot_f.
698 * \copydetails gmx_simd_andnot_f
700 # define gmx_simd_andnot_r gmx_simd_andnot_f
702 /*! \brief Bitwise \a or on two \ref gmx_simd_real_t.
704 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_or_d,
705 * otherwise \ref gmx_simd_or_f.
707 * \copydetails gmx_simd_or_f
709 # define gmx_simd_or_r gmx_simd_or_f
711 /*! \brief Bitwise \a exclusive-or on two \ref gmx_simd_real_t.
713 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_xor_d,
714 * otherwise \ref gmx_simd_xor_f.
716 * \copydetails gmx_simd_xor_f
718 # define gmx_simd_xor_r gmx_simd_xor_f
721 * \name SIMD floating-point arithmetic operations on gmx_simd_real_t
725 /*! \brief SIMD a+b for two \ref gmx_simd_real_t.
727 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_add_d,
728 * otherwise \ref gmx_simd_add_f.
730 * \copydetails gmx_simd_add_f
732 # define gmx_simd_add_r gmx_simd_add_f
734 /*! \brief SIMD a-b for two \ref gmx_simd_real_t.
736 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_sub_d,
737 * otherwise \ref gmx_simd_sub_f.
739 * \copydetails gmx_simd_sub_f
741 # define gmx_simd_sub_r gmx_simd_sub_f
743 /*! \brief SIMD a*b for two \ref gmx_simd_real_t.
745 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_mul_d,
746 * otherwise \ref gmx_simd_mul_f.
748 * \copydetails gmx_simd_mul_f
750 # define gmx_simd_mul_r gmx_simd_mul_f
752 /*! \brief SIMD a*b+c for three \ref gmx_simd_real_t.
754 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fmadd_d,
755 * otherwise \ref gmx_simd_fmadd_f.
757 * \copydetails gmx_simd_fmadd_f
759 # define gmx_simd_fmadd_r gmx_simd_fmadd_f
761 /*! \brief SIMD a*b-c for three \ref gmx_simd_real_t.
763 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fmsub_d,
764 * otherwise \ref gmx_simd_fmsub_f.
766 * \copydetails gmx_simd_fmsub_f
768 # define gmx_simd_fmsub_r gmx_simd_fmsub_f
770 /*! \brief SIMD -a*b+c for three \ref gmx_simd_real_t.
772 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fnmadd_d,
773 * otherwise \ref gmx_simd_fnmadd_f.
775 * \copydetails gmx_simd_fnmadd_f
777 # define gmx_simd_fnmadd_r gmx_simd_fnmadd_f
779 /*! \brief SIMD -a*b-c for three \ref gmx_simd_real_t.
781 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fnmsub_d,
782 * otherwise \ref gmx_simd_fnmsub_f.
784 * \copydetails gmx_simd_fnmsub_f
786 # define gmx_simd_fnmsub_r gmx_simd_fnmsub_f
788 /*! \brief SIMD table lookup for 1/sqrt(x) approximation.
790 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_rsqrt_d,
791 * otherwise \ref gmx_simd_rsqrt_f.
793 * \copydetails gmx_simd_rsqrt_f
795 # define gmx_simd_rsqrt_r gmx_simd_rsqrt_f
797 /*! \brief SIMD table lookup for 1/x approximation.
799 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_rcp_d,
800 * otherwise \ref gmx_simd_rcp_f.
802 * \copydetails gmx_simd_rcp_f
804 # define gmx_simd_rcp_r gmx_simd_rcp_f
806 /*! \brief SIMD fabs(x) for \ref gmx_simd_real_t.
808 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fabs_d,
809 * otherwise \ref gmx_simd_fabs_f.
811 * \copydetails gmx_simd_fabs_f
813 # define gmx_simd_fabs_r gmx_simd_fabs_f
815 /*! \brief SIMD -x for \ref gmx_simd_real_t.
817 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fneg_d,
818 * otherwise \ref gmx_simd_fneg_f.
820 * \copydetails gmx_simd_fneg_f
822 # define gmx_simd_fneg_r gmx_simd_fneg_f
824 /*! \brief SIMD max(a,b) for each element in \ref gmx_simd_real_t.
826 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_max_d,
827 * otherwise \ref gmx_simd_max_f.
829 * \copydetails gmx_simd_max_f
831 # define gmx_simd_max_r gmx_simd_max_f
833 /*! \brief SIMD min(a,b) for each element in \ref gmx_simd_real_t.
835 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_min_d,
836 * otherwise \ref gmx_simd_min_f.
838 * \copydetails gmx_simd_min_f
840 # define gmx_simd_min_r gmx_simd_min_f
842 /*! \brief Round \ref gmx_simd_real_t to nearest int, return \ref gmx_simd_real_t.
844 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_round_d,
845 * otherwise \ref gmx_simd_round_f.
847 * \copydetails gmx_simd_round_f
849 # define gmx_simd_round_r gmx_simd_round_f
851 /*! \brief Truncate \ref gmx_simd_real_t towards 0, return \ref gmx_simd_real_t.
853 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_trunc_d,
854 * otherwise \ref gmx_simd_trunc_f.
856 * \copydetails gmx_simd_trunc_f
858 # define gmx_simd_trunc_r gmx_simd_trunc_f
860 /*! \brief SIMD Fraction, i.e. x-trunc(x) for \ref gmx_simd_real_t.
862 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fraction_d,
863 * otherwise \ref gmx_simd_fraction_f.
865 * \copydetails gmx_simd_fraction_f
867 # define gmx_simd_fraction_r gmx_simd_fraction_f
869 /*! \brief Return the FP exponent of a SIMD \ref gmx_simd_real_t as a \ref gmx_simd_real_t.
871 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_get_exponent_d,
872 * otherwise \ref gmx_simd_get_exponent_f.
874 * \copydetails gmx_simd_get_exponent_f
876 # define gmx_simd_get_exponent_r gmx_simd_get_exponent_f
878 /*! \brief Return the FP mantissa of a SIMD \ref gmx_simd_real_t as a \ref gmx_simd_real_t.
880 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_get_mantissa_d,
881 * otherwise \ref gmx_simd_get_mantissa_f.
883 * \copydetails gmx_simd_get_mantissa_f
885 # define gmx_simd_get_mantissa_r gmx_simd_get_mantissa_f
887 /*! \brief Set the exponent of a SIMD \ref gmx_simd_real_t from a \ref gmx_simd_real_t.
889 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_set_exponent_d,
890 * otherwise \ref gmx_simd_set_exponent_f.
892 * \copydetails gmx_simd_set_exponent_f
894 # define gmx_simd_set_exponent_r gmx_simd_set_exponent_f
897 * \name SIMD comparison, boolean, and select operations for gmx_simd_real_t
901 /*! \brief SIMD a==b for \ref gmx_simd_real_t. Returns a \ref gmx_simd_bool_t.
903 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmpeq_d,
904 * otherwise \ref gmx_simd_cmpeq_f.
906 * \copydetails gmx_simd_cmpeq_f
908 # define gmx_simd_cmpeq_r gmx_simd_cmpeq_f
910 /*! \brief SIMD a<b for \ref gmx_simd_real_t. Returns a \ref gmx_simd_bool_t.
912 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmplt_d,
913 * otherwise \ref gmx_simd_cmplt_f.
915 * \copydetails gmx_simd_cmplt_f
917 # define gmx_simd_cmplt_r gmx_simd_cmplt_f
919 /*! \brief SIMD a<=b for \ref gmx_simd_real_t. Returns a \ref gmx_simd_bool_t.
921 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmple_d,
922 * otherwise \ref gmx_simd_cmple_f.
924 * \copydetails gmx_simd_cmple_f
926 # define gmx_simd_cmple_r gmx_simd_cmple_f
928 /*! \brief For each element, the result boolean is true if both arguments are true
930 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_and_db,
931 * otherwise \ref gmx_simd_and_fb.
933 * \copydetails gmx_simd_and_fb
935 # define gmx_simd_and_b gmx_simd_and_fb
937 /*! \brief For each element, the result boolean is true if either argument is true
939 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_or_db,
940 * otherwise \ref gmx_simd_or_fb.
942 * \copydetails gmx_simd_or_fb
944 # define gmx_simd_or_b gmx_simd_or_fb
946 /*! \brief Return nonzero if any element in gmx_simd_bool_t is true, otherwise 0.
948 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_anytrue_db,
949 * otherwise \ref gmx_simd_anytrue_fb.
951 * \copydetails gmx_simd_anytrue_fb
953 # define gmx_simd_anytrue_b gmx_simd_anytrue_fb
955 /*! \brief Selects elements from \ref gmx_simd_real_t where boolean is true, otherwise 0.
957 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendzero_d,
958 * otherwise \ref gmx_simd_blendzero_f.
960 * \copydetails gmx_simd_blendzero_f
962 * \sa gmx_simd_blendzero_i
964 # define gmx_simd_blendzero_r gmx_simd_blendzero_f
966 /*! \brief Selects elements from \ref gmx_simd_real_t where boolean is false, otherwise 0.
968 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendnotzero_d,
969 * otherwise \ref gmx_simd_blendnotzero_f.
971 * \copydetails gmx_simd_blendnotzero_f
973 # define gmx_simd_blendnotzero_r gmx_simd_blendnotzero_f
975 /*! \brief Selects from 2nd real SIMD arg where boolean is true, otherwise 1st arg.
977 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendv_d,
978 * otherwise \ref gmx_simd_blendv_f.
980 * \copydetails gmx_simd_blendv_f
982 # define gmx_simd_blendv_r gmx_simd_blendv_f
984 /*! \brief Return sum of all elements in SIMD floating-point variable.
986 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_reduce_d,
987 * otherwise \ref gmx_simd_reduce_f.
989 * \copydetails gmx_simd_reduce_f
991 # define gmx_simd_reduce_r gmx_simd_reduce_f
994 * \name SIMD integer logical operations on gmx_simd_int32_t
996 * These instructions are available if \ref GMX_SIMD_HAVE_INT32_LOGICAL is defined.
1000 /*! \brief Shift each element in \ref gmx_simd_int32_t left by immediate
1002 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_slli_di,
1003 * otherwise \ref gmx_simd_slli_fi.
1005 * \copydetails gmx_simd_slli_fi
1007 # define gmx_simd_slli_i gmx_simd_slli_fi
1009 /*! \brief Shift each element in \ref gmx_simd_int32_t right by immediate
1011 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_srli_di,
1012 * otherwise \ref gmx_simd_srli_fi.
1014 * \copydetails gmx_simd_srli_fi
1016 # define gmx_simd_srli_i gmx_simd_srli_fi
1018 /*! \brief Bitwise \a and on two \ref gmx_simd_int32_t.
1020 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_and_di,
1021 * otherwise \ref gmx_simd_and_fi.
1023 * \copydetails gmx_simd_and_fi
1025 # define gmx_simd_and_i gmx_simd_and_fi
1027 /*! \brief Bitwise \a and-not on two \ref gmx_simd_int32_t; 1st arg is complemented.
1029 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_andnot_di,
1030 * otherwise \ref gmx_simd_andnot_fi.
1032 * \copydetails gmx_simd_andnot_fi
1034 # define gmx_simd_andnot_i gmx_simd_andnot_fi
1036 /*! \brief Bitwise \a or on two \ref gmx_simd_int32_t.
1038 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_or_di,
1039 * otherwise \ref gmx_simd_or_fi.
1041 * \copydetails gmx_simd_or_fi
1043 # define gmx_simd_or_i gmx_simd_or_fi
1045 /*! \brief Bitwise \a xor on two \ref gmx_simd_int32_t.
1047 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_xor_di,
1048 * otherwise \ref gmx_simd_xor_fi.
1050 * \copydetails gmx_simd_xor_fi
1052 # define gmx_simd_xor_i gmx_simd_xor_fi
1055 * \name SIMD integer arithmetic operations on gmx_simd_int32_t
1057 * These instructions are available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is defined.
1061 /*! \brief SIMD a+b for two \ref gmx_simd_int32_t.
1063 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_add_di,
1064 * otherwise \ref gmx_simd_add_fi.
1066 * \copydetails gmx_simd_add_fi
1068 # define gmx_simd_add_i gmx_simd_add_fi
1070 /*! \brief SIMD a-b for two \ref gmx_simd_int32_t.
1072 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_sub_di,
1073 * otherwise \ref gmx_simd_sub_fi.
1075 * \copydetails gmx_simd_sub_fi
1077 # define gmx_simd_sub_i gmx_simd_sub_fi
1079 /*! \brief SIMD a*b for two \ref gmx_simd_int32_t.
1081 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_mul_di,
1082 * otherwise \ref gmx_simd_mul_fi.
1084 * \copydetails gmx_simd_mul_fi
1086 # define gmx_simd_mul_i gmx_simd_mul_fi
1089 * \name SIMD integer comparison, booleans, and selection on gmx_simd_int32_t
1091 * These instructions are available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is defined.
1095 /*! \brief Returns boolean describing whether a==b, for \ref gmx_simd_int32_t
1097 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmpeq_di,
1098 * otherwise \ref gmx_simd_cmpeq_fi.
1100 * \copydetails gmx_simd_cmpeq_fi
1102 # define gmx_simd_cmpeq_i gmx_simd_cmpeq_fi
1104 /*! \brief Returns boolean describing whether a<b, for \ref gmx_simd_int32_t
1106 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmplt_di,
1107 * otherwise \ref gmx_simd_cmplt_fi.
1109 * \copydetails gmx_simd_cmplt_fi
1111 # define gmx_simd_cmplt_i gmx_simd_cmplt_fi
1113 /*! \brief For each element, the result boolean is true if both arguments are true
1115 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_and_dib,
1116 * otherwise \ref gmx_simd_and_fib.
1118 * \copydetails gmx_simd_and_fib
1120 # define gmx_simd_and_ib gmx_simd_and_fib
1122 /*! \brief For each element, the result boolean is true if either argument is true.
1124 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_or_dib,
1125 * otherwise \ref gmx_simd_or_fib.
1127 * \copydetails gmx_simd_or_fib
1129 # define gmx_simd_or_ib gmx_simd_or_fib
1131 /*! \brief Return nonzero if any element in gmx_simd_ibool_t is true, otherwise 0.
1133 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_anytrue_dib,
1134 * otherwise \ref gmx_simd_anytrue_fib.
1136 * \copydetails gmx_simd_anytrue_fib
1138 # define gmx_simd_anytrue_ib gmx_simd_anytrue_fib
1140 /*! \brief Selects elements from \ref gmx_simd_int32_t where boolean is true, otherwise 0.
1142 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendzero_di,
1143 * otherwise \ref gmx_simd_blendzero_fi.
1145 * \copydetails gmx_simd_blendzero_fi
1147 # define gmx_simd_blendzero_i gmx_simd_blendzero_fi
1149 /*! \brief Selects elements from \ref gmx_simd_int32_t where boolean is false, otherwise 0.
1151 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendnotzero_di,
1152 * otherwise \ref gmx_simd_blendnotzero_fi.
1154 * \copydetails gmx_simd_blendnotzero_fi
1156 # define gmx_simd_blendnotzero_i gmx_simd_blendnotzero_fi
1158 /*! \brief Selects from 2nd int SIMD arg where boolean is true, otherwise 1st arg.
1160 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendv_di,
1161 * otherwise \ref gmx_simd_blendv_fi.
1163 * \copydetails gmx_simd_blendv_fi
1165 # define gmx_simd_blendv_i gmx_simd_blendv_fi
1168 * \name SIMD conversion operations
1170 * These instructions are available when both types involved in the conversion
1171 * are defined, e.g. \ref GMX_SIMD_HAVE_REAL and \ref GMX_SIMD_HAVE_INT32
1172 * for real-to-integer conversion.
1176 /*! \brief Convert gmx_simd_real_t to gmx_simd_int32_t, round to nearest integer.
1178 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvt_d2i,
1179 * otherwise \ref gmx_simd_cvt_f2i.
1181 * \copydetails gmx_simd_cvt_f2i
1183 # define gmx_simd_cvt_r2i gmx_simd_cvt_f2i
1185 /*! \brief Convert gmx_simd_real_t to gmx_simd_int32_t, truncate towards zero
1187 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvtt_d2i,
1188 * otherwise \ref gmx_simd_cvtt_f2i.
1190 * \copydetails gmx_simd_cvtt_f2i
1192 # define gmx_simd_cvtt_r2i gmx_simd_cvtt_f2i
1194 /*! \brief Convert gmx_simd_int32_t to gmx_simd_real_t
1196 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvt_i2d,
1197 * otherwise \ref gmx_simd_cvt_i2f.
1199 * \copydetails gmx_simd_cvt_i2f
1201 # define gmx_simd_cvt_i2r gmx_simd_cvt_i2f
1203 /*! \brief Convert from gmx_simd_bool_t to gmx_simd_ibool_t
1205 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvt_db2dib,
1206 * otherwise \ref gmx_simd_cvt_fb2fib.
1208 * \copydetails gmx_simd_cvt_fb2fib
1210 # define gmx_simd_cvt_b2ib gmx_simd_cvt_fb2fib
1212 /*! \brief Convert from gmx_simd_ibool_t to gmx_simd_bool_t
1214 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvt_dib2db,
1215 * otherwise \ref gmx_simd_cvt_fib2fb.
1217 * \copydetails gmx_simd_cvt_fib2fb
1219 # define gmx_simd_cvt_ib2b gmx_simd_cvt_fib2fb
1223 * \name SIMD memory alignment operations
1227 /*! \brief Align real memory for SIMD usage.
1229 * This routine will only align memory if \ref GMX_SIMD_HAVE_REAL is defined.
1230 * Otherwise the original pointer will be returned.
1232 * Start by allocating an extra \ref GMX_SIMD_REAL_WIDTH float elements of memory,
1233 * and then call this function. The returned pointer will be greater than or equal
1234 * to the one you provided, and point to an address inside your provided memory
1235 * that is aligned to the SIMD width.
1237 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_align_d,
1238 * otherwise \ref gmx_simd_align_f. For detailed documentation, see the
1239 * precision-specific implementation routines.
1241 # define gmx_simd_align_r gmx_simd_align_f
1243 /*! \brief Align integer memory for SIMD usage.
1245 * This routine will only align memory if \ref GMX_SIMD_HAVE_INT32 is defined.
1246 * Otherwise the original pointer will be returned.
1248 * Start by allocating an extra \ref GMX_SIMD_INT32_WIDTH elements of memory,
1249 * and then call this function. The returned pointer will be greater than or equal
1250 * to the one you provided, and point to an address inside your provided memory
1251 * that is aligned to the SIMD width.
1253 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_align_di,
1254 * otherwise \ref gmx_simd_align_fi. For detailed documentation, see the
1255 * precision-specific implementation routines.
1257 # define gmx_simd_align_i gmx_simd_align_fi
1261 /*! \name SIMD4 - constant width-four SIMD datatypes
1263 * These operations are only meant to be used for a few coordinate
1264 * manipulation and grid interpolation routines, so we only support a subset
1265 * of operations for SIMD4. To avoid repeating all the documentation from
1266 * the generic width SIMD routines, we only provide brief documentation for
1267 * these operations. Follow the link to the implementation documentation or the
1268 * reference to the corresponding generic SIMD routine. The format will be
1269 * exactly the same, but they have SIMD replaced with SIMD4.
1273 /*! \brief SIMD real datatype guaranteed to be 4 elements wide, if available.
1275 * All the SIMD4 datatypes and operations behave like their counterparts for
1276 * the generic SIMD implementation, but they might be implemented with different
1277 * registers, or not supported at all. It is important that you check the
1278 * define \ref GMX_SIMD4_HAVE_REAL before using it.
1280 * Just as the normal SIMD operations, all SIMD4 types and routines will
1281 * be aliased to either single or double precision ones based on whether
1282 * GMX_DOUBLE is defined.
1284 * \note There is no support for integer or math operations in SIMD4.
1286 # define gmx_simd4_real_t gmx_simd4_float_t
1288 /*! \brief Boolean for \ref gmx_simd4_real_t comparison/selection */
1289 # define gmx_simd4_bool_t gmx_simd4_fbool_t
1291 /*! \brief Load aligned data to gmx_simd4_real_t.
1293 * \copydetails gmx_simd4_load_f
1295 # define gmx_simd4_load_r gmx_simd4_load_f
1297 /*! \brief Load single element to gmx_simd4_real_t
1299 * \copydetails gmx_simd4_load1_f
1301 # define gmx_simd4_load1_r gmx_simd4_load1_f
1303 /*! \brief Set gmx_simd4_real_t from scalar value
1305 * \copydetails gmx_simd4_set1_f
1307 # define gmx_simd4_set1_r gmx_simd4_set1_f
1309 /*! \brief Store aligned data from gmx_simd4_real_t
1311 * \copydetails gmx_simd4_store_f
1313 # define gmx_simd4_store_r gmx_simd4_store_f
1315 /*! \brief Load unaligned data to gmx_simd4_real_t
1317 * \copydetails gmx_simd4_loadu_f
1319 # define gmx_simd4_loadu_r gmx_simd4_loadu_f
1321 /*! \brief Store unaligned data from gmx_simd4_real_t
1323 * \copydetails gmx_simd4_storeu_f
1325 # define gmx_simd4_storeu_r gmx_simd4_storeu_f
1327 /*! \brief Set all elements in gmx_simd4_real_t to 0.0
1329 * \copydetails gmx_simd4_setzero_f
1331 # define gmx_simd4_setzero_r gmx_simd4_setzero_f
1333 /*! \brief Bitwise and for two gmx_simd4_real_t
1335 * \copydetails gmx_simd4_and_f
1337 # define gmx_simd4_and_r gmx_simd4_and_f
1339 /*! \brief Bitwise and-not for two gmx_simd4_real_t. 1st arg is complemented.
1341 * \copydetails gmx_simd4_andnot_f
1343 # define gmx_simd4_andnot_r gmx_simd4_andnot_f
1345 /*! \brief Bitwise or for two gmx_simd4_real_t
1347 * \copydetails gmx_simd4_or_f
1349 # define gmx_simd4_or_r gmx_simd4_or_f
1351 /*! \brief Bitwise xor for two gmx_simd4_real_t
1353 * \copydetails gmx_simd4_xor_f
1355 # define gmx_simd4_xor_r gmx_simd4_xor_f
1357 /*! \brief a+b for \ref gmx_simd4_real_t
1359 * \copydetails gmx_simd4_add_f
1361 # define gmx_simd4_add_r gmx_simd4_add_f
1363 /*! \brief a-b for \ref gmx_simd4_real_t
1365 * \copydetails gmx_simd4_sub_f
1367 # define gmx_simd4_sub_r gmx_simd4_sub_f
1369 /*! \brief a*b for \ref gmx_simd4_real_t
1371 * \copydetails gmx_simd4_mul_f
1373 # define gmx_simd4_mul_r gmx_simd4_mul_f
1375 /*! \brief a*b+c for \ref gmx_simd4_real_t
1377 * \copydetails gmx_simd4_fmadd_f
1379 # define gmx_simd4_fmadd_r gmx_simd4_fmadd_f
1381 /*! \brief a*b-c for \ref gmx_simd4_real_t
1383 * \copydetails gmx_simd4_fmsub_f
1385 # define gmx_simd4_fmsub_r gmx_simd4_fmsub_f
1387 /*! \brief -a*b+c for \ref gmx_simd4_real_t
1389 * \copydetails gmx_simd4_fnmadd_f
1391 # define gmx_simd4_fnmadd_r gmx_simd4_fnmadd_f
1393 /*! \brief -a*b-c for \ref gmx_simd4_real_t
1395 * \copydetails gmx_simd4_fnmsub_f
1397 # define gmx_simd4_fnmsub_r gmx_simd4_fnmsub_f
1399 /*! \brief 1/sqrt(x) approximate lookup for \ref gmx_simd4_real_t
1401 * \copydetails gmx_simd4_rsqrt_f
1403 # define gmx_simd4_rsqrt_r gmx_simd4_rsqrt_f
1405 /*! \brief fabs(x) for \ref gmx_simd4_real_t
1407 * \copydetails gmx_simd4_fabs_f
1409 # define gmx_simd4_fabs_r gmx_simd4_fabs_f
1411 /*! \brief Change sign (-x) for \ref gmx_simd4_real_t
1413 * \copydetails gmx_simd4_fneg_f
1415 # define gmx_simd4_fneg_r gmx_simd4_fneg_f
1417 /*! \brief Select maximum of each pair of elements from args for \ref gmx_simd4_real_t
1419 * \copydetails gmx_simd4_max_f
1421 # define gmx_simd4_max_r gmx_simd4_max_f
1423 /*! \brief Select minimum of each pair of elements from args for \ref gmx_simd4_real_t
1425 * \copydetails gmx_simd4_min_f
1427 # define gmx_simd4_min_r gmx_simd4_min_f
1429 /*! \brief Round \ref gmx_simd4_real_t to nearest integer, return \ref gmx_simd4_real_t
1431 * \copydetails gmx_simd4_round_f
1433 # define gmx_simd4_round_r gmx_simd4_round_f
1435 /*! \brief Truncate \ref gmx_simd4_real_t towards zero, return \ref gmx_simd4_real_t
1437 * \copydetails gmx_simd4_trunc_f
1439 # define gmx_simd4_trunc_r gmx_simd4_trunc_f
1441 /*! \brief Scalar product of first three elements of two \ref gmx_simd4_real_t
1443 * \copydetails gmx_simd4_dotproduct3_f
1445 # define gmx_simd4_dotproduct3_r gmx_simd4_dotproduct3_f
1447 /*! \brief Return booleans whether a==b for each element of two \ref gmx_simd4_real_t
1449 * \copydetails gmx_simd4_cmpeq_f
1451 # define gmx_simd4_cmpeq_r gmx_simd4_cmpeq_f
1452 /*! \brief Return booleans whether a<b for each element of two \ref gmx_simd4_real_t
1454 * \copydetails gmx_simd4_cmplt_f
1456 # define gmx_simd4_cmplt_r gmx_simd4_cmplt_f
1457 /*! \brief Return booleans whether a<=b for each element of two \ref gmx_simd4_real_t
1459 * \copydetails gmx_simd4_cmple_f
1461 # define gmx_simd4_cmple_r gmx_simd4_cmple_f
1463 /*! \brief Logical and for two \ref gmx_simd4_bool_t
1465 * \copydetails gmx_simd4_and_fb
1467 # define gmx_simd4_and_b gmx_simd4_and_fb
1468 /*! \brief Logical or for two \ref gmx_simd4_bool_t
1470 * \copydetails gmx_simd4_or_fb
1472 # define gmx_simd4_or_b gmx_simd4_or_fb
1474 /*! \brief Return nonzero if any element in \ref gmx_simd4_bool_t is true, otherwise 0
1476 * \copydetails gmx_simd4_anytrue_fb
1478 # define gmx_simd4_anytrue_b gmx_simd4_anytrue_fb
1480 /*! \brief Selects elements from \ref gmx_simd4_real_t where boolean is true, otherwise 0.
1482 * \copydetails gmx_simd4_blendzero_f
1484 # define gmx_simd4_blendzero_r gmx_simd4_blendzero_f
1486 /*! \brief Selects elements from \ref gmx_simd4_real_t where boolean is false, otherwise 0.
1488 * \copydetails gmx_simd4_blendnotzero_f
1490 # define gmx_simd4_blendnotzero_r gmx_simd4_blendnotzero_f
1492 /*! \brief Selects from 2nd real SIMD4 arg where boolean is true, otherwise 1st arg
1494 * \copydetails gmx_simd4_blendv_f
1496 # define gmx_simd4_blendv_r gmx_simd4_blendv_f
1498 /*! \brief Return sum of all elements in SIMD4 floating-point variable.
1500 * \copydetails gmx_simd4_reduce_f
1502 # define gmx_simd4_reduce_r gmx_simd4_reduce_f
1504 /*! \brief Align real memory for SIMD4 usage.
1506 * \copydetails gmx_simd4_align_f
1508 # define gmx_simd4_align_r gmx_simd4_align_f
1512 /*! \name SIMD predefined macros to describe high-level capabilities
1516 # if (defined GMX_SIMD_HAVE_FLOAT) || (defined DOXYGEN)
1517 /*! \brief Defined if gmx_simd_real_t is available.
1519 * If GMX_DOUBLE is defined, this will be aliased to
1520 * \ref GMX_SIMD_HAVE_DOUBLE, otherwise GMX_SIMD_HAVE_FLOAT.
1522 # define GMX_SIMD_HAVE_REAL
1523 /*! \brief Width of gmx_simd_real_t.
1525 * If GMX_DOUBLE is defined, this will be aliased to
1526 * \ref GMX_SIMD_DOUBLE_WIDTH, otherwise GMX_SIMD_FLOAT_WIDTH.
1528 # define GMX_SIMD_REAL_WIDTH GMX_SIMD_FLOAT_WIDTH
1530 # if (defined GMX_SIMD_HAVE_FINT32) || (defined DOXYGEN)
1531 /*! \brief Defined if gmx_simd_int32_t is available.
1533 * If GMX_DOUBLE is defined, this will be aliased to
1534 * \ref GMX_SIMD_HAVE_DINT32, otherwise GMX_SIMD_HAVE_FINT32.
1536 # define GMX_SIMD_HAVE_INT32
1537 /*! \brief Width of gmx_simd_int32_t.
1539 * If GMX_DOUBLE is defined, this will be aliased to
1540 * \ref GMX_SIMD_DINT32_WIDTH, otherwise GMX_SIMD_FINT32_WIDTH.
1542 # define GMX_SIMD_INT32_WIDTH GMX_SIMD_FINT32_WIDTH
1544 # if (defined GMX_SIMD_HAVE_FINT32_EXTRACT) || (defined DOXYGEN)
1545 /*! \brief Defined if gmx_simd_extract_i() is available.
1547 * If GMX_DOUBLE is defined, this will be aliased to
1548 * \ref GMX_SIMD_HAVE_DINT32_EXTRACT, otherwise GMX_SIMD_HAVE_FINT32_EXTRACT.
1550 # define GMX_SIMD_HAVE_INT32_EXTRACT
1552 # if (defined GMX_SIMD_HAVE_FINT32_LOGICAL) || (defined DOXYGEN)
1553 /*! \brief Defined if logical ops are supported on gmx_simd_int32_t.
1555 * If GMX_DOUBLE is defined, this will be aliased to
1556 * \ref GMX_SIMD_HAVE_DINT32_LOGICAL, otherwise GMX_SIMD_HAVE_FINT32_LOGICAL.
1558 # define GMX_SIMD_HAVE_INT32_LOGICAL
1560 # if (defined GMX_SIMD_HAVE_FINT32_ARITHMETICS) || (defined DOXYGEN)
1561 /*! \brief Defined if arithmetic ops are supported on gmx_simd_int32_t.
1563 * If GMX_DOUBLE is defined, this will be aliased to
1564 * \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS, otherwise GMX_SIMD_HAVE_FINT32_ARITHMETICS.
1566 # define GMX_SIMD_HAVE_INT32_ARITHMETICS
1568 # if (defined GMX_SIMD4_HAVE_FLOAT) || (defined DOXYGEN)
1569 /*! \brief Defined if gmx_simd4_real_t is available.
1571 * If GMX_DOUBLE is defined, this will be aliased to
1572 * \ref GMX_SIMD4_HAVE_DOUBLE, otherwise GMX_SIMD4_HAVE_FLOAT.
1574 # define GMX_SIMD4_HAVE_REAL
1579 #endif /* GMX_DOUBLE */
1584 #endif /* GMX_SIMD_SIMD_H */