2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * \defgroup module_simd SIMD intrinsics interface (simd)
38 * \ingroup group_utilitymodules
40 * \brief Provides an architecture-independent way of doing SIMD coding.
42 * Overview of the SIMD implementation is provided in \ref page_simd.
43 * The details are documented in simd.h and the reference implementation
46 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
49 #ifndef GMX_SIMD_SIMD_H
50 #define GMX_SIMD_SIMD_H
52 /*! \libinternal \file
54 * \brief Definitions, capabilities, and wrappers for SIMD module.
56 * The macros in this file are intended to be used for writing
57 * architecture-independent SIMD intrinsics code.
58 * To support a new architecture, adding a new sub-include with macros here
59 * should be (nearly) all that is needed.
61 * The defines in this top-level file will set default Gromacs real precision
62 * operations to either single or double precision based on whether
63 * GMX_DOUBLE is defined. The actual implementation - including e.g.
64 * conversion operations specifically between single and double - is documented
65 * in impl_reference.h.
67 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
70 * \ingroup module_simd
77 #include "gromacs/utility/basedefinitions.h"
79 /* Forward declarations so memory allocation can be used in implementations */
80 static gmx_inline float * gmx_simd_align_f(float *p);
81 static gmx_inline double * gmx_simd_align_d(double *p);
82 static gmx_inline int * gmx_simd_align_fi(int *p);
83 static gmx_inline int * gmx_simd_align_di(int *p);
84 static gmx_inline float * gmx_simd4_align_f(float *p);
85 static gmx_inline double * gmx_simd4_align_d(double *p);
88 /*! \addtogroup module_simd */
91 /*! \name SIMD predefined macros to describe high-level capabilities
93 * These macros are used to describe the features available in default
94 * Gromacs real precision. They are set from the lower-level implementation
95 * files that have macros describing single and double precision individually,
96 * as well as the implementation details.
101 * GMX_SIMD indicates that some sort of SIMD support is present in software.
103 * It is disabled if no architecture, neither reference SIMD, has been selected.
108 /* Intel MIC is a bit special since it is a co-processor. This means the rest
109 * of GROMACS (which runs on the CPU) should use a default SIMD set like AVX,
110 * while the part running on the coprocessor defines __MIC__. All functions in
111 * this SIMD module are static, so it will work perfectly fine to include this
112 * file with different SIMD definitions for different files.
115 # include "impl_intel_mic/impl_intel_mic.h"
116 #elif defined GMX_SIMD_X86_AVX2_256
117 # include "impl_x86_avx2_256/impl_x86_avx2_256.h"
118 #elif defined GMX_SIMD_X86_AVX_256
119 # include "impl_x86_avx_256/impl_x86_avx_256.h"
120 #elif defined GMX_SIMD_X86_AVX_128_FMA
121 # include "impl_x86_avx_128_fma/impl_x86_avx_128_fma.h"
122 #elif defined GMX_SIMD_X86_SSE4_1
123 # include "impl_x86_sse4_1/impl_x86_sse4_1.h"
124 #elif defined GMX_SIMD_X86_SSE2
125 # include "impl_x86_sse2/impl_x86_sse2.h"
126 #elif defined GMX_SIMD_ARM_NEON
127 # include "impl_arm_neon/impl_arm_neon.h"
128 #elif defined GMX_SIMD_ARM_NEON_ASIMD
129 # include "impl_arm_neon_asimd/impl_arm_neon_asimd.h"
130 #elif defined GMX_SIMD_IBM_QPX
131 # include "impl_ibm_qpx/impl_ibm_qpx.h"
132 #elif defined GMX_SIMD_IBM_VMX
133 # include "impl_ibm_vmx/impl_ibm_vmx.h"
134 #elif defined GMX_SIMD_SPARC64_HPC_ACE
135 # include "impl_sparc64_hpc_ace/impl_sparc64_hpc_ace.h"
136 #elif (defined GMX_SIMD_REFERENCE) || (defined DOXYGEN)
137 /* Plain C SIMD reference implementation, also serves as documentation.
138 * For now this code path will also be taken for Sparc64_HPC_ACE since we have
139 * not yet added the verlet kernel extensions there. The group kernels do not
140 * depend on this file, so they will still be accelerated with SIMD.
142 # include "impl_reference/impl_reference.h"
144 /* Turn off the GMX_SIMD flag if we do not even have reference support */
149 * SIMD4 width is always 4, but use this for clarity in definitions.
151 * It improves code readability to allocate e.g. 2*GMX_SIMD4_WIDTH instead of 8.
153 #define GMX_SIMD4_WIDTH 4
157 /*! \name SIMD memory alignment operations
162 * Align a float pointer for usage with SIMD instructions.
164 * You should typically \a not call this function directly (unless you explicitly
165 * want single precision even when GMX_DOUBLE is set), but use the
166 * \ref gmx_simd_align_r macro to align memory in default Gromacs real precision.
168 * \param p Pointer to memory, allocate at least \ref GMX_SIMD_FLOAT_WIDTH extra elements.
170 * \return Aligned pointer (>=p) suitable for loading/storing float fp SIMD.
171 * If \ref GMX_SIMD_HAVE_FLOAT is not set, p will be returned unchanged.
173 * Start by allocating an extra \ref GMX_SIMD_FLOAT_WIDTH float elements of memory,
174 * and then call this function. The returned pointer will be greater or equal
175 * to the one you provided, and point to an address inside your provided memory
176 * that is aligned to the SIMD width.
178 static gmx_inline float *
179 gmx_simd_align_f(float *p)
181 # ifdef GMX_SIMD_HAVE_FLOAT
182 return (float *)(((size_t)((p)+GMX_SIMD_FLOAT_WIDTH-1)) & (~((size_t)(GMX_SIMD_FLOAT_WIDTH*sizeof(float)-1))));
189 * Align a double pointer for usage with SIMD instructions.
191 * You should typically \a not call this function directly (unless you explicitly
192 * want double precision even when GMX_DOUBLE is not set), but use the
193 * \ref gmx_simd_align_r macro to align memory in default Gromacs real precision.
195 * \param p Pointer to memory, allocate at least \ref GMX_SIMD_DOUBLE_WIDTH extra elements.
197 * \return Aligned pointer (>=p) suitable for loading/storing double fp SIMD.
198 * If \ref GMX_SIMD_HAVE_DOUBLE is not set, p will be returned unchanged.
200 * Start by allocating an extra \ref GMX_SIMD_DOUBLE_WIDTH double elements of memory,
201 * and then call this function. The returned pointer will be greater or equal
202 * to the one you provided, and point to an address inside your provided memory
203 * that is aligned to the SIMD width.
205 static gmx_inline double *
206 gmx_simd_align_d(double *p)
208 # ifdef GMX_SIMD_HAVE_DOUBLE
209 return (double *)(((size_t)((p)+GMX_SIMD_DOUBLE_WIDTH-1)) & (~((size_t)(GMX_SIMD_DOUBLE_WIDTH*sizeof(double)-1))));
216 * Align a (float) integer pointer for usage with SIMD instructions.
218 * You should typically \a not call this function directly (unless you explicitly
219 * want integers corresponding to single precision even when GMX_DOUBLE is
220 * set), but use the \ref gmx_simd_align_i macro to align integer memory
221 * corresponding to Gromacs default floating-point precision.
223 * \param p Pointer to memory, allocate at least \ref GMX_SIMD_FINT32_WIDTH extra elements.
225 * \return Aligned pointer (>=p) suitable for loading/storing float-integer SIMD.
226 * If \ref GMX_SIMD_HAVE_FINT32 is not set, p will be returned unchanged.
228 * This routine provides aligned memory for usage with \ref gmx_simd_fint32_t. You
229 * should have allocated an extra \ref GMX_SIMD_FINT32_WIDTH * sizeof(int) bytes. The
230 * reason why we need to separate float-integer vs. double-integer is that the
231 * width of registers after conversions from the floating-point types might not
232 * be identical, or even supported, in both cases.
234 static gmx_inline int *
235 gmx_simd_align_fi(int *p)
237 # ifdef GMX_SIMD_HAVE_FINT32
238 return (int *)(((size_t)((p)+GMX_SIMD_FINT32_WIDTH-1)) & (~((size_t)(GMX_SIMD_FINT32_WIDTH*sizeof(int)-1))));
245 * Align a (double) integer pointer for usage with SIMD instructions.
247 * You should typically \a not call this function directly (unless you explicitly
248 * want integers corresponding to doublele precision even when GMX_DOUBLE is
249 * not set), but use the \ref gmx_simd_align_i macro to align integer memory
250 * corresponding to Gromacs default floating-point precision.
252 * \param p Pointer to memory, allocate at least \ref GMX_SIMD_DINT32_WIDTH extra elements.
254 * \return Aligned pointer (>=p) suitable for loading/storing double-integer SIMD.
255 * If \ref GMX_SIMD_HAVE_DINT32 is not set, p will be returned unchanged.
257 * This routine provides aligned memory for usage with \ref gmx_simd_dint32_t. You
258 * should have allocated an extra \ref GMX_SIMD_DINT32_WIDTH*sizeof(int) bytes. The
259 * reason why we need to separate float-integer vs. double-integer is that the
260 * width of registers after conversions from the floating-point types might not
261 * be identical, or even supported, in both cases.
263 static gmx_inline int *
264 gmx_simd_align_di(int *p)
266 # ifdef GMX_SIMD_HAVE_DINT32
267 return (int *)(((size_t)((p)+GMX_SIMD_DINT32_WIDTH-1)) & (~((size_t)(GMX_SIMD_DINT32_WIDTH*sizeof(int)-1))));
274 * Align a float pointer for usage with SIMD4 instructions.
276 * You should typically \a not call this function directly (unless you explicitly
277 * want single precision even when GMX_DOUBLE is set), but use the
278 * \ref gmx_simd4_align_r macro to align memory in default Gromacs real precision.
280 * \param p Pointer to memory, allocate at least \ref GMX_SIMD4_WIDTH extra elements.
282 * \return Aligned pointer (>=p) suitable for loading/storing float SIMD.
283 * If \ref GMX_SIMD4_HAVE_FLOAT is not set, p will be returned unchanged.
285 * This routine provides aligned memory for usage with \ref gmx_simd4_float_t.
286 * should have allocated an extra \ref GMX_SIMD4_WIDTH * sizeof(float) bytes.
288 static gmx_inline float *
289 gmx_simd4_align_f(float *p)
291 # ifdef GMX_SIMD4_HAVE_FLOAT
292 return (float *)(((size_t)((p)+GMX_SIMD4_WIDTH-1)) & (~((size_t)(GMX_SIMD4_WIDTH*sizeof(float)-1))));
299 * Align a double pointer for usage with SIMD4 instructions.
301 * You should typically \a not call this function directly (unless you explicitly
302 * want double precision even when GMX_DOUBLE is not set), but use the
303 * \ref gmx_simd4_align_r macro to align memory in default Gromacs real precision.
305 * \param p Pointer to memory, allocate at least \ref GMX_SIMD4_WIDTH extra elements.
307 * \return Aligned pointer (>=p) suitable for loading/storing float SIMD.
308 * If \ref GMX_SIMD4_HAVE_DOUBLE is not set, p will be returned unchanged.
310 * This routine provides aligned memory for usage with \ref gmx_simd4_double_t.
311 * should have allocated an extra \ref GMX_SIMD4_WIDTH * sizeof(double) bytes.
313 static gmx_inline double *
314 gmx_simd4_align_d(double *p)
316 # ifdef GMX_SIMD4_HAVE_DOUBLE
317 return (double *)(((size_t)((p)+GMX_SIMD4_WIDTH-1)) & (~((size_t)(GMX_SIMD4_WIDTH*sizeof(double)-1))));
326 /* Define Gromacs "real" precision macros depending on Gromacs config. Note
327 * that conversions float-to-double and v.v. are not included here since they
328 * are not precision-dependent - find them in the implementation files.
331 /* Double floating-point. The documentation is in the float part below */
332 # define gmx_simd_real_t gmx_simd_double_t
333 # define gmx_simd_load_r gmx_simd_load_d
334 # define gmx_simd_load1_r gmx_simd_load1_d
335 # define gmx_simd_set1_r gmx_simd_set1_d
336 # define gmx_simd_store_r gmx_simd_store_d
337 # define gmx_simd_loadu_r gmx_simd_loadu_d
338 # define gmx_simd_storeu_r gmx_simd_storeu_d
339 # define gmx_simd_setzero_r gmx_simd_setzero_d
340 # define gmx_simd_add_r gmx_simd_add_d
341 # define gmx_simd_sub_r gmx_simd_sub_d
342 # define gmx_simd_mul_r gmx_simd_mul_d
343 # define gmx_simd_fmadd_r gmx_simd_fmadd_d
344 # define gmx_simd_fmsub_r gmx_simd_fmsub_d
345 # define gmx_simd_fnmadd_r gmx_simd_fnmadd_d
346 # define gmx_simd_fnmsub_r gmx_simd_fnmsub_d
347 # define gmx_simd_and_r gmx_simd_and_d
348 # define gmx_simd_andnot_r gmx_simd_andnot_d
349 # define gmx_simd_or_r gmx_simd_or_d
350 # define gmx_simd_xor_r gmx_simd_xor_d
351 # define gmx_simd_rsqrt_r gmx_simd_rsqrt_d
352 # define gmx_simd_rcp_r gmx_simd_rcp_d
353 # define gmx_simd_fabs_r gmx_simd_fabs_d
354 # define gmx_simd_fneg_r gmx_simd_fneg_d
355 # define gmx_simd_max_r gmx_simd_max_d
356 # define gmx_simd_min_r gmx_simd_min_d
357 # define gmx_simd_round_r gmx_simd_round_d
358 # define gmx_simd_trunc_r gmx_simd_trunc_d
359 # define gmx_simd_fraction_r gmx_simd_fraction_d
360 # define gmx_simd_get_exponent_r gmx_simd_get_exponent_d
361 # define gmx_simd_get_mantissa_r gmx_simd_get_mantissa_d
362 # define gmx_simd_set_exponent_r gmx_simd_set_exponent_d
363 /* Double integer and conversions */
364 # define gmx_simd_int32_t gmx_simd_dint32_t
365 # define gmx_simd_load_i gmx_simd_load_di
366 # define gmx_simd_set1_i gmx_simd_set1_di
367 # define gmx_simd_store_i gmx_simd_store_di
368 # define gmx_simd_loadu_i gmx_simd_loadu_di
369 # define gmx_simd_storeu_i gmx_simd_storeu_di
370 # define gmx_simd_setzero_i gmx_simd_setzero_di
371 # define gmx_simd_cvt_r2i gmx_simd_cvt_d2i
372 # define gmx_simd_cvtt_r2i gmx_simd_cvtt_d2i
373 # define gmx_simd_cvt_i2r gmx_simd_cvt_i2d
374 # define gmx_simd_extract_i gmx_simd_extract_di
375 # define gmx_simd_slli_i gmx_simd_slli_di
376 # define gmx_simd_srli_i gmx_simd_srli_di
377 # define gmx_simd_and_i gmx_simd_and_di
378 # define gmx_simd_andnot_i gmx_simd_andnot_di
379 # define gmx_simd_or_i gmx_simd_or_di
380 # define gmx_simd_xor_i gmx_simd_xor_di
381 # define gmx_simd_add_i gmx_simd_add_di
382 # define gmx_simd_sub_i gmx_simd_sub_di
383 # define gmx_simd_mul_i gmx_simd_mul_di
384 /* Double booleans and selection */
385 # define gmx_simd_bool_t gmx_simd_dbool_t
386 # define gmx_simd_cmpeq_r gmx_simd_cmpeq_d
387 # define gmx_simd_cmplt_r gmx_simd_cmplt_d
388 # define gmx_simd_cmple_r gmx_simd_cmple_d
389 # define gmx_simd_and_b gmx_simd_and_db
390 # define gmx_simd_or_b gmx_simd_or_db
391 # define gmx_simd_anytrue_b gmx_simd_anytrue_db
392 # define gmx_simd_blendzero_r gmx_simd_blendzero_d
393 # define gmx_simd_blendnotzero_r gmx_simd_blendnotzero_d
394 # define gmx_simd_blendv_r gmx_simd_blendv_d
395 # define gmx_simd_reduce_r gmx_simd_reduce_d
396 # define gmx_simd_ibool_t gmx_simd_dibool_t
397 # define gmx_simd_cmpeq_i gmx_simd_cmpeq_di
398 # define gmx_simd_cmplt_i gmx_simd_cmplt_di
399 # define gmx_simd_and_ib gmx_simd_and_dib
400 # define gmx_simd_or_ib gmx_simd_or_dib
401 # define gmx_simd_anytrue_ib gmx_simd_anytrue_dib
402 # define gmx_simd_blendzero_i gmx_simd_blendzero_di
403 # define gmx_simd_blendnotzero_i gmx_simd_blendnotzero_di
404 # define gmx_simd_blendv_i gmx_simd_blendv_di
405 /* Conversions between integer and double floating-point booleans */
406 # define gmx_simd_cvt_b2ib gmx_simd_cvt_db2dib
407 # define gmx_simd_cvt_ib2b gmx_simd_cvt_dib2db
409 /* SIMD4 double fp - we only support a subset of SIMD instructions for SIMD4 */
410 # define gmx_simd4_real_t gmx_simd4_double_t
411 # define gmx_simd4_load_r gmx_simd4_load_d
412 # define gmx_simd4_load1_r gmx_simd4_load1_d
413 # define gmx_simd4_set1_r gmx_simd4_set1_d
414 # define gmx_simd4_store_r gmx_simd4_store_d
415 # define gmx_simd4_loadu_r gmx_simd4_loadu_d
416 # define gmx_simd4_storeu_r gmx_simd4_storeu_d
417 # define gmx_simd4_setzero_r gmx_simd4_setzero_d
418 # define gmx_simd4_add_r gmx_simd4_add_d
419 # define gmx_simd4_sub_r gmx_simd4_sub_d
420 # define gmx_simd4_mul_r gmx_simd4_mul_d
421 # define gmx_simd4_fmadd_r gmx_simd4_fmadd_d
422 # define gmx_simd4_fmsub_r gmx_simd4_fmsub_d
423 # define gmx_simd4_fnmadd_r gmx_simd4_fnmadd_d
424 # define gmx_simd4_fnmsub_r gmx_simd4_fnmsub_d
425 # define gmx_simd4_and_r gmx_simd4_and_d
426 # define gmx_simd4_andnot_r gmx_simd4_andnot_d
427 # define gmx_simd4_or_r gmx_simd4_or_d
428 # define gmx_simd4_xor_r gmx_simd4_xor_d
429 # define gmx_simd4_rsqrt_r gmx_simd4_rsqrt_d
430 # define gmx_simd4_fabs_r gmx_simd4_fabs_d
431 # define gmx_simd4_fneg_r gmx_simd4_fneg_d
432 # define gmx_simd4_max_r gmx_simd4_max_d
433 # define gmx_simd4_min_r gmx_simd4_min_d
434 # define gmx_simd4_round_r gmx_simd4_round_d
435 # define gmx_simd4_trunc_r gmx_simd4_trunc_d
436 # define gmx_simd4_dotproduct3_r gmx_simd4_dotproduct3_d
437 # define gmx_simd4_bool_t gmx_simd4_dbool_t
438 # define gmx_simd4_cmpeq_r gmx_simd4_cmpeq_d
439 # define gmx_simd4_cmplt_r gmx_simd4_cmplt_d
440 # define gmx_simd4_cmple_r gmx_simd4_cmple_d
441 # define gmx_simd4_and_b gmx_simd4_and_db
442 # define gmx_simd4_or_b gmx_simd4_or_db
443 # define gmx_simd4_anytrue_b gmx_simd4_anytrue_db
444 # define gmx_simd4_blendzero_r gmx_simd4_blendzero_d
445 # define gmx_simd4_blendnotzero_r gmx_simd4_blendnotzero_d
446 # define gmx_simd4_blendv_r gmx_simd4_blendv_d
447 # define gmx_simd4_reduce_r gmx_simd4_reduce_d
449 /* Memory allocation */
450 # define gmx_simd_align_r gmx_simd_align_d
451 # define gmx_simd_align_i gmx_simd_align_di
452 # define gmx_simd4_align_r gmx_simd4_align_d
454 # ifdef GMX_SIMD_HAVE_DOUBLE
455 # define GMX_SIMD_HAVE_REAL
456 # define GMX_SIMD_REAL_WIDTH GMX_SIMD_DOUBLE_WIDTH
458 # ifdef GMX_SIMD_HAVE_DINT32
459 # define GMX_SIMD_HAVE_INT32
460 # define GMX_SIMD_INT32_WIDTH GMX_SIMD_DINT32_WIDTH
462 # ifdef GMX_SIMD_HAVE_DINT32_EXTRACT
463 # define GMX_SIMD_HAVE_INT32_EXTRACT
465 # ifdef GMX_SIMD_HAVE_DINT32_LOGICAL
466 # define GMX_SIMD_HAVE_INT32_LOGICAL
468 # ifdef GMX_SIMD_HAVE_DINT32_ARITHMETICS
469 # define GMX_SIMD_HAVE_INT32_ARITHMETICS
471 # ifdef GMX_SIMD4_HAVE_DOUBLE
472 # define GMX_SIMD4_HAVE_REAL
475 #else /* GMX_DOUBLE */
477 /*! \name SIMD data types
479 * The actual storage of these types is implementation dependent. The
480 * documentation is generated from the reference implementation, but for
481 * normal usage this will likely not be what you are using.
484 /*! \brief Real precision floating-point SIMD datatype.
486 * This type is only available if \ref GMX_SIMD_HAVE_REAL is defined.
488 * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_double_t
489 * internally, otherwise \ref gmx_simd_float_t.
491 # define gmx_simd_real_t gmx_simd_float_t
493 /*! \brief 32-bit integer SIMD type.
495 * This type is only available if \ref GMX_SIMD_HAVE_INT32 is defined.
497 * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_dint32_t
498 * internally, otherwise \ref gmx_simd_fint32_t. This might seem a strange
499 * implementation detail, but it is because some SIMD implementations use
500 * different types/widths of integers registers when converting from
501 * double vs. single precision floating point. As long as you just use
502 * this type you will not have to worry about precision.
504 # define gmx_simd_int32_t gmx_simd_fint32_t
506 /*! \brief Boolean SIMD type for usage with \ref gmx_simd_real_t.
508 * This type is only available if \ref GMX_SIMD_HAVE_REAL is defined.
510 * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_dbool_t
511 * internally, otherwise \ref gmx_simd_fbool_t. This is necessary since some
512 * SIMD implementations use bitpatterns for marking truth, so single-
513 * vs. double precision booleans are not necessarily exchangable.
514 * As long as you just use this type you will not have to worry about precision.
516 * See \ref gmx_simd_ibool_t for an explanation of real vs. integer booleans.
518 # define gmx_simd_bool_t gmx_simd_fbool_t
520 /*! \brief Boolean SIMD type for usage with \ref gmx_simd_int32_t.
522 * This type is only available if \ref GMX_SIMD_HAVE_INT32 is defined.
524 * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_dibool_t
525 * internally, otherwise \ref gmx_simd_fibool_t. This is necessary since some
526 * SIMD implementations use bitpatterns for marking truth, so single-
527 * vs. double precision booleans are not necessarily exchangable, and while
528 * a double-precision boolean might be represented with a 64-bit mask, the
529 * corresponding integer might only use a 32-bit mask.
531 * We provide conversion routines for these cases, so the only thing you need to
532 * keep in mind is to use \ref gmx_simd_bool_t when working with
533 * \ref gmx_simd_real_t while you pick \ref gmx_simd_ibool_t when working with
534 * \ref gmx_simd_int32_t.
536 * To convert between them, use \ref gmx_simd_cvt_b2ib and \ref gmx_simd_cvt_ib2b.
538 # define gmx_simd_ibool_t gmx_simd_fibool_t
542 * \name SIMD load/store operations on gmx_simd_real_t
544 * \note Unaligned load/stores are only available when
545 * \ref GMX_SIMD_HAVE_LOADU and \ref GMX_SIMD_HAVE_STOREU are set, respectively.
549 /*! \brief Load \ref GMX_SIMD_REAL_WIDTH values from aligned memory to \ref gmx_simd_real_t
551 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_load_d,
552 * otherwise \ref gmx_simd_load_f.
554 * \copydetails gmx_simd_load_f
556 # define gmx_simd_load_r gmx_simd_load_f
558 /*! \brief Set all elements in \ref gmx_simd_real_t from single value in memory.
560 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_load1_d,
561 * otherwise \ref gmx_simd_load1_f.
563 * \copydetails gmx_simd_load1_f
565 # define gmx_simd_load1_r gmx_simd_load1_f
567 /*! \brief Set all elements in \ref gmx_simd_real_t from a scalar.
569 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_set1_d,
570 * otherwise \ref gmx_simd_set1_f.
572 * \copydetails gmx_simd_set1_f
574 # define gmx_simd_set1_r gmx_simd_set1_f
576 /*! \brief Store \ref GMX_SIMD_REAL_WIDTH values from \ref gmx_simd_real_t to aligned memory.
578 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_store_d,
579 * otherwise \ref gmx_simd_store_f.
581 * \copydetails gmx_simd_store_f
583 # define gmx_simd_store_r gmx_simd_store_f
585 /*! \brief Load \ref GMX_SIMD_REAL_WIDTH values from unaligned memory to \ref gmx_simd_real_t.
587 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_loadu_d,
588 * otherwise \ref gmx_simd_loadu_f.
590 * \copydetails gmx_simd_loadu_f
592 # define gmx_simd_loadu_r gmx_simd_loadu_f
594 /*! \brief Store \ref GMX_SIMD_REAL_WIDTH values from \ref gmx_simd_real_t to unaligned memory.
596 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_storeu_d,
597 * otherwise \ref gmx_simd_storeu_f.
599 * \copydetails gmx_simd_storeu_f
601 # define gmx_simd_storeu_r gmx_simd_storeu_f
603 /*! \brief Set all elements in \ref gmx_simd_real_t to 0.0.
605 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_setzero_d,
606 * otherwise \ref gmx_simd_setzero_f.
608 * \copydetails gmx_simd_setzero_f
610 # define gmx_simd_setzero_r gmx_simd_setzero_f
613 * \name SIMD load/store operations on gmx_simd_int32_t
615 * \note Unaligned load/stores are only available when
616 * \ref GMX_SIMD_HAVE_LOADU and \ref GMX_SIMD_HAVE_STOREU are set, respectively.
620 /*! \brief Load \ref GMX_SIMD_INT32_WIDTH values from aligned memory to \ref gmx_simd_int32_t .
622 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_load_di ,
623 * otherwise \ref gmx_simd_load_fi .
625 * \copydetails gmx_simd_load_fi
627 # define gmx_simd_load_i gmx_simd_load_fi
629 /*! \brief Set all elements in \ref gmx_simd_int32_t from a single integer.
631 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_set1_di ,
632 * otherwise \ref gmx_simd_set1_fi .
634 * \copydetails gmx_simd_set1_fi
636 # define gmx_simd_set1_i gmx_simd_set1_fi
638 /*! \brief Store \ref GMX_SIMD_REAL_WIDTH values from \ref gmx_simd_int32_t to aligned memory.
640 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_store_di ,
641 * otherwise \ref gmx_simd_store_fi .
643 * \copydetails gmx_simd_store_fi
645 # define gmx_simd_store_i gmx_simd_store_fi
647 /*! \brief Load \ref GMX_SIMD_REAL_WIDTH values from unaligned memory to \ref gmx_simd_int32_t.
649 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_loadu_di ,
650 * otherwise \ref gmx_simd_loadu_fi .
652 * \copydetails gmx_simd_loadu_fi
654 # define gmx_simd_loadu_i gmx_simd_loadu_fi
656 /*! \brief Store \ref GMX_SIMD_REAL_WIDTH values from \ref gmx_simd_int32_t to unaligned memory.
658 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_storeu_di ,
659 * otherwise \ref gmx_simd_storeu_fi .
661 * \copydetails gmx_simd_storeu_fi
663 # define gmx_simd_storeu_i gmx_simd_storeu_fi
665 /*! \brief Extract single integer from \ref gmx_simd_int32_t element.
667 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_extract_di ,
668 * otherwise \ref gmx_simd_extract_fi .
670 * \copydetails gmx_simd_extract_fi
672 # define gmx_simd_extract_i gmx_simd_extract_fi
674 /*! \brief Set all elements in \ref gmx_simd_int32_t to 0.
676 * If GMX_DOUBLE is defined, it will be aliased to \ref gmx_simd_setzero_di ,
677 * otherwise \ref gmx_simd_setzero_fi .
679 * \copydetails gmx_simd_setzero_fi
681 # define gmx_simd_setzero_i gmx_simd_setzero_fi
685 * \name SIMD floating-point logical operations on gmx_simd_real_t
687 * These instructions are available if \ref GMX_SIMD_HAVE_LOGICAL is defined.
691 /*! \brief Bitwise \a and on two \ref gmx_simd_real_t.
693 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_and_d,
694 * otherwise \ref gmx_simd_and_f.
696 * \copydetails gmx_simd_and_f
698 # define gmx_simd_and_r gmx_simd_and_f
700 /*! \brief Bitwise \a and-not on two \ref gmx_simd_real_t; 1st arg is complemented.
702 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_andnot_d,
703 * otherwise \ref gmx_simd_andnot_f.
705 * \copydetails gmx_simd_andnot_f
707 # define gmx_simd_andnot_r gmx_simd_andnot_f
709 /*! \brief Bitwise \a or on two \ref gmx_simd_real_t.
711 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_or_d,
712 * otherwise \ref gmx_simd_or_f.
714 * \copydetails gmx_simd_or_f
716 # define gmx_simd_or_r gmx_simd_or_f
718 /*! \brief Bitwise \a exclusive-or on two \ref gmx_simd_real_t.
720 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_xor_d,
721 * otherwise \ref gmx_simd_xor_f.
723 * \copydetails gmx_simd_xor_f
725 # define gmx_simd_xor_r gmx_simd_xor_f
728 * \name SIMD floating-point arithmetic operations on gmx_simd_real_t
732 /*! \brief SIMD a+b for two \ref gmx_simd_real_t.
734 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_add_d,
735 * otherwise \ref gmx_simd_add_f.
737 * \copydetails gmx_simd_add_f
739 # define gmx_simd_add_r gmx_simd_add_f
741 /*! \brief SIMD a-b for two \ref gmx_simd_real_t.
743 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_sub_d,
744 * otherwise \ref gmx_simd_sub_f.
746 * \copydetails gmx_simd_sub_f
748 # define gmx_simd_sub_r gmx_simd_sub_f
750 /*! \brief SIMD a*b for two \ref gmx_simd_real_t.
752 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_mul_d,
753 * otherwise \ref gmx_simd_mul_f.
755 * \copydetails gmx_simd_mul_f
757 # define gmx_simd_mul_r gmx_simd_mul_f
759 /*! \brief SIMD a*b+c for three \ref gmx_simd_real_t.
761 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fmadd_d,
762 * otherwise \ref gmx_simd_fmadd_f.
764 * \copydetails gmx_simd_fmadd_f
766 # define gmx_simd_fmadd_r gmx_simd_fmadd_f
768 /*! \brief SIMD a*b-c for three \ref gmx_simd_real_t.
770 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fmsub_d,
771 * otherwise \ref gmx_simd_fmsub_f.
773 * \copydetails gmx_simd_fmsub_f
775 # define gmx_simd_fmsub_r gmx_simd_fmsub_f
777 /*! \brief SIMD -a*b+c for three \ref gmx_simd_real_t.
779 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fnmadd_d,
780 * otherwise \ref gmx_simd_fnmadd_f.
782 * \copydetails gmx_simd_fnmadd_f
784 # define gmx_simd_fnmadd_r gmx_simd_fnmadd_f
786 /*! \brief SIMD -a*b-c for three \ref gmx_simd_real_t.
788 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fnmsub_d,
789 * otherwise \ref gmx_simd_fnmsub_f.
791 * \copydetails gmx_simd_fnmsub_f
793 # define gmx_simd_fnmsub_r gmx_simd_fnmsub_f
795 /*! \brief SIMD table lookup for 1/sqrt(x) approximation.
797 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_rsqrt_d,
798 * otherwise \ref gmx_simd_rsqrt_f.
800 * \copydetails gmx_simd_rsqrt_f
802 # define gmx_simd_rsqrt_r gmx_simd_rsqrt_f
804 /*! \brief SIMD table lookup for 1/x approximation.
806 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_rcp_d,
807 * otherwise \ref gmx_simd_rcp_f.
809 * \copydetails gmx_simd_rcp_f
811 # define gmx_simd_rcp_r gmx_simd_rcp_f
813 /*! \brief SIMD fabs(x) for \ref gmx_simd_real_t.
815 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fabs_d,
816 * otherwise \ref gmx_simd_fabs_f.
818 * \copydetails gmx_simd_fabs_f
820 # define gmx_simd_fabs_r gmx_simd_fabs_f
822 /*! \brief SIMD -x for \ref gmx_simd_real_t.
824 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fneg_d,
825 * otherwise \ref gmx_simd_fneg_f.
827 * \copydetails gmx_simd_fneg_f
829 # define gmx_simd_fneg_r gmx_simd_fneg_f
831 /*! \brief SIMD max(a,b) for each element in \ref gmx_simd_real_t.
833 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_max_d,
834 * otherwise \ref gmx_simd_max_f.
836 * \copydetails gmx_simd_max_f
838 # define gmx_simd_max_r gmx_simd_max_f
840 /*! \brief SIMD min(a,b) for each element in \ref gmx_simd_real_t.
842 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_min_d,
843 * otherwise \ref gmx_simd_min_f.
845 * \copydetails gmx_simd_min_f
847 # define gmx_simd_min_r gmx_simd_min_f
849 /*! \brief Round \ref gmx_simd_real_t to nearest int, return \ref gmx_simd_real_t.
851 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_round_d,
852 * otherwise \ref gmx_simd_round_f.
854 * \copydetails gmx_simd_round_f
856 # define gmx_simd_round_r gmx_simd_round_f
858 /*! \brief Truncate \ref gmx_simd_real_t towards 0, return \ref gmx_simd_real_t.
860 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_trunc_d,
861 * otherwise \ref gmx_simd_trunc_f.
863 * \copydetails gmx_simd_trunc_f
865 # define gmx_simd_trunc_r gmx_simd_trunc_f
867 /*! \brief SIMD Fraction, i.e. x-trunc(x) for \ref gmx_simd_real_t.
869 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fraction_d,
870 * otherwise \ref gmx_simd_fraction_f.
872 * \copydetails gmx_simd_fraction_f
874 # define gmx_simd_fraction_r gmx_simd_fraction_f
876 /*! \brief Return the FP exponent of a SIMD \ref gmx_simd_real_t as a \ref gmx_simd_real_t.
878 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_get_exponent_d,
879 * otherwise \ref gmx_simd_get_exponent_f.
881 * \copydetails gmx_simd_exponent_f
883 # define gmx_simd_get_exponent_r gmx_simd_get_exponent_f
885 /*! \brief Return the FP mantissa of a SIMD \ref gmx_simd_real_t as a \ref gmx_simd_real_t.
887 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_get_mantissa_d,
888 * otherwise \ref gmx_simd_get_mantissa_f.
890 * \copydetails gmx_simd_mantissa_f
892 # define gmx_simd_get_mantissa_r gmx_simd_get_mantissa_f
894 /*! \brief Set the exponent of a SIMD \ref gmx_simd_real_t from a \ref gmx_simd_real_t.
896 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_set_exponent_d,
897 * otherwise \ref gmx_simd_set_exponent_f.
899 * \copydetails gmx_simd_set_exponent_f
901 # define gmx_simd_set_exponent_r gmx_simd_set_exponent_f
904 * \name SIMD comparison, boolean, and select operations for gmx_simd_real_t
908 /*! \brief SIMD a==b for \ref gmx_simd_real_t. Returns a \ref gmx_simd_bool_t.
910 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmpeq_d,
911 * otherwise \ref gmx_simd_cmpeq_f.
913 * \copydetails gmx_simd_cmpeq_f
915 # define gmx_simd_cmpeq_r gmx_simd_cmpeq_f
917 /*! \brief SIMD a<b for \ref gmx_simd_real_t. Returns a \ref gmx_simd_bool_t.
919 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmplt_d,
920 * otherwise \ref gmx_simd_cmplt_f.
922 * \copydetails gmx_simd_cmplt_f
924 # define gmx_simd_cmplt_r gmx_simd_cmplt_f
926 /*! \brief SIMD a<=b for \ref gmx_simd_real_t. Returns a \ref gmx_simd_bool_t.
928 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmple_d,
929 * otherwise \ref gmx_simd_cmple_f.
931 * \copydetails gmx_simd_cmple_f
933 # define gmx_simd_cmple_r gmx_simd_cmple_f
935 /*! \brief For each element, the result boolean is true if both arguments are true
937 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_and_db,
938 * otherwise \ref gmx_simd_and_fb.
940 * \copydetails gmx_simd_and_fb
942 # define gmx_simd_and_b gmx_simd_and_fb
944 /*! \brief For each element, the result boolean is true if either argument is true
946 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_or_db,
947 * otherwise \ref gmx_simd_or_fb.
949 * \copydetails gmx_simd_or_fn
951 # define gmx_simd_or_b gmx_simd_or_fb
953 /*! \brief Return nonzero if any element in gmx_simd_bool_t is true, otherwise 0.
955 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_anytrue_db,
956 * otherwise \ref gmx_simd_anytrue_fb.
958 * \copydetails gmx_simd_anytrue_fb
960 # define gmx_simd_anytrue_b gmx_simd_anytrue_fb
962 /*! \brief Selects elements from \ref gmx_simd_real_t where boolean is true, otherwise 0.
964 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendzero_d,
965 * otherwise \ref gmx_simd_blendzero_f.
967 * \copydetails gmx_simd_blendzero_f
969 * \sa gmx_simd_blendzero_i
971 # define gmx_simd_blendzero_r gmx_simd_blendzero_f
973 /*! \brief Selects elements from \ref gmx_simd_real_t where boolean is false, otherwise 0.
975 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendnotzero_d,
976 * otherwise \ref gmx_simd_blendnotzero_f.
978 * \copydetails gmx_simd_blendnotzero_f
980 # define gmx_simd_blendnotzero_r gmx_simd_blendnotzero_f
982 /*! \brief Selects from 2nd real SIMD arg where boolean is true, otherwise 1st arg.
984 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendv_d,
985 * otherwise \ref gmx_simd_blendv_f.
987 * \copydetails gmx_simd_blendv_f
989 # define gmx_simd_blendv_r gmx_simd_blendv_f
991 /*! \brief Return sum of all elements in SIMD floating-point variable.
993 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_reduce_d,
994 * otherwise \ref gmx_simd_reduce_f.
996 * \copydetails gmx_simd_reduce_f
998 # define gmx_simd_reduce_r gmx_simd_reduce_f
1001 * \name SIMD integer logical operations on gmx_simd_int32_t
1003 * These instructions are available if \ref GMX_SIMD_HAVE_INT32_LOGICAL is defined.
1007 /*! \brief Shift each element in \ref gmx_simd_int32_t left by immediate
1009 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_slli_di,
1010 * otherwise \ref gmx_simd_slli_fi.
1012 * \copydetails gmx_simd_slli_fi
1014 # define gmx_simd_slli_i gmx_simd_slli_fi
1016 /*! \brief Shift each element in \ref gmx_simd_int32_t right by immediate
1018 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_srli_di,
1019 * otherwise \ref gmx_simd_srli_fi.
1021 * \copydetails gmx_simd_srli_fi
1023 # define gmx_simd_srli_i gmx_simd_srli_fi
1025 /*! \brief Bitwise \a and on two \ref gmx_simd_int32_t.
1027 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_and_di,
1028 * otherwise \ref gmx_simd_and_fi.
1030 * \copydetails gmx_simd_and_fi
1032 # define gmx_simd_and_i gmx_simd_and_fi
1034 /*! \brief Bitwise \a and-not on two \ref gmx_simd_int32_t; 1st arg is complemented.
1036 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_andnot_di,
1037 * otherwise \ref gmx_simd_andnot_fi.
1039 * \copydetails gmx_simd_andnot_fi
1041 # define gmx_simd_andnot_i gmx_simd_andnot_fi
1043 /*! \brief Bitwise \a or on two \ref gmx_simd_int32_t.
1045 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_or_di,
1046 * otherwise \ref gmx_simd_or_fi.
1048 * \copydetails gmx_simd_or_fi
1050 # define gmx_simd_or_i gmx_simd_or_fi
1052 /*! \brief Bitwise \a xor on two \ref gmx_simd_int32_t.
1054 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_xor_di,
1055 * otherwise \ref gmx_simd_xor_fi.
1057 * \copydetails gmx_simd_xor_fi
1059 # define gmx_simd_xor_i gmx_simd_xor_fi
1062 * \name SIMD integer arithmetic operations on gmx_simd_int32_t
1064 * These instructions are available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is defined.
1068 /*! \brief SIMD a+b for two \ref gmx_simd_int32_t.
1070 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_add_di,
1071 * otherwise \ref gmx_simd_add_fi.
1073 * \copydetails gmx_simd_add_fi
1075 # define gmx_simd_add_i gmx_simd_add_fi
1077 /*! \brief SIMD a-b for two \ref gmx_simd_int32_t.
1079 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_sub_di,
1080 * otherwise \ref gmx_simd_sub_fi.
1082 * \copydetails gmx_simd_sub_fi
1084 # define gmx_simd_sub_i gmx_simd_sub_fi
1086 /*! \brief SIMD a*b for two \ref gmx_simd_int32_t.
1088 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_mul_di,
1089 * otherwise \ref gmx_simd_mul_fi.
1091 * \copydetails gmx_simd_mul_fi
1093 # define gmx_simd_mul_i gmx_simd_mul_fi
1096 * \name SIMD integer comparison, booleans, and selection on gmx_simd_int32_t
1098 * These instructions are available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is defined.
1102 /*! \brief Returns boolean describing whether a==b, for \ref gmx_simd_int32_t
1104 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmpeq_di,
1105 * otherwise \ref gmx_simd_cmpeq_fi.
1107 * \copydetails gmx_simd_cmpeq_fi
1109 # define gmx_simd_cmpeq_i gmx_simd_cmpeq_fi
1111 /*! \brief Returns boolean describing whether a<b, for \ref gmx_simd_int32_t
1113 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmplt_di,
1114 * otherwise \ref gmx_simd_cmplt_fi.
1116 * \copydetails gmx_simd_cmplt_fi
1118 # define gmx_simd_cmplt_i gmx_simd_cmplt_fi
1120 /*! \brief For each element, the result boolean is true if both arguments are true
1122 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_and_dib,
1123 * otherwise \ref gmx_simd_and_fib.
1125 * \copydetails gmx_simd_and_fib
1127 # define gmx_simd_and_ib gmx_simd_and_fib
1129 /*! \brief For each element, the result boolean is true if either argument is true.
1131 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_or_dib,
1132 * otherwise \ref gmx_simd_or_fib.
1134 * \copydetails gmx_simd_or_fib
1136 # define gmx_simd_or_ib gmx_simd_or_fib
1138 /*! \brief Return nonzero if any element in gmx_simd_ibool_t is true, otherwise 0.
1140 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_anytrue_dib,
1141 * otherwise \ref gmx_simd_anytrue_fib.
1143 * \copydetails gmx_simd_anytrue_fib
1145 # define gmx_simd_anytrue_ib gmx_simd_anytrue_fib
1147 /*! \brief Selects elements from \ref gmx_simd_int32_t where boolean is true, otherwise 0.
1149 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendzero_di,
1150 * otherwise \ref gmx_simd_blendzero_fi.
1152 * \copydetails gmx_simd_blendzero_fi
1154 # define gmx_simd_blendzero_i gmx_simd_blendzero_fi
1156 /*! \brief Selects elements from \ref gmx_simd_int32_t where boolean is false, otherwise 0.
1158 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendnotzero_di,
1159 * otherwise \ref gmx_simd_blendnotzero_fi.
1161 * \copydetails gmx_simd_blendnotzero_fi
1163 # define gmx_simd_blendnotzero_i gmx_simd_blendnotzero_fi
1165 /*! \brief Selects from 2nd int SIMD arg where boolean is true, otherwise 1st arg.
1167 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendv_di,
1168 * otherwise \ref gmx_simd_blendv_fi.
1170 * \copydetails gmx_simd_blendv_fi
1172 # define gmx_simd_blendv_i gmx_simd_blendv_fi
1175 * \name SIMD conversion operations
1177 * These instructions are available when both types involved in the conversion
1178 * are defined, e.g. \ref GMX_SIMD_HAVE_REAL and \ref GMX_SIMD_HAVE_INT32
1179 * for real-to-integer conversion.
1183 /*! \brief Convert gmx_simd_real_t to gmx_simd_int32_t, round to nearest integer.
1185 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvt_d2i,
1186 * otherwise \ref gmx_simd_cvt_f2i.
1188 * \copydetails gmx_simd_cvt_f2i
1190 # define gmx_simd_cvt_r2i gmx_simd_cvt_f2i
1192 /*! \brief Convert gmx_simd_real_t to gmx_simd_int32_t, truncate towards zero
1194 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvtt_d2i,
1195 * otherwise \ref gmx_simd_cvtt_f2i.
1197 * \copydetails gmx_simd_cvtt_f2i
1199 # define gmx_simd_cvtt_r2i gmx_simd_cvtt_f2i
1201 /*! \brief Convert gmx_simd_int32_t to gmx_simd_real_t
1203 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvt_i2d,
1204 * otherwise \ref gmx_simd_cvt_i2f.
1206 * \copydetails gmx_simd_cvt_i2f
1208 # define gmx_simd_cvt_i2r gmx_simd_cvt_i2f
1210 /*! \brief Convert from gmx_simd_bool_t to gmx_simd_ibool_t
1212 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvt_db2dib,
1213 * otherwise \ref gmx_simd_cvt_fb2fib.
1215 * \copydetails gmx_simd_cvt_fb2fib
1217 # define gmx_simd_cvt_b2ib gmx_simd_cvt_fb2fib
1219 /*! \brief Convert from gmx_simd_ibool_t to gmx_simd_bool_t
1221 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvt_dib2db,
1222 * otherwise \ref gmx_simd_cvt_fib2fb.
1224 * \copydetails gmx_simd_cvt_fib2fb
1226 # define gmx_simd_cvt_ib2b gmx_simd_cvt_fib2fb
1230 * \name SIMD memory alignment operations
1234 /*! \brief Align real memory for SIMD usage.
1236 * This routine will only align memory if \ref GMX_SIMD_HAVE_REAL is defined.
1237 * Otherwise the original pointer will be returned.
1239 * Start by allocating an extra \ref GMX_SIMD_REAL_WIDTH float elements of memory,
1240 * and then call this function. The returned pointer will be greater or equal
1241 * to the one you provided, and point to an address inside your provided memory
1242 * that is aligned to the SIMD width.
1244 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_align_d,
1245 * otherwise \ref gmx_simd_align_f. For detailed documentation, see the
1246 * precision-specific implementation routines.
1248 # define gmx_simd_align_r gmx_simd_align_f
1250 /*! \brief Align integer memory for SIMD usage.
1252 * This routine will only align memory if \ref GMX_SIMD_HAVE_INT32 is defined.
1253 * Otherwise the original pointer will be returned.
1255 * Start by allocating an extra \ref GMX_SIMD_INT32_WIDTH elements of memory,
1256 * and then call this function. The returned pointer will be greater or equal
1257 * to the one you provided, and point to an address inside your provided memory
1258 * that is aligned to the SIMD width.
1260 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_align_di,
1261 * otherwise \ref gmx_simd_align_fi. For detailed documentation, see the
1262 * precision-specific implementation routines.
1264 # define gmx_simd_align_i gmx_simd_align_fi
1268 /*! \name SIMD4 - constant width-four SIMD datatypes
1270 * These operations are only meant to be used for a few coordinate
1271 * manipulation and grid interpolation routines, so we only support a subset
1272 * of operations for SIMD4. To avoid repeating all the documentation from
1273 * the generic width SIMD routines, we only provide brief documentation for
1274 * these operations. Follow the link to the implementation documentation or the
1275 * reference to the corresponding generic SIMD routine. The format will be
1276 * exactly the same, but they have SIMD replaced with SIMD4.
1280 /*! \brief SIMD real datatype guaranteed to be 4 elements wide, if available.
1282 * All the SIMD4 datatypes and operations behave like their counterparts for
1283 * the generic SIMD implementation, but they might be implemented with different
1284 * registers, or not supported at all. It is important that you check the
1285 * define \ref GMX_SIMD4_HAVE_REAL before using it.
1287 * Just as the normal SIMD operations, all SIMD4 types and routines will
1288 * be aliased to either single or double precision ones based on whether
1289 * GMX_DOUBLE is defined.
1291 * \note There is no support for integer or math operations in SIMD4.
1293 # define gmx_simd4_real_t gmx_simd4_float_t
1295 /*! \brief Boolean for \ref gmx_simd4_real_t comparision/selection */
1296 # define gmx_simd4_bool_t gmx_simd4_fbool_t
1298 /*! \brief Load aligned data to gmx_simd4_real_t.
1300 * \copydetails gmx_simd4_load_f
1302 # define gmx_simd4_load_r gmx_simd4_load_f
1304 /*! \brief Load single element to gmx_simd4_real_t
1306 * \copydetails gmx_simd4_load1_f
1308 # define gmx_simd4_load1_r gmx_simd4_load1_f
1310 /*! \brief Set gmx_simd4_real_t from scalar value
1312 * \copydetails gmx_simd4_set1_f
1314 # define gmx_simd4_set1_r gmx_simd4_set1_f
1316 /*! \brief store aligned data from gmx_simd4_real_t
1318 * \copydetails gmx_simd4_store_f
1320 # define gmx_simd4_store_r gmx_simd4_store_f
1322 /*! \brief Load unaligned data to gmx_simd4_real_t
1324 * \copydetails gmx_simd4_loadu_f
1326 # define gmx_simd4_loadu_r gmx_simd4_loadu_f
1328 /*! \brief Store unaligned data from gmx_simd4_real_t
1330 * \copydetails gmx_simd4_storeu_f
1332 # define gmx_simd4_storeu_r gmx_simd4_storeu_f
1334 /*! \brief Set all elements in gmx_simd4_real_t to 0.0
1336 * \copydetails gmx_simd4_setzero_f
1338 # define gmx_simd4_setzero_r gmx_simd4_setzero_f
1340 /*! \brief Bitwise and for two gmx_simd4_real_t
1342 * \copydetails gmx_simd4_and_f
1344 # define gmx_simd4_and_r gmx_simd4_and_f
1346 /*! \brief Bitwise and-not for two gmx_simd4_real_t. 1st arg is complemented.
1348 * \copydetails gmx_simd4_andnot_f
1350 # define gmx_simd4_andnot_r gmx_simd4_andnot_f
1352 /*! \brief Bitwise or for two gmx_simd4_real_t
1354 * \copydetails gmx_simd4_or_f
1356 # define gmx_simd4_or_r gmx_simd4_or_f
1358 /*! \brief Bitwise xor for two gmx_simd4_real_t
1360 * \copydetails gmx_simd4_xor_f
1362 # define gmx_simd4_xor_r gmx_simd4_xor_f
1364 /*! \brief a+b for \ref gmx_simd4_real_t
1366 * \copydetails gmx_simd4_add_f
1368 # define gmx_simd4_add_r gmx_simd4_add_f
1370 /*! \brief a-b for \ref gmx_simd4_real_t
1372 * \copydetails gmx_simd4_sub_f
1374 # define gmx_simd4_sub_r gmx_simd4_sub_f
1376 /*! \brief a*b for \ref gmx_simd4_real_t
1378 * \copydetails gmx_simd4_mul_f
1380 # define gmx_simd4_mul_r gmx_simd4_mul_f
1382 /*! \brief a*b+c for \ref gmx_simd4_real_t
1384 * \copydetails gmx_simd4_fmadd_f
1386 # define gmx_simd4_fmadd_r gmx_simd4_fmadd_f
1388 /*! \brief a*b-c for \ref gmx_simd4_real_t
1390 * \copydetails gmx_simd4_fmsub_f
1392 # define gmx_simd4_fmsub_r gmx_simd4_fmsub_f
1394 /*! \brief -a*b+c for \ref gmx_simd4_real_t
1396 * \copydetails gmx_simd4_fnmadd_f
1398 # define gmx_simd4_fnmadd_r gmx_simd4_fnmadd_f
1400 /*! \brief -a*b-c for \ref gmx_simd4_real_t
1402 * \copydetails gmx_simd4_fnmsub_f
1404 # define gmx_simd4_fnmsub_r gmx_simd4_fnmsub_f
1406 /*! \brief 1/sqrt(x) approximate lookup for \ref gmx_simd4_real_t
1408 * \copydetails gmx_simd4_rsqrt_f
1410 # define gmx_simd4_rsqrt_r gmx_simd4_rsqrt_f
1412 /*! \brief fabs(x) for \ref gmx_simd4_real_t
1414 * \copydetails gmx_simd4_fabs_f
1416 # define gmx_simd4_fabs_r gmx_simd4_fabs_f
1418 /*! \brief Change sign (-x) for \ref gmx_simd4_real_t
1420 * \copydetails gmx_simd4_fneg_f
1422 # define gmx_simd4_fneg_r gmx_simd4_fneg_f
1424 /*! \brief Select maximum of each pair of elements from args for \ref gmx_simd4_real_t
1426 * \copydetails gmx_simd4_max_f
1428 # define gmx_simd4_max_r gmx_simd4_max_f
1430 /*! \brief Select minimum of each pair of elements from args for \ref gmx_simd4_real_t
1432 * \copydetails gmx_simd4_min_f
1434 # define gmx_simd4_min_r gmx_simd4_min_f
1436 /*! \brief Round \ref gmx_simd4_real_t to nearest integer, return \ref gmx_simd4_real_t
1438 * \copydetails gmx_simd4_round_f
1440 # define gmx_simd4_round_r gmx_simd4_round_f
1442 /*! \brief Truncate \ref gmx_simd4_real_t towards zero, return \ref gmx_simd4_real_t
1444 * \copydetails gmx_simd4_trunc_f
1446 # define gmx_simd4_trunc_r gmx_simd4_trunc_f
1448 /*! \brief Scalar product of first three elements of two \ref gmx_simd4_real_t *
1450 * \copydetails gmx_simd4_dotproduct3_f
1452 # define gmx_simd4_dotproduct3_r gmx_simd4_dotproduct3_f
1454 /*! \brief Return booleans whether a==b for each element two \ref gmx_simd4_real_t
1456 * \copydetails gmx_simd4_cmpeq_f
1458 # define gmx_simd4_cmpeq_r gmx_simd4_cmpeq_f
1459 /*! \brief Return booleans whether a<b for each element two \ref gmx_simd4_real_t
1461 * \copydetails gmx_simd4_cmplt_f
1463 # define gmx_simd4_cmplt_r gmx_simd4_cmplt_f
1464 /*! \brief Return booleans whether a<=b for each element two \ref gmx_simd4_real_t
1466 * \copydetails gmx_simd4_cmple_f
1468 # define gmx_simd4_cmple_r gmx_simd4_cmple_f
1470 /*! \brief Logical and for two \ref gmx_simd4_bool_t
1472 * \copydetails gmx_simd4_and_fb
1474 # define gmx_simd4_and_b gmx_simd4_and_fb
1475 /*! \brief Logical or for two \ref gmx_simd4_bool_t
1477 * \copydetails gmx_simd4_or_fb
1479 # define gmx_simd4_or_b gmx_simd4_or_fb
1481 /*! \brief Return nonzero if any element in \ref gmx_simd4_bool_t is true, otherwise 0
1483 * \copydetails gmx_simd4_anytrue_fb
1485 # define gmx_simd4_anytrue_b gmx_simd4_anytrue_fb
1487 /*! \brief Selects from 2nd real SIMD4 arg where boolean is true, otherwise 1st arg
1489 * \copydetails gmx_simd4_blendzero_f
1491 # define gmx_simd4_blendzero_r gmx_simd4_blendzero_f
1493 /*! \brief Selects from 2nd real SIMD4 arg where boolean is false, otherwise 1st arg
1495 * \copydetails gmx_simd4_blendnotzero_f
1497 # define gmx_simd4_blendnotzero_r gmx_simd4_blendnotzero_f
1499 /*! \brief Selects from 2nd real SIMD4 arg where boolean is true, otherwise 1st arg
1501 * \copydetails gmx_simd4_blendv_f
1503 # define gmx_simd4_blendv_r gmx_simd4_blendv_f
1505 /*! \brief Return sum of all elements in SIMD4 floating-point variable.
1507 * \copydetails gmx_simd4_reduce_f
1509 # define gmx_simd4_reduce_r gmx_simd4_reduce_f
1511 /*! \brief Align real memory for SIMD4 usage.
1513 * \copydetails gmx_simd4_align_f
1515 # define gmx_simd4_align_r gmx_simd4_align_f
1519 /*! \name SIMD predefined macros to describe high-level capabilities
1523 # if (defined GMX_SIMD_HAVE_FLOAT) || (defined DOXYGEN)
1524 /*! \brief Defined if gmx_simd_real_t is available.
1526 * if GMX_DOUBLE is defined, this will be aliased to
1527 * \ref GMX_SIMD_HAVE_DOUBLE, otherwise GMX_SIMD_HAVE_FLOAT.
1529 # define GMX_SIMD_HAVE_REAL
1530 /*! \brief Width of gmx_simd_real_t.
1532 * if GMX_DOUBLE is defined, this will be aliased to
1533 * \ref GMX_SIMD_DOUBLE_WIDTH, otherwise GMX_SIMD_FLOAT_WIDTH.
1535 # define GMX_SIMD_REAL_WIDTH GMX_SIMD_FLOAT_WIDTH
1537 # if (defined GMX_SIMD_HAVE_FINT32) || (defined DOXYGEN)
1538 /*! \brief Defined if gmx_simd_int32_t is available.
1540 * if GMX_DOUBLE is defined, this will be aliased to
1541 * \ref GMX_SIMD_HAVE_DINT32, otherwise GMX_SIMD_HAVE_FINT32.
1543 # define GMX_SIMD_HAVE_INT32
1544 /*! \brief Width of gmx_simd_int32_t.
1546 * if GMX_DOUBLE is defined, this will be aliased to
1547 * \ref GMX_SIMD_DINT32_WIDTH, otherwise GMX_SIMD_FINT32_WIDTH.
1549 # define GMX_SIMD_INT32_WIDTH GMX_SIMD_FINT32_WIDTH
1551 # if (defined GMX_SIMD_HAVE_FINT32_EXTRACT) || (defined DOXYGEN)
1552 /*! \brief Defined if gmx_simd_extract_i() is available.
1554 * if GMX_DOUBLE is defined, this will be aliased to
1555 * \ref GMX_SIMD_HAVE_DINT32_EXTRACT, otherwise GMX_SIMD_HAVE_FINT32_EXTRACT.
1557 # define GMX_SIMD_HAVE_INT32_EXTRACT
1559 # if (defined GMX_SIMD_HAVE_FINT32_LOGICAL) || (defined DOXYGEN)
1560 /*! \brief Defined if logical ops are supported on gmx_simd_int32_t.
1562 * if GMX_DOUBLE is defined, this will be aliased to
1563 * \ref GMX_SIMD_HAVE_DINT32_LOGICAL, otherwise GMX_SIMD_HAVE_FINT32_LOGICAL.
1565 # define GMX_SIMD_HAVE_INT32_LOGICAL
1567 # if (defined GMX_SIMD_HAVE_FINT32_ARITHMETICS) || (defined DOXYGEN)
1568 /*! \brief Defined if arithmetic ops are supported on gmx_simd_int32_t.
1570 * if GMX_DOUBLE is defined, this will be aliased to
1571 * \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS, otherwise GMX_SIMD_HAVE_FINT32_ARITHMETICS.
1573 # define GMX_SIMD_HAVE_INT32_ARITHMETICS
1575 # if (defined GMX_SIMD4_HAVE_FLOAT) || (defined DOXYGEN)
1576 /*! \brief Defined if gmx_simd4_real_t is available.
1578 * if GMX_DOUBLE is defined, this will be aliased to
1579 * \ref GMX_SIMD4_HAVE_DOUBLE, otherwise GMX_SIMD4_HAVE_FLOAT.
1581 # define GMX_SIMD4_HAVE_REAL
1586 #endif /* GMX_DOUBLE */
1591 #endif /* GMX_SIMD_SIMD_H */