2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * \defgroup module_simd SIMD intrinsics interface (simd)
38 * \ingroup group_utilitymodules
40 * \brief Provides an architecture-independent way of doing SIMD coding.
42 * Overview of the SIMD implementation is provided in \ref page_simd.
43 * The details are documented in simd.h and the reference implementation
46 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
49 #ifndef GMX_SIMD_SIMD_H
50 #define GMX_SIMD_SIMD_H
52 /*! \libinternal \file
54 * \brief Definitions, capabilities, and wrappers for SIMD module.
56 * The macros in this file are intended to be used for writing
57 * architecture-independent SIMD intrinsics code.
58 * To support a new architecture, adding a new sub-include with macros here
59 * should be (nearly) all that is needed.
61 * The defines in this top-level file will set default Gromacs real precision
62 * operations to either single or double precision based on whether
63 * GMX_DOUBLE is defined. The actual implementation - including e.g.
64 * conversion operations specifically between single and double - is documented
65 * in impl_reference.h.
67 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
70 * \ingroup module_simd
78 #include "gromacs/legacyheaders/types/simple.h"
80 /* Forward declarations of the pointer-alignment helpers that are defined
 * further down in this header, so that the architecture implementation
 * headers included below can already use them for memory allocation.
 */
81 static gmx_inline float * gmx_simd_align_f(float *p);
82 static gmx_inline double * gmx_simd_align_d(double *p);
83 static gmx_inline int * gmx_simd_align_fi(int *p);
84 static gmx_inline int * gmx_simd_align_di(int *p);
85 static gmx_inline float * gmx_simd4_align_f(float *p);
86 static gmx_inline double * gmx_simd4_align_d(double *p);
89 /*! \addtogroup module_simd */
92 /*! \name SIMD predefined macros to describe high-level capabilities
94 * These macros are used to describe the features available in default
95 * Gromacs real precision. They are set from the lower-level implementation
96 * files that have macros describing single and double precision individually,
97 * as well as the implementation details.
102 * GMX_SIMD indicates that some sort of SIMD support is present in software.
104 * It is disabled if no architecture, not even the reference SIMD, has been selected.
109 /* Intel MIC is a bit special since it is a co-processor. This means the rest
110 * of GROMACS (which runs on the CPU) should use a default SIMD set like AVX,
111 * while the part running on the coprocessor defines __MIC__. All functions in
112 * this SIMD module are static, so it will work perfectly fine to include this
113 * file with different SIMD definitions for different files.
116 # include "gromacs/simd/impl_intel_mic/impl_intel_mic.h"
117 #elif defined GMX_SIMD_X86_AVX2_256
118 # include "gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256.h"
119 #elif defined GMX_SIMD_X86_AVX_256
120 # include "gromacs/simd/impl_x86_avx_256/impl_x86_avx_256.h"
121 #elif defined GMX_SIMD_X86_AVX_128_FMA
122 # include "gromacs/simd/impl_x86_avx_128_fma/impl_x86_avx_128_fma.h"
123 #elif defined GMX_SIMD_X86_SSE4_1
124 # include "gromacs/simd/impl_x86_sse4_1/impl_x86_sse4_1.h"
125 #elif defined GMX_SIMD_X86_SSE2
126 # include "gromacs/simd/impl_x86_sse2/impl_x86_sse2.h"
127 #elif defined GMX_SIMD_IBM_QPX
128 # include "gromacs/simd/impl_ibm_qpx/impl_ibm_qpx.h"
129 #elif defined GMX_SIMD_SPARC64_HPC_ACE
130 # include "gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace.h"
131 #elif (defined GMX_SIMD_REFERENCE) || (defined DOXYGEN)
132 /* Plain C SIMD reference implementation, also serves as documentation.
133 * For now this code path will also be taken for Sparc64_HPC_ACE since we have
134 * not yet added the verlet kernel extensions there. The group kernels do not
135 * depend on this file, so they will still be accelerated with SIMD.
137 # include "gromacs/simd/impl_reference/impl_reference.h"
139 /* Turn off the GMX_SIMD flag if we do not even have reference support */
144 * SIMD4 width is always 4, but use this for clarity in definitions.
146 * It improves code readability to allocate e.g. 2*GMX_SIMD4_WIDTH instead of 8.
148 #define GMX_SIMD4_WIDTH 4
152 /*! \name SIMD memory alignment operations
157 * Align a float pointer for usage with SIMD instructions.
159 * You should typically \a not call this function directly (unless you explicitly
160 * want single precision even when GMX_DOUBLE is set), but use the
161 * \ref gmx_simd_align_r macro to align memory in default Gromacs real precision.
163 * \param p Pointer to memory, allocate at least \ref GMX_SIMD_FLOAT_WIDTH extra elements.
165 * \return Aligned pointer (>=p) suitable for loading/storing float fp SIMD.
166 * If \ref GMX_SIMD_HAVE_FLOAT is not set, p will be returned unchanged.
168 * Start by allocating an extra \ref GMX_SIMD_FLOAT_WIDTH float elements of memory,
169 * and then call this function. The returned pointer will be greater or equal
170 * to the one you provided, and point to an address inside your provided memory
171 * that is aligned to the SIMD width.
173 static gmx_inline float *
174 gmx_simd_align_f(float *p)
176 # ifdef GMX_SIMD_HAVE_FLOAT
177 return (float *)(((size_t)((p)+GMX_SIMD_FLOAT_WIDTH-1)) & (~((size_t)(GMX_SIMD_FLOAT_WIDTH*sizeof(float)-1))));
184 * Align a double pointer for usage with SIMD instructions.
186 * You should typically \a not call this function directly (unless you explicitly
187 * want double precision even when GMX_DOUBLE is not set), but use the
188 * \ref gmx_simd_align_r macro to align memory in default Gromacs real precision.
190 * \param p Pointer to memory, allocate at least \ref GMX_SIMD_DOUBLE_WIDTH extra elements.
192 * \return Aligned pointer (>=p) suitable for loading/storing double fp SIMD.
193 * If \ref GMX_SIMD_HAVE_DOUBLE is not set, p will be returned unchanged.
195 * Start by allocating an extra \ref GMX_SIMD_DOUBLE_WIDTH double elements of memory,
196 * and then call this function. The returned pointer will be greater or equal
197 * to the one you provided, and point to an address inside your provided memory
198 * that is aligned to the SIMD width.
200 static gmx_inline double *
201 gmx_simd_align_d(double *p)
203 # ifdef GMX_SIMD_HAVE_DOUBLE
204 return (double *)(((size_t)((p)+GMX_SIMD_DOUBLE_WIDTH-1)) & (~((size_t)(GMX_SIMD_DOUBLE_WIDTH*sizeof(double)-1))));
211 * Align a (float) integer pointer for usage with SIMD instructions.
213 * You should typically \a not call this function directly (unless you explicitly
214 * want integers corresponding to single precision even when GMX_DOUBLE is
215 * set), but use the \ref gmx_simd_align_i macro to align integer memory
216 * corresponding to Gromacs default floating-point precision.
218 * \param p Pointer to memory, allocate at least \ref GMX_SIMD_FINT32_WIDTH extra elements.
220 * \return Aligned pointer (>=p) suitable for loading/storing float-integer SIMD.
221 * If \ref GMX_SIMD_HAVE_FINT32 is not set, p will be returned unchanged.
223 * This routine provides aligned memory for usage with \ref gmx_simd_fint32_t. You
224 * should have allocated an extra \ref GMX_SIMD_FINT32_WIDTH * sizeof(int) bytes. The
225 * reason why we need to separate float-integer vs. double-integer is that the
226 * width of registers after conversions from the floating-point types might not
227 * be identical, or even supported, in both cases.
229 static gmx_inline int *
230 gmx_simd_align_fi(int *p)
232 # ifdef GMX_SIMD_HAVE_FINT32
233 return (int *)(((size_t)((p)+GMX_SIMD_FINT32_WIDTH-1)) & (~((size_t)(GMX_SIMD_FINT32_WIDTH*sizeof(int)-1))));
240 * Align a (double) integer pointer for usage with SIMD instructions.
242 * You should typically \a not call this function directly (unless you explicitly
243 * want integers corresponding to double precision even when GMX_DOUBLE is
244 * not set), but use the \ref gmx_simd_align_i macro to align integer memory
245 * corresponding to Gromacs default floating-point precision.
247 * \param p Pointer to memory, allocate at least \ref GMX_SIMD_DINT32_WIDTH extra elements.
249 * \return Aligned pointer (>=p) suitable for loading/storing double-integer SIMD.
250 * If \ref GMX_SIMD_HAVE_DINT32 is not set, p will be returned unchanged.
252 * This routine provides aligned memory for usage with \ref gmx_simd_dint32_t. You
253 * should have allocated an extra \ref GMX_SIMD_DINT32_WIDTH*sizeof(int) bytes. The
254 * reason why we need to separate float-integer vs. double-integer is that the
255 * width of registers after conversions from the floating-point types might not
256 * be identical, or even supported, in both cases.
258 static gmx_inline int *
259 gmx_simd_align_di(int *p)
261 # ifdef GMX_SIMD_HAVE_DINT32
262 return (int *)(((size_t)((p)+GMX_SIMD_DINT32_WIDTH-1)) & (~((size_t)(GMX_SIMD_DINT32_WIDTH*sizeof(int)-1))));
269 * Align a float pointer for usage with SIMD4 instructions.
271 * You should typically \a not call this function directly (unless you explicitly
272 * want single precision even when GMX_DOUBLE is set), but use the
273 * \ref gmx_simd4_align_r macro to align memory in default Gromacs real precision.
275 * \param p Pointer to memory, allocate at least \ref GMX_SIMD4_WIDTH extra elements.
277 * \return Aligned pointer (>=p) suitable for loading/storing float SIMD.
278 * If \ref GMX_SIMD4_HAVE_FLOAT is not set, p will be returned unchanged.
280 * This routine provides aligned memory for usage with \ref gmx_simd4_float_t.
281 * You should have allocated an extra \ref GMX_SIMD4_WIDTH * sizeof(float) bytes.
283 static gmx_inline float *
284 gmx_simd4_align_f(float *p)
286 # ifdef GMX_SIMD4_HAVE_FLOAT
287 return (float *)(((size_t)((p)+GMX_SIMD4_WIDTH-1)) & (~((size_t)(GMX_SIMD4_WIDTH*sizeof(float)-1))));
294 * Align a double pointer for usage with SIMD4 instructions.
296 * You should typically \a not call this function directly (unless you explicitly
297 * want double precision even when GMX_DOUBLE is not set), but use the
298 * \ref gmx_simd4_align_r macro to align memory in default Gromacs real precision.
300 * \param p Pointer to memory, allocate at least \ref GMX_SIMD4_WIDTH extra elements.
302 * \return Aligned pointer (>=p) suitable for loading/storing double SIMD.
303 * If \ref GMX_SIMD4_HAVE_DOUBLE is not set, p will be returned unchanged.
305 * This routine provides aligned memory for usage with \ref gmx_simd4_double_t.
306 * You should have allocated an extra \ref GMX_SIMD4_WIDTH * sizeof(double) bytes.
308 static gmx_inline double *
309 gmx_simd4_align_d(double *p)
311 # ifdef GMX_SIMD4_HAVE_DOUBLE
312 return (double *)(((size_t)((p)+GMX_SIMD4_WIDTH-1)) & (~((size_t)(GMX_SIMD4_WIDTH*sizeof(double)-1))));
321 /* Define Gromacs "real" precision macros depending on Gromacs config. Note
322 * that conversions float-to-double and v.v. are not included here since they
323 * are not precision-dependent - find them in the implementation files.
326 /* Double floating-point: each precision-neutral name ending in _r (and the
 * gmx_simd_real_t type) is aliased to the double-precision name ending in _d.
 * The documentation is in the float part below.
 */
327 # define gmx_simd_real_t gmx_simd_double_t
/* Load, store and initialization */
328 # define gmx_simd_load_r gmx_simd_load_d
329 # define gmx_simd_load1_r gmx_simd_load1_d
330 # define gmx_simd_set1_r gmx_simd_set1_d
331 # define gmx_simd_store_r gmx_simd_store_d
332 # define gmx_simd_loadu_r gmx_simd_loadu_d
333 # define gmx_simd_storeu_r gmx_simd_storeu_d
334 # define gmx_simd_setzero_r gmx_simd_setzero_d
/* Arithmetic, including the fused multiply-add family */
335 # define gmx_simd_add_r gmx_simd_add_d
336 # define gmx_simd_sub_r gmx_simd_sub_d
337 # define gmx_simd_mul_r gmx_simd_mul_d
338 # define gmx_simd_fmadd_r gmx_simd_fmadd_d
339 # define gmx_simd_fmsub_r gmx_simd_fmsub_d
340 # define gmx_simd_fnmadd_r gmx_simd_fnmadd_d
341 # define gmx_simd_fnmsub_r gmx_simd_fnmsub_d
/* Bitwise logical operations on floating-point data */
342 # define gmx_simd_and_r gmx_simd_and_d
343 # define gmx_simd_andnot_r gmx_simd_andnot_d
344 # define gmx_simd_or_r gmx_simd_or_d
345 # define gmx_simd_xor_r gmx_simd_xor_d
/* Approximations, sign handling, min/max and rounding */
346 # define gmx_simd_rsqrt_r gmx_simd_rsqrt_d
347 # define gmx_simd_rcp_r gmx_simd_rcp_d
348 # define gmx_simd_fabs_r gmx_simd_fabs_d
349 # define gmx_simd_fneg_r gmx_simd_fneg_d
350 # define gmx_simd_max_r gmx_simd_max_d
351 # define gmx_simd_min_r gmx_simd_min_d
352 # define gmx_simd_round_r gmx_simd_round_d
353 # define gmx_simd_trunc_r gmx_simd_trunc_d
354 # define gmx_simd_fraction_r gmx_simd_fraction_d
/* Exponent and mantissa access */
355 # define gmx_simd_get_exponent_r gmx_simd_get_exponent_d
356 # define gmx_simd_get_mantissa_r gmx_simd_get_mantissa_d
357 # define gmx_simd_set_exponent_r gmx_simd_set_exponent_d
358 /* Double integer and conversions: the precision-neutral integer names
 * ending in _i (and gmx_simd_int32_t) are aliased to the _di variants, and
 * the real<->integer conversions to their d2i / i2d forms.
 */
359 # define gmx_simd_int32_t gmx_simd_dint32_t
360 # define gmx_simd_load_i gmx_simd_load_di
361 # define gmx_simd_set1_i gmx_simd_set1_di
362 # define gmx_simd_store_i gmx_simd_store_di
363 # define gmx_simd_loadu_i gmx_simd_loadu_di
364 # define gmx_simd_storeu_i gmx_simd_storeu_di
365 # define gmx_simd_setzero_i gmx_simd_setzero_di
/* Conversions between real and integer SIMD types, and element extraction */
366 # define gmx_simd_cvt_r2i gmx_simd_cvt_d2i
367 # define gmx_simd_cvtt_r2i gmx_simd_cvtt_d2i
368 # define gmx_simd_cvt_i2r gmx_simd_cvt_i2d
369 # define gmx_simd_extract_i gmx_simd_extract_di
/* Integer shifts and bitwise logical operations */
370 # define gmx_simd_slli_i gmx_simd_slli_di
371 # define gmx_simd_srli_i gmx_simd_srli_di
372 # define gmx_simd_and_i gmx_simd_and_di
373 # define gmx_simd_andnot_i gmx_simd_andnot_di
374 # define gmx_simd_or_i gmx_simd_or_di
375 # define gmx_simd_xor_i gmx_simd_xor_di
/* Integer arithmetic */
376 # define gmx_simd_add_i gmx_simd_add_di
377 # define gmx_simd_sub_i gmx_simd_sub_di
378 # define gmx_simd_mul_i gmx_simd_mul_di
379 /* Double booleans and selection: comparison, boolean combination and blend
 * names for gmx_simd_real_t are aliased to the double-precision variants.
 */
380 # define gmx_simd_bool_t gmx_simd_dbool_t
381 # define gmx_simd_cmpeq_r gmx_simd_cmpeq_d
382 # define gmx_simd_cmplt_r gmx_simd_cmplt_d
383 # define gmx_simd_cmple_r gmx_simd_cmple_d
384 # define gmx_simd_and_b gmx_simd_and_db
385 # define gmx_simd_or_b gmx_simd_or_db
386 # define gmx_simd_anytrue_b gmx_simd_anytrue_db
387 # define gmx_simd_blendzero_r gmx_simd_blendzero_d
388 # define gmx_simd_blendnotzero_r gmx_simd_blendnotzero_d
389 # define gmx_simd_blendv_r gmx_simd_blendv_d
390 # define gmx_simd_reduce_r gmx_simd_reduce_d
/* Integer booleans and selection: same pattern for gmx_simd_int32_t, aliased
 * to the double-integer (dib / di) variants.
 */
391 # define gmx_simd_ibool_t gmx_simd_dibool_t
392 # define gmx_simd_cmpeq_i gmx_simd_cmpeq_di
393 # define gmx_simd_cmplt_i gmx_simd_cmplt_di
394 # define gmx_simd_and_ib gmx_simd_and_dib
395 # define gmx_simd_or_ib gmx_simd_or_dib
396 # define gmx_simd_anytrue_ib gmx_simd_anytrue_dib
397 # define gmx_simd_blendzero_i gmx_simd_blendzero_di
398 # define gmx_simd_blendnotzero_i gmx_simd_blendnotzero_di
399 # define gmx_simd_blendv_i gmx_simd_blendv_di
400 /* Conversions between integer and double floating-point booleans */
401 # define gmx_simd_cvt_b2ib gmx_simd_cvt_db2dib
402 # define gmx_simd_cvt_ib2b gmx_simd_cvt_dib2db
404 /* SIMD4 double fp - we only support a subset of SIMD instructions for SIMD4.
 * Each precision-neutral gmx_simd4 name ending in _r or _b is aliased to the
 * double-precision gmx_simd4 name ending in _d or _db.
 */
405 # define gmx_simd4_real_t gmx_simd4_double_t
/* Load, store and initialization */
406 # define gmx_simd4_load_r gmx_simd4_load_d
407 # define gmx_simd4_load1_r gmx_simd4_load1_d
408 # define gmx_simd4_set1_r gmx_simd4_set1_d
409 # define gmx_simd4_store_r gmx_simd4_store_d
410 # define gmx_simd4_loadu_r gmx_simd4_loadu_d
411 # define gmx_simd4_storeu_r gmx_simd4_storeu_d
412 # define gmx_simd4_setzero_r gmx_simd4_setzero_d
/* Arithmetic, including the fused multiply-add family */
413 # define gmx_simd4_add_r gmx_simd4_add_d
414 # define gmx_simd4_sub_r gmx_simd4_sub_d
415 # define gmx_simd4_mul_r gmx_simd4_mul_d
416 # define gmx_simd4_fmadd_r gmx_simd4_fmadd_d
417 # define gmx_simd4_fmsub_r gmx_simd4_fmsub_d
418 # define gmx_simd4_fnmadd_r gmx_simd4_fnmadd_d
419 # define gmx_simd4_fnmsub_r gmx_simd4_fnmsub_d
/* Bitwise logical operations on floating-point data */
420 # define gmx_simd4_and_r gmx_simd4_and_d
421 # define gmx_simd4_andnot_r gmx_simd4_andnot_d
422 # define gmx_simd4_or_r gmx_simd4_or_d
423 # define gmx_simd4_xor_r gmx_simd4_xor_d
/* Approximation, sign handling, min/max, rounding and 3-element dot product */
424 # define gmx_simd4_rsqrt_r gmx_simd4_rsqrt_d
425 # define gmx_simd4_fabs_r gmx_simd4_fabs_d
426 # define gmx_simd4_fneg_r gmx_simd4_fneg_d
427 # define gmx_simd4_max_r gmx_simd4_max_d
428 # define gmx_simd4_min_r gmx_simd4_min_d
429 # define gmx_simd4_round_r gmx_simd4_round_d
430 # define gmx_simd4_trunc_r gmx_simd4_trunc_d
431 # define gmx_simd4_dotproduct3_r gmx_simd4_dotproduct3_d
/* Booleans, comparisons, selection and reduction */
432 # define gmx_simd4_bool_t gmx_simd4_dbool_t
433 # define gmx_simd4_cmpeq_r gmx_simd4_cmpeq_d
434 # define gmx_simd4_cmplt_r gmx_simd4_cmplt_d
435 # define gmx_simd4_cmple_r gmx_simd4_cmple_d
436 # define gmx_simd4_and_b gmx_simd4_and_db
437 # define gmx_simd4_or_b gmx_simd4_or_db
438 # define gmx_simd4_anytrue_b gmx_simd4_anytrue_db
439 # define gmx_simd4_blendzero_r gmx_simd4_blendzero_d
440 # define gmx_simd4_blendnotzero_r gmx_simd4_blendnotzero_d
441 # define gmx_simd4_blendv_r gmx_simd4_blendv_d
442 # define gmx_simd4_reduce_r gmx_simd4_reduce_d
444 /* Memory allocation: the precision-neutral alignment helpers are aliased to
 * the double (real), double-integer (int) and SIMD4 double variants.
 */
445 # define gmx_simd_align_r gmx_simd_align_d
446 # define gmx_simd_align_i gmx_simd_align_di
447 # define gmx_simd4_align_r gmx_simd4_align_d
/* Precision-neutral capability macros: each "REAL" / "INT32" flag or width
 * below is defined only when the corresponding double-precision ("DOUBLE" /
 * "DINT32") macro is defined.
 */
449 # ifdef GMX_SIMD_HAVE_DOUBLE
450 # define GMX_SIMD_HAVE_REAL
451 # define GMX_SIMD_REAL_WIDTH GMX_SIMD_DOUBLE_WIDTH
/* Integer SIMD support accompanying double precision */
453 # ifdef GMX_SIMD_HAVE_DINT32
454 # define GMX_SIMD_HAVE_INT32
455 # define GMX_SIMD_INT32_WIDTH GMX_SIMD_DINT32_WIDTH
/* Optional integer features: element extraction, logical ops, arithmetic */
457 # ifdef GMX_SIMD_HAVE_DINT32_EXTRACT
458 # define GMX_SIMD_HAVE_INT32_EXTRACT
460 # ifdef GMX_SIMD_HAVE_DINT32_LOGICAL
461 # define GMX_SIMD_HAVE_INT32_LOGICAL
463 # ifdef GMX_SIMD_HAVE_DINT32_ARITHMETICS
464 # define GMX_SIMD_HAVE_INT32_ARITHMETICS
/* SIMD4 support in double precision */
466 # ifdef GMX_SIMD4_HAVE_DOUBLE
467 # define GMX_SIMD4_HAVE_REAL
470 #else /* GMX_DOUBLE */
472 /*! \name SIMD data types
474 * The actual storage of these types is implementation dependent. The
475 * documentation is generated from the reference implementation, but for
476 * normal usage this will likely not be what you are using.
479 /*! \brief Real precision floating-point SIMD datatype.
481 * This type is only available if \ref GMX_SIMD_HAVE_REAL is defined.
483 * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_double_t
484 * internally, otherwise \ref gmx_simd_float_t.
486 # define gmx_simd_real_t gmx_simd_float_t
488 /*! \brief 32-bit integer SIMD type.
490 * This type is only available if \ref GMX_SIMD_HAVE_INT32 is defined.
492 * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_dint32_t
493 * internally, otherwise \ref gmx_simd_fint32_t. This might seem a strange
494 * implementation detail, but it is because some SIMD implementations use
495 * different types/widths of integers registers when converting from
496 * double vs. single precision floating point. As long as you just use
497 * this type you will not have to worry about precision.
499 # define gmx_simd_int32_t gmx_simd_fint32_t
501 /*! \brief Boolean SIMD type for usage with \ref gmx_simd_real_t.
503 * This type is only available if \ref GMX_SIMD_HAVE_REAL is defined.
505 * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_dbool_t
506 * internally, otherwise \ref gmx_simd_fbool_t. This is necessary since some
507 * SIMD implementations use bitpatterns for marking truth, so single-
508 * vs. double precision booleans are not necessarily exchangeable.
509 * As long as you just use this type you will not have to worry about precision.
511 * See \ref gmx_simd_ibool_t for an explanation of real vs. integer booleans.
513 # define gmx_simd_bool_t gmx_simd_fbool_t
515 /*! \brief Boolean SIMD type for usage with \ref gmx_simd_int32_t.
517 * This type is only available if \ref GMX_SIMD_HAVE_INT32 is defined.
519 * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_dibool_t
520 * internally, otherwise \ref gmx_simd_fibool_t. This is necessary since some
521 * SIMD implementations use bitpatterns for marking truth, so single-
522 * vs. double precision booleans are not necessarily exchangeable, and while
523 * a double-precision boolean might be represented with a 64-bit mask, the
524 * corresponding integer might only use a 32-bit mask.
526 * We provide conversion routines for these cases, so the only thing you need to
527 * keep in mind is to use \ref gmx_simd_bool_t when working with
528 * \ref gmx_simd_real_t while you pick \ref gmx_simd_ibool_t when working with
529 * \ref gmx_simd_int32_t.
531 * To convert between them, use \ref gmx_simd_cvt_b2ib and \ref gmx_simd_cvt_ib2b.
533 # define gmx_simd_ibool_t gmx_simd_fibool_t
537 * \name SIMD load/store operations on gmx_simd_real_t
539 * \note Unaligned load/stores are only available when
540 * \ref GMX_SIMD_HAVE_LOADU and \ref GMX_SIMD_HAVE_STOREU are set, respectively.
544 /*! \brief Load \ref GMX_SIMD_REAL_WIDTH values from aligned memory to \ref gmx_simd_real_t
546 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_load_d,
547 * otherwise \ref gmx_simd_load_f.
549 * \copydetails gmx_simd_load_f
551 # define gmx_simd_load_r gmx_simd_load_f
553 /*! \brief Set all elements in \ref gmx_simd_real_t from single value in memory.
555 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_load1_d,
556 * otherwise \ref gmx_simd_load1_f.
558 * \copydetails gmx_simd_load1_f
560 # define gmx_simd_load1_r gmx_simd_load1_f
562 /*! \brief Set all elements in \ref gmx_simd_real_t from a scalar.
564 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_set1_d,
565 * otherwise \ref gmx_simd_set1_f.
567 * \copydetails gmx_simd_set1_f
569 # define gmx_simd_set1_r gmx_simd_set1_f
571 /*! \brief Store \ref GMX_SIMD_REAL_WIDTH values from \ref gmx_simd_real_t to aligned memory.
573 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_store_d,
574 * otherwise \ref gmx_simd_store_f.
576 * \copydetails gmx_simd_store_f
578 # define gmx_simd_store_r gmx_simd_store_f
580 /*! \brief Load \ref GMX_SIMD_REAL_WIDTH values from unaligned memory to \ref gmx_simd_real_t.
582 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_loadu_d,
583 * otherwise \ref gmx_simd_loadu_f.
585 * \copydetails gmx_simd_loadu_f
587 # define gmx_simd_loadu_r gmx_simd_loadu_f
589 /*! \brief Store \ref GMX_SIMD_REAL_WIDTH values from \ref gmx_simd_real_t to unaligned memory.
591 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_storeu_d,
592 * otherwise \ref gmx_simd_storeu_f.
594 * \copydetails gmx_simd_storeu_f
596 # define gmx_simd_storeu_r gmx_simd_storeu_f
598 /*! \brief Set all elements in \ref gmx_simd_real_t to 0.0.
600 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_setzero_d,
601 * otherwise \ref gmx_simd_setzero_f.
603 * \copydetails gmx_simd_setzero_f
605 # define gmx_simd_setzero_r gmx_simd_setzero_f
608 * \name SIMD load/store operations on gmx_simd_int32_t
610 * \note Unaligned load/stores are only available when
611 * \ref GMX_SIMD_HAVE_LOADU and \ref GMX_SIMD_HAVE_STOREU are set, respectively.
615 /*! \brief Load \ref GMX_SIMD_INT32_WIDTH values from aligned memory to \ref gmx_simd_int32_t .
617 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_load_di ,
618 * otherwise \ref gmx_simd_load_fi .
620 * \copydetails gmx_simd_load_fi
622 # define gmx_simd_load_i gmx_simd_load_fi
624 /*! \brief Set all elements in \ref gmx_simd_int32_t from a single integer.
626 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_set1_di ,
627 * otherwise \ref gmx_simd_set1_fi .
629 * \copydetails gmx_simd_set1_fi
631 # define gmx_simd_set1_i gmx_simd_set1_fi
633 /*! \brief Store \ref GMX_SIMD_INT32_WIDTH values from \ref gmx_simd_int32_t to aligned memory.
635 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_store_di ,
636 * otherwise \ref gmx_simd_store_fi .
638 * \copydetails gmx_simd_store_fi
640 # define gmx_simd_store_i gmx_simd_store_fi
642 /*! \brief Load \ref GMX_SIMD_INT32_WIDTH values from unaligned memory to \ref gmx_simd_int32_t.
644 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_loadu_di ,
645 * otherwise \ref gmx_simd_loadu_fi .
647 * \copydetails gmx_simd_loadu_fi
649 # define gmx_simd_loadu_i gmx_simd_loadu_fi
651 /*! \brief Store \ref GMX_SIMD_INT32_WIDTH values from \ref gmx_simd_int32_t to unaligned memory.
653 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_storeu_di ,
654 * otherwise \ref gmx_simd_storeu_fi .
656 * \copydetails gmx_simd_storeu_fi
658 # define gmx_simd_storeu_i gmx_simd_storeu_fi
660 /*! \brief Extract single integer from \ref gmx_simd_int32_t element.
662 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_extract_di ,
663 * otherwise \ref gmx_simd_extract_fi .
665 * \copydetails gmx_simd_extract_fi
667 # define gmx_simd_extract_i gmx_simd_extract_fi
669 /*! \brief Set all elements in \ref gmx_simd_int32_t to 0.
671 * If GMX_DOUBLE is defined, it will be aliased to \ref gmx_simd_setzero_di ,
672 * otherwise \ref gmx_simd_setzero_fi .
674 * \copydetails gmx_simd_setzero_fi
676 # define gmx_simd_setzero_i gmx_simd_setzero_fi
680 * \name SIMD floating-point logical operations on gmx_simd_real_t
682 * These instructions are available if \ref GMX_SIMD_HAVE_LOGICAL is defined.
686 /*! \brief Bitwise \a and on two \ref gmx_simd_real_t.
688 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_and_d,
689 * otherwise \ref gmx_simd_and_f.
691 * \copydetails gmx_simd_and_f
693 # define gmx_simd_and_r gmx_simd_and_f
695 /*! \brief Bitwise \a and-not on two \ref gmx_simd_real_t; 1st arg is complemented.
697 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_andnot_d,
698 * otherwise \ref gmx_simd_andnot_f.
700 * \copydetails gmx_simd_andnot_f
702 # define gmx_simd_andnot_r gmx_simd_andnot_f
704 /*! \brief Bitwise \a or on two \ref gmx_simd_real_t.
706 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_or_d,
707 * otherwise \ref gmx_simd_or_f.
709 * \copydetails gmx_simd_or_f
711 # define gmx_simd_or_r gmx_simd_or_f
713 /*! \brief Bitwise \a exclusive-or on two \ref gmx_simd_real_t.
715 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_xor_d,
716 * otherwise \ref gmx_simd_xor_f.
718 * \copydetails gmx_simd_xor_f
720 # define gmx_simd_xor_r gmx_simd_xor_f
723 * \name SIMD floating-point arithmetic operations on gmx_simd_real_t
727 /*! \brief SIMD a+b for two \ref gmx_simd_real_t.
729 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_add_d,
730 * otherwise \ref gmx_simd_add_f.
732 * \copydetails gmx_simd_add_f
734 # define gmx_simd_add_r gmx_simd_add_f
736 /*! \brief SIMD a-b for two \ref gmx_simd_real_t.
738 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_sub_d,
739 * otherwise \ref gmx_simd_sub_f.
741 * \copydetails gmx_simd_sub_f
743 # define gmx_simd_sub_r gmx_simd_sub_f
745 /*! \brief SIMD a*b for two \ref gmx_simd_real_t.
747 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_mul_d,
748 * otherwise \ref gmx_simd_mul_f.
750 * \copydetails gmx_simd_mul_f
752 # define gmx_simd_mul_r gmx_simd_mul_f
754 /*! \brief SIMD a*b+c for three \ref gmx_simd_real_t.
756 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fmadd_d,
757 * otherwise \ref gmx_simd_fmadd_f.
759 * \copydetails gmx_simd_fmadd_f
761 # define gmx_simd_fmadd_r gmx_simd_fmadd_f
763 /*! \brief SIMD a*b-c for three \ref gmx_simd_real_t.
765 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fmsub_d,
766 * otherwise \ref gmx_simd_fmsub_f.
768 * \copydetails gmx_simd_fmsub_f
770 # define gmx_simd_fmsub_r gmx_simd_fmsub_f
772 /*! \brief SIMD -a*b+c for three \ref gmx_simd_real_t.
774 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fnmadd_d,
775 * otherwise \ref gmx_simd_fnmadd_f.
777 * \copydetails gmx_simd_fnmadd_f
779 # define gmx_simd_fnmadd_r gmx_simd_fnmadd_f
781 /*! \brief SIMD -a*b-c for three \ref gmx_simd_real_t.
783 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fnmsub_d,
784 * otherwise \ref gmx_simd_fnmsub_f.
786 * \copydetails gmx_simd_fnmsub_f
788 # define gmx_simd_fnmsub_r gmx_simd_fnmsub_f
790 /*! \brief SIMD table lookup for 1/sqrt(x) approximation.
792 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_rsqrt_d,
793 * otherwise \ref gmx_simd_rsqrt_f.
795 * \copydetails gmx_simd_rsqrt_f
797 # define gmx_simd_rsqrt_r gmx_simd_rsqrt_f
799 /*! \brief SIMD table lookup for 1/x approximation.
801 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_rcp_d,
802 * otherwise \ref gmx_simd_rcp_f.
804 * \copydetails gmx_simd_rcp_f
806 # define gmx_simd_rcp_r gmx_simd_rcp_f
808 /*! \brief SIMD fabs(x) for \ref gmx_simd_real_t.
810 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fabs_d,
811 * otherwise \ref gmx_simd_fabs_f.
813 * \copydetails gmx_simd_fabs_f
815 # define gmx_simd_fabs_r gmx_simd_fabs_f
817 /*! \brief SIMD -x for \ref gmx_simd_real_t.
819 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fneg_d,
820 * otherwise \ref gmx_simd_fneg_f.
822 * \copydetails gmx_simd_fneg_f
824 # define gmx_simd_fneg_r gmx_simd_fneg_f
826 /*! \brief SIMD max(a,b) for each element in \ref gmx_simd_real_t.
828 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_max_d,
829 * otherwise \ref gmx_simd_max_f.
831 * \copydetails gmx_simd_max_f
833 # define gmx_simd_max_r gmx_simd_max_f
835 /*! \brief SIMD min(a,b) for each element in \ref gmx_simd_real_t.
837 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_min_d,
838 * otherwise \ref gmx_simd_min_f.
840 * \copydetails gmx_simd_min_f
842 # define gmx_simd_min_r gmx_simd_min_f
844 /*! \brief Round \ref gmx_simd_real_t to nearest int, return \ref gmx_simd_real_t.
846 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_round_d,
847 * otherwise \ref gmx_simd_round_f.
849 * \copydetails gmx_simd_round_f
851 # define gmx_simd_round_r gmx_simd_round_f
853 /*! \brief Truncate \ref gmx_simd_real_t towards 0, return \ref gmx_simd_real_t.
855 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_trunc_d,
856 * otherwise \ref gmx_simd_trunc_f.
858 * \copydetails gmx_simd_trunc_f
860 # define gmx_simd_trunc_r gmx_simd_trunc_f
862 /*! \brief SIMD Fraction, i.e. x-trunc(x) for \ref gmx_simd_real_t.
864 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fraction_d,
865 * otherwise \ref gmx_simd_fraction_f.
867 * \copydetails gmx_simd_fraction_f
869 # define gmx_simd_fraction_r gmx_simd_fraction_f
871 /*! \brief Return the FP exponent of a SIMD \ref gmx_simd_real_t as a \ref gmx_simd_real_t.
873 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_get_exponent_d,
874 * otherwise \ref gmx_simd_get_exponent_f.
876 * \copydetails gmx_simd_get_exponent_f
878 # define gmx_simd_get_exponent_r gmx_simd_get_exponent_f
880 /*! \brief Return the FP mantissa of a SIMD \ref gmx_simd_real_t as a \ref gmx_simd_real_t.
882 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_get_mantissa_d,
883 * otherwise \ref gmx_simd_get_mantissa_f.
885 * \copydetails gmx_simd_get_mantissa_f
887 # define gmx_simd_get_mantissa_r gmx_simd_get_mantissa_f
889 /*! \brief Set the exponent of a SIMD \ref gmx_simd_real_t from a \ref gmx_simd_real_t.
891 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_set_exponent_d,
892 * otherwise \ref gmx_simd_set_exponent_f.
894 * \copydetails gmx_simd_set_exponent_f
896 # define gmx_simd_set_exponent_r gmx_simd_set_exponent_f
899 * \name SIMD comparison, boolean, and select operations for gmx_simd_real_t
903 /*! \brief SIMD a==b for \ref gmx_simd_real_t. Returns a \ref gmx_simd_bool_t.
905 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmpeq_d,
906 * otherwise \ref gmx_simd_cmpeq_f.
908 * \copydetails gmx_simd_cmpeq_f
910 # define gmx_simd_cmpeq_r gmx_simd_cmpeq_f
912 /*! \brief SIMD a<b for \ref gmx_simd_real_t. Returns a \ref gmx_simd_bool_t.
914 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmplt_d,
915 * otherwise \ref gmx_simd_cmplt_f.
917 * \copydetails gmx_simd_cmplt_f
919 # define gmx_simd_cmplt_r gmx_simd_cmplt_f
921 /*! \brief SIMD a<=b for \ref gmx_simd_real_t. Returns a \ref gmx_simd_bool_t.
923 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmple_d,
924 * otherwise \ref gmx_simd_cmple_f.
926 * \copydetails gmx_simd_cmple_f
928 # define gmx_simd_cmple_r gmx_simd_cmple_f
930 /*! \brief For each element, the result boolean is true if both arguments are true
932 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_and_db,
933 * otherwise \ref gmx_simd_and_fb.
935 * \copydetails gmx_simd_and_fb
937 # define gmx_simd_and_b gmx_simd_and_fb
939 /*! \brief For each element, the result boolean is true if either argument is true
941 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_or_db,
942 * otherwise \ref gmx_simd_or_fb.
944 * \copydetails gmx_simd_or_fb
946 # define gmx_simd_or_b gmx_simd_or_fb
948 /*! \brief Return nonzero if any element in gmx_simd_bool_t is true, otherwise 0.
950 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_anytrue_db,
951 * otherwise \ref gmx_simd_anytrue_fb.
953 * \copydetails gmx_simd_anytrue_fb
955 # define gmx_simd_anytrue_b gmx_simd_anytrue_fb
957 /*! \brief Selects elements from \ref gmx_simd_real_t where boolean is true, otherwise 0.
959 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendzero_d,
960 * otherwise \ref gmx_simd_blendzero_f.
962 * \copydetails gmx_simd_blendzero_f
964 * \sa gmx_simd_blendzero_i
966 # define gmx_simd_blendzero_r gmx_simd_blendzero_f
968 /*! \brief Selects elements from \ref gmx_simd_real_t where boolean is false, otherwise 0.
970 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendnotzero_d,
971 * otherwise \ref gmx_simd_blendnotzero_f.
973 * \copydetails gmx_simd_blendnotzero_f
975 # define gmx_simd_blendnotzero_r gmx_simd_blendnotzero_f
977 /*! \brief Selects from 2nd real SIMD arg where boolean is true, otherwise 1st arg.
979 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendv_d,
980 * otherwise \ref gmx_simd_blendv_f.
982 * \copydetails gmx_simd_blendv_f
984 # define gmx_simd_blendv_r gmx_simd_blendv_f
986 /*! \brief Return sum of all elements in SIMD floating-point variable.
988 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_reduce_d,
989 * otherwise \ref gmx_simd_reduce_f.
991 * \copydetails gmx_simd_reduce_f
993 # define gmx_simd_reduce_r gmx_simd_reduce_f
996 * \name SIMD integer logical operations on gmx_simd_int32_t
998 * These instructions are available if \ref GMX_SIMD_HAVE_INT32_LOGICAL is defined.
1002 /*! \brief Shift each element in \ref gmx_simd_int32_t left by immediate
1004 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_slli_di,
1005 * otherwise \ref gmx_simd_slli_fi.
1007 * \copydetails gmx_simd_slli_fi
1009 # define gmx_simd_slli_i gmx_simd_slli_fi
1011 /*! \brief Shift each element in \ref gmx_simd_int32_t right by immediate
1013 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_srli_di,
1014 * otherwise \ref gmx_simd_srli_fi.
1016 * \copydetails gmx_simd_srli_fi
1018 # define gmx_simd_srli_i gmx_simd_srli_fi
1020 /*! \brief Bitwise \a and on two \ref gmx_simd_int32_t.
1022 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_and_di,
1023 * otherwise \ref gmx_simd_and_fi.
1025 * \copydetails gmx_simd_and_fi
1027 # define gmx_simd_and_i gmx_simd_and_fi
1029 /*! \brief Bitwise \a and-not on two \ref gmx_simd_int32_t; 1st arg is complemented.
1031 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_andnot_di,
1032 * otherwise \ref gmx_simd_andnot_fi.
1034 * \copydetails gmx_simd_andnot_fi
1036 # define gmx_simd_andnot_i gmx_simd_andnot_fi
1038 /*! \brief Bitwise \a or on two \ref gmx_simd_int32_t.
1040 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_or_di,
1041 * otherwise \ref gmx_simd_or_fi.
1043 * \copydetails gmx_simd_or_fi
1045 # define gmx_simd_or_i gmx_simd_or_fi
1047 /*! \brief Bitwise \a xor on two \ref gmx_simd_int32_t.
1049 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_xor_di,
1050 * otherwise \ref gmx_simd_xor_fi.
1052 * \copydetails gmx_simd_xor_fi
1054 # define gmx_simd_xor_i gmx_simd_xor_fi
1057 * \name SIMD integer arithmetic operations on gmx_simd_int32_t
1059 * These instructions are available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is defined.
1063 /*! \brief SIMD a+b for two \ref gmx_simd_int32_t.
1065 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_add_di,
1066 * otherwise \ref gmx_simd_add_fi.
1068 * \copydetails gmx_simd_add_fi
1070 # define gmx_simd_add_i gmx_simd_add_fi
1072 /*! \brief SIMD a-b for two \ref gmx_simd_int32_t.
1074 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_sub_di,
1075 * otherwise \ref gmx_simd_sub_fi.
1077 * \copydetails gmx_simd_sub_fi
1079 # define gmx_simd_sub_i gmx_simd_sub_fi
1081 /*! \brief SIMD a*b for two \ref gmx_simd_int32_t.
1083 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_mul_di,
1084 * otherwise \ref gmx_simd_mul_fi.
1086 * \copydetails gmx_simd_mul_fi
1088 # define gmx_simd_mul_i gmx_simd_mul_fi
1091 * \name SIMD integer comparison, booleans, and selection on gmx_simd_int32_t
1093 * These instructions are available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is defined.
1097 /*! \brief Returns boolean describing whether a==b, for \ref gmx_simd_int32_t
1099 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmpeq_di,
1100 * otherwise \ref gmx_simd_cmpeq_fi.
1102 * \copydetails gmx_simd_cmpeq_fi
1104 # define gmx_simd_cmpeq_i gmx_simd_cmpeq_fi
1106 /*! \brief Returns boolean describing whether a<b, for \ref gmx_simd_int32_t
1108 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmplt_di,
1109 * otherwise \ref gmx_simd_cmplt_fi.
1111 * \copydetails gmx_simd_cmplt_fi
1113 # define gmx_simd_cmplt_i gmx_simd_cmplt_fi
1115 /*! \brief For each element, the result boolean is true if both arguments are true
1117 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_and_dib,
1118 * otherwise \ref gmx_simd_and_fib.
1120 * \copydetails gmx_simd_and_fib
1122 # define gmx_simd_and_ib gmx_simd_and_fib
1124 /*! \brief For each element, the result boolean is true if either argument is true.
1126 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_or_dib,
1127 * otherwise \ref gmx_simd_or_fib.
1129 * \copydetails gmx_simd_or_fib
1131 # define gmx_simd_or_ib gmx_simd_or_fib
1133 /*! \brief Return nonzero if any element in gmx_simd_ibool_t is true, otherwise 0.
1135 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_anytrue_dib,
1136 * otherwise \ref gmx_simd_anytrue_fib.
1138 * \copydetails gmx_simd_anytrue_fib
1140 # define gmx_simd_anytrue_ib gmx_simd_anytrue_fib
1142 /*! \brief Selects elements from \ref gmx_simd_int32_t where boolean is true, otherwise 0.
1144 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendzero_di,
1145 * otherwise \ref gmx_simd_blendzero_fi.
1147 * \copydetails gmx_simd_blendzero_fi
1149 # define gmx_simd_blendzero_i gmx_simd_blendzero_fi
1151 /*! \brief Selects elements from \ref gmx_simd_int32_t where boolean is false, otherwise 0.
1153 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendnotzero_di,
1154 * otherwise \ref gmx_simd_blendnotzero_fi.
1156 * \copydetails gmx_simd_blendnotzero_fi
1158 # define gmx_simd_blendnotzero_i gmx_simd_blendnotzero_fi
1160 /*! \brief Selects from 2nd int SIMD arg where boolean is true, otherwise 1st arg.
1162 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendv_di,
1163 * otherwise \ref gmx_simd_blendv_fi.
1165 * \copydetails gmx_simd_blendv_fi
1167 # define gmx_simd_blendv_i gmx_simd_blendv_fi
1170 * \name SIMD conversion operations
1172 * These instructions are available when both types involved in the conversion
1173 * are defined, e.g. \ref GMX_SIMD_HAVE_REAL and \ref GMX_SIMD_HAVE_INT32
1174 * for real-to-integer conversion.
1178 /*! \brief Convert gmx_simd_real_t to gmx_simd_int32_t, round to nearest integer.
1180 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvt_d2i,
1181 * otherwise \ref gmx_simd_cvt_f2i.
1183 * \copydetails gmx_simd_cvt_f2i
1185 # define gmx_simd_cvt_r2i gmx_simd_cvt_f2i
1187 /*! \brief Convert gmx_simd_real_t to gmx_simd_int32_t, truncate towards zero
1189 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvtt_d2i,
1190 * otherwise \ref gmx_simd_cvtt_f2i.
1192 * \copydetails gmx_simd_cvtt_f2i
1194 # define gmx_simd_cvtt_r2i gmx_simd_cvtt_f2i
1196 /*! \brief Convert gmx_simd_int32_t to gmx_simd_real_t
1198 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvt_i2d,
1199 * otherwise \ref gmx_simd_cvt_i2f.
1201 * \copydetails gmx_simd_cvt_i2f
1203 # define gmx_simd_cvt_i2r gmx_simd_cvt_i2f
1205 /*! \brief Convert from gmx_simd_bool_t to gmx_simd_ibool_t
1207 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvt_db2dib,
1208 * otherwise \ref gmx_simd_cvt_fb2fib.
1210 * \copydetails gmx_simd_cvt_fb2fib
1212 # define gmx_simd_cvt_b2ib gmx_simd_cvt_fb2fib
1214 /*! \brief Convert from gmx_simd_ibool_t to gmx_simd_bool_t
1216 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvt_dib2db,
1217 * otherwise \ref gmx_simd_cvt_fib2fb.
1219 * \copydetails gmx_simd_cvt_fib2fb
1221 # define gmx_simd_cvt_ib2b gmx_simd_cvt_fib2fb
1225 * \name SIMD memory alignment operations
1229 /*! \brief Align real memory for SIMD usage.
1231 * This routine will only align memory if \ref GMX_SIMD_HAVE_REAL is defined.
1232 * Otherwise the original pointer will be returned.
1234 * Start by allocating an extra \ref GMX_SIMD_REAL_WIDTH real elements of memory,
1235 * and then call this function. The returned pointer will be greater than or equal
1236 * to the one you provided, and point to an address inside your provided memory
1237 * that is aligned to the SIMD width.
1239 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_align_d,
1240 * otherwise \ref gmx_simd_align_f. For detailed documentation, see the
1241 * precision-specific implementation routines.
1243 # define gmx_simd_align_r gmx_simd_align_f
1245 /*! \brief Align integer memory for SIMD usage.
1247 * This routine will only align memory if \ref GMX_SIMD_HAVE_INT32 is defined.
1248 * Otherwise the original pointer will be returned.
1250 * Start by allocating an extra \ref GMX_SIMD_INT32_WIDTH elements of memory,
1251 * and then call this function. The returned pointer will be greater than or equal
1252 * to the one you provided, and point to an address inside your provided memory
1253 * that is aligned to the SIMD width.
1255 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_align_di,
1256 * otherwise \ref gmx_simd_align_fi. For detailed documentation, see the
1257 * precision-specific implementation routines.
1259 # define gmx_simd_align_i gmx_simd_align_fi
1263 /*! \name SIMD4 - constant width-four SIMD datatypes
1265 * These operations are only meant to be used for a few coordinate
1266 * manipulation and grid interpolation routines, so we only support a subset
1267 * of operations for SIMD4. To avoid repeating all the documentation from
1268 * the generic width SIMD routines, we only provide brief documentation for
1269 * these operations. Follow the link to the implementation documentation or the
1270 * reference to the corresponding generic SIMD routine. The format will be
1271 * exactly the same, but they have SIMD replaced with SIMD4.
1275 /*! \brief SIMD real datatype guaranteed to be 4 elements wide, if available.
1277 * All the SIMD4 datatypes and operations behave like their counterparts for
1278 * the generic SIMD implementation, but they might be implemented with different
1279 * registers, or not supported at all. It is important that you check the
1280 * define \ref GMX_SIMD4_HAVE_REAL before using it.
1282 * Just as the normal SIMD operations, all SIMD4 types and routines will
1283 * be aliased to either single or double precision ones based on whether
1284 * GMX_DOUBLE is defined.
1286 * \note There is no support for integer or math operations in SIMD4.
1288 # define gmx_simd4_real_t gmx_simd4_float_t
1290 /*! \brief Boolean for \ref gmx_simd4_real_t comparison/selection */
1291 # define gmx_simd4_bool_t gmx_simd4_fbool_t
1293 /*! \brief Load aligned data to gmx_simd4_real_t.
1295 * \copydetails gmx_simd4_load_f
1297 # define gmx_simd4_load_r gmx_simd4_load_f
1299 /*! \brief Load single element to gmx_simd4_real_t
1301 * \copydetails gmx_simd4_load1_f
1303 # define gmx_simd4_load1_r gmx_simd4_load1_f
1305 /*! \brief Set gmx_simd4_real_t from scalar value
1307 * \copydetails gmx_simd4_set1_f
1309 # define gmx_simd4_set1_r gmx_simd4_set1_f
1311 /*! \brief Store aligned data from gmx_simd4_real_t
1313 * \copydetails gmx_simd4_store_f
1315 # define gmx_simd4_store_r gmx_simd4_store_f
1317 /*! \brief Load unaligned data to gmx_simd4_real_t
1319 * \copydetails gmx_simd4_loadu_f
1321 # define gmx_simd4_loadu_r gmx_simd4_loadu_f
1323 /*! \brief Store unaligned data from gmx_simd4_real_t
1325 * \copydetails gmx_simd4_storeu_f
1327 # define gmx_simd4_storeu_r gmx_simd4_storeu_f
1329 /*! \brief Set all elements in gmx_simd4_real_t to 0.0
1331 * \copydetails gmx_simd4_setzero_f
1333 # define gmx_simd4_setzero_r gmx_simd4_setzero_f
1335 /*! \brief Bitwise and for two gmx_simd4_real_t
1337 * \copydetails gmx_simd4_and_f
1339 # define gmx_simd4_and_r gmx_simd4_and_f
1341 /*! \brief Bitwise and-not for two gmx_simd4_real_t. 1st arg is complemented.
1343 * \copydetails gmx_simd4_andnot_f
1345 # define gmx_simd4_andnot_r gmx_simd4_andnot_f
1347 /*! \brief Bitwise or for two gmx_simd4_real_t
1349 * \copydetails gmx_simd4_or_f
1351 # define gmx_simd4_or_r gmx_simd4_or_f
1353 /*! \brief Bitwise xor for two gmx_simd4_real_t
1355 * \copydetails gmx_simd4_xor_f
1357 # define gmx_simd4_xor_r gmx_simd4_xor_f
1359 /*! \brief a+b for \ref gmx_simd4_real_t
1361 * \copydetails gmx_simd4_add_f
1363 # define gmx_simd4_add_r gmx_simd4_add_f
1365 /*! \brief a-b for \ref gmx_simd4_real_t
1367 * \copydetails gmx_simd4_sub_f
1369 # define gmx_simd4_sub_r gmx_simd4_sub_f
1371 /*! \brief a*b for \ref gmx_simd4_real_t
1373 * \copydetails gmx_simd4_mul_f
1375 # define gmx_simd4_mul_r gmx_simd4_mul_f
1377 /*! \brief a*b+c for \ref gmx_simd4_real_t
1379 * \copydetails gmx_simd4_fmadd_f
1381 # define gmx_simd4_fmadd_r gmx_simd4_fmadd_f
1383 /*! \brief a*b-c for \ref gmx_simd4_real_t
1385 * \copydetails gmx_simd4_fmsub_f
1387 # define gmx_simd4_fmsub_r gmx_simd4_fmsub_f
1389 /*! \brief -a*b+c for \ref gmx_simd4_real_t
1391 * \copydetails gmx_simd4_fnmadd_f
1393 # define gmx_simd4_fnmadd_r gmx_simd4_fnmadd_f
1395 /*! \brief -a*b-c for \ref gmx_simd4_real_t
1397 * \copydetails gmx_simd4_fnmsub_f
1399 # define gmx_simd4_fnmsub_r gmx_simd4_fnmsub_f
1401 /*! \brief 1/sqrt(x) approximate lookup for \ref gmx_simd4_real_t
1403 * \copydetails gmx_simd4_rsqrt_f
1405 # define gmx_simd4_rsqrt_r gmx_simd4_rsqrt_f
1407 /*! \brief fabs(x) for \ref gmx_simd4_real_t
1409 * \copydetails gmx_simd4_fabs_f
1411 # define gmx_simd4_fabs_r gmx_simd4_fabs_f
1413 /*! \brief Change sign (-x) for \ref gmx_simd4_real_t
1415 * \copydetails gmx_simd4_fneg_f
1417 # define gmx_simd4_fneg_r gmx_simd4_fneg_f
1419 /*! \brief Select maximum of each pair of elements from args for \ref gmx_simd4_real_t
1421 * \copydetails gmx_simd4_max_f
1423 # define gmx_simd4_max_r gmx_simd4_max_f
1425 /*! \brief Select minimum of each pair of elements from args for \ref gmx_simd4_real_t
1427 * \copydetails gmx_simd4_min_f
1429 # define gmx_simd4_min_r gmx_simd4_min_f
1431 /*! \brief Round \ref gmx_simd4_real_t to nearest integer, return \ref gmx_simd4_real_t
1433 * \copydetails gmx_simd4_round_f
1435 # define gmx_simd4_round_r gmx_simd4_round_f
1437 /*! \brief Truncate \ref gmx_simd4_real_t towards zero, return \ref gmx_simd4_real_t
1439 * \copydetails gmx_simd4_trunc_f
1441 # define gmx_simd4_trunc_r gmx_simd4_trunc_f
1443 /*! \brief Scalar product of first three elements of two \ref gmx_simd4_real_t
1445 * \copydetails gmx_simd4_dotproduct3_f
1447 # define gmx_simd4_dotproduct3_r gmx_simd4_dotproduct3_f
1449 /*! \brief Return booleans whether a==b for each element of two \ref gmx_simd4_real_t
1451 * \copydetails gmx_simd4_cmpeq_f
1453 # define gmx_simd4_cmpeq_r gmx_simd4_cmpeq_f
1454 /*! \brief Return booleans whether a<b for each element of two \ref gmx_simd4_real_t
1456 * \copydetails gmx_simd4_cmplt_f
1458 # define gmx_simd4_cmplt_r gmx_simd4_cmplt_f
1459 /*! \brief Return booleans whether a<=b for each element of two \ref gmx_simd4_real_t
1461 * \copydetails gmx_simd4_cmple_f
1463 # define gmx_simd4_cmple_r gmx_simd4_cmple_f
1465 /*! \brief Logical and for two \ref gmx_simd4_bool_t
1467 * \copydetails gmx_simd4_and_fb
1469 # define gmx_simd4_and_b gmx_simd4_and_fb
1470 /*! \brief Logical or for two \ref gmx_simd4_bool_t
1472 * \copydetails gmx_simd4_or_fb
1474 # define gmx_simd4_or_b gmx_simd4_or_fb
1476 /*! \brief Return nonzero if any element in \ref gmx_simd4_bool_t is true, otherwise 0
1478 * \copydetails gmx_simd4_anytrue_fb
1480 # define gmx_simd4_anytrue_b gmx_simd4_anytrue_fb
1482 /*! \brief Selects elements from \ref gmx_simd4_real_t where boolean is true, otherwise 0
1484 * \copydetails gmx_simd4_blendzero_f
1486 # define gmx_simd4_blendzero_r gmx_simd4_blendzero_f
1488 /*! \brief Selects elements from \ref gmx_simd4_real_t where boolean is false, otherwise 0
1490 * \copydetails gmx_simd4_blendnotzero_f
1492 # define gmx_simd4_blendnotzero_r gmx_simd4_blendnotzero_f
1494 /*! \brief Selects from 2nd real SIMD4 arg where boolean is true, otherwise 1st arg
1496 * \copydetails gmx_simd4_blendv_f
1498 # define gmx_simd4_blendv_r gmx_simd4_blendv_f
1500 /*! \brief Return sum of all elements in SIMD4 floating-point variable.
1502 * \copydetails gmx_simd4_reduce_f
1504 # define gmx_simd4_reduce_r gmx_simd4_reduce_f
1506 /*! \brief Align real memory for SIMD4 usage.
1508 * \copydetails gmx_simd4_align_f
1510 # define gmx_simd4_align_r gmx_simd4_align_f
1514 /*! \name SIMD predefined macros to describe high-level capabilities
1518 # if (defined GMX_SIMD_HAVE_FLOAT) || (defined DOXYGEN)
1519 /*! \brief Defined if gmx_simd_real_t is available.
1521 * If GMX_DOUBLE is defined, this will be aliased to
1522 * \ref GMX_SIMD_HAVE_DOUBLE, otherwise GMX_SIMD_HAVE_FLOAT.
1524 # define GMX_SIMD_HAVE_REAL
1525 /*! \brief Width of gmx_simd_real_t.
1527 * If GMX_DOUBLE is defined, this will be aliased to
1528 * \ref GMX_SIMD_DOUBLE_WIDTH, otherwise GMX_SIMD_FLOAT_WIDTH.
1530 # define GMX_SIMD_REAL_WIDTH GMX_SIMD_FLOAT_WIDTH
1532 # if (defined GMX_SIMD_HAVE_FINT32) || (defined DOXYGEN)
1533 /*! \brief Defined if gmx_simd_int32_t is available.
1535 * If GMX_DOUBLE is defined, this will be aliased to
1536 * \ref GMX_SIMD_HAVE_DINT32, otherwise GMX_SIMD_HAVE_FINT32.
1538 # define GMX_SIMD_HAVE_INT32
1539 /*! \brief Width of gmx_simd_int32_t.
1541 * If GMX_DOUBLE is defined, this will be aliased to
1542 * \ref GMX_SIMD_DINT32_WIDTH, otherwise GMX_SIMD_FINT32_WIDTH.
1544 # define GMX_SIMD_INT32_WIDTH GMX_SIMD_FINT32_WIDTH
1546 # if (defined GMX_SIMD_HAVE_FINT32_EXTRACT) || (defined DOXYGEN)
1547 /*! \brief Defined if gmx_simd_extract_i() is available.
1549 * If GMX_DOUBLE is defined, this will be aliased to
1550 * \ref GMX_SIMD_HAVE_DINT32_EXTRACT, otherwise GMX_SIMD_HAVE_FINT32_EXTRACT.
1552 # define GMX_SIMD_HAVE_INT32_EXTRACT
1554 # if (defined GMX_SIMD_HAVE_FINT32_LOGICAL) || (defined DOXYGEN)
1555 /*! \brief Defined if logical ops are supported on gmx_simd_int32_t.
1557 * If GMX_DOUBLE is defined, this will be aliased to
1558 * \ref GMX_SIMD_HAVE_DINT32_LOGICAL, otherwise GMX_SIMD_HAVE_FINT32_LOGICAL.
1560 # define GMX_SIMD_HAVE_INT32_LOGICAL
1562 # if (defined GMX_SIMD_HAVE_FINT32_ARITHMETICS) || (defined DOXYGEN)
1563 /*! \brief Defined if arithmetic ops are supported on gmx_simd_int32_t.
1565 * If GMX_DOUBLE is defined, this will be aliased to
1566 * \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS, otherwise GMX_SIMD_HAVE_FINT32_ARITHMETICS.
1568 # define GMX_SIMD_HAVE_INT32_ARITHMETICS
1570 # if (defined GMX_SIMD4_HAVE_FLOAT) || (defined DOXYGEN)
1571 /*! \brief Defined if gmx_simd4_real_t is available.
1573 * If GMX_DOUBLE is defined, this will be aliased to
1574 * \ref GMX_SIMD4_HAVE_DOUBLE, otherwise GMX_SIMD4_HAVE_FLOAT.
1576 # define GMX_SIMD4_HAVE_REAL
1581 #endif /* GMX_DOUBLE */
1586 #endif /* GMX_SIMD_SIMD_H */