2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
5 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
38 * \defgroup module_simd SIMD intrinsics interface (simd)
39 * \ingroup group_utilitymodules
41 * \brief Provides an architecture-independent way of doing SIMD coding.
43 * Overview of the SIMD implementation is provided in \ref page_simd.
44 * The details are documented in gromacs/simd/simd.h and the reference
45 * implementation impl_reference.h.
47 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
50 #ifndef GMX_SIMD_SIMD_H
51 #define GMX_SIMD_SIMD_H
53 /*! \libinternal \file
55 * \brief Definitions, capabilities, and wrappers for SIMD module.
57 * The macros in this file are intended to be used for writing
58 * architecture-independent SIMD intrinsics code.
59 * To support a new architecture, adding a new sub-include with macros here
60 * should be (nearly) all that is needed.
62 * The defines in this top-level file will set default Gromacs real precision
63 * operations to either single or double precision based on whether
64 * GMX_DOUBLE is 1. The actual implementation - including e.g.
65 * conversion operations specifically between single and double - is documented
66 * in impl_reference.h.
68 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
71 * \ingroup module_simd
80 #include <type_traits>
82 #include "gromacs/utility/basedefinitions.h"
83 #include "gromacs/utility/classhelpers.h"
84 #include "gromacs/utility/real.h"
89 /*! \addtogroup module_simd
95 /*! \libinternal \brief Tag type to select to load SimdFloat with simdLoad(U) */
99 /*! \libinternal \brief Tag type to select to load SimdDouble with simdLoad(U) */
103 /*! \libinternal \brief Tag type to select to load SimdFInt32 with simdLoad(U) */
107 /*! \libinternal \brief Tag type to select to load SimdDInt32 with simdLoad(U) */
113 /*! \name SIMD predefined macros to describe high-level capabilities
115 * These macros are used to describe the features available in default
116 * Gromacs real precision. They are set from the lower-level implementation
117 * files that have macros describing single and double precision individually,
118 * as well as the implementation details.
123 # pragma clang diagnostic push
124 /* reinterpret_cast is used for SIMD->scalar conversion
126 * In general using reinterpret_cast for bit_cast is UB but
127 * for intrinsics types it works for all known compilers
128 * and not all compilers produce as good code for memcpy.
130 # pragma clang diagnostic ignored "-Wundefined-reinterpret-cast"
// Select the architecture-specific SIMD implementation header. The chain is
// evaluated top to bottom, so the first GMX_SIMD_* macro that is nonzero
// decides which impl_* header gets included.
133 #if GMX_SIMD_X86_SSE2
134 # include "impl_x86_sse2/impl_x86_sse2.h"
135 #elif GMX_SIMD_X86_SSE4_1
136 # include "impl_x86_sse4_1/impl_x86_sse4_1.h"
137 #elif GMX_SIMD_X86_AVX_128_FMA
138 # include "impl_x86_avx_128_fma/impl_x86_avx_128_fma.h"
139 #elif GMX_SIMD_X86_AVX_256
140 # include "impl_x86_avx_256/impl_x86_avx_256.h"
141 #elif GMX_SIMD_X86_AVX2_256
142 # include "impl_x86_avx2_256/impl_x86_avx2_256.h"
143 #elif GMX_SIMD_X86_AVX2_128
144 # include "impl_x86_avx2_128/impl_x86_avx2_128.h"
145 #elif GMX_SIMD_X86_MIC
146 # include "impl_x86_mic/impl_x86_mic.h"
147 #elif GMX_SIMD_X86_AVX_512
148 # include "impl_x86_avx_512/impl_x86_avx_512.h"
149 #elif GMX_SIMD_X86_AVX_512_KNL
150 # include "impl_x86_avx_512_knl/impl_x86_avx_512_knl.h"
151 #elif GMX_SIMD_ARM_NEON
152 # include "impl_arm_neon/impl_arm_neon.h"
153 #elif GMX_SIMD_ARM_NEON_ASIMD
154 # include "impl_arm_neon_asimd/impl_arm_neon_asimd.h"
155 #elif GMX_SIMD_ARM_SVE
156 # include "impl_arm_sve/impl_arm_sve.h"
157 #elif GMX_SIMD_IBM_VMX
158 # include "impl_ibm_vmx/impl_ibm_vmx.h"
159 #elif GMX_SIMD_IBM_VSX
160 # include "impl_ibm_vsx/impl_ibm_vsx.h"
161 #elif (GMX_SIMD_REFERENCE || defined DOXYGEN)
162 # include "impl_reference/impl_reference.h" // Includes doxygen documentation
164 # include "impl_none/impl_none.h"
168 # pragma clang diagnostic pop
171 // The scalar SIMD-mimicking functions are always included so we can use
172 // templated functions even without SIMD support.
173 #include "gromacs/simd/scalar/scalar.h"
174 #include "gromacs/simd/scalar/scalar_math.h"
175 #include "gromacs/simd/scalar/scalar_util.h"
// Double-precision mapping: each generic REAL/INT32 capability macro is
// defined as its DOUBLE/DINT32 counterpart. The Doxygen text for these macros
// is attached to the single-precision definitions that follow.
179 # define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_DOUBLE
180 # define GMX_SIMD_REAL_WIDTH GMX_SIMD_DOUBLE_WIDTH
181 # define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_DINT32_EXTRACT
182 # define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_DINT32_LOGICAL
183 # define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_DINT32_ARITHMETICS
184 # define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL \
185 GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE
186 # define GMX_SIMD_HAVE_HSIMD_UTIL_REAL GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE
187 # define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_DOUBLE
190 /*! \brief 1 if SimdReal is available, otherwise 0.
192 * \ref GMX_SIMD_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_HAVE_FLOAT.
194 # define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_FLOAT
196 /*! \brief Width of SimdReal.
198 * \ref GMX_SIMD_DOUBLE_WIDTH if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_FLOAT_WIDTH.
200 # define GMX_SIMD_REAL_WIDTH GMX_SIMD_FLOAT_WIDTH
202 /*! \brief 1 if support is available for extracting elements from SimdInt32, otherwise 0
204 * \ref GMX_SIMD_HAVE_DINT32_EXTRACT if GMX_DOUBLE is 1, otherwise
205 * \ref GMX_SIMD_HAVE_FINT32_EXTRACT.
207 # define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_FINT32_EXTRACT
209 /*! \brief 1 if logical ops are supported on SimdInt32, otherwise 0.
211 * \ref GMX_SIMD_HAVE_DINT32_LOGICAL if GMX_DOUBLE is 1, otherwise
212 * \ref GMX_SIMD_HAVE_FINT32_LOGICAL.
214 # define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_FINT32_LOGICAL
216 /*! \brief 1 if arithmetic ops are supported on SimdInt32, otherwise 0.
218 * \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS if GMX_DOUBLE is 1, otherwise
219 * \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS.
221 # define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_FINT32_ARITHMETICS
223 /*! \brief 1 if gmx::simdGatherLoadUBySimdIntTranspose is present, otherwise 0
225 * \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE if GMX_DOUBLE is 1, otherwise
226 * \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT.
228 # define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL \
229 GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT
231 /*! \brief 1 if real half-register load/store/reduce utils present, otherwise 0
233 * \ref GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE if GMX_DOUBLE is 1, otherwise
234 * \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT.
236 # define GMX_SIMD_HAVE_HSIMD_UTIL_REAL GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT
238 /*! \brief 1 if Simd4Real is available, otherwise 0.
240 * \ref GMX_SIMD4_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD4_HAVE_FLOAT.
242 # define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_FLOAT
246 //! \} end of name-group describing high-level capabilities
251 template<class T, size_t N>
// Float specialization, compiled only when SimdFloat is available. The alignas
// value is in bytes: GMX_SIMD_FLOAT_WIDTH elements of sizeof(float) each.
// Everything else is inherited publicly from std::array<float, N>.
254 #if GMX_SIMD_HAVE_FLOAT
255 /*! \libinternal \brief Identical to std::array with GMX_SIMD_FLOAT_WIDTH alignment.
256 * Should not be deleted through base pointer (destructor is non-virtual).
259 struct alignas(GMX_SIMD_FLOAT_WIDTH * sizeof(float)) AlignedArray<float, N> :
260 public std::array<float, N>
// Double specialization, compiled only when SimdDouble is available. The
// alignas value is in bytes: GMX_SIMD_DOUBLE_WIDTH elements of sizeof(double)
// each. Everything else is inherited publicly from std::array<double, N>.
265 #if GMX_SIMD_HAVE_DOUBLE
266 /*! \libinternal \brief Identical to std::array with GMX_SIMD_DOUBLE_WIDTH alignment.
267 * Should not be deleted through base pointer (destructor is non-virtual).
270 struct alignas(GMX_SIMD_DOUBLE_WIDTH * sizeof(double)) AlignedArray<double, N> :
271 public std::array<double, N>
276 #if GMX_SIMD_HAVE_REAL
278 /*! \name SIMD data types
280 * The actual storage of these types is implementation dependent. The
281 * documentation is generated from the reference implementation, but for
282 * normal usage this will likely not be what you are using.
286 /*! \brief Real precision floating-point SIMD datatype.
288 * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
290 * \ref SimdDouble if GMX_DOUBLE is 1, otherwise \ref SimdFloat.
292 * \note This variable cannot be placed inside other structures or classes, since
293 * some compilers (including at least clang-3.7) appear to lose the
294 * alignment. This is likely particularly severe when allocating such
295 * memory on the heap, but it occurs for stack structures too.
298 typedef SimdDouble SimdReal;
300 typedef SimdFloat SimdReal;
304 /*! \brief Boolean SIMD type for usage with \ref SimdReal.
306 * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
308 * If GMX_DOUBLE is 1, this will be set to \ref SimdDBool
309 * internally, otherwise \ref SimdFBool. This is necessary since some
310 * SIMD implementations use bitpatterns for marking truth, so single-
311 * vs. double precision booleans are not necessarily exchangeable.
312 * As long as you just use this type you will not have to worry about precision.
314 * See \ref SimdIBool for an explanation of real vs. integer booleans.
316 * \note This variable cannot be placed inside other structures or classes, since
317 * some compilers (including at least clang-3.7) appear to lose the
318 * alignment. This is likely particularly severe when allocating such
319 * memory on the heap, but it occurs for stack structures too.
322 typedef SimdDBool SimdBool;
324 typedef SimdFBool SimdBool;
328 /*! \brief 32-bit integer SIMD type.
330 * If GMX_DOUBLE is 1, this will be set to \ref SimdDInt32
331 * internally, otherwise \ref SimdFInt32. This might seem a strange
332 * implementation detail, but it is because some SIMD implementations use
333 * different types/widths of integers registers when converting from
334 * double vs. single precision floating point. As long as you just use
335 * this type you will not have to worry about precision.
337 * \note This variable cannot be placed inside other structures or classes, since
338 * some compilers (including at least clang-3.7) appear to lose the
339 * alignment. This is likely particularly severe when allocating such
340 * memory on the heap, but it occurs for stack structures too.
343 typedef SimdDInt32 SimdInt32;
345 typedef SimdFInt32 SimdInt32;
348 # if GMX_SIMD_HAVE_INT32_ARITHMETICS
349 /*! \brief Boolean SIMD type for usage with \ref SimdInt32.
351 * This type is only available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is 1.
353 * If GMX_DOUBLE is 1, this will be set to \ref SimdDIBool
354 * internally, otherwise \ref SimdFIBool. This is necessary since some
355 * SIMD implementations use bitpatterns for marking truth, so single-
356 * vs. double precision booleans are not necessarily exchangeable, and while
357 * a double-precision boolean might be represented with a 64-bit mask, the
358 * corresponding integer might only use a 32-bit mask.
360 * We provide conversion routines for these cases, so the only thing you need to
361 * keep in mind is to use \ref SimdBool when working with
362 * \ref SimdReal while you pick \ref SimdIBool when working with
365 * To convert between them, use \ref cvtB2IB and \ref cvtIB2B.
367 * \note This variable cannot be placed inside other structures or classes, since
368 * some compilers (including at least clang-3.7) appear to lose the
369 * alignment. This is likely particularly severe when allocating such
370 * memory on the heap, but it occurs for stack structures too.
373 typedef SimdDIBool SimdIBool;
375 typedef SimdFIBool SimdIBool;
377 # endif // GMX_SIMD_HAVE_INT32_ARITHMETICS
// Default-precision name for the best pair alignment constant. One definition
// per precision: it copies the Double constant in one branch and the Float
// constant in the other, mirroring the SimdReal typedefs earlier in this header.
381 const int c_simdBestPairAlignment = c_simdBestPairAlignmentDouble;
383 const int c_simdBestPairAlignment = c_simdBestPairAlignmentFloat;
386 #endif // GMX_SIMD_HAVE_REAL
388 #if GMX_SIMD4_HAVE_REAL
389 /*! \brief Real precision floating-point SIMD4 datatype.
391 * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
393 * \ref Simd4Double if GMX_DOUBLE is 1, otherwise \ref Simd4Float.
395 * \note This variable cannot be placed inside other structures or classes, since
396 * some compilers (including at least clang-3.7) appear to lose the
397 * alignment. This is likely particularly severe when allocating such
398 * memory on the heap, but it occurs for stack structures too.
401 typedef Simd4Double Simd4Real;
403 typedef Simd4Float Simd4Real;
407 /*! \brief Boolean SIMD4 type for usage with \ref Simd4Real.
409 * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
411 * If GMX_DOUBLE is 1, this will be set to \ref Simd4DBool
412 * internally, otherwise \ref Simd4FBool. This is necessary since some
413 * SIMD implementations use bitpatterns for marking truth, so single-
414 * vs. double precision booleans are not necessarily exchangeable.
415 * As long as you just use this type you will not have to worry about precision.
417 * \note This variable cannot be placed inside other structures or classes, since
418 * some compilers (including at least clang-3.7) appear to lose the
419 * alignment. This is likely particularly severe when allocating such
420 * memory on the heap, but it occurs for stack structures too.
423 typedef Simd4DBool Simd4Bool;
425 typedef Simd4FBool Simd4Bool;
427 #endif // GMX_SIMD4_HAVE_REAL
429 //! \} end of name-group describing SIMD data types
431 /*! \name High-level SIMD proxy objects to disambiguate load/set operations
437 /*! \libinternal \brief Simd traits
439 * These traits are used to query data about SIMD types. Currently provided
440 * data is useful for SIMD loads (load function and helper classes for
441 * ArrayRef<> in simd_memory.h). Provided data:
442 * - type: scalar type corresponding to the SIMD type
443 * - width: SIMD width
444 * - tag: tag used for type dispatch of load function
// SimdTraits specializations, one per SIMD type. Each is compiled only when
// the matching precision is available. In every specialization, width is taken
// from the corresponding GMX_SIMD_*_WIDTH macro and tag names the tag type
// that is passed to simdLoad/simdLoadU for overload selection.
451 #if GMX_SIMD_HAVE_FLOAT
453 struct SimdTraits<SimdFloat>
456 static constexpr int width = GMX_SIMD_FLOAT_WIDTH;
457 using tag = SimdFloatTag;
460 #if GMX_SIMD_HAVE_DOUBLE
462 struct SimdTraits<SimdDouble>
465 static constexpr int width = GMX_SIMD_DOUBLE_WIDTH;
466 using tag = SimdDoubleTag;
// The integer SIMD types are tied to the floating-point precision they pair
// with, so they are guarded by the same HAVE_FLOAT / HAVE_DOUBLE macros.
469 #if GMX_SIMD_HAVE_FLOAT
471 struct SimdTraits<SimdFInt32>
474 static constexpr int width = GMX_SIMD_FINT32_WIDTH;
475 using tag = SimdFInt32Tag;
478 #if GMX_SIMD_HAVE_DOUBLE
480 struct SimdTraits<SimdDInt32>
483 static constexpr int width = GMX_SIMD_DINT32_WIDTH;
484 using tag = SimdDInt32Tag;
// SimdTraitsT<T> is shorthand for the scalar type SimdTraits<T>::type.
488 using SimdTraitsT = typename SimdTraits<T>::type;
// Traits for const-qualified SIMD types: the scalar type becomes const, while
// width and tag are forwarded unchanged from the non-const specialization.
490 struct SimdTraits<const T>
492 using type = const SimdTraitsT<T>;
493 static constexpr int width = SimdTraits<T>::width;
494 using tag = typename SimdTraits<T>::tag;
496 } // namespace internal
498 /*! \brief Load function that returns SIMD or scalar
500 * Note that a load of T* where T is const returns a value, which is a
501 * copy, and the caller cannot be constrained to not change it, so the
502 * return type uses std::remove_const_t.
504 * \tparam T Type to load (type is always mandatory)
505 * \param m Pointer to aligned memory
506 * \return Loaded value
509 static inline std::remove_const_t<T> load(const internal::SimdTraitsT<T>* m) // disabled by SFINAE for non-SIMD types
// Tag dispatch: a default-constructed SimdTraits<T>::tag picks the simdLoad
// overload that belongs to T.
511 return simdLoad(m, typename internal::SimdTraits<T>::tag());
516 /* the enable_if serves to prevent two different types of misuse:
517 * 1) load<SimdReal>(SimdReal*); should only be called on real* or int*
518 * 2) load(real*); template parameter is mandatory because otherwise ambiguity is
519 * created. The dependent type disables type deduction.
521 load(const std::enable_if_t<std::is_arithmetic_v<T>, T> *m)
// Overload of load() for AlignedArray storage. T is the SIMD type to return
// and must be given explicitly; N is deduced from the array argument.
526 template<typename T, size_t N>
527 static inline T gmx_simdcall load(const AlignedArray<internal::SimdTraitsT<T>, N>& m)
// Hand the array storage pointer to the simdLoad overload selected by the tag for T.
529 return simdLoad(m.data(), typename internal::SimdTraits<T>::tag());
532 /*! \brief Load function that returns SIMD or scalar based on template argument
534 * \tparam T Type to load (type is always mandatory)
535 * \param m Pointer to unaligned memory
536 * \return Loaded SimdFloat/Double/Int or basic scalar type
539 static inline T loadU(const internal::SimdTraitsT<T>* m)
// Same tag dispatch as load(), but routed through simdLoadU() for memory
// that is not guaranteed to be aligned.
541 return simdLoadU(m, typename internal::SimdTraits<T>::tag());
545 static inline T loadU(const std::enable_if_t<std::is_arithmetic_v<T>, T>* m)
// Overload of loadU() for AlignedArray storage. T is the SIMD type to return
// and must be given explicitly; N is deduced from the array argument.
550 template<typename T, size_t N>
551 static inline T gmx_simdcall loadU(const AlignedArray<internal::SimdTraitsT<T>, N>& m)
// Hand the array storage pointer to the simdLoadU overload selected by the tag for T.
553 return simdLoadU(m.data(), typename internal::SimdTraits<T>::tag());
556 /*! \libinternal \brief Proxy object to enable setZero() for SIMD and real types.
558 * This object is returned by setZero(), and depending on what type you assign
559 * the result to the conversion method will call the right low-level function.
561 class SimdSetZeroProxy
// Scalar conversions are unconditional, so the proxy can also initialize
// plain float, double and 32-bit integer variables.
564 //!\brief Conversion method that returns 0.0 as float
565 operator float() const { return 0.0F; }
566 //!\brief Conversion method that returns 0.0 as double
567 operator double() const { return 0.0; }
568 //!\brief Conversion method that returns 0.0 as int32
569 operator std::int32_t() const { return 0; }
// Each SIMD conversion below is compiled only when the corresponding
// GMX_SIMD_HAVE_* / GMX_SIMD4_HAVE_* macro is nonzero, and it forwards to the
// type-specific zeroing function of the implementation.
570 #if GMX_SIMD_HAVE_FLOAT
571 //!\brief Conversion method that will execute setZero() for SimdFloat
572 operator SimdFloat() const { return setZeroF(); }
573 //!\brief Conversion method that will execute setZero() for SimdFInt32
574 operator SimdFInt32() const { return setZeroFI(); }
576 #if GMX_SIMD4_HAVE_FLOAT
577 //!\brief Conversion method that will execute setZero() for Simd4Float
578 operator Simd4Float() const { return simd4SetZeroF(); }
580 #if GMX_SIMD_HAVE_DOUBLE
581 //!\brief Conversion method that will execute setZero() for SimdDouble
582 operator SimdDouble() const { return setZeroD(); }
583 //!\brief Conversion method that will execute setZero() for SimdDInt32
584 operator SimdDInt32() const { return setZeroDI(); }
586 #if GMX_SIMD4_HAVE_DOUBLE
587 //!\brief Conversion method that will execute setZero() for Simd4Double
588 operator Simd4Double() const { return simd4SetZeroD(); }
592 /*! \brief Helper function to set any SIMD or scalar variable to zero
594 * \return Proxy object that will call the actual function to set a SIMD/scalar
595 * variable to zero based on the conversion function called when you
598 static inline SimdSetZeroProxy gmx_simdcall setZero()
605 // TODO: Don't forward function but properly rename them and use proper traits
611 #if GMX_SIMD4_HAVE_FLOAT
613 struct Simd4Traits<Simd4Float>
619 #if GMX_SIMD4_HAVE_DOUBLE
621 struct Simd4Traits<Simd4Double>
627 using Simd4TraitsT = typename Simd4Traits<T>::type;
628 } // namespace internal
630 #if GMX_SIMD4_HAVE_REAL
632 T load(const internal::Simd4TraitsT<T>* m)
637 T loadU(const internal::Simd4TraitsT<T>* m)
643 /* Implement most of 4xn functions by forwarding them to other functions when possible.
644 * The functions forwarded here don't need to be implemented by each implementation.
645 * For width=4 all functions are forwarded and for width=8 all but loadU4NOffset are forwarded.
647 #if GMX_SIMD_HAVE_FLOAT
648 # if GMX_SIMD_FLOAT_WIDTH < 4
649 # define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT (GMX_SIMD_HAVE_LOADU && GMX_SIMD4_HAVE_FLOAT)
650 # elif GMX_SIMD_FLOAT_WIDTH == 4
651 # define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT GMX_SIMD_HAVE_LOADU
652 // For GMX_SIMD_FLOAT_WIDTH>4 it is the responsibility of the implementation to set
653 // GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
656 # if GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
657 # if GMX_SIMD_FLOAT_WIDTH < 4
658 using Simd4NFloat = Simd4Float;
659 # define GMX_SIMD4N_FLOAT_WIDTH 4
661 using Simd4NFloat = SimdFloat;
662 # define GMX_SIMD4N_FLOAT_WIDTH GMX_SIMD_FLOAT_WIDTH
665 # if GMX_SIMD_FLOAT_WIDTH <= 4
666 static inline Simd4NFloat gmx_simdcall loadUNDuplicate4(const float* f)
// Build the vector from the single scalar read through f.
// NOTE(review): presumably the constructor broadcasts it to all lanes - confirm
// in the implementation headers.
668 return Simd4NFloat(*f);
670 static inline Simd4NFloat gmx_simdcall load4DuplicateN(const float* f)
// At this width the operation reduces to load<Simd4NFloat>() starting at f.
672 return load<Simd4NFloat>(f);
674 static inline Simd4NFloat gmx_simdcall loadU4NOffset(const float* f, int)
// The int argument is unnamed and not used; this forwards to loadU<Simd4NFloat>().
676 return loadU<Simd4NFloat>(f);
678 # elif GMX_SIMD_FLOAT_WIDTH == 8
679 static inline Simd4NFloat gmx_simdcall loadUNDuplicate4(const float* f)
// Width-8 case: forwarded to the half-SIMD helper loadU1DualHsimd().
681 return loadU1DualHsimd(f);
683 static inline Simd4NFloat gmx_simdcall load4DuplicateN(const float* f)
// Width-8 case: forwarded to the half-SIMD helper loadDuplicateHsimd().
685 return loadDuplicateHsimd(f);
688 # endif // GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
689 #else // GMX_SIMD_HAVE_FLOAT
690 # define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT 0
693 #if GMX_SIMD_HAVE_DOUBLE
694 # if GMX_SIMD_DOUBLE_WIDTH < 4
695 # define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE (GMX_SIMD_HAVE_LOADU && GMX_SIMD4_HAVE_DOUBLE)
696 # elif GMX_SIMD_DOUBLE_WIDTH == 4
697 # define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE GMX_SIMD_HAVE_LOADU
698 // For GMX_SIMD_DOUBLE_WIDTH>4 it is the responsibility of the implementation to set
699 // GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
702 # if GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
703 # if GMX_SIMD_DOUBLE_WIDTH < 4
704 using Simd4NDouble = Simd4Double;
705 # define GMX_SIMD4N_DOUBLE_WIDTH 4
707 using Simd4NDouble = SimdDouble;
708 # define GMX_SIMD4N_DOUBLE_WIDTH GMX_SIMD_DOUBLE_WIDTH
711 # if GMX_SIMD_DOUBLE_WIDTH <= 4
712 static inline Simd4NDouble gmx_simdcall loadUNDuplicate4(const double* f)
// Build the vector from the single scalar read through f.
// NOTE(review): presumably the constructor broadcasts it to all lanes - confirm
// in the implementation headers.
714 return Simd4NDouble(*f);
716 static inline Simd4NDouble gmx_simdcall load4DuplicateN(const double* f)
// At this width the operation reduces to load<Simd4NDouble>() starting at f.
718 return load<Simd4NDouble>(f);
720 static inline Simd4NDouble gmx_simdcall loadU4NOffset(const double* f, int /*unused*/)
// The int argument is not used; this forwards to loadU<Simd4NDouble>().
722 return loadU<Simd4NDouble>(f);
724 # elif GMX_SIMD_DOUBLE_WIDTH == 8
725 static inline Simd4NDouble gmx_simdcall loadUNDuplicate4(const double* f)
// Width-8 case: forwarded to the half-SIMD helper loadU1DualHsimd().
727 return loadU1DualHsimd(f);
729 static inline Simd4NDouble gmx_simdcall load4DuplicateN(const double* f)
// Width-8 case: forwarded to the half-SIMD helper loadDuplicateHsimd().
731 return loadDuplicateHsimd(f);
734 # endif // GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
735 #else // GMX_SIMD_HAVE_DOUBLE
736 # define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE 0
// Default-precision names for the 4N utilities. First the availability flag
// GMX_SIMD_HAVE_4NSIMD_UTIL_REAL is mapped to its DOUBLE or FLOAT counterpart.
740 # define GMX_SIMD_HAVE_4NSIMD_UTIL_REAL GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
742 # define GMX_SIMD_HAVE_4NSIMD_UTIL_REAL GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
// Only when that flag is nonzero are the Simd4NReal alias and the
// GMX_SIMD4N_REAL_WIDTH macro defined, again one pair per precision.
745 #if GMX_SIMD_HAVE_4NSIMD_UTIL_REAL
747 using Simd4NReal = Simd4NDouble;
748 # define GMX_SIMD4N_REAL_WIDTH GMX_SIMD4N_DOUBLE_WIDTH
750 using Simd4NReal = Simd4NFloat;
751 # define GMX_SIMD4N_REAL_WIDTH GMX_SIMD4N_FLOAT_WIDTH
755 //! \} end of name-group proxy objects
759 //! \} end of module_simd
761 //! \endcond end of condition libapi
764 #if GMX_SIMD_HAVE_FLOAT
766 /*! \brief Returns whether a pointer to float is aligned to a SIMD boundary
768 * \param[in] ptr A pointer to a float
770 static inline bool isSimdAligned(const float* ptr)
// Aligned means the address, taken as an integer, is an exact multiple of
// GMX_SIMD_FLOAT_WIDTH * sizeof(float) bytes.
772 return reinterpret_cast<std::size_t>(ptr) % (GMX_SIMD_FLOAT_WIDTH * sizeof(float)) == 0;
775 #endif // GMX_SIMD_HAVE_FLOAT
777 #if GMX_SIMD_HAVE_DOUBLE
779 /*! \brief Returns whether a pointer to double is aligned to a SIMD boundary
781 * \param[in] ptr A pointer to a double
783 static inline bool isSimdAligned(const double* ptr)
// Aligned means the address, taken as an integer, is an exact multiple of
// GMX_SIMD_DOUBLE_WIDTH * sizeof(double) bytes.
785 return reinterpret_cast<std::size_t>(ptr) % (GMX_SIMD_DOUBLE_WIDTH * sizeof(double)) == 0;
788 #endif // GMX_SIMD_HAVE_DOUBLE
// Build-time guard: when SimdReal is available, refuse to compile if its
// width exceeds GMX_REAL_MAX_SIMD_WIDTH, the upper bound declared in real.h.
791 #if GMX_SIMD_HAVE_REAL
792 # if GMX_SIMD_REAL_WIDTH > GMX_REAL_MAX_SIMD_WIDTH
793 # error "GMX_SIMD_REAL_WIDTH > GMX_REAL_MAX_SIMD_WIDTH: increase GMX_REAL_MAX_SIMD_WIDTH in real.h"
799 /* This is a hack to cover the corner case of using an
800 explicit GMX_SIMD_HAVE_FLOAT or GMX_SIMD_HAVE_DOUBLE, rather than
803 Such code is expected to include simd.h to get those symbols
804 defined, but the actual definitions are in the implementation headers
805 included by simd.h. check-source.py is not a full preprocessor, so
806 it does not see the definitions in the implementation headers as
807 belonging to simd.h, thus it cannot check that simd.h is being used
808 correctly in the above hypothetical corner case. However, the
809 checker also does not parse #if 0, so we can fool the checker into
810 thinking that definition occurs here, and that will work well
813 If there are ever other kinds of SIMD code that might have the same
814 problem, we might want to add other variables here.
816 # define GMX_SIMD_HAVE_FLOAT 1
817 # define GMX_SIMD_HAVE_DOUBLE 1
819 #endif // end of hack
821 // The ArrayRef<SimdReal> specialization is always included, because compiler
822 // errors are confusing when template specializations aren't available.
823 #include "gromacs/simd/simd_memory.h"
825 #endif // GMX_SIMD_SIMD_H