src/gromacs/simd/simd.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \libinternal
  37  * \defgroup module_simd SIMD intrinsics interface (simd)
  38  * \ingroup group_utilitymodules
  39  *
  40  * \brief Provides an architecture-independent way of doing SIMD coding.
  41  *
  42  * Overview of the SIMD implementation is provided in \ref page_simd.
  43  * The details are documented in gromacs/simd/simd.h and the reference
  44  * implementation impl_reference.h.
  45  *
  46  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  47  */
  48
  49 #ifndef GMX_SIMD_SIMD_H
  50 #define GMX_SIMD_SIMD_H
  51
  52 /*! \libinternal \file
  53  *
  54  * \brief Definitions, capabilities, and wrappers for SIMD module.
  55  *
  56  * The macros in this file are intended to be used for writing
  57  * architecture-independent SIMD intrinsics code.
  58  * To support a new architecture, adding a new sub-include with macros here
  59  * should be (nearly) all that is needed.
  60  *
  61  * The defines in this top-level file will set default Gromacs real precision
  62  * operations to either single or double precision based on whether
  63  * GMX_DOUBLE is 1. The actual implementation - including e.g.
  64  * conversion operations specifically between single and double - is documented
  65  * in impl_reference.h.
  66  *
  67  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  68  *
  69  * \inlibraryapi
  70  * \ingroup module_simd
  71  */
  72
  73 #include "config.h"
  74
  75 #include <cstddef>
  76 #include <cstdint>
  77
  78 #include <array>
  79
  80 #include "gromacs/utility/classhelpers.h"
  81 #include "gromacs/utility/real.h"
  82
  83 //! \cond libapi
  84
  85
  86 /*! \addtogroup module_simd
  87  * \{
  88  */
  89
  90 namespace gmx
  91 {
  92 /*! \libinternal \brief Tag type to select to load SimdFloat with simdLoad(U) */
  93 struct SimdFloatTag {};
  94 /*! \libinternal \brief Tag type to select to load SimdDouble with simdLoad(U) */
  95 struct SimdDoubleTag {};
  96 /*! \libinternal \brief Tag type to select to load SimdFInt32 with simdLoad(U) */
  97 struct SimdFInt32Tag {};
  98 /*! \libinternal \brief Tag type to select to load SimdDInt32 with simdLoad(U) */
  99 struct SimdDInt32Tag {};
 100 }  // namespace gmx
 101
 102 /*! \name SIMD predefined macros to describe high-level capabilities
 103  *
 104  *  These macros are used to describe the features available in default
 105  *  Gromacs real precision. They are set from the lower-level implementation
 106  *  files that have macros describing single and double precision individually,
 107  *  as well as the implementation details.
 108  *  \{
 109  */
 110
 111 #ifdef __clang__
 112 #pragma clang diagnostic push
 113 /* reinterpret_cast is used for SIMD->scalar conversion
 114  *
 115  * In general using reinterpret_cast for bit_cast is UB but
 116  * for intrinsics types it works for all known compilers
 117  * and not all compilers produce as good code for memcpy.
 118  */
 119 #pragma clang diagnostic ignored "-Wundefined-reinterpret-cast"
 120 #endif
 121
 122 #if GMX_SIMD_X86_SSE2
 123 #    include "impl_x86_sse2/impl_x86_sse2.h"
 124 #elif GMX_SIMD_X86_SSE4_1
 125 #    include "impl_x86_sse4_1/impl_x86_sse4_1.h"
 126 #elif GMX_SIMD_X86_AVX_128_FMA
 127 #    include "impl_x86_avx_128_fma/impl_x86_avx_128_fma.h"
 128 #elif GMX_SIMD_X86_AVX_256
 129 #    include "impl_x86_avx_256/impl_x86_avx_256.h"
 130 #elif GMX_SIMD_X86_AVX2_256
 131 #    include "impl_x86_avx2_256/impl_x86_avx2_256.h"
 132 #elif GMX_SIMD_X86_AVX2_128
 133 #    include "impl_x86_avx2_128/impl_x86_avx2_128.h"
 134 #elif GMX_SIMD_X86_MIC
 135 #    include "impl_x86_mic/impl_x86_mic.h"
 136 #elif GMX_SIMD_X86_AVX_512
 137 #    include "impl_x86_avx_512/impl_x86_avx_512.h"
 138 #elif GMX_SIMD_X86_AVX_512_KNL
 139 #    include "impl_x86_avx_512_knl/impl_x86_avx_512_knl.h"
 140 #elif GMX_SIMD_ARM_NEON
 141 #    include "impl_arm_neon/impl_arm_neon.h"
 142 #elif GMX_SIMD_ARM_NEON_ASIMD
 143 #    include "impl_arm_neon_asimd/impl_arm_neon_asimd.h"
 144 #elif GMX_SIMD_IBM_VMX
 145 #    include "impl_ibm_vmx/impl_ibm_vmx.h"
 146 #elif GMX_SIMD_IBM_VSX
 147 #    include "impl_ibm_vsx/impl_ibm_vsx.h"
 148 #elif (GMX_SIMD_REFERENCE || defined DOXYGEN)
 149 #    include "impl_reference/impl_reference.h" // Includes doxygen documentation
 150 #else
 151 #    include "impl_none/impl_none.h"
 152 #endif
 153
 154 #ifdef __clang__
 155 #pragma clang diagnostic pop
 156 #endif
 157
 158 // The scalar SIMD-mimicking functions are always included so we can use
 159 // templated functions even without SIMD support.
 160 #include "gromacs/simd/scalar/scalar.h"
 161 #include "gromacs/simd/scalar/scalar_math.h"
 162 #include "gromacs/simd/scalar/scalar_util.h"
 163
 164
/* Map the precision-independent "REAL"/"INT32" capability macros onto the
 * double-precision macros when GMX_DOUBLE is 1, and onto the single-precision
 * macros otherwise. The Doxygen documentation of each macro is attached to
 * the definitions in the single-precision (#else) branch.
 */
#if GMX_DOUBLE
#    define GMX_SIMD_HAVE_REAL                                     GMX_SIMD_HAVE_DOUBLE
#    define GMX_SIMD_REAL_WIDTH                                    GMX_SIMD_DOUBLE_WIDTH
#    define GMX_SIMD_HAVE_INT32_EXTRACT                            GMX_SIMD_HAVE_DINT32_EXTRACT
#    define GMX_SIMD_HAVE_INT32_LOGICAL                            GMX_SIMD_HAVE_DINT32_LOGICAL
#    define GMX_SIMD_HAVE_INT32_ARITHMETICS                        GMX_SIMD_HAVE_DINT32_ARITHMETICS
#    define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL    GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE
#    define GMX_SIMD_HAVE_HSIMD_UTIL_REAL                          GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE
#    define GMX_SIMD4_HAVE_REAL                                    GMX_SIMD4_HAVE_DOUBLE
#else // GMX_DOUBLE

/*! \brief 1 if SimdReal is available, otherwise 0.
 *
 *  \ref GMX_SIMD_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_HAVE_FLOAT.
 */
#    define GMX_SIMD_HAVE_REAL               GMX_SIMD_HAVE_FLOAT

/*! \brief Width of SimdReal.
 *
 *  \ref GMX_SIMD_DOUBLE_WIDTH if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_FLOAT_WIDTH.
 */
#    define GMX_SIMD_REAL_WIDTH              GMX_SIMD_FLOAT_WIDTH

/*! \brief 1 if support is available for extracting elements from SimdInt32, otherwise 0
 *
 *  \ref GMX_SIMD_HAVE_DINT32_EXTRACT if GMX_DOUBLE is 1, otherwise
 *  \ref GMX_SIMD_HAVE_FINT32_EXTRACT.
 */
#    define GMX_SIMD_HAVE_INT32_EXTRACT      GMX_SIMD_HAVE_FINT32_EXTRACT

/*! \brief 1 if logical ops are supported on SimdInt32, otherwise 0.
 *
 *  \ref GMX_SIMD_HAVE_DINT32_LOGICAL if GMX_DOUBLE is 1, otherwise
 *  \ref GMX_SIMD_HAVE_FINT32_LOGICAL.
 */
#    define GMX_SIMD_HAVE_INT32_LOGICAL      GMX_SIMD_HAVE_FINT32_LOGICAL

/*! \brief 1 if arithmetic ops are supported on SimdInt32, otherwise 0.
 *
 *  \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS if GMX_DOUBLE is 1, otherwise
 *  \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS.
 */
#    define GMX_SIMD_HAVE_INT32_ARITHMETICS  GMX_SIMD_HAVE_FINT32_ARITHMETICS

/*! \brief 1 if gmx::simdGatherLoadUBySimdIntTranspose is present, otherwise 0
 *
 *  \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE if GMX_DOUBLE is 1, otherwise
 *  \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT.
 */
#    define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL    GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT

/*! \brief 1 if real half-register load/store/reduce utils present, otherwise 0
 *
 *  \ref GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE if GMX_DOUBLE is 1, otherwise
 *  \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT.
 */
#    define GMX_SIMD_HAVE_HSIMD_UTIL_REAL    GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT

/*! \brief 1 if Simd4Real is available, otherwise 0.
 *
 *  \ref GMX_SIMD4_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD4_HAVE_FLOAT.
 */
#    define GMX_SIMD4_HAVE_REAL              GMX_SIMD4_HAVE_FLOAT

#endif // GMX_DOUBLE
 230
 231 //! \}  end of name-group describing high-level capabilities
 232
 233 namespace gmx
 234 {
 235
 236 template<class T, size_t N>
 237 struct AlignedArray;
 238
 239 #if GMX_SIMD_HAVE_FLOAT
 240 /*! \libinternal \brief Identical to std::array with GMX_SIMD_FLOAT_WIDTH alignment.
 241  *  Should not be deleted through base pointer (destructor is non-virtual).
 242  */
 243 template<size_t N>
 244 struct alignas(GMX_SIMD_FLOAT_WIDTH*sizeof(float))AlignedArray<float, N> : public std::array<float, N>
 245 {
 246 };
 247 #endif
 248
 249 #if GMX_SIMD_HAVE_DOUBLE
 250 /*! \libinternal \brief  Identical to std::array with GMX_SIMD_DOUBLE_WIDTH alignment.
 251  *  Should not be deleted through base pointer (destructor is non-virtual).
 252  */
 253 template<size_t N>
 254 struct alignas(GMX_SIMD_DOUBLE_WIDTH*sizeof(double))AlignedArray<double, N> : public std::array<double, N>
 255 {
 256 };
 257 #endif
 258
 259 #if GMX_SIMD_HAVE_REAL
 260
 261 /*! \name SIMD data types
 262  *
 263  *  The actual storage of these types is implementation dependent. The
 264  *  documentation is generated from the reference implementation, but for
 265  *  normal usage this will likely not be what you are using.
 266  * \{
 267  */
 268
 269 /*! \brief Real precision floating-point SIMD datatype.
 270  *
 271  * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
 272  *
 273  * \ref SimdDouble if GMX_DOUBLE is 1, otherwise \ref SimdFloat.
 274  *
 275  * \note This variable cannot be placed inside other structures or classes, since
 276  *       some compilers (including at least clang-3.7) appear to lose the
 277  *       alignment. This is likely particularly severe when allocating such
 278  *       memory on the heap, but it occurs for stack structures too.
 279  */
 280 #    if GMX_DOUBLE
 281 typedef SimdDouble               SimdReal;
 282 #    else
 283 typedef SimdFloat                SimdReal;
 284 #    endif
 285
 286
 287 /*! \brief Boolean SIMD type for usage with \ref SimdReal.
 288  *
 289  * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
 290  *
 291  * If GMX_DOUBLE is 1, this will be set to \ref SimdDBool
 292  * internally, otherwise \ref SimdFBool. This is necessary since some
 293  * SIMD implementations use bitpatterns for marking truth, so single-
 * vs. double precision booleans are not necessarily exchangeable.
 295  * As long as you just use this type you will not have to worry about precision.
 296  *
 297  * See \ref SimdIBool for an explanation of real vs. integer booleans.
 298  *
 299  * \note This variable cannot be placed inside other structures or classes, since
 300  *       some compilers (including at least clang-3.7) appear to lose the
 301  *       alignment. This is likely particularly severe when allocating such
 302  *       memory on the heap, but it occurs for stack structures too.
 303  */
 304 #    if GMX_DOUBLE
 305 typedef SimdDBool                SimdBool;
 306 #    else
 307 typedef SimdFBool                SimdBool;
 308 #    endif
 309
 310
 311 /*! \brief 32-bit integer SIMD type.
 312  *
 313  * If GMX_DOUBLE is 1, this will be set to \ref SimdDInt32
 314  * internally, otherwise \ref SimdFInt32. This might seem a strange
 315  * implementation detail, but it is because some SIMD implementations use
 316  * different types/widths of integers registers when converting from
 317  * double vs. single precision floating point. As long as you just use
 318  * this type you will not have to worry about precision.
 319  *
 320  * \note This variable cannot be placed inside other structures or classes, since
 321  *       some compilers (including at least clang-3.7) appear to lose the
 322  *       alignment. This is likely particularly severe when allocating such
 323  *       memory on the heap, but it occurs for stack structures too.
 324  */
 325 #    if GMX_DOUBLE
 326 typedef SimdDInt32               SimdInt32;
 327 #    else
 328 typedef SimdFInt32               SimdInt32;
 329 #    endif
 330
 331 #if GMX_SIMD_HAVE_INT32_ARITHMETICS
 332 /*! \brief Boolean SIMD type for usage with \ref SimdInt32.
 333  *
 334  * This type is only available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is 1.
 335  *
 336  * If GMX_DOUBLE is 1, this will be set to \ref SimdDIBool
 337  * internally, otherwise \ref SimdFIBool. This is necessary since some
 338  * SIMD implementations use bitpatterns for marking truth, so single-
 * vs. double precision booleans are not necessarily exchangeable, and while
 340  * a double-precision boolean might be represented with a 64-bit mask, the
 341  * corresponding integer might only use a 32-bit mask.
 342  *
 343  * We provide conversion routines for these cases, so the only thing you need to
 344  * keep in mind is to use \ref SimdBool when working with
 345  * \ref SimdReal while you pick \ref SimdIBool when working with
 346  * \ref SimdInt32 .
 347  *
 348  * To convert between them, use \ref cvtB2IB and \ref cvtIB2B.
 349  *
 350  * \note This variable cannot be placed inside other structures or classes, since
 351  *       some compilers (including at least clang-3.7) appear to lose the
 352  *       alignment. This is likely particularly severe when allocating such
 353  *       memory on the heap, but it occurs for stack structures too.
 354  */
 355 #    if GMX_DOUBLE
 356 typedef SimdDIBool               SimdIBool;
 357 #    else
 358 typedef SimdFIBool               SimdIBool;
 359 #    endif
 360 #endif  // GMX_SIMD_HAVE_INT32_ARITHMETICS
 361
 362
/* Precision-independent alias: takes the value of c_simdBestPairAlignmentDouble
 * if GMX_DOUBLE is 1, otherwise the value of c_simdBestPairAlignmentFloat.
 * Both constants are defined outside this file.
 */
#if GMX_DOUBLE
const int c_simdBestPairAlignment = c_simdBestPairAlignmentDouble;
#else
const int c_simdBestPairAlignment = c_simdBestPairAlignmentFloat;
#endif
 368
 369 #endif  // GMX_SIMD_HAVE_REAL
 370
 371 #if GMX_SIMD4_HAVE_REAL
 372 /*! \brief Real precision floating-point SIMD4 datatype.
 373  *
 374  * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
 375  *
 376  * \ref Simd4Double if GMX_DOUBLE is 1, otherwise \ref Simd4Float.
 377  *
 378  * \note This variable cannot be placed inside other structures or classes, since
 379  *       some compilers (including at least clang-3.7) appear to lose the
 380  *       alignment. This is likely particularly severe when allocating such
 381  *       memory on the heap, but it occurs for stack structures too.
 382  */
 383 #    if GMX_DOUBLE
 384 typedef Simd4Double               Simd4Real;
 385 #    else
 386 typedef Simd4Float                Simd4Real;
 387 #    endif
 388
 389
/*! \brief Boolean SIMD4 type for usage with \ref Simd4Real.
 391  *
 392  * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
 393  *
 394  * If GMX_DOUBLE is 1, this will be set to \ref Simd4DBool
 395  * internally, otherwise \ref Simd4FBool. This is necessary since some
 396  * SIMD implementations use bitpatterns for marking truth, so single-
 * vs. double precision booleans are not necessarily exchangeable.
 398  * As long as you just use this type you will not have to worry about precision.
 399  *
 400  * \note This variable cannot be placed inside other structures or classes, since
 401  *       some compilers (including at least clang-3.7) appear to lose the
 402  *       alignment. This is likely particularly severe when allocating such
 403  *       memory on the heap, but it occurs for stack structures too.
 404  */
 405 #    if GMX_DOUBLE
 406 typedef Simd4DBool                Simd4Bool;
 407 #    else
 408 typedef Simd4FBool                Simd4Bool;
 409 #    endif
 410 #endif // GMX_SIMD4_HAVE_REAL
 411
 412 //! \}  end of name-group describing SIMD data types
 413
 414 /*! \name High-level SIMD proxy objects to disambiguate load/set operations
 415  * \{
 416  */
 417
 418 namespace internal
 419 {
 420 /*! \libinternal \brief Simd traits
 421  *
 422  * These traits are used to query data about SIMD types. Currently provided
 423  * data is useful for SIMD loads (load function and helper classes for
 424  * ArrayRef<> in simd_memory.h). Provided data:
 425  *  - type: scalar type corresponding to the SIMD type
 426  *  - width: SIMD width
 427  *  - tag: tag used for type dispatch of load function
 428  */
 429 template<typename T>
 430 struct SimdTraits {};
 431
 432 #if GMX_SIMD_HAVE_FLOAT
 433 template<>
 434 struct SimdTraits<SimdFloat>
 435 {
 436     using type = float;
 437     static constexpr int width = GMX_SIMD_FLOAT_WIDTH;
 438     using tag = SimdFloatTag;
 439 };
 440 #endif
 441 #if GMX_SIMD_HAVE_DOUBLE
 442 template<>
 443 struct SimdTraits<SimdDouble>
 444 {
 445     using type = double;
 446     static constexpr int width = GMX_SIMD_DOUBLE_WIDTH;
 447     using tag = SimdDoubleTag;
 448 };
 449 #endif
 450 #if GMX_SIMD_HAVE_FLOAT
 451 template<>
 452 struct SimdTraits<SimdFInt32>
 453 {
 454     using type = int;
 455     static constexpr int width = GMX_SIMD_FINT32_WIDTH;
 456     using tag = SimdFInt32Tag;
 457 };
 458 #endif
 459 #if GMX_SIMD_HAVE_DOUBLE
 460 template<>
 461 struct SimdTraits<SimdDInt32>
 462 {
 463     using type = int;
 464     static constexpr int width = GMX_SIMD_DINT32_WIDTH;
 465     using tag = SimdDInt32Tag;
 466 };
 467 #endif
 468
 469 template<typename T>
 470 struct SimdTraits<const T>
 471 {
 472     using type = const typename SimdTraits<T>::type;
 473     static constexpr int width = SimdTraits<T>::width;
 474     using tag = typename SimdTraits<T>::tag;
 475 };
 476 }   //namespace internal
 477
 478 /*! \brief Load function that returns SIMD or scalar
 479  *
 480  * \tparam T Type to load (type is always mandatory)
 481  * \param  m Pointer to aligned memory
 482  * \return   Loaded value
 483  */
 484 template<typename T>
 485 static inline T
 486 load(const typename internal::SimdTraits<T>::type *m) //disabled by SFINAE for non-SIMD types
 487 {
 488     return simdLoad(m, typename internal::SimdTraits<T>::tag());
 489 }
 490
 491 template<typename T>
 492 static inline T
 493 /* the enable_if serves to prevent two different type of misuse:
 494  * 1) load<SimdReal>(SimdReal*); should only be called on real* or int*
 495  * 2) load(real*); template parameter is mandatory because otherwise ambiguity is
 496  *    created. The dependent type disables type deduction.
 497  */
 498 load(const typename std::enable_if<std::is_arithmetic<T>::value, T>::type *m)
 499 {
 500     return *m;
 501 }
 502
 503 template <typename T, size_t N>
 504 static inline T gmx_simdcall
 505 load(const AlignedArray<typename internal::SimdTraits<T>::type, N> &m)
 506 {
 507     return simdLoad(m.data(), typename internal::SimdTraits<T>::tag());
 508 }
 509
 510 /*! \brief Load function that returns SIMD or scalar based on template argument
 511  *
 512  * \tparam T Type to load (type is always mandatory)
 513  * \param m Pointer to unaligned memory
 514  * \return Loaded SimdFloat/Double/Int or basic scalar type
 515  */
 516 template<typename T>
 517 static inline T
 518 loadU(const typename internal::SimdTraits<T>::type *m)
 519 {
 520     return simdLoadU(m, typename internal::SimdTraits<T>::tag());
 521 }
 522
 523 template<typename T>
 524 static inline T
 525 loadU(const typename std::enable_if<std::is_arithmetic<T>::value, T>::type *m)
 526 {
 527     return *m;
 528 }
 529
 530 template <typename T, size_t N>
 531 static inline T gmx_simdcall
 532 loadU(const AlignedArray<typename internal::SimdTraits<T>::type, N> &m)
 533 {
 534     return simdLoadU(m.data(), typename internal::SimdTraits<T>::tag());
 535 }
 536
 537 /*! \libinternal \brief Proxy object to enable setZero() for SIMD and real types.
 538  *
 539  * This object is returned by setZero(), and depending on what type you assign
 540  * the result to the conversion method will call the right low-level function.
 541  */
 542 class SimdSetZeroProxy
 543 {
 544     public:
 545         //!\brief Conversion method that returns 0.0 as float
 546         operator float() const { return 0.0f; }
 547         //!\brief Conversion method that returns 0.0 as double
 548         operator double() const { return 0.0; }
 549         //!\brief Conversion method that returns 0.0 as int32
 550         operator std::int32_t() const { return 0; }
 551 #if GMX_SIMD_HAVE_FLOAT
 552         //!\brief Conversion method that will execute setZero() for SimdFloat
 553         operator SimdFloat() const { return setZeroF(); }
 554         //!\brief Conversion method that will execute setZero() for SimdFInt32
 555         operator SimdFInt32() const { return setZeroFI(); }
 556 #endif
 557 #if GMX_SIMD4_HAVE_FLOAT
 558         //!\brief Conversion method that will execute setZero() for Simd4Float
 559         operator Simd4Float() const { return simd4SetZeroF(); }
 560 #endif
 561 #if GMX_SIMD_HAVE_DOUBLE
 562         //!\brief Conversion method that will execute setZero() for SimdDouble
 563         operator SimdDouble() const { return setZeroD(); }
 564         //!\brief Conversion method that will execute setZero() for SimdDInt32
 565         operator SimdDInt32() const { return setZeroDI(); }
 566 #endif
 567 #if GMX_SIMD4_HAVE_DOUBLE
 568         //!\brief Conversion method that will execute setZero() for Simd4Double
 569         operator Simd4Double() const { return simd4SetZeroD(); }
 570 #endif
 571 };
 572
 573 /*! \brief Helper function to set any SIMD or scalar variable to zero
 574  *
 575  * \return Proxy object that will call the actual function to set a SIMD/scalar
 576  *         variable to zero based on the conversion function called when you
 577  *         assign the result.
 578  */
 579 static inline const SimdSetZeroProxy gmx_simdcall
 580 setZero()
 581 {
 582     return {};
 583 }
 584
 585 namespace internal
 586 {
 587 //TODO: Don't foward function but properly rename them and use proper traits
 588 template<typename T>
 589 struct Simd4Traits {};
 590
 591 #if GMX_SIMD4_HAVE_FLOAT
 592 template<>
 593 struct Simd4Traits<Simd4Float>
 594 {
 595     using type = float;
 596 };
 597 #endif
 598
 599 #if GMX_SIMD4_HAVE_DOUBLE
 600 template<>
 601 struct Simd4Traits<Simd4Double>
 602 {
 603     using type = double;
 604 };
 605 #endif
 606 }   //namespace internal
 607
#if GMX_SIMD4_HAVE_REAL
// Overload of load() for SIMD4 types: T must have an internal::Simd4Traits
// specialization, and the pointer is passed on unchanged to load4().
template<typename T>
T load(const typename internal::Simd4Traits<T>::type* m)
{
    return load4(m);
}
// Overload of loadU() for SIMD4 types: T must have an internal::Simd4Traits
// specialization, and the pointer is passed on unchanged to load4U().
template<typename T>
T loadU(const typename internal::Simd4Traits<T>::type* m)
{
    return load4U(m);
}
#endif
 620
 621 /* Implement most of 4xn functions by forwarding them to other functions when possible.
 622  * The functions forwarded here don't need to be implemented by each implementation.
 623  * For width=4 all functions are forwarded and for width=8 all but loadU4NOffset are forwarded.
 624  */
#if GMX_SIMD_HAVE_FLOAT
// Decide whether the single-precision 4xN helpers can be provided here.
#if GMX_SIMD_FLOAT_WIDTH < 4
#define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT (GMX_SIMD_HAVE_LOADU && GMX_SIMD4_HAVE_FLOAT)
#elif GMX_SIMD_FLOAT_WIDTH == 4
#define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT GMX_SIMD_HAVE_LOADU
//For GMX_SIMD_FLOAT_WIDTH>4 it is the responsibility of the implementation to set
//GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
#endif

#if GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
// Simd4NFloat is Simd4Float when the SIMD width is below 4, otherwise SimdFloat;
// GMX_SIMD4N_FLOAT_WIDTH is the width of the chosen type.
#if GMX_SIMD_FLOAT_WIDTH < 4
using Simd4NFloat = Simd4Float;
#define GMX_SIMD4N_FLOAT_WIDTH 4
#else
using Simd4NFloat = SimdFloat;
#define GMX_SIMD4N_FLOAT_WIDTH GMX_SIMD_FLOAT_WIDTH
#endif

#if GMX_SIMD_FLOAT_WIDTH <= 4
// Constructs a Simd4NFloat from the single scalar *f.
// NOTE(review): presumably this broadcasts the value to all elements - confirm
// against the Simd4NFloat constructor.
static inline Simd4NFloat gmx_simdcall
loadUNDuplicate4(const float* f)
{
    return Simd4NFloat(*f);
}
// Forwards to load<Simd4NFloat>(f).
static inline Simd4NFloat gmx_simdcall
load4DuplicateN(const float* f)
{
    return load<Simd4NFloat>(f);
}
// Forwards to loadU<Simd4NFloat>(f); the int argument is ignored.
static inline Simd4NFloat gmx_simdcall
loadU4NOffset(const float* f, int /*unused*/)
{
    return loadU<Simd4NFloat>(f);
}
#elif GMX_SIMD_FLOAT_WIDTH == 8
// Forwards to loadU1DualHsimd(f).
static inline Simd4NFloat gmx_simdcall
loadUNDuplicate4(const float* f)
{
    return loadU1DualHsimd(f);
}
// Forwards to loadDuplicateHsimd(f).
static inline Simd4NFloat gmx_simdcall
load4DuplicateN(const float* f)
{
    return loadDuplicateHsimd(f);
}
// loadU4NOffset is not forwarded for width 8.
#endif
#endif //GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
#else  //GMX_SIMD_HAVE_FLOAT
#define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT 0
#endif
 675
#if GMX_SIMD_HAVE_DOUBLE
// Decide whether the double-precision 4xN helpers can be provided here.
#if GMX_SIMD_DOUBLE_WIDTH < 4
#define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE (GMX_SIMD_HAVE_LOADU && GMX_SIMD4_HAVE_DOUBLE)
#elif GMX_SIMD_DOUBLE_WIDTH == 4
#define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE GMX_SIMD_HAVE_LOADU
//For GMX_SIMD_DOUBLE_WIDTH>4 it is the responsibility of the implementation to set
//GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
#endif

#if GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
// Simd4NDouble is Simd4Double when the SIMD width is below 4, otherwise SimdDouble;
// GMX_SIMD4N_DOUBLE_WIDTH is the width of the chosen type.
#if GMX_SIMD_DOUBLE_WIDTH < 4
using Simd4NDouble = Simd4Double;
#define GMX_SIMD4N_DOUBLE_WIDTH 4
#else
using Simd4NDouble = SimdDouble;
#define GMX_SIMD4N_DOUBLE_WIDTH GMX_SIMD_DOUBLE_WIDTH
#endif

#if GMX_SIMD_DOUBLE_WIDTH <= 4
// Constructs a Simd4NDouble from the single scalar *f.
// NOTE(review): presumably this broadcasts the value to all elements - confirm
// against the Simd4NDouble constructor.
static inline Simd4NDouble gmx_simdcall
loadUNDuplicate4(const double* f)
{
    return Simd4NDouble(*f);
}
// Forwards to load<Simd4NDouble>(f).
static inline Simd4NDouble gmx_simdcall
load4DuplicateN(const double* f)
{
    return load<Simd4NDouble>(f);
}
// Forwards to loadU<Simd4NDouble>(f); the int argument is ignored.
static inline Simd4NDouble gmx_simdcall
loadU4NOffset(const double* f, int /*unused*/)
{
    return loadU<Simd4NDouble>(f);
}
#elif GMX_SIMD_DOUBLE_WIDTH == 8
// Forwards to loadU1DualHsimd(f).
static inline Simd4NDouble gmx_simdcall
loadUNDuplicate4(const double* f)
{
    return loadU1DualHsimd(f);
}
// Forwards to loadDuplicateHsimd(f).
static inline Simd4NDouble gmx_simdcall
load4DuplicateN(const double* f)
{
    return loadDuplicateHsimd(f);
}
// loadU4NOffset is not forwarded for width 8.
#endif
#endif //GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
#else  //GMX_SIMD_HAVE_DOUBLE
#define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE 0
#endif
 726
// Precision-independent capability flag for the 4xN helpers: the double
// variant if GMX_DOUBLE is 1, otherwise the float variant.
#if GMX_DOUBLE
#define GMX_SIMD_HAVE_4NSIMD_UTIL_REAL GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
#else
#define GMX_SIMD_HAVE_4NSIMD_UTIL_REAL GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
#endif

// Precision-independent 4xN type and width, only present when the
// capability flag above is nonzero.
#if GMX_SIMD_HAVE_4NSIMD_UTIL_REAL
#if GMX_DOUBLE
using Simd4NReal = Simd4NDouble;
#define GMX_SIMD4N_REAL_WIDTH GMX_SIMD4N_DOUBLE_WIDTH
#else
using Simd4NReal = Simd4NFloat;
#define GMX_SIMD4N_REAL_WIDTH GMX_SIMD4N_FLOAT_WIDTH
#endif
#endif
 742
 743 //! \}  end of name-group proxy objects
 744
 745 }      // namespace gmx
 746
 747 // \}          end of module_simd
 748
 749 //! \endcond   end of condition libapi
 750
 751
 752 #if GMX_SIMD_HAVE_FLOAT
 753
 754 /*! \brief Returns whether a pointer to float is aligned to a SIMD boundary
 755  *
 756  * \param[in] ptr  A pointer to a float
 757  */
 758 static inline bool isSimdAligned(const float *ptr)
 759 {
 760     return reinterpret_cast<std::size_t>(ptr) % (GMX_SIMD_FLOAT_WIDTH*sizeof(float)) == 0;
 761 }
 762
 763 #endif // GMX_SIMD_HAVE_FLOAT
 764
 765 #if GMX_SIMD_HAVE_DOUBLE
 766
 767 /*! \brief Returns whether a pointer to double is aligned to a SIMD boundary
 768  *
 769  * \param[in] ptr  A pointer to a double
 770  */
 771 static inline bool isSimdAligned(const double *ptr)
 772 {
 773     return reinterpret_cast<std::size_t>(ptr) % (GMX_SIMD_DOUBLE_WIDTH*sizeof(double)) == 0;
 774 }
 775
 776 #endif // GMX_SIMD_HAVE_DOUBLE
 777
 778
 779 #if GMX_SIMD_HAVE_REAL
 780 #if GMX_SIMD_REAL_WIDTH > GMX_REAL_MAX_SIMD_WIDTH
 781 #error "GMX_SIMD_REAL_WIDTH > GMX_REAL_MAX_SIMD_WIDTH: increase GMX_REAL_MAX_SIMD_WIDTH in real.h"
 782 #endif
 783 #endif
 784
 785
 786 #if 0
 787 /* This is a hack to cover the corner case of using an
 788    explicit GMX_SIMD_HAVE_FLOAT or GMX_SIMD_HAVE_DOUBLE, rather than
 789    GMX_SIMD_HAVE_REAL.
 790
 791    Such code is expected to include simd.h to get those symbols
   defined, but the actual definitions are in the implementation headers
 793    included by simd.h. check-source.py is not a full preprocessor, so
 794    it does not see the definitions in the implementation headers as
 795    belonging to simd.h, thus it cannot check that simd.h is being used
 796    correctly in the above hypothetical corner case. However, the
 797    checker also does not parse #if 0, so we can fool the checker into
 798    thinking that definition occurs here, and that will work well
 799    enough.
 800
 801    If there's ever other kinds of SIMD code that might have the same
 802    problem, we might want to add other variables here.
 803  */
 804 #    define GMX_SIMD_HAVE_FLOAT         1
 805 #    define GMX_SIMD_HAVE_DOUBLE        1
 806
 807 #endif // end of hack
 808
 809 // The ArrayRef<SimdReal> specialization is always included, because compiler
 810 // errors are confusing when template specialization aren't available.
 811 #include "gromacs/simd/simd_memory.h"
 812
 813 #endif // GMX_SIMD_SIMD_H