src/gromacs/simd/simd.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   5  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36
  37 /*! \libinternal
  38  * \defgroup module_simd SIMD intrinsics interface (simd)
  39  * \ingroup group_utilitymodules
  40  *
  41  * \brief Provides an architecture-independent way of doing SIMD coding.
  42  *
  43  * Overview of the SIMD implementation is provided in \ref page_simd.
  44  * The details are documented in gromacs/simd/simd.h and the reference
  45  * implementation impl_reference.h.
  46  *
  47  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  48  */
  49
  50 #ifndef GMX_SIMD_SIMD_H
  51 #define GMX_SIMD_SIMD_H
  52
  53 /*! \libinternal \file
  54  *
  55  * \brief Definitions, capabilities, and wrappers for SIMD module.
  56  *
  57  * The macros in this file are intended to be used for writing
  58  * architecture-independent SIMD intrinsics code.
  59  * To support a new architecture, adding a new sub-include with macros here
  60  * should be (nearly) all that is needed.
  61  *
  62  * The defines in this top-level file will set default Gromacs real precision
  63  * operations to either single or double precision based on whether
  64  * GMX_DOUBLE is 1. The actual implementation - including e.g.
  65  * conversion operations specifically between single and double - is documented
  66  * in impl_reference.h.
  67  *
  68  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  69  *
  70  * \inlibraryapi
  71  * \ingroup module_simd
  72  */
  73
  74 #include "config.h"
  75
  76 #include <cstddef>
  77 #include <cstdint>
  78
  79 #include <array>
  80 #include <type_traits>
  81
  82 #include "gromacs/utility/basedefinitions.h"
  83 #include "gromacs/utility/classhelpers.h"
  84 #include "gromacs/utility/real.h"
  85
  86 //! \cond libapi
  87
  88
  89 /*! \addtogroup module_simd
  90  * \{
  91  */
  92
  93 namespace gmx
  94 {
  95 /*! \libinternal \brief Tag type to select to load SimdFloat with simdLoad(U) */
  96 struct SimdFloatTag
  97 {
  98 };
  99 /*! \libinternal \brief Tag type to select to load SimdDouble with simdLoad(U) */
 100 struct SimdDoubleTag
 101 {
 102 };
 103 /*! \libinternal \brief Tag type to select to load SimdFInt32 with simdLoad(U) */
 104 struct SimdFInt32Tag
 105 {
 106 };
 107 /*! \libinternal \brief Tag type to select to load SimdDInt32 with simdLoad(U) */
 108 struct SimdDInt32Tag
 109 {
 110 };
 111 } // namespace gmx
 112
 113 /*! \name SIMD predefined macros to describe high-level capabilities
 114  *
 115  *  These macros are used to describe the features available in default
 116  *  Gromacs real precision. They are set from the lower-level implementation
 117  *  files that have macros describing single and double precision individually,
 118  *  as well as the implementation details.
 119  *  \{
 120  */
 121
 122 #ifdef __clang__
 123 #    pragma clang diagnostic push
 124 /* reinterpret_cast is used for SIMD->scalar conversion
 125  *
 126  * In general using reinterpret_cast for bit_cast is UB but
 127  * for intrinsics types it works for all known compilers
 128  * and not all compilers produce as good code for memcpy.
 129  */
 130 #    pragma clang diagnostic ignored "-Wundefined-reinterpret-cast"
 131 #endif
 132
 133 #if GMX_SIMD_X86_SSE2
 134 #    include "impl_x86_sse2/impl_x86_sse2.h"
 135 #elif GMX_SIMD_X86_SSE4_1
 136 #    include "impl_x86_sse4_1/impl_x86_sse4_1.h"
 137 #elif GMX_SIMD_X86_AVX_128_FMA
 138 #    include "impl_x86_avx_128_fma/impl_x86_avx_128_fma.h"
 139 #elif GMX_SIMD_X86_AVX_256
 140 #    include "impl_x86_avx_256/impl_x86_avx_256.h"
 141 #elif GMX_SIMD_X86_AVX2_256
 142 #    include "impl_x86_avx2_256/impl_x86_avx2_256.h"
 143 #elif GMX_SIMD_X86_AVX2_128
 144 #    include "impl_x86_avx2_128/impl_x86_avx2_128.h"
 145 #elif GMX_SIMD_X86_MIC
 146 #    include "impl_x86_mic/impl_x86_mic.h"
 147 #elif GMX_SIMD_X86_AVX_512
 148 #    include "impl_x86_avx_512/impl_x86_avx_512.h"
 149 #elif GMX_SIMD_X86_AVX_512_KNL
 150 #    include "impl_x86_avx_512_knl/impl_x86_avx_512_knl.h"
 151 #elif GMX_SIMD_ARM_NEON
 152 #    include "impl_arm_neon/impl_arm_neon.h"
 153 #elif GMX_SIMD_ARM_NEON_ASIMD
 154 #    include "impl_arm_neon_asimd/impl_arm_neon_asimd.h"
 155 #elif GMX_SIMD_ARM_SVE
 156 #    include "impl_arm_sve/impl_arm_sve.h"
 157 #elif GMX_SIMD_IBM_VMX
 158 #    include "impl_ibm_vmx/impl_ibm_vmx.h"
 159 #elif GMX_SIMD_IBM_VSX
 160 #    include "impl_ibm_vsx/impl_ibm_vsx.h"
 161 #elif (GMX_SIMD_REFERENCE || defined DOXYGEN)
 162 #    include "impl_reference/impl_reference.h" // Includes doxygen documentation
 163 #else
 164 #    include "impl_none/impl_none.h"
 165 #endif
 166
 167 #ifdef __clang__
 168 #    pragma clang diagnostic pop
 169 #endif
 170
 171 // The scalar SIMD-mimicking functions are always included so we can use
 172 // templated functions even without SIMD support.
 173 #include "gromacs/simd/scalar/scalar.h"
 174 #include "gromacs/simd/scalar/scalar_math.h"
 175 #include "gromacs/simd/scalar/scalar_util.h"
 176
 177
 178 #if GMX_DOUBLE
 179 #    define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_DOUBLE
 180 #    define GMX_SIMD_REAL_WIDTH GMX_SIMD_DOUBLE_WIDTH
 181 #    define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_DINT32_EXTRACT
 182 #    define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_DINT32_LOGICAL
 183 #    define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_DINT32_ARITHMETICS
 184 #    define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL \
 185         GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE
 186 #    define GMX_SIMD_HAVE_HSIMD_UTIL_REAL GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE
 187 #    define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_DOUBLE
 188 #else // GMX_DOUBLE
 189
 190 /*! \brief 1 if SimdReal is available, otherwise 0.
 191  *
 192  *  \ref GMX_SIMD_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_HAVE_FLOAT.
 193  */
 194 #    define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_FLOAT
 195
 196 /*! \brief Width of SimdReal.
 197  *
 198  *  \ref GMX_SIMD_DOUBLE_WIDTH if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_FLOAT_WIDTH.
 199  */
 200 #    define GMX_SIMD_REAL_WIDTH GMX_SIMD_FLOAT_WIDTH
 201
 202 /*! \brief 1 if support is available for extracting elements from SimdInt32, otherwise 0
 203  *
 204  *  \ref GMX_SIMD_HAVE_DINT32_EXTRACT if GMX_DOUBLE is 1, otherwise
 205  *  \ref GMX_SIMD_HAVE_FINT32_EXTRACT.
 206  */
 207 #    define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_FINT32_EXTRACT
 208
 209 /*! \brief 1 if logical ops are supported on SimdInt32, otherwise 0.
 210  *
 211  *  \ref GMX_SIMD_HAVE_DINT32_LOGICAL if GMX_DOUBLE is 1, otherwise
 212  *  \ref GMX_SIMD_HAVE_FINT32_LOGICAL.
 213  */
 214 #    define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_FINT32_LOGICAL
 215
 216 /*! \brief 1 if arithmetic ops are supported on SimdInt32, otherwise 0.
 217  *
 218  *  \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS if GMX_DOUBLE is 1, otherwise
 219  *  \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS.
 220  */
 221 #    define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_FINT32_ARITHMETICS
 222
 223 /*! \brief 1 if gmx::simdGatherLoadUBySimdIntTranspose is present, otherwise 0
 224  *
 225  *  \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE if GMX_DOUBLE is 1, otherwise
 226  *  \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT.
 227  */
 228 #    define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL \
 229         GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT
 230
 231 /*! \brief 1 if real half-register load/store/reduce utils present, otherwise 0
 232  *
 233  *  \ref GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE if GMX_DOUBLE is 1, otherwise
 234  *  \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT.
 235  */
 236 #    define GMX_SIMD_HAVE_HSIMD_UTIL_REAL GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT
 237
 238 /*! \brief 1 if Simd4Real is available, otherwise 0.
 239  *
 240  *  \ref GMX_SIMD4_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD4_HAVE_FLOAT.
 241  */
 242 #    define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_FLOAT
 243
 244 #endif // GMX_DOUBLE
 245
 246 //! \}  end of name-group describing high-level capabilities
 247
 248 namespace gmx
 249 {
 250
 251 template<class T, size_t N>
 252 struct AlignedArray;
 253
 254 #if GMX_SIMD_HAVE_FLOAT
 255 /*! \libinternal \brief Identical to std::array with GMX_SIMD_FLOAT_WIDTH alignment.
 256  *  Should not be deleted through base pointer (destructor is non-virtual).
 257  */
 258 template<size_t N>
 259 struct alignas(GMX_SIMD_FLOAT_WIDTH * sizeof(float)) AlignedArray<float, N> :
 260     public std::array<float, N>
 261 {
 262 };
 263 #endif
 264
 265 #if GMX_SIMD_HAVE_DOUBLE
 266 /*! \libinternal \brief  Identical to std::array with GMX_SIMD_DOUBLE_WIDTH alignment.
 267  *  Should not be deleted through base pointer (destructor is non-virtual).
 268  */
 269 template<size_t N>
 270 struct alignas(GMX_SIMD_DOUBLE_WIDTH * sizeof(double)) AlignedArray<double, N> :
 271     public std::array<double, N>
 272 {
 273 };
 274 #endif
 275
 276 #if GMX_SIMD_HAVE_REAL
 277
 278 /*! \name SIMD data types
 279  *
 280  *  The actual storage of these types is implementation dependent. The
 281  *  documentation is generated from the reference implementation, but for
 282  *  normal usage this will likely not be what you are using.
 283  * \{
 284  */
 285
 286 /*! \brief Real precision floating-point SIMD datatype.
 287  *
 288  * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
 289  *
 290  * \ref SimdDouble if GMX_DOUBLE is 1, otherwise \ref SimdFloat.
 291  *
 292  * \note This variable cannot be placed inside other structures or classes, since
 293  *       some compilers (including at least clang-3.7) appear to lose the
 294  *       alignment. This is likely particularly severe when allocating such
 295  *       memory on the heap, but it occurs for stack structures too.
 296  */
 297 #    if GMX_DOUBLE
 298 typedef SimdDouble SimdReal;
 299 #    else
 300 typedef SimdFloat  SimdReal;
 301 #    endif
 302
 303
 304 /*! \brief Boolean SIMD type for usage with \ref SimdReal.
 305  *
 306  * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
 307  *
 308  * If GMX_DOUBLE is 1, this will be set to \ref SimdDBool
 309  * internally, otherwise \ref SimdFBool. This is necessary since some
 310  * SIMD implementations use bitpatterns for marking truth, so single-
 311  * vs. double precision booleans are not necessarily exchangable.
 312  * As long as you just use this type you will not have to worry about precision.
 313  *
 314  * See \ref SimdIBool for an explanation of real vs. integer booleans.
 315  *
 316  * \note This variable cannot be placed inside other structures or classes, since
 317  *       some compilers (including at least clang-3.7) appear to lose the
 318  *       alignment. This is likely particularly severe when allocating such
 319  *       memory on the heap, but it occurs for stack structures too.
 320  */
 321 #    if GMX_DOUBLE
 322 typedef SimdDBool SimdBool;
 323 #    else
 324 typedef SimdFBool  SimdBool;
 325 #    endif
 326
 327
 328 /*! \brief 32-bit integer SIMD type.
 329  *
 330  * If GMX_DOUBLE is 1, this will be set to \ref SimdDInt32
 331  * internally, otherwise \ref SimdFInt32. This might seem a strange
 332  * implementation detail, but it is because some SIMD implementations use
 333  * different types/widths of integers registers when converting from
 334  * double vs. single precision floating point. As long as you just use
 335  * this type you will not have to worry about precision.
 336  *
 337  * \note This variable cannot be placed inside other structures or classes, since
 338  *       some compilers (including at least clang-3.7) appear to lose the
 339  *       alignment. This is likely particularly severe when allocating such
 340  *       memory on the heap, but it occurs for stack structures too.
 341  */
 342 #    if GMX_DOUBLE
 343 typedef SimdDInt32 SimdInt32;
 344 #    else
 345 typedef SimdFInt32 SimdInt32;
 346 #    endif
 347
 348 #    if GMX_SIMD_HAVE_INT32_ARITHMETICS
 349 /*! \brief Boolean SIMD type for usage with \ref SimdInt32.
 350  *
 351  * This type is only available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is 1.
 352  *
 353  * If GMX_DOUBLE is 1, this will be set to \ref SimdDIBool
 354  * internally, otherwise \ref SimdFIBool. This is necessary since some
 355  * SIMD implementations use bitpatterns for marking truth, so single-
 356  * vs. double precision booleans are not necessarily exchangable, and while
 357  * a double-precision boolean might be represented with a 64-bit mask, the
 358  * corresponding integer might only use a 32-bit mask.
 359  *
 360  * We provide conversion routines for these cases, so the only thing you need to
 361  * keep in mind is to use \ref SimdBool when working with
 362  * \ref SimdReal while you pick \ref SimdIBool when working with
 363  * \ref SimdInt32 .
 364  *
 365  * To convert between them, use \ref cvtB2IB and \ref cvtIB2B.
 366  *
 367  * \note This variable cannot be placed inside other structures or classes, since
 368  *       some compilers (including at least clang-3.7) appear to lose the
 369  *       alignment. This is likely particularly severe when allocating such
 370  *       memory on the heap, but it occurs for stack structures too.
 371  */
 372 #        if GMX_DOUBLE
 373 typedef SimdDIBool SimdIBool;
 374 #        else
 375 typedef SimdFIBool SimdIBool;
 376 #        endif
 377 #    endif // GMX_SIMD_HAVE_INT32_ARITHMETICS
 378
 379
 380 #    if GMX_DOUBLE
 381 const int c_simdBestPairAlignment = c_simdBestPairAlignmentDouble;
 382 #    else
 383 const int          c_simdBestPairAlignment = c_simdBestPairAlignmentFloat;
 384 #    endif
 385
 386 #endif // GMX_SIMD_HAVE_REAL
 387
 388 #if GMX_SIMD4_HAVE_REAL
 389 /*! \brief Real precision floating-point SIMD4 datatype.
 390  *
 391  * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
 392  *
 393  * \ref Simd4Double if GMX_DOUBLE is 1, otherwise \ref Simd4Float.
 394  *
 395  * \note This variable cannot be placed inside other structures or classes, since
 396  *       some compilers (including at least clang-3.7) appear to lose the
 397  *       alignment. This is likely particularly severe when allocating such
 398  *       memory on the heap, but it occurs for stack structures too.
 399  */
 400 #    if GMX_DOUBLE
 401 typedef Simd4Double Simd4Real;
 402 #    else
 403 typedef Simd4Float Simd4Real;
 404 #    endif
 405
 406
 407 /*! \brief Boolean SIMD4 type for usage with \ref SimdReal.
 408  *
 409  * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
 410  *
 411  * If GMX_DOUBLE is 1, this will be set to \ref Simd4DBool
 412  * internally, otherwise \ref Simd4FBool. This is necessary since some
 413  * SIMD implementations use bitpatterns for marking truth, so single-
 414  * vs. double precision booleans are not necessarily exchangable.
 415  * As long as you just use this type you will not have to worry about precision.
 416  *
 417  * \note This variable cannot be placed inside other structures or classes, since
 418  *       some compilers (including at least clang-3.7) appear to lose the
 419  *       alignment. This is likely particularly severe when allocating such
 420  *       memory on the heap, but it occurs for stack structures too.
 421  */
 422 #    if GMX_DOUBLE
 423 typedef Simd4DBool Simd4Bool;
 424 #    else
 425 typedef Simd4FBool Simd4Bool;
 426 #    endif
 427 #endif // GMX_SIMD4_HAVE_REAL
 428
 429 //! \}  end of name-group describing SIMD data types
 430
 431 /*! \name High-level SIMD proxy objects to disambiguate load/set operations
 432  * \{
 433  */
 434
 435 namespace internal
 436 {
 437 /*! \libinternal \brief Simd traits
 438  *
 439  * These traits are used to query data about SIMD types. Currently provided
 440  * data is useful for SIMD loads (load function and helper classes for
 441  * ArrayRef<> in simd_memory.h). Provided data:
 442  *  - type: scalar type corresponding to the SIMD type
 443  *  - width: SIMD width
 444  *  - tag: tag used for type dispatch of load function
 445  */
 446 template<typename T>
 447 struct SimdTraits
 448 {
 449 };
 450
 451 #if GMX_SIMD_HAVE_FLOAT
 452 template<>
 453 struct SimdTraits<SimdFloat>
 454 {
 455     using type                 = float;
 456     static constexpr int width = GMX_SIMD_FLOAT_WIDTH;
 457     using tag                  = SimdFloatTag;
 458 };
 459 #endif
 460 #if GMX_SIMD_HAVE_DOUBLE
 461 template<>
 462 struct SimdTraits<SimdDouble>
 463 {
 464     using type                 = double;
 465     static constexpr int width = GMX_SIMD_DOUBLE_WIDTH;
 466     using tag                  = SimdDoubleTag;
 467 };
 468 #endif
 469 #if GMX_SIMD_HAVE_FLOAT
 470 template<>
 471 struct SimdTraits<SimdFInt32>
 472 {
 473     using type                 = int;
 474     static constexpr int width = GMX_SIMD_FINT32_WIDTH;
 475     using tag                  = SimdFInt32Tag;
 476 };
 477 #endif
 478 #if GMX_SIMD_HAVE_DOUBLE
 479 template<>
 480 struct SimdTraits<SimdDInt32>
 481 {
 482     using type                 = int;
 483     static constexpr int width = GMX_SIMD_DINT32_WIDTH;
 484     using tag                  = SimdDInt32Tag;
 485 };
 486 #endif
 487 template<typename T>
 488 using SimdTraitsT = typename SimdTraits<T>::type;
 489 template<typename T>
 490 struct SimdTraits<const T>
 491 {
 492     using type                 = const SimdTraitsT<T>;
 493     static constexpr int width = SimdTraits<T>::width;
 494     using tag                  = typename SimdTraits<T>::tag;
 495 };
 496 } // namespace internal
 497
 498 /*! \brief Load function that returns SIMD or scalar
 499  *
 500  * Note that a load of T* where T is const returns a value, which is a
 501  * copy, and the caller cannot be constrained to not change it, so the
 502  * return type uses std::remove_const_t.
 503  *
 504  * \tparam T Type to load (type is always mandatory)
 505  * \param  m Pointer to aligned memory
 506  * \return   Loaded value
 507  */
 508 template<typename T>
 509 static inline std::remove_const_t<T> load(const internal::SimdTraitsT<T>* m) // disabled by SFINAE for non-SIMD types
 510 {
 511     return simdLoad(m, typename internal::SimdTraits<T>::tag());
 512 }
 513
 514 template<typename T>
 515 static inline T
 516 /* the enable_if serves to prevent two different type of misuse:
 517  * 1) load<SimdReal>(SimdReal*); should only be called on real* or int*
 518  * 2) load(real*); template parameter is mandatory because otherwise ambiguity is
 519  *    created. The dependent type disables type deduction.
 520  */
 521 load(const std::enable_if_t<std::is_arithmetic_v<T>, T> *m)
 522 {
 523     return *m;
 524 }
 525
 526 template<typename T, size_t N>
 527 static inline T gmx_simdcall load(const AlignedArray<internal::SimdTraitsT<T>, N>& m)
 528 {
 529     return simdLoad(m.data(), typename internal::SimdTraits<T>::tag());
 530 }
 531
 532 /*! \brief Load function that returns SIMD or scalar based on template argument
 533  *
 534  * \tparam T Type to load (type is always mandatory)
 535  * \param m Pointer to unaligned memory
 536  * \return Loaded SimdFloat/Double/Int or basic scalar type
 537  */
 538 template<typename T>
 539 static inline T loadU(const internal::SimdTraitsT<T>* m)
 540 {
 541     return simdLoadU(m, typename internal::SimdTraits<T>::tag());
 542 }
 543
 544 template<typename T>
 545 static inline T loadU(const std::enable_if_t<std::is_arithmetic_v<T>, T>* m)
 546 {
 547     return *m;
 548 }
 549
 550 template<typename T, size_t N>
 551 static inline T gmx_simdcall loadU(const AlignedArray<internal::SimdTraitsT<T>, N>& m)
 552 {
 553     return simdLoadU(m.data(), typename internal::SimdTraits<T>::tag());
 554 }
 555
 556 /*! \libinternal \brief Proxy object to enable setZero() for SIMD and real types.
 557  *
 558  * This object is returned by setZero(), and depending on what type you assign
 559  * the result to the conversion method will call the right low-level function.
 560  */
 561 class SimdSetZeroProxy
 562 {
 563 public:
 564     //!\brief Conversion method that returns 0.0 as float
 565     operator float() const { return 0.0F; }
 566     //!\brief Conversion method that returns 0.0 as double
 567     operator double() const { return 0.0; }
 568     //!\brief Conversion method that returns 0.0 as int32
 569     operator std::int32_t() const { return 0; }
 570 #if GMX_SIMD_HAVE_FLOAT
 571     //!\brief Conversion method that will execute setZero() for SimdFloat
 572     operator SimdFloat() const { return setZeroF(); }
 573     //!\brief Conversion method that will execute setZero() for SimdFInt32
 574     operator SimdFInt32() const { return setZeroFI(); }
 575 #endif
 576 #if GMX_SIMD4_HAVE_FLOAT
 577     //!\brief Conversion method that will execute setZero() for Simd4Float
 578     operator Simd4Float() const { return simd4SetZeroF(); }
 579 #endif
 580 #if GMX_SIMD_HAVE_DOUBLE
 581     //!\brief Conversion method that will execute setZero() for SimdDouble
 582     operator SimdDouble() const { return setZeroD(); }
 583     //!\brief Conversion method that will execute setZero() for SimdDInt32
 584     operator SimdDInt32() const { return setZeroDI(); }
 585 #endif
 586 #if GMX_SIMD4_HAVE_DOUBLE
 587     //!\brief Conversion method that will execute setZero() for Simd4Double
 588     operator Simd4Double() const { return simd4SetZeroD(); }
 589 #endif
 590 };
 591
 592 /*! \brief Helper function to set any SIMD or scalar variable to zero
 593  *
 594  * \return Proxy object that will call the actual function to set a SIMD/scalar
 595  *         variable to zero based on the conversion function called when you
 596  *         assign the result.
 597  */
 598 static inline SimdSetZeroProxy gmx_simdcall setZero()
 599 {
 600     return {};
 601 }
 602
 603 namespace internal
 604 {
 605 // TODO: Don't forward function but properly rename them and use proper traits
 606 template<typename T>
 607 struct Simd4Traits
 608 {
 609 };
 610
 611 #if GMX_SIMD4_HAVE_FLOAT
 612 template<>
 613 struct Simd4Traits<Simd4Float>
 614 {
 615     using type = float;
 616 };
 617 #endif
 618
 619 #if GMX_SIMD4_HAVE_DOUBLE
 620 template<>
 621 struct Simd4Traits<Simd4Double>
 622 {
 623     using type = double;
 624 };
 625 #endif
 626 template<typename T>
 627 using Simd4TraitsT = typename Simd4Traits<T>::type;
 628 } // namespace internal
 629
 630 #if GMX_SIMD4_HAVE_REAL
 631 template<typename T>
 632 T load(const internal::Simd4TraitsT<T>* m)
 633 {
 634     return load4(m);
 635 }
 636 template<typename T>
 637 T loadU(const internal::Simd4TraitsT<T>* m)
 638 {
 639     return load4U(m);
 640 }
 641 #endif
 642
 643 /* Implement most of 4xn functions by forwarding them to other functions when possible.
 644  * The functions forwarded here don't need to be implemented by each implementation.
 645  * For width=4 all functions are forwarded and for width=8 all but loadU4NOffset are forwarded.
 646  */
 647 #if GMX_SIMD_HAVE_FLOAT
 648 #    if GMX_SIMD_FLOAT_WIDTH < 4
 649 #        define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT (GMX_SIMD_HAVE_LOADU && GMX_SIMD4_HAVE_FLOAT)
 650 #    elif GMX_SIMD_FLOAT_WIDTH == 4
 651 #        define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT GMX_SIMD_HAVE_LOADU
 652 // For GMX_SIMD_FLOAT_WIDTH>4 it is the reponsibility of the implementation to set
 653 // GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
 654 #    endif
 655
 656 #    if GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
 657 #        if GMX_SIMD_FLOAT_WIDTH < 4
 658 using Simd4NFloat = Simd4Float;
 659 #            define GMX_SIMD4N_FLOAT_WIDTH 4
 660 #        else
 661 using Simd4NFloat = SimdFloat;
 662 #            define GMX_SIMD4N_FLOAT_WIDTH GMX_SIMD_FLOAT_WIDTH
 663 #        endif
 664
 665 #        if GMX_SIMD_FLOAT_WIDTH <= 4
 666 static inline Simd4NFloat gmx_simdcall loadUNDuplicate4(const float* f)
 667 {
 668     return Simd4NFloat(*f);
 669 }
 670 static inline Simd4NFloat gmx_simdcall load4DuplicateN(const float* f)
 671 {
 672     return load<Simd4NFloat>(f);
 673 }
 674 static inline Simd4NFloat gmx_simdcall loadU4NOffset(const float* f, int)
 675 {
 676     return loadU<Simd4NFloat>(f);
 677 }
 678 #        elif GMX_SIMD_FLOAT_WIDTH == 8
 679 static inline Simd4NFloat gmx_simdcall loadUNDuplicate4(const float* f)
 680 {
 681     return loadU1DualHsimd(f);
 682 }
 683 static inline Simd4NFloat gmx_simdcall load4DuplicateN(const float* f)
 684 {
 685     return loadDuplicateHsimd(f);
 686 }
 687 #        endif
 688 #    endif // GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
 689 #else      // GMX_SIMD_HAVE_FLOAT
 690 #    define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT 0
 691 #endif
 692
 693 #if GMX_SIMD_HAVE_DOUBLE
 694 #    if GMX_SIMD_DOUBLE_WIDTH < 4
 695 #        define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE (GMX_SIMD_HAVE_LOADU && GMX_SIMD4_HAVE_DOUBLE)
 696 #    elif GMX_SIMD_DOUBLE_WIDTH == 4
 697 #        define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE GMX_SIMD_HAVE_LOADU
 698 // For GMX_SIMD_DOUBLE_WIDTH>4 it is the reponsibility of the implementation to set
 699 // GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
 700 #    endif
 701
 702 #    if GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
 703 #        if GMX_SIMD_DOUBLE_WIDTH < 4
 704 using Simd4NDouble = Simd4Double;
 705 #            define GMX_SIMD4N_DOUBLE_WIDTH 4
 706 #        else
 707 using Simd4NDouble = SimdDouble;
 708 #            define GMX_SIMD4N_DOUBLE_WIDTH GMX_SIMD_DOUBLE_WIDTH
 709 #        endif
 710
 711 #        if GMX_SIMD_DOUBLE_WIDTH <= 4
 712 static inline Simd4NDouble gmx_simdcall loadUNDuplicate4(const double* f)
 713 {
 714     return Simd4NDouble(*f);
 715 }
 716 static inline Simd4NDouble gmx_simdcall load4DuplicateN(const double* f)
 717 {
 718     return load<Simd4NDouble>(f);
 719 }
 720 static inline Simd4NDouble gmx_simdcall loadU4NOffset(const double* f, int /*unused*/)
 721 {
 722     return loadU<Simd4NDouble>(f);
 723 }
 724 #        elif GMX_SIMD_DOUBLE_WIDTH == 8
 725 static inline Simd4NDouble gmx_simdcall loadUNDuplicate4(const double* f)
 726 {
 727     return loadU1DualHsimd(f);
 728 }
 729 static inline Simd4NDouble gmx_simdcall load4DuplicateN(const double* f)
 730 {
 731     return loadDuplicateHsimd(f);
 732 }
 733 #        endif
 734 #    endif // GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
 735 #else      // GMX_SIMD_HAVE_DOUBLE
 736 #    define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE 0
 737 #endif
 738
 739 #if GMX_DOUBLE
 740 #    define GMX_SIMD_HAVE_4NSIMD_UTIL_REAL GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
 741 #else
 742 #    define GMX_SIMD_HAVE_4NSIMD_UTIL_REAL GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
 743 #endif
 744
 745 #if GMX_SIMD_HAVE_4NSIMD_UTIL_REAL
 746 #    if GMX_DOUBLE
 747 using Simd4NReal = Simd4NDouble;
 748 #        define GMX_SIMD4N_REAL_WIDTH GMX_SIMD4N_DOUBLE_WIDTH
 749 #    else
 750 using Simd4NReal = Simd4NFloat;
 751 #        define GMX_SIMD4N_REAL_WIDTH GMX_SIMD4N_FLOAT_WIDTH
 752 #    endif
 753 #endif
 754
 755 //! \}  end of name-group proxy objects
 756
 757 } // namespace gmx
 758
 759 //! \}          end of module_simd
 760
 761 //! \endcond   end of condition libapi
 762
 763
 764 #if GMX_SIMD_HAVE_FLOAT
 765
 766 /*! \brief Returns whether a pointer to float is aligned to a SIMD boundary
 767  *
 768  * \param[in] ptr  A pointer to a float
 769  */
 770 static inline bool isSimdAligned(const float* ptr)
 771 {
 772     return reinterpret_cast<std::size_t>(ptr) % (GMX_SIMD_FLOAT_WIDTH * sizeof(float)) == 0;
 773 }
 774
 775 #endif // GMX_SIMD_HAVE_FLOAT
 776
 777 #if GMX_SIMD_HAVE_DOUBLE
 778
 779 /*! \brief Returns whether a pointer to double is aligned to a SIMD boundary
 780  *
 781  * \param[in] ptr  A pointer to a double
 782  */
 783 static inline bool isSimdAligned(const double* ptr)
 784 {
 785     return reinterpret_cast<std::size_t>(ptr) % (GMX_SIMD_DOUBLE_WIDTH * sizeof(double)) == 0;
 786 }
 787
 788 #endif // GMX_SIMD_HAVE_DOUBLE
 789
 790
 791 #if GMX_SIMD_HAVE_REAL
 792 #    if GMX_SIMD_REAL_WIDTH > GMX_REAL_MAX_SIMD_WIDTH
 793 #        error "GMX_SIMD_REAL_WIDTH > GMX_REAL_MAX_SIMD_WIDTH: increase GMX_REAL_MAX_SIMD_WIDTH in real.h"
 794 #    endif
 795 #endif
 796
 797
 798 #if 0
 799 /* This is a hack to cover the corner case of using an
 800    explicit GMX_SIMD_HAVE_FLOAT or GMX_SIMD_HAVE_DOUBLE, rather than
 801    GMX_SIMD_HAVE_REAL.
 802
 803    Such code is expected to include simd.h to get those symbols
  804    defined, but the actual definitions are in the implementation headers
 805    included by simd.h. check-source.py is not a full preprocessor, so
 806    it does not see the definitions in the implementation headers as
 807    belonging to simd.h, thus it cannot check that simd.h is being used
 808    correctly in the above hypothetical corner case. However, the
 809    checker also does not parse #if 0, so we can fool the checker into
 810    thinking that definition occurs here, and that will work well
 811    enough.
 812
 813    If there's ever other kinds of SIMD code that might have the same
 814    problem, we might want to add other variables here.
 815  */
 816 #    define GMX_SIMD_HAVE_FLOAT 1
 817 #    define GMX_SIMD_HAVE_DOUBLE 1
 818
 819 #endif // end of hack
 820
 821 // The ArrayRef<SimdReal> specialization is always included, because compiler
 822 // errors are confusing when template specialization aren't available.
 823 #include "gromacs/simd/simd_memory.h"
 824
 825 #endif // GMX_SIMD_SIMD_H