src/gromacs/simd/simd.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \libinternal
  37  * \defgroup module_simd SIMD intrinsics interface (simd)
  38  * \ingroup group_utilitymodules
  39  *
  40  * \brief Provides an architecture-independent way of doing SIMD coding.
  41  *
  42  * Overview of the SIMD implementation is provided in \ref page_simd.
  43  * The details are documented in gromacs/simd/simd.h and the reference
  44  * implementation impl_reference.h.
  45  *
  46  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  47  */
  48
  49 #ifndef GMX_SIMD_SIMD_H
  50 #define GMX_SIMD_SIMD_H
  51
  52 /*! \libinternal \file
  53  *
  54  * \brief Definitions, capabilities, and wrappers for SIMD module.
  55  *
  56  * The macros in this file are intended to be used for writing
  57  * architecture-independent SIMD intrinsics code.
  58  * To support a new architecture, adding a new sub-include with macros here
  59  * should be (nearly) all that is needed.
  60  *
  61  * The defines in this top-level file will set default Gromacs real precision
  62  * operations to either single or double precision based on whether
  63  * GMX_DOUBLE is 1. The actual implementation - including e.g.
  64  * conversion operations specifically between single and double - is documented
  65  * in impl_reference.h.
  66  *
  67  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  68  *
  69  * \inlibraryapi
  70  * \ingroup module_simd
  71  */
  72
  73 #include "config.h"
  74
  75 #include <cstddef>
  76 #include <cstdint>
  77
  78 #include <array>
  79 #include <type_traits>
  80
  81 #include "gromacs/utility/classhelpers.h"
  82 #include "gromacs/utility/real.h"
  83
  84 //! \cond libapi
  85
  86
  87 /*! \addtogroup module_simd
  88  * \{
  89  */
  90
  91 namespace gmx
  92 {
  93 /*! \libinternal \brief Empty tag type that selects loading a SimdFloat with simdLoad()/simdLoadU() */
  94 struct SimdFloatTag
  95 {
  96 };
  97 /*! \libinternal \brief Empty tag type that selects loading a SimdDouble with simdLoad()/simdLoadU() */
  98 struct SimdDoubleTag
  99 {
 100 };
 101 /*! \libinternal \brief Empty tag type that selects loading a SimdFInt32 with simdLoad()/simdLoadU() */
 102 struct SimdFInt32Tag
 103 {
 104 };
 105 /*! \libinternal \brief Empty tag type that selects loading a SimdDInt32 with simdLoad()/simdLoadU() */
 106 struct SimdDInt32Tag
 107 {
 108 };
 109 } // namespace gmx
 110
 111 /*! \name SIMD predefined macros to describe high-level capabilities
 112  *
 113  *  These macros are used to describe the features available in default
 114  *  Gromacs real precision. They are set from the lower-level implementation
 115  *  files that have macros describing single and double precision individually,
 116  *  as well as the implementation details.
 117  *  \{
 118  */
 119
 120 #ifdef __clang__
 121 #    pragma clang diagnostic push
 122 /* reinterpret_cast is used for SIMD->scalar conversion
 123  *
 124  * In general using reinterpret_cast for bit_cast is UB but
 125  * for intrinsics types it works for all known compilers
 126  * and not all compilers produce as good code for memcpy.
 127  */
 128 #    pragma clang diagnostic ignored "-Wundefined-reinterpret-cast"
 129 #endif
 130
 131 #if GMX_SIMD_X86_SSE2
 132 #    include "impl_x86_sse2/impl_x86_sse2.h"
 133 #elif GMX_SIMD_X86_SSE4_1
 134 #    include "impl_x86_sse4_1/impl_x86_sse4_1.h"
 135 #elif GMX_SIMD_X86_AVX_128_FMA
 136 #    include "impl_x86_avx_128_fma/impl_x86_avx_128_fma.h"
 137 #elif GMX_SIMD_X86_AVX_256
 138 #    include "impl_x86_avx_256/impl_x86_avx_256.h"
 139 #elif GMX_SIMD_X86_AVX2_256
 140 #    include "impl_x86_avx2_256/impl_x86_avx2_256.h"
 141 #elif GMX_SIMD_X86_AVX2_128
 142 #    include "impl_x86_avx2_128/impl_x86_avx2_128.h"
 143 #elif GMX_SIMD_X86_MIC
 144 #    include "impl_x86_mic/impl_x86_mic.h"
 145 #elif GMX_SIMD_X86_AVX_512
 146 #    include "impl_x86_avx_512/impl_x86_avx_512.h"
 147 #elif GMX_SIMD_X86_AVX_512_KNL
 148 #    include "impl_x86_avx_512_knl/impl_x86_avx_512_knl.h"
 149 #elif GMX_SIMD_ARM_NEON
 150 #    include "impl_arm_neon/impl_arm_neon.h"
 151 #elif GMX_SIMD_ARM_NEON_ASIMD
 152 #    include "impl_arm_neon_asimd/impl_arm_neon_asimd.h"
 153 #elif GMX_SIMD_IBM_VMX
 154 #    include "impl_ibm_vmx/impl_ibm_vmx.h"
 155 #elif GMX_SIMD_IBM_VSX
 156 #    include "impl_ibm_vsx/impl_ibm_vsx.h"
 157 #elif (GMX_SIMD_REFERENCE || defined DOXYGEN)
 158 #    include "impl_reference/impl_reference.h" // Includes doxygen documentation
 159 #else
 160 #    include "impl_none/impl_none.h"
 161 #endif
 162
 163 #ifdef __clang__
 164 #    pragma clang diagnostic pop
 165 #endif
 166
 167 // The scalar SIMD-mimicking functions are always included so we can use
 168 // templated functions even without SIMD support.
 169 #include "gromacs/simd/scalar/scalar.h"
 170 #include "gromacs/simd/scalar/scalar_math.h"
 171 #include "gromacs/simd/scalar/scalar_util.h"
 172
 173
 174 #if GMX_DOUBLE // Double precision; each macro is documented at its counterpart in the #else branch
 175 #    define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_DOUBLE
 176 #    define GMX_SIMD_REAL_WIDTH GMX_SIMD_DOUBLE_WIDTH
 177 #    define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_DINT32_EXTRACT
 178 #    define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_DINT32_LOGICAL
 179 #    define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_DINT32_ARITHMETICS
 180 #    define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL \
 181         GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE
 182 #    define GMX_SIMD_HAVE_HSIMD_UTIL_REAL GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE
 183 #    define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_DOUBLE
 184 #else // !GMX_DOUBLE, i.e. single precision
 185
 186 /*! \brief 1 if SimdReal is available, otherwise 0.
 187  *
 188  *  \ref GMX_SIMD_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_HAVE_FLOAT.
 189  */
 190 #    define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_FLOAT
 191
 192 /*! \brief Width of SimdReal.
 193  *
 194  *  \ref GMX_SIMD_DOUBLE_WIDTH if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_FLOAT_WIDTH.
 195  */
 196 #    define GMX_SIMD_REAL_WIDTH GMX_SIMD_FLOAT_WIDTH
 197
 198 /*! \brief 1 if support is available for extracting elements from SimdInt32, otherwise 0
 199  *
 200  *  \ref GMX_SIMD_HAVE_DINT32_EXTRACT if GMX_DOUBLE is 1, otherwise
 201  *  \ref GMX_SIMD_HAVE_FINT32_EXTRACT.
 202  */
 203 #    define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_FINT32_EXTRACT
 204
 205 /*! \brief 1 if logical ops are supported on SimdInt32, otherwise 0.
 206  *
 207  *  \ref GMX_SIMD_HAVE_DINT32_LOGICAL if GMX_DOUBLE is 1, otherwise
 208  *  \ref GMX_SIMD_HAVE_FINT32_LOGICAL.
 209  */
 210 #    define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_FINT32_LOGICAL
 211
 212 /*! \brief 1 if arithmetic ops are supported on SimdInt32, otherwise 0.
 213  *
 214  *  \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS if GMX_DOUBLE is 1, otherwise
 215  *  \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS.
 216  */
 217 #    define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_FINT32_ARITHMETICS
 218
 219 /*! \brief 1 if gmx::simdGatherLoadUBySimdIntTranspose is present, otherwise 0
 220  *
 221  *  \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE if GMX_DOUBLE is 1, otherwise
 222  *  \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT.
 223  */
 224 #    define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL \
 225         GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT
 226
 227 /*! \brief 1 if real half-register load/store/reduce utils present, otherwise 0
 228  *
 229  *  \ref GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE if GMX_DOUBLE is 1, otherwise
 230  *  \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT.
 231  */
 232 #    define GMX_SIMD_HAVE_HSIMD_UTIL_REAL GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT
 233
 234 /*! \brief 1 if Simd4Real is available, otherwise 0.
 235  *
 236  *  \ref GMX_SIMD4_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD4_HAVE_FLOAT.
 237  */
 238 #    define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_FLOAT
 239
 240 #endif // GMX_DOUBLE
 241
 242 //! \}  end of name-group describing high-level capabilities
 243
 244 namespace gmx
 245 {
 246
 247 template<class T, size_t N> // primary template is only declared here; float/double specializations follow
 248 struct AlignedArray;
 249
 250 #if GMX_SIMD_HAVE_FLOAT
 251 /*! \libinternal \brief std::array<float, N> aligned to GMX_SIMD_FLOAT_WIDTH * sizeof(float) bytes.
 252  *  The destructor is non-virtual, so never delete an object through a base pointer.
 253  */
 254 template<std::size_t N>
 255 struct alignas(GMX_SIMD_FLOAT_WIDTH * sizeof(float)) AlignedArray<float, N> :
 256     std::array<float, N> // a struct inherits publicly by default
 257 {
 258 };
 259 #endif
 260
 261 #if GMX_SIMD_HAVE_DOUBLE
 262 /*! \libinternal \brief std::array<double, N> aligned to GMX_SIMD_DOUBLE_WIDTH * sizeof(double) bytes.
 263  *  The destructor is non-virtual, so never delete an object through a base pointer.
 264  */
 265 template<std::size_t N>
 266 struct alignas(GMX_SIMD_DOUBLE_WIDTH * sizeof(double)) AlignedArray<double, N> :
 267     std::array<double, N> // a struct inherits publicly by default
 268 {
 269 };
 270 #endif
 271
 272 #if GMX_SIMD_HAVE_REAL
 273
 274 /*! \name SIMD data types
 275  *
 276  *  The actual storage of these types is implementation dependent. The
 277  *  documentation is generated from the reference implementation, but for
 278  *  normal usage this will likely not be what you are using.
 279  * \{
 280  */
 281
 282 /*! \brief Real precision floating-point SIMD datatype.
 283  *
 284  * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
 285  *
 286  * \ref SimdDouble if GMX_DOUBLE is 1, otherwise \ref SimdFloat.
 287  *
 288  * \note This variable cannot be placed inside other structures or classes, since
 289  *       some compilers (including at least clang-3.7) appear to lose the
 290  *       alignment. This is likely particularly severe when allocating such
 291  *       memory on the heap, but it occurs for stack structures too.
 292  */
 293 #    if GMX_DOUBLE
 294 using SimdReal = SimdDouble; // double-precision build
 295 #    else
 296 using SimdReal = SimdFloat; // single-precision build
 297 #    endif
 298
 299
 300 /*! \brief Boolean SIMD type for usage with \ref SimdReal.
 301  *
 302  * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
 303  *
 304  * If GMX_DOUBLE is 1, this will be set to \ref SimdDBool
 305  * internally, otherwise \ref SimdFBool. This is necessary since some
 306  * SIMD implementations use bitpatterns for marking truth, so single-
 307  * vs. double precision booleans are not necessarily exchangeable.
 308  * As long as you just use this type you will not have to worry about precision.
 309  *
 310  * See \ref SimdIBool for an explanation of real vs. integer booleans.
 311  *
 312  * \note This variable cannot be placed inside other structures or classes, since
 313  *       some compilers (including at least clang-3.7) appear to lose the
 314  *       alignment. This is likely particularly severe when allocating such
 315  *       memory on the heap, but it occurs for stack structures too.
 316  */
 317 #    if GMX_DOUBLE
 318 using SimdBool = SimdDBool; // double-precision build
 319 #    else
 320 using SimdBool = SimdFBool; // single-precision build
 321 #    endif
 322
 323
 324 /*! \brief 32-bit integer SIMD type.
 325  *
 326  * If GMX_DOUBLE is 1, this will be set to \ref SimdDInt32
 327  * internally, otherwise \ref SimdFInt32. This might seem a strange
 328  * implementation detail, but it is because some SIMD implementations use
 329  * different types/widths of integers registers when converting from
 330  * double vs. single precision floating point. As long as you just use
 331  * this type you will not have to worry about precision.
 332  *
 333  * \note This variable cannot be placed inside other structures or classes, since
 334  *       some compilers (including at least clang-3.7) appear to lose the
 335  *       alignment. This is likely particularly severe when allocating such
 336  *       memory on the heap, but it occurs for stack structures too.
 337  */
 338 #    if GMX_DOUBLE
 339 using SimdInt32 = SimdDInt32; // integer type paired with SimdDouble
 340 #    else
 341 using SimdInt32 = SimdFInt32; // integer type paired with SimdFloat
 342 #    endif
 343
 344 #    if GMX_SIMD_HAVE_INT32_ARITHMETICS
 345 /*! \brief Boolean SIMD type for usage with \ref SimdInt32.
 346  *
 347  * This type is only available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is 1.
 348  *
 349  * If GMX_DOUBLE is 1, this will be set to \ref SimdDIBool
 350  * internally, otherwise \ref SimdFIBool. This is necessary since some
 351  * SIMD implementations use bitpatterns for marking truth, so single-
 352  * vs. double precision booleans are not necessarily exchangeable, and while
 353  * a double-precision boolean might be represented with a 64-bit mask, the
 354  * corresponding integer might only use a 32-bit mask.
 355  *
 356  * We provide conversion routines for these cases, so the only thing you need to
 357  * keep in mind is to use \ref SimdBool when working with
 358  * \ref SimdReal while you pick \ref SimdIBool when working with
 359  * \ref SimdInt32 .
 360  *
 361  * To convert between them, use \ref cvtB2IB and \ref cvtIB2B.
 362  *
 363  * \note This variable cannot be placed inside other structures or classes, since
 364  *       some compilers (including at least clang-3.7) appear to lose the
 365  *       alignment. This is likely particularly severe when allocating such
 366  *       memory on the heap, but it occurs for stack structures too.
 367  */
 368 #        if GMX_DOUBLE
 369 using SimdIBool = SimdDIBool; // double-precision build
 370 #        else
 371 using SimdIBool = SimdFIBool; // single-precision build
 372 #        endif
 373 #    endif // GMX_SIMD_HAVE_INT32_ARITHMETICS
 374
 375
 376 #    if GMX_DOUBLE // real-precision name for the double-precision constant
 377 const int c_simdBestPairAlignment = c_simdBestPairAlignmentDouble;
 378 #    else // real-precision name for the single-precision constant
 379 const int          c_simdBestPairAlignment = c_simdBestPairAlignmentFloat;
 380 #    endif
 381
 382 #endif // GMX_SIMD_HAVE_REAL
 383
 384 #if GMX_SIMD4_HAVE_REAL
 385 /*! \brief Real precision floating-point SIMD4 datatype.
 386  *
 387  * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
 388  *
 389  * \ref Simd4Double if GMX_DOUBLE is 1, otherwise \ref Simd4Float.
 390  *
 391  * \note This variable cannot be placed inside other structures or classes, since
 392  *       some compilers (including at least clang-3.7) appear to lose the
 393  *       alignment. This is likely particularly severe when allocating such
 394  *       memory on the heap, but it occurs for stack structures too.
 395  */
 396 #    if GMX_DOUBLE
 397 using Simd4Real = Simd4Double; // double-precision build
 398 #    else
 399 using Simd4Real = Simd4Float; // single-precision build
 400 #    endif
 401
 402
 403 /*! \brief Boolean SIMD4 type for usage with \ref Simd4Real.
 404  *
 405  * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
 406  *
 407  * If GMX_DOUBLE is 1, this will be set to \ref Simd4DBool
 408  * internally, otherwise \ref Simd4FBool. This is necessary since some
 409  * SIMD implementations use bitpatterns for marking truth, so single-
 410  * vs. double precision booleans are not necessarily exchangeable.
 411  * As long as you just use this type you will not have to worry about precision.
 412  *
 413  * \note This variable cannot be placed inside other structures or classes, since
 414  *       some compilers (including at least clang-3.7) appear to lose the
 415  *       alignment. This is likely particularly severe when allocating such
 416  *       memory on the heap, but it occurs for stack structures too.
 417  */
 418 #    if GMX_DOUBLE
 419 using Simd4Bool = Simd4DBool; // double-precision build
 420 #    else
 421 using Simd4Bool = Simd4FBool; // single-precision build
 422 #    endif
 423 #endif // GMX_SIMD4_HAVE_REAL
 424
 425 //! \}  end of name-group describing SIMD data types
 426
 427 /*! \name High-level SIMD proxy objects to disambiguate load/set operations
 428  * \{
 429  */
 430
 431 namespace internal
 432 {
 433 /*! \libinternal \brief Simd traits
 434  *
 435  * Compile-time description of a SIMD type, used by the load functions and by
 436  * the helper classes for ArrayRef<> in simd_memory.h. The primary template is
 437  * empty; each specialization provides:
 438  *  - type: scalar type corresponding to the SIMD type
 439  *  - width: SIMD width
 440  *  - tag: tag used for type dispatch of load function
 441  */
 442 template<typename T>
 443 struct SimdTraits
 444 {
 445 };
 446
 447 #if GMX_SIMD_HAVE_FLOAT
 448 template<> // SimdFloat: float elements, loads dispatched with SimdFloatTag
 449 struct SimdTraits<SimdFloat>
 450 {
 451     using type                 = float;
 452     static constexpr int width = GMX_SIMD_FLOAT_WIDTH;
 453     using tag                  = SimdFloatTag;
 454 };
 455 #endif
 456 #if GMX_SIMD_HAVE_DOUBLE
 457 template<> // SimdDouble: double elements, loads dispatched with SimdDoubleTag
 458 struct SimdTraits<SimdDouble>
 459 {
 460     using type                 = double;
 461     static constexpr int width = GMX_SIMD_DOUBLE_WIDTH;
 462     using tag                  = SimdDoubleTag;
 463 };
 464 #endif
 465 #if GMX_SIMD_HAVE_FLOAT
 466 template<> // SimdFInt32: int elements, loads dispatched with SimdFInt32Tag
 467 struct SimdTraits<SimdFInt32>
 468 {
 469     using type                 = int;
 470     static constexpr int width = GMX_SIMD_FINT32_WIDTH;
 471     using tag                  = SimdFInt32Tag;
 472 };
 473 #endif
 474 #if GMX_SIMD_HAVE_DOUBLE
 475 template<> // SimdDInt32: int elements, loads dispatched with SimdDInt32Tag
 476 struct SimdTraits<SimdDInt32>
 477 {
 478     using type                 = int;
 479     static constexpr int width = GMX_SIMD_DINT32_WIDTH;
 480     using tag                  = SimdDInt32Tag;
 481 };
 482 #endif
 483
 484 template<typename T> // const T reuses the width and tag of T, with a const-qualified scalar type
 485 struct SimdTraits<const T>
 486 {
 487     using type                 = const typename SimdTraits<T>::type;
 488     static constexpr int width = SimdTraits<T>::width;
 489     using tag                  = typename SimdTraits<T>::tag;
 490 };
 491 } // namespace internal
 492
 493 /*! \brief Load function that returns SIMD or scalar; this overload handles SIMD types
 494  *
 495  * The result is returned by value, i.e. as a copy that the caller is free to
 496  * modify, so a const-qualified T is stripped with std::remove_const_t.
 497  * For non-SIMD T this overload is disabled by SFINAE.
 498  *
 499  * \tparam T Type to load (must always be given explicitly)
 500  * \param  m Pointer to aligned memory
 501  * \return   Loaded value
 502  */
 503 template<typename T>
 504 static inline std::remove_const_t<T> load(const typename internal::SimdTraits<T>::type* m)
 505 {
 506     using LoadTag = typename internal::SimdTraits<T>::tag;
 507     return simdLoad(m, LoadTag());
 508 }
 509
 510 /* Scalar overload of load(). The enable_if guards against two kinds of misuse:
 511  * 1) load<SimdReal>(SimdReal*); only pointers to arithmetic types are accepted
 512  * 2) load(real*); T sits in a non-deduced context, so it must be spelled out
 513  *    explicitly, which avoids ambiguity between the overloads.
 514  */
 515 template<typename T>
 516 static inline T load(const std::enable_if_t<std::is_arithmetic<T>::value, T>* m)
 517 {
 518     T value = *m;
 519     return value;
 520 }
 521
 522 template<typename T, size_t N> // SIMD load from the start of an AlignedArray via simdLoad(); T must be explicit
 523 static inline T gmx_simdcall load(const AlignedArray<typename internal::SimdTraits<T>::type, N>& m)
 524 {
 525     return simdLoad(m.data(), typename internal::SimdTraits<T>::tag());
 526 }
 527
 528 /*! \brief Unaligned load that returns SIMD or scalar; this overload handles SIMD types
 529  * \tparam T Type to load (must always be given explicitly)
 530  * \param m Pointer to unaligned memory
 531  * \return Loaded SimdFloat/Double/Int
 532  */
 533 template<typename T>
 534 static inline T loadU(const typename internal::SimdTraits<T>::type* m)
 535 {
 536     using LoadTag = typename internal::SimdTraits<T>::tag;
 537     return simdLoadU(m, LoadTag());
 538 }
 539
 540 template<typename T> // scalar overload: reads the single value at m; T must be given explicitly
 541 static inline T loadU(const std::enable_if_t<std::is_arithmetic<T>::value, T>* m)
 542 {
 543     return m[0];
 544 }
 545
 546 template<typename T, size_t N> // SIMD load from the start of an AlignedArray via simdLoadU(); T must be explicit
 547 static inline T gmx_simdcall loadU(const AlignedArray<typename internal::SimdTraits<T>::type, N>& m)
 548 {
 549     return simdLoadU(m.data(), typename internal::SimdTraits<T>::tag());
 550 }
 551
 552 /*! \libinternal \brief Proxy object to enable setZero() for SIMD and real types.
 553  *
 554  * This object is returned by setZero(). The type you assign the result to
 555  * selects the conversion operator, which in turn calls the right low-level function.
 556  */
 557 class SimdSetZeroProxy
 558 {
 559 public:
 560     //!\brief Conversion method that returns 0.0 as float
 561     operator float() const { return 0.0F; }
 562     //!\brief Conversion method that returns 0.0 as double
 563     operator double() const { return 0.0; }
 564     //!\brief Conversion method that returns 0 as std::int32_t
 565     operator std::int32_t() const { return 0; }
 566 #if GMX_SIMD_HAVE_FLOAT
 567     //!\brief Conversion method that calls setZeroF() to produce a SimdFloat
 568     operator SimdFloat() const { return setZeroF(); }
 569     //!\brief Conversion method that calls setZeroFI() to produce a SimdFInt32
 570     operator SimdFInt32() const { return setZeroFI(); }
 571 #endif
 572 #if GMX_SIMD4_HAVE_FLOAT
 573     //!\brief Conversion method that calls simd4SetZeroF() to produce a Simd4Float
 574     operator Simd4Float() const { return simd4SetZeroF(); }
 575 #endif
 576 #if GMX_SIMD_HAVE_DOUBLE
 577     //!\brief Conversion method that calls setZeroD() to produce a SimdDouble
 578     operator SimdDouble() const { return setZeroD(); }
 579     //!\brief Conversion method that calls setZeroDI() to produce a SimdDInt32
 580     operator SimdDInt32() const { return setZeroDI(); }
 581 #endif
 582 #if GMX_SIMD4_HAVE_DOUBLE
 583     //!\brief Conversion method that calls simd4SetZeroD() to produce a Simd4Double
 584     operator Simd4Double() const { return simd4SetZeroD(); }
 585 #endif
 586 };
 587
 588 /*! \brief Produce a zero that can be assigned to any SIMD or scalar variable
 589  *
 590  * \return A SimdSetZeroProxy; assigning it to a variable invokes the proxy's
 591  *         conversion operator for that variable's type, which yields the
 592  *         matching zero value.
 593  */
 594 static inline SimdSetZeroProxy gmx_simdcall setZero()
 595 {
 596     return SimdSetZeroProxy();
 597 }
 598
 599 namespace internal
 600 {
 601 // TODO: Don't forward functions but properly rename them and use proper traits
 602 template<typename T> // maps a SIMD4 type to its scalar type; empty for any other T
 603 struct Simd4Traits
 604 {
 605 };
 606
 607 #if GMX_SIMD4_HAVE_FLOAT
 608 template<> // Simd4Float holds float
 609 struct Simd4Traits<Simd4Float>
 610 {
 611     using type = float;
 612 };
 613 #endif
 614
 615 #if GMX_SIMD4_HAVE_DOUBLE
 616 template<> // Simd4Double holds double
 617 struct Simd4Traits<Simd4Double>
 618 {
 619     using type = double;
 620 };
 621 #endif
 622 } // namespace internal
 623
 624 #if GMX_SIMD4_HAVE_REAL
 625 template<typename T> // SIMD4 overload of load(): forwards to load4(); T must be given explicitly
 626 T load(const typename internal::Simd4Traits<T>::type* m)
 627 {
 628     return load4(m);
 629 }
 630 template<typename T> // SIMD4 overload of loadU(): forwards to load4U(); T must be given explicitly
 631 T loadU(const typename internal::Simd4Traits<T>::type* m)
 632 {
 633     return load4U(m);
 634 }
 635 #endif
 636
 637 /* Implement most of 4xn functions by forwarding them to other functions when possible.
 638  * The functions forwarded here don't need to be implemented by each implementation.
 639  * For width=4 all functions are forwarded and for width=8 all but loadU4NOffset are forwarded.
 640  */
 641 #if GMX_SIMD_HAVE_FLOAT
 642 #    if GMX_SIMD_FLOAT_WIDTH < 4
 643 #        define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT (GMX_SIMD_HAVE_LOADU && GMX_SIMD4_HAVE_FLOAT)
 644 #    elif GMX_SIMD_FLOAT_WIDTH == 4
 645 #        define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT GMX_SIMD_HAVE_LOADU
 646 // For GMX_SIMD_FLOAT_WIDTH>4 it is the responsibility of the implementation to set
 647 // GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
 648 #    endif
 649
 650 #    if GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
 651 #        if GMX_SIMD_FLOAT_WIDTH < 4 // narrow SIMD: the 4N type is the SIMD4 type
 652 using Simd4NFloat = Simd4Float;
 653 #            define GMX_SIMD4N_FLOAT_WIDTH 4
 654 #        else // otherwise the 4N type is the regular SIMD type
 655 using Simd4NFloat = SimdFloat;
 656 #            define GMX_SIMD4N_FLOAT_WIDTH GMX_SIMD_FLOAT_WIDTH
 657 #        endif
 658
 659 #        if GMX_SIMD_FLOAT_WIDTH <= 4 // all three functions forward to the constructor or load/loadU
 660 static inline Simd4NFloat gmx_simdcall loadUNDuplicate4(const float* f)
 661 {
 662     return Simd4NFloat(*f); // constructed from the single scalar *f
 663 }
 664 static inline Simd4NFloat gmx_simdcall load4DuplicateN(const float* f)
 665 {
 666     return load<Simd4NFloat>(f);
 667 }
 668 static inline Simd4NFloat gmx_simdcall loadU4NOffset(const float* f, int /*unused*/)
 669 {
 670     return loadU<Simd4NFloat>(f);
 671 }
 672 #        elif GMX_SIMD_FLOAT_WIDTH == 8 // forwarded to loadU1DualHsimd()/loadDuplicateHsimd()
 673 static inline Simd4NFloat gmx_simdcall loadUNDuplicate4(const float* f)
 674 {
 675     return loadU1DualHsimd(f);
 676 }
 677 static inline Simd4NFloat gmx_simdcall load4DuplicateN(const float* f)
 678 {
 679     return loadDuplicateHsimd(f);
 680 }
 681 #        endif
 682 #    endif // GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
 683 #else      // GMX_SIMD_HAVE_FLOAT
 684 #    define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT 0
 685 #endif
 686
 687 #if GMX_SIMD_HAVE_DOUBLE
 688 #    if GMX_SIMD_DOUBLE_WIDTH < 4
 689 #        define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE (GMX_SIMD_HAVE_LOADU && GMX_SIMD4_HAVE_DOUBLE)
 690 #    elif GMX_SIMD_DOUBLE_WIDTH == 4
 691 #        define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE GMX_SIMD_HAVE_LOADU
 692 // For GMX_SIMD_DOUBLE_WIDTH>4 it is the responsibility of the implementation to set
 693 // GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
 694 #    endif
 695
 696 #    if GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
 697 #        if GMX_SIMD_DOUBLE_WIDTH < 4 // narrow SIMD: the 4N type is the SIMD4 type
 698 using Simd4NDouble = Simd4Double;
 699 #            define GMX_SIMD4N_DOUBLE_WIDTH 4
 700 #        else // otherwise the 4N type is the regular SIMD type
 701 using Simd4NDouble = SimdDouble;
 702 #            define GMX_SIMD4N_DOUBLE_WIDTH GMX_SIMD_DOUBLE_WIDTH
 703 #        endif
 704
 705 #        if GMX_SIMD_DOUBLE_WIDTH <= 4 // all three functions forward to the constructor or load/loadU
 706 static inline Simd4NDouble gmx_simdcall loadUNDuplicate4(const double* f)
 707 {
 708     return Simd4NDouble(*f); // constructed from the single scalar *f
 709 }
 710 static inline Simd4NDouble gmx_simdcall load4DuplicateN(const double* f)
 711 {
 712     return load<Simd4NDouble>(f);
 713 }
 714 static inline Simd4NDouble gmx_simdcall loadU4NOffset(const double* f, int /*unused*/)
 715 {
 716     return loadU<Simd4NDouble>(f);
 717 }
 718 #        elif GMX_SIMD_DOUBLE_WIDTH == 8 // forwarded to loadU1DualHsimd()/loadDuplicateHsimd()
 719 static inline Simd4NDouble gmx_simdcall loadUNDuplicate4(const double* f)
 720 {
 721     return loadU1DualHsimd(f);
 722 }
 723 static inline Simd4NDouble gmx_simdcall load4DuplicateN(const double* f)
 724 {
 725     return loadDuplicateHsimd(f);
 726 }
 727 #        endif
 728 #    endif // GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
 729 #else      // GMX_SIMD_HAVE_DOUBLE
 730 #    define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE 0
 731 #endif
 732
 733 #if GMX_DOUBLE // pick the 4N capability macro matching the default real precision
 734 #    define GMX_SIMD_HAVE_4NSIMD_UTIL_REAL GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
 735 #else
 736 #    define GMX_SIMD_HAVE_4NSIMD_UTIL_REAL GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
 737 #endif
 738
 739 #if GMX_SIMD_HAVE_4NSIMD_UTIL_REAL // real-precision alias and width of the 4N type
 740 #    if GMX_DOUBLE
 741 using Simd4NReal = Simd4NDouble;
 742 #        define GMX_SIMD4N_REAL_WIDTH GMX_SIMD4N_DOUBLE_WIDTH
 743 #    else
 744 using Simd4NReal = Simd4NFloat;
 745 #        define GMX_SIMD4N_REAL_WIDTH GMX_SIMD4N_FLOAT_WIDTH
 746 #    endif
 747 #endif
 748
 749 //! \}  end of name-group proxy objects
 750
 751 } // namespace gmx
 752
 753 // \}          end of module_simd
 754
 755 //! \endcond   end of condition libapi
 756
 757
 758 #if GMX_SIMD_HAVE_FLOAT
 759
 760 /*! \brief Returns whether a pointer to float is aligned to a SIMD boundary
 761  * \param[in] ptr  A pointer to a float
 762  * \return true if the address is a multiple of GMX_SIMD_FLOAT_WIDTH * sizeof(float)
 763  */
 764 static inline bool isSimdAligned(const float* ptr)
 765 { // std::uintptr_t is the integer type intended for holding a pointer value
 766     return reinterpret_cast<std::uintptr_t>(ptr) % (GMX_SIMD_FLOAT_WIDTH * sizeof(float)) == 0;
 767 }
 768
 769 #endif // GMX_SIMD_HAVE_FLOAT
 770
 771 #if GMX_SIMD_HAVE_DOUBLE
 772
 773 /*! \brief Returns whether a pointer to double is aligned to a SIMD boundary
 774  * \param[in] ptr  A pointer to a double
 775  * \return true if the address is a multiple of GMX_SIMD_DOUBLE_WIDTH * sizeof(double)
 776  */
 777 static inline bool isSimdAligned(const double* ptr)
 778 { // std::uintptr_t is the integer type intended for holding a pointer value
 779     return reinterpret_cast<std::uintptr_t>(ptr) % (GMX_SIMD_DOUBLE_WIDTH * sizeof(double)) == 0;
 780 }
 781
 782 #endif // GMX_SIMD_HAVE_DOUBLE
 783
 784
 785 #if GMX_SIMD_HAVE_REAL
 786 #    if GMX_SIMD_REAL_WIDTH > GMX_REAL_MAX_SIMD_WIDTH
 787 #        error "GMX_SIMD_REAL_WIDTH > GMX_REAL_MAX_SIMD_WIDTH: increase GMX_REAL_MAX_SIMD_WIDTH in real.h"
 788 #    endif
 789 #endif
 790
 791
 792 #if 0
 793 /* This is a hack to cover the corner case of using an
 794    explicit GMX_SIMD_HAVE_FLOAT or GMX_SIMD_HAVE_DOUBLE, rather than
 795    GMX_SIMD_HAVE_REAL.
 796
 797    Such code is expected to include simd.h to get those symbols
 798    defined, but the actual definitions are in the implementation headers
 799    included by simd.h. check-source.py is not a full preprocessor, so
 800    it does not see the definitions in the implementation headers as
 801    belonging to simd.h, thus it cannot check that simd.h is being used
 802    correctly in the above hypothetical corner case. However, the
 803    checker also does not parse #if 0, so we can fool the checker into
 804    thinking that definition occurs here, and that will work well
 805    enough.
 806
 807    If there are ever other kinds of SIMD code that might have the same
 808    problem, we might want to add other variables here.
 809  */
 810 #    define GMX_SIMD_HAVE_FLOAT 1
 811 #    define GMX_SIMD_HAVE_DOUBLE 1
 812
 813 #endif // end of hack
 814
 815 // The ArrayRef<SimdReal> specialization is always included, because compiler
 816 // errors are confusing when template specializations aren't available.
 817 #include "gromacs/simd/simd_memory.h"
 818
 819 #endif // GMX_SIMD_SIMD_H