src/gromacs/simd/impl_reference/impl_reference_simd_float.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2019,2020,2021, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H
  37 #define GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H
  38
  39 /*! \libinternal \file
  40  *
  41  * \brief Reference implementation, SIMD single precision.
 *
  43  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  44  *
  45  * \ingroup module_simd
  46  */
  47
#include "config.h"

#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>

#include <algorithm>
#include <array>

#include "gromacs/math/utilities.h"

#include "impl_reference_definitions.h"
  61
  62 namespace gmx
  63 {
  64
  65 /*! \cond libapi */
  66 /*! \addtogroup module_simd */
  67 /*! \{ */
  68
  69 /*! \name SIMD implementation data types and built-in conversions between types
  70  * \{
  71  */
  72
  73 /*! \libinternal \brief Float SIMD variable. Available if GMX_SIMD_HAVE_FLOAT is 1.
  74  *
  75  * \note This variable cannot be placed inside other structures or classes, since
  76  *       some compilers (including at least clang-3.7) appear to lose the
  77  *       alignment. This is likely particularly severe when allocating such
  78  *       memory on the heap, but it occurs for stack structures too.
  79  */
  80 class SimdFloat
  81 {
  82 public:
  83     SimdFloat() {}
  84
  85     //! \brief Construct from scalar
  86     SimdFloat(float f) { simdInternal_.fill(f); }
  87
  88     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
  89      *
  90      * This has to be public to enable usage in combination with static inline
  91      * functions, but it should never, EVER, be accessed by any code outside
  92      * the corresponding implementation directory since the type will depend
  93      * on the architecture.
  94      */
  95     std::array<float, GMX_SIMD_FLOAT_WIDTH> simdInternal_;
  96 };
  97
  98 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from float.
  99  *
 100  * This is also the widest integer SIMD type. Available if GMX_SIMD_HAVE_FLOAT is 1.
 101  *
 102  * \note The integer SIMD type will always be available, but on architectures
 103  * that do not have any real integer SIMD support it might be defined as the
 104  * floating-point type. This will work fine, since there are separate defines
 105  * for whether the implementation can actually do any operations on integer
 106  * SIMD types.
 107  * \note This variable cannot be placed inside other structures or classes, since
 108  *       some compilers (including at least clang-3.7) appear to lose the
 109  *       alignment. This is likely particularly severe when allocating such
 110  *       memory on the heap, but it occurs for stack structures too.
 111  */
 112 class SimdFInt32
 113 {
 114 public:
 115     SimdFInt32() {}
 116
 117     //! \brief Construct from scalar
 118     SimdFInt32(std::int32_t i) { simdInternal_.fill(i); }
 119
 120     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 121      *
 122      * This has to be public to enable usage in combination with static inline
 123      * functions, but it should never, EVER, be accessed by any code outside
 124      * the corresponding implementation directory since the type will depend
 125      * on the architecture.
 126      */
 127     std::array<std::int32_t, GMX_SIMD_FINT32_WIDTH> simdInternal_;
 128 };
 129
 130 /*! \libinternal \brief Boolean type for float SIMD data.
 131  *
 132  *  Available if GMX_SIMD_HAVE_FLOAT is 1.
 133  *
 134  * \note This variable cannot be placed inside other structures or classes, since
 135  *       some compilers (including at least clang-3.7) appear to lose the
 136  *       alignment. This is likely particularly severe when allocating such
 137  *       memory on the heap, but it occurs for stack structures too.
 138  */
 139 class SimdFBool
 140 {
 141 public:
 142     SimdFBool() {}
 143
 144     //! \brief Construct from scalar
 145     SimdFBool(bool b) { simdInternal_.fill(b); }
 146
 147     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 148      *
 149      * This has to be public to enable usage in combination with static inline
 150      * functions, but it should never, EVER, be accessed by any code outside
 151      * the corresponding implementation directory since the type will depend
 152      * on the architecture.
 153      */
 154     std::array<bool, GMX_SIMD_FLOAT_WIDTH> simdInternal_;
 155 };
 156
 157 /*! \libinternal \brief Boolean type for integer datatypes corresponding to float SIMD.
 158  *
 159  * Available if GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
 160  *
 161  * \note This variable cannot be placed inside other structures or classes, since
 162  *       some compilers (including at least clang-3.7) appear to lose the
 163  *       alignment. This is likely particularly severe when allocating such
 164  *       memory on the heap, but it occurs for stack structures too.
 165  */
 166 class SimdFIBool
 167 {
 168 public:
 169     SimdFIBool() {}
 170
 171     //! \brief Construct from scalar
 172     SimdFIBool(bool b) { simdInternal_.fill(b); }
 173
 174     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 175      *
 176      * This has to be public to enable usage in combination with static inline
 177      * functions, but it should never, EVER, be accessed by any code outside
 178      * the corresponding implementation directory since the type will depend
 179      * on the architecture.
 180      */
 181     std::array<bool, GMX_SIMD_FINT32_WIDTH> simdInternal_;
 182 };
 183
 184 /*! \}
 185  *
 186  * \name SIMD implementation load/store operations for single precision floating point
 187  * \{
 188  */
 189
 190 /*! \brief Load \ref GMX_SIMD_FLOAT_WIDTH float numbers from aligned memory.
 191  *
 192  * \param m Pointer to memory aligned to the SIMD width.
 193  * \return SIMD variable with data loaded.
 194  */
 195 static inline SimdFloat gmx_simdcall simdLoad(const float* m, SimdFloatTag = {})
 196 {
 197     SimdFloat a;
 198
 199     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(float)) == 0);
 200
 201     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 202     return a;
 203 }
 204
 205 /*! \brief Store the contents of SIMD float variable to aligned memory m.
 206  *
 207  * \param[out] m Pointer to memory, aligned to SIMD width.
 208  * \param a SIMD variable to store
 209  */
 210 static inline void gmx_simdcall store(float* m, SimdFloat a)
 211 {
 212     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(float)) == 0);
 213
 214     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 215 }
 216
 217 /*! \brief Load SIMD float from unaligned memory.
 218  *
 219  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 220  *
 221  * \param m Pointer to memory, no alignment requirement.
 222  * \return SIMD variable with data loaded.
 223  */
 224 static inline SimdFloat gmx_simdcall simdLoadU(const float* m, SimdFloatTag = {})
 225 {
 226     SimdFloat a;
 227     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 228     return a;
 229 }
 230
 231 /*! \brief Store SIMD float to unaligned memory.
 232  *
 233  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 234  *
 235  * \param[out] m Pointer to memory, no alignment requirement.
 236  * \param a SIMD variable to store.
 237  */
 238 static inline void gmx_simdcall storeU(float* m, SimdFloat a)
 239 {
 240     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 241 }
 242
 243 /*! \brief Set all SIMD float variable elements to 0.0.
 244  *
 245  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 246  * internally to handle all types rather than adding the suffix used here.
 247  *
 248  * \return SIMD 0.0F
 249  */
 250 static inline SimdFloat gmx_simdcall setZeroF()
 251 {
 252     return SimdFloat(0.0F);
 253 }
 254
 255 /*! \} */
 256
 257
 258 /*!
 259  * \name SIMD implementation load/store operations for integers (corresponding to float)
 260  * \{
 261  */
 262
 263 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
 264  *
 265  * You should typically just call \ref gmx::load(), which uses proxy objects
 266  * internally to handle all types rather than adding the suffix used here.
 267  *
 268  * \param m Pointer to memory, aligned to (float) integer SIMD width.
 269  * \return SIMD integer variable.
 270  */
 271 static inline SimdFInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdFInt32Tag)
 272 {
 273     SimdFInt32 a;
 274
 275     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(std::int32_t)) == 0);
 276
 277     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 278     return a;
 279 };
 280
 281 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
 282  *
 283  * \param m Memory aligned to (float) integer SIMD width.
 284  * \param a SIMD variable to store.
 285  */
 286 static inline void gmx_simdcall store(std::int32_t* m, SimdFInt32 a)
 287 {
 288     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(std::int32_t)) == 0);
 289
 290     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 291 };
 292
 293 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx::SimdFloat.
 294  *
 295  * You should typically just call \ref gmx::loadU(), which uses proxy objects
 296  * internally to handle all types rather than adding the suffix used here.
 297  *
 298  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 299  *
 300  * \param m Pointer to memory, no alignment requirements.
 301  * \return SIMD integer variable.
 302  */
 303 static inline SimdFInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdFInt32Tag)
 304 {
 305     SimdFInt32 a;
 306     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 307     return a;
 308 }
 309
 310 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
 311  *
 312  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 313  *
 314  * \param m Memory pointer, no alignment requirements.
 315  * \param a SIMD variable to store.
 316  */
 317 static inline void gmx_simdcall storeU(std::int32_t* m, SimdFInt32 a)
 318 {
 319     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 320 }
 321
 322 /*! \brief Set all SIMD (float) integer variable elements to 0.
 323  *
 324  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 325  * internally to handle all types rather than adding the suffix used here.
 326  *
 327  * \return SIMD 0
 328  */
 329 static inline SimdFInt32 gmx_simdcall setZeroFI()
 330 {
 331     return SimdFInt32(0);
 332 }
 333
 334 /*! \brief Extract element with index i from \ref gmx::SimdFInt32.
 335  *
 336  * Available if \ref GMX_SIMD_HAVE_FINT32_EXTRACT is 1.
 337  *
 338  * \tparam index Compile-time constant, position to extract (first position is 0)
 339  * \param  a     SIMD variable from which to extract value.
 340  * \return Single integer from position index in SIMD variable.
 341  */
 342 template<int index>
 343 static inline std::int32_t gmx_simdcall extract(SimdFInt32 a)
 344 {
 345     return a.simdInternal_[index];
 346 }
 347
 348 /*! \}
 349  *
 350  * \name SIMD implementation single precision floating-point bitwise logical operations
 351  * \{
 352  */
 353
 354 /*! \brief Bitwise and for two SIMD float variables.
 355  *
 356  * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 357  *
 358  * \param a data1
 359  * \param b data2
 360  * \return data1 & data2
 361  */
 362 static inline SimdFloat gmx_simdcall operator&(SimdFloat a, SimdFloat b)
 363 {
 364     SimdFloat res;
 365
 366     union
 367     {
 368         float        r;
 369         std::int32_t i;
 370     } conv1, conv2;
 371
 372     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 373     {
 374         conv1.r              = a.simdInternal_[i];
 375         conv2.r              = b.simdInternal_[i];
 376         conv1.i              = conv1.i & conv2.i;
 377         res.simdInternal_[i] = conv1.r;
 378     }
 379     return res;
 380 }
 381
 382 /*! \brief Bitwise andnot for SIMD float.
 383  *
 384  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 385  *
 386  * \param a data1
 387  * \param b data2
 388  * \return (~data1) & data2
 389  */
 390 static inline SimdFloat gmx_simdcall andNot(SimdFloat a, SimdFloat b)
 391 {
 392     SimdFloat res;
 393
 394     union
 395     {
 396         float        r;
 397         std::int32_t i;
 398     } conv1, conv2;
 399
 400     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 401     {
 402         conv1.r              = a.simdInternal_[i];
 403         conv2.r              = b.simdInternal_[i];
 404         conv1.i              = ~conv1.i & conv2.i;
 405         res.simdInternal_[i] = conv1.r;
 406     }
 407     return res;
 408 }
 409
 410 /*! \brief Bitwise or for SIMD float.
 411  *
 412  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 413  *
 414  * \param a data1
 415  * \param b data2
 416  * \return data1 | data2
 417  */
 418 static inline SimdFloat gmx_simdcall operator|(SimdFloat a, SimdFloat b)
 419 {
 420     SimdFloat res;
 421
 422     union
 423     {
 424         float        r;
 425         std::int32_t i;
 426     } conv1, conv2;
 427
 428     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 429     {
 430         conv1.r              = a.simdInternal_[i];
 431         conv2.r              = b.simdInternal_[i];
 432         conv1.i              = conv1.i | conv2.i;
 433         res.simdInternal_[i] = conv1.r;
 434     }
 435     return res;
 436 }
 437
 438 /*! \brief Bitwise xor for SIMD float.
 439  *
 440  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 441  *
 442  * \param a data1
 443  * \param b data2
 444  * \return data1 ^ data2
 445  */
 446 static inline SimdFloat gmx_simdcall operator^(SimdFloat a, SimdFloat b)
 447 {
 448     SimdFloat res;
 449
 450     union
 451     {
 452         float        r;
 453         std::int32_t i;
 454     } conv1, conv2;
 455
 456     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 457     {
 458         conv1.r              = a.simdInternal_[i];
 459         conv2.r              = b.simdInternal_[i];
 460         conv1.i              = conv1.i ^ conv2.i;
 461         res.simdInternal_[i] = conv1.r;
 462     }
 463     return res;
 464 }
 465
 466 /*! \}
 467  *
 468  * \name SIMD implementation single precision floating-point arithmetics
 469  * \{
 470  */
 471
 472 /*! \brief Add two float SIMD variables.
 473  *
 474  * \param a term1
 475  * \param b term2
 476  * \return a+b
 477  */
 478 static inline SimdFloat gmx_simdcall operator+(SimdFloat a, SimdFloat b)
 479 {
 480     SimdFloat res;
 481
 482     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 483     {
 484         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
 485     }
 486     return res;
 487 }
 488
 489 /*! \brief Subtract two float SIMD variables.
 490  *
 491  * \param a term1
 492  * \param b term2
 493  * \return a-b
 494  */
 495 static inline SimdFloat gmx_simdcall operator-(SimdFloat a, SimdFloat b)
 496 {
 497     SimdFloat res;
 498
 499     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 500     {
 501         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
 502     }
 503     return res;
 504 }
 505
 506 /*! \brief SIMD single precision negate.
 507  *
 508  * \param a SIMD double precision value
 509  * \return -a
 510  */
 511 static inline SimdFloat gmx_simdcall operator-(SimdFloat a)
 512 {
 513     SimdFloat res;
 514
 515     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 516     {
 517         res.simdInternal_[i] = -a.simdInternal_[i];
 518     }
 519     return res;
 520 }
 521
 522 /*! \brief Multiply two float SIMD variables.
 523  *
 524  * \param a factor1
 525  * \param b factor2
 526  * \return a*b.
 527  */
 528 static inline SimdFloat gmx_simdcall operator*(SimdFloat a, SimdFloat b)
 529 {
 530     SimdFloat res;
 531
 532     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 533     {
 534         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
 535     }
 536     return res;
 537 }
 538
 539 /*! \brief SIMD float Fused-multiply-add. Result is a*b+c.
 540  *
 541  * \param a factor1
 542  * \param b factor2
 543  * \param c term
 544  * \return a*b+c
 545  */
 546 static inline SimdFloat gmx_simdcall fma(SimdFloat a, SimdFloat b, SimdFloat c)
 547 {
 548     return a * b + c;
 549 }
 550
 551 /*! \brief SIMD float Fused-multiply-subtract. Result is a*b-c.
 552  *
 553  * \param a factor1
 554  * \param b factor2
 555  * \param c term
 556  * \return a*b-c
 557  */
 558 static inline SimdFloat gmx_simdcall fms(SimdFloat a, SimdFloat b, SimdFloat c)
 559 {
 560     return a * b - c;
 561 }
 562
 563 /*! \brief SIMD float Fused-negated-multiply-add. Result is -a*b+c.
 564  *
 565  * \param a factor1
 566  * \param b factor2
 567  * \param c term
 568  * \return -a*b+c
 569  */
 570 static inline SimdFloat gmx_simdcall fnma(SimdFloat a, SimdFloat b, SimdFloat c)
 571 {
 572     return c - a * b;
 573 }
 574
 575 /*! \brief SIMD float Fused-negated-multiply-subtract. Result is -a*b-c.
 576  *
 577  * \param a factor1
 578  * \param b factor2
 579  * \param c term
 580  * \return -a*b-c
 581  */
 582 static inline SimdFloat gmx_simdcall fnms(SimdFloat a, SimdFloat b, SimdFloat c)
 583 {
 584     return -a * b - c;
 585 }
 586
 587 /*! \brief SIMD float 1.0/sqrt(x) lookup.
 588  *
 589  * This is a low-level instruction that should only be called from routines
 590  * implementing the inverse square root in simd_math.h.
 591  *
 592  * \param x Argument, x>0
 593  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 594  */
 595 static inline SimdFloat gmx_simdcall rsqrt(SimdFloat x)
 596 {
 597     SimdFloat res;
 598
 599     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 600     {
 601         res.simdInternal_[i] = 1.0F / std::sqrt(x.simdInternal_[i]);
 602     }
 603     return res;
 604 };
 605
 606 /*! \brief SIMD float 1.0/x lookup.
 607  *
 608  * This is a low-level instruction that should only be called from routines
 609  * implementing the reciprocal in simd_math.h.
 610  *
 611  * \param x Argument, x!=0
 612  * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
 613  */
 614 static inline SimdFloat gmx_simdcall rcp(SimdFloat x)
 615 {
 616     SimdFloat res;
 617
 618     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 619     {
 620         res.simdInternal_[i] = 1.0F / x.simdInternal_[i];
 621     }
 622     return res;
 623 };
 624
 625 /*! \brief Add two float SIMD variables, masked version.
 626  *
 627  * \param a term1
 628  * \param b term2
 629  * \param m mask
 630  * \return a+b where mask is true, a otherwise.
 631  */
 632 static inline SimdFloat gmx_simdcall maskAdd(SimdFloat a, SimdFloat b, SimdFBool m)
 633 {
 634     SimdFloat res;
 635
 636     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 637     {
 638         res.simdInternal_[i] = a.simdInternal_[i] + (m.simdInternal_[i] ? b.simdInternal_[i] : 0.0F);
 639     }
 640     return res;
 641 }
 642
 643 /*! \brief Multiply two float SIMD variables, masked version.
 644  *
 645  * \param a factor1
 646  * \param b factor2
 647  * \param m mask
 648  * \return a*b where mask is true, 0.0 otherwise.
 649  */
 650 static inline SimdFloat gmx_simdcall maskzMul(SimdFloat a, SimdFloat b, SimdFBool m)
 651 {
 652     SimdFloat res;
 653
 654     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 655     {
 656         res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i]) : 0.0F;
 657     }
 658     return res;
 659 }
 660
 661 /*! \brief SIMD float fused multiply-add, masked version.
 662  *
 663  * \param a factor1
 664  * \param b factor2
 665  * \param c term
 666  * \param m mask
 667  * \return a*b+c where mask is true, 0.0 otherwise.
 668  */
 669 static inline SimdFloat gmx_simdcall maskzFma(SimdFloat a, SimdFloat b, SimdFloat c, SimdFBool m)
 670 {
 671     SimdFloat res;
 672
 673     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 674     {
 675         res.simdInternal_[i] =
 676                 m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i] + c.simdInternal_[i]) : 0.0F;
 677     }
 678     return res;
 679 }
 680
 681 /*! \brief SIMD float 1.0/sqrt(x) lookup, masked version.
 682  *
 683  * This is a low-level instruction that should only be called from routines
 684  * implementing the inverse square root in simd_math.h.
 685  *
 686  * \param x Argument, x>0 for entries where mask is true.
 687  * \param m Mask
 688  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 689  *         The result for masked-out entries will be 0.0.
 690  */
 691 static inline SimdFloat gmx_simdcall maskzRsqrt(SimdFloat x, SimdFBool m)
 692 {
 693     SimdFloat res;
 694
 695     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 696     {
 697         res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0F / std::sqrt(x.simdInternal_[i]) : 0.0F;
 698     }
 699     return res;
 700 }
 701
 702 /*! \brief SIMD float 1.0/x lookup, masked version.
 703  *
 704  * This is a low-level instruction that should only be called from routines
 705  * implementing the reciprocal in simd_math.h.
 706  *
 707  * \param x Argument, x>0 for entries where mask is true.
 708  * \param m Mask
 709  * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
 710  *         The result for masked-out entries will be 0.0.
 711  */
 712 static inline SimdFloat gmx_simdcall maskzRcp(SimdFloat x, SimdFBool m)
 713 {
 714     SimdFloat res;
 715
 716     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 717     {
 718         res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0F / x.simdInternal_[i] : 0.0F;
 719     }
 720     return res;
 721 }
 722
 723 /*! \brief SIMD float Floating-point abs().
 724  *
 725  * \param a any floating point values
 726  * \return abs(a) for each element.
 727  */
 728 static inline SimdFloat gmx_simdcall abs(SimdFloat a)
 729 {
 730     SimdFloat res;
 731
 732     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 733     {
 734         res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
 735     }
 736     return res;
 737 }
 738
 739 /*! \brief Set each SIMD float element to the largest from two variables.
 740  *
 741  * \param a Any floating-point value
 742  * \param b Any floating-point value
 743  * \return max(a,b) for each element.
 744  */
 745 static inline SimdFloat gmx_simdcall max(SimdFloat a, SimdFloat b)
 746 {
 747     SimdFloat res;
 748
 749     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 750     {
 751         res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
 752     }
 753     return res;
 754 }
 755
 756 /*! \brief Set each SIMD float element to the smallest from two variables.
 757  *
 758  * \param a Any floating-point value
 759  * \param b Any floating-point value
 760  * \return min(a,b) for each element.
 761  */
 762 static inline SimdFloat gmx_simdcall min(SimdFloat a, SimdFloat b)
 763 {
 764     SimdFloat res;
 765
 766     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 767     {
 768         res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
 769     }
 770     return res;
 771 }
 772
 773 /*! \brief SIMD float round to nearest integer value (in floating-point format).
 774  *
 775  * \param a Any floating-point value
 776  * \return The nearest integer, represented in floating-point format.
 777  *
 778  * \note Round mode is implementation defined. The only guarantee is that it
 779  * is consistent between rounding functions (round, cvtR2I).
 780  */
 781 static inline SimdFloat gmx_simdcall round(SimdFloat a)
 782 {
 783     SimdFloat res;
 784
 785     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 786     {
 787         res.simdInternal_[i] = std::round(a.simdInternal_[i]);
 788     }
 789     return res;
 790 }
 791
 792 /*! \brief Truncate SIMD float, i.e. round towards zero - common hardware instruction.
 793  *
 794  * \param a Any floating-point value
 795  * \return Integer rounded towards zero, represented in floating-point format.
 796  *
 797  * \note This is truncation towards zero, not floor(). The reason for this
 798  * is that truncation is virtually always present as a dedicated hardware
 799  * instruction, but floor() frequently isn't.
 800  */
 801 static inline SimdFloat gmx_simdcall trunc(SimdFloat a)
 802 {
 803     SimdFloat res;
 804
 805     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 806     {
 807         res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
 808     }
 809     return res;
 810 }
 811
 812 /*! \brief Extract (integer) exponent and fraction from single precision SIMD.
 813  *
 814  * \tparam      opt       By default this function behaves like the standard
 815  *                        library such that frexp(+-0,exp) returns +-0 and
 816  *                        stores 0 in the exponent when value is 0. If you
 817  *                        know the argument is always nonzero, you can set
 818  *                        the template parameter to MathOptimization::Unsafe
 819  *                        to make it slightly faster.
 820  *
 821  * \param       value     Floating-point value to extract from
 822  * \param[out]  exponent  Returned exponent of value, integer SIMD format.
 823  * \return      Fraction of value, floating-point SIMD format.
 824  */
 825 template<MathOptimization opt = MathOptimization::Safe>
 826 static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent)
 827 {
 828     SimdFloat fraction;
 829
 830     for (std::size_t i = 0; i < fraction.simdInternal_.size(); i++)
 831     {
 832         fraction.simdInternal_[i] = std::frexp(value.simdInternal_[i], &exponent->simdInternal_[i]);
 833     }
 834     return fraction;
 835 }
 836
 837 /*! \brief Multiply a SIMD float value by the number 2 raised to an exp power.
 838  *
 839  * \tparam opt By default, this routine will return zero for input arguments
 840  *             that are so small they cannot be reproduced in the current
 841  *             precision. If the unsafe math optimization template parameter
 842  *             setting is used, these tests are skipped, and the result will
 843  *             be undefined (possible even NaN). This might happen below -127
 844  *             in single precision or -1023 in double, although some
 845  *             might use denormal support to extend the range.
 846  *
 847  * \param value Floating-point number to multiply with new exponent
 848  * \param exponent Integer that will not overflow as 2^exponent.
 849  * \return value*2^exponent
 850  */
 851 template<MathOptimization opt = MathOptimization::Safe>
 852 static inline SimdFloat gmx_simdcall ldexp(SimdFloat value, SimdFInt32 exponent)
 853 {
 854     SimdFloat res;
 855
 856     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 857     {
 858         // std::ldexp already takes care of clamping arguments, so we do not
 859         // need to do anything in the reference implementation
 860         res.simdInternal_[i] = std::ldexp(value.simdInternal_[i], exponent.simdInternal_[i]);
 861     }
 862     return res;
 863 }
 864
 865 /*! \brief Return sum of all elements in SIMD float variable.
 866  *
 867  * \param a SIMD variable to reduce/sum.
 868  * \return The sum of all elements in the argument variable.
 869  *
 870  */
 871 static inline float gmx_simdcall reduce(SimdFloat a)
 872 {
 873     float sum = 0.0F;
 874
 875     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
 876     {
 877         sum += a.simdInternal_[i];
 878     }
 879     return sum;
 880 }
 881
 882 /*! \}
 883  *
 884  * \name SIMD implementation single precision floating-point comparisons, boolean, selection.
 885  * \{
 886  */
 887
 888 /*! \brief SIMD a==b for single SIMD.
 889  *
 890  * \param a value1
 891  * \param b value2
 892  * \return Each element of the boolean will be set to true if a==b.
 893  *
 894  * Beware that exact floating-point comparisons are difficult.
 895  */
 896 static inline SimdFBool gmx_simdcall operator==(SimdFloat a, SimdFloat b)
 897 {
 898     SimdFBool res;
 899
 900     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 901     {
 902         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
 903     }
 904     return res;
 905 }
 906
 907 /*! \brief SIMD a!=b for single SIMD.
 908  *
 909  * \param a value1
 910  * \param b value2
 911  * \return Each element of the boolean will be set to true if a!=b.
 912  *
 913  * Beware that exact floating-point comparisons are difficult.
 914  */
 915 static inline SimdFBool gmx_simdcall operator!=(SimdFloat a, SimdFloat b)
 916 {
 917     SimdFBool res;
 918
 919     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 920     {
 921         res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
 922     }
 923     return res;
 924 }
 925
 926 /*! \brief SIMD a<b for single SIMD.
 927  *
 928  * \param a value1
 929  * \param b value2
 930  * \return Each element of the boolean will be set to true if a<b.
 931  */
 932 static inline SimdFBool gmx_simdcall operator<(SimdFloat a, SimdFloat b)
 933 {
 934     SimdFBool res;
 935
 936     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 937     {
 938         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
 939     }
 940     return res;
 941 }
 942
 943 /*! \brief SIMD a<=b for single SIMD.
 944  *
 945  * \param a value1
 946  * \param b value2
 947  * \return Each element of the boolean will be set to true if a<=b.
 948  */
 949 static inline SimdFBool gmx_simdcall operator<=(SimdFloat a, SimdFloat b)
 950 {
 951     SimdFBool res;
 952
 953     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 954     {
 955         res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
 956     }
 957     return res;
 958 }
 959
 960 /*! \brief Return true if any bits are set in the single precision SIMD.
 961  *
 962  * This function is used to handle bitmasks, mainly for exclusions in the
 963  * inner kernels. Note that it will return true even for -0.0F (sign bit set),
 964  * so it is not identical to not-equal.
 965  *
 966  * \param a value
 967  * \return Each element of the boolean will be true if any bit in a is nonzero.
 968  */
 969 static inline SimdFBool gmx_simdcall testBits(SimdFloat a)
 970 {
 971     SimdFBool res;
 972
 973     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 974     {
 975         union
 976         {
 977             std::uint32_t i;
 978             float         f;
 979         } conv;
 980
 981         conv.f               = a.simdInternal_[i];
 982         res.simdInternal_[i] = (conv.i != 0);
 983     }
 984     return res;
 985 }
 986
 987 /*! \brief Logical \a and on single precision SIMD booleans.
 988  *
 989  * \param a logical vars 1
 990  * \param b logical vars 2
 991  * \return For each element, the result boolean is true if a \& b are true.
 992  *
 993  * \note This is not necessarily a bitwise operation - the storage format
 994  * of booleans is implementation-dependent.
 995  */
 996 static inline SimdFBool gmx_simdcall operator&&(SimdFBool a, SimdFBool b)
 997 {
 998     SimdFBool res;
 999
1000     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1001     {
1002         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1003     }
1004     return res;
1005 }
1006
1007 /*! \brief Logical \a or on single precision SIMD booleans.
1008  *
1009  * \param a logical vars 1
1010  * \param b logical vars 2
1011  * \return For each element, the result boolean is true if a or b is true.
1012  *
1013  * Note that this is not necessarily a bitwise operation - the storage format
1014  * of booleans is implementation-dependent.
1015  *
1016  \ */
1017 static inline SimdFBool gmx_simdcall operator||(SimdFBool a, SimdFBool b)
1018 {
1019     SimdFBool res;
1020
1021     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1022     {
1023         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1024     }
1025     return res;
1026 }
1027
1028 /*! \brief Returns non-zero if any of the boolean in SIMD a is True, otherwise 0.
1029  *
1030  * \param a Logical variable.
1031  * \return true if any element in a is true, otherwise false.
1032  *
1033  * The actual return value for truth will depend on the architecture,
1034  * so any non-zero value is considered truth.
1035  */
1036 static inline bool gmx_simdcall anyTrue(SimdFBool a)
1037 {
1038     bool res = false;
1039
1040     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1041     {
1042         res = res || a.simdInternal_[i];
1043     }
1044     return res;
1045 }
1046
1047 /*! \brief Select from single precision SIMD variable where boolean is true.
1048  *
1049  * \param a Floating-point variable to select from
1050  * \param mask Boolean selector
1051  * \return  For each element, a is selected for true, 0 for false.
1052  */
1053 static inline SimdFloat gmx_simdcall selectByMask(SimdFloat a, SimdFBool mask)
1054 {
1055     SimdFloat res;
1056
1057     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1058     {
1059         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0F;
1060     }
1061     return res;
1062 }
1063
1064 /*! \brief Select from single precision SIMD variable where boolean is false.
1065  *
1066  * \param a Floating-point variable to select from
1067  * \param mask Boolean selector
1068  * \return  For each element, a is selected for false, 0 for true (sic).
1069  */
1070 static inline SimdFloat gmx_simdcall selectByNotMask(SimdFloat a, SimdFBool mask)
1071 {
1072     SimdFloat res;
1073
1074     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1075     {
1076         res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0F : a.simdInternal_[i];
1077     }
1078     return res;
1079 }
1080
1081 /*! \brief Vector-blend SIMD float selection.
1082  *
1083  * \param a First source
1084  * \param b Second source
1085  * \param sel Boolean selector
1086  * \return For each element, select b if sel is true, a otherwise.
1087  */
1088 static inline SimdFloat gmx_simdcall blend(SimdFloat a, SimdFloat b, SimdFBool sel)
1089 {
1090     SimdFloat res;
1091
1092     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1093     {
1094         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1095     }
1096     return res;
1097 }
1098
1099 /*! \}
1100  *
1101  * \name SIMD implementation integer (corresponding to float) bitwise logical operations
1102  * \{
1103  */
1104
1105 /*! \brief Integer SIMD bitwise and.
1106  *
1107  * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1108  *
1109  * \note You can \a not use this operation directly to select based on a boolean
1110  * SIMD variable, since booleans are separate from integer SIMD. If that
1111  * is what you need, have a look at \ref gmx::selectByMask instead.
1112  *
1113  * \param a first integer SIMD
1114  * \param b second integer SIMD
1115  * \return a \& b (bitwise and)
1116  */
1117 static inline SimdFInt32 gmx_simdcall operator&(SimdFInt32 a, SimdFInt32 b)
1118 {
1119     SimdFInt32 res;
1120
1121     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1122     {
1123         res.simdInternal_[i] = a.simdInternal_[i] & b.simdInternal_[i];
1124     }
1125     return res;
1126 }
1127
1128 /*! \brief Integer SIMD bitwise not/complement.
1129  *
1130  * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1131  *
1132  * \note You can \a not use this operation directly to select based on a boolean
1133  * SIMD variable, since booleans are separate from integer SIMD. If that
1134  * is what you need, have a look at \ref gmx::selectByMask instead.
1135  *
1136  * \param a integer SIMD
1137  * \param b integer SIMD
1138  * \return (~a) & b
1139  */
1140 static inline SimdFInt32 gmx_simdcall andNot(SimdFInt32 a, SimdFInt32 b)
1141 {
1142     SimdFInt32 res;
1143
1144     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1145     {
1146         res.simdInternal_[i] = ~a.simdInternal_[i] & b.simdInternal_[i];
1147     }
1148     return res;
1149 }
1150
1151 /*! \brief Integer SIMD bitwise or.
1152  *
1153  * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1154  *
1155  * \param a first integer SIMD
1156  * \param b second integer SIMD
1157  * \return a \| b (bitwise or)
1158  */
1159 static inline SimdFInt32 gmx_simdcall operator|(SimdFInt32 a, SimdFInt32 b)
1160 {
1161     SimdFInt32 res;
1162
1163     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1164     {
1165         res.simdInternal_[i] = a.simdInternal_[i] | b.simdInternal_[i];
1166     }
1167     return res;
1168 }
1169
1170 /*! \brief Integer SIMD bitwise xor.
1171  *
1172  * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1173  *
1174  * \param a first integer SIMD
1175  * \param b second integer SIMD
1176  * \return a ^ b (bitwise xor)
1177  */
1178 static inline SimdFInt32 gmx_simdcall operator^(SimdFInt32 a, SimdFInt32 b)
1179 {
1180     SimdFInt32 res;
1181
1182     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1183     {
1184         res.simdInternal_[i] = a.simdInternal_[i] ^ b.simdInternal_[i];
1185     }
1186     return res;
1187 }
1188
1189 /*! \}
1190  *
1191  * \name SIMD implementation integer (corresponding to float) arithmetics
1192  * \{
1193  */
1194
1195 /*! \brief Add SIMD integers.
1196  *
1197  * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1198  *  or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1199  *
1200  * \param a term1
1201  * \param b term2
1202  * \return a+b
1203  */
1204 static inline SimdFInt32 gmx_simdcall operator+(SimdFInt32 a, SimdFInt32 b)
1205 {
1206     SimdFInt32 res;
1207
1208     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1209     {
1210         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
1211     }
1212     return res;
1213 }
1214
1215 /*! \brief Subtract SIMD integers.
1216  *
1217  * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1218  *  or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1219  *
1220  * \param a term1
1221  * \param b term2
1222  * \return a-b
1223  */
1224 static inline SimdFInt32 gmx_simdcall operator-(SimdFInt32 a, SimdFInt32 b)
1225 {
1226     SimdFInt32 res;
1227
1228     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1229     {
1230         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
1231     }
1232     return res;
1233 }
1234
1235 /*! \brief Multiply SIMD integers.
1236  *
1237  * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1238  *  or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1239  *
1240  * \param a factor1
1241  * \param b factor2
1242  * \return a*b.
1243  *
1244  * \note Only the low 32 bits are retained, so this can overflow.
1245  */
1246 static inline SimdFInt32 gmx_simdcall operator*(SimdFInt32 a, SimdFInt32 b)
1247 {
1248     SimdFInt32 res;
1249
1250     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1251     {
1252         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
1253     }
1254     return res;
1255 }
1256
1257 /*! \}
1258  *
1259  * \name SIMD implementation integer (corresponding to float) comparisons, boolean, selection
1260  * \{
1261  */
1262
1263 /*! \brief Equality comparison of two integers corresponding to float values.
1264  *
1265  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1266  *
1267  * \param a SIMD integer1
1268  * \param b SIMD integer2
1269  * \return SIMD integer boolean with true for elements where a==b
1270  */
1271 static inline SimdFIBool gmx_simdcall operator==(SimdFInt32 a, SimdFInt32 b)
1272 {
1273     SimdFIBool res;
1274
1275     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1276     {
1277         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
1278     }
1279     return res;
1280 }
1281
1282 /*! \brief Less-than comparison of two SIMD integers corresponding to float values.
1283  *
1284  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1285  *
1286  * \param a SIMD integer1
1287  * \param b SIMD integer2
1288  * \return SIMD integer boolean with true for elements where a<b
1289  */
1290 static inline SimdFIBool gmx_simdcall operator<(SimdFInt32 a, SimdFInt32 b)
1291 {
1292     SimdFIBool res;
1293
1294     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1295     {
1296         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
1297     }
1298     return res;
1299 }
1300
1301 /*! \brief Check if any bit is set in each element
1302  *
1303  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1304  *
1305  * \param a SIMD integer
1306  * \return SIMD integer boolean with true for elements where any bit is set
1307  */
1308 static inline SimdFIBool gmx_simdcall testBits(SimdFInt32 a)
1309 {
1310     SimdFIBool res;
1311
1312     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1313     {
1314         res.simdInternal_[i] = (a.simdInternal_[i] != 0);
1315     }
1316     return res;
1317 }
1318
1319 /*! \brief Logical AND on SimdFIBool.
1320  *
1321  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1322  *
1323  * \param a SIMD boolean 1
1324  * \param b SIMD boolean 2
1325  * \return True for elements where both a and b are true.
1326  */
1327 static inline SimdFIBool gmx_simdcall operator&&(SimdFIBool a, SimdFIBool b)
1328 {
1329     SimdFIBool res;
1330
1331     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1332     {
1333         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1334     }
1335     return res;
1336 }
1337
1338 /*! \brief Logical OR on SimdFIBool.
1339  *
1340  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1341  *
1342  * \param a SIMD boolean 1
1343  * \param b SIMD boolean 2
1344  * \return True for elements where both a and b are true.
1345  */
1346 static inline SimdFIBool gmx_simdcall operator||(SimdFIBool a, SimdFIBool b)
1347 {
1348     SimdFIBool res;
1349
1350     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1351     {
1352         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1353     }
1354     return res;
1355 }
1356
1357 /*! \brief Returns true if any of the boolean in x is True, otherwise 0.
1358  *
1359  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1360  *
1361  * The actual return value for "any true" will depend on the architecture.
1362  * Any non-zero value should be considered truth.
1363  *
1364  * \param a SIMD boolean
1365  * \return True if any of the elements in a is true, otherwise 0.
1366  */
1367 static inline bool gmx_simdcall anyTrue(SimdFIBool a)
1368 {
1369     bool res = false;
1370
1371     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1372     {
1373         res = res || a.simdInternal_[i];
1374     }
1375     return res;
1376 }
1377
1378 /*! \brief Select from \ref gmx::SimdFInt32 variable where boolean is true.
1379  *
1380  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1381  *
1382  * \param a SIMD integer to select from
1383  * \param mask Boolean selector
1384  * \return Elements from a where sel is true, 0 otherwise.
1385  */
1386 static inline SimdFInt32 gmx_simdcall selectByMask(SimdFInt32 a, SimdFIBool mask)
1387 {
1388     SimdFInt32 res;
1389
1390     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1391     {
1392         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0F;
1393     }
1394     return res;
1395 }
1396
1397 /*! \brief Select from \ref gmx::SimdFInt32 variable where boolean is false.
1398  *
1399  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1400  *
1401  * \param a SIMD integer to select from
1402  * \param mask Boolean selector
1403  * \return Elements from a where sel is false, 0 otherwise (sic).
1404  */
1405 static inline SimdFInt32 gmx_simdcall selectByNotMask(SimdFInt32 a, SimdFIBool mask)
1406 {
1407     SimdFInt32 res;
1408
1409     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1410     {
1411         res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0F : a.simdInternal_[i];
1412     }
1413     return res;
1414 }
1415
1416 /*! \brief Vector-blend SIMD integer selection.
1417  *
1418  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1419  *
1420  * \param a First source
1421  * \param b Second source
1422  * \param sel Boolean selector
1423  * \return For each element, select b if sel is true, a otherwise.
1424  */
1425 static inline SimdFInt32 gmx_simdcall blend(SimdFInt32 a, SimdFInt32 b, SimdFIBool sel)
1426 {
1427     SimdFInt32 res;
1428
1429     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1430     {
1431         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1432     }
1433     return res;
1434 }
1435
1436 /*! \}
1437  *
1438  * \name SIMD implementation conversion operations
1439  * \{
1440  */
1441
1442 /*! \brief Round single precision floating point to integer.
1443  *
1444  * \param a SIMD floating-point
1445  * \return SIMD integer, rounded to nearest integer.
1446  *
1447  * \note Round mode is implementation defined. The only guarantee is that it
1448  * is consistent between rounding functions (round, cvtR2I).
1449  */
1450 static inline SimdFInt32 gmx_simdcall cvtR2I(SimdFloat a)
1451 {
1452     SimdFInt32 b;
1453
1454     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1455     {
1456         b.simdInternal_[i] = std::round(a.simdInternal_[i]);
1457     }
1458     return b;
1459 };
1460
1461 /*! \brief Truncate single precision floating point to integer.
1462  *
1463  * \param a SIMD floating-point
1464  * \return SIMD integer, truncated to nearest integer.
1465  */
1466 static inline SimdFInt32 gmx_simdcall cvttR2I(SimdFloat a)
1467 {
1468     SimdFInt32 b;
1469
1470     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1471     {
1472         b.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
1473     }
1474     return b;
1475 };
1476
1477 /*! \brief Convert integer to single precision floating point.
1478  *
1479  * \param a SIMD integer
1480  * \return SIMD floating-point
1481  */
1482 static inline SimdFloat gmx_simdcall cvtI2R(SimdFInt32 a)
1483 {
1484     SimdFloat b;
1485
1486     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1487     {
1488         b.simdInternal_[i] = a.simdInternal_[i];
1489     }
1490     return b;
1491 };
1492
1493 /*! \brief Convert from single precision boolean to corresponding integer boolean
1494  *
1495  * \param a SIMD floating-point boolean
1496  * \return SIMD integer boolean
1497  */
1498 static inline SimdFIBool gmx_simdcall cvtB2IB(SimdFBool a)
1499 {
1500     SimdFIBool b;
1501
1502     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1503     {
1504         b.simdInternal_[i] = a.simdInternal_[i];
1505     }
1506     return b;
1507 };
1508
1509 /*! \brief Convert from integer boolean to corresponding single precision boolean
1510  *
1511  * \param a SIMD integer boolean
1512  * \return SIMD floating-point boolean
1513  */
1514 static inline SimdFBool gmx_simdcall cvtIB2B(SimdFIBool a)
1515 {
1516     SimdFBool b;
1517
1518     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1519     {
1520         b.simdInternal_[i] = a.simdInternal_[i];
1521     }
1522     return b;
1523 };
1524
1525 /*! \} */
1526
1527 /*! \} */
1528 /*! \endcond */
1529
1530 } // namespace gmx
1531
1532 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H