src/gromacs/simd/impl_reference/impl_reference_simd_float.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H
  37 #define GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H
  38
  39 /*! \libinternal \file
  40  *
  41  * \brief Reference implementation, SIMD single precision.
  42
  43  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  44  *
  45  * \ingroup module_simd
  46  */
  47
#include "config.h"

#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>

#include <algorithm>
#include <array>

#include "gromacs/math/utilities.h"

#include "impl_reference_definitions.h"
  61
  62 namespace gmx
  63 {
  64
  65 /*! \cond libapi */
  66 /*! \addtogroup module_simd */
  67 /*! \{ */
  68
/*! \name SIMD implementation data types and built-in conversions between types
  70  * \{
  71  */
  72
  73 /*! \libinternal \brief Float SIMD variable. Available if GMX_SIMD_HAVE_FLOAT is 1.
  74  *
  75  * \note This variable cannot be placed inside other structures or classes, since
  76  *       some compilers (including at least clang-3.7) appear to lose the
  77  *       alignment. This is likely particularly severe when allocating such
  78  *       memory on the heap, but it occurs for stack structures too.
  79  */
  80 class SimdFloat
  81 {
  82 public:
  83     SimdFloat() {}
  84
  85     //! \brief Construct from scalar
  86     SimdFloat(float f) { simdInternal_.fill(f); }
  87
  88     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
  89      *
  90      * This has to be public to enable usage in combination with static inline
  91      * functions, but it should never, EVER, be accessed by any code outside
  92      * the corresponding implementation directory since the type will depend
  93      * on the architecture.
  94      */
  95     std::array<float, GMX_SIMD_FLOAT_WIDTH> simdInternal_;
  96 };
  97
  98 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from float.
  99  *
 100  * This is also the widest integer SIMD type. Available if GMX_SIMD_HAVE_FLOAT is 1.
 101  *
 102  * \note The integer SIMD type will always be available, but on architectures
 103  * that do not have any real integer SIMD support it might be defined as the
 104  * floating-point type. This will work fine, since there are separate defines
 105  * for whether the implementation can actually do any operations on integer
 106  * SIMD types.
 107  * \note This variable cannot be placed inside other structures or classes, since
 108  *       some compilers (including at least clang-3.7) appear to lose the
 109  *       alignment. This is likely particularly severe when allocating such
 110  *       memory on the heap, but it occurs for stack structures too.
 111  */
 112 class SimdFInt32
 113 {
 114 public:
 115     SimdFInt32() {}
 116
 117     //! \brief Construct from scalar
 118     SimdFInt32(std::int32_t i) { simdInternal_.fill(i); }
 119
 120     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 121      *
 122      * This has to be public to enable usage in combination with static inline
 123      * functions, but it should never, EVER, be accessed by any code outside
 124      * the corresponding implementation directory since the type will depend
 125      * on the architecture.
 126      */
 127     std::array<std::int32_t, GMX_SIMD_FINT32_WIDTH> simdInternal_;
 128 };
 129
 130 /*! \libinternal \brief Boolean type for float SIMD data.
 131  *
 132  *  Available if GMX_SIMD_HAVE_FLOAT is 1.
 133  *
 134  * \note This variable cannot be placed inside other structures or classes, since
 135  *       some compilers (including at least clang-3.7) appear to lose the
 136  *       alignment. This is likely particularly severe when allocating such
 137  *       memory on the heap, but it occurs for stack structures too.
 138  */
 139 class SimdFBool
 140 {
 141 public:
 142     SimdFBool() {}
 143
 144     //! \brief Construct from scalar
 145     SimdFBool(bool b) { simdInternal_.fill(b); }
 146
 147     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 148      *
 149      * This has to be public to enable usage in combination with static inline
 150      * functions, but it should never, EVER, be accessed by any code outside
 151      * the corresponding implementation directory since the type will depend
 152      * on the architecture.
 153      */
 154     std::array<bool, GMX_SIMD_FLOAT_WIDTH> simdInternal_;
 155 };
 156
 157 /*! \libinternal \brief Boolean type for integer datatypes corresponding to float SIMD.
 158  *
 159  * Available if GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
 160  *
 161  * \note This variable cannot be placed inside other structures or classes, since
 162  *       some compilers (including at least clang-3.7) appear to lose the
 163  *       alignment. This is likely particularly severe when allocating such
 164  *       memory on the heap, but it occurs for stack structures too.
 165  */
 166 class SimdFIBool
 167 {
 168 public:
 169     SimdFIBool() {}
 170
 171     //! \brief Construct from scalar
 172     SimdFIBool(bool b) { simdInternal_.fill(b); }
 173
 174     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 175      *
 176      * This has to be public to enable usage in combination with static inline
 177      * functions, but it should never, EVER, be accessed by any code outside
 178      * the corresponding implementation directory since the type will depend
 179      * on the architecture.
 180      */
 181     std::array<bool, GMX_SIMD_FINT32_WIDTH> simdInternal_;
 182 };
 183
 184 /*! \}
 185  *
 186  * \name SIMD implementation load/store operations for single precision floating point
 187  * \{
 188  */
 189
 190 /*! \brief Load \ref GMX_SIMD_FLOAT_WIDTH float numbers from aligned memory.
 191  *
 192  * \param m Pointer to memory aligned to the SIMD width.
 193  * \return SIMD variable with data loaded.
 194  */
 195 static inline SimdFloat gmx_simdcall simdLoad(const float* m, SimdFloatTag = {})
 196 {
 197     SimdFloat a;
 198
 199     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(float)) == 0);
 200
 201     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 202     return a;
 203 }
 204
 205 /*! \brief Store the contents of SIMD float variable to aligned memory m.
 206  *
 207  * \param[out] m Pointer to memory, aligned to SIMD width.
 208  * \param a SIMD variable to store
 209  */
 210 static inline void gmx_simdcall store(float* m, SimdFloat a)
 211 {
 212     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(float)) == 0);
 213
 214     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 215 }
 216
 217 /*! \brief Load SIMD float from unaligned memory.
 218  *
 219  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 220  *
 221  * \param m Pointer to memory, no alignment requirement.
 222  * \return SIMD variable with data loaded.
 223  */
 224 static inline SimdFloat gmx_simdcall simdLoadU(const float* m, SimdFloatTag = {})
 225 {
 226     SimdFloat a;
 227     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 228     return a;
 229 }
 230
 231 /*! \brief Store SIMD float to unaligned memory.
 232  *
 233  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 234  *
 235  * \param[out] m Pointer to memory, no alignment requirement.
 236  * \param a SIMD variable to store.
 237  */
 238 static inline void gmx_simdcall storeU(float* m, SimdFloat a)
 239 {
 240     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 241 }
 242
 243 /*! \brief Set all SIMD float variable elements to 0.0.
 244  *
 245  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 246  * internally to handle all types rather than adding the suffix used here.
 247  *
 248  * \return SIMD 0.0F
 249  */
 250 static inline SimdFloat gmx_simdcall setZeroF()
 251 {
 252     return SimdFloat(0.0F);
 253 }
 254
 255 /*! \} */
 256
 257
 258 /*!
 259  * \name SIMD implementation load/store operations for integers (corresponding to float)
 260  * \{
 261  */
 262
 263 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
 264  *
 265  * You should typically just call \ref gmx::load(), which uses proxy objects
 266  * internally to handle all types rather than adding the suffix used here.
 267  *
 268  * \param m Pointer to memory, aligned to (float) integer SIMD width.
 269  * \return SIMD integer variable.
 270  */
 271 static inline SimdFInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdFInt32Tag)
 272 {
 273     SimdFInt32 a;
 274
 275     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(std::int32_t)) == 0);
 276
 277     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 278     return a;
 279 };
 280
 281 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
 282  *
 283  * \param m Memory aligned to (float) integer SIMD width.
 284  * \param a SIMD variable to store.
 285  */
 286 static inline void gmx_simdcall store(std::int32_t* m, SimdFInt32 a)
 287 {
 288     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(std::int32_t)) == 0);
 289
 290     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 291 };
 292
 293 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx::SimdFloat.
 294  *
 295  * You should typically just call \ref gmx::loadU(), which uses proxy objects
 296  * internally to handle all types rather than adding the suffix used here.
 297  *
 298  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 299  *
 300  * \param m Pointer to memory, no alignment requirements.
 301  * \return SIMD integer variable.
 302  */
 303 static inline SimdFInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdFInt32Tag)
 304 {
 305     SimdFInt32 a;
 306     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 307     return a;
 308 }
 309
 310 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
 311  *
 312  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 313  *
 314  * \param m Memory pointer, no alignment requirements.
 315  * \param a SIMD variable to store.
 316  */
 317 static inline void gmx_simdcall storeU(std::int32_t* m, SimdFInt32 a)
 318 {
 319     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 320 }
 321
 322 /*! \brief Set all SIMD (float) integer variable elements to 0.
 323  *
 324  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 325  * internally to handle all types rather than adding the suffix used here.
 326  *
 327  * \return SIMD 0
 328  */
 329 static inline SimdFInt32 gmx_simdcall setZeroFI()
 330 {
 331     return SimdFInt32(0);
 332 }
 333
 334 /*! \brief Extract element with index i from \ref gmx::SimdFInt32.
 335  *
 336  * Available if \ref GMX_SIMD_HAVE_FINT32_EXTRACT is 1.
 337  *
 338  * \tparam index Compile-time constant, position to extract (first position is 0)
 339  * \param  a     SIMD variable from which to extract value.
 340  * \return Single integer from position index in SIMD variable.
 341  */
 342 template<int index>
 343 static inline std::int32_t gmx_simdcall extract(SimdFInt32 a)
 344 {
 345     return a.simdInternal_[index];
 346 }
 347
 348 /*! \}
 349  *
 350  * \name SIMD implementation single precision floating-point bitwise logical operations
 351  * \{
 352  */
 353
 354 /*! \brief Bitwise and for two SIMD float variables.
 355  *
 356  * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 357  *
 358  * \param a data1
 359  * \param b data2
 360  * \return data1 & data2
 361  */
 362 static inline SimdFloat gmx_simdcall operator&(SimdFloat a, SimdFloat b)
 363 {
 364     SimdFloat res;
 365
 366     union {
 367         float        r;
 368         std::int32_t i;
 369     } conv1, conv2;
 370
 371     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 372     {
 373         conv1.r              = a.simdInternal_[i];
 374         conv2.r              = b.simdInternal_[i];
 375         conv1.i              = conv1.i & conv2.i;
 376         res.simdInternal_[i] = conv1.r;
 377     }
 378     return res;
 379 }
 380
 381 /*! \brief Bitwise andnot for SIMD float.
 382  *
 383  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 384  *
 385  * \param a data1
 386  * \param b data2
 387  * \return (~data1) & data2
 388  */
 389 static inline SimdFloat gmx_simdcall andNot(SimdFloat a, SimdFloat b)
 390 {
 391     SimdFloat res;
 392
 393     union {
 394         float        r;
 395         std::int32_t i;
 396     } conv1, conv2;
 397
 398     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 399     {
 400         conv1.r              = a.simdInternal_[i];
 401         conv2.r              = b.simdInternal_[i];
 402         conv1.i              = ~conv1.i & conv2.i;
 403         res.simdInternal_[i] = conv1.r;
 404     }
 405     return res;
 406 }
 407
 408 /*! \brief Bitwise or for SIMD float.
 409  *
 410  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 411  *
 412  * \param a data1
 413  * \param b data2
 414  * \return data1 | data2
 415  */
 416 static inline SimdFloat gmx_simdcall operator|(SimdFloat a, SimdFloat b)
 417 {
 418     SimdFloat res;
 419
 420     union {
 421         float        r;
 422         std::int32_t i;
 423     } conv1, conv2;
 424
 425     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 426     {
 427         conv1.r              = a.simdInternal_[i];
 428         conv2.r              = b.simdInternal_[i];
 429         conv1.i              = conv1.i | conv2.i;
 430         res.simdInternal_[i] = conv1.r;
 431     }
 432     return res;
 433 }
 434
 435 /*! \brief Bitwise xor for SIMD float.
 436  *
 437  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 438  *
 439  * \param a data1
 440  * \param b data2
 441  * \return data1 ^ data2
 442  */
 443 static inline SimdFloat gmx_simdcall operator^(SimdFloat a, SimdFloat b)
 444 {
 445     SimdFloat res;
 446
 447     union {
 448         float        r;
 449         std::int32_t i;
 450     } conv1, conv2;
 451
 452     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 453     {
 454         conv1.r              = a.simdInternal_[i];
 455         conv2.r              = b.simdInternal_[i];
 456         conv1.i              = conv1.i ^ conv2.i;
 457         res.simdInternal_[i] = conv1.r;
 458     }
 459     return res;
 460 }
 461
 462 /*! \}
 463  *
 464  * \name SIMD implementation single precision floating-point arithmetics
 465  * \{
 466  */
 467
 468 /*! \brief Add two float SIMD variables.
 469  *
 470  * \param a term1
 471  * \param b term2
 472  * \return a+b
 473  */
 474 static inline SimdFloat gmx_simdcall operator+(SimdFloat a, SimdFloat b)
 475 {
 476     SimdFloat res;
 477
 478     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 479     {
 480         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
 481     }
 482     return res;
 483 }
 484
 485 /*! \brief Subtract two float SIMD variables.
 486  *
 487  * \param a term1
 488  * \param b term2
 489  * \return a-b
 490  */
 491 static inline SimdFloat gmx_simdcall operator-(SimdFloat a, SimdFloat b)
 492 {
 493     SimdFloat res;
 494
 495     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 496     {
 497         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
 498     }
 499     return res;
 500 }
 501
 502 /*! \brief SIMD single precision negate.
 503  *
 * \param a SIMD single precision value
 505  * \return -a
 506  */
 507 static inline SimdFloat gmx_simdcall operator-(SimdFloat a)
 508 {
 509     SimdFloat res;
 510
 511     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 512     {
 513         res.simdInternal_[i] = -a.simdInternal_[i];
 514     }
 515     return res;
 516 }
 517
 518 /*! \brief Multiply two float SIMD variables.
 519  *
 520  * \param a factor1
 521  * \param b factor2
 522  * \return a*b.
 523  */
 524 static inline SimdFloat gmx_simdcall operator*(SimdFloat a, SimdFloat b)
 525 {
 526     SimdFloat res;
 527
 528     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 529     {
 530         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
 531     }
 532     return res;
 533 }
 534
 535 /*! \brief SIMD float Fused-multiply-add. Result is a*b+c.
 536  *
 537  * \param a factor1
 538  * \param b factor2
 539  * \param c term
 540  * \return a*b+c
 541  */
 542 static inline SimdFloat gmx_simdcall fma(SimdFloat a, SimdFloat b, SimdFloat c)
 543 {
 544     return a * b + c;
 545 }
 546
 547 /*! \brief SIMD float Fused-multiply-subtract. Result is a*b-c.
 548  *
 549  * \param a factor1
 550  * \param b factor2
 551  * \param c term
 552  * \return a*b-c
 553  */
 554 static inline SimdFloat gmx_simdcall fms(SimdFloat a, SimdFloat b, SimdFloat c)
 555 {
 556     return a * b - c;
 557 }
 558
 559 /*! \brief SIMD float Fused-negated-multiply-add. Result is -a*b+c.
 560  *
 561  * \param a factor1
 562  * \param b factor2
 563  * \param c term
 564  * \return -a*b+c
 565  */
 566 static inline SimdFloat gmx_simdcall fnma(SimdFloat a, SimdFloat b, SimdFloat c)
 567 {
 568     return c - a * b;
 569 }
 570
 571 /*! \brief SIMD float Fused-negated-multiply-subtract. Result is -a*b-c.
 572  *
 573  * \param a factor1
 574  * \param b factor2
 575  * \param c term
 576  * \return -a*b-c
 577  */
 578 static inline SimdFloat gmx_simdcall fnms(SimdFloat a, SimdFloat b, SimdFloat c)
 579 {
 580     return -a * b - c;
 581 }
 582
 583 /*! \brief SIMD float 1.0/sqrt(x) lookup.
 584  *
 585  * This is a low-level instruction that should only be called from routines
 586  * implementing the inverse square root in simd_math.h.
 587  *
 588  * \param x Argument, x>0
 589  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 590  */
 591 static inline SimdFloat gmx_simdcall rsqrt(SimdFloat x)
 592 {
 593     SimdFloat res;
 594
 595     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 596     {
 597         res.simdInternal_[i] = 1.0F / std::sqrt(x.simdInternal_[i]);
 598     }
 599     return res;
 600 };
 601
 602 /*! \brief SIMD float 1.0/x lookup.
 603  *
 604  * This is a low-level instruction that should only be called from routines
 605  * implementing the reciprocal in simd_math.h.
 606  *
 607  * \param x Argument, x!=0
 608  * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
 609  */
 610 static inline SimdFloat gmx_simdcall rcp(SimdFloat x)
 611 {
 612     SimdFloat res;
 613
 614     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 615     {
 616         res.simdInternal_[i] = 1.0F / x.simdInternal_[i];
 617     }
 618     return res;
 619 };
 620
 621 /*! \brief Add two float SIMD variables, masked version.
 622  *
 623  * \param a term1
 624  * \param b term2
 625  * \param m mask
 626  * \return a+b where mask is true, a otherwise.
 627  */
 628 static inline SimdFloat gmx_simdcall maskAdd(SimdFloat a, SimdFloat b, SimdFBool m)
 629 {
 630     SimdFloat res;
 631
 632     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 633     {
 634         res.simdInternal_[i] = a.simdInternal_[i] + (m.simdInternal_[i] ? b.simdInternal_[i] : 0.0F);
 635     }
 636     return res;
 637 }
 638
 639 /*! \brief Multiply two float SIMD variables, masked version.
 640  *
 641  * \param a factor1
 642  * \param b factor2
 643  * \param m mask
 644  * \return a*b where mask is true, 0.0 otherwise.
 645  */
 646 static inline SimdFloat gmx_simdcall maskzMul(SimdFloat a, SimdFloat b, SimdFBool m)
 647 {
 648     SimdFloat res;
 649
 650     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 651     {
 652         res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i]) : 0.0F;
 653     }
 654     return res;
 655 }
 656
 657 /*! \brief SIMD float fused multiply-add, masked version.
 658  *
 659  * \param a factor1
 660  * \param b factor2
 661  * \param c term
 662  * \param m mask
 663  * \return a*b+c where mask is true, 0.0 otherwise.
 664  */
 665 static inline SimdFloat gmx_simdcall maskzFma(SimdFloat a, SimdFloat b, SimdFloat c, SimdFBool m)
 666 {
 667     SimdFloat res;
 668
 669     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 670     {
 671         res.simdInternal_[i] =
 672                 m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i] + c.simdInternal_[i]) : 0.0F;
 673     }
 674     return res;
 675 }
 676
 677 /*! \brief SIMD float 1.0/sqrt(x) lookup, masked version.
 678  *
 679  * This is a low-level instruction that should only be called from routines
 680  * implementing the inverse square root in simd_math.h.
 681  *
 682  * \param x Argument, x>0 for entries where mask is true.
 683  * \param m Mask
 684  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 685  *         The result for masked-out entries will be 0.0.
 686  */
 687 static inline SimdFloat gmx_simdcall maskzRsqrt(SimdFloat x, SimdFBool m)
 688 {
 689     SimdFloat res;
 690
 691     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 692     {
 693         res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0F / std::sqrt(x.simdInternal_[i]) : 0.0F;
 694     }
 695     return res;
 696 }
 697
 698 /*! \brief SIMD float 1.0/x lookup, masked version.
 699  *
 700  * This is a low-level instruction that should only be called from routines
 701  * implementing the reciprocal in simd_math.h.
 702  *
 * \param x Argument, x!=0 for entries where mask is true.
 704  * \param m Mask
 705  * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
 706  *         The result for masked-out entries will be 0.0.
 707  */
 708 static inline SimdFloat gmx_simdcall maskzRcp(SimdFloat x, SimdFBool m)
 709 {
 710     SimdFloat res;
 711
 712     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 713     {
 714         res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0F / x.simdInternal_[i] : 0.0F;
 715     }
 716     return res;
 717 }
 718
 719 /*! \brief SIMD float Floating-point abs().
 720  *
 721  * \param a any floating point values
 722  * \return abs(a) for each element.
 723  */
 724 static inline SimdFloat gmx_simdcall abs(SimdFloat a)
 725 {
 726     SimdFloat res;
 727
 728     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 729     {
 730         res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
 731     }
 732     return res;
 733 }
 734
 735 /*! \brief Set each SIMD float element to the largest from two variables.
 736  *
 737  * \param a Any floating-point value
 738  * \param b Any floating-point value
 739  * \return max(a,b) for each element.
 740  */
 741 static inline SimdFloat gmx_simdcall max(SimdFloat a, SimdFloat b)
 742 {
 743     SimdFloat res;
 744
 745     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 746     {
 747         res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
 748     }
 749     return res;
 750 }
 751
 752 /*! \brief Set each SIMD float element to the smallest from two variables.
 753  *
 754  * \param a Any floating-point value
 755  * \param b Any floating-point value
 756  * \return min(a,b) for each element.
 757  */
 758 static inline SimdFloat gmx_simdcall min(SimdFloat a, SimdFloat b)
 759 {
 760     SimdFloat res;
 761
 762     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 763     {
 764         res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
 765     }
 766     return res;
 767 }
 768
 769 /*! \brief SIMD float round to nearest integer value (in floating-point format).
 770  *
 771  * \param a Any floating-point value
 772  * \return The nearest integer, represented in floating-point format.
 773  *
 774  * \note Round mode is implementation defined. The only guarantee is that it
 775  * is consistent between rounding functions (round, cvtR2I).
 776  */
 777 static inline SimdFloat gmx_simdcall round(SimdFloat a)
 778 {
 779     SimdFloat res;
 780
 781     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 782     {
 783         res.simdInternal_[i] = std::round(a.simdInternal_[i]);
 784     }
 785     return res;
 786 }
 787
 788 /*! \brief Truncate SIMD float, i.e. round towards zero - common hardware instruction.
 789  *
 790  * \param a Any floating-point value
 791  * \return Integer rounded towards zero, represented in floating-point format.
 792  *
 793  * \note This is truncation towards zero, not floor(). The reason for this
 794  * is that truncation is virtually always present as a dedicated hardware
 795  * instruction, but floor() frequently isn't.
 796  */
 797 static inline SimdFloat gmx_simdcall trunc(SimdFloat a)
 798 {
 799     SimdFloat res;
 800
 801     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 802     {
 803         res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
 804     }
 805     return res;
 806 }
 807
 808 /*! \brief Extract (integer) exponent and fraction from single precision SIMD.
 809  *
 810  * \param       value     Floating-point value to extract from
 811  * \param[out]  exponent  Returned exponent of value, integer SIMD format.
 812  * \return      Fraction of value, floating-point SIMD format.
 813  */
 814 static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent)
 815 {
 816     SimdFloat fraction;
 817
 818     for (std::size_t i = 0; i < fraction.simdInternal_.size(); i++)
 819     {
 820         fraction.simdInternal_[i] = std::frexp(value.simdInternal_[i], &exponent->simdInternal_[i]);
 821     }
 822     return fraction;
 823 }
 824
 825 /*! \brief Multiply a SIMD float value by the number 2 raised to an exp power.
 826  *
 827  * \tparam opt By default, this routine will return zero for input arguments
 828  *             that are so small they cannot be reproduced in the current
 829  *             precision. If the unsafe math optimization template parameter
 830  *             setting is used, these tests are skipped, and the result will
 *             be undefined (possibly even NaN). This might happen below -127
 832  *             in single precision or -1023 in double, although some
 833  *             might use denormal support to extend the range.
 834  *
 835  * \param value Floating-point number to multiply with new exponent
 836  * \param exponent Integer that will not overflow as 2^exponent.
 837  * \return value*2^exponent
 838  */
 839 template<MathOptimization opt = MathOptimization::Safe>
 840 static inline SimdFloat gmx_simdcall ldexp(SimdFloat value, SimdFInt32 exponent)
 841 {
 842     SimdFloat res;
 843
 844     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 845     {
 846         // std::ldexp already takes care of clamping arguments, so we do not
 847         // need to do anything in the reference implementation
 848         res.simdInternal_[i] = std::ldexp(value.simdInternal_[i], exponent.simdInternal_[i]);
 849     }
 850     return res;
 851 }
 852
 853 /*! \brief Return sum of all elements in SIMD float variable.
 854  *
 855  * \param a SIMD variable to reduce/sum.
 856  * \return The sum of all elements in the argument variable.
 857  *
 858  */
 859 static inline float gmx_simdcall reduce(SimdFloat a)
 860 {
 861     float sum = 0.0F;
 862
 863     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
 864     {
 865         sum += a.simdInternal_[i];
 866     }
 867     return sum;
 868 }
 869
 870 /*! \}
 871  *
 872  * \name SIMD implementation single precision floating-point comparisons, boolean, selection.
 873  * \{
 874  */
 875
 876 /*! \brief SIMD a==b for single SIMD.
 877  *
 878  * \param a value1
 879  * \param b value2
 880  * \return Each element of the boolean will be set to true if a==b.
 881  *
 882  * Beware that exact floating-point comparisons are difficult.
 883  */
 884 static inline SimdFBool gmx_simdcall operator==(SimdFloat a, SimdFloat b)
 885 {
 886     SimdFBool res;
 887
 888     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 889     {
 890         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
 891     }
 892     return res;
 893 }
 894
 895 /*! \brief SIMD a!=b for single SIMD.
 896  *
 897  * \param a value1
 898  * \param b value2
 899  * \return Each element of the boolean will be set to true if a!=b.
 900  *
 901  * Beware that exact floating-point comparisons are difficult.
 902  */
 903 static inline SimdFBool gmx_simdcall operator!=(SimdFloat a, SimdFloat b)
 904 {
 905     SimdFBool res;
 906
 907     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 908     {
 909         res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
 910     }
 911     return res;
 912 }
 913
 914 /*! \brief SIMD a<b for single SIMD.
 915  *
 916  * \param a value1
 917  * \param b value2
 918  * \return Each element of the boolean will be set to true if a<b.
 919  */
 920 static inline SimdFBool gmx_simdcall operator<(SimdFloat a, SimdFloat b)
 921 {
 922     SimdFBool res;
 923
 924     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 925     {
 926         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
 927     }
 928     return res;
 929 }
 930
 931 /*! \brief SIMD a<=b for single SIMD.
 932  *
 933  * \param a value1
 934  * \param b value2
 935  * \return Each element of the boolean will be set to true if a<=b.
 936  */
 937 static inline SimdFBool gmx_simdcall operator<=(SimdFloat a, SimdFloat b)
 938 {
 939     SimdFBool res;
 940
 941     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 942     {
 943         res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
 944     }
 945     return res;
 946 }
 947
 948 /*! \brief Return true if any bits are set in the single precision SIMD.
 949  *
 950  * This function is used to handle bitmasks, mainly for exclusions in the
 951  * inner kernels. Note that it will return true even for -0.0F (sign bit set),
 952  * so it is not identical to not-equal.
 953  *
 954  * \param a value
 955  * \return Each element of the boolean will be true if any bit in a is nonzero.
 956  */
 957 static inline SimdFBool gmx_simdcall testBits(SimdFloat a)
 958 {
 959     SimdFBool res;
 960
 961     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 962     {
 963         union {
 964             std::uint32_t i;
 965             float         f;
 966         } conv;
 967
 968         conv.f               = a.simdInternal_[i];
 969         res.simdInternal_[i] = (conv.i != 0);
 970     }
 971     return res;
 972 }
 973
 974 /*! \brief Logical \a and on single precision SIMD booleans.
 975  *
 976  * \param a logical vars 1
 977  * \param b logical vars 2
 978  * \return For each element, the result boolean is true if a \& b are true.
 979  *
 980  * \note This is not necessarily a bitwise operation - the storage format
 981  * of booleans is implementation-dependent.
 982  */
 983 static inline SimdFBool gmx_simdcall operator&&(SimdFBool a, SimdFBool b)
 984 {
 985     SimdFBool res;
 986
 987     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 988     {
 989         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
 990     }
 991     return res;
 992 }
 993
 994 /*! \brief Logical \a or on single precision SIMD booleans.
 995  *
 996  * \param a logical vars 1
 997  * \param b logical vars 2
 998  * \return For each element, the result boolean is true if a or b is true.
 999  *
1000  * Note that this is not necessarily a bitwise operation - the storage format
1001  * of booleans is implementation-dependent.
1002  *
1003  \ */
1004 static inline SimdFBool gmx_simdcall operator||(SimdFBool a, SimdFBool b)
1005 {
1006     SimdFBool res;
1007
1008     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1009     {
1010         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1011     }
1012     return res;
1013 }
1014
1015 /*! \brief Returns non-zero if any of the boolean in SIMD a is True, otherwise 0.
1016  *
1017  * \param a Logical variable.
1018  * \return true if any element in a is true, otherwise false.
1019  *
1020  * The actual return value for truth will depend on the architecture,
1021  * so any non-zero value is considered truth.
1022  */
1023 static inline bool gmx_simdcall anyTrue(SimdFBool a)
1024 {
1025     bool res = false;
1026
1027     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1028     {
1029         res = res || a.simdInternal_[i];
1030     }
1031     return res;
1032 }
1033
1034 /*! \brief Select from single precision SIMD variable where boolean is true.
1035  *
1036  * \param a Floating-point variable to select from
1037  * \param mask Boolean selector
1038  * \return  For each element, a is selected for true, 0 for false.
1039  */
1040 static inline SimdFloat gmx_simdcall selectByMask(SimdFloat a, SimdFBool mask)
1041 {
1042     SimdFloat res;
1043
1044     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1045     {
1046         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0F;
1047     }
1048     return res;
1049 }
1050
1051 /*! \brief Select from single precision SIMD variable where boolean is false.
1052  *
1053  * \param a Floating-point variable to select from
1054  * \param mask Boolean selector
1055  * \return  For each element, a is selected for false, 0 for true (sic).
1056  */
1057 static inline SimdFloat gmx_simdcall selectByNotMask(SimdFloat a, SimdFBool mask)
1058 {
1059     SimdFloat res;
1060
1061     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1062     {
1063         res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0F : a.simdInternal_[i];
1064     }
1065     return res;
1066 }
1067
1068 /*! \brief Vector-blend SIMD float selection.
1069  *
1070  * \param a First source
1071  * \param b Second source
1072  * \param sel Boolean selector
1073  * \return For each element, select b if sel is true, a otherwise.
1074  */
1075 static inline SimdFloat gmx_simdcall blend(SimdFloat a, SimdFloat b, SimdFBool sel)
1076 {
1077     SimdFloat res;
1078
1079     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1080     {
1081         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1082     }
1083     return res;
1084 }
1085
1086 /*! \}
1087  *
1088  * \name SIMD implementation integer (corresponding to float) bitwise logical operations
1089  * \{
1090  */
1091
1092 /*! \brief Integer SIMD bitwise and.
1093  *
1094  * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1095  *
1096  * \note You can \a not use this operation directly to select based on a boolean
1097  * SIMD variable, since booleans are separate from integer SIMD. If that
1098  * is what you need, have a look at \ref gmx::selectByMask instead.
1099  *
1100  * \param a first integer SIMD
1101  * \param b second integer SIMD
1102  * \return a \& b (bitwise and)
1103  */
1104 static inline SimdFInt32 gmx_simdcall operator&(SimdFInt32 a, SimdFInt32 b)
1105 {
1106     SimdFInt32 res;
1107
1108     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1109     {
1110         res.simdInternal_[i] = a.simdInternal_[i] & b.simdInternal_[i];
1111     }
1112     return res;
1113 }
1114
1115 /*! \brief Integer SIMD bitwise not/complement.
1116  *
1117  * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1118  *
1119  * \note You can \a not use this operation directly to select based on a boolean
1120  * SIMD variable, since booleans are separate from integer SIMD. If that
1121  * is what you need, have a look at \ref gmx::selectByMask instead.
1122  *
1123  * \param a integer SIMD
1124  * \param b integer SIMD
1125  * \return (~a) & b
1126  */
1127 static inline SimdFInt32 gmx_simdcall andNot(SimdFInt32 a, SimdFInt32 b)
1128 {
1129     SimdFInt32 res;
1130
1131     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1132     {
1133         res.simdInternal_[i] = ~a.simdInternal_[i] & b.simdInternal_[i];
1134     }
1135     return res;
1136 }
1137
1138 /*! \brief Integer SIMD bitwise or.
1139  *
1140  * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1141  *
1142  * \param a first integer SIMD
1143  * \param b second integer SIMD
1144  * \return a \| b (bitwise or)
1145  */
1146 static inline SimdFInt32 gmx_simdcall operator|(SimdFInt32 a, SimdFInt32 b)
1147 {
1148     SimdFInt32 res;
1149
1150     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1151     {
1152         res.simdInternal_[i] = a.simdInternal_[i] | b.simdInternal_[i];
1153     }
1154     return res;
1155 }
1156
1157 /*! \brief Integer SIMD bitwise xor.
1158  *
1159  * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1160  *
1161  * \param a first integer SIMD
1162  * \param b second integer SIMD
1163  * \return a ^ b (bitwise xor)
1164  */
1165 static inline SimdFInt32 gmx_simdcall operator^(SimdFInt32 a, SimdFInt32 b)
1166 {
1167     SimdFInt32 res;
1168
1169     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1170     {
1171         res.simdInternal_[i] = a.simdInternal_[i] ^ b.simdInternal_[i];
1172     }
1173     return res;
1174 }
1175
1176 /*! \}
1177  *
1178  * \name SIMD implementation integer (corresponding to float) arithmetics
1179  * \{
1180  */
1181
1182 /*! \brief Add SIMD integers.
1183  *
1184  * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1185  *  or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1186  *
1187  * \param a term1
1188  * \param b term2
1189  * \return a+b
1190  */
1191 static inline SimdFInt32 gmx_simdcall operator+(SimdFInt32 a, SimdFInt32 b)
1192 {
1193     SimdFInt32 res;
1194
1195     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1196     {
1197         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
1198     }
1199     return res;
1200 }
1201
1202 /*! \brief Subtract SIMD integers.
1203  *
1204  * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1205  *  or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1206  *
1207  * \param a term1
1208  * \param b term2
1209  * \return a-b
1210  */
1211 static inline SimdFInt32 gmx_simdcall operator-(SimdFInt32 a, SimdFInt32 b)
1212 {
1213     SimdFInt32 res;
1214
1215     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1216     {
1217         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
1218     }
1219     return res;
1220 }
1221
1222 /*! \brief Multiply SIMD integers.
1223  *
1224  * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1225  *  or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1226  *
1227  * \param a factor1
1228  * \param b factor2
1229  * \return a*b.
1230  *
1231  * \note Only the low 32 bits are retained, so this can overflow.
1232  */
1233 static inline SimdFInt32 gmx_simdcall operator*(SimdFInt32 a, SimdFInt32 b)
1234 {
1235     SimdFInt32 res;
1236
1237     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1238     {
1239         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
1240     }
1241     return res;
1242 }
1243
1244 /*! \}
1245  *
1246  * \name SIMD implementation integer (corresponding to float) comparisons, boolean, selection
1247  * \{
1248  */
1249
1250 /*! \brief Equality comparison of two integers corresponding to float values.
1251  *
1252  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1253  *
1254  * \param a SIMD integer1
1255  * \param b SIMD integer2
1256  * \return SIMD integer boolean with true for elements where a==b
1257  */
1258 static inline SimdFIBool gmx_simdcall operator==(SimdFInt32 a, SimdFInt32 b)
1259 {
1260     SimdFIBool res;
1261
1262     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1263     {
1264         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
1265     }
1266     return res;
1267 }
1268
1269 /*! \brief Less-than comparison of two SIMD integers corresponding to float values.
1270  *
1271  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1272  *
1273  * \param a SIMD integer1
1274  * \param b SIMD integer2
1275  * \return SIMD integer boolean with true for elements where a<b
1276  */
1277 static inline SimdFIBool gmx_simdcall operator<(SimdFInt32 a, SimdFInt32 b)
1278 {
1279     SimdFIBool res;
1280
1281     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1282     {
1283         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
1284     }
1285     return res;
1286 }
1287
1288 /*! \brief Check if any bit is set in each element
1289  *
1290  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1291  *
1292  * \param a SIMD integer
1293  * \return SIMD integer boolean with true for elements where any bit is set
1294  */
1295 static inline SimdFIBool gmx_simdcall testBits(SimdFInt32 a)
1296 {
1297     SimdFIBool res;
1298
1299     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1300     {
1301         res.simdInternal_[i] = (a.simdInternal_[i] != 0);
1302     }
1303     return res;
1304 }
1305
1306 /*! \brief Logical AND on SimdFIBool.
1307  *
1308  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1309  *
1310  * \param a SIMD boolean 1
1311  * \param b SIMD boolean 2
1312  * \return True for elements where both a and b are true.
1313  */
1314 static inline SimdFIBool gmx_simdcall operator&&(SimdFIBool a, SimdFIBool b)
1315 {
1316     SimdFIBool res;
1317
1318     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1319     {
1320         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1321     }
1322     return res;
1323 }
1324
1325 /*! \brief Logical OR on SimdFIBool.
1326  *
1327  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1328  *
1329  * \param a SIMD boolean 1
1330  * \param b SIMD boolean 2
1331  * \return True for elements where both a and b are true.
1332  */
1333 static inline SimdFIBool gmx_simdcall operator||(SimdFIBool a, SimdFIBool b)
1334 {
1335     SimdFIBool res;
1336
1337     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1338     {
1339         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1340     }
1341     return res;
1342 }
1343
1344 /*! \brief Returns true if any of the boolean in x is True, otherwise 0.
1345  *
1346  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1347  *
1348  * The actual return value for "any true" will depend on the architecture.
1349  * Any non-zero value should be considered truth.
1350  *
1351  * \param a SIMD boolean
1352  * \return True if any of the elements in a is true, otherwise 0.
1353  */
1354 static inline bool gmx_simdcall anyTrue(SimdFIBool a)
1355 {
1356     bool res = false;
1357
1358     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1359     {
1360         res = res || a.simdInternal_[i];
1361     }
1362     return res;
1363 }
1364
1365 /*! \brief Select from \ref gmx::SimdFInt32 variable where boolean is true.
1366  *
1367  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1368  *
1369  * \param a SIMD integer to select from
1370  * \param mask Boolean selector
1371  * \return Elements from a where sel is true, 0 otherwise.
1372  */
1373 static inline SimdFInt32 gmx_simdcall selectByMask(SimdFInt32 a, SimdFIBool mask)
1374 {
1375     SimdFInt32 res;
1376
1377     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1378     {
1379         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0F;
1380     }
1381     return res;
1382 }
1383
1384 /*! \brief Select from \ref gmx::SimdFInt32 variable where boolean is false.
1385  *
1386  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1387  *
1388  * \param a SIMD integer to select from
1389  * \param mask Boolean selector
1390  * \return Elements from a where sel is false, 0 otherwise (sic).
1391  */
1392 static inline SimdFInt32 gmx_simdcall selectByNotMask(SimdFInt32 a, SimdFIBool mask)
1393 {
1394     SimdFInt32 res;
1395
1396     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1397     {
1398         res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0F : a.simdInternal_[i];
1399     }
1400     return res;
1401 }
1402
1403 /*! \brief Vector-blend SIMD integer selection.
1404  *
1405  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1406  *
1407  * \param a First source
1408  * \param b Second source
1409  * \param sel Boolean selector
1410  * \return For each element, select b if sel is true, a otherwise.
1411  */
1412 static inline SimdFInt32 gmx_simdcall blend(SimdFInt32 a, SimdFInt32 b, SimdFIBool sel)
1413 {
1414     SimdFInt32 res;
1415
1416     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1417     {
1418         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1419     }
1420     return res;
1421 }
1422
1423 /*! \}
1424  *
1425  * \name SIMD implementation conversion operations
1426  * \{
1427  */
1428
1429 /*! \brief Round single precision floating point to integer.
1430  *
1431  * \param a SIMD floating-point
1432  * \return SIMD integer, rounded to nearest integer.
1433  *
1434  * \note Round mode is implementation defined. The only guarantee is that it
1435  * is consistent between rounding functions (round, cvtR2I).
1436  */
1437 static inline SimdFInt32 gmx_simdcall cvtR2I(SimdFloat a)
1438 {
1439     SimdFInt32 b;
1440
1441     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1442     {
1443         b.simdInternal_[i] = std::round(a.simdInternal_[i]);
1444     }
1445     return b;
1446 };
1447
1448 /*! \brief Truncate single precision floating point to integer.
1449  *
1450  * \param a SIMD floating-point
1451  * \return SIMD integer, truncated to nearest integer.
1452  */
1453 static inline SimdFInt32 gmx_simdcall cvttR2I(SimdFloat a)
1454 {
1455     SimdFInt32 b;
1456
1457     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1458     {
1459         b.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
1460     }
1461     return b;
1462 };
1463
1464 /*! \brief Convert integer to single precision floating point.
1465  *
1466  * \param a SIMD integer
1467  * \return SIMD floating-point
1468  */
1469 static inline SimdFloat gmx_simdcall cvtI2R(SimdFInt32 a)
1470 {
1471     SimdFloat b;
1472
1473     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1474     {
1475         b.simdInternal_[i] = a.simdInternal_[i];
1476     }
1477     return b;
1478 };
1479
1480 /*! \brief Convert from single precision boolean to corresponding integer boolean
1481  *
1482  * \param a SIMD floating-point boolean
1483  * \return SIMD integer boolean
1484  */
1485 static inline SimdFIBool gmx_simdcall cvtB2IB(SimdFBool a)
1486 {
1487     SimdFIBool b;
1488
1489     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1490     {
1491         b.simdInternal_[i] = a.simdInternal_[i];
1492     }
1493     return b;
1494 };
1495
1496 /*! \brief Convert from integer boolean to corresponding single precision boolean
1497  *
1498  * \param a SIMD integer boolean
1499  * \return SIMD floating-point boolean
1500  */
1501 static inline SimdFBool gmx_simdcall cvtIB2B(SimdFIBool a)
1502 {
1503     SimdFBool b;
1504
1505     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1506     {
1507         b.simdInternal_[i] = a.simdInternal_[i];
1508     }
1509     return b;
1510 };
1511
1512 /*! \} */
1513
1514 /*! \} */
1515 /*! \endcond */
1516
1517 } // namespace gmx
1518
1519 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H