src/gromacs/simd/impl_reference/impl_reference_simd_float.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H
  37 #define GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H
  38
  39 /*! \libinternal \file
  40  *
  41  * \brief Reference implementation, SIMD single precision.
  42
  43  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  44  *
  45  * \ingroup module_simd
  46  */
  47
  48 #include "config.h"
  49
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
  54
  55 #include <algorithm>
  56 #include <array>
  57
  58 #include "gromacs/math/utilities.h"
  59
  60 #include "impl_reference_definitions.h"
  61
  62 namespace gmx
  63 {
  64
  65 /*! \cond libapi */
  66 /*! \addtogroup module_simd */
  67 /*! \{ */
  68
  69 /* \name SIMD implementation data types and built-in conversions between types
  70  * \{
  71  */
  72
  73 /*! \libinternal \brief Float SIMD variable. Available if GMX_SIMD_HAVE_FLOAT is 1.
  74  *
  75  * \note This variable cannot be placed inside other structures or classes, since
  76  *       some compilers (including at least clang-3.7) appear to lose the
  77  *       alignment. This is likely particularly severe when allocating such
  78  *       memory on the heap, but it occurs for stack structures too.
  79  */
  80 class SimdFloat
  81 {
  82     public:
  83         SimdFloat() {}
  84
  85         //! \brief Construct from scalar
  86         SimdFloat(float f) { simdInternal_.fill(f); }
  87
  88         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
  89          *
  90          * This has to be public to enable usage in combination with static inline
  91          * functions, but it should never, EVER, be accessed by any code outside
  92          * the corresponding implementation directory since the type will depend
  93          * on the architecture.
  94          */
  95         std::array<float, GMX_SIMD_FLOAT_WIDTH>  simdInternal_;
  96 };
  97
  98 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from float.
  99  *
 100  * This is also the widest integer SIMD type. Available if GMX_SIMD_HAVE_FLOAT is 1.
 101  *
 102  * \note The integer SIMD type will always be available, but on architectures
 103  * that do not have any real integer SIMD support it might be defined as the
 104  * floating-point type. This will work fine, since there are separate defines
 105  * for whether the implementation can actually do any operations on integer
 106  * SIMD types.
 107  * \note This variable cannot be placed inside other structures or classes, since
 108  *       some compilers (including at least clang-3.7) appear to lose the
 109  *       alignment. This is likely particularly severe when allocating such
 110  *       memory on the heap, but it occurs for stack structures too.
 111  */
 112 class SimdFInt32
 113 {
 114     public:
 115         SimdFInt32() {}
 116
 117         //! \brief Construct from scalar
 118         SimdFInt32(std::int32_t i) { simdInternal_.fill(i); }
 119
 120         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 121          *
 122          * This has to be public to enable usage in combination with static inline
 123          * functions, but it should never, EVER, be accessed by any code outside
 124          * the corresponding implementation directory since the type will depend
 125          * on the architecture.
 126          */
 127         std::array<std::int32_t, GMX_SIMD_FINT32_WIDTH>  simdInternal_;
 128 };
 129
 130 /*! \libinternal \brief Boolean type for float SIMD data.
 131  *
 132  *  Available if GMX_SIMD_HAVE_FLOAT is 1.
 133  *
 134  * \note This variable cannot be placed inside other structures or classes, since
 135  *       some compilers (including at least clang-3.7) appear to lose the
 136  *       alignment. This is likely particularly severe when allocating such
 137  *       memory on the heap, but it occurs for stack structures too.
 138  */
 139 class SimdFBool
 140 {
 141     public:
 142         SimdFBool() {}
 143
 144         //! \brief Construct from scalar
 145         SimdFBool(bool b) { simdInternal_.fill(b); }
 146
 147         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 148          *
 149          * This has to be public to enable usage in combination with static inline
 150          * functions, but it should never, EVER, be accessed by any code outside
 151          * the corresponding implementation directory since the type will depend
 152          * on the architecture.
 153          */
 154         std::array<bool, GMX_SIMD_FLOAT_WIDTH>  simdInternal_;
 155 };
 156
 157 /*! \libinternal \brief Boolean type for integer datatypes corresponding to float SIMD.
 158  *
 159  * Available if GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
 160  *
 161  * \note This variable cannot be placed inside other structures or classes, since
 162  *       some compilers (including at least clang-3.7) appear to lose the
 163  *       alignment. This is likely particularly severe when allocating such
 164  *       memory on the heap, but it occurs for stack structures too.
 165  */
 166 class SimdFIBool
 167 {
 168     public:
 169         SimdFIBool() {}
 170
 171         //! \brief Construct from scalar
 172         SimdFIBool(bool b) { simdInternal_.fill(b); }
 173
 174         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 175          *
 176          * This has to be public to enable usage in combination with static inline
 177          * functions, but it should never, EVER, be accessed by any code outside
 178          * the corresponding implementation directory since the type will depend
 179          * on the architecture.
 180          */
 181         std::array<bool, GMX_SIMD_FINT32_WIDTH>  simdInternal_;
 182 };
 183
 184 /*! \}
 185  *
 186  * \name SIMD implementation load/store operations for single precision floating point
 187  * \{
 188  */
 189
 190 /*! \brief Load \ref GMX_SIMD_FLOAT_WIDTH float numbers from aligned memory.
 191  *
 192  * \param m Pointer to memory aligned to the SIMD width.
 193  * \return SIMD variable with data loaded.
 194  */
 195 static inline SimdFloat gmx_simdcall
 196 simdLoad(const float *m, SimdFloatTag = {})
 197 {
 198     SimdFloat a;
 199
 200     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(float)) == 0);
 201
 202     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 203     return a;
 204 }
 205
 206 /*! \brief Store the contents of SIMD float variable to aligned memory m.
 207  *
 208  * \param[out] m Pointer to memory, aligned to SIMD width.
 209  * \param a SIMD variable to store
 210  */
 211 static inline void gmx_simdcall
 212 store(float *m, SimdFloat a)
 213 {
 214     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(float)) == 0);
 215
 216     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 217 }
 218
 219 /*! \brief Load SIMD float from unaligned memory.
 220  *
 221  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 222  *
 223  * \param m Pointer to memory, no alignment requirement.
 224  * \return SIMD variable with data loaded.
 225  */
 226 static inline SimdFloat gmx_simdcall
 227 simdLoadU(const float *m, SimdFloatTag = {})
 228 {
 229     SimdFloat a;
 230     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 231     return a;
 232 }
 233
 234 /*! \brief Store SIMD float to unaligned memory.
 235  *
 236  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 237  *
 238  * \param[out] m Pointer to memory, no alignment requirement.
 239  * \param a SIMD variable to store.
 240  */
 241 static inline void gmx_simdcall
 242 storeU(float *m, SimdFloat a)
 243 {
 244     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 245 }
 246
 247 /*! \brief Set all SIMD float variable elements to 0.0.
 248  *
 249  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 250  * internally to handle all types rather than adding the suffix used here.
 251  *
 252  * \return SIMD 0.0F
 253  */
 254 static inline SimdFloat gmx_simdcall
 255 setZeroF()
 256 {
 257     return SimdFloat(0.0F);
 258 }
 259
 260 /*! \} */
 261
 262
 263 /*!
 264  * \name SIMD implementation load/store operations for integers (corresponding to float)
 265  * \{
 266  */
 267
 268 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
 269  *
 270  * You should typically just call \ref gmx::load(), which uses proxy objects
 271  * internally to handle all types rather than adding the suffix used here.
 272  *
 273  * \param m Pointer to memory, aligned to (float) integer SIMD width.
 274  * \return SIMD integer variable.
 275  */
 276 static inline SimdFInt32 gmx_simdcall
 277 simdLoad(const std::int32_t * m, SimdFInt32Tag)
 278 {
 279     SimdFInt32 a;
 280
 281     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(std::int32_t)) == 0);
 282
 283     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 284     return a;
 285 };
 286
 287 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
 288  *
 289  * \param m Memory aligned to (float) integer SIMD width.
 290  * \param a SIMD variable to store.
 291  */
 292 static inline void gmx_simdcall
 293 store(std::int32_t * m, SimdFInt32 a)
 294 {
 295     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(std::int32_t)) == 0);
 296
 297     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 298 };
 299
 300 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx::SimdFloat.
 301  *
 302  * You should typically just call \ref gmx::loadU(), which uses proxy objects
 303  * internally to handle all types rather than adding the suffix used here.
 304  *
 305  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 306  *
 307  * \param m Pointer to memory, no alignment requirements.
 308  * \return SIMD integer variable.
 309  */
 310 static inline SimdFInt32 gmx_simdcall
 311 simdLoadU(const std::int32_t *m, SimdFInt32Tag)
 312 {
 313     SimdFInt32 a;
 314     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 315     return a;
 316 }
 317
 318 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
 319  *
 320  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 321  *
 322  * \param m Memory pointer, no alignment requirements.
 323  * \param a SIMD variable to store.
 324  */
 325 static inline void gmx_simdcall
 326 storeU(std::int32_t * m, SimdFInt32 a)
 327 {
 328     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 329 }
 330
 331 /*! \brief Set all SIMD (float) integer variable elements to 0.
 332  *
 333  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 334  * internally to handle all types rather than adding the suffix used here.
 335  *
 336  * \return SIMD 0
 337  */
 338 static inline SimdFInt32 gmx_simdcall
 339 setZeroFI()
 340 {
 341     return SimdFInt32(0);
 342 }
 343
 344 /*! \brief Extract element with index i from \ref gmx::SimdFInt32.
 345  *
 346  * Available if \ref GMX_SIMD_HAVE_FINT32_EXTRACT is 1.
 347  *
 348  * \tparam index Compile-time constant, position to extract (first position is 0)
 349  * \param  a     SIMD variable from which to extract value.
 350  * \return Single integer from position index in SIMD variable.
 351  */
 352 template<int index>
 353 static inline std::int32_t gmx_simdcall
 354 extract(SimdFInt32 a)
 355 {
 356     return a.simdInternal_[index];
 357 }
 358
 359 /*! \}
 360  *
 361  * \name SIMD implementation single precision floating-point bitwise logical operations
 362  * \{
 363  */
 364
 365 /*! \brief Bitwise and for two SIMD float variables.
 366  *
 367  * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 368  *
 369  * \param a data1
 370  * \param b data2
 371  * \return data1 & data2
 372  */
 373 static inline SimdFloat gmx_simdcall
 374 operator&(SimdFloat a, SimdFloat b)
 375 {
 376     SimdFloat         res;
 377
 378     union
 379     {
 380         float         r;
 381         std::int32_t  i;
 382     }
 383     conv1, conv2;
 384
 385     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 386     {
 387         conv1.r              = a.simdInternal_[i];
 388         conv2.r              = b.simdInternal_[i];
 389         conv1.i              = conv1.i & conv2.i;
 390         res.simdInternal_[i] = conv1.r;
 391     }
 392     return res;
 393 }
 394
 395 /*! \brief Bitwise andnot for SIMD float.
 396  *
 397  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 398  *
 399  * \param a data1
 400  * \param b data2
 401  * \return (~data1) & data2
 402  */
 403 static inline SimdFloat gmx_simdcall
 404 andNot(SimdFloat a, SimdFloat b)
 405 {
 406     SimdFloat         res;
 407
 408     union
 409     {
 410         float         r;
 411         std::int32_t  i;
 412     }
 413     conv1, conv2;
 414
 415     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 416     {
 417         conv1.r              = a.simdInternal_[i];
 418         conv2.r              = b.simdInternal_[i];
 419         conv1.i              = ~conv1.i & conv2.i;
 420         res.simdInternal_[i] = conv1.r;
 421     }
 422     return res;
 423 }
 424
 425 /*! \brief Bitwise or for SIMD float.
 426  *
 427  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 428  *
 429  * \param a data1
 430  * \param b data2
 431  * \return data1 | data2
 432  */
 433 static inline SimdFloat gmx_simdcall
 434 operator|(SimdFloat a, SimdFloat b)
 435 {
 436     SimdFloat         res;
 437
 438     union
 439     {
 440         float         r;
 441         std::int32_t  i;
 442     }
 443     conv1, conv2;
 444
 445     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 446     {
 447         conv1.r              = a.simdInternal_[i];
 448         conv2.r              = b.simdInternal_[i];
 449         conv1.i              = conv1.i | conv2.i;
 450         res.simdInternal_[i] = conv1.r;
 451     }
 452     return res;
 453 }
 454
 455 /*! \brief Bitwise xor for SIMD float.
 456  *
 457  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 458  *
 459  * \param a data1
 460  * \param b data2
 461  * \return data1 ^ data2
 462  */
 463 static inline SimdFloat gmx_simdcall
 464 operator^(SimdFloat a, SimdFloat b)
 465 {
 466     SimdFloat         res;
 467
 468     union
 469     {
 470         float         r;
 471         std::int32_t  i;
 472     }
 473     conv1, conv2;
 474
 475     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 476     {
 477         conv1.r              = a.simdInternal_[i];
 478         conv2.r              = b.simdInternal_[i];
 479         conv1.i              = conv1.i ^ conv2.i;
 480         res.simdInternal_[i] = conv1.r;
 481     }
 482     return res;
 483 }
 484
 485 /*! \}
 486  *
 487  * \name SIMD implementation single precision floating-point arithmetics
 488  * \{
 489  */
 490
 491 /*! \brief Add two float SIMD variables.
 492  *
 493  * \param a term1
 494  * \param b term2
 495  * \return a+b
 496  */
 497 static inline SimdFloat gmx_simdcall
 498 operator+(SimdFloat a, SimdFloat b)
 499 {
 500     SimdFloat         res;
 501
 502     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 503     {
 504         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
 505     }
 506     return res;
 507 }
 508
 509 /*! \brief Subtract two float SIMD variables.
 510  *
 511  * \param a term1
 512  * \param b term2
 513  * \return a-b
 514  */
 515 static inline SimdFloat gmx_simdcall
 516 operator-(SimdFloat a, SimdFloat b)
 517 {
 518     SimdFloat         res;
 519
 520     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 521     {
 522         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
 523     }
 524     return res;
 525 }
 526
 527 /*! \brief SIMD single precision negate.
 528  *
 529  * \param a SIMD double precision value
 530  * \return -a
 531  */
 532 static inline SimdFloat gmx_simdcall
 533 operator-(SimdFloat a)
 534 {
 535     SimdFloat         res;
 536
 537     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 538     {
 539         res.simdInternal_[i] = -a.simdInternal_[i];
 540     }
 541     return res;
 542 }
 543
 544 /*! \brief Multiply two float SIMD variables.
 545  *
 546  * \param a factor1
 547  * \param b factor2
 548  * \return a*b.
 549  */
 550 static inline SimdFloat gmx_simdcall
 551 operator*(SimdFloat a, SimdFloat b)
 552 {
 553     SimdFloat         res;
 554
 555     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 556     {
 557         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
 558     }
 559     return res;
 560 }
 561
 562 /*! \brief SIMD float Fused-multiply-add. Result is a*b+c.
 563  *
 564  * \param a factor1
 565  * \param b factor2
 566  * \param c term
 567  * \return a*b+c
 568  */
 569 static inline SimdFloat gmx_simdcall
 570 fma(SimdFloat a, SimdFloat b, SimdFloat c)
 571 {
 572     return a*b+c;
 573 }
 574
 575 /*! \brief SIMD float Fused-multiply-subtract. Result is a*b-c.
 576  *
 577  * \param a factor1
 578  * \param b factor2
 579  * \param c term
 580  * \return a*b-c
 581  */
 582 static inline SimdFloat gmx_simdcall
 583 fms(SimdFloat a, SimdFloat b, SimdFloat c)
 584 {
 585     return a*b-c;
 586 }
 587
 588 /*! \brief SIMD float Fused-negated-multiply-add. Result is -a*b+c.
 589  *
 590  * \param a factor1
 591  * \param b factor2
 592  * \param c term
 593  * \return -a*b+c
 594  */
 595 static inline SimdFloat gmx_simdcall
 596 fnma(SimdFloat a, SimdFloat b, SimdFloat c)
 597 {
 598     return c-a*b;
 599 }
 600
 601 /*! \brief SIMD float Fused-negated-multiply-subtract. Result is -a*b-c.
 602  *
 603  * \param a factor1
 604  * \param b factor2
 605  * \param c term
 606  * \return -a*b-c
 607  */
 608 static inline SimdFloat gmx_simdcall
 609 fnms(SimdFloat a, SimdFloat b, SimdFloat c)
 610 {
 611     return -a*b-c;
 612 }
 613
 614 /*! \brief SIMD float 1.0/sqrt(x) lookup.
 615  *
 616  * This is a low-level instruction that should only be called from routines
 617  * implementing the inverse square root in simd_math.h.
 618  *
 619  * \param x Argument, x>0
 620  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 621  */
 622 static inline SimdFloat gmx_simdcall
 623 rsqrt(SimdFloat x)
 624 {
 625     SimdFloat         res;
 626
 627     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 628     {
 629         res.simdInternal_[i] = 1.0F / std::sqrt(x.simdInternal_[i]);
 630     }
 631     return res;
 632 };
 633
 634 /*! \brief SIMD float 1.0/x lookup.
 635  *
 636  * This is a low-level instruction that should only be called from routines
 637  * implementing the reciprocal in simd_math.h.
 638  *
 639  * \param x Argument, x!=0
 640  * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
 641  */
 642 static inline SimdFloat gmx_simdcall
 643 rcp(SimdFloat x)
 644 {
 645     SimdFloat         res;
 646
 647     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 648     {
 649         res.simdInternal_[i] = 1.0F / x.simdInternal_[i];
 650     }
 651     return res;
 652 };
 653
 654 /*! \brief Add two float SIMD variables, masked version.
 655  *
 656  * \param a term1
 657  * \param b term2
 658  * \param m mask
 659  * \return a+b where mask is true, a otherwise.
 660  */
 661 static inline SimdFloat gmx_simdcall
 662 maskAdd(SimdFloat a, SimdFloat b, SimdFBool m)
 663 {
 664     SimdFloat         res;
 665
 666     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 667     {
 668         res.simdInternal_[i] = a.simdInternal_[i] + (m.simdInternal_[i] ? b.simdInternal_[i] : 0.0F);
 669     }
 670     return res;
 671 }
 672
 673 /*! \brief Multiply two float SIMD variables, masked version.
 674  *
 675  * \param a factor1
 676  * \param b factor2
 677  * \param m mask
 678  * \return a*b where mask is true, 0.0 otherwise.
 679  */
 680 static inline SimdFloat gmx_simdcall
 681 maskzMul(SimdFloat a, SimdFloat b, SimdFBool m)
 682 {
 683     SimdFloat         res;
 684
 685     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 686     {
 687         res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i]) : 0.0F;
 688     }
 689     return res;
 690 }
 691
 692 /*! \brief SIMD float fused multiply-add, masked version.
 693  *
 694  * \param a factor1
 695  * \param b factor2
 696  * \param c term
 697  * \param m mask
 698  * \return a*b+c where mask is true, 0.0 otherwise.
 699  */
 700 static inline SimdFloat gmx_simdcall
 701 maskzFma(SimdFloat a, SimdFloat b, SimdFloat c, SimdFBool m)
 702 {
 703     SimdFloat         res;
 704
 705     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 706     {
 707         res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i] + c.simdInternal_[i]) : 0.0F;
 708     }
 709     return res;
 710 }
 711
 712 /*! \brief SIMD float 1.0/sqrt(x) lookup, masked version.
 713  *
 714  * This is a low-level instruction that should only be called from routines
 715  * implementing the inverse square root in simd_math.h.
 716  *
 717  * \param x Argument, x>0 for entries where mask is true.
 718  * \param m Mask
 719  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 720  *         The result for masked-out entries will be 0.0.
 721  */
 722 static inline SimdFloat gmx_simdcall
 723 maskzRsqrt(SimdFloat x, SimdFBool m)
 724 {
 725     SimdFloat         res;
 726
 727     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 728     {
 729         res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0F / std::sqrt(x.simdInternal_[i]) : 0.0F;
 730     }
 731     return res;
 732 }
 733
 734 /*! \brief SIMD float 1.0/x lookup, masked version.
 735  *
 736  * This is a low-level instruction that should only be called from routines
 737  * implementing the reciprocal in simd_math.h.
 738  *
 739  * \param x Argument, x>0 for entries where mask is true.
 740  * \param m Mask
 741  * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
 742  *         The result for masked-out entries will be 0.0.
 743  */
 744 static inline SimdFloat gmx_simdcall
 745 maskzRcp(SimdFloat x, SimdFBool m)
 746 {
 747     SimdFloat         res;
 748
 749     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 750     {
 751         res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0F / x.simdInternal_[i] : 0.0F;
 752     }
 753     return res;
 754 }
 755
 756 /*! \brief SIMD float Floating-point abs().
 757  *
 758  * \param a any floating point values
 759  * \return abs(a) for each element.
 760  */
 761 static inline SimdFloat gmx_simdcall
 762 abs(SimdFloat a)
 763 {
 764     SimdFloat         res;
 765
 766     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 767     {
 768         res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
 769     }
 770     return res;
 771 }
 772
 773 /*! \brief Set each SIMD float element to the largest from two variables.
 774  *
 775  * \param a Any floating-point value
 776  * \param b Any floating-point value
 777  * \return max(a,b) for each element.
 778  */
 779 static inline SimdFloat gmx_simdcall
 780 max(SimdFloat a, SimdFloat b)
 781 {
 782     SimdFloat         res;
 783
 784     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 785     {
 786         res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
 787     }
 788     return res;
 789 }
 790
 791 /*! \brief Set each SIMD float element to the smallest from two variables.
 792  *
 793  * \param a Any floating-point value
 794  * \param b Any floating-point value
 795  * \return min(a,b) for each element.
 796  */
 797 static inline SimdFloat gmx_simdcall
 798 min(SimdFloat a, SimdFloat b)
 799 {
 800     SimdFloat         res;
 801
 802     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 803     {
 804         res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
 805     }
 806     return res;
 807 }
 808
 809 /*! \brief SIMD float round to nearest integer value (in floating-point format).
 810  *
 811  * \param a Any floating-point value
 812  * \return The nearest integer, represented in floating-point format.
 813  *
 814  * \note Round mode is implementation defined. The only guarantee is that it
 815  * is consistent between rounding functions (round, cvtR2I).
 816  */
 817 static inline SimdFloat gmx_simdcall
 818 round(SimdFloat a)
 819 {
 820     SimdFloat         res;
 821
 822     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 823     {
 824         res.simdInternal_[i] = std::round(a.simdInternal_[i]);
 825     }
 826     return res;
 827 }
 828
 829 /*! \brief Truncate SIMD float, i.e. round towards zero - common hardware instruction.
 830  *
 831  * \param a Any floating-point value
 832  * \return Integer rounded towards zero, represented in floating-point format.
 833  *
 834  * \note This is truncation towards zero, not floor(). The reason for this
 835  * is that truncation is virtually always present as a dedicated hardware
 836  * instruction, but floor() frequently isn't.
 837  */
 838 static inline SimdFloat gmx_simdcall
 839 trunc(SimdFloat a)
 840 {
 841     SimdFloat         res;
 842
 843     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 844     {
 845         res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
 846     }
 847     return res;
 848 }
 849
 850 /*! \brief Extract (integer) exponent and fraction from single precision SIMD.
 851  *
 852  * \param       value     Floating-point value to extract from
 853  * \param[out]  exponent  Returned exponent of value, integer SIMD format.
 854  * \return      Fraction of value, floating-point SIMD format.
 855  */
 856 static inline SimdFloat gmx_simdcall
 857 frexp(SimdFloat value, SimdFInt32 * exponent)
 858 {
 859     SimdFloat fraction;
 860
 861     for (std::size_t i = 0; i < fraction.simdInternal_.size(); i++)
 862     {
 863         fraction.simdInternal_[i] = std::frexp(value.simdInternal_[i], &exponent->simdInternal_[i]);
 864     }
 865     return fraction;
 866 }
 867
 868 /*! \brief Multiply a SIMD float value by the number 2 raised to an exp power.
 869  *
 870  * \tparam opt By default, this routine will return zero for input arguments
 871  *             that are so small they cannot be reproduced in the current
 872  *             precision. If the unsafe math optimization template parameter
 873  *             setting is used, these tests are skipped, and the result will
 874  *             be undefined (possible even NaN). This might happen below -127
 875  *             in single precision or -1023 in double, although some
 876  *             might use denormal support to extend the range.
 877  *
 878  * \param value Floating-point number to multiply with new exponent
 879  * \param exponent Integer that will not overflow as 2^exponent.
 880  * \return value*2^exponent
 881  */
 882 template <MathOptimization opt = MathOptimization::Safe>
 883 static inline SimdFloat gmx_simdcall
 884 ldexp(SimdFloat value, SimdFInt32 exponent)
 885 {
 886     SimdFloat           res;
 887
 888     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 889     {
 890         // std::ldexp already takes care of clamping arguments, so we do not
 891         // need to do anything in the reference implementation
 892         res.simdInternal_[i] = std::ldexp(value.simdInternal_[i], exponent.simdInternal_[i]);
 893     }
 894     return res;
 895 }
 896
 897 /*! \brief Return sum of all elements in SIMD float variable.
 898  *
 899  * \param a SIMD variable to reduce/sum.
 900  * \return The sum of all elements in the argument variable.
 901  *
 902  */
 903 static inline float gmx_simdcall
 904 reduce(SimdFloat a)
 905 {
 906     float sum = 0.0F;
 907
 908     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
 909     {
 910         sum += a.simdInternal_[i];
 911     }
 912     return sum;
 913 }
 914
 915 /*! \}
 916  *
 917  * \name SIMD implementation single precision floating-point comparisons, boolean, selection.
 918  * \{
 919  */
 920
 921 /*! \brief SIMD a==b for single SIMD.
 922  *
 923  * \param a value1
 924  * \param b value2
 925  * \return Each element of the boolean will be set to true if a==b.
 926  *
 927  * Beware that exact floating-point comparisons are difficult.
 928  */
 929 static inline SimdFBool gmx_simdcall
 930 operator==(SimdFloat a, SimdFloat b)
 931 {
 932     SimdFBool         res;
 933
 934     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 935     {
 936         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
 937     }
 938     return res;
 939 }
 940
 941 /*! \brief SIMD a!=b for single SIMD.
 942  *
 943  * \param a value1
 944  * \param b value2
 945  * \return Each element of the boolean will be set to true if a!=b.
 946  *
 947  * Beware that exact floating-point comparisons are difficult.
 948  */
 949 static inline SimdFBool gmx_simdcall
 950 operator!=(SimdFloat a, SimdFloat b)
 951 {
 952     SimdFBool         res;
 953
 954     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 955     {
 956         res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
 957     }
 958     return res;
 959 }
 960
 961 /*! \brief SIMD a<b for single SIMD.
 962  *
 963  * \param a value1
 964  * \param b value2
 965  * \return Each element of the boolean will be set to true if a<b.
 966  */
 967 static inline SimdFBool gmx_simdcall
 968 operator<(SimdFloat a, SimdFloat b)
 969 {
 970     SimdFBool          res;
 971
 972     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 973     {
 974         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
 975     }
 976     return res;
 977 }
 978
 979 /*! \brief SIMD a<=b for single SIMD.
 980  *
 981  * \param a value1
 982  * \param b value2
 983  * \return Each element of the boolean will be set to true if a<=b.
 984  */
 985 static inline SimdFBool gmx_simdcall
 986 operator<=(SimdFloat a, SimdFloat b)
 987 {
 988     SimdFBool          res;
 989
 990     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 991     {
 992         res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
 993     }
 994     return res;
 995 }
 996
 997 /*! \brief Return true if any bits are set in the single precision SIMD.
 998  *
 999  * This function is used to handle bitmasks, mainly for exclusions in the
1000  * inner kernels. Note that it will return true even for -0.0F (sign bit set),
1001  * so it is not identical to not-equal.
1002  *
1003  * \param a value
1004  * \return Each element of the boolean will be true if any bit in a is nonzero.
1005  */
1006 static inline SimdFBool gmx_simdcall
1007 testBits(SimdFloat a)
1008 {
1009     SimdFBool         res;
1010
1011     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1012     {
1013         union
1014         {
1015             std::uint32_t i;
1016             float         f;
1017         } conv;
1018
1019         conv.f               = a.simdInternal_[i];
1020         res.simdInternal_[i] = (conv.i != 0);
1021     }
1022     return res;
1023 }
1024
1025 /*! \brief Logical \a and on single precision SIMD booleans.
1026  *
1027  * \param a logical vars 1
1028  * \param b logical vars 2
1029  * \return For each element, the result boolean is true if a \& b are true.
1030  *
1031  * \note This is not necessarily a bitwise operation - the storage format
1032  * of booleans is implementation-dependent.
1033  */
1034 static inline SimdFBool gmx_simdcall
1035 operator&&(SimdFBool a, SimdFBool b)
1036 {
1037     SimdFBool         res;
1038
1039     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1040     {
1041         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1042     }
1043     return res;
1044 }
1045
1046 /*! \brief Logical \a or on single precision SIMD booleans.
1047  *
1048  * \param a logical vars 1
1049  * \param b logical vars 2
1050  * \return For each element, the result boolean is true if a or b is true.
1051  *
1052  * Note that this is not necessarily a bitwise operation - the storage format
1053  * of booleans is implementation-dependent.
1054  *
1055  \ */
1056 static inline SimdFBool gmx_simdcall
1057 operator||(SimdFBool a, SimdFBool b)
1058 {
1059     SimdFBool         res;
1060
1061     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1062     {
1063         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1064     }
1065     return res;
1066 }
1067
1068 /*! \brief Returns non-zero if any of the boolean in SIMD a is True, otherwise 0.
1069  *
1070  * \param a Logical variable.
1071  * \return true if any element in a is true, otherwise false.
1072  *
1073  * The actual return value for truth will depend on the architecture,
1074  * so any non-zero value is considered truth.
1075  */
1076 static inline bool gmx_simdcall
1077 anyTrue(SimdFBool a)
1078 {
1079     bool res = false;
1080
1081     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1082     {
1083         res = res || a.simdInternal_[i];
1084     }
1085     return res;
1086 }
1087
1088 /*! \brief Select from single precision SIMD variable where boolean is true.
1089  *
1090  * \param a Floating-point variable to select from
1091  * \param mask Boolean selector
1092  * \return  For each element, a is selected for true, 0 for false.
1093  */
1094 static inline SimdFloat gmx_simdcall
1095 selectByMask(SimdFloat a, SimdFBool mask)
1096 {
1097     SimdFloat          res;
1098
1099     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1100     {
1101         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0F;
1102     }
1103     return res;
1104 }
1105
1106 /*! \brief Select from single precision SIMD variable where boolean is false.
1107  *
1108  * \param a Floating-point variable to select from
1109  * \param mask Boolean selector
1110  * \return  For each element, a is selected for false, 0 for true (sic).
1111  */
1112 static inline SimdFloat gmx_simdcall
1113 selectByNotMask(SimdFloat a, SimdFBool mask)
1114 {
1115     SimdFloat          res;
1116
1117     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1118     {
1119         res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0F : a.simdInternal_[i];
1120     }
1121     return res;
1122 }
1123
1124 /*! \brief Vector-blend SIMD float selection.
1125  *
1126  * \param a First source
1127  * \param b Second source
1128  * \param sel Boolean selector
1129  * \return For each element, select b if sel is true, a otherwise.
1130  */
1131 static inline SimdFloat gmx_simdcall
1132 blend(SimdFloat a, SimdFloat b, SimdFBool sel)
1133 {
1134     SimdFloat         res;
1135
1136     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1137     {
1138         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1139     }
1140     return res;
1141 }
1142
1143 /*! \}
1144  *
1145  * \name SIMD implementation integer (corresponding to float) bitwise logical operations
1146  * \{
1147  */
1148
1149 /*! \brief Integer SIMD bitwise and.
1150  *
1151  * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1152  *
1153  * \note You can \a not use this operation directly to select based on a boolean
1154  * SIMD variable, since booleans are separate from integer SIMD. If that
1155  * is what you need, have a look at \ref gmx::selectByMask instead.
1156  *
1157  * \param a first integer SIMD
1158  * \param b second integer SIMD
1159  * \return a \& b (bitwise and)
1160  */
1161 static inline SimdFInt32 gmx_simdcall
1162 operator&(SimdFInt32 a, SimdFInt32 b)
1163 {
1164     SimdFInt32         res;
1165
1166     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1167     {
1168         res.simdInternal_[i] = a.simdInternal_[i] & b.simdInternal_[i];
1169     }
1170     return res;
1171 }
1172
1173 /*! \brief Integer SIMD bitwise not/complement.
1174  *
1175  * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1176  *
1177  * \note You can \a not use this operation directly to select based on a boolean
1178  * SIMD variable, since booleans are separate from integer SIMD. If that
1179  * is what you need, have a look at \ref gmx::selectByMask instead.
1180  *
1181  * \param a integer SIMD
1182  * \param b integer SIMD
1183  * \return (~a) & b
1184  */
1185 static inline SimdFInt32 gmx_simdcall
1186 andNot(SimdFInt32 a, SimdFInt32 b)
1187 {
1188     SimdFInt32         res;
1189
1190     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1191     {
1192         res.simdInternal_[i] = ~a.simdInternal_[i] & b.simdInternal_[i];
1193     }
1194     return res;
1195 }
1196
1197 /*! \brief Integer SIMD bitwise or.
1198  *
1199  * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1200  *
1201  * \param a first integer SIMD
1202  * \param b second integer SIMD
1203  * \return a \| b (bitwise or)
1204  */
1205 static inline SimdFInt32 gmx_simdcall
1206 operator|(SimdFInt32 a, SimdFInt32 b)
1207 {
1208     SimdFInt32         res;
1209
1210     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1211     {
1212         res.simdInternal_[i] = a.simdInternal_[i] | b.simdInternal_[i];
1213     }
1214     return res;
1215 }
1216
1217 /*! \brief Integer SIMD bitwise xor.
1218  *
1219  * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1220  *
1221  * \param a first integer SIMD
1222  * \param b second integer SIMD
1223  * \return a ^ b (bitwise xor)
1224  */
1225 static inline SimdFInt32 gmx_simdcall
1226 operator^(SimdFInt32 a, SimdFInt32 b)
1227 {
1228     SimdFInt32         res;
1229
1230     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1231     {
1232         res.simdInternal_[i] = a.simdInternal_[i] ^ b.simdInternal_[i];
1233     }
1234     return res;
1235 }
1236
1237 /*! \}
1238  *
1239  * \name SIMD implementation integer (corresponding to float) arithmetics
1240  * \{
1241  */
1242
1243 /*! \brief Add SIMD integers.
1244  *
1245  * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1246  *  or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1247  *
1248  * \param a term1
1249  * \param b term2
1250  * \return a+b
1251  */
1252 static inline SimdFInt32 gmx_simdcall
1253 operator+(SimdFInt32 a, SimdFInt32 b)
1254 {
1255     SimdFInt32         res;
1256
1257     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1258     {
1259         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
1260     }
1261     return res;
1262 }
1263
1264 /*! \brief Subtract SIMD integers.
1265  *
1266  * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1267  *  or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1268  *
1269  * \param a term1
1270  * \param b term2
1271  * \return a-b
1272  */
1273 static inline SimdFInt32 gmx_simdcall
1274 operator-(SimdFInt32 a, SimdFInt32 b)
1275 {
1276     SimdFInt32         res;
1277
1278     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1279     {
1280         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
1281     }
1282     return res;
1283 }
1284
1285 /*! \brief Multiply SIMD integers.
1286  *
1287  * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1288  *  or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1289  *
1290  * \param a factor1
1291  * \param b factor2
1292  * \return a*b.
1293  *
1294  * \note Only the low 32 bits are retained, so this can overflow.
1295  */
1296 static inline SimdFInt32 gmx_simdcall
1297 operator*(SimdFInt32 a, SimdFInt32 b)
1298 {
1299     SimdFInt32         res;
1300
1301     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1302     {
1303         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
1304     }
1305     return res;
1306 }
1307
1308 /*! \}
1309  *
1310  * \name SIMD implementation integer (corresponding to float) comparisons, boolean, selection
1311  * \{
1312  */
1313
1314 /*! \brief Equality comparison of two integers corresponding to float values.
1315  *
1316  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1317  *
1318  * \param a SIMD integer1
1319  * \param b SIMD integer2
1320  * \return SIMD integer boolean with true for elements where a==b
1321  */
1322 static inline SimdFIBool gmx_simdcall
1323 operator==(SimdFInt32 a, SimdFInt32 b)
1324 {
1325     SimdFIBool         res;
1326
1327     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1328     {
1329         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
1330     }
1331     return res;
1332 }
1333
1334 /*! \brief Less-than comparison of two SIMD integers corresponding to float values.
1335  *
1336  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1337  *
1338  * \param a SIMD integer1
1339  * \param b SIMD integer2
1340  * \return SIMD integer boolean with true for elements where a<b
1341  */
1342 static inline SimdFIBool gmx_simdcall
1343 operator<(SimdFInt32 a, SimdFInt32 b)
1344 {
1345     SimdFIBool         res;
1346
1347     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1348     {
1349         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
1350     }
1351     return res;
1352 }
1353
1354 /*! \brief Check if any bit is set in each element
1355  *
1356  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1357  *
1358  * \param a SIMD integer
1359  * \return SIMD integer boolean with true for elements where any bit is set
1360  */
1361 static inline SimdFIBool gmx_simdcall
1362 testBits(SimdFInt32 a)
1363 {
1364     SimdFIBool         res;
1365
1366     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1367     {
1368         res.simdInternal_[i] = (a.simdInternal_[i] != 0);
1369     }
1370     return res;
1371 }
1372
1373 /*! \brief Logical AND on SimdFIBool.
1374  *
1375  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1376  *
1377  * \param a SIMD boolean 1
1378  * \param b SIMD boolean 2
1379  * \return True for elements where both a and b are true.
1380  */
1381 static inline SimdFIBool gmx_simdcall
1382 operator&&(SimdFIBool a, SimdFIBool b)
1383 {
1384     SimdFIBool        res;
1385
1386     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1387     {
1388         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1389     }
1390     return res;
1391 }
1392
1393 /*! \brief Logical OR on SimdFIBool.
1394  *
1395  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1396  *
1397  * \param a SIMD boolean 1
1398  * \param b SIMD boolean 2
1399  * \return True for elements where both a and b are true.
1400  */
1401 static inline SimdFIBool gmx_simdcall
1402 operator||(SimdFIBool a, SimdFIBool b)
1403 {
1404     SimdFIBool         res;
1405
1406     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1407     {
1408         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1409     }
1410     return res;
1411 }
1412
1413 /*! \brief Returns true if any of the boolean in x is True, otherwise 0.
1414  *
1415  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1416  *
1417  * The actual return value for "any true" will depend on the architecture.
1418  * Any non-zero value should be considered truth.
1419  *
1420  * \param a SIMD boolean
1421  * \return True if any of the elements in a is true, otherwise 0.
1422  */
1423 static inline bool gmx_simdcall
1424 anyTrue(SimdFIBool a)
1425 {
1426     bool res = false;
1427
1428     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1429     {
1430         res = res || a.simdInternal_[i];
1431     }
1432     return res;
1433 }
1434
1435 /*! \brief Select from \ref gmx::SimdFInt32 variable where boolean is true.
1436  *
1437  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1438  *
1439  * \param a SIMD integer to select from
1440  * \param mask Boolean selector
1441  * \return Elements from a where sel is true, 0 otherwise.
1442  */
1443 static inline SimdFInt32 gmx_simdcall
1444 selectByMask(SimdFInt32 a, SimdFIBool mask)
1445 {
1446     SimdFInt32         res;
1447
1448     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1449     {
1450         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0F;
1451     }
1452     return res;
1453 }
1454
1455 /*! \brief Select from \ref gmx::SimdFInt32 variable where boolean is false.
1456  *
1457  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1458  *
1459  * \param a SIMD integer to select from
1460  * \param mask Boolean selector
1461  * \return Elements from a where sel is false, 0 otherwise (sic).
1462  */
1463 static inline SimdFInt32 gmx_simdcall
1464 selectByNotMask(SimdFInt32 a, SimdFIBool mask)
1465 {
1466     SimdFInt32         res;
1467
1468     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1469     {
1470         res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0F : a.simdInternal_[i];
1471     }
1472     return res;
1473 }
1474
1475 /*! \brief Vector-blend SIMD integer selection.
1476  *
1477  * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1478  *
1479  * \param a First source
1480  * \param b Second source
1481  * \param sel Boolean selector
1482  * \return For each element, select b if sel is true, a otherwise.
1483  */
1484 static inline SimdFInt32 gmx_simdcall
1485 blend(SimdFInt32 a, SimdFInt32 b, SimdFIBool sel)
1486 {
1487     SimdFInt32        res;
1488
1489     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1490     {
1491         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1492     }
1493     return res;
1494 }
1495
1496 /*! \}
1497  *
1498  * \name SIMD implementation conversion operations
1499  * \{
1500  */
1501
1502 /*! \brief Round single precision floating point to integer.
1503  *
1504  * \param a SIMD floating-point
1505  * \return SIMD integer, rounded to nearest integer.
1506  *
1507  * \note Round mode is implementation defined. The only guarantee is that it
1508  * is consistent between rounding functions (round, cvtR2I).
1509  */
1510 static inline SimdFInt32 gmx_simdcall
1511 cvtR2I(SimdFloat a)
1512 {
1513     SimdFInt32         b;
1514
1515     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1516     {
1517         b.simdInternal_[i] = std::round(a.simdInternal_[i]);
1518     }
1519     return b;
1520 };
1521
1522 /*! \brief Truncate single precision floating point to integer.
1523  *
1524  * \param a SIMD floating-point
1525  * \return SIMD integer, truncated to nearest integer.
1526  */
1527 static inline SimdFInt32 gmx_simdcall
1528 cvttR2I(SimdFloat a)
1529 {
1530     SimdFInt32         b;
1531
1532     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1533     {
1534         b.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
1535     }
1536     return b;
1537 };
1538
1539 /*! \brief Convert integer to single precision floating point.
1540  *
1541  * \param a SIMD integer
1542  * \return SIMD floating-point
1543  */
1544 static inline SimdFloat gmx_simdcall
1545 cvtI2R(SimdFInt32 a)
1546 {
1547     SimdFloat         b;
1548
1549     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1550     {
1551         b.simdInternal_[i] = a.simdInternal_[i];
1552     }
1553     return b;
1554 };
1555
1556 /*! \brief Convert from single precision boolean to corresponding integer boolean
1557  *
1558  * \param a SIMD floating-point boolean
1559  * \return SIMD integer boolean
1560  */
1561 static inline SimdFIBool gmx_simdcall
1562 cvtB2IB(SimdFBool a)
1563 {
1564     SimdFIBool         b;
1565
1566     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1567     {
1568         b.simdInternal_[i] = a.simdInternal_[i];
1569     }
1570     return b;
1571 };
1572
1573 /*! \brief Convert from integer boolean to corresponding single precision boolean
1574  *
1575  * \param a SIMD integer boolean
1576  * \return SIMD floating-point boolean
1577  */
1578 static inline SimdFBool gmx_simdcall
1579 cvtIB2B(SimdFIBool a)
1580 {
1581     SimdFBool         b;
1582
1583     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1584     {
1585         b.simdInternal_[i] = a.simdInternal_[i];
1586     }
1587     return b;
1588 };
1589
1590 /*! \} */
1591
1592 /*! \} */
1593 /*! \endcond */
1594
1595 }      // namespace gmx
1596
1597 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H