src/gromacs/simd/impl_reference/impl_reference_simd_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
  37 #define GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
  38
  39 /*! \libinternal \file
  40  *
  41  * \brief Reference implementation, SIMD double precision.
  42  *
  43  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  44  *
  45  * \ingroup module_simd
  46  */
  47
  48 #include "config.h"
  49
  50 #include <cassert>
  51 #include <cmath>
  52 #include <cstddef>
  53 #include <cstdint>
  54
  55 #include <algorithm>
  56 #include <array>
  57
  58 #include "gromacs/math/utilities.h"
  59 #include "gromacs/utility/fatalerror.h"
  60
  61 #include "impl_reference_definitions.h"
  62 #include "impl_reference_simd_float.h"
  63
  64 namespace gmx
  65 {
  66
  67 /*! \cond libapi */
  68 /*! \addtogroup module_simd */
  69 /*! \{ */
  70
  71 /* \name SIMD implementation data types
  72  * \{
  73  */
  74
  75 /*! \libinternal \brief Double SIMD variable. Available if GMX_SIMD_HAVE_DOUBLE is 1.
  76  *
  77  * \note This variable cannot be placed inside other structures or classes, since
  78  *       some compilers (including at least clang-3.7) appear to lose the
  79  *       alignment. This is likely particularly severe when allocating such
  80  *       memory on the heap, but it occurs for stack structures too.
  81  */
  82 class SimdDouble
  83 {
  84     public:
  85         SimdDouble() {}
  86
  87         //! \brief Construct from scalar
  88         SimdDouble(double d) { simdInternal_.fill(d); }
  89
  90         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
  91          *
  92          * This has to be public to enable usage in combination with static inline
  93          * functions, but it should never, EVER, be accessed by any code outside
  94          * the corresponding implementation directory since the type will depend
  95          * on the architecture.
  96          */
  97         std::array<double, GMX_SIMD_DOUBLE_WIDTH>  simdInternal_;
  98 };
  99
 100 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from double.
 101  *
 102  * Available if GMX_SIMD_HAVE_DOUBLE is 1.
 103  *
 104  * \note The integer SIMD type will always be available, but on architectures
 105  * that do not have any real integer SIMD support it might be defined as the
 106  * floating-point type. This will work fine, since there are separate defines
 107  * for whether the implementation can actually do any operations on integer
 108  * SIMD types.
 109  *
 110  * \note This variable cannot be placed inside other structures or classes, since
 111  *       some compilers (including at least clang-3.7) appear to lose the
 112  *       alignment. This is likely particularly severe when allocating such
 113  *       memory on the heap, but it occurs for stack structures too.
 114  */
 115 class SimdDInt32
 116 {
 117     public:
 118         SimdDInt32() {}
 119
 120         //! \brief Construct from scalar
 121         SimdDInt32(std::int32_t i) { simdInternal_.fill(i); }
 122
 123         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 124          *
 125          * This has to be public to enable usage in combination with static inline
 126          * functions, but it should never, EVER, be accessed by any code outside
 127          * the corresponding implementation directory since the type will depend
 128          * on the architecture.
 129          */
 130         std::array<std::int32_t, GMX_SIMD_DINT32_WIDTH>  simdInternal_;
 131 };
 132
 133 /*! \libinternal \brief Boolean type for double SIMD data.
 134  *
 135  *  Available if GMX_SIMD_HAVE_DOUBLE is 1.
 136  *
 137  * \note This variable cannot be placed inside other structures or classes, since
 138  *       some compilers (including at least clang-3.7) appear to lose the
 139  *       alignment. This is likely particularly severe when allocating such
 140  *       memory on the heap, but it occurs for stack structures too.
 141  */
 142 class SimdDBool
 143 {
 144     public:
 145         SimdDBool() {}
 146
 147         //! \brief Construct from scalar bool
 148         SimdDBool(bool b) { simdInternal_.fill(b); }
 149
 150         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 151          *
 152          * This has to be public to enable usage in combination with static inline
 153          * functions, but it should never, EVER, be accessed by any code outside
 154          * the corresponding implementation directory since the type will depend
 155          * on the architecture.
 156          */
 157         std::array<bool, GMX_SIMD_DOUBLE_WIDTH>  simdInternal_;
 158 };
 159
 160 /*! \libinternal \brief Boolean type for integer datatypes corresponding to double SIMD.
 161  *
 162  * Available if GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
 163  *
 164  * \note This variable cannot be placed inside other structures or classes, since
 165  *       some compilers (including at least clang-3.7) appear to lose the
 166  *       alignment. This is likely particularly severe when allocating such
 167  *       memory on the heap, but it occurs for stack structures too.
 168  */
 169 class SimdDIBool
 170 {
 171     public:
 172         SimdDIBool() {}
 173
 174         //! \brief Construct from scalar
 175         SimdDIBool(bool b) { simdInternal_.fill(b); }
 176
 177         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 178          *
 179          * This has to be public to enable usage in combination with static inline
 180          * functions, but it should never, EVER, be accessed by any code outside
 181          * the corresponding implementation directory since the type will depend
 182          * on the architecture.
 183          */
 184         std::array<bool, GMX_SIMD_DINT32_WIDTH>  simdInternal_;
 185 };
 186
 187 /*! \}
 188  *
 189  * \name SIMD implementation load/store operations for double precision floating point
 190  * \{
 191  */
 192
 193 /*! \brief Load \ref GMX_SIMD_DOUBLE_WIDTH numbers from aligned memory.
 194  *
 195  * \param m Pointer to memory aligned to the SIMD width.
 196  * \return SIMD variable with data loaded.
 197  */
 198 static inline SimdDouble gmx_simdcall
 199 simdLoad(const double *m, SimdDoubleTag = {})
 200 {
 201     SimdDouble a;
 202
 203     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(double)) == 0);
 204
 205     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 206     return a;
 207 }
 208
 209 /*! \brief Store the contents of SIMD double variable to aligned memory m.
 210  *
 211  * \param[out] m Pointer to memory, aligned to SIMD width.
 212  * \param a SIMD variable to store
 213  */
 214 static inline void gmx_simdcall
 215 store(double *m, SimdDouble a)
 216 {
 217     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(double)) == 0);
 218
 219     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 220 }
 221
 222 /*! \brief Load SIMD double from unaligned memory.
 223  *
 224  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 225  *
 226  * \param m Pointer to memory, no alignment requirement.
 227  * \return SIMD variable with data loaded.
 228  */
 229 static inline SimdDouble gmx_simdcall
 230 simdLoadU(const double *m, SimdDoubleTag = {})
 231 {
 232     SimdDouble a;
 233     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 234     return a;
 235 }
 236
 237 /*! \brief Store SIMD double to unaligned memory.
 238  *
 239  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 240  *
 241  * \param[out] m Pointer to memory, no alignment requirement.
 242  * \param a SIMD variable to store.
 243  */
 244 static inline void gmx_simdcall
 245 storeU(double *m, SimdDouble a)
 246 {
 247     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 248 }
 249
 250 /*! \brief Set all SIMD double variable elements to 0.0.
 251  *
 252  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 253  * internally to handle all types rather than adding the suffix used here.
 254  *
 255  * \return SIMD 0.0
 256  */
 257 static inline SimdDouble gmx_simdcall
 258 setZeroD()
 259 {
 260     return SimdDouble(0.0);
 261 }
 262
 263 /*! \}
 264  *
 265  * \name SIMD implementation load/store operations for integers (corresponding to double)
 266  * \{
 267  */
 268
 269 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
 270  *
 271  * You should typically just call \ref gmx::load(), which uses proxy objects
 272  * internally to handle all types rather than adding the suffix used here.
 273  *
 274  * \param m Pointer to memory, aligned to (double) integer SIMD width.
 275  * \return SIMD integer variable.
 276  */
 277 static inline SimdDInt32 gmx_simdcall
 278 simdLoad(const std::int32_t * m, SimdDInt32Tag)
 279 {
 280     SimdDInt32 a;
 281
 282     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(std::int32_t)) == 0);
 283
 284     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 285     return a;
 286 };
 287
 288 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
 289  *
 290  * \param m Memory aligned to (double) integer SIMD width.
 291  * \param a SIMD (double) integer variable to store.
 292  */
 293 static inline void gmx_simdcall
 294 store(std::int32_t * m, SimdDInt32 a)
 295 {
 296     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(std::int32_t)) == 0);
 297
 298     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 299 };
 300
 301 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx::SimdDouble.
 302  *
 303  * You should typically just call \ref gmx::loadU(), which uses proxy objects
 304  * internally to handle all types rather than adding the suffix used here.
 305  *
 306  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 307  *
 308  * \param m Pointer to memory, no alignment requirements.
 309  * \return SIMD integer variable.
 310  */
 311 static inline SimdDInt32 gmx_simdcall
 312 simdLoadU(const std::int32_t *m, SimdDInt32Tag)
 313 {
 314     SimdDInt32 a;
 315     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 316     return a;
 317 }
 318
 319 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
 320  *
 321  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 322  *
 323  * \param m Memory pointer, no alignment requirements.
 324  * \param a SIMD (double) integer variable to store.
 325  */
 326 static inline void gmx_simdcall
 327 storeU(std::int32_t * m, SimdDInt32 a)
 328 {
 329     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 330 }
 331
 332 /*! \brief Set all SIMD (double) integer variable elements to 0.
 333  *
 334  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 335  * internally to handle all types rather than adding the suffix used here.
 336  *
 337  * \return SIMD 0
 338  */
 339 static inline SimdDInt32 gmx_simdcall
 340 setZeroDI()
 341 {
 342     return SimdDInt32(0);
 343 }
 344
 345 /*! \brief Extract element with index i from \ref gmx::SimdDInt32.
 346  *
 347  * Available if \ref GMX_SIMD_HAVE_DINT32_EXTRACT is 1.
 348  *
 349  * \tparam index Compile-time constant, position to extract (first position is 0)
 350  * \param  a     SIMD variable from which to extract value.
 351  * \return Single integer from position index in SIMD variable.
 352  */
 353 template<int index>
 354 static inline std::int32_t gmx_simdcall
 355 extract(SimdDInt32 a)
 356 {
 357     return a.simdInternal_[index];
 358 }
 359
 360 /*! \}
 361  *
 362  * \name SIMD implementation double precision floating-point bitwise logical operations
 363  * \{
 364  */
 365
 366 /*! \brief Bitwise and for two SIMD double variables.
 367  *
 368  * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 369  *
 370  * \param a data1
 371  * \param b data2
 372  * \return data1 & data2
 373  */
 374 static inline SimdDouble gmx_simdcall
 375 operator&(SimdDouble a, SimdDouble b)
 376 {
 377     SimdDouble         res;
 378
 379     union
 380     {
 381         double        r;
 382         std::int64_t  i;
 383     }
 384     conv1, conv2;
 385
 386     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 387     {
 388         conv1.r              = a.simdInternal_[i];
 389         conv2.r              = b.simdInternal_[i];
 390         conv1.i              = conv1.i & conv2.i;
 391         res.simdInternal_[i] = conv1.r;
 392     }
 393     return res;
 394 }
 395
 396 /*! \brief Bitwise andnot for SIMD double.
 397  *
 398  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 399  *
 400  * \param a data1
 401  * \param b data2
 402  * \return (~data1) & data2
 403  */
 404 static inline SimdDouble gmx_simdcall
 405 andNot(SimdDouble a, SimdDouble b)
 406 {
 407     SimdDouble         res;
 408
 409     union
 410     {
 411         double        r;
 412         std::int64_t  i;
 413     }
 414     conv1, conv2;
 415
 416     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 417     {
 418         conv1.r              = a.simdInternal_[i];
 419         conv2.r              = b.simdInternal_[i];
 420         conv1.i              = ~conv1.i & conv2.i;
 421         res.simdInternal_[i] = conv1.r;
 422     }
 423     return res;
 424 }
 425
 426 /*! \brief Bitwise or for SIMD double.
 427  *
 428  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 429  *
 430  * \param a data1
 431  * \param b data2
 432  * \return data1 | data2
 433  */
 434 static inline SimdDouble gmx_simdcall
 435 operator|(SimdDouble a, SimdDouble b)
 436 {
 437     SimdDouble         res;
 438
 439     union
 440     {
 441         double        r;
 442         std::int64_t  i;
 443     }
 444     conv1, conv2;
 445
 446     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 447     {
 448         conv1.r              = a.simdInternal_[i];
 449         conv2.r              = b.simdInternal_[i];
 450         conv1.i              = conv1.i | conv2.i;
 451         res.simdInternal_[i] = conv1.r;
 452     }
 453     return res;
 454 }
 455
 456 /*! \brief Bitwise xor for SIMD double.
 457  *
 458  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 459  *
 460  * \param a data1
 461  * \param b data2
 462  * \return data1 ^ data2
 463  */
 464 static inline SimdDouble gmx_simdcall
 465 operator^(SimdDouble a, SimdDouble b)
 466 {
 467     SimdDouble         res;
 468
 469     union
 470     {
 471         double        r;
 472         std::int64_t  i;
 473     }
 474     conv1, conv2;
 475
 476     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 477     {
 478         conv1.r              = a.simdInternal_[i];
 479         conv2.r              = b.simdInternal_[i];
 480         conv1.i              = conv1.i ^ conv2.i;
 481         res.simdInternal_[i] = conv1.r;
 482     }
 483     return res;
 484 }
 485
 486 /*! \}
 487  *
 488  * \name SIMD implementation double precision floating-point arithmetics
 489  * \{
 490  */
 491
 492 /*! \brief Add two double SIMD variables.
 493  *
 494  * \param a term1
 495  * \param b term2
 496  * \return a+b
 497  */
 498 static inline SimdDouble gmx_simdcall
 499 operator+(SimdDouble a, SimdDouble b)
 500 {
 501     SimdDouble         res;
 502
 503     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 504     {
 505         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
 506     }
 507     return res;
 508 }
 509
 510 /*! \brief Subtract two double SIMD variables.
 511  *
 512  * \param a term1
 513  * \param b term2
 514  * \return a-b
 515  */
 516 static inline SimdDouble gmx_simdcall
 517 operator-(SimdDouble a, SimdDouble b)
 518 {
 519     SimdDouble         res;
 520
 521     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 522     {
 523         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
 524     }
 525     return res;
 526 }
 527
 528 /*! \brief SIMD double precision negate.
 529  *
 530  * \param a SIMD double precision value
 531  * \return -a
 532  */
 533 static inline SimdDouble gmx_simdcall
 534 operator-(SimdDouble a)
 535 {
 536     SimdDouble         res;
 537
 538     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 539     {
 540         res.simdInternal_[i] = -a.simdInternal_[i];
 541     }
 542     return res;
 543 }
 544
 545 /*! \brief Multiply two double SIMD variables.
 546  *
 547  * \param a factor1
 548  * \param b factor2
 549  * \return a*b.
 550  */
 551 static inline SimdDouble gmx_simdcall
 552 operator*(SimdDouble a, SimdDouble b)
 553 {
 554     SimdDouble         res;
 555
 556     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 557     {
 558         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
 559     }
 560     return res;
 561 }
 562
 563 /*! \brief SIMD double Fused-multiply-add. Result is a*b+c.
 564  *
 565  * \param a factor1
 566  * \param b factor2
 567  * \param c term
 568  * \return a*b+c
 569  */
 570 static inline SimdDouble gmx_simdcall
 571 fma(SimdDouble a, SimdDouble b, SimdDouble c)
 572 {
 573     return a*b+c;
 574 }
 575
 576 /*! \brief SIMD double Fused-multiply-subtract. Result is a*b-c.
 577  *
 578  * \param a factor1
 579  * \param b factor2
 580  * \param c term
 581  * \return a*b-c
 582  */
 583 static inline SimdDouble gmx_simdcall
 584 fms(SimdDouble a, SimdDouble b, SimdDouble c)
 585 {
 586     return a*b-c;
 587 }
 588
 589 /*! \brief SIMD double Fused-negated-multiply-add. Result is -a*b+c.
 590  *
 591  * \param a factor1
 592  * \param b factor2
 593  * \param c term
 594  * \return -a*b+c
 595  */
 596 static inline SimdDouble gmx_simdcall
 597 fnma(SimdDouble a, SimdDouble b, SimdDouble c)
 598 {
 599     return c-a*b;
 600 }
 601
 602 /*! \brief SIMD double Fused-negated-multiply-subtract. Result is -a*b-c.
 603  *
 604  * \param a factor1
 605  * \param b factor2
 606  * \param c term
 607  * \return -a*b-c
 608  */
 609 static inline SimdDouble gmx_simdcall
 610 fnms(SimdDouble a, SimdDouble b, SimdDouble c)
 611 {
 612     return -a*b-c;
 613 }
 614
 615 /*! \brief double SIMD 1.0/sqrt(x) lookup.
 616  *
 617  * This is a low-level instruction that should only be called from routines
 618  * implementing the inverse square root in simd_math.h.
 619  *
 620  * \param x Argument, x>0
 621  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 622  */
 623 static inline SimdDouble gmx_simdcall
 624 rsqrt(SimdDouble x)
 625 {
 626     SimdDouble         res;
 627
 628     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 629     {
 630         // sic - we only use single precision for the lookup
 631         res.simdInternal_[i] = 1.0F / std::sqrt(static_cast<float>(x.simdInternal_[i]));
 632     }
 633     return res;
 634 };
 635
 636 /*! \brief SIMD double 1.0/x lookup.
 637  *
 638  * This is a low-level instruction that should only be called from routines
 639  * implementing the reciprocal in simd_math.h.
 640  *
 641  * \param x Argument, x!=0
 642  * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
 643  */
 644 static inline SimdDouble gmx_simdcall
 645 rcp(SimdDouble x)
 646 {
 647     SimdDouble         res;
 648
 649     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 650     {
 651         // sic - we only use single precision for the lookup
 652         res.simdInternal_[i] = 1.0F / static_cast<float>(x.simdInternal_[i]);
 653     }
 654     return res;
 655 };
 656
 657 /*! \brief Add two double SIMD variables, masked version.
 658  *
 659  * \param a term1
 660  * \param b term2
 661  * \param m mask
 662  * \return a+b where mask is true, 0.0 otherwise.
 663  */
 664 static inline SimdDouble gmx_simdcall
 665 maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
 666 {
 667     SimdDouble         res;
 668
 669     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 670     {
 671         res.simdInternal_[i] = a.simdInternal_[i] + (m.simdInternal_[i] ? b.simdInternal_[i] : 0.0);
 672     }
 673     return res;
 674 }
 675
 676 /*! \brief Multiply two double SIMD variables, masked version.
 677  *
 678  * \param a factor1
 679  * \param b factor2
 680  * \param m mask
 681  * \return a*b where mask is true, 0.0 otherwise.
 682  */
 683 static inline SimdDouble gmx_simdcall
 684 maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
 685 {
 686     SimdDouble         res;
 687
 688     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 689     {
 690         res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i]) : 0.0;
 691     }
 692     return res;
 693 }
 694
 695 /*! \brief SIMD double fused multiply-add, masked version.
 696  *
 697  * \param a factor1
 698  * \param b factor2
 699  * \param c term
 700  * \param m mask
 701  * \return a*b+c where mask is true, 0.0 otherwise.
 702  */
 703 static inline SimdDouble gmx_simdcall
 704 maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
 705 {
 706     SimdDouble         res;
 707
 708     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 709     {
 710         res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i] + c.simdInternal_[i]) : 0.0;
 711     }
 712     return res;
 713 }
 714
 715 /*! \brief SIMD double 1.0/sqrt(x) lookup, masked version.
 716  *
 717  * This is a low-level instruction that should only be called from routines
 718  * implementing the inverse square root in simd_math.h.
 719  *
 720  * \param x Argument, x>0 for entries where mask is true.
 721  * \param m Mask
 722  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 723  *         The result for masked-out entries will be 0.0.
 724  */
 725 static inline SimdDouble gmx_simdcall
 726 maskzRsqrt(SimdDouble x, SimdDBool m)
 727 {
 728     SimdDouble         res;
 729
 730     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 731     {
 732         // sic - we only use single precision for the lookup
 733         res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0F / std::sqrt(static_cast<float>(x.simdInternal_[i])) : 0.0;
 734     }
 735     return res;
 736 }
 737
 738 /*! \brief SIMD double 1.0/x lookup, masked version.
 739  *
 740  * This is a low-level instruction that should only be called from routines
 741  * implementing the reciprocal in simd_math.h.
 742  *
 743  * \param x Argument, x>0 for entries where mask is true.
 744  * \param m Mask
 745  * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
 746  *         The result for masked-out entries will be 0.0.
 747  */
 748 static inline SimdDouble gmx_simdcall
 749 maskzRcp(SimdDouble x, SimdDBool m)
 750 {
 751     SimdDouble         res;
 752
 753     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 754     {
 755         res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0F / static_cast<float>(x.simdInternal_[i]) : 0.0;
 756     }
 757     return res;
 758 }
 759
 760 /*! \brief SIMD double floating-point fabs().
 761  *
 762  * \param a any floating point values
 763  * \return fabs(a) for each element.
 764  */
 765 static inline SimdDouble gmx_simdcall
 766 abs(SimdDouble a)
 767 {
 768     SimdDouble         res;
 769
 770     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 771     {
 772         res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
 773     }
 774     return res;
 775 }
 776
 777 /*! \brief Set each SIMD double element to the largest from two variables.
 778  *
 779  * \param a Any floating-point value
 780  * \param b Any floating-point value
 781  * \return max(a,b) for each element.
 782  */
 783 static inline SimdDouble gmx_simdcall
 784 max(SimdDouble a, SimdDouble b)
 785 {
 786     SimdDouble         res;
 787
 788     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 789     {
 790         res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
 791     }
 792     return res;
 793 }
 794
 795 /*! \brief Set each SIMD double element to the smallest from two variables.
 796  *
 797  * \param a Any floating-point value
 798  * \param b Any floating-point value
 799  * \return min(a,b) for each element.
 800  */
 801 static inline SimdDouble gmx_simdcall
 802 min(SimdDouble a, SimdDouble b)
 803 {
 804     SimdDouble         res;
 805
 806     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 807     {
 808         res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
 809     }
 810     return res;
 811 }
 812
 813 /*! \brief SIMD double round to nearest integer value (in floating-point format).
 814  *
 815  * \param a Any floating-point value
 816  * \return The nearest integer, represented in floating-point format.
 817  *
 818  * \note Round mode is implementation defined. The only guarantee is that it
 819  * is consistent between rounding functions (round, cvtR2I).
 820  */
 821 static inline SimdDouble gmx_simdcall
 822 round(SimdDouble a)
 823 {
 824     SimdDouble         res;
 825
 826     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 827     {
 828         res.simdInternal_[i] = std::round(a.simdInternal_[i]);
 829     }
 830     return res;
 831 }
 832
 833 /*! \brief Truncate SIMD double, i.e. round towards zero - common hardware instruction.
 834  *
 835  * \param a Any floating-point value
 836  * \return Integer rounded towards zero, represented in floating-point format.
 837  *
 838  * \note This is truncation towards zero, not floor(). The reason for this
 839  * is that truncation is virtually always present as a dedicated hardware
 840  * instruction, but floor() frequently isn't.
 841  */
 842 static inline SimdDouble gmx_simdcall
 843 trunc(SimdDouble a)
 844 {
 845     SimdDouble         res;
 846
 847     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 848     {
 849         res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
 850     }
 851     return res;
 852 }
 853
 854 /*! \brief Extract (integer) exponent and fraction from double precision SIMD.
 855  *
 856  * \param       value     Floating-point value to extract from
 857  * \param[out]  exponent  Returned exponent of value, integer SIMD format.
 858  * \return      Fraction of value, floating-point SIMD format.
 859  */
 860 static inline SimdDouble gmx_simdcall
 861 frexp(SimdDouble value, SimdDInt32 * exponent)
 862 {
 863     SimdDouble fraction;
 864
 865     for (std::size_t i = 0; i < fraction.simdInternal_.size(); i++)
 866     {
 867         fraction.simdInternal_[i] = std::frexp(value.simdInternal_[i], &exponent->simdInternal_[i]);
 868     }
 869     return fraction;
 870 }
 871
 872 /*! \brief Multiply a SIMD double value by the number 2 raised to an exp power.
 873  *
 874  * \tparam opt By default, this routine will return zero for input arguments
 875  *             that are so small they cannot be reproduced in the current
 876  *             precision. If the unsafe math optimization template parameter
 877  *             setting is used, these tests are skipped, and the result will
 878  *             be undefined (possible even NaN). This might happen below -127
 879  *             in single precision or -1023 in double, although some
 880  *             might use denormal support to extend the range.
 881  *
 882  * \param value Floating-point number to multiply with new exponent
 883  * \param exponent Integer that will not overflow as 2^exponent.
 884  * \return value*2^exponent
 885  */
 886 template <MathOptimization opt = MathOptimization::Safe>
 887 static inline SimdDouble gmx_simdcall
 888 ldexp(SimdDouble value, SimdDInt32 exponent)
 889 {
 890     SimdDouble           res;
 891
 892     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 893     {
 894         // std::ldexp already takes care of clamping arguments, so we do not
 895         // need to do anything in the reference implementation
 896         res.simdInternal_[i] = std::ldexp(value.simdInternal_[i], exponent.simdInternal_[i]);
 897     }
 898     return res;
 899 }
 900
 901 /*! \brief Return sum of all elements in SIMD double variable.
 902  *
 903  * \param a SIMD variable to reduce/sum.
 904  * \return The sum of all elements in the argument variable.
 905  *
 906  */
 907 static inline double gmx_simdcall
 908 reduce(SimdDouble a)
 909 {
 910     double sum = 0.0;
 911
 912     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
 913     {
 914         sum += a.simdInternal_[i];
 915     }
 916     return sum;
 917 }
 918
 919 /*! \}
 920  *
 921  * \name SIMD implementation double precision floating-point comparison, boolean, selection.
 922  * \{
 923  */
 924
 925 /*! \brief SIMD a==b for double SIMD.
 926  *
 927  * \param a value1
 928  * \param b value2
 929  * \return Each element of the boolean will be set to true if a==b.
 930  *
 931  * Beware that exact floating-point comparisons are difficult.
 932  */
 933 static inline SimdDBool gmx_simdcall
 934 operator==(SimdDouble a, SimdDouble b)
 935 {
 936     SimdDBool         res;
 937
 938     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 939     {
 940         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
 941     }
 942     return res;
 943 }
 944
 945 /*! \brief SIMD a!=b for double SIMD.
 946  *
 947  * \param a value1
 948  * \param b value2
 949  * \return Each element of the boolean will be set to true if a!=b.
 950  *
 951  * Beware that exact floating-point comparisons are difficult.
 952  */
 953 static inline SimdDBool gmx_simdcall
 954 operator!=(SimdDouble a, SimdDouble b)
 955 {
 956     SimdDBool         res;
 957
 958     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 959     {
 960         res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
 961     }
 962     return res;
 963 }
 964
 965 /*! \brief SIMD a<b for double SIMD.
 966  *
 967  * \param a value1
 968  * \param b value2
 969  * \return Each element of the boolean will be set to true if a<b.
 970  */
 971 static inline SimdDBool gmx_simdcall
 972 operator<(SimdDouble a, SimdDouble b)
 973 {
 974     SimdDBool          res;
 975
 976     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 977     {
 978         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
 979     }
 980     return res;
 981 }
 982
 983 /*! \brief SIMD a<=b for double SIMD.
 984  *
 985  * \param a value1
 986  * \param b value2
 987  * \return Each element of the boolean will be set to true if a<=b.
 988  */
 989 static inline SimdDBool gmx_simdcall
 990 operator<=(SimdDouble a, SimdDouble b)
 991 {
 992     SimdDBool          res;
 993
 994     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 995     {
 996         res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
 997     }
 998     return res;
 999 }
1000
1001 /*! \brief Return true if any bits are set in the single precision SIMD.
1002  *
1003  * This function is used to handle bitmasks, mainly for exclusions in the
1004  * inner kernels. Note that it will return true even for -0.0 (sign bit set),
1005  * so it is not identical to not-equal.
1006  *
1007  * \param a value
1008  * \return Each element of the boolean will be true if any bit in a is nonzero.
1009  */
1010 static inline SimdDBool gmx_simdcall
1011 testBits(SimdDouble a)
1012 {
1013     SimdDBool         res;
1014
1015     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1016     {
1017         union
1018         {
1019             std::uint64_t i;
1020             double        d;
1021         } conv;
1022
1023         conv.d               = a.simdInternal_[i];
1024         res.simdInternal_[i] = (conv.i != 0);
1025     }
1026     return res;
1027 }
1028
1029 /*! \brief Logical \a and on double precision SIMD booleans.
1030  *
1031  * \param a logical vars 1
1032  * \param b logical vars 2
1033  * \return For each element, the result boolean is true if a \& b are true.
1034  *
1035  * \note This is not necessarily a bitwise operation - the storage format
1036  * of booleans is implementation-dependent.
1037  */
1038 static inline SimdDBool gmx_simdcall
1039 operator&&(SimdDBool a, SimdDBool b)
1040 {
1041     SimdDBool         res;
1042
1043     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1044     {
1045         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1046     }
1047     return res;
1048 }
1049
1050 /*! \brief Logical \a or on double precision SIMD booleans.
1051  *
1052  * \param a logical vars 1
1053  * \param b logical vars 2
1054  * \return For each element, the result boolean is true if a or b is true.
1055  *
1056  * Note that this is not necessarily a bitwise operation - the storage format
1057  * of booleans is implementation-dependent.
1058  *
1059  \ */
1060 static inline SimdDBool gmx_simdcall
1061 operator||(SimdDBool a, SimdDBool b)
1062 {
1063     SimdDBool         res;
1064
1065     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1066     {
1067         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1068     }
1069     return res;
1070 }
1071
1072 /*! \brief Returns non-zero if any of the boolean in SIMD a is True, otherwise 0.
1073  *
1074  * \param a Logical variable.
1075  * \return true if any element in a is true, otherwise false.
1076  *
1077  * The actual return value for truth will depend on the architecture,
1078  * so any non-zero value is considered truth.
1079  */
1080 static inline bool gmx_simdcall
1081 anyTrue(SimdDBool a)
1082 {
1083     bool res = false;
1084
1085     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1086     {
1087         res = res || a.simdInternal_[i];
1088     }
1089     return res;
1090 }
1091
1092 /*! \brief Select from double precision SIMD variable where boolean is true.
1093  *
1094  * \param a Floating-point variable to select from
1095  * \param mask Boolean selector
1096  * \return  For each element, a is selected for true, 0 for false.
1097  */
1098 static inline SimdDouble gmx_simdcall
1099 selectByMask(SimdDouble a, SimdDBool mask)
1100 {
1101     SimdDouble          res;
1102
1103     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1104     {
1105         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0;
1106     }
1107     return res;
1108 }
1109
1110 /*! \brief Select from double precision SIMD variable where boolean is false.
1111  *
1112  * \param a Floating-point variable to select from
1113  * \param mask Boolean selector
1114  * \return  For each element, a is selected for false, 0 for true (sic).
1115  */
1116 static inline SimdDouble gmx_simdcall
1117 selectByNotMask(SimdDouble a, SimdDBool mask)
1118 {
1119     SimdDouble          res;
1120
1121     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1122     {
1123         res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0 : a.simdInternal_[i];
1124     }
1125     return res;
1126 }
1127
1128 /*! \brief Vector-blend SIMD double selection.
1129  *
1130  * \param a First source
1131  * \param b Second source
1132  * \param sel Boolean selector
1133  * \return For each element, select b if sel is true, a otherwise.
1134  */
1135 static inline SimdDouble gmx_simdcall
1136 blend(SimdDouble a, SimdDouble b, SimdDBool sel)
1137 {
1138     SimdDouble         res;
1139
1140     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1141     {
1142         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1143     }
1144     return res;
1145 }
1146
1147 /*! \}
1148  *
1149  * \name SIMD implementation integer (corresponding to double) bitwise logical operations
1150  * \{
1151  */
1152
1153 /*! \brief Integer SIMD bitwise and.
1154  *
1155  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1156  *
1157  * \note You can \a not use this operation directly to select based on a boolean
1158  * SIMD variable, since booleans are separate from integer SIMD. If that
1159  * is what you need, have a look at \ref gmx::selectByMask instead.
1160  *
1161  * \param a first integer SIMD
1162  * \param b second integer SIMD
1163  * \return a \& b (bitwise and)
1164  */
1165 static inline SimdDInt32 gmx_simdcall
1166 operator&(SimdDInt32 a, SimdDInt32 b)
1167 {
1168     SimdDInt32         res;
1169
1170     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1171     {
1172         res.simdInternal_[i] = a.simdInternal_[i] & b.simdInternal_[i];
1173     }
1174     return res;
1175 }
1176
1177 /*! \brief Integer SIMD bitwise not/complement.
1178  *
1179  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1180  *
1181  * \note You can \a not use this operation directly to select based on a boolean
1182  * SIMD variable, since booleans are separate from integer SIMD. If that
1183  * is what you need, have a look at \ref gmx::selectByMask instead.
1184  *
1185  * \param a integer SIMD
1186  * \param b integer SIMD
1187  * \return (~a) & b
1188  */
1189 static inline SimdDInt32 gmx_simdcall
1190 andNot(SimdDInt32 a, SimdDInt32 b)
1191 {
1192     SimdDInt32         res;
1193
1194     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1195     {
1196         res.simdInternal_[i] = ~a.simdInternal_[i] & b.simdInternal_[i];
1197     }
1198     return res;
1199 }
1200
1201 /*! \brief Integer SIMD bitwise or.
1202  *
1203  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1204  *
1205  * \param a first integer SIMD
1206  * \param b second integer SIMD
1207  * \return a \| b (bitwise or)
1208  */
1209 static inline SimdDInt32 gmx_simdcall
1210 operator|(SimdDInt32 a, SimdDInt32 b)
1211 {
1212     SimdDInt32         res;
1213
1214     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1215     {
1216         res.simdInternal_[i] = a.simdInternal_[i] | b.simdInternal_[i];
1217     }
1218     return res;
1219 }
1220
1221 /*! \brief Integer SIMD bitwise xor.
1222  *
1223  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1224  *
1225  * \param a first integer SIMD
1226  * \param b second integer SIMD
1227  * \return a ^ b (bitwise xor)
1228  */
1229 static inline SimdDInt32 gmx_simdcall
1230 operator^(SimdDInt32 a, SimdDInt32 b)
1231 {
1232     SimdDInt32         res;
1233
1234     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1235     {
1236         res.simdInternal_[i] = a.simdInternal_[i] ^ b.simdInternal_[i];
1237     }
1238     return res;
1239 }
1240
1241 /*! \}
1242  *
1243  * \name SIMD implementation integer (corresponding to double) arithmetics
1244  * \{
1245  */
1246
1247 /*! \brief Add SIMD integers.
1248  *
1249  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1250  *
1251  * \param a term1
1252  * \param b term2
1253  * \return a+b
1254  */
1255 static inline SimdDInt32 gmx_simdcall
1256 operator+(SimdDInt32 a, SimdDInt32 b)
1257 {
1258     SimdDInt32         res;
1259
1260     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1261     {
1262         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
1263     }
1264     return res;
1265 }
1266
1267 /*! \brief Subtract SIMD integers.
1268  *
1269  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1270  *
1271  * \param a term1
1272  * \param b term2
1273  * \return a-b
1274  */
1275 static inline SimdDInt32 gmx_simdcall
1276 operator-(SimdDInt32 a, SimdDInt32 b)
1277 {
1278     SimdDInt32         res;
1279
1280     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1281     {
1282         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
1283     }
1284     return res;
1285 }
1286
1287 /*! \brief Multiply SIMD integers.
1288  *
1289  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1290  *
1291  * \param a factor1
1292  * \param b factor2
1293  * \return a*b.
1294  *
1295  * \note Only the low 32 bits are retained, so this can overflow.
1296  */
1297 static inline SimdDInt32 gmx_simdcall
1298 operator*(SimdDInt32 a, SimdDInt32 b)
1299 {
1300     SimdDInt32         res;
1301
1302     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1303     {
1304         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
1305     }
1306     return res;
1307 }
1308
1309 /*! \}
1310  *
1311  * \name SIMD implementation integer (corresponding to double) comparisons, boolean selection
1312  * \{
1313  */
1314
1315 /*! \brief Equality comparison of two integers corresponding to double values.
1316  *
1317  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1318  *
1319  * \param a SIMD integer1
1320  * \param b SIMD integer2
1321  * \return SIMD integer boolean with true for elements where a==b
1322  */
1323 static inline SimdDIBool gmx_simdcall
1324 operator==(SimdDInt32 a, SimdDInt32 b)
1325 {
1326     SimdDIBool         res;
1327
1328     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1329     {
1330         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
1331     }
1332     return res;
1333 }
1334
1335 /*! \brief Less-than comparison of two SIMD integers corresponding to double values.
1336  *
1337  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1338  *
1339  * \param a SIMD integer1
1340  * \param b SIMD integer2
1341  * \return SIMD integer boolean with true for elements where a<b
1342  */
1343 static inline SimdDIBool gmx_simdcall
1344 operator<(SimdDInt32 a, SimdDInt32 b)
1345 {
1346     SimdDIBool         res;
1347
1348     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1349     {
1350         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
1351     }
1352     return res;
1353 }
1354
1355 /*! \brief Check if any bit is set in each element
1356  *
1357  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1358  *
1359  * \param a SIMD integer
1360  * \return SIMD integer boolean with true for elements where any bit is set
1361  */
1362 static inline SimdDIBool gmx_simdcall
1363 testBits(SimdDInt32 a)
1364 {
1365     SimdDIBool         res;
1366
1367     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1368     {
1369         res.simdInternal_[i] = (a.simdInternal_[i] != 0);
1370     }
1371     return res;
1372 }
1373
1374 /*! \brief Logical AND on SimdDIBool.
1375  *
1376  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1377  *
1378  * \param a SIMD boolean 1
1379  * \param b SIMD boolean 2
1380  * \return True for elements where both a and b are true.
1381  */
1382 static inline SimdDIBool gmx_simdcall
1383 operator&&(SimdDIBool a, SimdDIBool b)
1384 {
1385     SimdDIBool        res;
1386
1387     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1388     {
1389         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1390     }
1391     return res;
1392 }
1393
1394 /*! \brief Logical OR on SimdDIBool.
1395  *
1396  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1397  *
1398  * \param a SIMD boolean 1
1399  * \param b SIMD boolean 2
1400  * \return True for elements where both a and b are true.
1401  */
1402 static inline SimdDIBool gmx_simdcall
1403 operator||(SimdDIBool a, SimdDIBool b)
1404 {
1405     SimdDIBool         res;
1406
1407     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1408     {
1409         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1410     }
1411     return res;
1412 }
1413
1414 /*! \brief Returns true if any of the boolean in x is True, otherwise 0.
1415  *
1416  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1417  *
1418  * The actual return value for "any true" will depend on the architecture.
1419  * Any non-zero value should be considered truth.
1420  *
1421  * \param a SIMD boolean
1422  * \return True if any of the elements in a is true, otherwise 0.
1423  */
1424 static inline bool gmx_simdcall
1425 anyTrue(SimdDIBool a)
1426 {
1427     bool res = false;
1428
1429     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1430     {
1431         res = res || a.simdInternal_[i];
1432     }
1433     return res;
1434 }
1435
1436 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is true.
1437  *
1438  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1439  *
1440  * \param a SIMD integer to select from
1441  * \param mask Boolean selector
1442  * \return Elements from a where sel is true, 0 otherwise.
1443  */
1444 static inline SimdDInt32 gmx_simdcall
1445 selectByMask(SimdDInt32 a, SimdDIBool mask)
1446 {
1447     SimdDInt32         res;
1448
1449     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1450     {
1451         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0;
1452     }
1453     return res;
1454 }
1455
1456 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is false.
1457  *
1458  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1459  *
1460  * \param a SIMD integer to select from
1461  * \param mask Boolean selector
1462  * \return Elements from a where sel is false, 0 otherwise (sic).
1463  */
1464 static inline SimdDInt32 gmx_simdcall
1465 selectByNotMask(SimdDInt32 a, SimdDIBool mask)
1466 {
1467     SimdDInt32         res;
1468
1469     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1470     {
1471         res.simdInternal_[i] = mask.simdInternal_[i] ? 0 : a.simdInternal_[i];
1472     }
1473     return res;
1474 }
1475
1476 /*! \brief Vector-blend SIMD integer selection.
1477  *
1478  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1479  *
1480  * \param a First source
1481  * \param b Second source
1482  * \param sel Boolean selector
1483  * \return For each element, select b if sel is true, a otherwise.
1484  */
1485 static inline SimdDInt32 gmx_simdcall
1486 blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
1487 {
1488     SimdDInt32        res;
1489
1490     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1491     {
1492         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1493     }
1494     return res;
1495 }
1496
1497 /*! \}
1498  *
1499  * \name SIMD implementation conversion operations
1500  * \{
1501  */
1502
1503 /*! \brief Round double precision floating point to integer.
1504  *
1505  * \param a SIMD floating-point
1506  * \return SIMD integer, rounded to nearest integer.
1507  *
1508  * \note Round mode is implementation defined. The only guarantee is that it
1509  * is consistent between rounding functions (round, cvtR2I).
1510  */
1511 static inline SimdDInt32 gmx_simdcall
1512 cvtR2I(SimdDouble a)
1513 {
1514     SimdDInt32         b;
1515
1516     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1517     {
1518         b.simdInternal_[i] = std::round(a.simdInternal_[i]);
1519     }
1520     return b;
1521 };
1522
1523 /*! \brief Truncate double precision floating point to integer.
1524  *
1525  * \param a SIMD floating-point
1526  * \return SIMD integer, truncated to nearest integer.
1527  */
1528 static inline SimdDInt32 gmx_simdcall
1529 cvttR2I(SimdDouble a)
1530 {
1531     SimdDInt32         b;
1532
1533     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1534     {
1535         b.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
1536     }
1537     return b;
1538 };
1539
1540 /*! \brief Convert integer to double precision floating point.
1541  *
1542  * \param a SIMD integer
1543  * \return SIMD floating-point
1544  */
1545 static inline SimdDouble gmx_simdcall
1546 cvtI2R(SimdDInt32 a)
1547 {
1548     SimdDouble         b;
1549
1550     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1551     {
1552         b.simdInternal_[i] = a.simdInternal_[i];
1553     }
1554     return b;
1555 };
1556
1557 /*! \brief Convert from double precision boolean to corresponding integer boolean
1558  *
1559  * \param a SIMD floating-point boolean
1560  * \return SIMD integer boolean
1561  */
1562 static inline SimdDIBool gmx_simdcall
1563 cvtB2IB(SimdDBool a)
1564 {
1565     SimdDIBool         b;
1566
1567     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1568     {
1569         b.simdInternal_[i] = a.simdInternal_[i];
1570     }
1571     return b;
1572 };
1573
1574 /*! \brief Convert from integer boolean to corresponding double precision boolean
1575  *
1576  * \param a SIMD integer boolean
1577  * \return SIMD floating-point boolean
1578  */
1579 static inline SimdDBool gmx_simdcall
1580 cvtIB2B(SimdDIBool a)
1581 {
1582     SimdDBool         b;
1583
1584     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1585     {
1586         b.simdInternal_[i] = a.simdInternal_[i];
1587     }
1588     return b;
1589 };
1590
1591 /*! \brief Convert SIMD float to double.
1592  *
1593  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1594  * \ref GMX_SIMD_DOUBLE_WIDTH.
1595  *
1596  * Float/double conversions are complex since the SIMD width could either
1597  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1598  * need to check for the width in the code, and have different code paths.
1599  *
1600  * \param f Single-precision SIMD variable
1601  * \return Double-precision SIMD variable of the same width
1602  */
1603 static inline SimdDouble gmx_simdcall
1604 cvtF2D(SimdFloat gmx_unused f)
1605 {
1606 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1607     SimdDouble        d;
1608     for (std::size_t i = 0; i < d.simdInternal_.size(); i++)
1609     {
1610         d.simdInternal_[i] = f.simdInternal_[i];
1611     }
1612     return d;
1613 #else
1614     gmx_fatal(FARGS, "cvtF2D() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1615 #endif
1616 }
1617
1618 /*! \brief Convert SIMD double to float.
1619  *
1620  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1621  * \ref GMX_SIMD_DOUBLE_WIDTH.
1622  *
1623  * Float/double conversions are complex since the SIMD width could either
1624  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1625  * need to check for the width in the code, and have different code paths.
1626  *
1627  * \param d Double-precision SIMD variable
1628  * \return Single-precision SIMD variable of the same width
1629  */
1630 static inline SimdFloat gmx_simdcall
1631 cvtD2F(SimdDouble gmx_unused d)
1632 {
1633 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1634     SimdFloat        f;
1635     for (std::size_t i = 0; i < f.simdInternal_.size(); i++)
1636     {
1637         f.simdInternal_[i] = d.simdInternal_[i];
1638     }
1639     return f;
1640 #else
1641     gmx_fatal(FARGS, "cvtD2F() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1642 #endif
1643 }
1644
1645 /*! \brief Convert SIMD float to double.
1646  *
1647  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1648  * as \ref GMX_SIMD_DOUBLE_WIDTH.
1649  *
1650  * Float/double conversions are complex since the SIMD width could either
1651  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1652  * need to check for the width in the code, and have different code paths.
1653  *
1654  * \param f Single-precision SIMD variable
1655  * \param[out] d0 Double-precision SIMD variable, first half of values from f.
1656  * \param[out] d1 Double-precision SIMD variable, second half of values from f.
1657  */
1658 static inline void gmx_simdcall
1659 cvtF2DD(SimdFloat gmx_unused f, SimdDouble gmx_unused * d0, SimdDouble gmx_unused * d1)
1660 {
1661 #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
1662     for (std::size_t i = 0; i < d0->simdInternal_.size(); i++)
1663     {
1664         d0->simdInternal_[i] = f.simdInternal_[i];
1665         d1->simdInternal_[i] = f.simdInternal_[f.simdInternal_.size()/2 + i];
1666     }
1667 #else
1668     gmx_fatal(FARGS, "simdCvtF2DD() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1669 #endif
1670 }
1671
1672 /*! \brief Convert SIMD double to float.
1673  *
1674  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1675  * as \ref GMX_SIMD_DOUBLE_WIDTH.
1676  *
1677  * Float/double conversions are complex since the SIMD width could either
1678  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1679  * need to check for the width in the code, and have different code paths.
1680  *
1681  * \param d0 Double-precision SIMD variable, first half of values to put in f.
1682  * \param d1 Double-precision SIMD variable, second half of values to put in f.
1683  * \return Single-precision SIMD variable with all values.
1684  */
1685 static inline SimdFloat gmx_simdcall
1686 cvtDD2F(SimdDouble gmx_unused d0, SimdDouble gmx_unused d1)
1687 {
1688 #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
1689     SimdFloat        f;
1690     for (std::size_t i = 0; i < d0.simdInternal_.size(); i++)
1691     {
1692         f.simdInternal_[i]                            = d0.simdInternal_[i];
1693         f.simdInternal_[f.simdInternal_.size()/2 + i] = d1.simdInternal_[i];
1694     }
1695     return f;
1696 #else
1697     gmx_fatal(FARGS, "simdCvtDD2F() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1698 #endif
1699 }
1700
1701 /*! \} */
1702
1703 /*! \} */
1704 /*! \endcond */
1705
1706 }      // namespace gmx
1707
1708 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H