src/gromacs/simd/impl_reference/impl_reference_simd_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2019,2020,2021, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
  37 #define GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
  38
  39 /*! \libinternal \file
  40  *
  41  * \brief Reference implementation, SIMD double precision.
  42  *
  43  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  44  *
  45  * \ingroup module_simd
  46  */
  47
  48 #include "config.h"
  49
  50 #include <cassert>
  51 #include <cmath>
  52 #include <cstddef>
  53 #include <cstdint>
  54
  55 #include <algorithm>
  56 #include <array>
  57
  58 #include "gromacs/math/utilities.h"
  59 #include "gromacs/utility/fatalerror.h"
  60
  61 #include "impl_reference_definitions.h"
  62 #include "impl_reference_simd_float.h"
  63
  64 namespace gmx
  65 {
  66
  67 /*! \cond libapi */
  68 /*! \addtogroup module_simd */
  69 /*! \{ */
  70
  71 /*! \name SIMD implementation data types
  72  * \{
  73  */
  74
  75 /*! \libinternal \brief Double SIMD variable. Available if GMX_SIMD_HAVE_DOUBLE is 1.
  76  *
  77  * \note This variable cannot be placed inside other structures or classes, since
  78  *       some compilers (including at least clang-3.7) appear to lose the
  79  *       alignment. This is likely particularly severe when allocating such
  80  *       memory on the heap, but it occurs for stack structures too.
  81  */
  82 class SimdDouble
  83 {
  84 public:
  85     SimdDouble() {}
  86
  87     //! \brief Construct from scalar
  88     SimdDouble(double d) { simdInternal_.fill(d); }
  89
  90     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
  91      *
  92      * This has to be public to enable usage in combination with static inline
  93      * functions, but it should never, EVER, be accessed by any code outside
  94      * the corresponding implementation directory since the type will depend
  95      * on the architecture.
  96      */
  97     std::array<double, GMX_SIMD_DOUBLE_WIDTH> simdInternal_;
  98 };
  99
 100 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from double.
 101  *
 102  * Available if GMX_SIMD_HAVE_DOUBLE is 1.
 103  *
 104  * \note The integer SIMD type will always be available, but on architectures
 105  * that do not have any real integer SIMD support it might be defined as the
 106  * floating-point type. This will work fine, since there are separate defines
 107  * for whether the implementation can actually do any operations on integer
 108  * SIMD types.
 109  *
 110  * \note This variable cannot be placed inside other structures or classes, since
 111  *       some compilers (including at least clang-3.7) appear to lose the
 112  *       alignment. This is likely particularly severe when allocating such
 113  *       memory on the heap, but it occurs for stack structures too.
 114  */
 115 class SimdDInt32
 116 {
 117 public:
 118     SimdDInt32() {}
 119
 120     //! \brief Construct from scalar
 121     SimdDInt32(std::int32_t i) { simdInternal_.fill(i); }
 122
 123     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 124      *
 125      * This has to be public to enable usage in combination with static inline
 126      * functions, but it should never, EVER, be accessed by any code outside
 127      * the corresponding implementation directory since the type will depend
 128      * on the architecture.
 129      */
 130     std::array<std::int32_t, GMX_SIMD_DINT32_WIDTH> simdInternal_;
 131 };
 132
 133 /*! \libinternal \brief Boolean type for double SIMD data.
 134  *
 135  *  Available if GMX_SIMD_HAVE_DOUBLE is 1.
 136  *
 137  * \note This variable cannot be placed inside other structures or classes, since
 138  *       some compilers (including at least clang-3.7) appear to lose the
 139  *       alignment. This is likely particularly severe when allocating such
 140  *       memory on the heap, but it occurs for stack structures too.
 141  */
 142 class SimdDBool
 143 {
 144 public:
 145     SimdDBool() {}
 146
 147     //! \brief Construct from scalar bool
 148     SimdDBool(bool b) { simdInternal_.fill(b); }
 149
 150     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 151      *
 152      * This has to be public to enable usage in combination with static inline
 153      * functions, but it should never, EVER, be accessed by any code outside
 154      * the corresponding implementation directory since the type will depend
 155      * on the architecture.
 156      */
 157     std::array<bool, GMX_SIMD_DOUBLE_WIDTH> simdInternal_;
 158 };
 159
 160 /*! \libinternal \brief Boolean type for integer datatypes corresponding to double SIMD.
 161  *
 162  * Available if GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
 163  *
 164  * \note This variable cannot be placed inside other structures or classes, since
 165  *       some compilers (including at least clang-3.7) appear to lose the
 166  *       alignment. This is likely particularly severe when allocating such
 167  *       memory on the heap, but it occurs for stack structures too.
 168  */
 169 class SimdDIBool
 170 {
 171 public:
 172     SimdDIBool() {}
 173
 174     //! \brief Construct from scalar
 175     SimdDIBool(bool b) { simdInternal_.fill(b); }
 176
 177     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 178      *
 179      * This has to be public to enable usage in combination with static inline
 180      * functions, but it should never, EVER, be accessed by any code outside
 181      * the corresponding implementation directory since the type will depend
 182      * on the architecture.
 183      */
 184     std::array<bool, GMX_SIMD_DINT32_WIDTH> simdInternal_;
 185 };
 186
 187 /*! \}
 188  *
 189  * \name SIMD implementation load/store operations for double precision floating point
 190  * \{
 191  */
 192
 193 /*! \brief Load \ref GMX_SIMD_DOUBLE_WIDTH numbers from aligned memory.
 194  *
 195  * \param m Pointer to memory aligned to the SIMD width.
 196  * \return SIMD variable with data loaded.
 197  */
 198 static inline SimdDouble gmx_simdcall simdLoad(const double* m, SimdDoubleTag = {})
 199 {
 200     SimdDouble a;
 201
 202     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(double)) == 0);
 203
 204     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 205     return a;
 206 }
 207
 208 /*! \brief Store the contents of SIMD double variable to aligned memory m.
 209  *
 210  * \param[out] m Pointer to memory, aligned to SIMD width.
 211  * \param a SIMD variable to store
 212  */
 213 static inline void gmx_simdcall store(double* m, SimdDouble a)
 214 {
 215     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(double)) == 0);
 216
 217     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 218 }
 219
 220 /*! \brief Load SIMD double from unaligned memory.
 221  *
 222  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 223  *
 224  * \param m Pointer to memory, no alignment requirement.
 225  * \return SIMD variable with data loaded.
 226  */
 227 static inline SimdDouble gmx_simdcall simdLoadU(const double* m, SimdDoubleTag = {})
 228 {
 229     SimdDouble a;
 230     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 231     return a;
 232 }
 233
 234 /*! \brief Store SIMD double to unaligned memory.
 235  *
 236  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 237  *
 238  * \param[out] m Pointer to memory, no alignment requirement.
 239  * \param a SIMD variable to store.
 240  */
 241 static inline void gmx_simdcall storeU(double* m, SimdDouble a)
 242 {
 243     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 244 }
 245
 246 /*! \brief Set all SIMD double variable elements to 0.0.
 247  *
 248  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 249  * internally to handle all types rather than adding the suffix used here.
 250  *
 251  * \return SIMD 0.0
 252  */
 253 static inline SimdDouble gmx_simdcall setZeroD()
 254 {
 255     return SimdDouble(0.0);
 256 }
 257
 258 /*! \}
 259  *
 260  * \name SIMD implementation load/store operations for integers (corresponding to double)
 261  * \{
 262  */
 263
 264 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
 265  *
 266  * You should typically just call \ref gmx::load(), which uses proxy objects
 267  * internally to handle all types rather than adding the suffix used here.
 268  *
 269  * \param m Pointer to memory, aligned to (double) integer SIMD width.
 270  * \return SIMD integer variable.
 271  */
 272 static inline SimdDInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdDInt32Tag)
 273 {
 274     SimdDInt32 a;
 275
 276     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(std::int32_t)) == 0);
 277
 278     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 279     return a;
 280 };
 281
 282 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
 283  *
 284  * \param m Memory aligned to (double) integer SIMD width.
 285  * \param a SIMD (double) integer variable to store.
 286  */
 287 static inline void gmx_simdcall store(std::int32_t* m, SimdDInt32 a)
 288 {
 289     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(std::int32_t)) == 0);
 290
 291     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 292 };
 293
 294 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx::SimdDouble.
 295  *
 296  * You should typically just call \ref gmx::loadU(), which uses proxy objects
 297  * internally to handle all types rather than adding the suffix used here.
 298  *
 299  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 300  *
 301  * \param m Pointer to memory, no alignment requirements.
 302  * \return SIMD integer variable.
 303  */
 304 static inline SimdDInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdDInt32Tag)
 305 {
 306     SimdDInt32 a;
 307     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 308     return a;
 309 }
 310
 311 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
 312  *
 313  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 314  *
 315  * \param m Memory pointer, no alignment requirements.
 316  * \param a SIMD (double) integer variable to store.
 317  */
 318 static inline void gmx_simdcall storeU(std::int32_t* m, SimdDInt32 a)
 319 {
 320     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 321 }
 322
 323 /*! \brief Set all SIMD (double) integer variable elements to 0.
 324  *
 325  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 326  * internally to handle all types rather than adding the suffix used here.
 327  *
 328  * \return SIMD 0
 329  */
 330 static inline SimdDInt32 gmx_simdcall setZeroDI()
 331 {
 332     return SimdDInt32(0);
 333 }
 334
 335 /*! \brief Extract element with index i from \ref gmx::SimdDInt32.
 336  *
 337  * Available if \ref GMX_SIMD_HAVE_DINT32_EXTRACT is 1.
 338  *
 339  * \tparam index Compile-time constant, position to extract (first position is 0)
 340  * \param  a     SIMD variable from which to extract value.
 341  * \return Single integer from position index in SIMD variable.
 342  */
 343 template<int index>
 344 static inline std::int32_t gmx_simdcall extract(SimdDInt32 a)
 345 {
 346     return a.simdInternal_[index];
 347 }
 348
 349 /*! \}
 350  *
 351  * \name SIMD implementation double precision floating-point bitwise logical operations
 352  * \{
 353  */
 354
 355 /*! \brief Bitwise and for two SIMD double variables.
 356  *
 357  * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 358  *
 359  * \param a data1
 360  * \param b data2
 361  * \return data1 & data2
 362  */
 363 static inline SimdDouble gmx_simdcall operator&(SimdDouble a, SimdDouble b)
 364 {
 365     SimdDouble res;
 366
 367     union
 368     {
 369         double       r;
 370         std::int64_t i;
 371     } conv1, conv2;
 372
 373     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 374     {
 375         conv1.r              = a.simdInternal_[i];
 376         conv2.r              = b.simdInternal_[i];
 377         conv1.i              = conv1.i & conv2.i;
 378         res.simdInternal_[i] = conv1.r;
 379     }
 380     return res;
 381 }
 382
 383 /*! \brief Bitwise andnot for SIMD double.
 384  *
 385  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 386  *
 387  * \param a data1
 388  * \param b data2
 389  * \return (~data1) & data2
 390  */
 391 static inline SimdDouble gmx_simdcall andNot(SimdDouble a, SimdDouble b)
 392 {
 393     SimdDouble res;
 394
 395     union
 396     {
 397         double       r;
 398         std::int64_t i;
 399     } conv1, conv2;
 400
 401     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 402     {
 403         conv1.r              = a.simdInternal_[i];
 404         conv2.r              = b.simdInternal_[i];
 405         conv1.i              = ~conv1.i & conv2.i;
 406         res.simdInternal_[i] = conv1.r;
 407     }
 408     return res;
 409 }
 410
 411 /*! \brief Bitwise or for SIMD double.
 412  *
 413  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 414  *
 415  * \param a data1
 416  * \param b data2
 417  * \return data1 | data2
 418  */
 419 static inline SimdDouble gmx_simdcall operator|(SimdDouble a, SimdDouble b)
 420 {
 421     SimdDouble res;
 422
 423     union
 424     {
 425         double       r;
 426         std::int64_t i;
 427     } conv1, conv2;
 428
 429     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 430     {
 431         conv1.r              = a.simdInternal_[i];
 432         conv2.r              = b.simdInternal_[i];
 433         conv1.i              = conv1.i | conv2.i;
 434         res.simdInternal_[i] = conv1.r;
 435     }
 436     return res;
 437 }
 438
 439 /*! \brief Bitwise xor for SIMD double.
 440  *
 441  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 442  *
 443  * \param a data1
 444  * \param b data2
 445  * \return data1 ^ data2
 446  */
 447 static inline SimdDouble gmx_simdcall operator^(SimdDouble a, SimdDouble b)
 448 {
 449     SimdDouble res;
 450
 451     union
 452     {
 453         double       r;
 454         std::int64_t i;
 455     } conv1, conv2;
 456
 457     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 458     {
 459         conv1.r              = a.simdInternal_[i];
 460         conv2.r              = b.simdInternal_[i];
 461         conv1.i              = conv1.i ^ conv2.i;
 462         res.simdInternal_[i] = conv1.r;
 463     }
 464     return res;
 465 }
 466
 467 /*! \}
 468  *
 469  * \name SIMD implementation double precision floating-point arithmetics
 470  * \{
 471  */
 472
 473 /*! \brief Add two double SIMD variables.
 474  *
 475  * \param a term1
 476  * \param b term2
 477  * \return a+b
 478  */
 479 static inline SimdDouble gmx_simdcall operator+(SimdDouble a, SimdDouble b)
 480 {
 481     SimdDouble res;
 482
 483     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 484     {
 485         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
 486     }
 487     return res;
 488 }
 489
 490 /*! \brief Subtract two double SIMD variables.
 491  *
 492  * \param a term1
 493  * \param b term2
 494  * \return a-b
 495  */
 496 static inline SimdDouble gmx_simdcall operator-(SimdDouble a, SimdDouble b)
 497 {
 498     SimdDouble res;
 499
 500     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 501     {
 502         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
 503     }
 504     return res;
 505 }
 506
 507 /*! \brief SIMD double precision negate.
 508  *
 509  * \param a SIMD double precision value
 510  * \return -a
 511  */
 512 static inline SimdDouble gmx_simdcall operator-(SimdDouble a)
 513 {
 514     SimdDouble res;
 515
 516     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 517     {
 518         res.simdInternal_[i] = -a.simdInternal_[i];
 519     }
 520     return res;
 521 }
 522
 523 /*! \brief Multiply two double SIMD variables.
 524  *
 525  * \param a factor1
 526  * \param b factor2
 527  * \return a*b.
 528  */
 529 static inline SimdDouble gmx_simdcall operator*(SimdDouble a, SimdDouble b)
 530 {
 531     SimdDouble res;
 532
 533     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 534     {
 535         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
 536     }
 537     return res;
 538 }
 539
 540 /*! \brief SIMD double Fused-multiply-add. Result is a*b+c.
 541  *
 542  * \param a factor1
 543  * \param b factor2
 544  * \param c term
 545  * \return a*b+c
 546  */
 547 static inline SimdDouble gmx_simdcall fma(SimdDouble a, SimdDouble b, SimdDouble c)
 548 {
 549     return a * b + c;
 550 }
 551
 552 /*! \brief SIMD double Fused-multiply-subtract. Result is a*b-c.
 553  *
 554  * \param a factor1
 555  * \param b factor2
 556  * \param c term
 557  * \return a*b-c
 558  */
 559 static inline SimdDouble gmx_simdcall fms(SimdDouble a, SimdDouble b, SimdDouble c)
 560 {
 561     return a * b - c;
 562 }
 563
 564 /*! \brief SIMD double Fused-negated-multiply-add. Result is -a*b+c.
 565  *
 566  * \param a factor1
 567  * \param b factor2
 568  * \param c term
 569  * \return -a*b+c
 570  */
 571 static inline SimdDouble gmx_simdcall fnma(SimdDouble a, SimdDouble b, SimdDouble c)
 572 {
 573     return c - a * b;
 574 }
 575
 576 /*! \brief SIMD double Fused-negated-multiply-subtract. Result is -a*b-c.
 577  *
 578  * \param a factor1
 579  * \param b factor2
 580  * \param c term
 581  * \return -a*b-c
 582  */
 583 static inline SimdDouble gmx_simdcall fnms(SimdDouble a, SimdDouble b, SimdDouble c)
 584 {
 585     return -a * b - c;
 586 }
 587
 588 /*! \brief double SIMD 1.0/sqrt(x) lookup.
 589  *
 590  * This is a low-level instruction that should only be called from routines
 591  * implementing the inverse square root in simd_math.h.
 592  *
 593  * \param x Argument, x>0
 594  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 595  */
 596 static inline SimdDouble gmx_simdcall rsqrt(SimdDouble x)
 597 {
 598     SimdDouble res;
 599
 600     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 601     {
 602         // sic - we only use single precision for the lookup
 603         res.simdInternal_[i] = 1.0F / std::sqrt(static_cast<float>(x.simdInternal_[i]));
 604     }
 605     return res;
 606 };
 607
 608 /*! \brief SIMD double 1.0/x lookup.
 609  *
 610  * This is a low-level instruction that should only be called from routines
 611  * implementing the reciprocal in simd_math.h.
 612  *
 613  * \param x Argument, x!=0
 614  * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
 615  */
 616 static inline SimdDouble gmx_simdcall rcp(SimdDouble x)
 617 {
 618     SimdDouble res;
 619
 620     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 621     {
 622         // sic - we only use single precision for the lookup
 623         res.simdInternal_[i] = 1.0F / static_cast<float>(x.simdInternal_[i]);
 624     }
 625     return res;
 626 };
 627
 628 /*! \brief Add two double SIMD variables, masked version.
 629  *
 630  * \param a term1
 631  * \param b term2
 632  * \param m mask
 633  * \return a+b where mask is true, 0.0 otherwise.
 634  */
 635 static inline SimdDouble gmx_simdcall maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
 636 {
 637     SimdDouble res;
 638
 639     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 640     {
 641         res.simdInternal_[i] = a.simdInternal_[i] + (m.simdInternal_[i] ? b.simdInternal_[i] : 0.0);
 642     }
 643     return res;
 644 }
 645
 646 /*! \brief Multiply two double SIMD variables, masked version.
 647  *
 648  * \param a factor1
 649  * \param b factor2
 650  * \param m mask
 651  * \return a*b where mask is true, 0.0 otherwise.
 652  */
 653 static inline SimdDouble gmx_simdcall maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
 654 {
 655     SimdDouble res;
 656
 657     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 658     {
 659         res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i]) : 0.0;
 660     }
 661     return res;
 662 }
 663
 664 /*! \brief SIMD double fused multiply-add, masked version.
 665  *
 666  * \param a factor1
 667  * \param b factor2
 668  * \param c term
 669  * \param m mask
 670  * \return a*b+c where mask is true, 0.0 otherwise.
 671  */
 672 static inline SimdDouble gmx_simdcall maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
 673 {
 674     SimdDouble res;
 675
 676     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 677     {
 678         res.simdInternal_[i] =
 679                 m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i] + c.simdInternal_[i]) : 0.0;
 680     }
 681     return res;
 682 }
 683
 684 /*! \brief SIMD double 1.0/sqrt(x) lookup, masked version.
 685  *
 686  * This is a low-level instruction that should only be called from routines
 687  * implementing the inverse square root in simd_math.h.
 688  *
 689  * \param x Argument, x>0 for entries where mask is true.
 690  * \param m Mask
 691  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 692  *         The result for masked-out entries will be 0.0.
 693  */
 694 static inline SimdDouble gmx_simdcall maskzRsqrt(SimdDouble x, SimdDBool m)
 695 {
 696     SimdDouble res;
 697
 698     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 699     {
 700         // sic - we only use single precision for the lookup
 701         res.simdInternal_[i] = (m.simdInternal_[i] != 0)
 702                                        ? 1.0F / std::sqrt(static_cast<float>(x.simdInternal_[i]))
 703                                        : 0.0;
 704     }
 705     return res;
 706 }
 707
 708 /*! \brief SIMD double 1.0/x lookup, masked version.
 709  *
 710  * This is a low-level instruction that should only be called from routines
 711  * implementing the reciprocal in simd_math.h.
 712  *
 713  * \param x Argument, x>0 for entries where mask is true.
 714  * \param m Mask
 715  * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
 716  *         The result for masked-out entries will be 0.0.
 717  */
 718 static inline SimdDouble gmx_simdcall maskzRcp(SimdDouble x, SimdDBool m)
 719 {
 720     SimdDouble res;
 721
 722     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 723     {
 724         res.simdInternal_[i] =
 725                 (m.simdInternal_[i] != 0) ? 1.0F / static_cast<float>(x.simdInternal_[i]) : 0.0;
 726     }
 727     return res;
 728 }
 729
 730 /*! \brief SIMD double floating-point fabs().
 731  *
 732  * \param a any floating point values
 733  * \return fabs(a) for each element.
 734  */
 735 static inline SimdDouble gmx_simdcall abs(SimdDouble a)
 736 {
 737     SimdDouble res;
 738
 739     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 740     {
 741         res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
 742     }
 743     return res;
 744 }
 745
 746 /*! \brief Set each SIMD double element to the largest from two variables.
 747  *
 748  * \param a Any floating-point value
 749  * \param b Any floating-point value
 750  * \return max(a,b) for each element.
 751  */
 752 static inline SimdDouble gmx_simdcall max(SimdDouble a, SimdDouble b)
 753 {
 754     SimdDouble res;
 755
 756     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 757     {
 758         res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
 759     }
 760     return res;
 761 }
 762
 763 /*! \brief Set each SIMD double element to the smallest from two variables.
 764  *
 765  * \param a Any floating-point value
 766  * \param b Any floating-point value
 767  * \return min(a,b) for each element.
 768  */
 769 static inline SimdDouble gmx_simdcall min(SimdDouble a, SimdDouble b)
 770 {
 771     SimdDouble res;
 772
 773     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 774     {
 775         res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
 776     }
 777     return res;
 778 }
 779
 780 /*! \brief SIMD double round to nearest integer value (in floating-point format).
 781  *
 782  * \param a Any floating-point value
 783  * \return The nearest integer, represented in floating-point format.
 784  *
 785  * \note Round mode is implementation defined. The only guarantee is that it
 786  * is consistent between rounding functions (round, cvtR2I).
 787  */
 788 static inline SimdDouble gmx_simdcall round(SimdDouble a)
 789 {
 790     SimdDouble res;
 791
 792     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 793     {
 794         res.simdInternal_[i] = std::round(a.simdInternal_[i]);
 795     }
 796     return res;
 797 }
 798
 799 /*! \brief Truncate SIMD double, i.e. round towards zero - common hardware instruction.
 800  *
 801  * \param a Any floating-point value
 802  * \return Integer rounded towards zero, represented in floating-point format.
 803  *
 804  * \note This is truncation towards zero, not floor(). The reason for this
 805  * is that truncation is virtually always present as a dedicated hardware
 806  * instruction, but floor() frequently isn't.
 807  */
 808 static inline SimdDouble gmx_simdcall trunc(SimdDouble a)
 809 {
 810     SimdDouble res;
 811
 812     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 813     {
 814         res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
 815     }
 816     return res;
 817 }
 818
 819 /*! \brief Extract (integer) exponent and fraction from double precision SIMD.
 820  *
 821  * \tparam      opt       By default this function behaves like the standard
 822  *                        library such that frexp(+-0,exp) returns +-0 and
 823  *                        stores 0 in the exponent when value is 0. If you
 824  *                        know the argument is always nonzero, you can set
 825  *                        the template parameter to MathOptimization::Unsafe
 826  *                        to make it slightly faster.
 827  *
 828  * \param       value     Floating-point value to extract from
 829  * \param[out]  exponent  Returned exponent of value, integer SIMD format.
 830  * \return      Fraction of value, floating-point SIMD format.
 831  */
 832 template<MathOptimization opt = MathOptimization::Safe>
 833 static inline SimdDouble gmx_simdcall frexp(SimdDouble value, SimdDInt32* exponent)
 834 {
 835     SimdDouble fraction;
 836
 837     for (std::size_t i = 0; i < fraction.simdInternal_.size(); i++)
 838     {
 839         fraction.simdInternal_[i] = std::frexp(value.simdInternal_[i], &exponent->simdInternal_[i]);
 840     }
 841     return fraction;
 842 }
 843
 844 /*! \brief Multiply a SIMD double value by the number 2 raised to an exp power.
 845  *
 846  * \tparam opt By default, this routine will return zero for input arguments
 847  *             that are so small they cannot be reproduced in the current
 848  *             precision. If the unsafe math optimization template parameter
 849  *             setting is used, these tests are skipped, and the result will
 850  *             be undefined (possible even NaN). This might happen below -127
 851  *             in single precision or -1023 in double, although some
 852  *             might use denormal support to extend the range.
 853  *
 854  * \param value Floating-point number to multiply with new exponent
 855  * \param exponent Integer that will not overflow as 2^exponent.
 856  * \return value*2^exponent
 857  */
 858 template<MathOptimization opt = MathOptimization::Safe>
 859 static inline SimdDouble gmx_simdcall ldexp(SimdDouble value, SimdDInt32 exponent)
 860 {
 861     SimdDouble res;
 862
 863     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 864     {
 865         // std::ldexp already takes care of clamping arguments, so we do not
 866         // need to do anything in the reference implementation
 867         res.simdInternal_[i] = std::ldexp(value.simdInternal_[i], exponent.simdInternal_[i]);
 868     }
 869     return res;
 870 }
 871
 872 /*! \brief Return sum of all elements in SIMD double variable.
 873  *
 874  * \param a SIMD variable to reduce/sum.
 875  * \return The sum of all elements in the argument variable.
 876  *
 877  */
 878 static inline double gmx_simdcall reduce(SimdDouble a)
 879 {
 880     double sum = 0.0;
 881
 882     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
 883     {
 884         sum += a.simdInternal_[i];
 885     }
 886     return sum;
 887 }
 888
 889 /*! \}
 890  *
 891  * \name SIMD implementation double precision floating-point comparison, boolean, selection.
 892  * \{
 893  */
 894
 895 /*! \brief SIMD a==b for double SIMD.
 896  *
 897  * \param a value1
 898  * \param b value2
 899  * \return Each element of the boolean will be set to true if a==b.
 900  *
 901  * Beware that exact floating-point comparisons are difficult.
 902  */
 903 static inline SimdDBool gmx_simdcall operator==(SimdDouble a, SimdDouble b)
 904 {
 905     SimdDBool res;
 906
 907     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 908     {
 909         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
 910     }
 911     return res;
 912 }
 913
 914 /*! \brief SIMD a!=b for double SIMD.
 915  *
 916  * \param a value1
 917  * \param b value2
 918  * \return Each element of the boolean will be set to true if a!=b.
 919  *
 920  * Beware that exact floating-point comparisons are difficult.
 921  */
 922 static inline SimdDBool gmx_simdcall operator!=(SimdDouble a, SimdDouble b)
 923 {
 924     SimdDBool res;
 925
 926     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 927     {
 928         res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
 929     }
 930     return res;
 931 }
 932
 933 /*! \brief SIMD a<b for double SIMD.
 934  *
 935  * \param a value1
 936  * \param b value2
 937  * \return Each element of the boolean will be set to true if a<b.
 938  */
 939 static inline SimdDBool gmx_simdcall operator<(SimdDouble a, SimdDouble b)
 940 {
 941     SimdDBool res;
 942
 943     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 944     {
 945         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
 946     }
 947     return res;
 948 }
 949
 950 /*! \brief SIMD a<=b for double SIMD.
 951  *
 952  * \param a value1
 953  * \param b value2
 954  * \return Each element of the boolean will be set to true if a<=b.
 955  */
 956 static inline SimdDBool gmx_simdcall operator<=(SimdDouble a, SimdDouble b)
 957 {
 958     SimdDBool res;
 959
 960     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 961     {
 962         res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
 963     }
 964     return res;
 965 }
 966
 967 /*! \brief Return true if any bits are set in the single precision SIMD.
 968  *
 969  * This function is used to handle bitmasks, mainly for exclusions in the
 970  * inner kernels. Note that it will return true even for -0.0 (sign bit set),
 971  * so it is not identical to not-equal.
 972  *
 973  * \param a value
 974  * \return Each element of the boolean will be true if any bit in a is nonzero.
 975  */
 976 static inline SimdDBool gmx_simdcall testBits(SimdDouble a)
 977 {
 978     SimdDBool res;
 979
 980     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 981     {
 982         union
 983         {
 984             std::uint64_t i;
 985             double        d;
 986         } conv;
 987
 988         conv.d               = a.simdInternal_[i];
 989         res.simdInternal_[i] = (conv.i != 0);
 990     }
 991     return res;
 992 }
 993
 994 /*! \brief Logical \a and on double precision SIMD booleans.
 995  *
 996  * \param a logical vars 1
 997  * \param b logical vars 2
 998  * \return For each element, the result boolean is true if a \& b are true.
 999  *
1000  * \note This is not necessarily a bitwise operation - the storage format
1001  * of booleans is implementation-dependent.
1002  */
1003 static inline SimdDBool gmx_simdcall operator&&(SimdDBool a, SimdDBool b)
1004 {
1005     SimdDBool res;
1006
1007     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1008     {
1009         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1010     }
1011     return res;
1012 }
1013
1014 /*! \brief Logical \a or on double precision SIMD booleans.
1015  *
1016  * \param a logical vars 1
1017  * \param b logical vars 2
1018  * \return For each element, the result boolean is true if a or b is true.
1019  *
1020  * Note that this is not necessarily a bitwise operation - the storage format
1021  * of booleans is implementation-dependent.
1022  *
1023  \ */
1024 static inline SimdDBool gmx_simdcall operator||(SimdDBool a, SimdDBool b)
1025 {
1026     SimdDBool res;
1027
1028     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1029     {
1030         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1031     }
1032     return res;
1033 }
1034
1035 /*! \brief Returns non-zero if any of the boolean in SIMD a is True, otherwise 0.
1036  *
1037  * \param a Logical variable.
1038  * \return true if any element in a is true, otherwise false.
1039  *
1040  * The actual return value for truth will depend on the architecture,
1041  * so any non-zero value is considered truth.
1042  */
1043 static inline bool gmx_simdcall anyTrue(SimdDBool a)
1044 {
1045     bool res = false;
1046
1047     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1048     {
1049         res = res || a.simdInternal_[i];
1050     }
1051     return res;
1052 }
1053
1054 /*! \brief Select from double precision SIMD variable where boolean is true.
1055  *
1056  * \param a Floating-point variable to select from
1057  * \param mask Boolean selector
1058  * \return  For each element, a is selected for true, 0 for false.
1059  */
1060 static inline SimdDouble gmx_simdcall selectByMask(SimdDouble a, SimdDBool mask)
1061 {
1062     SimdDouble res;
1063
1064     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1065     {
1066         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0;
1067     }
1068     return res;
1069 }
1070
1071 /*! \brief Select from double precision SIMD variable where boolean is false.
1072  *
1073  * \param a Floating-point variable to select from
1074  * \param mask Boolean selector
1075  * \return  For each element, a is selected for false, 0 for true (sic).
1076  */
1077 static inline SimdDouble gmx_simdcall selectByNotMask(SimdDouble a, SimdDBool mask)
1078 {
1079     SimdDouble res;
1080
1081     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1082     {
1083         res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0 : a.simdInternal_[i];
1084     }
1085     return res;
1086 }
1087
1088 /*! \brief Vector-blend SIMD double selection.
1089  *
1090  * \param a First source
1091  * \param b Second source
1092  * \param sel Boolean selector
1093  * \return For each element, select b if sel is true, a otherwise.
1094  */
1095 static inline SimdDouble gmx_simdcall blend(SimdDouble a, SimdDouble b, SimdDBool sel)
1096 {
1097     SimdDouble res;
1098
1099     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1100     {
1101         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1102     }
1103     return res;
1104 }
1105
1106 /*! \}
1107  *
1108  * \name SIMD implementation integer (corresponding to double) bitwise logical operations
1109  * \{
1110  */
1111
1112 /*! \brief Integer SIMD bitwise and.
1113  *
1114  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1115  *
1116  * \note You can \a not use this operation directly to select based on a boolean
1117  * SIMD variable, since booleans are separate from integer SIMD. If that
1118  * is what you need, have a look at \ref gmx::selectByMask instead.
1119  *
1120  * \param a first integer SIMD
1121  * \param b second integer SIMD
1122  * \return a \& b (bitwise and)
1123  */
1124 static inline SimdDInt32 gmx_simdcall operator&(SimdDInt32 a, SimdDInt32 b)
1125 {
1126     SimdDInt32 res;
1127
1128     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1129     {
1130         res.simdInternal_[i] = a.simdInternal_[i] & b.simdInternal_[i];
1131     }
1132     return res;
1133 }
1134
1135 /*! \brief Integer SIMD bitwise not/complement.
1136  *
1137  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1138  *
1139  * \note You can \a not use this operation directly to select based on a boolean
1140  * SIMD variable, since booleans are separate from integer SIMD. If that
1141  * is what you need, have a look at \ref gmx::selectByMask instead.
1142  *
1143  * \param a integer SIMD
1144  * \param b integer SIMD
1145  * \return (~a) & b
1146  */
1147 static inline SimdDInt32 gmx_simdcall andNot(SimdDInt32 a, SimdDInt32 b)
1148 {
1149     SimdDInt32 res;
1150
1151     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1152     {
1153         res.simdInternal_[i] = ~a.simdInternal_[i] & b.simdInternal_[i];
1154     }
1155     return res;
1156 }
1157
1158 /*! \brief Integer SIMD bitwise or.
1159  *
1160  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1161  *
1162  * \param a first integer SIMD
1163  * \param b second integer SIMD
1164  * \return a \| b (bitwise or)
1165  */
1166 static inline SimdDInt32 gmx_simdcall operator|(SimdDInt32 a, SimdDInt32 b)
1167 {
1168     SimdDInt32 res;
1169
1170     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1171     {
1172         res.simdInternal_[i] = a.simdInternal_[i] | b.simdInternal_[i];
1173     }
1174     return res;
1175 }
1176
1177 /*! \brief Integer SIMD bitwise xor.
1178  *
1179  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1180  *
1181  * \param a first integer SIMD
1182  * \param b second integer SIMD
1183  * \return a ^ b (bitwise xor)
1184  */
1185 static inline SimdDInt32 gmx_simdcall operator^(SimdDInt32 a, SimdDInt32 b)
1186 {
1187     SimdDInt32 res;
1188
1189     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1190     {
1191         res.simdInternal_[i] = a.simdInternal_[i] ^ b.simdInternal_[i];
1192     }
1193     return res;
1194 }
1195
1196 /*! \}
1197  *
1198  * \name SIMD implementation integer (corresponding to double) arithmetics
1199  * \{
1200  */
1201
1202 /*! \brief Add SIMD integers.
1203  *
1204  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1205  *
1206  * \param a term1
1207  * \param b term2
1208  * \return a+b
1209  */
1210 static inline SimdDInt32 gmx_simdcall operator+(SimdDInt32 a, SimdDInt32 b)
1211 {
1212     SimdDInt32 res;
1213
1214     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1215     {
1216         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
1217     }
1218     return res;
1219 }
1220
1221 /*! \brief Subtract SIMD integers.
1222  *
1223  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1224  *
1225  * \param a term1
1226  * \param b term2
1227  * \return a-b
1228  */
1229 static inline SimdDInt32 gmx_simdcall operator-(SimdDInt32 a, SimdDInt32 b)
1230 {
1231     SimdDInt32 res;
1232
1233     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1234     {
1235         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
1236     }
1237     return res;
1238 }
1239
1240 /*! \brief Multiply SIMD integers.
1241  *
1242  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1243  *
1244  * \param a factor1
1245  * \param b factor2
1246  * \return a*b.
1247  *
1248  * \note Only the low 32 bits are retained, so this can overflow.
1249  */
1250 static inline SimdDInt32 gmx_simdcall operator*(SimdDInt32 a, SimdDInt32 b)
1251 {
1252     SimdDInt32 res;
1253
1254     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1255     {
1256         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
1257     }
1258     return res;
1259 }
1260
1261 /*! \}
1262  *
1263  * \name SIMD implementation integer (corresponding to double) comparisons, boolean selection
1264  * \{
1265  */
1266
1267 /*! \brief Equality comparison of two integers corresponding to double values.
1268  *
1269  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1270  *
1271  * \param a SIMD integer1
1272  * \param b SIMD integer2
1273  * \return SIMD integer boolean with true for elements where a==b
1274  */
1275 static inline SimdDIBool gmx_simdcall operator==(SimdDInt32 a, SimdDInt32 b)
1276 {
1277     SimdDIBool res;
1278
1279     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1280     {
1281         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
1282     }
1283     return res;
1284 }
1285
1286 /*! \brief Less-than comparison of two SIMD integers corresponding to double values.
1287  *
1288  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1289  *
1290  * \param a SIMD integer1
1291  * \param b SIMD integer2
1292  * \return SIMD integer boolean with true for elements where a<b
1293  */
1294 static inline SimdDIBool gmx_simdcall operator<(SimdDInt32 a, SimdDInt32 b)
1295 {
1296     SimdDIBool res;
1297
1298     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1299     {
1300         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
1301     }
1302     return res;
1303 }
1304
1305 /*! \brief Check if any bit is set in each element
1306  *
1307  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1308  *
1309  * \param a SIMD integer
1310  * \return SIMD integer boolean with true for elements where any bit is set
1311  */
1312 static inline SimdDIBool gmx_simdcall testBits(SimdDInt32 a)
1313 {
1314     SimdDIBool res;
1315
1316     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1317     {
1318         res.simdInternal_[i] = (a.simdInternal_[i] != 0);
1319     }
1320     return res;
1321 }
1322
1323 /*! \brief Logical AND on SimdDIBool.
1324  *
1325  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1326  *
1327  * \param a SIMD boolean 1
1328  * \param b SIMD boolean 2
1329  * \return True for elements where both a and b are true.
1330  */
1331 static inline SimdDIBool gmx_simdcall operator&&(SimdDIBool a, SimdDIBool b)
1332 {
1333     SimdDIBool res;
1334
1335     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1336     {
1337         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1338     }
1339     return res;
1340 }
1341
1342 /*! \brief Logical OR on SimdDIBool.
1343  *
1344  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1345  *
1346  * \param a SIMD boolean 1
1347  * \param b SIMD boolean 2
1348  * \return True for elements where both a and b are true.
1349  */
1350 static inline SimdDIBool gmx_simdcall operator||(SimdDIBool a, SimdDIBool b)
1351 {
1352     SimdDIBool res;
1353
1354     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1355     {
1356         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1357     }
1358     return res;
1359 }
1360
1361 /*! \brief Returns true if any of the boolean in x is True, otherwise 0.
1362  *
1363  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1364  *
1365  * The actual return value for "any true" will depend on the architecture.
1366  * Any non-zero value should be considered truth.
1367  *
1368  * \param a SIMD boolean
1369  * \return True if any of the elements in a is true, otherwise 0.
1370  */
1371 static inline bool gmx_simdcall anyTrue(SimdDIBool a)
1372 {
1373     bool res = false;
1374
1375     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1376     {
1377         res = res || a.simdInternal_[i];
1378     }
1379     return res;
1380 }
1381
1382 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is true.
1383  *
1384  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1385  *
1386  * \param a SIMD integer to select from
1387  * \param mask Boolean selector
1388  * \return Elements from a where sel is true, 0 otherwise.
1389  */
1390 static inline SimdDInt32 gmx_simdcall selectByMask(SimdDInt32 a, SimdDIBool mask)
1391 {
1392     SimdDInt32 res;
1393
1394     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1395     {
1396         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0;
1397     }
1398     return res;
1399 }
1400
1401 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is false.
1402  *
1403  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1404  *
1405  * \param a SIMD integer to select from
1406  * \param mask Boolean selector
1407  * \return Elements from a where sel is false, 0 otherwise (sic).
1408  */
1409 static inline SimdDInt32 gmx_simdcall selectByNotMask(SimdDInt32 a, SimdDIBool mask)
1410 {
1411     SimdDInt32 res;
1412
1413     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1414     {
1415         res.simdInternal_[i] = mask.simdInternal_[i] ? 0 : a.simdInternal_[i];
1416     }
1417     return res;
1418 }
1419
1420 /*! \brief Vector-blend SIMD integer selection.
1421  *
1422  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1423  *
1424  * \param a First source
1425  * \param b Second source
1426  * \param sel Boolean selector
1427  * \return For each element, select b if sel is true, a otherwise.
1428  */
1429 static inline SimdDInt32 gmx_simdcall blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
1430 {
1431     SimdDInt32 res;
1432
1433     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1434     {
1435         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1436     }
1437     return res;
1438 }
1439
1440 /*! \}
1441  *
1442  * \name SIMD implementation conversion operations
1443  * \{
1444  */
1445
1446 /*! \brief Round double precision floating point to integer.
1447  *
1448  * \param a SIMD floating-point
1449  * \return SIMD integer, rounded to nearest integer.
1450  *
1451  * \note Round mode is implementation defined. The only guarantee is that it
1452  * is consistent between rounding functions (round, cvtR2I).
1453  */
1454 static inline SimdDInt32 gmx_simdcall cvtR2I(SimdDouble a)
1455 {
1456     SimdDInt32 b;
1457
1458     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1459     {
1460         b.simdInternal_[i] = std::round(a.simdInternal_[i]);
1461     }
1462     return b;
1463 };
1464
1465 /*! \brief Truncate double precision floating point to integer.
1466  *
1467  * \param a SIMD floating-point
1468  * \return SIMD integer, truncated to nearest integer.
1469  */
1470 static inline SimdDInt32 gmx_simdcall cvttR2I(SimdDouble a)
1471 {
1472     SimdDInt32 b;
1473
1474     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1475     {
1476         b.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
1477     }
1478     return b;
1479 };
1480
1481 /*! \brief Convert integer to double precision floating point.
1482  *
1483  * \param a SIMD integer
1484  * \return SIMD floating-point
1485  */
1486 static inline SimdDouble gmx_simdcall cvtI2R(SimdDInt32 a)
1487 {
1488     SimdDouble b;
1489
1490     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1491     {
1492         b.simdInternal_[i] = a.simdInternal_[i];
1493     }
1494     return b;
1495 };
1496
1497 /*! \brief Convert from double precision boolean to corresponding integer boolean
1498  *
1499  * \param a SIMD floating-point boolean
1500  * \return SIMD integer boolean
1501  */
1502 static inline SimdDIBool gmx_simdcall cvtB2IB(SimdDBool a)
1503 {
1504     SimdDIBool b;
1505
1506     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1507     {
1508         b.simdInternal_[i] = a.simdInternal_[i];
1509     }
1510     return b;
1511 };
1512
1513 /*! \brief Convert from integer boolean to corresponding double precision boolean
1514  *
1515  * \param a SIMD integer boolean
1516  * \return SIMD floating-point boolean
1517  */
1518 static inline SimdDBool gmx_simdcall cvtIB2B(SimdDIBool a)
1519 {
1520     SimdDBool b;
1521
1522     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1523     {
1524         b.simdInternal_[i] = a.simdInternal_[i];
1525     }
1526     return b;
1527 };
1528
1529 /*! \brief Convert SIMD float to double.
1530  *
1531  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1532  * \ref GMX_SIMD_DOUBLE_WIDTH.
1533  *
1534  * Float/double conversions are complex since the SIMD width could either
1535  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1536  * need to check for the width in the code, and have different code paths.
1537  *
1538  * \param f Single-precision SIMD variable
1539  * \return Double-precision SIMD variable of the same width
1540  */
1541 static inline SimdDouble gmx_simdcall cvtF2D(SimdFloat gmx_unused f)
1542 {
1543 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1544     SimdDouble d;
1545     for (std::size_t i = 0; i < d.simdInternal_.size(); i++)
1546     {
1547         d.simdInternal_[i] = f.simdInternal_[i];
1548     }
1549     return d;
1550 #else
1551     gmx_fatal(FARGS, "cvtF2D() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1552 #endif
1553 }
1554
1555 /*! \brief Convert SIMD double to float.
1556  *
1557  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1558  * \ref GMX_SIMD_DOUBLE_WIDTH.
1559  *
1560  * Float/double conversions are complex since the SIMD width could either
1561  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1562  * need to check for the width in the code, and have different code paths.
1563  *
1564  * \param d Double-precision SIMD variable
1565  * \return Single-precision SIMD variable of the same width
1566  */
1567 static inline SimdFloat gmx_simdcall cvtD2F(SimdDouble gmx_unused d)
1568 {
1569 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1570     SimdFloat f;
1571     for (std::size_t i = 0; i < f.simdInternal_.size(); i++)
1572     {
1573         f.simdInternal_[i] = d.simdInternal_[i];
1574     }
1575     return f;
1576 #else
1577     gmx_fatal(FARGS, "cvtD2F() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1578 #endif
1579 }
1580
1581 /*! \brief Convert SIMD float to double.
1582  *
1583  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1584  * as \ref GMX_SIMD_DOUBLE_WIDTH.
1585  *
1586  * Float/double conversions are complex since the SIMD width could either
1587  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1588  * need to check for the width in the code, and have different code paths.
1589  *
1590  * \param f Single-precision SIMD variable
1591  * \param[out] d0 Double-precision SIMD variable, first half of values from f.
1592  * \param[out] d1 Double-precision SIMD variable, second half of values from f.
1593  */
1594 static inline void gmx_simdcall cvtF2DD(SimdFloat gmx_unused f,
1595                                         SimdDouble gmx_unused* d0,
1596                                         SimdDouble gmx_unused* d1)
1597 {
1598 #if (GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH)
1599     for (std::size_t i = 0; i < d0->simdInternal_.size(); i++)
1600     {
1601         d0->simdInternal_[i] = f.simdInternal_[i];
1602         d1->simdInternal_[i] = f.simdInternal_[f.simdInternal_.size() / 2 + i];
1603     }
1604 #else
1605     gmx_fatal(FARGS, "simdCvtF2DD() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1606 #endif
1607 }
1608
1609 /*! \brief Convert SIMD double to float.
1610  *
1611  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1612  * as \ref GMX_SIMD_DOUBLE_WIDTH.
1613  *
1614  * Float/double conversions are complex since the SIMD width could either
1615  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1616  * need to check for the width in the code, and have different code paths.
1617  *
1618  * \param d0 Double-precision SIMD variable, first half of values to put in f.
1619  * \param d1 Double-precision SIMD variable, second half of values to put in f.
1620  * \return Single-precision SIMD variable with all values.
1621  */
1622 static inline SimdFloat gmx_simdcall cvtDD2F(SimdDouble gmx_unused d0, SimdDouble gmx_unused d1)
1623 {
1624 #if (GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH)
1625     SimdFloat f;
1626     for (std::size_t i = 0; i < d0.simdInternal_.size(); i++)
1627     {
1628         f.simdInternal_[i]                              = d0.simdInternal_[i];
1629         f.simdInternal_[f.simdInternal_.size() / 2 + i] = d1.simdInternal_[i];
1630     }
1631     return f;
1632 #else
1633     gmx_fatal(FARGS, "simdCvtDD2F() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1634 #endif
1635 }
1636
1637 /*! \} */
1638
1639 /*! \} */
1640 /*! \endcond */
1641
1642 } // namespace gmx
1643
1644 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H